Skip to content

Commit

Permalink
Merge branch 'master' into feature/cus-3546-tableau-fine-grained-page…
Browse files Browse the repository at this point in the history
…-size
  • Loading branch information
sgomezvillamor authored Jan 16, 2025
2 parents 0d869fb + 18701b7 commit 8cd90bd
Show file tree
Hide file tree
Showing 12 changed files with 47 additions and 22 deletions.
2 changes: 0 additions & 2 deletions metadata-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black --check --diff src/ tests/ examples/ && " +
"isort --check --diff src/ tests/ examples/ && " +
"ruff check src/ tests/ examples/ && " +
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
}
Expand All @@ -118,7 +117,6 @@ task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black src/ tests/ examples/ && " +
"isort src/ tests/ examples/ && " +
"ruff check --fix src/ tests/ examples/"
}

Expand Down
3 changes: 1 addition & 2 deletions metadata-ingestion/developing.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,11 @@ The architecture of this metadata ingestion framework is heavily inspired by [Ap

## Code style

We use black, isort, flake8, and mypy to ensure consistent code style and quality.
We use black, ruff, and mypy to ensure consistent code style and quality.

```shell
# Assumes: pip install -e '.[dev]' and venv is activated
black src/ tests/
isort src/ tests/
ruff check src/ tests/
mypy src/ tests/
```
Expand Down
24 changes: 17 additions & 7 deletions metadata-ingestion/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,22 @@ extend-exclude = '''
include = '\.pyi?$'
target-version = ['py38', 'py39', 'py310', 'py311']

[tool.isort]
combine_as_imports = true
indent = ' '
known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub.sql_parsing._sqlglot_patch']
profile = 'black'
sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
skip_glob = 'src/datahub/metadata'
[tool.ruff.lint.isort]
combine-as-imports = true
known-first-party = ["datahub"]
# The compat/patch modules were listed under isort's `known_future_library` so that they
# are imported before everything else (presumably because they patch third-party
# libraries on import -- confirm against the modules themselves). Ruff has no
# `known-future-library` setting, and putting them in `extra-standard-library` sorts them
# alphabetically among the stdlib imports (e.g. after `import uuid`), which no longer
# guarantees they run first. A custom "patch" section placed right after "future" keeps
# them at the top of each file. `__future__` itself needs no entry: it always belongs to
# the built-in "future" section.
sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] }
section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"]
force-sort-within-sections = false
force-wrap-aliases = false
split-on-trailing-comma = false
order-by-type = true
relative-imports-order = "closest-to-furthest"
force-single-line = false
single-line-exclusions = ["typing"]
length-sort = false
from-first = false
required-imports = []
classes = ["typing"]

[tool.pyright]
extraPaths = ['tests']
Expand Down Expand Up @@ -47,6 +56,7 @@ select = [
"C90",
"E",
"F",
"I", # For isort
"TID",
]
ignore = [
Expand Down
1 change: 0 additions & 1 deletion metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,6 @@
# We should make an effort to keep it up to date.
"black==23.3.0",
"ruff==0.9.1",
"isort>=5.7.0",
"mypy==1.10.1",
}

Expand Down
6 changes: 6 additions & 0 deletions metadata-ingestion/src/datahub/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@

MAX_CONTENT_WIDTH = 120

# Warn when the CLI module is loaded on an interpreter newer than the tested range.
if sys.version_info >= (3, 12):
    click.secho(
        "Python versions above 3.11 are not tested with. Please use Python 3.11.",
        fg="red",
        # Emit on stderr: this runs at import time, and writing to stdout would mix the
        # warning into command output that callers may pipe or parse.
        err=True,
    )


@click.group(
context_settings=dict(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
"globalSettingsKey",
"globalSettingsInfo",
"testResults",
"dataHubExecutionRequestKey",
"dataHubExecutionRequestInput",
"dataHubExecutionRequestSignal",
"dataHubExecutionRequestResult",
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ def revoke_expired_tokens(self) -> None:
tokens = list_access_tokens.get("tokens", [])
total = list_access_tokens.get("total", 0)
if tokens == []:
# Due to a server bug we cannot rely on just total
break
for token in tokens:
self.report.expired_tokens_revoked += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class SoftDeletedEntitiesCleanupConfig(ConfigModel):

@dataclass
class SoftDeletedEntitiesReport(SourceReport):
num_calls_made: Dict[str, int] = field(default_factory=dict)
num_entities_found: Dict[str, int] = field(default_factory=dict)
num_soft_deleted_entity_processed: int = 0
num_soft_deleted_retained_due_to_age: int = 0
Expand Down Expand Up @@ -242,6 +243,11 @@ def _get_soft_deleted(self, graphql_query: str, entity_type: str) -> Iterable[st

while True:
try:
if entity_type not in self.report.num_calls_made:
self.report.num_calls_made[entity_type] = 1
else:
self.report.num_calls_made[entity_type] += 1
self._print_report()
result = self.ctx.graph.execute_graphql(
graphql_query,
{
Expand Down Expand Up @@ -270,7 +276,13 @@ def _get_soft_deleted(self, graphql_query: str, entity_type: str) -> Iterable[st
)
break
scroll_across_entities = result.get("scrollAcrossEntities")
if not scroll_across_entities or not scroll_across_entities.get("count"):
if not scroll_across_entities:
break
search_results = scroll_across_entities.get("searchResults")
count = scroll_across_entities.get("count")
if not count or not search_results:
# Due to a server bug we cannot rely on count alone: the server has returned responses like
# {'count': 1, 'nextScrollId': None, 'searchResults': []}
break
if entity_type == "DATA_PROCESS_INSTANCE":
# Temp workaround. See note in beginning of the function
Expand All @@ -282,7 +294,7 @@ def _get_soft_deleted(self, graphql_query: str, entity_type: str) -> Iterable[st
self.report.num_entities_found[entity_type] += scroll_across_entities.get(
"count"
)
for query in scroll_across_entities.get("searchResults"):
for query in search_results:
yield query["entity"]["urn"]

def _get_urns(self) -> Iterable[str]:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED

import collections
import concurrent.futures
import contextlib
Expand All @@ -12,6 +10,7 @@
import traceback
import unittest.mock
import uuid
from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
from functools import lru_cache
from typing import (
TYPE_CHECKING,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

import dataclasses
import functools
import logging
import traceback
from collections import defaultdict
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union

import pydantic.dataclasses
Expand Down
3 changes: 1 addition & 2 deletions metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

import functools
import hashlib
import logging
import re
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
from typing import Dict, Iterable, Optional, Tuple, Union

import sqlglot
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

import time
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

import pytest
import sqlglot
Expand Down

0 comments on commit 8cd90bd

Please sign in to comment.