diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle
index 1bcb58e6b7c543..95b4ee3118f03e 100644
--- a/metadata-ingestion-modules/airflow-plugin/build.gradle
+++ b/metadata-ingestion-modules/airflow-plugin/build.gradle
@@ -73,16 +73,15 @@ task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " +
"source ${venv_name}/bin/activate && set -x && " +
- "black --check --diff src/ tests/ && " +
"ruff check src/ tests/ && " +
+ "ruff format --check src/ tests/ && " +
"mypy --show-traceback --show-error-codes src/ tests/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
- "black src/ tests/ && " +
- "ruff check --fix src/ tests/"
- "mypy src/ tests/ "
+ "ruff check --fix src/ tests/ && " +
+ "ruff format src/ tests/ "
}
// HACK: Some of the Airflow constraint files conflict with packages that we install (e.g. black).
@@ -119,5 +118,8 @@ clean {
delete venv_name
delete 'build'
delete 'dist'
+ delete '.ruff_cache'
+ delete '.mypy_cache'
+ delete '.pytest_cache'
}
clean.dependsOn cleanPythonCache
diff --git a/metadata-ingestion-modules/airflow-plugin/pyproject.toml b/metadata-ingestion-modules/airflow-plugin/pyproject.toml
index 7d03c2a14bf078..d1e1d0ad479442 100644
--- a/metadata-ingestion-modules/airflow-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/airflow-plugin/pyproject.toml
@@ -2,13 +2,21 @@
build-backend = "setuptools.build_meta"
requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"]
-[tool.black]
-extend-exclude = '''
-# A regex preceded with ^/ will apply only to files and directories
-# in the root of the project.
-^/tmp
-'''
-include = '\.pyi?$'
+[tool.ruff]
+line-length = 88
+target-version = "py38"
+exclude = [
+ ".git",
+ "venv",
+ ".tox",
+ "__pycache__",
+]
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
[tool.ruff.lint.isort]
combine-as-imports = true
@@ -28,31 +36,23 @@ required-imports = []
classes = ["typing"]
[tool.ruff.lint]
-select = [
- "B",
- "C90",
- "E",
- "F",
- "I", # For isort
- "TID",
+extend-select = [
+ "B", # flake8-bugbear
+ "C90", # mccabe complexity
+ "E", # pycodestyle errors
+ "F", # pyflakes
+ "G010", # logging.warn -> logging.warning
+ "I", # isort
+ "TID", # flake8-tidy-imports
]
ignore = [
- # Ignore line length violations (handled by Black)
- "E501",
- # Ignore whitespace before ':' (matches Black)
- "E203",
- "E203",
- # Allow usages of functools.lru_cache
- "B019",
- # Allow function call in argument defaults
- "B008",
+ "E501", # Line length violations (handled by formatter)
]
[tool.ruff.lint.mccabe]
max-complexity = 15
[tool.ruff.lint.flake8-tidy-imports]
-# Disallow all relative imports.
ban-relative-imports = "all"
[tool.ruff.lint.per-file-ignores]
diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py
index 79c18a5188dd84..58c04158957ccd 100644
--- a/metadata-ingestion-modules/airflow-plugin/setup.py
+++ b/metadata-ingestion-modules/airflow-plugin/setup.py
@@ -71,7 +71,6 @@ def get_long_description():
dev_requirements = {
*base_requirements,
*mypy_stubs,
- "black==22.12.0",
"coverage>=5.1",
"ruff==0.9.2",
"mypy==1.10.1",
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py
index fd01ac10f98de9..5904ce1e9e978c 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py
@@ -63,9 +63,9 @@ def __init__(self):
self.task_to_extractor.extractors["AthenaOperator"] = AthenaOperatorExtractor
- self.task_to_extractor.extractors[
- "BigQueryInsertJobOperator"
- ] = BigQueryInsertJobOperatorExtractor
+ self.task_to_extractor.extractors["BigQueryInsertJobOperator"] = (
+ BigQueryInsertJobOperatorExtractor
+ )
self._graph: Optional["DataHubGraph"] = None
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
index 9de44811f60a48..b2ca61e3de3bf5 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
@@ -286,9 +286,9 @@ def _extract_lineage(
if sql_parsing_result:
if error := sql_parsing_result.debug_info.error:
logger.info(f"SQL parsing error: {error}", exc_info=error)
- datajob.properties[
- "datahub_sql_parser_error"
- ] = f"{type(error).__name__}: {error}"
+ datajob.properties["datahub_sql_parser_error"] = (
+ f"{type(error).__name__}: {error}"
+ )
if not sql_parsing_result.debug_info.table_error:
input_urns.extend(sql_parsing_result.in_tables)
output_urns.extend(sql_parsing_result.out_tables)
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
index 4bf050d41473e4..99b0a40fd3c13e 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
@@ -44,11 +44,9 @@ def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]:
if task_inlets and isinstance(task_inlets, list):
inlets = []
- task_ids = (
- {o for o in task_inlets if isinstance(o, str)}
- .union(op.task_id for op in task_inlets if isinstance(op, BaseOperator))
- .intersection(task.get_flat_relative_ids(upstream=True))
- )
+ task_ids = {o for o in task_inlets if isinstance(o, str)}.union(
+ op.task_id for op in task_inlets if isinstance(op, BaseOperator)
+ ).intersection(task.get_flat_relative_ids(upstream=True))
from airflow.lineage import AUTO
from cattr import structure
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py
index 4351f40fe7e3ad..24e89211dd3c5b 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py
@@ -2,6 +2,7 @@
This example demonstrates how to emit lineage to DataHub within an Airflow DAG.
"""
+
from datetime import timedelta
from airflow import DAG
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py
index d2c9821295419c..2744c26021cde3 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py
@@ -273,13 +273,21 @@ def _run_airflow(
subprocess.check_call(
[
# fmt: off
- "airflow", "users", "create",
- "--username", "airflow",
- "--password", "airflow",
- "--firstname", "admin",
- "--lastname", "admin",
- "--role", "Admin",
- "--email", "airflow@example.com",
+ "airflow",
+ "users",
+ "create",
+ "--username",
+ "airflow",
+ "--password",
+ "airflow",
+ "--firstname",
+ "admin",
+ "--lastname",
+ "admin",
+ "--role",
+ "Admin",
+ "--email",
+ "airflow@example.com",
# fmt: on
],
env=environment,
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py
index 1dc8e14a425dfc..4219c5fb9cefb3 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py
+++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py
@@ -242,9 +242,7 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions):
},
), mock.patch("airflow.models.BaseOperator.xcom_pull"), mock.patch(
"airflow.models.BaseOperator.xcom_push"
- ), patch_airflow_connection(
- datahub_rest_connection_config
- ):
+ ), patch_airflow_connection(datahub_rest_connection_config):
func = mock.Mock()
func.__name__ = "foo"
@@ -275,7 +273,10 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions):
if AIRFLOW_VERSION < packaging.version.parse("2.2.0"):
ti = TaskInstance(task=op2, execution_date=DEFAULT_DATE)
# Ignoring type here because DagRun state is just a string at Airflow 1
- dag_run = DagRun(state="success", run_id=f"scheduled_{DEFAULT_DATE.isoformat()}") # type: ignore
+ dag_run = DagRun(
+ state="success", # type: ignore[arg-type]
+ run_id=f"scheduled_{DEFAULT_DATE.isoformat()}",
+ )
else:
from airflow.utils.state import DagRunState
diff --git a/metadata-ingestion-modules/dagster-plugin/build.gradle b/metadata-ingestion-modules/dagster-plugin/build.gradle
index 503b3556a41bfe..7dd7036e276151 100644
--- a/metadata-ingestion-modules/dagster-plugin/build.gradle
+++ b/metadata-ingestion-modules/dagster-plugin/build.gradle
@@ -54,16 +54,15 @@ task installDev(type: Exec, dependsOn: [install]) {
task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
- "black --check --diff src/ tests/ examples/ && " +
- "ruff check src/ tests/ && " +
+ "ruff check src/ tests/ examples/ && " +
+ "ruff format --check src/ tests/ && " +
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
- "black src/ tests/ examples/ && " +
- "ruff check --fix src/ tests/"
- "mypy src/ tests/ examples/"
+ "ruff check --fix src/ tests/ examples/ && " +
+ "ruff format src/ tests/ examples/ "
}
task installDevTest(type: Exec, dependsOn: [installDev]) {
@@ -105,5 +104,8 @@ clean {
delete venv_name
delete 'build'
delete 'dist'
+ delete '.ruff_cache'
+ delete '.mypy_cache'
+ delete '.pytest_cache'
}
clean.dependsOn cleanPythonCache
diff --git a/metadata-ingestion-modules/dagster-plugin/examples/advanced_ops_jobs.py b/metadata-ingestion-modules/dagster-plugin/examples/advanced_ops_jobs.py
index 7b7616b1ec11de..75cab237b05a3e 100644
--- a/metadata-ingestion-modules/dagster-plugin/examples/advanced_ops_jobs.py
+++ b/metadata-ingestion-modules/dagster-plugin/examples/advanced_ops_jobs.py
@@ -9,9 +9,9 @@
job,
op,
)
+
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.utilities.urns.dataset_urn import DatasetUrn
-
from datahub_dagster_plugin.client.dagster_generator import (
DagsterGenerator,
DatasetLineage,
diff --git a/metadata-ingestion-modules/dagster-plugin/examples/assets_job.py b/metadata-ingestion-modules/dagster-plugin/examples/assets_job.py
index 1ed3f2f915061b..9b26b502d770f2 100644
--- a/metadata-ingestion-modules/dagster-plugin/examples/assets_job.py
+++ b/metadata-ingestion-modules/dagster-plugin/examples/assets_job.py
@@ -7,9 +7,9 @@
define_asset_job,
multi_asset,
)
+
from datahub.ingestion.graph.config import DatahubClientConfig
from datahub.utilities.urns.dataset_urn import DatasetUrn
-
from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubDagsterSourceConfig,
make_datahub_sensor,
diff --git a/metadata-ingestion-modules/dagster-plugin/examples/basic_setup.py b/metadata-ingestion-modules/dagster-plugin/examples/basic_setup.py
index 300cf9df022c66..2eeff225697261 100644
--- a/metadata-ingestion-modules/dagster-plugin/examples/basic_setup.py
+++ b/metadata-ingestion-modules/dagster-plugin/examples/basic_setup.py
@@ -1,6 +1,6 @@
from dagster import Definitions
+
from datahub.ingestion.graph.client import DatahubClientConfig
-
from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubDagsterSourceConfig,
make_datahub_sensor,
diff --git a/metadata-ingestion-modules/dagster-plugin/examples/ops_job.py b/metadata-ingestion-modules/dagster-plugin/examples/ops_job.py
index a17fc89e6922df..aa2902ee5c708b 100644
--- a/metadata-ingestion-modules/dagster-plugin/examples/ops_job.py
+++ b/metadata-ingestion-modules/dagster-plugin/examples/ops_job.py
@@ -1,7 +1,7 @@
from dagster import Definitions, In, Out, PythonObjectDagsterType, job, op
+
from datahub.ingestion.graph.config import DatahubClientConfig
from datahub.utilities.urns.dataset_urn import DatasetUrn
-
from datahub_dagster_plugin.sensors.datahub_sensors import (
DatahubDagsterSourceConfig,
make_datahub_sensor,
diff --git a/metadata-ingestion-modules/dagster-plugin/pyproject.toml b/metadata-ingestion-modules/dagster-plugin/pyproject.toml
index 7d03c2a14bf078..d1e1d0ad479442 100644
--- a/metadata-ingestion-modules/dagster-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/dagster-plugin/pyproject.toml
@@ -2,13 +2,21 @@
build-backend = "setuptools.build_meta"
requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"]
-[tool.black]
-extend-exclude = '''
-# A regex preceded with ^/ will apply only to files and directories
-# in the root of the project.
-^/tmp
-'''
-include = '\.pyi?$'
+[tool.ruff]
+line-length = 88
+target-version = "py38"
+exclude = [
+ ".git",
+ "venv",
+ ".tox",
+ "__pycache__",
+]
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
[tool.ruff.lint.isort]
combine-as-imports = true
@@ -28,31 +36,23 @@ required-imports = []
classes = ["typing"]
[tool.ruff.lint]
-select = [
- "B",
- "C90",
- "E",
- "F",
- "I", # For isort
- "TID",
+extend-select = [
+ "B", # flake8-bugbear
+ "C90", # mccabe complexity
+ "E", # pycodestyle errors
+ "F", # pyflakes
+ "G010", # logging.warn -> logging.warning
+ "I", # isort
+ "TID", # flake8-tidy-imports
]
ignore = [
- # Ignore line length violations (handled by Black)
- "E501",
- # Ignore whitespace before ':' (matches Black)
- "E203",
- "E203",
- # Allow usages of functools.lru_cache
- "B019",
- # Allow function call in argument defaults
- "B008",
+ "E501", # Line length violations (handled by formatter)
]
[tool.ruff.lint.mccabe]
max-complexity = 15
[tool.ruff.lint.flake8-tidy-imports]
-# Disallow all relative imports.
ban-relative-imports = "all"
[tool.ruff.lint.per-file-ignores]
diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py
index b15f3716b28d0a..09859b6c4344e3 100644
--- a/metadata-ingestion-modules/dagster-plugin/setup.py
+++ b/metadata-ingestion-modules/dagster-plugin/setup.py
@@ -51,7 +51,6 @@ def get_long_description():
"dagster-aws >= 0.11.0",
"dagster-snowflake >= 0.11.0",
"dagster-snowflake-pandas >= 0.11.0",
- "black==22.12.0",
"coverage>=5.1",
"ruff==0.9.2",
"mypy>=1.4.0",
diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py
index 9a0a9a1b3a75ed..033d3967145017 100644
--- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py
+++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py
@@ -507,7 +507,7 @@ def generate_datajob(
job_property_bag: Dict[str, str] = {}
if input_datasets:
self.logger.info(
- f"Input datasets for {op_def_snap.name} are { list(input_datasets.get(op_def_snap.name, []))}"
+ f"Input datasets for {op_def_snap.name} are {list(input_datasets.get(op_def_snap.name, []))}"
)
inlets.update(input_datasets.get(op_def_snap.name, []))
@@ -515,7 +515,7 @@ def generate_datajob(
if output_datasets:
self.logger.info(
- f"Output datasets for {op_def_snap.name} are { list(output_datasets.get(op_def_snap.name, []))}"
+ f"Output datasets for {op_def_snap.name} are {list(output_datasets.get(op_def_snap.name, []))}"
)
datajob.outlets = list(output_datasets.get(op_def_snap.name, []))
@@ -606,7 +606,7 @@ def emit_job_run(
if run.status not in status_result_map:
raise Exception(
f"Job run status should be either complete, failed or cancelled and it was "
- f"{run.status }"
+ f"{run.status}"
)
if run_stats.start_time is not None:
@@ -673,7 +673,7 @@ def emit_op_run(
if run_step_stats.status not in status_result_map:
raise Exception(
f"Step run status should be either complete, failed or cancelled and it was "
- f"{run_step_stats.status }"
+ f"{run_step_stats.status}"
)
if run_step_stats.start_time is not None:
diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py
index b91a9cfa56d398..5f049d55c16a12 100644
--- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py
+++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py
@@ -262,7 +262,6 @@ def get_dagster_environment(
and context.dagster_run.job_code_origin.repository_origin
and context.dagster_run.job_code_origin.repository_origin.code_pointer
):
-
code_pointer = (
context.dagster_run.job_code_origin.repository_origin.code_pointer
)
diff --git a/metadata-ingestion-modules/gx-plugin/build.gradle b/metadata-ingestion-modules/gx-plugin/build.gradle
index a0604215426bf7..57a1ed0b2169d3 100644
--- a/metadata-ingestion-modules/gx-plugin/build.gradle
+++ b/metadata-ingestion-modules/gx-plugin/build.gradle
@@ -25,7 +25,7 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"${python_executable} -m venv ${venv_name} && " +
- "${venv_name}/bin/python -m pip install --upgrade uv && " +
+ "${venv_name}/bin/pip install --upgrade uv && " +
"touch ${sentinel_file}"
}
@@ -54,16 +54,15 @@ task installDev(type: Exec, dependsOn: [install]) {
task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
- "black --check --diff src/ tests/ && " +
"ruff check src/ tests/ && " +
+ "ruff format --check src/ tests/ && " +
"mypy --show-traceback --show-error-codes src/ tests/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
- "black src/ tests/ && " +
- "ruff check --fix src/ tests/"
- "mypy src/ tests/"
+ "ruff check --fix src/ tests/ && " +
+ "ruff format src/ tests/ "
}
task installDevTest(type: Exec, dependsOn: [installDev]) {
@@ -105,5 +104,8 @@ clean {
delete venv_name
delete 'build'
delete 'dist'
+ delete '.ruff_cache'
+ delete '.mypy_cache'
+ delete '.pytest_cache'
}
clean.dependsOn cleanPythonCache
diff --git a/metadata-ingestion-modules/gx-plugin/pyproject.toml b/metadata-ingestion-modules/gx-plugin/pyproject.toml
index 7d03c2a14bf078..d1e1d0ad479442 100644
--- a/metadata-ingestion-modules/gx-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/gx-plugin/pyproject.toml
@@ -2,13 +2,21 @@
build-backend = "setuptools.build_meta"
requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"]
-[tool.black]
-extend-exclude = '''
-# A regex preceded with ^/ will apply only to files and directories
-# in the root of the project.
-^/tmp
-'''
-include = '\.pyi?$'
+[tool.ruff]
+line-length = 88
+target-version = "py38"
+exclude = [
+ ".git",
+ "venv",
+ ".tox",
+ "__pycache__",
+]
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
[tool.ruff.lint.isort]
combine-as-imports = true
@@ -28,31 +36,23 @@ required-imports = []
classes = ["typing"]
[tool.ruff.lint]
-select = [
- "B",
- "C90",
- "E",
- "F",
- "I", # For isort
- "TID",
+extend-select = [
+ "B", # flake8-bugbear
+ "C90", # mccabe complexity
+ "E", # pycodestyle errors
+ "F", # pyflakes
+ "G010", # logging.warn -> logging.warning
+ "I", # isort
+ "TID", # flake8-tidy-imports
]
ignore = [
- # Ignore line length violations (handled by Black)
- "E501",
- # Ignore whitespace before ':' (matches Black)
- "E203",
- "E203",
- # Allow usages of functools.lru_cache
- "B019",
- # Allow function call in argument defaults
- "B008",
+ "E501", # Line length violations (handled by formatter)
]
[tool.ruff.lint.mccabe]
max-complexity = 15
[tool.ruff.lint.flake8-tidy-imports]
-# Disallow all relative imports.
ban-relative-imports = "all"
[tool.ruff.lint.per-file-ignores]
diff --git a/metadata-ingestion-modules/gx-plugin/setup.py b/metadata-ingestion-modules/gx-plugin/setup.py
index d114a4130ca4f2..fbc4097388993f 100644
--- a/metadata-ingestion-modules/gx-plugin/setup.py
+++ b/metadata-ingestion-modules/gx-plugin/setup.py
@@ -58,9 +58,8 @@ def get_long_description():
base_dev_requirements = {
*base_requirements,
*mypy_stubs,
- "black==22.12.0",
"coverage>=5.1",
- "ruff==0.9.2",
+ "ruff==0.9.1",
"mypy>=1.4.0",
# pydantic 1.8.2 is incompatible with mypy 0.910.
# See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
diff --git a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py
index 4f2aee52c3319f..1070d4d3d5d66d 100644
--- a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py
+++ b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py
@@ -108,7 +108,6 @@ def __init__(
convert_urns_to_lowercase: bool = False,
name: str = "DataHubValidationAction",
):
-
if has_name_positional_arg:
if len(args) >= 1 and isinstance(args[0], str):
name = args[0]
@@ -164,9 +163,7 @@ def _run(
if isinstance(
validation_result_suite_identifier, ValidationResultIdentifier
):
- expectation_suite_name = (
- validation_result_suite_identifier.expectation_suite_identifier.expectation_suite_name
- )
+ expectation_suite_name = validation_result_suite_identifier.expectation_suite_identifier.expectation_suite_name
run_id = validation_result_suite_identifier.run_id
batch_identifier = validation_result_suite_identifier.batch_identifier
diff --git a/metadata-ingestion-modules/prefect-plugin/build.gradle b/metadata-ingestion-modules/prefect-plugin/build.gradle
index d16201834a0ff0..d13c9fe3c9abe7 100644
--- a/metadata-ingestion-modules/prefect-plugin/build.gradle
+++ b/metadata-ingestion-modules/prefect-plugin/build.gradle
@@ -54,16 +54,15 @@ task installDev(type: Exec, dependsOn: [install]) {
task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
- "black --check --diff src/ tests/ && " +
"ruff check src/ tests/ && " +
+ "ruff format --check src/ tests/ && " +
"mypy --show-traceback --show-error-codes src/ tests/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && " +
- "black src/ tests/ && " +
- "ruff check --fix src/ tests/"
- "mypy src/ tests/ "
+ "ruff check --fix src/ tests/ && " +
+ "ruff format src/ tests/ "
}
task installDevTest(type: Exec, dependsOn: [installDev]) {
@@ -111,5 +110,8 @@ clean {
delete venv_name
delete 'build'
delete 'dist'
+ delete '.ruff_cache'
+ delete '.mypy_cache'
+ delete '.pytest_cache'
}
clean.dependsOn cleanPythonCache
diff --git a/metadata-ingestion-modules/prefect-plugin/pyproject.toml b/metadata-ingestion-modules/prefect-plugin/pyproject.toml
index 7d03c2a14bf078..d1e1d0ad479442 100644
--- a/metadata-ingestion-modules/prefect-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/prefect-plugin/pyproject.toml
@@ -2,13 +2,21 @@
build-backend = "setuptools.build_meta"
requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"]
-[tool.black]
-extend-exclude = '''
-# A regex preceded with ^/ will apply only to files and directories
-# in the root of the project.
-^/tmp
-'''
-include = '\.pyi?$'
+[tool.ruff]
+line-length = 88
+target-version = "py38"
+exclude = [
+ ".git",
+ "venv",
+ ".tox",
+ "__pycache__",
+]
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
[tool.ruff.lint.isort]
combine-as-imports = true
@@ -28,31 +36,23 @@ required-imports = []
classes = ["typing"]
[tool.ruff.lint]
-select = [
- "B",
- "C90",
- "E",
- "F",
- "I", # For isort
- "TID",
+extend-select = [
+ "B", # flake8-bugbear
+ "C90", # mccabe complexity
+ "E", # pycodestyle errors
+ "F", # pyflakes
+ "G010", # logging.warn -> logging.warning
+ "I", # isort
+ "TID", # flake8-tidy-imports
]
ignore = [
- # Ignore line length violations (handled by Black)
- "E501",
- # Ignore whitespace before ':' (matches Black)
- "E203",
- "E203",
- # Allow usages of functools.lru_cache
- "B019",
- # Allow function call in argument defaults
- "B008",
+ "E501", # Line length violations (handled by formatter)
]
[tool.ruff.lint.mccabe]
max-complexity = 15
[tool.ruff.lint.flake8-tidy-imports]
-# Disallow all relative imports.
ban-relative-imports = "all"
[tool.ruff.lint.per-file-ignores]
diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py
index 9587f0ed73780b..1d56cae8d938a2 100644
--- a/metadata-ingestion-modules/prefect-plugin/setup.py
+++ b/metadata-ingestion-modules/prefect-plugin/setup.py
@@ -57,9 +57,8 @@ def get_long_description():
dev_requirements = {
*base_requirements,
*mypy_stubs,
- "black==22.12.0",
"coverage>=5.1",
- "ruff==0.9.1",
+ "ruff==0.9.2",
"mypy>=1.4.0",
# pydantic 1.8.2 is incompatible with mypy 0.910.
# See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py
index fcab6b6fd91430..190a249a912d1a 100644
--- a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py
+++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py
@@ -351,9 +351,9 @@ def _emit_tasks(
for prefect_future in flow_run_ctx.task_run_futures:
if prefect_future.task_run is not None:
- task_run_key_map[
- str(prefect_future.task_run.id)
- ] = prefect_future.task_run.task_key
+ task_run_key_map[str(prefect_future.task_run.id)] = (
+ prefect_future.task_run.task_key
+ )
for node in graph_json:
datajob_urn = DataJobUrn.create_from_ids(
diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle
index 16a6704949c875..be9d69a2f0e4b6 100644
--- a/metadata-ingestion/build.gradle
+++ b/metadata-ingestion/build.gradle
@@ -110,16 +110,16 @@ task modelDocUpload(type: Exec, dependsOn: [modelDocGen]) {
task lint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
- "black --check --diff src/ tests/ examples/ && " +
"ruff check src/ tests/ examples/ && " +
+ "ruff format --check src/ tests/ examples/ && " +
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
}
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
- "black src/ tests/ examples/ && " +
- "ruff check --fix src/ tests/ examples/"
+ "ruff check --fix src/ tests/ examples/ && " +
+ "ruff format src/ tests/ examples/ "
}
def pytest_default_env = "PYTHONDEVMODE=1"
@@ -216,6 +216,7 @@ clean {
delete 'src/datahub/metadata'
delete '../docs/generated'
delete 'generated'
+ delete '.ruff_cache'
delete '.mypy_cache'
delete '.pytest_cache'
delete '.preflight_sentinel'
diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md
index ebe1cd3df81990..005b0427a5e6ad 100644
--- a/metadata-ingestion/developing.md
+++ b/metadata-ingestion/developing.md
@@ -177,11 +177,10 @@ The architecture of this metadata ingestion framework is heavily inspired by [Ap
## Code style
-We use black, ruff, and mypy to ensure consistent code style and quality.
+We use ruff and mypy to ensure consistent code style and quality.
```shell
# Assumes: pip install -e '.[dev]' and venv is activated
-black src/ tests/
ruff check src/ tests/
mypy src/ tests/
```
diff --git a/metadata-ingestion/examples/library/run_assertion.py b/metadata-ingestion/examples/library/run_assertion.py
index 414e5f46cc7f91..e7c717837eed3c 100644
--- a/metadata-ingestion/examples/library/run_assertion.py
+++ b/metadata-ingestion/examples/library/run_assertion.py
@@ -16,5 +16,5 @@
assertion_result = graph.run_assertion(urn=assertion_urn, save_result=True)
log.info(
- f'Assertion result (SUCCESS / FAILURE / ERROR): {assertion_result.get("type")}'
+ f"Assertion result (SUCCESS / FAILURE / ERROR): {assertion_result.get('type')}"
)
diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml
index 07f2010fde25f0..1d434eb8c3a94f 100644
--- a/metadata-ingestion/pyproject.toml
+++ b/metadata-ingestion/pyproject.toml
@@ -2,15 +2,6 @@
build-backend = "setuptools.build_meta"
requires = ["setuptools>=63.0.0", "wheel"]
-[tool.black]
-extend-exclude = '''
-# A regex preceded with ^/ will apply only to files and directories
-# in the root of the project.
-^/tmp
-'''
-include = '\.pyi?$'
-target-version = ['py38', 'py39', 'py310', 'py311']
-
[tool.ruff.lint.isort]
section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"]
sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] }
@@ -31,23 +22,22 @@ exclude = [
[tool.ruff.lint]
extend-select = [
- "B", # Bugbear
- "C90",
- "E",
- "F",
- "G010", # logging.warn -> logging.warning
- "I", # Import sorting
- "TID", # Tidy imports
+ "B", # flake8-bugbear
+ "C90", # mccabe complexity
+ "E", # pycodestyle errors
+ "F", # pyflakes
+ "G010", # logging.warn -> logging.warning
+ "I", # isort
+ "TID", # flake8-tidy-imports
]
extend-ignore = [
- # Ignore line length violations (handled by Black)
- "E501",
- # Ignore whitespace before ':' (matches Black)
- "E203",
- # Allow usages of functools.lru_cache
- "B019",
- # Allow function call in argument defaults
- "B008",
+ "E501", # Handled by formatter
+ "E111", # Handled by formatter
+ "E114", # Handled by formatter
+ "E117", # Handled by formatter
+ "E203", # Ignore whitespace before ':' (matches Black)
+ "B019", # Allow usages of functools.lru_cache
+ "B008", # Allow function call in argument defaults
# TODO: Enable these later
"B006", # Mutable args
"B017", # Do not assert blind exception
@@ -61,4 +51,4 @@ max-complexity = 20
ban-relative-imports = "all"
[tool.ruff.lint.per-file-ignores]
-"__init__.py" = ["F401"]
\ No newline at end of file
+"__init__.py" = ["F401"]
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index ea1b0ad1582576..2cfdf9837f45ad 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -592,7 +592,6 @@
lint_requirements = {
# This is pinned only to avoid spurious errors in CI.
# We should make an effort to keep it up to date.
- "black==23.3.0",
"ruff==0.9.2",
"mypy==1.10.1",
}
diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py
index 8704ed13cb6c30..a05386798495de 100644
--- a/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py
+++ b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py
@@ -20,15 +20,13 @@ class Operator(Protocol):
operator: str
- def id(self) -> str:
- ...
+ def id(self) -> str: ...
- def generate_parameters(self) -> AssertionStdParametersClass:
- ...
+ def generate_parameters(self) -> AssertionStdParametersClass: ...
def _generate_assertion_std_parameter(
- value: Union[str, int, float, list]
+ value: Union[str, int, float, list],
) -> AssertionStdParameterClass:
if isinstance(value, str):
return AssertionStdParameterClass(
diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py
index dc0c97d1c74e56..145a6097d7336c 100644
--- a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py
+++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py
@@ -19,15 +19,13 @@ class Operator(Protocol):
operator: str
- def id(self) -> str:
- ...
+ def id(self) -> str: ...
- def generate_parameters(self) -> AssertionStdParametersClass:
- ...
+ def generate_parameters(self) -> AssertionStdParametersClass: ...
def _generate_assertion_std_parameter(
- value: Union[str, int, float]
+ value: Union[str, int, float],
) -> AssertionStdParameterClass:
if isinstance(value, str):
return AssertionStdParameterClass(
diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
index 39de4d7f80558e..d2035d560716ae 100644
--- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
+++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
@@ -321,9 +321,9 @@ def from_yaml(
@classmethod
def from_datahub(cls, graph: DataHubGraph, id: str) -> DataProduct:
- data_product_properties: Optional[
- DataProductPropertiesClass
- ] = graph.get_aspect(id, DataProductPropertiesClass)
+ data_product_properties: Optional[DataProductPropertiesClass] = (
+ graph.get_aspect(id, DataProductPropertiesClass)
+ )
domains: Optional[DomainsClass] = graph.get_aspect(id, DomainsClass)
assert domains, "Data Product must have an associated domain. Found none."
owners: Optional[OwnershipClass] = graph.get_aspect(id, OwnershipClass)
diff --git a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py
index 315f2249d2e5cd..bf824a11a77b5d 100644
--- a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py
+++ b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py
@@ -266,7 +266,8 @@ def generate_mcp(
if self.schema_metadata.fields:
for field in self.schema_metadata.fields:
field_urn = field.urn or make_schema_field_urn(
- self.urn, field.id # type: ignore[arg-type]
+ self.urn, # type: ignore[arg-type]
+ field.id, # type: ignore[arg-type]
)
assert field_urn.startswith("urn:li:schemaField:")
diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
index 179dbdb231c912..b0b434751ad2cc 100644
--- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
+++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py
@@ -118,9 +118,9 @@ def fqn(self) -> str:
id = StructuredPropertyUrn.from_string(self.urn).id
if self.qualified_name is not None:
# ensure that qualified name and ID match
- assert (
- self.qualified_name == id
- ), "ID in the urn and the qualified_name must match"
+ assert self.qualified_name == id, (
+ "ID in the urn and the qualified_name must match"
+ )
return id
@validator("urn", pre=True, always=True)
@@ -184,9 +184,9 @@ def create(file: str, graph: DataHubGraph) -> None:
@classmethod
def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
- structured_property: Optional[
- StructuredPropertyDefinitionClass
- ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+ structured_property: Optional[StructuredPropertyDefinitionClass] = (
+ graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+ )
if structured_property is None:
raise Exception(
"StructuredPropertyDefinition aspect is None. Unable to create structured property."
diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py
index f6b5ba6176c59d..1f13391644c6c8 100644
--- a/metadata-ingestion/src/datahub/cli/cli_utils.py
+++ b/metadata-ingestion/src/datahub/cli/cli_utils.py
@@ -412,7 +412,7 @@ def generate_access_token(
def ensure_has_system_metadata(
event: Union[
MetadataChangeProposal, MetadataChangeProposalWrapper, MetadataChangeEvent
- ]
+ ],
) -> None:
if event.systemMetadata is None:
event.systemMetadata = SystemMetadataClass()
diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py
index 86bcd7eff1cbfc..b744ac573aed6e 100644
--- a/metadata-ingestion/src/datahub/cli/docker_cli.py
+++ b/metadata-ingestion/src/datahub/cli/docker_cli.py
@@ -296,9 +296,9 @@ def _restore(
restore_indices: Optional[bool],
primary_restore_file: Optional[str],
) -> int:
- assert (
- restore_primary or restore_indices
- ), "Either restore_primary or restore_indices must be set"
+ assert restore_primary or restore_indices, (
+ "Either restore_primary or restore_indices must be set"
+ )
msg = "datahub> "
if restore_primary:
msg += f"Will restore primary database from {primary_restore_file}. "
@@ -314,9 +314,9 @@ def _restore(
assert primary_restore_file
resolved_restore_file = os.path.expanduser(primary_restore_file)
logger.info(f"Restoring primary db from backup at {resolved_restore_file}")
- assert os.path.exists(
- resolved_restore_file
- ), f"File {resolved_restore_file} does not exist"
+ assert os.path.exists(resolved_restore_file), (
+ f"File {resolved_restore_file} does not exist"
+ )
with open(resolved_restore_file) as fp:
result = subprocess.run(
[
diff --git a/metadata-ingestion/src/datahub/cli/lite_cli.py b/metadata-ingestion/src/datahub/cli/lite_cli.py
index 957ee16245dd81..90bbb353deab18 100644
--- a/metadata-ingestion/src/datahub/cli/lite_cli.py
+++ b/metadata-ingestion/src/datahub/cli/lite_cli.py
@@ -176,7 +176,7 @@ def get(
)
)
end_time = time.time()
- logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis")
+ logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis")
@lite.command()
@@ -228,7 +228,7 @@ def ls(path: Optional[str]) -> None:
try:
browseables = lite.ls(path)
end_time = time.time()
- logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis")
+ logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis")
auto_complete: List[AutoComplete] = [
b.auto_complete for b in browseables if b.auto_complete is not None
]
diff --git a/metadata-ingestion/src/datahub/cli/migrate.py b/metadata-ingestion/src/datahub/cli/migrate.py
index 1bf1211674f596..3bd1b6fc4dc124 100644
--- a/metadata-ingestion/src/datahub/cli/migrate.py
+++ b/metadata-ingestion/src/datahub/cli/migrate.py
@@ -426,9 +426,9 @@ def batch_get_ids(
entities_yielded += 1
log.debug(f"yielding {x}")
yield x
- assert (
- entities_yielded == num_entities
- ), "Did not delete all entities, try running this command again!"
+ assert entities_yielded == num_entities, (
+ "Did not delete all entities, try running this command again!"
+ )
else:
log.error(f"Failed to execute batch get with {str(response.content)}")
response.raise_for_status()
diff --git a/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py b/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py
index dad724bfe11157..c0d93af90ada00 100644
--- a/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py
+++ b/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py
@@ -136,9 +136,9 @@ def extras_list_to_dict(extras: List[str]) -> Dict[str, str]:
extra_properties: Dict[str, str] = dict()
for x in extras:
parts = x.split("=")
- assert (
- len(parts) == 2
- ), f"Invalid value for extras {x}, should be in format key=value"
+ assert len(parts) == 2, (
+ f"Invalid value for extras {x}, should be in format key=value"
+ )
extra_properties[parts[0]] = parts[1]
return extra_properties
diff --git a/metadata-ingestion/src/datahub/cli/timeline_cli.py b/metadata-ingestion/src/datahub/cli/timeline_cli.py
index 37089e6f051f0d..174ce63e84ef4c 100644
--- a/metadata-ingestion/src/datahub/cli/timeline_cli.py
+++ b/metadata-ingestion/src/datahub/cli/timeline_cli.py
@@ -50,7 +50,7 @@ def pretty_id(id: Optional[str]) -> str:
if id.startswith("urn:li:dataset"):
dataset_key = dataset_urn_to_key(id)
if dataset_key:
- return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:'):], fg='white')}:{click.style(dataset_key.name, fg='white')}"
+ return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:') :], fg='white')}:{click.style(dataset_key.name, fg='white')}"
# failed to prettify, return original
return id
diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py
index 08817d9d5fdb93..8052de1b0669c4 100644
--- a/metadata-ingestion/src/datahub/configuration/common.py
+++ b/metadata-ingestion/src/datahub/configuration/common.py
@@ -200,8 +200,7 @@ class IgnorableError(MetaError):
@runtime_checkable
class ExceptionWithProps(Protocol):
- def get_telemetry_props(self) -> Dict[str, Any]:
- ...
+ def get_telemetry_props(self) -> Dict[str, Any]: ...
def should_show_stack_trace(exc: Exception) -> bool:
diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py
index e7e9bfd43adca5..7e68e9f80da4ff 100644
--- a/metadata-ingestion/src/datahub/configuration/git.py
+++ b/metadata-ingestion/src/datahub/configuration/git.py
@@ -121,9 +121,9 @@ def infer_repo_ssh_locator(
repo: str = values["repo"]
if repo.startswith(_GITHUB_PREFIX):
- return f"git@github.com:{repo[len(_GITHUB_PREFIX):]}.git"
+ return f"git@github.com:{repo[len(_GITHUB_PREFIX) :]}.git"
elif repo.startswith(_GITLAB_PREFIX):
- return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX):]}.git"
+ return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX) :]}.git"
else:
raise ValueError(
"Unable to infer repo_ssh_locator from repo. Please set repo_ssh_locator manually."
diff --git a/metadata-ingestion/src/datahub/configuration/time_window_config.py b/metadata-ingestion/src/datahub/configuration/time_window_config.py
index b3cc0316091173..5fabcf904d3219 100644
--- a/metadata-ingestion/src/datahub/configuration/time_window_config.py
+++ b/metadata-ingestion/src/datahub/configuration/time_window_config.py
@@ -47,7 +47,10 @@ class BaseTimeWindowConfig(ConfigModel):
default_factory=lambda: datetime.now(tz=timezone.utc),
description="Latest date of lineage/usage to consider. Default: Current time in UTC",
)
- start_time: datetime = Field(default=None, description="Earliest date of lineage/usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`). You can also specify relative time with respect to end_time such as '-7 days' Or '-7d'.") # type: ignore
+ start_time: datetime = Field(
+ default=None,
+ description="Earliest date of lineage/usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`). You can also specify relative time with respect to end_time such as '-7 days' Or '-7d'.",
+ ) # type: ignore
@pydantic.validator("start_time", pre=True, always=True)
def default_start_time(
@@ -63,12 +66,14 @@ def default_start_time(
# This is where start_time str is resolved to datetime
try:
delta = parse_relative_timespan(v)
- assert delta < timedelta(
- 0
- ), "Relative start time should start with minus sign (-) e.g. '-2 days'."
+ assert delta < timedelta(0), (
+ "Relative start time should start with minus sign (-) e.g. '-2 days'."
+ )
assert abs(delta) >= get_bucket_duration_delta(
values["bucket_duration"]
- ), "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+ ), (
+ "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+ )
# The end_time's default value is not yet populated, in which case
# we can just manually generate it here.
diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py
index f095fffbaea6b4..f5da90a86c9ef6 100644
--- a/metadata-ingestion/src/datahub/emitter/mce_builder.py
+++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py
@@ -88,13 +88,11 @@ def get_sys_time() -> int:
@overload
-def make_ts_millis(ts: None) -> None:
- ...
+def make_ts_millis(ts: None) -> None: ...
@overload
-def make_ts_millis(ts: datetime) -> int:
- ...
+def make_ts_millis(ts: datetime) -> int: ...
def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
@@ -105,13 +103,11 @@ def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
@overload
-def parse_ts_millis(ts: float) -> datetime:
- ...
+def parse_ts_millis(ts: float) -> datetime: ...
@overload
-def parse_ts_millis(ts: None) -> None:
- ...
+def parse_ts_millis(ts: None) -> None: ...
def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
diff --git a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py
index 17026a4114c128..e51c37d96e90f0 100644
--- a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py
+++ b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py
@@ -33,8 +33,7 @@
@runtime_checkable
class SupportsToObj(Protocol):
- def to_obj(self) -> Any:
- ...
+ def to_obj(self) -> Any: ...
def _recursive_to_obj(obj: Any) -> Any:
diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py
index 78a091f1ffe689..92ee158661d3d4 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py
@@ -55,15 +55,9 @@ def convert_chart_info_to_patch(
aspect.externalUrl
).set_type(aspect.type).set_title(aspect.title).set_access(
aspect.access
- ).set_last_modified(
- aspect.lastModified
- ).set_last_refreshed(
+ ).set_last_modified(aspect.lastModified).set_last_refreshed(
aspect.lastRefreshed
- ).set_description(
- aspect.description
- ).add_inputs(
- aspect.inputs
- )
+ ).set_description(aspect.description).add_inputs(aspect.inputs)
values = patch_builder.build()
if values:
diff --git a/metadata-ingestion/src/datahub/ingestion/api/report.py b/metadata-ingestion/src/datahub/ingestion/api/report.py
index 32810189acd00b..8cfca5782bee40 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/report.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/report.py
@@ -21,8 +21,7 @@
@runtime_checkable
class SupportsAsObj(Protocol):
- def as_obj(self) -> dict:
- ...
+ def as_obj(self) -> dict: ...
@dataclass
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
index f3e5b1db6a1c85..08af39cd24982a 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
@@ -48,7 +48,7 @@
def auto_workunit(
- stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]]
+ stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]],
) -> Iterable[MetadataWorkUnit]:
"""Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s."""
diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
index 88d1fcc52e2196..1c440642e06d8b 100644
--- a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
+++ b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
@@ -131,9 +131,9 @@ def get_recursive(self, schema: Dict) -> Optional[str]:
for i, schema_type in enumerate(p.schema_types):
if schema_type == schema_str:
# return the corresponding type for the schema that's a match
- assert (
- len(p.type) > i
- ), f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length"
+ assert len(p.type) > i, (
+ f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length"
+ )
return p.type[i]
return None
diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py
index d5af4f7a2389c0..dbb851c74e7e34 100644
--- a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py
+++ b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py
@@ -263,15 +263,13 @@ def _get_type_annotation(schema: SchemaOrField) -> str:
@overload
def _get_underlying_type_if_option_as_union(
schema: SchemaOrField, default: SchemaOrField
- ) -> SchemaOrField:
- ...
+ ) -> SchemaOrField: ...
@staticmethod
@overload
def _get_underlying_type_if_option_as_union(
schema: SchemaOrField, default: Optional[SchemaOrField] = None
- ) -> Optional[SchemaOrField]:
- ...
+ ) -> Optional[SchemaOrField]: ...
@staticmethod
def _get_underlying_type_if_option_as_union(
@@ -386,7 +384,7 @@ def emit(self) -> Iterable[SchemaField]:
if "deprecated" in merged_props:
description = (
- f"DEPRECATED: {merged_props['deprecated']}\n"
+ f'DEPRECATED: {merged_props["deprecated"]}\n'
+ description
if description
else ""
diff --git a/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py b/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py
index 9c34c4f83b0a93..beec42724529e6 100644
--- a/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py
+++ b/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py
@@ -17,9 +17,9 @@ def parse_s3_path(path: str) -> "S3Path":
def assert_ok_status(s3_response):
is_ok = s3_response["ResponseMetadata"]["HTTPStatusCode"] == 200
- assert (
- is_ok
- ), f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}"
+ assert is_ok, (
+ f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}"
+ )
@dataclass
diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py
index 50268768d0ce9f..ba03083854e785 100644
--- a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py
+++ b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py
@@ -148,9 +148,9 @@ def input_config_selectively_overrides_default_config(cls, info_types_config):
weight,
) in custom_infotype_config.Prediction_Factors_and_Weights.dict().items():
if weight > 0:
- assert (
- getattr(custom_infotype_config, factor) is not None
- ), f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}"
+ assert getattr(custom_infotype_config, factor) is not None, (
+ f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}"
+ )
# Custom infotype supports only regex based prediction for column values
if custom_infotype_config.Prediction_Factors_and_Weights.Values > 0:
@@ -158,7 +158,9 @@ def input_config_selectively_overrides_default_config(cls, info_types_config):
assert (
custom_infotype_config.Values.prediction_type
== ValuePredictionType.REGEX
- ), f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported."
+ ), (
+ f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported."
+ )
return info_types_config
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index 8c5f894a072d93..48a008536ed1ed 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -519,9 +519,9 @@ def get_aspects_for_entity(
:return: Optionally, a map of aspect_name to aspect_value as a dictionary if present, aspect_value will be set to None if that aspect was not found. Returns None on HTTP status 404.
:raises HttpError: if the HTTP response is not a 200
"""
- assert len(aspects) == len(
- aspect_types
- ), f"number of aspects requested ({len(aspects)}) should be the same as number of aspect types provided ({len(aspect_types)})"
+ assert len(aspects) == len(aspect_types), (
+ f"number of aspects requested ({len(aspects)}) should be the same as number of aspect types provided ({len(aspect_types)})"
+ )
# TODO: generate aspects list from type classes
response_json = self.get_entity_raw(entity_urn, aspects)
@@ -1576,9 +1576,7 @@ def run_assertion(
... assertionResult
}
}
- """ % (
- self._assertion_result_shared()
- )
+ """ % (self._assertion_result_shared())
variables = {
"assertionUrn": urn,
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
index ef59ba7a3b58b4..25cbd340c9674b 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
@@ -109,9 +109,9 @@ def on_failure(
mcp.systemMetadata.properties = {}
if "workunit_id" not in mcp.systemMetadata.properties:
# update the workunit id
- mcp.systemMetadata.properties[
- "workunit_id"
- ] = record_envelope.metadata["workunit_id"]
+ mcp.systemMetadata.properties["workunit_id"] = (
+ record_envelope.metadata["workunit_id"]
+ )
record_envelope.record = mcp
self.file_sink.write_record_async(record_envelope, self.logging_callback)
@@ -701,7 +701,7 @@ def pretty_print_summary(
num_failures_sink = len(self.sink.get_report().failures)
click.secho(
message_template.format(
- status=f"with at least {num_failures_source+num_failures_sink} failures"
+ status=f"with at least {num_failures_source + num_failures_sink} failures"
),
fg=self._get_text_color(
running=currently_running, failures=True, warnings=False
@@ -719,7 +719,7 @@ def pretty_print_summary(
num_warn_global = len(global_warnings)
click.secho(
message_template.format(
- status=f"with at least {num_warn_source+num_warn_sink+num_warn_global} warnings"
+ status=f"with at least {num_warn_source + num_warn_sink + num_warn_global} warnings"
),
fg=self._get_text_color(
running=currently_running, failures=False, warnings=True
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
index 7a4e7ec52a8e96..53e31aa2ea96e1 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
@@ -92,9 +92,9 @@ class PipelineConfig(ConfigModel):
pipeline_name: Optional[str] = None
failure_log: FailureLoggingConfig = FailureLoggingConfig()
- _raw_dict: Optional[
- dict
- ] = None # the raw dict that was parsed to construct this config
+ _raw_dict: Optional[dict] = (
+ None # the raw dict that was parsed to construct this config
+ )
@validator("run_id", pre=True, always=True)
def run_id_should_be_semantic(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py b/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py
index 9f6d13a08b182e..d12ff7415faefc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py
@@ -85,8 +85,8 @@ def ensure_field_level_settings_are_normalized(
if field_level_metric.startswith("include_field_"):
values.setdefault(field_level_metric, False)
- assert (
- max_num_fields_to_profile is None
- ), f"{max_num_fields_to_profile_key} should be set to None"
+ assert max_num_fields_to_profile is None, (
+ f"{max_num_fields_to_profile_key} should be set to None"
+ )
return values
diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py
index e4f9cd0ee7e018..586e7a3af3bcd1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py
@@ -508,7 +508,12 @@ def abs_browser(
):
abs_path = self.create_abs_path(obj.name)
logger.debug(f"Sampling file: {abs_path}")
- yield abs_path, obj.name, obj.last_modified, obj.size,
+ yield (
+ abs_path,
+ obj.name,
+ obj.last_modified,
+ obj.size,
+ )
except Exception as e:
# This odd check if being done because boto does not have a proper exception to catch
# The exception that appears in stacktrace cannot actually be caught without a lot more work
@@ -552,9 +557,12 @@ def local_browser(
if os.path.isfile(prefix):
logger.debug(f"Scanning single local file: {prefix}")
file_name = prefix
- yield prefix, file_name, datetime.utcfromtimestamp(
- os.path.getmtime(prefix)
- ), os.path.getsize(prefix)
+ yield (
+ prefix,
+ file_name,
+ datetime.utcfromtimestamp(os.path.getmtime(prefix)),
+ os.path.getsize(prefix),
+ )
else:
logger.debug(f"Scanning files under local folder: {prefix}")
for root, dirs, files in os.walk(prefix):
@@ -565,9 +573,12 @@ def local_browser(
full_path = PurePath(
os.path.normpath(os.path.join(root, file))
).as_posix()
- yield full_path, file, datetime.utcfromtimestamp(
- os.path.getmtime(full_path)
- ), os.path.getsize(full_path)
+ yield (
+ full_path,
+ file,
+ datetime.utcfromtimestamp(os.path.getmtime(full_path)),
+ os.path.getsize(full_path),
+ )
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
self.container_WU_creator = ContainerWUCreator(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
index 30e81643837375..2509927854d4a0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
@@ -521,7 +521,7 @@ def process_dataflow_node(
# otherwise, a node represents a transformation
else:
node_urn = mce_builder.make_data_job_urn_with_flow(
- flow_urn, job_id=f'{node["NodeType"]}-{node["Id"]}'
+ flow_urn, job_id=f"{node['NodeType']}-{node['Id']}"
)
return {
@@ -679,7 +679,7 @@ def get_datajob_wu(self, node: Dict[str, Any], job_name: str) -> MetadataWorkUni
)
)
- return MetadataWorkUnit(id=f'{job_name}-{node["Id"]}', mce=mce)
+ return MetadataWorkUnit(id=f"{job_name}-{node['Id']}", mce=mce)
def get_all_databases(self) -> Iterable[Mapping[str, Any]]:
logger.debug("Getting all databases")
@@ -750,13 +750,13 @@ def get_lineage_if_enabled(
) -> Optional[MetadataWorkUnit]:
if self.source_config.emit_s3_lineage:
# extract dataset properties aspect
- dataset_properties: Optional[
- DatasetPropertiesClass
- ] = mce_builder.get_aspect_if_available(mce, DatasetPropertiesClass)
+ dataset_properties: Optional[DatasetPropertiesClass] = (
+ mce_builder.get_aspect_if_available(mce, DatasetPropertiesClass)
+ )
# extract dataset schema aspect
- schema_metadata: Optional[
- SchemaMetadataClass
- ] = mce_builder.get_aspect_if_available(mce, SchemaMetadataClass)
+ schema_metadata: Optional[SchemaMetadataClass] = (
+ mce_builder.get_aspect_if_available(mce, SchemaMetadataClass)
+ )
if dataset_properties and "Location" in dataset_properties.customProperties:
location = dataset_properties.customProperties["Location"]
@@ -765,9 +765,9 @@ def get_lineage_if_enabled(
location, self.source_config.env
)
assert self.ctx.graph
- schema_metadata_for_s3: Optional[
- SchemaMetadataClass
- ] = self.ctx.graph.get_schema_metadata(s3_dataset_urn)
+ schema_metadata_for_s3: Optional[SchemaMetadataClass] = (
+ self.ctx.graph.get_schema_metadata(s3_dataset_urn)
+ )
if self.source_config.glue_s3_lineage_direction == "upstream":
fine_grained_lineages = None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py
index c4561b9d9e676a..d46d1c099383fe 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py
@@ -257,7 +257,7 @@ def get_feature_wu(
mce = MetadataChangeEvent(proposedSnapshot=feature_snapshot)
return MetadataWorkUnit(
- id=f'{feature_group_details["FeatureGroupName"]}-{feature["FeatureName"]}',
+ id=f"{feature_group_details['FeatureGroupName']}-{feature['FeatureName']}",
mce=mce,
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py
index 0f433aaecf2d96..f1374117af775f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py
@@ -212,7 +212,7 @@ def get_endpoint_wu(
mce = MetadataChangeEvent(proposedSnapshot=endpoint_snapshot)
return MetadataWorkUnit(
- id=f'{endpoint_details["EndpointName"]}',
+ id=f"{endpoint_details['EndpointName']}",
mce=mce,
)
@@ -503,7 +503,7 @@ def get_model_wu(
mce = MetadataChangeEvent(proposedSnapshot=model_snapshot)
return MetadataWorkUnit(
- id=f'{model_details["ModelName"]}',
+ id=f"{model_details['ModelName']}",
mce=mce,
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
index 508b4bbaa277dc..ceb010a7f0675f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
@@ -132,9 +132,9 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config):
self.filters = BigQueryFilter(self.config, self.report)
self.identifiers = BigQueryIdentifierBuilder(self.config, self.report)
- redundant_lineage_run_skip_handler: Optional[
- RedundantLineageRunSkipHandler
- ] = None
+ redundant_lineage_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = (
+ None
+ )
if self.config.enable_stateful_lineage_ingestion:
redundant_lineage_run_skip_handler = RedundantLineageRunSkipHandler(
source=self,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py
index 42f82704c81b99..d35c5265878c03 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py
@@ -37,9 +37,9 @@ class BigqueryTableIdentifier:
# Note: this regex may get overwritten by the sharded_table_pattern config.
# The class-level constant, however, will not be overwritten.
- _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[
- str
- ] = _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX
+ _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[str] = (
+ _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX
+ )
_BIGQUERY_WILDCARD_REGEX: ClassVar[str] = "((_(\\d+)?)\\*$)|\\*$"
_BQ_SHARDED_TABLE_SUFFIX: str = "_yyyymmdd"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
index afbe919df4dcae..57bfa2e3090d31 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
@@ -137,9 +137,9 @@ class BigQueryCredential(ConfigModel):
@root_validator(skip_on_failure=True)
def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
if values.get("client_x509_cert_url") is None:
- values[
- "client_x509_cert_url"
- ] = f'https://www.googleapis.com/robot/v1/metadata/x509/{values["client_email"]}'
+ values["client_x509_cert_url"] = (
+ f"https://www.googleapis.com/robot/v1/metadata/x509/{values['client_email']}"
+ )
return values
def create_credential_temp_file(self) -> str:
@@ -611,9 +611,9 @@ def validate_bigquery_audit_metadata_datasets(
cls, v: Optional[List[str]], values: Dict
) -> Optional[List[str]]:
if values.get("use_exported_bigquery_audit_metadata"):
- assert (
- v and len(v) > 0
- ), "`bigquery_audit_metadata_datasets` should be set if using `use_exported_bigquery_audit_metadata: True`."
+ assert v and len(v) > 0, (
+ "`bigquery_audit_metadata_datasets` should be set if using `use_exported_bigquery_audit_metadata: True`."
+ )
return v
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py
index 9da2aceb19220a..7dc0e4195d5dc9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py
@@ -87,9 +87,9 @@ def get_platform_resource(
key=platform_resource_key, graph_client=self.graph
)
if platform_resource:
- self.platform_resource_cache[
- platform_resource_key.primary_key
- ] = platform_resource
+ self.platform_resource_cache[platform_resource_key.primary_key] = (
+ platform_resource
+ )
return platform_resource
return None
@@ -115,7 +115,11 @@ def generate_label_platform_resource(
and platform_resource.resource_info.value
):
try:
- existing_info: Optional[BigQueryLabelInfo] = platform_resource.resource_info.value.as_pydantic_object(BigQueryLabelInfo) # type: ignore
+ existing_info: Optional[BigQueryLabelInfo] = (
+ platform_resource.resource_info.value.as_pydantic_object( # type: ignore
+ BigQueryLabelInfo
+ )
+ )
except ValidationError as e:
logger.error(
f"Error converting existing value to BigQueryLabelInfo: {e}. Creating new one. Maybe this is because of a non backward compatible schema change."
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
index 56e930dfb811f1..ebfbbf0639c38c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
@@ -311,8 +311,10 @@ def gen_dataset_containers(
platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource(
label, tag_urn, managed_by_datahub=False
)
- label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore
- BigQueryLabelInfo
+ label_info: BigQueryLabelInfo = (
+ platform_resource.resource_info.value.as_pydantic_object( # type: ignore
+ BigQueryLabelInfo
+ )
)
tag_urn = TagUrn.from_string(label_info.datahub_urn)
@@ -820,8 +822,10 @@ def gen_table_dataset_workunits(
platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource(
label, tag_urn, managed_by_datahub=False
)
- label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore
- BigQueryLabelInfo
+ label_info: BigQueryLabelInfo = (
+ platform_resource.resource_info.value.as_pydantic_object( # type: ignore
+ BigQueryLabelInfo
+ )
)
tag_urn = TagUrn.from_string(label_info.datahub_urn)
@@ -860,8 +864,10 @@ def gen_view_dataset_workunits(
platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource(
label, tag_urn, managed_by_datahub=False
)
- label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore
- BigQueryLabelInfo
+ label_info: BigQueryLabelInfo = (
+ platform_resource.resource_info.value.as_pydantic_object( # type: ignore
+ BigQueryLabelInfo
+ )
)
tag_urn = TagUrn.from_string(label_info.datahub_urn)
@@ -1203,9 +1209,9 @@ def get_tables_for_dataset(
report=self.report,
)
- self.report.metadata_extraction_sec[
- f"{project_id}.{dataset.name}"
- ] = timer.elapsed_seconds(digits=2)
+ self.report.metadata_extraction_sec[f"{project_id}.{dataset.name}"] = (
+ timer.elapsed_seconds(digits=2)
+ )
def get_core_table_details(
self, dataset_name: str, project_id: str, temp_table_dataset_prefix: str
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
index 433282a21fdb66..da82c6a06f0395 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
@@ -697,7 +697,7 @@ def _create_lineage_map(
if parsed_queries[-1]:
query = f"""create table `{destination_table.get_sanitized_table_ref().table_identifier.get_table_name()}` AS
(
- {parsed_queries[-1].sql(dialect='bigquery')}
+ {parsed_queries[-1].sql(dialect="bigquery")}
)"""
else:
query = e.query
@@ -809,11 +809,11 @@ def get_upstream_tables(
upstream_lineage, temp_table_upstream
)
- upstreams[
- ref_temp_table_upstream
- ] = _merge_lineage_edge_columns(
- upstreams.get(ref_temp_table_upstream),
- collapsed_lineage,
+ upstreams[ref_temp_table_upstream] = (
+ _merge_lineage_edge_columns(
+ upstreams.get(ref_temp_table_upstream),
+ collapsed_lineage,
+ )
)
else:
upstreams[upstream_table_ref] = _merge_lineage_edge_columns(
@@ -1004,9 +1004,9 @@ def get_lineage_for_external_table(
dataset_urn
)
for gcs_dataset_urn in gcs_urns:
- schema_metadata_for_gcs: Optional[
- SchemaMetadataClass
- ] = graph.get_schema_metadata(gcs_dataset_urn)
+ schema_metadata_for_gcs: Optional[SchemaMetadataClass] = (
+ graph.get_schema_metadata(gcs_dataset_urn)
+ )
if schema_metadata and schema_metadata_for_gcs:
fine_grained_lineage = self.get_fine_grained_lineages_with_gcs(
dataset_urn,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py
index 2ac40a48de4cc7..8a558d7736a389 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py
@@ -387,9 +387,7 @@ class BigqueryQuery:
OR
protoPayload.metadata.tableDataRead.reason = "JOB"
)
-""".strip(
- "\t \n"
-)
+""".strip("\t \n")
def bigquery_audit_metadata_query_template_lineage(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py
index 08c9beaa73c53b..0f9471219c6590 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py
@@ -271,9 +271,9 @@ def get_workunits_internal(
# Preprocessing stage that deduplicates the queries using query hash per usage bucket
# Note: FileBackedDict is an ordered dictionary, so the order of execution of
# queries is inherently maintained
- queries_deduped: FileBackedDict[
- Dict[int, ObservedQuery]
- ] = self.deduplicate_queries(queries)
+ queries_deduped: FileBackedDict[Dict[int, ObservedQuery]] = (
+ self.deduplicate_queries(queries)
+ )
self.report.num_unique_queries = len(queries_deduped)
logger.info(f"Found {self.report.num_unique_queries} unique queries")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py
index f2f6cc731858d1..c2b849e58fc6dc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py
@@ -763,9 +763,9 @@ def _create_operational_custom_properties(
)
if event.query_event.default_dataset:
- custom_properties[
- "defaultDatabase"
- ] = event.query_event.default_dataset
+ custom_properties["defaultDatabase"] = (
+ event.query_event.default_dataset
+ )
if event.read_event:
if event.read_event.readReason:
custom_properties["readReason"] = event.read_event.readReason
diff --git a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py
index dcdccc08ce0483..062c64d45767fc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py
@@ -91,7 +91,6 @@ class KeyspaceKey(ContainerKey):
supported=True,
)
class CassandraSource(StatefulIngestionSourceBase):
-
"""
This plugin extracts the following:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py
index 75a0ba0c617734..b467ca0aca6be4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py
@@ -107,10 +107,10 @@ class CassandraToSchemaFieldConverter:
@staticmethod
def get_column_type(cassandra_column_type: str) -> SchemaFieldDataType:
- type_class: Optional[
- Type
- ] = CassandraToSchemaFieldConverter._field_type_to_schema_field_type.get(
- cassandra_column_type
+ type_class: Optional[Type] = (
+ CassandraToSchemaFieldConverter._field_type_to_schema_field_type.get(
+ cassandra_column_type
+ )
)
if type_class is None:
logger.warning(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py
index 2b75d0dca53cb7..5ba4dd13fb2ac9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py
@@ -293,9 +293,9 @@ def _get_schema_and_fields(
def _load_json_schema_with_resolved_references(
self, schema: Schema, name: str, subject: str
) -> dict:
- imported_json_schemas: List[
- JsonSchemaWrapper
- ] = self.get_schemas_from_confluent_ref_json(schema, name=name, subject=subject)
+ imported_json_schemas: List[JsonSchemaWrapper] = (
+ self.get_schemas_from_confluent_ref_json(schema, name=name, subject=subject)
+ )
schema_dict = json.loads(schema.schema_str)
reference_map = {}
for imported_schema in imported_json_schemas:
@@ -332,9 +332,9 @@ def _get_schema_fields(
)
elif schema.schema_type == "PROTOBUF":
- imported_schemas: List[
- ProtobufSchema
- ] = self.get_schemas_from_confluent_ref_protobuf(schema)
+ imported_schemas: List[ProtobufSchema] = (
+ self.get_schemas_from_confluent_ref_protobuf(schema)
+ )
base_name: str = topic.replace(".", "_")
fields = protobuf_util.protobuf_schema_to_mce_fields(
ProtobufSchema(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
index 42e025073b534e..8ebb7b9ef7fbdf 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
@@ -371,11 +371,11 @@ def get_resource_workunits(
domain: Optional[str],
description: Optional[str],
) -> Iterable[MetadataWorkUnit]:
- maybe_terms_wu: Optional[
- MetadataWorkUnit
- ] = self.get_resource_glossary_terms_work_unit(
- entity_urn=entity_urn,
- term_associations=term_associations,
+ maybe_terms_wu: Optional[MetadataWorkUnit] = (
+ self.get_resource_glossary_terms_work_unit(
+ entity_urn=entity_urn,
+ term_associations=term_associations,
+ )
)
if maybe_terms_wu:
self.report.num_glossary_term_workunits_produced += 1
@@ -389,31 +389,31 @@ def get_resource_workunits(
self.report.num_tag_workunits_produced += 1
yield maybe_tags_wu
- maybe_owners_wu: Optional[
- MetadataWorkUnit
- ] = self.get_resource_owners_work_unit(
- entity_urn=entity_urn,
- owners=owners,
+ maybe_owners_wu: Optional[MetadataWorkUnit] = (
+ self.get_resource_owners_work_unit(
+ entity_urn=entity_urn,
+ owners=owners,
+ )
)
if maybe_owners_wu:
self.report.num_owners_workunits_produced += 1
yield maybe_owners_wu
- maybe_domain_wu: Optional[
- MetadataWorkUnit
- ] = self.get_resource_domain_work_unit(
- entity_urn=entity_urn,
- domain=domain,
+ maybe_domain_wu: Optional[MetadataWorkUnit] = (
+ self.get_resource_domain_work_unit(
+ entity_urn=entity_urn,
+ domain=domain,
+ )
)
if maybe_domain_wu:
self.report.num_domain_workunits_produced += 1
yield maybe_domain_wu
- maybe_description_wu: Optional[
- MetadataWorkUnit
- ] = self.get_resource_description_work_unit(
- entity_urn=entity_urn,
- description=description,
+ maybe_description_wu: Optional[MetadataWorkUnit] = (
+ self.get_resource_description_work_unit(
+ entity_urn=entity_urn,
+ description=description,
+ )
)
if maybe_description_wu:
self.report.num_description_workunits_produced += 1
@@ -426,9 +426,9 @@ def process_sub_resource_row(
needs_write: bool,
) -> Tuple[EditableSchemaMetadataClass, bool]:
field_path: str = sub_resource_row.field_path
- term_associations: List[
- GlossaryTermAssociationClass
- ] = sub_resource_row.term_associations
+ term_associations: List[GlossaryTermAssociationClass] = (
+ sub_resource_row.term_associations
+ )
tag_associations: List[TagAssociationClass] = sub_resource_row.tag_associations
description: Optional[str] = sub_resource_row.description
has_terms: bool = len(term_associations) > 0
@@ -517,9 +517,9 @@ def get_sub_resource_work_units(self) -> Iterable[MetadataWorkUnit]:
# Boolean field to tell whether we need to write an MCPW.
needs_write = False
- current_editable_schema_metadata: Optional[
- EditableSchemaMetadataClass
- ] = None
+ current_editable_schema_metadata: Optional[EditableSchemaMetadataClass] = (
+ None
+ )
if self.ctx.graph and not self.should_overwrite:
# Fetch the current editable schema metadata
current_editable_schema_metadata = self.ctx.graph.get_aspect(
@@ -655,9 +655,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
entity_urn = row["resource"]
entity_type = Urn.from_string(row["resource"]).get_type()
- term_associations: List[
- GlossaryTermAssociationClass
- ] = self.maybe_extract_glossary_terms(row)
+ term_associations: List[GlossaryTermAssociationClass] = (
+ self.maybe_extract_glossary_terms(row)
+ )
tag_associations: List[TagAssociationClass] = self.maybe_extract_tags(row)
owners: List[OwnerClass] = self.maybe_extract_owners(row, is_resource_row)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
index ee105f4862caba..51a25829d21dba 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
@@ -152,7 +152,9 @@ def execute_server_cursor(
) -> Iterable[Dict[str, Any]]:
with self.engine.connect() as conn:
if self.engine.dialect.name in ["postgresql", "mysql", "mariadb"]:
- with conn.begin(): # Transaction required for PostgreSQL server-side cursor
+ with (
+ conn.begin()
+ ): # Transaction required for PostgreSQL server-side cursor
# Note that stream_results=True is mainly supported by PostgreSQL and MySQL-based dialects.
# https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.execution_options.params.stream_results
conn = conn.execution_options(
@@ -222,7 +224,7 @@ def _parse_row(
)
except Exception as e:
logger.warning(
- f'Failed to parse metadata for {row["urn"]}: {e}', exc_info=True
+ f"Failed to parse metadata for {row['urn']}: {e}", exc_info=True
)
self.report.num_database_parse_errors += 1
self.report.database_parse_errors.setdefault(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
index 5042f6d69b261a..41b59a9c8b892c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
@@ -194,20 +194,20 @@ def infer_metadata_endpoint(access_url: str) -> Optional[str]:
_DBT_FIELDS_BY_TYPE = {
"models": f"""
- { _DBT_GRAPHQL_COMMON_FIELDS }
- { _DBT_GRAPHQL_NODE_COMMON_FIELDS }
- { _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
+ {_DBT_GRAPHQL_COMMON_FIELDS}
+ {_DBT_GRAPHQL_NODE_COMMON_FIELDS}
+ {_DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS}
dependsOn
materializedType
""",
"seeds": f"""
- { _DBT_GRAPHQL_COMMON_FIELDS }
- { _DBT_GRAPHQL_NODE_COMMON_FIELDS }
- { _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
+ {_DBT_GRAPHQL_COMMON_FIELDS}
+ {_DBT_GRAPHQL_NODE_COMMON_FIELDS}
+ {_DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS}
""",
"sources": f"""
- { _DBT_GRAPHQL_COMMON_FIELDS }
- { _DBT_GRAPHQL_NODE_COMMON_FIELDS }
+ {_DBT_GRAPHQL_COMMON_FIELDS}
+ {_DBT_GRAPHQL_NODE_COMMON_FIELDS}
identifier
sourceName
sourceDescription
@@ -218,9 +218,9 @@ def infer_metadata_endpoint(access_url: str) -> Optional[str]:
loader
""",
"snapshots": f"""
- { _DBT_GRAPHQL_COMMON_FIELDS }
- { _DBT_GRAPHQL_NODE_COMMON_FIELDS }
- { _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
+ {_DBT_GRAPHQL_COMMON_FIELDS}
+ {_DBT_GRAPHQL_NODE_COMMON_FIELDS}
+ {_DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS}
parentsSources {{
uniqueId
}}
@@ -229,7 +229,7 @@ def infer_metadata_endpoint(access_url: str) -> Optional[str]:
}}
""",
"tests": f"""
- { _DBT_GRAPHQL_COMMON_FIELDS }
+ {_DBT_GRAPHQL_COMMON_FIELDS}
state
columnName
status
@@ -315,7 +315,7 @@ def _send_graphql_query(
res = response.json()
if "errors" in res:
raise ValueError(
- f'Unable to fetch metadata from dbt Cloud: {res["errors"]}'
+ f"Unable to fetch metadata from dbt Cloud: {res['errors']}"
)
data = res["data"]
except JSONDecodeError as e:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
index 499e7e1231d050..fa85308b325979 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@@ -506,16 +506,18 @@ class DBTNode:
materialization: Optional[str] # table, view, ephemeral, incremental, snapshot
# see https://docs.getdbt.com/reference/artifacts/manifest-json
catalog_type: Optional[str]
- missing_from_catalog: bool # indicates if the node was missing from the catalog.json
+ missing_from_catalog: (
+ bool # indicates if the node was missing from the catalog.json
+ )
owner: Optional[str]
columns: List[DBTColumn] = field(default_factory=list)
upstream_nodes: List[str] = field(default_factory=list) # list of upstream dbt_name
upstream_cll: List[DBTColumnLineageInfo] = field(default_factory=list)
- raw_sql_parsing_result: Optional[
- SqlParsingResult
- ] = None # only set for nodes that don't depend on ephemeral models
+ raw_sql_parsing_result: Optional[SqlParsingResult] = (
+ None # only set for nodes that don't depend on ephemeral models
+ )
cll_debug_info: Optional[SqlParsingDebugInfo] = None
meta: Dict[str, Any] = field(default_factory=dict)
@@ -869,10 +871,10 @@ def create_test_entity_mcps(
"platform": DBT_PLATFORM,
"name": node.dbt_name,
"instance": self.config.platform_instance,
+ # Ideally we'd include the env unconditionally. However, we started out
+ # not including env in the guid, so we need to maintain backwards compatibility
+ # with existing PROD assertions.
**(
- # Ideally we'd include the env unconditionally. However, we started out
- # not including env in the guid, so we need to maintain backwards compatibility
- # with existing PROD assertions.
{"env": self.config.env}
if self.config.env != mce_builder.DEFAULT_ENV
and self.config.include_env_in_assertion_guid
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py
index 072995c10ebcef..cf2d9670400ca5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py
@@ -191,9 +191,9 @@ def authenticate(self, connection_args: "DremioSourceConfig") -> None:
)
return
else:
- assert (
- connection_args.username and connection_args.password
- ), "Username and password are required for authentication"
+ assert connection_args.username and connection_args.password, (
+ "Username and password are required for authentication"
+ )
host = connection_args.hostname
port = connection_args.port
protocol = "https" if connection_args.tls else "http"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py
index e5d6b8e40fb3d8..482647f8d77da1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py
@@ -101,9 +101,9 @@ def add_mapping(
Add a new source type if not in the map (e.g., Dremio ARP).
"""
dremio_source_type = dremio_source_type.upper()
- DremioToDataHubSourceTypeMapping.SOURCE_TYPE_MAPPING[
- dremio_source_type
- ] = datahub_source_type
+ DremioToDataHubSourceTypeMapping.SOURCE_TYPE_MAPPING[dremio_source_type] = (
+ datahub_source_type
+ )
if category:
if category.lower() == "file_object_storage":
diff --git a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py
index 99aa5f54f6a576..ce1c60dcafdd46 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py
@@ -111,10 +111,10 @@ class ElasticToSchemaFieldConverter:
@staticmethod
def get_column_type(elastic_column_type: str) -> SchemaFieldDataType:
- type_class: Optional[
- Type
- ] = ElasticToSchemaFieldConverter._field_type_to_schema_field_type.get(
- elastic_column_type
+ type_class: Optional[Type] = (
+ ElasticToSchemaFieldConverter._field_type_to_schema_field_type.get(
+ elastic_column_type
+ )
)
if type_class is None:
logger.warning(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py
index 02b29051dd2ebe..ffcd9218a2103c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py
@@ -155,9 +155,9 @@ def _update_report(self, urn: str, entity_type: str) -> None:
current_count = self.report.num_hard_deleted_by_type.get(entity_type, 0)
self.report.num_hard_deleted_by_type[entity_type] = current_count + 1
if entity_type not in self.report.sample_hard_deleted_aspects_by_type:
- self.report.sample_hard_deleted_aspects_by_type[
- entity_type
- ] = LossyList()
+ self.report.sample_hard_deleted_aspects_by_type[entity_type] = (
+ LossyList()
+ )
self.report.sample_hard_deleted_aspects_by_type[entity_type].append(urn)
def delete_entity(self, urn: str) -> None:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py
index 18838af9bdf85f..5196c8ec5b998b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py
@@ -141,8 +141,9 @@ def s3_source_overrides(self, source: S3Source) -> S3Source:
source.source_config.platform = PLATFORM_GCS
source.is_s3_platform = lambda: True # type: ignore
- source.create_s3_path = lambda bucket_name, key: unquote(f"s3://{bucket_name}/{key}") # type: ignore
-
+ source.create_s3_path = lambda bucket_name, key: unquote( # type: ignore
+ f"s3://{bucket_name}/{key}"
+ )
return source
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
index aba0deebd356c5..bde26f97bf271f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
@@ -327,7 +327,7 @@ def _is_single_row_query_method(query: Any) -> bool:
def _run_with_query_combiner(
- method: Callable[Concatenate["_SingleDatasetProfiler", P], None]
+ method: Callable[Concatenate["_SingleDatasetProfiler", P], None],
) -> Callable[Concatenate["_SingleDatasetProfiler", P], None]:
@functools.wraps(method)
def inner(
@@ -1537,9 +1537,7 @@ def create_bigquery_temp_table(
query_job: Optional["google.cloud.bigquery.job.query.QueryJob"] = (
# In google-cloud-bigquery 3.15.0, the _query_job attribute was
# made public and renamed to query_job.
- cursor.query_job
- if hasattr(cursor, "query_job")
- else cursor._query_job # type: ignore[attr-defined]
+ cursor.query_job if hasattr(cursor, "query_job") else cursor._query_job # type: ignore[attr-defined]
)
assert query_job
temp_destination_table = query_job.destination
diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py
index 42d0def0a46e7d..93142a347ca0e6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py
@@ -220,9 +220,9 @@ def ensure_field_level_settings_are_normalized(
)
values[field_level_metric] = False
- assert (
- max_num_fields_to_profile is None
- ), f"{max_num_fields_to_profile_key} should be set to None"
+ assert max_num_fields_to_profile is None, (
+ f"{max_num_fields_to_profile_key} should be set to None"
+ )
# Disable expensive queries.
if values.get("turn_off_expensive_profiling_metrics"):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py
index 8101f0110509e3..9a62ee2dab52f4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py
@@ -296,9 +296,9 @@ def _create_iceberg_workunit(
custom_properties["snapshot-id"] = str(
table.current_snapshot().snapshot_id
)
- custom_properties[
- "manifest-list"
- ] = table.current_snapshot().manifest_list
+ custom_properties["manifest-list"] = (
+ table.current_snapshot().manifest_list
+ )
dataset_properties = DatasetPropertiesClass(
name=table.name()[-1],
description=table.metadata.properties.get("comment", None),
diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py
index 885b6514779cc4..edb9b7b8bd5264 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py
@@ -354,9 +354,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
yield MetadataWorkUnit(id=group_status_wu_id, mcp=group_status_mcp)
# Populate GroupMembership Aspects for CorpUsers
- datahub_corp_user_urn_to_group_membership: Dict[
- str, GroupMembershipClass
- ] = defaultdict(lambda: GroupMembershipClass(groups=[]))
+ datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = (
+ defaultdict(lambda: GroupMembershipClass(groups=[]))
+ )
if (
self.config.ingest_group_membership
and len(self.selected_azure_ad_groups) > 0
diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py
index dda81b0e34a8d2..5452fbcd3f053b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py
@@ -344,9 +344,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
).as_workunit()
# Step 2: Populate GroupMembership Aspects for CorpUsers
- datahub_corp_user_urn_to_group_membership: Dict[
- str, GroupMembershipClass
- ] = defaultdict(lambda: GroupMembershipClass(groups=[]))
+ datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = (
+ defaultdict(lambda: GroupMembershipClass(groups=[]))
+ )
if self.config.ingest_group_membership and okta_groups is not None:
# Fetch membership for each group.
for okta_group in okta_groups:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py
index fa842a15ba7328..9f15eda1501f11 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py
@@ -419,10 +419,10 @@ def _extract_record(
custom_props = self.build_custom_properties(
topic, topic_detail, extra_topic_config
)
- schema_name: Optional[
- str
- ] = self.schema_registry_client._get_subject_for_topic(
- topic, is_key_schema=False
+ schema_name: Optional[str] = (
+ self.schema_registry_client._get_subject_for_topic(
+ topic, is_key_schema=False
+ )
)
if schema_name is not None:
custom_props["Schema Name"] = schema_name
@@ -610,11 +610,13 @@ def fetch_extra_topic_details(self, topics: List[str]) -> Dict[str, dict]:
def fetch_topic_configurations(self, topics: List[str]) -> Dict[str, dict]:
logger.info("Fetching config details for all topics")
- configs: Dict[
- ConfigResource, concurrent.futures.Future
- ] = self.admin_client.describe_configs(
- resources=[ConfigResource(ConfigResource.Type.TOPIC, t) for t in topics],
- request_timeout=self.source_config.connection.client_timeout_seconds,
+ configs: Dict[ConfigResource, concurrent.futures.Future] = (
+ self.admin_client.describe_configs(
+ resources=[
+ ConfigResource(ConfigResource.Type.TOPIC, t) for t in topics
+ ],
+ request_timeout=self.source_config.connection.client_timeout_seconds,
+ )
)
logger.debug("Waiting for config details futures to complete")
concurrent.futures.wait(configs.values())
diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py
index 72be864fc30a1c..9edfce5855f430 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py
@@ -110,9 +110,8 @@ def get_connectors_manifest(self) -> Iterable[ConnectorManifest]:
connector_manifest = self._get_connector_manifest(
connector_name, connector_url
)
- if (
- connector_manifest is None
- or not self.config.connector_patterns.allowed(connector_manifest.name)
+ if connector_manifest is None or not self.config.connector_patterns.allowed(
+ connector_manifest.name
):
self.report.report_dropped(connector_name)
continue
diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py
index 2790460c8e6019..10255ed544b812 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py
@@ -199,9 +199,9 @@ def get_parser(
transforms.append(transform)
for key in self.connector_manifest.config.keys():
if key.startswith(f"transforms.{name}."):
- transform[
- key.replace(f"transforms.{name}.", "")
- ] = self.connector_manifest.config[key]
+ transform[key.replace(f"transforms.{name}.", "")] = (
+ self.connector_manifest.config[key]
+ )
if "defaultDataset" in connector_manifest.config:
defaultDataset = connector_manifest.config["defaultDataset"]
diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py
index 7b3b6e551a0a1f..5e64d4e161e3ea 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py
@@ -123,9 +123,9 @@ def get_parser(
transforms.append(transform)
for key in self.connector_manifest.config.keys():
if key.startswith(f"transforms.{name}."):
- transform[
- key.replace(f"transforms.{name}.", "")
- ] = self.connector_manifest.config[key]
+ transform[key.replace(f"transforms.{name}.", "")] = (
+ self.connector_manifest.config[key]
+ )
return self.JdbcParser(
db_connection_url,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
index 1183916e9b3fef..abe9b5684f8f1f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
@@ -596,9 +596,9 @@ class LookerUtil:
@staticmethod
def _extract_view_from_field(field: str) -> str:
- assert (
- field.count(".") == 1
- ), f"Error: A field must be prefixed by a view name, field is: {field}"
+ assert field.count(".") == 1, (
+ f"Error: A field must be prefixed by a view name, field is: {field}"
+ )
return field.split(".")[0]
@staticmethod
@@ -815,9 +815,9 @@ class LookerExplore:
project_name: Optional[str] = None
label: Optional[str] = None
description: Optional[str] = None
- upstream_views: Optional[
- List[ProjectInclude]
- ] = None # captures the view name(s) this explore is derived from
+ upstream_views: Optional[List[ProjectInclude]] = (
+ None # captures the view name(s) this explore is derived from
+ )
upstream_views_file_path: Dict[str, Optional[str]] = dataclasses_field(
default_factory=dict
) # view_name is key and file_path is value. A single file may contains multiple views
@@ -889,7 +889,7 @@ def from_dict(
upstream_views.extend(parsed_explore.upstream_views or [])
else:
logger.warning(
- f'Could not find extended explore {extended_explore} for explore {dict["name"]} in model {model_name}'
+ f"Could not find extended explore {extended_explore} for explore {dict['name']} in model {model_name}"
)
else:
# we only fallback to the view_names list if this is not an extended explore
@@ -903,7 +903,7 @@ def from_dict(
)
if not info:
logger.warning(
- f'Could not resolve view {view_name} for explore {dict["name"]} in model {model_name}'
+ f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
)
else:
upstream_views.append(
@@ -935,9 +935,9 @@ def from_api( # noqa: C901
try:
explore = client.lookml_model_explore(model, explore_name)
views: Set[str] = set()
- lkml_fields: List[
- LookmlModelExploreField
- ] = explore_field_set_to_lkml_fields(explore)
+ lkml_fields: List[LookmlModelExploreField] = (
+ explore_field_set_to_lkml_fields(explore)
+ )
if explore.view_name is not None and explore.view_name != explore.name:
# explore is not named after a view and is instead using a from field, which is modeled as view_name.
@@ -1034,9 +1034,9 @@ def from_api( # noqa: C901
if measure_field.name is None:
continue
else:
- field_name_vs_raw_explore_field[
- measure_field.name
- ] = measure_field
+ field_name_vs_raw_explore_field[measure_field.name] = (
+ measure_field
+ )
view_fields.append(
ViewField(
@@ -1072,11 +1072,11 @@ def from_api( # noqa: C901
if view_project_map:
logger.debug(f"views and their projects: {view_project_map}")
- upstream_views_file_path: Dict[
- str, Optional[str]
- ] = create_upstream_views_file_path_map(
- lkml_fields=lkml_fields,
- view_names=views,
+ upstream_views_file_path: Dict[str, Optional[str]] = (
+ create_upstream_views_file_path_map(
+ lkml_fields=lkml_fields,
+ view_names=views,
+ )
)
if upstream_views_file_path:
logger.debug(f"views and their file-paths: {upstream_views_file_path}")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
index 4e9d0f68928a45..3ed3186399588e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
@@ -166,9 +166,9 @@ def _get_generic_definition(
# e.g. spark1 or hive2 or druid_18
platform = re.sub(r"[0-9]+", "", dialect_name.split("_")[0])
- assert (
- platform is not None
- ), f"Failed to extract a valid platform from connection {looker_connection}"
+ assert platform is not None, (
+ f"Failed to extract a valid platform from connection {looker_connection}"
+ )
db = looker_connection.database
schema = looker_connection.schema # ok for this to be None
return platform, db, schema
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
index 8487d5113bc1d3..2f1fcd378d40fb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
@@ -250,9 +250,9 @@ def _set_test_connection_capability(
@staticmethod
def _extract_view_from_field(field: str) -> str:
- assert (
- field.count(".") == 1
- ), f"Error: A field must be prefixed by a view name, field is: {field}"
+ assert field.count(".") == 1, (
+ f"Error: A field must be prefixed by a view name, field is: {field}"
+ )
return field.split(".")[0]
def _get_views_from_fields(self, fields: List[str]) -> List[str]:
@@ -610,12 +610,12 @@ def _get_folder_browse_path_v2_entries(
def _create_platform_instance_aspect(
self,
) -> DataPlatformInstance:
- assert (
- self.source_config.platform_name
- ), "Platform name is not set in the configuration."
- assert (
- self.source_config.platform_instance
- ), "Platform instance is not set in the configuration."
+ assert self.source_config.platform_name, (
+ "Platform name is not set in the configuration."
+ )
+ assert self.source_config.platform_instance, (
+ "Platform instance is not set in the configuration."
+ )
return DataPlatformInstance(
platform=builder.make_data_platform_urn(self.source_config.platform_name),
@@ -1016,9 +1016,9 @@ def _make_dashboard_and_chart_mces(
yield from chart_events
# Step 2: Emit metadata events for the Dashboard itself.
- chart_urns: Set[
- str
- ] = set() # Collect the unique child chart urns for dashboard input lineage.
+ chart_urns: Set[str] = (
+ set()
+ ) # Collect the unique child chart urns for dashboard input lineage.
for chart_event in chart_events:
chart_event_urn = self._extract_event_urn(chart_event)
if chart_event_urn:
@@ -1538,20 +1538,20 @@ def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]:
}
)
- dashboard_element: Optional[
- LookerDashboardElement
- ] = self._get_looker_dashboard_element(
- DashboardElement(
- id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
- # we add the "looks_" prefix to look.id.
- title=look.title,
- subtitle_text=look.description,
- look_id=look.id,
- dashboard_id=None, # As this is an independent look
- look=LookWithQuery(
- query=query, folder=look.folder, user_id=look.user_id
+ dashboard_element: Optional[LookerDashboardElement] = (
+ self._get_looker_dashboard_element(
+ DashboardElement(
+ id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
+ # we add the "looks_" prefix to look.id.
+ title=look.title,
+ subtitle_text=look.description,
+ look_id=look.id,
+ dashboard_id=None, # As this is an independent look
+ look=LookWithQuery(
+ query=query, folder=look.folder, user_id=look.user_id
+ ),
),
- ),
+ )
)
if dashboard_element is not None:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
index 6d49d57e077435..2bcae4d46b8d52 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
@@ -33,9 +33,9 @@
class SpecialVariable:
- SPECIAL_VARIABLE_PATTERN: ClassVar[
- str
- ] = r"\b\w+(\.\w+)*\._(is_selected|in_query|is_filtered)\b"
+ SPECIAL_VARIABLE_PATTERN: ClassVar[str] = (
+ r"\b\w+(\.\w+)*\._(is_selected|in_query|is_filtered)\b"
+ )
liquid_variable: dict
def __init__(self, liquid_variable):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
index 098d7d73a3da84..05806840b5c954 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
@@ -257,9 +257,9 @@ def _process_entity_timeseries_rows(
for row in rows:
logger.debug(row)
- entity_stat_aspect[
- self.get_entity_stat_key(row)
- ] = self.to_entity_timeseries_stat_aspect(row)
+ entity_stat_aspect[self.get_entity_stat_key(row)] = (
+ self.to_entity_timeseries_stat_aspect(row)
+ )
return entity_stat_aspect
@@ -385,10 +385,8 @@ def generate_usage_stat_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
entity_rows: List[Dict] = self._execute_query(
entity_query_with_filters, "entity_query"
)
- entity_usage_stat: Dict[
- Tuple[str, str], Any
- ] = self._process_entity_timeseries_rows(
- entity_rows
+ entity_usage_stat: Dict[Tuple[str, str], Any] = (
+ self._process_entity_timeseries_rows(entity_rows)
) # Any type to pass mypy unbound Aspect type error
user_wise_query_with_filters: LookerQuery = self._append_filters(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
index 103f4175a9ccff..4e38165bb56286 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
@@ -38,16 +38,16 @@ def merge_parent_and_child_fields(
# Create a map field-name vs field
child_field_map: dict = {}
for field in child_fields:
- assert (
- NAME in field
- ), "A lookml view must have a name field" # name is required field of lookml field array
+ assert NAME in field, (
+ "A lookml view must have a name field"
+ ) # name is required field of lookml field array
child_field_map[field[NAME]] = field
for field in parent_fields:
- assert (
- NAME in field
- ), "A lookml view must have a name field" # name is required field of lookml field array
+ assert NAME in field, (
+ "A lookml view must have a name field"
+ ) # name is required field of lookml field array
if field[NAME] in child_field_map:
# Fields defined in the child view take higher precedence.
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
index c7d3724472d3c8..a8575c84b510d5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
@@ -482,14 +482,14 @@ def get_project_name(self, model_name: str) -> str:
if self.source_config.project_name is not None:
return self.source_config.project_name
- assert (
- self.looker_client is not None
- ), "Failed to find a configured Looker API client"
+ assert self.looker_client is not None, (
+ "Failed to find a configured Looker API client"
+ )
try:
model = self.looker_client.lookml_model(model_name, fields="project_name")
- assert (
- model.project_name is not None
- ), f"Failed to find a project name for model {model_name}"
+ assert model.project_name is not None, (
+ f"Failed to find a project name for model {model_name}"
+ )
return model.project_name
except SDKError:
raise ValueError(
@@ -541,9 +541,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
self.reporter.git_clone_latency = datetime.now() - start_time
self.source_config.base_folder = checkout_dir.resolve()
- self.base_projects_folder[
- BASE_PROJECT_NAME
- ] = self.source_config.base_folder
+ self.base_projects_folder[BASE_PROJECT_NAME] = (
+ self.source_config.base_folder
+ )
visited_projects: Set[str] = set()
@@ -641,9 +641,9 @@ def _recursively_check_manifests(
repo_url=remote_project.url,
)
- self.base_projects_folder[
- remote_project.name
- ] = p_checkout_dir.resolve()
+ self.base_projects_folder[remote_project.name] = (
+ p_checkout_dir.resolve()
+ )
repo = p_cloner.get_last_repo_cloned()
assert repo
remote_git_info = GitInfo(
@@ -930,9 +930,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
logger.warning(
f"view {maybe_looker_view.id.view_name} from model {model_name}, connection {model.connection} was previously processed via model {prev_model_name}, connection {prev_model_connection} and will likely lead to incorrect lineage to the underlying tables"
)
- if (
- not self.source_config.emit_reachable_views_only
- ):
+ if not self.source_config.emit_reachable_views_only:
logger.warning(
"Consider enabling the `emit_reachable_views_only` flag to handle this case."
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
index 971181e4300d69..f77eebb3cdd8cb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
@@ -484,11 +484,11 @@ def __init__(
)
def __get_upstream_dataset_urn(self) -> List[str]:
- current_view_id: Optional[
- LookerViewId
- ] = self.looker_view_id_cache.get_looker_view_id(
- view_name=self.view_context.name(),
- base_folder_path=self.view_context.base_folder_path,
+ current_view_id: Optional[LookerViewId] = (
+ self.looker_view_id_cache.get_looker_view_id(
+ view_name=self.view_context.name(),
+ base_folder_path=self.view_context.base_folder_path,
+ )
)
# Current view will always be present in cache. assert will silence the lint
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py
index b0b04dff20bffc..02125db83d2582 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py
@@ -172,10 +172,10 @@ def _get_mlflow_registered_models(self) -> Iterable[RegisteredModel]:
"""
Get all Registered Models in MLflow Model Registry.
"""
- registered_models: Iterable[
- RegisteredModel
- ] = self._traverse_mlflow_search_func(
- search_func=self.client.search_registered_models,
+ registered_models: Iterable[RegisteredModel] = (
+ self._traverse_mlflow_search_func(
+ search_func=self.client.search_registered_models,
+ )
)
return registered_models
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
index bbc4897d227bac..ad8487c1a759ec 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
@@ -288,7 +288,9 @@ def __init__(self, ctx: PipelineContext, config: MongoDBConfig):
# See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes
self.mongo_client = MongoClient(
- self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options # type: ignore
+ self.config.connect_uri,
+ datetime_conversion="DATETIME_AUTO",
+ **options, # type: ignore
)
# This cheaply tests the connection. For details, see
@@ -470,9 +472,9 @@ def _infer_schema_metadata(
)
# Add this information to the custom properties so user can know they are looking at downsampled schema
dataset_properties.customProperties["schema.downsampled"] = "True"
- dataset_properties.customProperties[
- "schema.totalFields"
- ] = f"{collection_schema_size}"
+ dataset_properties.customProperties["schema.totalFields"] = (
+ f"{collection_schema_size}"
+ )
logger.debug(f"Size of collection fields = {len(collection_fields)}")
# append each schema field (sort so output is consistent)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py
index 7f446f6d1c2718..52b1386e21d85a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py
@@ -184,9 +184,9 @@ def validator_site_url_to_site_name(cls, values):
@validator("site_url")
def validator_site_url(cls, site_url: str) -> str:
- assert site_url.startswith(
- ("http://", "https://")
- ), "site_url must start with http:// or https://"
+ assert site_url.startswith(("http://", "https://")), (
+ "site_url must start with http:// or https://"
+ )
if not site_url.endswith("/"):
site_url = site_url + "/"
@@ -487,9 +487,7 @@ def rest_api_base_url(self):
def get_report(self) -> SourceReport:
return self.report
- def update_flow(
- self, pg_flow_dto: Dict, recursion_level: int = 0
- ) -> None: # noqa: C901
+ def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None: # noqa: C901
"""
Update self.nifi_flow with contents of the input process group `pg_flow_dto`
"""
@@ -548,16 +546,16 @@ def update_flow(
for inputPort in flow_dto.get("inputPorts", []):
component = inputPort.get("component")
if inputPort.get("allowRemoteAccess"):
- self.nifi_flow.remotely_accessible_ports[
- component.get("id")
- ] = NifiComponent(
- component.get("id"),
- component.get("name"),
- component.get("type"),
- component.get("parentGroupId"),
- NifiType.INPUT_PORT,
- comments=component.get("comments"),
- status=component.get("status", {}).get("runStatus"),
+ self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+ NifiComponent(
+ component.get("id"),
+ component.get("name"),
+ component.get("type"),
+ component.get("parentGroupId"),
+ NifiType.INPUT_PORT,
+ comments=component.get("comments"),
+ status=component.get("status", {}).get("runStatus"),
+ )
)
logger.debug(f"Adding remotely accessible port {component.get('id')}")
else:
@@ -576,16 +574,16 @@ def update_flow(
for outputPort in flow_dto.get("outputPorts", []):
component = outputPort.get("component")
if outputPort.get("allowRemoteAccess"):
- self.nifi_flow.remotely_accessible_ports[
- component.get("id")
- ] = NifiComponent(
- component.get("id"),
- component.get("name"),
- component.get("type"),
- component.get("parentGroupId"),
- NifiType.OUTPUT_PORT,
- comments=component.get("comments"),
- status=component.get("status", {}).get("runStatus"),
+ self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+ NifiComponent(
+ component.get("id"),
+ component.get("name"),
+ component.get("type"),
+ component.get("parentGroupId"),
+ NifiType.OUTPUT_PORT,
+ comments=component.get("comments"),
+ status=component.get("status", {}).get("runStatus"),
+ )
)
logger.debug(f"Adding remotely accessible port {component.get('id')}")
else:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi.py b/metadata-ingestion/src/datahub/ingestion/source/openapi.py
index 8289265483d598..2075e999ea1d0e 100755
--- a/metadata-ingestion/src/datahub/ingestion/source/openapi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/openapi.py
@@ -101,16 +101,16 @@ def get_swagger(self) -> Dict:
# details there once, and then use that session for all requests.
self.token = f"Bearer {self.bearer_token}"
else:
- assert (
- "url_complement" in self.get_token.keys()
- ), "When 'request_type' is set to 'get', an url_complement is needed for the request."
+ assert "url_complement" in self.get_token.keys(), (
+ "When 'request_type' is set to 'get', an url_complement is needed for the request."
+ )
if self.get_token["request_type"] == "get":
- assert (
- "{username}" in self.get_token["url_complement"]
- ), "we expect the keyword {username} to be present in the url"
- assert (
- "{password}" in self.get_token["url_complement"]
- ), "we expect the keyword {password} to be present in the url"
+ assert "{username}" in self.get_token["url_complement"], (
+ "we expect the keyword {username} to be present in the url"
+ )
+ assert "{password}" in self.get_token["url_complement"], (
+ "we expect the keyword {password} to be present in the url"
+ )
url4req = self.get_token["url_complement"].replace(
"{username}", self.username
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
index b49d40a0c7eb6a..14beab6bc9391e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
@@ -225,9 +225,9 @@ def report_charts_dropped(self, view: str) -> None:
def default_for_dataset_type_mapping() -> Dict[str, str]:
dict_: dict = {}
for item in SupportedDataPlatform:
- dict_[
- item.value.powerbi_data_platform_name
- ] = item.value.datahub_data_platform_name
+ dict_[item.value.powerbi_data_platform_name] = (
+ item.value.datahub_data_platform_name
+ )
return dict_
@@ -303,15 +303,15 @@ class PowerBiDashboardSourceConfig(
# Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
# DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
# mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
- dataset_type_mapping: Union[
- Dict[str, str], Dict[str, PlatformDetail]
- ] = pydantic.Field(
- default_factory=default_for_dataset_type_mapping,
- description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
- "DataHub supported datasources."
- "You can configured platform instance for dataset lineage. "
- "See Quickstart Recipe for mapping",
- hidden_from_docs=True,
+ dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = (
+ pydantic.Field(
+ default_factory=default_for_dataset_type_mapping,
+ description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
+ "DataHub supported datasources."
+ "You can configured platform instance for dataset lineage. "
+ "See Quickstart Recipe for mapping",
+ hidden_from_docs=True,
+ )
)
# PowerBI datasource's server to platform instance mapping
server_to_platform_instance: Dict[
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
index 2a5de7494920b2..759fc6d7dadfba 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
@@ -128,17 +128,17 @@ def get_upstream_tables(
reporter.m_query_parse_successes += 1
try:
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = resolver.MQueryResolver(
- table=table,
- parse_tree=parse_tree,
- reporter=reporter,
- parameters=parameters,
- ).resolve_to_lineage(
- ctx=ctx,
- config=config,
- platform_instance_resolver=platform_instance_resolver,
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ resolver.MQueryResolver(
+ table=table,
+ parse_tree=parse_tree,
+ reporter=reporter,
+ parameters=parameters,
+ ).resolve_to_lineage(
+ ctx=ctx,
+ config=config,
+ platform_instance_resolver=platform_instance_resolver,
+ )
)
if lineage:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py
index 63520bd731de86..54b810650f5854 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py
@@ -170,8 +170,7 @@ def create_reference_table(
logger.debug(f"Processing arguments {arguments}")
if (
- len(arguments)
- >= 4 # [0] is warehouse FQDN.
+ len(arguments) >= 4 # [0] is warehouse FQDN.
# [1] is endpoint, we are not using it.
# [2] is "Catalog" key
# [3] is catalog's value
@@ -215,16 +214,16 @@ def parse_custom_sql(
native_sql_parser.remove_special_characters(query)
)
- parsed_result: Optional[
- "SqlParsingResult"
- ] = native_sql_parser.parse_custom_sql(
- ctx=self.ctx,
- query=query,
- platform=self.get_platform_pair().datahub_data_platform_name,
- platform_instance=platform_detail.platform_instance,
- env=platform_detail.env,
- database=database,
- schema=schema,
+ parsed_result: Optional["SqlParsingResult"] = (
+ native_sql_parser.parse_custom_sql(
+ ctx=self.ctx,
+ query=query,
+ platform=self.get_platform_pair().datahub_data_platform_name,
+ platform_instance=platform_detail.platform_instance,
+ env=platform_detail.env,
+ database=database,
+ schema=schema,
+ )
)
if parsed_result is None:
@@ -410,9 +409,9 @@ def create_lineage(
f"Processing Databrick data-access function detail {data_access_func_detail}"
)
table_detail: Dict[str, str] = {}
- temp_accessor: Optional[
- IdentifierAccessor
- ] = data_access_func_detail.identifier_accessor
+ temp_accessor: Optional[IdentifierAccessor] = (
+ data_access_func_detail.identifier_accessor
+ )
while temp_accessor:
# Condition to handle databricks M-query pattern where table, schema and database all are present in
@@ -647,11 +646,13 @@ def create_lineage(
db_name: str = data_access_func_detail.identifier_accessor.items["Name"] # type: ignore
# Second is schema name
schema_name: str = cast(
- IdentifierAccessor, data_access_func_detail.identifier_accessor.next # type: ignore
+ IdentifierAccessor,
+ data_access_func_detail.identifier_accessor.next, # type: ignore
).items["Name"]
# Third is table name
table_name: str = cast(
- IdentifierAccessor, data_access_func_detail.identifier_accessor.next.next # type: ignore
+ IdentifierAccessor,
+ data_access_func_detail.identifier_accessor.next.next, # type: ignore
).items["Name"]
qualified_table_name: str = f"{db_name}.{schema_name}.{table_name}"
@@ -768,10 +769,13 @@ def get_db_name(self, data_access_tokens: List[str]) -> Optional[str]:
): # database name is explicitly set
return database
- return get_next_item( # database name is set in Name argument
- data_access_tokens, "Name"
- ) or get_next_item( # If both above arguments are not available, then try Catalog
- data_access_tokens, "Catalog"
+ return (
+ get_next_item( # database name is set in Name argument
+ data_access_tokens, "Name"
+ )
+ or get_next_item( # If both above arguments are not available, then try Catalog
+ data_access_tokens, "Catalog"
+ )
)
def create_lineage(
@@ -819,9 +823,7 @@ def create_lineage(
values=tree_function.remove_whitespaces_from_list(
tree_function.token_values(flat_argument_list[1])
),
- )[
- 0
- ] # Remove any whitespaces and double quotes character
+ )[0] # Remove any whitespaces and double quotes character
server = tree_function.strip_char_from_list([data_access_tokens[2]])[0]
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py
index 2756a113d1ef0c..42963c08d992d1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py
@@ -188,9 +188,9 @@ def _process_invoke_expression(
# - The inner function Table.TransformColumnTypes takes #"Removed Columns1"
# (a table reference) as its first argument
# - Its result is then passed as the first argument to Table.SplitColumn
- second_invoke_expression: Optional[
- Tree
- ] = tree_function.first_invoke_expression_func(first_argument)
+ second_invoke_expression: Optional[Tree] = (
+ tree_function.first_invoke_expression_func(first_argument)
+ )
if second_invoke_expression:
# 1. The First argument is function call
# 2. That function's first argument references next table variable
@@ -304,14 +304,14 @@ def internal(
logger.debug(v_statement.pretty())
return None
- invoke_expression: Optional[
- Tree
- ] = tree_function.first_invoke_expression_func(rh_tree)
+ invoke_expression: Optional[Tree] = (
+ tree_function.first_invoke_expression_func(rh_tree)
+ )
if invoke_expression is not None:
- result: Union[
- DataAccessFunctionDetail, List[str], None
- ] = self._process_invoke_expression(invoke_expression)
+ result: Union[DataAccessFunctionDetail, List[str], None] = (
+ self._process_invoke_expression(invoke_expression)
+ )
if result is None:
return None # No need to process some un-expected grammar found while processing invoke_expression
if isinstance(result, DataAccessFunctionDetail):
@@ -368,9 +368,9 @@ def resolve_to_lineage(
return lineage
# Parse M-Query and use output_variable as root of tree and create instance of DataAccessFunctionDetail
- table_links: List[
- DataAccessFunctionDetail
- ] = self.create_data_access_functional_detail(output_variable)
+ table_links: List[DataAccessFunctionDetail] = (
+ self.create_data_access_functional_detail(output_variable)
+ )
# Each item is data-access function
for f_detail in table_links:
@@ -390,7 +390,7 @@ def resolve_to_lineage(
# From supported_resolver enum get respective handler like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it
# & also pass additional information that will be need to generate lineage
- pattern_handler: (AbstractLineage) = supported_resolver.handler()(
+ pattern_handler: AbstractLineage = supported_resolver.handler()(
ctx=ctx,
table=self.table,
config=config,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
index 044946a5d308d1..5e5636f2d50fe3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
@@ -945,9 +945,9 @@ def to_datahub_work_units(
# Convert tiles to charts
ds_mcps, chart_mcps = self.to_datahub_chart(dashboard.tiles, workspace)
# Lets convert dashboard to datahub dashboard
- dashboard_mcps: List[
- MetadataChangeProposalWrapper
- ] = self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+ dashboard_mcps: List[MetadataChangeProposalWrapper] = (
+ self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+ )
# Now add MCPs in sequence
mcps.extend(ds_mcps)
@@ -1472,9 +1472,9 @@ def get_workspace_workunit(
def _get_dashboard_patch_work_unit(
self, work_unit: MetadataWorkUnit
) -> Optional[MetadataWorkUnit]:
- dashboard_info_aspect: Optional[
- DashboardInfoClass
- ] = work_unit.get_aspect_of_type(DashboardInfoClass)
+ dashboard_info_aspect: Optional[DashboardInfoClass] = (
+ work_unit.get_aspect_of_type(DashboardInfoClass)
+ )
if dashboard_info_aspect and self.source_config.patch_metadata:
return convert_dashboard_info_to_patch(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
index 161975fa635fdb..927840c44bf0b0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
@@ -425,9 +425,9 @@ def itr_pages(
response.raise_for_status()
- assert (
- Constant.VALUE in response.json()
- ), "'value' key is not present in paginated response"
+ assert Constant.VALUE in response.json(), (
+ "'value' key is not present in paginated response"
+ )
if not response.json()[Constant.VALUE]: # if it is an empty list then break
break
@@ -447,13 +447,13 @@ def get_app(
if raw_app is None:
return None
- assert (
- Constant.ID in raw_app
- ), f"{Constant.ID} is required field not present in server response"
+ assert Constant.ID in raw_app, (
+ f"{Constant.ID} is required field not present in server response"
+ )
- assert (
- Constant.NAME in raw_app
- ), f"{Constant.NAME} is required field not present in server response"
+ assert Constant.NAME in raw_app, (
+ f"{Constant.NAME} is required field not present in server response"
+ )
return App(
id=raw_app[Constant.ID],
diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py
index 31b0731aaa751c..10b062c98c147f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py
@@ -156,7 +156,7 @@ def _get_sheet(
)
if chart:
if not chart.title:
- chart.title = f"Object {i+1} of Sheet '{sheet.title}'"
+ chart.title = f"Object {i + 1} of Sheet '{sheet.title}'"
sheet.charts.append(chart)
websocket_connection.handle.pop()
return sheet
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py
index cad48eaf1c2375..932ada0a908b28 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py
@@ -178,9 +178,9 @@ class RedshiftConfig(
@root_validator(pre=True)
def check_email_is_set_on_usage(cls, values):
if values.get("include_usage_statistics"):
- assert (
- "email_domain" in values and values["email_domain"]
- ), "email_domain needs to be set if usage is enabled"
+ assert "email_domain" in values and values["email_domain"], (
+ "email_domain needs to be set if usage is enabled"
+ )
return values
@root_validator(skip_on_failure=True)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py
index 9bfca941ce48fb..cce282c71056a2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py
@@ -305,13 +305,13 @@ def test_connection(config_dict: dict) -> TestConnectionReport:
test_report.capability_report = {}
try:
RedshiftDataDictionary.get_schemas(connection, database=config.database)
- test_report.capability_report[
- SourceCapability.SCHEMA_METADATA
- ] = CapabilityReport(capable=True)
+ test_report.capability_report[SourceCapability.SCHEMA_METADATA] = (
+ CapabilityReport(capable=True)
+ )
except Exception as e:
- test_report.capability_report[
- SourceCapability.SCHEMA_METADATA
- ] = CapabilityReport(capable=False, failure_reason=str(e))
+ test_report.capability_report[SourceCapability.SCHEMA_METADATA] = (
+ CapabilityReport(capable=False, failure_reason=str(e))
+ )
except Exception as e:
test_report.basic_connectivity = CapabilityReport(
@@ -947,9 +947,9 @@ def cache_tables_and_views(self, connection, database):
def get_all_tables(
self,
) -> Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]]:
- all_tables: Dict[
- str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]
- ] = defaultdict(dict)
+ all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]] = (
+ defaultdict(dict)
+ )
for db in set().union(self.db_tables, self.db_views):
tables = self.db_tables.get(db, {})
views = self.db_views.get(db, {})
@@ -967,9 +967,9 @@ def extract_usage(
all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]],
) -> Iterable[MetadataWorkUnit]:
with PerfTimer() as timer:
- redundant_usage_run_skip_handler: Optional[
- RedundantUsageRunSkipHandler
- ] = None
+ redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = (
+ None
+ )
if self.config.enable_stateful_usage_ingestion:
redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler(
source=self,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py
index d66a1ee18be40f..a5758bdd825702 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py
@@ -199,10 +199,10 @@ def _get_workunits_internal(
end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT),
database=self.config.database,
)
- access_events_iterable: Iterable[
- RedshiftAccessEvent
- ] = self._gen_access_events_from_history_query(
- query, connection=self.connection, all_tables=all_tables
+ access_events_iterable: Iterable[RedshiftAccessEvent] = (
+ self._gen_access_events_from_history_query(
+ query, connection=self.connection, all_tables=all_tables
+ )
)
aggregated_events: AggregatedAccessEvents = self._aggregate_access_events(
@@ -225,10 +225,10 @@ def _gen_operation_aspect_workunits(
start_time=self.start_time.strftime(REDSHIFT_DATETIME_FORMAT),
end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT),
)
- access_events_iterable: Iterable[
- RedshiftAccessEvent
- ] = self._gen_access_events_from_history_query(
- query, connection, all_tables=all_tables
+ access_events_iterable: Iterable[RedshiftAccessEvent] = (
+ self._gen_access_events_from_history_query(
+ query, connection, all_tables=all_tables
+ )
)
# Generate operation aspect work units from the access events
diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/datalake_profiler_config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/datalake_profiler_config.py
index 89c092875e4490..58e930eb6e809c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/s3/datalake_profiler_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/s3/datalake_profiler_config.py
@@ -85,8 +85,8 @@ def ensure_field_level_settings_are_normalized(
if field_level_metric.startswith("include_field_"):
values.setdefault(field_level_metric, False)
- assert (
- max_num_fields_to_profile is None
- ), f"{max_num_fields_to_profile_key} should be set to None"
+ assert max_num_fields_to_profile is None, (
+ f"{max_num_fields_to_profile_key} should be set to None"
+ )
return values
diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py
index 88679efdf5fc31..66e0e6b741d1ff 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py
@@ -236,12 +236,12 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
try:
if self.config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN:
logger.debug("Access Token Provided in Config")
- assert (
- self.config.access_token is not None
- ), "Config access_token is required for DIRECT_ACCESS_TOKEN auth"
- assert (
- self.config.instance_url is not None
- ), "Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
+ assert self.config.access_token is not None, (
+ "Config access_token is required for DIRECT_ACCESS_TOKEN auth"
+ )
+ assert self.config.instance_url is not None, (
+ "Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
+ )
self.sf = Salesforce(
instance_url=self.config.instance_url,
@@ -250,15 +250,15 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
)
elif self.config.auth is SalesforceAuthType.USERNAME_PASSWORD:
logger.debug("Username/Password Provided in Config")
- assert (
- self.config.username is not None
- ), "Config username is required for USERNAME_PASSWORD auth"
- assert (
- self.config.password is not None
- ), "Config password is required for USERNAME_PASSWORD auth"
- assert (
- self.config.security_token is not None
- ), "Config security_token is required for USERNAME_PASSWORD auth"
+ assert self.config.username is not None, (
+ "Config username is required for USERNAME_PASSWORD auth"
+ )
+ assert self.config.password is not None, (
+ "Config password is required for USERNAME_PASSWORD auth"
+ )
+ assert self.config.security_token is not None, (
+ "Config security_token is required for USERNAME_PASSWORD auth"
+ )
self.sf = Salesforce(
username=self.config.username,
@@ -269,15 +269,15 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
elif self.config.auth is SalesforceAuthType.JSON_WEB_TOKEN:
logger.debug("Json Web Token provided in the config")
- assert (
- self.config.username is not None
- ), "Config username is required for JSON_WEB_TOKEN auth"
- assert (
- self.config.consumer_key is not None
- ), "Config consumer_key is required for JSON_WEB_TOKEN auth"
- assert (
- self.config.private_key is not None
- ), "Config private_key is required for JSON_WEB_TOKEN auth"
+ assert self.config.username is not None, (
+ "Config username is required for JSON_WEB_TOKEN auth"
+ )
+ assert self.config.consumer_key is not None, (
+ "Config consumer_key is required for JSON_WEB_TOKEN auth"
+ )
+ assert self.config.private_key is not None, (
+ "Config private_key is required for JSON_WEB_TOKEN auth"
+ )
self.sf = Salesforce(
username=self.config.username,
@@ -439,7 +439,8 @@ def get_platform_instance_workunit(self, datasetUrn: str) -> MetadataWorkUnit:
dataPlatformInstance = DataPlatformInstanceClass(
builder.make_data_platform_urn(self.platform),
instance=builder.make_dataplatform_instance_urn(
- self.platform, self.config.platform_instance # type:ignore
+ self.platform,
+ self.config.platform_instance, # type:ignore
),
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py
index e96eeb58d96efe..0468792f44aabb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py
@@ -477,9 +477,9 @@ def _gen_elements_workunit(
upstream_dataset_urns
and dataset_urn not in self.dataset_upstream_urn_mapping
):
- self.dataset_upstream_urn_mapping[
- dataset_urn
- ] = upstream_dataset_urns
+ self.dataset_upstream_urn_mapping[dataset_urn] = (
+ upstream_dataset_urns
+ )
element_input_fields = [
InputFieldClass(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py
index 3e88f43142ede6..6762302ebe57c7 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py
@@ -126,9 +126,9 @@ def fill_workspaces(self) -> None:
response.raise_for_status()
response_dict = response.json()
for workspace_dict in response_dict[Constant.ENTRIES]:
- self.workspaces[
- workspace_dict[Constant.WORKSPACEID]
- ] = Workspace.parse_obj(workspace_dict)
+ self.workspaces[workspace_dict[Constant.WORKSPACEID]] = (
+ Workspace.parse_obj(workspace_dict)
+ )
if response_dict[Constant.NEXTPAGE]:
url = f"{workspace_url}&page={response_dict[Constant.NEXTPAGE]}"
else:
@@ -147,9 +147,9 @@ def _get_users(self) -> Dict[str, str]:
response.raise_for_status()
response_dict = response.json()
for user_dict in response_dict[Constant.ENTRIES]:
- users[
- user_dict[Constant.MEMBERID]
- ] = f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}"
+ users[user_dict[Constant.MEMBERID]] = (
+ f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}"
+ )
if response_dict[Constant.NEXTPAGE]:
url = f"{members_url}&page={response_dict[Constant.NEXTPAGE]}"
else:
@@ -327,10 +327,12 @@ def get_page_elements(self, workbook: Workbook, page: Page) -> List[Element]:
response.raise_for_status()
for i, element_dict in enumerate(response.json()[Constant.ENTRIES]):
if not element_dict.get(Constant.NAME):
- element_dict[Constant.NAME] = f"Element {i+1} of Page '{page.name}'"
- element_dict[
- Constant.URL
- ] = f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true"
+ element_dict[Constant.NAME] = (
+ f"Element {i + 1} of Page '{page.name}'"
+ )
+ element_dict[Constant.URL] = (
+ f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true"
+ )
element = Element.parse_obj(element_dict)
if (
self.config.extract_lineage
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
index b14e51a982082c..5f732e2621656f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
@@ -384,18 +384,20 @@ def validate_shares(
assert all(
consumer.platform_instance != share_details.platform_instance
for consumer in share_details.consumers
- ), "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake."
+ ), (
+ "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake."
+ )
databases_included_in_share.append(shared_db)
databases_created_from_share.extend(share_details.consumers)
for db_from_share in databases_created_from_share:
- assert (
- db_from_share not in databases_included_in_share
- ), "Database included in a share can not be present as consumer in any share."
- assert (
- databases_created_from_share.count(db_from_share) == 1
- ), "Same database can not be present as consumer in more than one share."
+ assert db_from_share not in databases_included_in_share, (
+ "Database included in a share can not be present as consumer in any share."
+ )
+ assert databases_created_from_share.count(db_from_share) == 1, (
+ "Same database can not be present as consumer in more than one share."
+ )
return shares
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py
index 2239338972d9be..2854a99198d62b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py
@@ -250,9 +250,9 @@ def get_connect_args(self) -> dict:
if self.private_key is not None:
pkey_bytes = self.private_key.replace("\\n", "\n").encode()
else:
- assert (
- self.private_key_path
- ), "missing required private key path to read key from"
+ assert self.private_key_path, (
+ "missing required private key path to read key from"
+ )
with open(self.private_key_path, "rb") as key:
pkey_bytes = key.read()
@@ -284,9 +284,9 @@ def get_options(self) -> dict:
return self.options
def get_oauth_connection(self) -> NativeSnowflakeConnection:
- assert (
- self.oauth_config
- ), "oauth_config should be provided if using oauth based authentication"
+ assert self.oauth_config, (
+ "oauth_config should be provided if using oauth based authentication"
+ )
generator = OAuthTokenGenerator(
client_id=self.oauth_config.client_id,
authority_url=self.oauth_config.authority_url,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py
index b82734cbbe84ea..69d0b62a8edfdf 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py
@@ -623,7 +623,7 @@ def _build_enriched_query_log_query(
query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3)
AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3)
AND execution_status = 'SUCCESS'
- AND {users_filter or 'TRUE'}
+ AND {users_filter or "TRUE"}
)
, deduplicated_queries as (
SELECT
@@ -651,7 +651,7 @@ def _build_enriched_query_log_query(
WHERE
query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
AND query_start_time < to_timestamp_ltz({end_time_millis}, 3)
- AND {users_filter or 'TRUE'}
+ AND {users_filter or "TRUE"}
AND query_id IN (
SELECT query_id FROM deduplicated_queries
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
index d165be3f3cc656..173024aec0cf38 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py
@@ -142,9 +142,9 @@ def __init__(self) -> None:
)
# self._table_tags[][][] = list of tags applied to table
- self._table_tags: Dict[
- str, Dict[str, Dict[str, List[SnowflakeTag]]]
- ] = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+ self._table_tags: Dict[str, Dict[str, Dict[str, List[SnowflakeTag]]]] = (
+ defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+ )
# self._column_tags[][][][] = list of tags applied to column
self._column_tags: Dict[
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
index 393e4d3c96d51f..a2d69d9e552916 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
@@ -194,9 +194,9 @@ def __init__(
config, self.data_dictionary, self.report
)
self.profiler: Optional[SnowflakeProfiler] = profiler
- self.snowsight_url_builder: Optional[
- SnowsightUrlBuilder
- ] = snowsight_url_builder
+ self.snowsight_url_builder: Optional[SnowsightUrlBuilder] = (
+ snowsight_url_builder
+ )
# These are populated as side-effects of get_workunits_internal.
self.databases: List[SnowflakeDatabase] = []
@@ -267,9 +267,9 @@ def get_databases(self) -> Optional[List[SnowflakeDatabase]]:
)
return None
else:
- ischema_databases: List[
- SnowflakeDatabase
- ] = self.get_databases_from_ischema(databases)
+ ischema_databases: List[SnowflakeDatabase] = (
+ self.get_databases_from_ischema(databases)
+ )
if len(ischema_databases) == 0:
self.structured_reporter.failure(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py
index 75567cc3da8830..597e7bee4d4cc0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py
@@ -38,9 +38,9 @@ def _get_tags_on_object_without_propagation(
table_name: Optional[str],
) -> List[SnowflakeTag]:
if db_name not in self.tag_cache:
- self.tag_cache[
- db_name
- ] = self.data_dictionary.get_tags_for_database_without_propagation(db_name)
+ self.tag_cache[db_name] = (
+ self.data_dictionary.get_tags_for_database_without_propagation(db_name)
+ )
if domain == SnowflakeObjectDomain.DATABASE:
return self.tag_cache[db_name].get_database_tags(db_name)
@@ -130,10 +130,10 @@ def get_column_tags_for_table(
temp_column_tags: Dict[str, List[SnowflakeTag]] = {}
if self.config.extract_tags == TagOption.without_lineage:
if db_name not in self.tag_cache:
- self.tag_cache[
- db_name
- ] = self.data_dictionary.get_tags_for_database_without_propagation(
- db_name
+ self.tag_cache[db_name] = (
+ self.data_dictionary.get_tags_for_database_without_propagation(
+ db_name
+ )
)
temp_column_tags = self.tag_cache[db_name].get_column_tags_for_table(
table_name, schema_name, db_name
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
index 85e4071aec07df..edd13ee48326bb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
@@ -549,9 +549,9 @@ def parse_event_objects(self, event_dict: Dict) -> None:
):
# NOTE: Generated emails may be incorrect, as email may be different than
# username@email_domain
- event_dict[
- "EMAIL"
- ] = f'{event_dict["USER_NAME"]}@{self.config.email_domain}'.lower()
+ event_dict["EMAIL"] = (
+ f"{event_dict['USER_NAME']}@{self.config.email_domain}".lower()
+ )
if not event_dict["EMAIL"]:
self.report.rows_missing_email += 1
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
index 885bee1ccdb908..030edfde4ca1da 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
@@ -21,8 +21,7 @@
class SnowflakeStructuredReportMixin(abc.ABC):
@property
@abc.abstractmethod
- def structured_reporter(self) -> SourceReport:
- ...
+ def structured_reporter(self) -> SourceReport: ...
class SnowsightUrlBuilder:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
index b8afd145727400..b4ef2180d71d45 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
@@ -211,9 +211,9 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config):
self.usage_extractor: Optional[SnowflakeUsageExtractor] = None
if self.config.include_usage_stats or self.config.include_operational_stats:
- redundant_usage_run_skip_handler: Optional[
- RedundantUsageRunSkipHandler
- ] = None
+ redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = (
+ None
+ )
if self.config.enable_stateful_usage_ingestion:
redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler(
source=self,
@@ -296,7 +296,16 @@ class SnowflakePrivilege:
_report: Dict[Union[SourceCapability, str], CapabilityReport] = dict()
privileges: List[SnowflakePrivilege] = []
- capabilities: List[SourceCapability] = [c.capability for c in SnowflakeV2Source.get_capabilities() if c.capability not in (SourceCapability.PLATFORM_INSTANCE, SourceCapability.DOMAINS, SourceCapability.DELETION_DETECTION)] # type: ignore
+ capabilities: List[SourceCapability] = [
+ c.capability
+ for c in SnowflakeV2Source.get_capabilities() # type: ignore
+ if c.capability
+ not in (
+ SourceCapability.PLATFORM_INSTANCE,
+ SourceCapability.DOMAINS,
+ SourceCapability.DELETION_DETECTION,
+ )
+ ]
cur = conn.query("select current_role()")
current_role = [row["CURRENT_ROLE()"] for row in cur][0]
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
index 6f7decc79b1df2..cfc43454b51fad 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
@@ -104,9 +104,7 @@ def get_view_definition(self, connection, view_name, schema=None, **kw):
return "\n".join([r for r in res])
@typing.no_type_check
- def _get_column_type(
- self, type_: Union[str, Dict[str, Any]]
- ) -> TypeEngine: # noqa: C901
+ def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine: # noqa: C901
"""Derives the data type of the Athena column.
This method is overwritten to extend the behavior of PyAthena.
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py
index 2899bcc2de37b0..a8208ca807ed02 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py
@@ -218,9 +218,7 @@ def _get_all_table_comments_and_properties(self, connection, **kw):
, comment
, {properties_clause} AS properties
FROM system.tables
- WHERE name NOT LIKE '.inner%'""".format(
- properties_clause=properties_clause
- )
+ WHERE name NOT LIKE '.inner%'""".format(properties_clause=properties_clause)
)
all_table_comments: Dict[Tuple[str, str], Dict[str, Any]] = {}
@@ -301,9 +299,7 @@ def _get_schema_column_info(self, connection, schema=None, **kw):
, comment
FROM system.columns
WHERE {schema_clause}
- ORDER BY database, table, position""".format(
- schema_clause=schema_clause
- )
+ ORDER BY database, table, position""".format(schema_clause=schema_clause)
)
)
)
@@ -474,7 +470,7 @@ def _get_all_tables(self) -> Set[str]:
logger.debug(f"sql_alchemy_url={url}")
engine = create_engine(url, **self.config.options)
for db_row in engine.execute(text(all_tables_query)):
- all_tables_set.add(f'{db_row["database"]}.{db_row["table_name"]}')
+ all_tables_set.add(f"{db_row['database']}.{db_row['table_name']}")
return all_tables_set
@@ -503,7 +499,7 @@ def _populate_lineage_map(
try:
for db_row in engine.execute(text(query)):
- dataset_name = f'{db_row["target_schema"]}.{db_row["target_table"]}'
+ dataset_name = f"{db_row['target_schema']}.{db_row['target_table']}"
if not self.config.database_pattern.allowed(
db_row["target_schema"]
) or not self.config.table_pattern.allowed(dataset_name):
@@ -512,7 +508,7 @@ def _populate_lineage_map(
# Target
target_path = (
- f'{self.config.platform_instance+"." if self.config.platform_instance else ""}'
+ f"{self.config.platform_instance + '.' if self.config.platform_instance else ''}"
f"{dataset_name}"
)
target = LineageItem(
@@ -525,7 +521,7 @@ def _populate_lineage_map(
# Source
platform = LineageDatasetPlatform.CLICKHOUSE
- path = f'{db_row["source_schema"]}.{db_row["source_table"]}'
+ path = f"{db_row['source_schema']}.{db_row['source_table']}"
sources = [
LineageDataset(
@@ -552,9 +548,7 @@ def _populate_lineage_map(
target.dataset.path
].upstreams = self._lineage_map[
target.dataset.path
- ].upstreams.union(
- target.upstreams
- )
+ ].upstreams.union(target.upstreams)
else:
self._lineage_map[target.dataset.path] = target
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py
index 52db3cd11a759d..ac568c58af6c68 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py
@@ -234,9 +234,7 @@ def get_columns(
WHERE col.table_name = id.table_name
AND col.column_name = id.column_name
AND col.owner = id.owner
- ) AS identity_options""".format(
- dblink=dblink
- )
+ ) AS identity_options""".format(dblink=dblink)
else:
identity_cols = "NULL as default_on_null, NULL as identity_options"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py
index c91be9b494c006..664735053f1852 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py
@@ -278,8 +278,7 @@ def is_dataset_eligible_for_profiling(
if self.config.profiling.profile_table_size_limit is not None and (
size_in_bytes is not None
- and size_in_bytes / (2**30)
- > self.config.profiling.profile_table_size_limit
+ and size_in_bytes / (2**30) > self.config.profiling.profile_table_size_limit
):
self.report.profiling_skipped_size_limit[schema_name] += 1
logger.debug(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
index 5b76fe41d92e97..84b65d6635e9d4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
@@ -599,7 +599,12 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext):
setattr( # noqa: B010
TeradataDialect,
"get_columns",
- lambda self, connection, table_name, schema=None, use_qvci=self.config.use_qvci, **kw: optimized_get_columns(
+ lambda self,
+ connection,
+ table_name,
+ schema=None,
+ use_qvci=self.config.use_qvci,
+ **kw: optimized_get_columns(
self,
connection,
table_name,
@@ -613,7 +618,11 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext):
setattr( # noqa: B010
TeradataDialect,
"get_pk_constraint",
- lambda self, connection, table_name, schema=None, **kw: optimized_get_pk_constraint(
+ lambda self,
+ connection,
+ table_name,
+ schema=None,
+ **kw: optimized_get_pk_constraint(
self, connection, table_name, schema, **kw
),
)
@@ -621,7 +630,11 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext):
setattr( # noqa: B010
TeradataDialect,
"get_foreign_keys",
- lambda self, connection, table_name, schema=None, **kw: optimized_get_foreign_keys(
+ lambda self,
+ connection,
+ table_name,
+ schema=None,
+ **kw: optimized_get_foreign_keys(
self, connection, table_name, schema, **kw
),
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/profiling_state_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/profiling_state_handler.py
index 9883bc2b8e9b0b..6080ddadb65e40 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state/profiling_state_handler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state/profiling_state_handler.py
@@ -41,9 +41,9 @@ def __init__(
run_id: str,
):
self.state_provider = source.state_provider
- self.stateful_ingestion_config: Optional[
- ProfilingStatefulIngestionConfig
- ] = config.stateful_ingestion
+ self.stateful_ingestion_config: Optional[ProfilingStatefulIngestionConfig] = (
+ config.stateful_ingestion
+ )
self.pipeline_name = pipeline_name
self.run_id = run_id
self.checkpointing_enabled: bool = (
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
index 8630a959d3f6a3..e4a2646f6ccd3c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py
@@ -48,9 +48,9 @@ def __init__(
):
self.source = source
self.state_provider = source.state_provider
- self.stateful_ingestion_config: Optional[
- StatefulIngestionConfig
- ] = config.stateful_ingestion
+ self.stateful_ingestion_config: Optional[StatefulIngestionConfig] = (
+ config.stateful_ingestion
+ )
self.pipeline_name = pipeline_name
self.run_id = run_id
self._job_id = self._init_job_id()
@@ -145,8 +145,7 @@ def should_skip_this_run(
)
logger.debug(
- f"{self.job_id} : Last run start, end times:"
- f"({last_run_time_window})"
+ f"{self.job_id} : Last run start, end times:({last_run_time_window})"
)
# If current run's time window is subset of last run's time window, then skip.
@@ -212,8 +211,7 @@ def suggest_run_time_window(
)
self.log(
- "Adjusted start, end times: "
- f"({suggested_start_time}, {suggested_end_time})"
+ f"Adjusted start, end times: ({suggested_start_time}, {suggested_end_time})"
)
return (suggested_start_time, suggested_end_time)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py
index d4fcbf09924e9e..017d78bc1abf8d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py
@@ -111,9 +111,9 @@ def __init__(
self.state_type_class = state_type_class
self.pipeline_name = pipeline_name
self.run_id = run_id
- self.stateful_ingestion_config: Optional[
- StatefulStaleMetadataRemovalConfig
- ] = config.stateful_ingestion
+ self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
+ config.stateful_ingestion
+ )
self.checkpointing_enabled: bool = (
True
if (
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py b/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py
index 8f4a53ffc3ed58..1f5a651fc64a79 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py
@@ -70,20 +70,20 @@ def get_latest_checkpoint(
self.orchestrator_name, pipeline_name, job_name
)
- latest_checkpoint: Optional[
- DatahubIngestionCheckpointClass
- ] = self.graph.get_latest_timeseries_value(
- entity_urn=data_job_urn,
- aspect_type=DatahubIngestionCheckpointClass,
- filter_criteria_map={
- "pipelineName": pipeline_name,
- },
+ latest_checkpoint: Optional[DatahubIngestionCheckpointClass] = (
+ self.graph.get_latest_timeseries_value(
+ entity_urn=data_job_urn,
+ aspect_type=DatahubIngestionCheckpointClass,
+ filter_criteria_map={
+ "pipelineName": pipeline_name,
+ },
+ )
)
if latest_checkpoint:
logger.debug(
f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
f" job_name:'{job_name}' found with start_time:"
- f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
+ f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
)
return latest_checkpoint
else:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py b/metadata-ingestion/src/datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py
index a37774773b84d7..55f0903b9c91c7 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py
@@ -67,7 +67,7 @@ def get_latest_checkpoint(
logger.debug(
f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
f" job_name:'{job_name}' found with start_time:"
- f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
+ f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
)
return latest_checkpoint
else:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
index 8187fff559208e..f961bd8ecba604 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
@@ -281,9 +281,9 @@ def get_tableau_auth(
return authentication
def make_tableau_client(self, site: str) -> Server:
- authentication: Union[
- TableauAuth, PersonalAccessTokenAuth
- ] = self.get_tableau_auth(site)
+ authentication: Union[TableauAuth, PersonalAccessTokenAuth] = (
+ self.get_tableau_auth(site)
+ )
try:
server = Server(
self.connect_uri,
@@ -635,7 +635,7 @@ def projects_backward_compatibility(cls, values: Dict) -> Dict:
project_path_pattern = values.get("project_path_pattern")
if project_pattern is None and project_path_pattern is None and projects:
logger.warning(
- "projects is deprecated, please use " "project_path_pattern instead."
+ "projects is deprecated, please use project_path_pattern instead."
)
logger.info("Initializing project_pattern from projects")
values["project_pattern"] = AllowDenyPattern(
@@ -708,18 +708,18 @@ class DatabaseTable:
"""
urn: str
- id: Optional[
- str
- ] = None # is not None only for tables that came from Tableau metadata
+ id: Optional[str] = (
+ None # is not None only for tables that came from Tableau metadata
+ )
num_cols: Optional[int] = None
- paths: Optional[
- Set[str]
- ] = None # maintains all browse paths encountered for this table
+ paths: Optional[Set[str]] = (
+ None # maintains all browse paths encountered for this table
+ )
- parsed_columns: Optional[
- Set[str]
- ] = None # maintains all columns encountered for this table during parsing SQL queries
+ parsed_columns: Optional[Set[str]] = (
+ None # maintains all columns encountered for this table during parsing SQL queries
+ )
def update_table(
self,
@@ -2310,8 +2310,7 @@ def _get_datasource_project_luid(self, ds: dict) -> Optional[str]:
c.EMBEDDED_DATA_SOURCE,
):
logger.debug(
- f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is "
- f"unsupported"
+ f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is unsupported"
)
return None
@@ -2493,9 +2492,9 @@ def parse_custom_sql(
def _enrich_database_tables_with_parsed_schemas(
self, parsing_result: SqlParsingResult
) -> None:
- in_tables_schemas: Dict[
- str, Set[str]
- ] = transform_parsing_result_to_in_tables_schemas(parsing_result)
+ in_tables_schemas: Dict[str, Set[str]] = (
+ transform_parsing_result_to_in_tables_schemas(parsing_result)
+ )
if not in_tables_schemas:
logger.info("Unable to extract table schema from parsing result")
@@ -3559,25 +3558,25 @@ def emit_project_in_topological_order(
generated_project_keys.add(project_key.guid())
- parent_project_key: Optional[
- Union[ProjectKey, SiteKey]
- ] = None # It is going
+ parent_project_key: Optional[Union[ProjectKey, SiteKey]] = (
+ None # It is going
+ )
# to be used as a parent container key for the current tableau project
if project_.parent_id is not None:
# Go to the parent project as we need to generate container first for parent
parent_project_key = self.gen_project_key(project_.parent_id)
- parent_tableau_project: Optional[
- TableauProject
- ] = self.tableau_project_registry.get(project_.parent_id)
+ parent_tableau_project: Optional[TableauProject] = (
+ self.tableau_project_registry.get(project_.parent_id)
+ )
if (
parent_tableau_project is None
): # It is not in project registry because of project_pattern
- assert (
- project_.parent_name
- ), f"project {project_.name} should not be null"
+ assert project_.parent_name, (
+ f"project {project_.name} should not be null"
+ )
parent_tableau_project = TableauProject(
id=project_.parent_id,
name=project_.parent_name,
@@ -3669,16 +3668,16 @@ def ingest_tableau_site(self):
if self.config.extract_usage_stats:
with PerfTimer() as timer:
self._populate_usage_stat_registry()
- self.report.extract_usage_stats_timer[
- self.site_content_url
- ] = timer.elapsed_seconds(digits=2)
+ self.report.extract_usage_stats_timer[self.site_content_url] = (
+ timer.elapsed_seconds(digits=2)
+ )
if self.config.permission_ingestion:
with PerfTimer() as timer:
self._fetch_groups()
- self.report.fetch_groups_timer[
- self.site_content_url
- ] = timer.elapsed_seconds(digits=2)
+ self.report.fetch_groups_timer[self.site_content_url] = (
+ timer.elapsed_seconds(digits=2)
+ )
# Populate the map of database names and database hostnames to be used later to map
# databases to platform instances.
@@ -3691,9 +3690,9 @@ def ingest_tableau_site(self):
with PerfTimer() as timer:
self._populate_projects_registry()
- self.report.populate_projects_registry_timer[
- self.site_content_url
- ] = timer.elapsed_seconds(digits=2)
+ self.report.populate_projects_registry_timer[self.site_content_url] = (
+ timer.elapsed_seconds(digits=2)
+ )
if self.config.add_site_container:
yield from self.emit_site_container()
@@ -3701,23 +3700,23 @@ def ingest_tableau_site(self):
with PerfTimer() as timer:
yield from self.emit_workbooks()
- self.report.emit_workbooks_timer[
- self.site_content_url
- ] = timer.elapsed_seconds(digits=2)
+ self.report.emit_workbooks_timer[self.site_content_url] = (
+ timer.elapsed_seconds(digits=2)
+ )
if self.sheet_ids:
with PerfTimer() as timer:
yield from self.emit_sheets()
- self.report.emit_sheets_timer[
- self.site_content_url
- ] = timer.elapsed_seconds(digits=2)
+ self.report.emit_sheets_timer[self.site_content_url] = (
+ timer.elapsed_seconds(digits=2)
+ )
if self.dashboard_ids:
with PerfTimer() as timer:
yield from self.emit_dashboards()
- self.report.emit_dashboards_timer[
- self.site_content_url
- ] = timer.elapsed_seconds(digits=2)
+ self.report.emit_dashboards_timer[self.site_content_url] = (
+ timer.elapsed_seconds(digits=2)
+ )
if self.embedded_datasource_ids_being_used:
with PerfTimer() as timer:
@@ -3743,6 +3742,6 @@ def ingest_tableau_site(self):
if self.database_tables:
with PerfTimer() as timer:
yield from self.emit_upstream_tables()
- self.report.emit_upstream_tables_timer[
- self.site_content_url
- ] = timer.elapsed_seconds(digits=2)
+ self.report.emit_upstream_tables_timer[self.site_content_url] = (
+ timer.elapsed_seconds(digits=2)
+ )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py
index 1fdce3aa1e2d34..6c3f7a51294797 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py
@@ -254,7 +254,9 @@ class UnityCatalogSourceConfig(
)
# TODO: Remove `type:ignore` by refactoring config
- profiling: Union[UnityCatalogGEProfilerConfig, UnityCatalogAnalyzeProfilerConfig] = Field( # type: ignore
+ profiling: Union[
+ UnityCatalogGEProfilerConfig, UnityCatalogAnalyzeProfilerConfig
+ ] = Field( # type: ignore
default=UnityCatalogGEProfilerConfig(),
description="Data profiling configuration",
discriminator="method",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py
index 9b96953794dcd5..fd6fa8a50f707b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py
@@ -363,7 +363,7 @@ def _escape_sequence(value: str) -> str:
@staticmethod
def _create_metastore(
- obj: Union[GetMetastoreSummaryResponse, MetastoreInfo]
+ obj: Union[GetMetastoreSummaryResponse, MetastoreInfo],
) -> Optional[Metastore]:
if not obj.name:
return None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py
index 43bd788f809c3e..29562eaf3ce5b1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py
@@ -205,9 +205,9 @@ def __init__(self, ctx: PipelineContext, config: UnityCatalogSourceConfig):
self.table_refs: Set[TableReference] = set()
self.view_refs: Set[TableReference] = set()
self.notebooks: FileBackedDict[Notebook] = FileBackedDict()
- self.view_definitions: FileBackedDict[
- Tuple[TableReference, str]
- ] = FileBackedDict()
+ self.view_definitions: FileBackedDict[Tuple[TableReference, str]] = (
+ FileBackedDict()
+ )
# Global map of tables, for profiling
self.tables: FileBackedDict[Table] = FileBackedDict()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py
index 718818d9b347bf..2e9f7fc00c8784 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py
@@ -103,7 +103,9 @@ def _get_workunits_internal(
query, table_info
)
for source_table in table_info.source_tables:
- with self.report.usage_perf_report.aggregator_add_event_timer:
+ with (
+ self.report.usage_perf_report.aggregator_add_event_timer
+ ):
self.usage_aggregator.aggregate_event(
resource=source_table,
start_time=query.start_time,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py
index 4c2e4d42c440e8..2e1e315c4df956 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py
@@ -213,15 +213,15 @@ def _get_joined_access_event(self, events):
def _aggregate_access_events(
self, events: List[ClickHouseJoinedAccessEvent]
) -> Dict[datetime, Dict[ClickHouseTableRef, AggregatedDataset]]:
- datasets: Dict[
- datetime, Dict[ClickHouseTableRef, AggregatedDataset]
- ] = collections.defaultdict(dict)
+ datasets: Dict[datetime, Dict[ClickHouseTableRef, AggregatedDataset]] = (
+ collections.defaultdict(dict)
+ )
for event in events:
floored_ts = get_time_bucket(event.starttime, self.config.bucket_duration)
resource = (
- f'{self.config.platform_instance+"." if self.config.platform_instance else ""}'
+ f"{self.config.platform_instance + '.' if self.config.platform_instance else ''}"
f"{event.database}.{event.table}"
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py
index 6ded11027c83a8..e4138696186416 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py
@@ -235,9 +235,9 @@ def _get_joined_access_event(self, events):
def _aggregate_access_events(
self, events: List[TrinoJoinedAccessEvent]
) -> Dict[datetime, Dict[TrinoTableRef, AggregatedDataset]]:
- datasets: Dict[
- datetime, Dict[TrinoTableRef, AggregatedDataset]
- ] = collections.defaultdict(dict)
+ datasets: Dict[datetime, Dict[TrinoTableRef, AggregatedDataset]] = (
+ collections.defaultdict(dict)
+ )
for event in events:
floored_ts = get_time_bucket(event.starttime, self.config.bucket_duration)
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py
index bb1c297513de10..b4dc8835f9fba9 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py
@@ -80,10 +80,10 @@ def handle_end_of_stream(
).add_asset(container_urn)
data_products_container[data_product_urn] = container_product
else:
- data_products_container[
- data_product_urn
- ] = data_products_container[data_product_urn].add_asset(
- container_urn
+ data_products_container[data_product_urn] = (
+ data_products_container[data_product_urn].add_asset(
+ container_urn
+ )
)
mcps: List[
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_properties.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_properties.py
index 668f6ed7abe074..4b9b4c9e6f5da6 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_properties.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_properties.py
@@ -61,9 +61,9 @@ def _merge_with_server_properties(
) -> Optional[DatasetPropertiesClass]:
assert dataset_properties_aspect
- server_dataset_properties_aspect: Optional[
- DatasetPropertiesClass
- ] = graph.get_dataset_properties(entity_urn)
+ server_dataset_properties_aspect: Optional[DatasetPropertiesClass] = (
+ graph.get_dataset_properties(entity_urn)
+ )
# No need to take any action if server properties is None or there is not customProperties in server properties
if (
server_dataset_properties_aspect is None
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_tags.py
index ba3b6508daaecd..d2687ebc5e76f6 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_tags.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_tags.py
@@ -89,9 +89,9 @@ def transform_aspect(
server_field_map: dict = {}
if self.config.semantics == TransformerSemantics.PATCH:
assert self.ctx.graph
- server_schema_metadata_aspect: Optional[
- SchemaMetadataClass
- ] = self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+ server_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+ self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+ )
if server_schema_metadata_aspect is not None:
if not schema_metadata_aspect:
schema_metadata_aspect = server_schema_metadata_aspect
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py
index a7e92d4bd7edbd..d17a39bee6cfbf 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py
@@ -108,9 +108,9 @@ def transform_aspect(
] = {} # Map to cache server field objects, where fieldPath is key
if self.config.semantics == TransformerSemantics.PATCH:
assert self.ctx.graph
- server_schema_metadata_aspect: Optional[
- SchemaMetadataClass
- ] = self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+ server_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+ self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+ )
if server_schema_metadata_aspect is not None:
if not schema_metadata_aspect:
schema_metadata_aspect = server_schema_metadata_aspect
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py
index 7be8069e1b0852..bb2f318dcac8b8 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py
@@ -60,10 +60,10 @@ def transform_aspect(
domain_aspect.domains.extend(mapped_domains.domains)
if self.config.semantics == TransformerSemantics.PATCH:
# Try merging with server-side domains
- patch_domain_aspect: Optional[
- DomainsClass
- ] = AddDatasetDomain._merge_with_server_domains(
- self.ctx.graph, entity_urn, domain_aspect
+ patch_domain_aspect: Optional[DomainsClass] = (
+ AddDatasetDomain._merge_with_server_domains(
+ self.ctx.graph, entity_urn, domain_aspect
+ )
)
return cast(Optional[Aspect], patch_domain_aspect)
return cast(Optional[Aspect], domain_aspect)
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py
index 212e018dd64fb7..32707dcd3a372f 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py
@@ -141,9 +141,9 @@ def transform_aspect(
else:
owner_type = get_owner_type(self.config.owner_type)
if owner_type == OwnershipTypeClass.CUSTOM:
- assert (
- self.config.owner_type_urn is not None
- ), "owner_type_urn must be set if owner_type is CUSTOM"
+ assert self.config.owner_type_urn is not None, (
+ "owner_type_urn must be set if owner_type is CUSTOM"
+ )
owners.append(
OwnerClass(
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/tags_to_terms.py b/metadata-ingestion/src/datahub/ingestion/transformer/tags_to_terms.py
index 7e6125079f16e3..65cf2ac3614ae0 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/tags_to_terms.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/tags_to_terms.py
@@ -92,9 +92,9 @@ def transform_aspect(
in_global_tags_aspect: Optional[GlobalTagsClass] = self.ctx.graph.get_tags(
entity_urn
)
- in_schema_metadata_aspect: Optional[
- SchemaMetadataClass
- ] = self.ctx.graph.get_schema_metadata(entity_urn)
+ in_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+ self.ctx.graph.get_schema_metadata(entity_urn)
+ )
if in_global_tags_aspect is None and in_schema_metadata_aspect is None:
return cast(Aspect, in_glossary_terms)
@@ -134,10 +134,10 @@ def transform_aspect(
)
if self.config.semantics == TransformerSemantics.PATCH:
- patch_glossary_terms: Optional[
- GlossaryTermsClass
- ] = TagsToTermMapper._merge_with_server_glossary_terms(
- self.ctx.graph, entity_urn, out_glossary_terms
+ patch_glossary_terms: Optional[GlossaryTermsClass] = (
+ TagsToTermMapper._merge_with_server_glossary_terms(
+ self.ctx.graph, entity_urn, out_glossary_terms
+ )
)
return cast(Optional[Aspect], patch_glossary_terms)
else:
diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py
index 8d2ae2960ebd05..e32f1ddc3943ae 100644
--- a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py
+++ b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py
@@ -61,17 +61,17 @@ def __init__(self, output_dir: str, extras: Dict[str, str]) -> None:
def create(
cls, output_dir: str, extras: Dict[str, str]
) -> "SnowflakeAssertionCompiler":
- assert os.path.exists(
- output_dir
- ), f"Specified location {output_dir} does not exist."
+ assert os.path.exists(output_dir), (
+ f"Specified location {output_dir} does not exist."
+ )
- assert os.path.isdir(
- output_dir
- ), f"Specified location {output_dir} is not a folder."
+ assert os.path.isdir(output_dir), (
+ f"Specified location {output_dir} is not a folder."
+ )
- assert any(
- x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras
- ), "Must specify value for DMF schema using -x DMF_SCHEMA="
+ assert any(x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras), (
+ "Must specify value for DMF schema using -x DMF_SCHEMA="
+ )
return SnowflakeAssertionCompiler(output_dir, extras)
@@ -232,6 +232,6 @@ def get_dmf_schedule(trigger: AssertionTrigger) -> str:
elif isinstance(trigger.trigger, CronTrigger):
return f"USING CRON {trigger.trigger.cron} {trigger.trigger.timezone}"
elif isinstance(trigger.trigger, IntervalTrigger):
- return f"{trigger.trigger.interval.seconds/60} MIN"
+ return f"{trigger.trigger.interval.seconds / 60} MIN"
else:
raise ValueError(f"Unsupported trigger type {type(trigger.trigger)}")
diff --git a/metadata-ingestion/src/datahub/lite/duckdb_lite.py b/metadata-ingestion/src/datahub/lite/duckdb_lite.py
index 89317383520923..fe025842822b13 100644
--- a/metadata-ingestion/src/datahub/lite/duckdb_lite.py
+++ b/metadata-ingestion/src/datahub/lite/duckdb_lite.py
@@ -163,9 +163,9 @@ def write(
if "properties" not in writeable_dict["systemMetadata"]:
writeable_dict["systemMetadata"]["properties"] = {}
- writeable_dict["systemMetadata"]["properties"][
- "sysVersion"
- ] = new_version
+ writeable_dict["systemMetadata"]["properties"]["sysVersion"] = (
+ new_version
+ )
if needs_write:
self.duckdb_client.execute(
query="INSERT INTO metadata_aspect_v2 VALUES (?, ?, ?, ?, ?, ?)",
@@ -208,9 +208,9 @@ def write(
"lastObserved": writeable.systemMetadata.lastObserved
}
else:
- system_metadata[
- "lastObserved"
- ] = writeable.systemMetadata.lastObserved
+ system_metadata["lastObserved"] = (
+ writeable.systemMetadata.lastObserved
+ )
self.duckdb_client.execute(
query="UPDATE metadata_aspect_v2 SET system_metadata = ? WHERE urn = ? AND aspect_name = ? AND version = 0",
parameters=[
@@ -497,9 +497,9 @@ def get_all_entities(
aspect_name = r[1]
aspect_payload = json.loads(r[2])
if typed:
- assert (
- aspect_name in ASPECT_MAP
- ), f"Missing aspect name {aspect_name} in the registry"
+ assert aspect_name in ASPECT_MAP, (
+ f"Missing aspect name {aspect_name} in the registry"
+ )
try:
aspect_payload = ASPECT_MAP[aspect_name].from_obj(
post_json_transform(aspect_payload)
@@ -531,7 +531,9 @@ def get_all_aspects(self) -> Iterable[MetadataChangeProposalWrapper]:
for r in results.fetchall():
urn = r[0]
aspect_name = r[1]
- aspect_metadata = ASPECT_MAP[aspect_name].from_obj(post_json_transform(json.loads(r[2]))) # type: ignore
+ aspect_metadata = ASPECT_MAP[aspect_name].from_obj(
+ post_json_transform(json.loads(r[2]))
+ ) # type: ignore
system_metadata = SystemMetadataClass.from_obj(json.loads(r[3]))
mcp = MetadataChangeProposalWrapper(
entityUrn=urn,
diff --git a/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py b/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py
index 1fd1585a913581..4b8b4d0bc99bc0 100644
--- a/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py
+++ b/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py
@@ -9,8 +9,7 @@
class HasCustomPropertiesPatch(MetadataPatchProposal):
@classmethod
@abstractmethod
- def _custom_properties_location(self) -> Tuple[str, PatchPath]:
- ...
+ def _custom_properties_location(self) -> Tuple[str, PatchPath]: ...
def add_custom_property(self, key: str, value: str) -> Self:
"""Add a custom property to the entity.
diff --git a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
index 6aa10381a883ef..55b026a144c6d5 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py
@@ -33,14 +33,11 @@ class GraphQLSchemaMetadata(TypedDict):
class SchemaResolverInterface(Protocol):
@property
- def platform(self) -> str:
- ...
+ def platform(self) -> str: ...
- def includes_temp_tables(self) -> bool:
- ...
+ def includes_temp_tables(self) -> bool: ...
- def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]:
- ...
+ def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: ...
def __hash__(self) -> int:
# Mainly to make lru_cache happy in methods that accept a schema resolver.
@@ -232,8 +229,7 @@ def convert_graphql_schema_metadata_to_info(
return {
get_simple_field_path_from_v2_field_path(field["fieldPath"]): (
# The actual types are more of a "nice to have".
- field["nativeDataType"]
- or "str"
+ field["nativeDataType"] or "str"
)
for field in schema["fields"]
# TODO: We can't generate lineage to columns nested within structs yet.
@@ -289,8 +285,7 @@ def _convert_schema_field_list_to_info(
return {
get_simple_field_path_from_v2_field_path(col.fieldPath): (
# The actual types are more of a "nice to have".
- col.nativeDataType
- or "str"
+ col.nativeDataType or "str"
)
for col in schema_fields
# TODO: We can't generate lineage to columns nested within structs yet.
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py
index e1deeaec5ba826..8637802f6b9fee 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py
@@ -682,10 +682,10 @@ def add_known_lineage_mapping(
query_id = self._known_lineage_query_id()
# Generate CLL if schema of downstream is known
- column_lineage: List[
- ColumnLineageInfo
- ] = self._generate_identity_column_lineage(
- upstream_urn=upstream_urn, downstream_urn=downstream_urn
+ column_lineage: List[ColumnLineageInfo] = (
+ self._generate_identity_column_lineage(
+ upstream_urn=upstream_urn, downstream_urn=downstream_urn
+ )
)
# Register the query.
@@ -1044,9 +1044,9 @@ def _make_schema_resolver_for_session(
temp_table_schemas: Dict[str, Optional[List[models.SchemaFieldClass]]] = {}
for temp_table_urn, query_ids in self._temp_lineage_map[session_id].items():
for query_id in query_ids:
- temp_table_schemas[
- temp_table_urn
- ] = self._inferred_temp_schemas.get(query_id)
+ temp_table_schemas[temp_table_urn] = (
+ self._inferred_temp_schemas.get(query_id)
+ )
if temp_table_schemas:
break
@@ -1073,9 +1073,9 @@ def _process_view_definition(
schema_resolver=self._schema_resolver,
)
if parsed.debug_info.error:
- self.report.views_parse_failures[
- view_urn
- ] = f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}"
+ self.report.views_parse_failures[view_urn] = (
+ f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}"
+ )
if parsed.debug_info.table_error:
self.report.num_views_failed += 1
return # we can't do anything with this query
@@ -1583,9 +1583,9 @@ def _recurse_into_query(
temp_query_lineage_info
)
else:
- temp_upstream_queries[
- upstream
- ] = temp_query_lineage_info
+ temp_upstream_queries[upstream] = (
+ temp_query_lineage_info
+ )
# Compute merged upstreams.
new_upstreams = OrderedSet[UrnStr]()
@@ -1665,9 +1665,9 @@ def _recurse_into_query(
composed_of_queries_truncated: LossyList[str] = LossyList()
for query_id in composed_of_queries:
composed_of_queries_truncated.append(query_id)
- self.report.queries_with_temp_upstreams[
- composite_query_id
- ] = composed_of_queries_truncated
+ self.report.queries_with_temp_upstreams[composite_query_id] = (
+ composed_of_queries_truncated
+ )
merged_query_text = ";\n\n".join(
[q.formatted_query_string for q in ordered_queries]
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
index bf28ab0e7b229b..c825deeccd9592 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
@@ -442,9 +442,9 @@ def _create_table_ddl_cll(
) -> List[_ColumnLineageInfo]:
column_lineage: List[_ColumnLineageInfo] = []
- assert (
- output_table is not None
- ), "output_table must be set for create DDL statements"
+ assert output_table is not None, (
+ "output_table must be set for create DDL statements"
+ )
create_schema: sqlglot.exp.Schema = statement.this
sqlglot_columns = create_schema.expressions
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py
index 57a5cc3c9a6574..5b12c64a831666 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py
@@ -404,7 +404,7 @@ def replace_cte_refs(node: sqlglot.exp.Expression) -> sqlglot.exp.Expression:
if new_statement == statement:
if iteration > 1:
logger.debug(
- f"Required {iteration+1} iterations to detach and eliminate all CTEs"
+ f"Required {iteration + 1} iterations to detach and eliminate all CTEs"
)
break
statement = new_statement
diff --git a/metadata-ingestion/src/datahub/telemetry/stats.py b/metadata-ingestion/src/datahub/telemetry/stats.py
index bf98bd72b574ce..d6835e49de56aa 100644
--- a/metadata-ingestion/src/datahub/telemetry/stats.py
+++ b/metadata-ingestion/src/datahub/telemetry/stats.py
@@ -5,8 +5,7 @@
class SupportsLT(Protocol):
- def __lt__(self, __other: Any) -> Any:
- ...
+ def __lt__(self, __other: Any) -> Any: ...
_SupportsComparisonT = TypeVar("_SupportsComparisonT", bound=SupportsLT)
diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py
index fb028605c35b77..79da90ba20ea9f 100644
--- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py
+++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py
@@ -224,9 +224,9 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
_use_sqlite_on_conflict: bool = field(repr=False, default=True)
def __post_init__(self) -> None:
- assert (
- self.cache_eviction_batch_size > 0
- ), "cache_eviction_batch_size must be positive"
+ assert self.cache_eviction_batch_size > 0, (
+ "cache_eviction_batch_size must be positive"
+ )
for reserved_column in ("key", "value", "rowid"):
if reserved_column in self.extra_columns:
@@ -261,7 +261,7 @@ def __post_init__(self) -> None:
rowid INTEGER PRIMARY KEY AUTOINCREMENT,
key TEXT UNIQUE,
value BLOB
- {''.join(f', {column_name} BLOB' for column_name in self.extra_columns.keys())}
+ {"".join(f", {column_name} BLOB" for column_name in self.extra_columns.keys())}
)"""
)
@@ -316,12 +316,12 @@ def _prune_cache(self, num_items_to_prune: int) -> None:
f"""INSERT INTO {self.tablename} (
key,
value
- {''.join(f', {column_name}' for column_name in self.extra_columns.keys())}
+ {"".join(f", {column_name}" for column_name in self.extra_columns.keys())}
)
- VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))})
+ VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})
ON CONFLICT (key) DO UPDATE SET
value = excluded.value
- {''.join(f', {column_name} = excluded.{column_name}' for column_name in self.extra_columns.keys())}
+ {"".join(f", {column_name} = excluded.{column_name}" for column_name in self.extra_columns.keys())}
""",
items_to_write,
)
@@ -332,16 +332,16 @@ def _prune_cache(self, num_items_to_prune: int) -> None:
f"""INSERT INTO {self.tablename} (
key,
value
- {''.join(f', {column_name}' for column_name in self.extra_columns.keys())}
+ {"".join(f", {column_name}" for column_name in self.extra_columns.keys())}
)
- VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))})""",
+ VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})""",
item,
)
except sqlite3.IntegrityError:
self._conn.execute(
f"""UPDATE {self.tablename} SET
value = ?
- {''.join(f', {column_name} = ?' for column_name in self.extra_columns.keys())}
+ {"".join(f", {column_name} = ?" for column_name in self.extra_columns.keys())}
WHERE key = ?""",
(*item[1:], item[0]),
)
diff --git a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py
index e98fe42c1d56ce..fccd8dd8a60c35 100644
--- a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py
+++ b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py
@@ -142,10 +142,10 @@ def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]:
fields.append({"name": field_name, "type": field_type})
if kwargs.get("ustruct_seqn") is not None:
- struct_name = f'__structn_{kwargs["ustruct_seqn"]}_{str(uuid.uuid4()).replace("-", "")}'
+ struct_name = f"__structn_{kwargs['ustruct_seqn']}_{str(uuid.uuid4()).replace('-', '')}"
else:
- struct_name = f'__struct_{str(uuid.uuid4()).replace("-", "")}'
+ struct_name = f"__struct_{str(uuid.uuid4()).replace('-', '')}"
return {
"type": "record",
"name": struct_name,
diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py
index 926b8782fbf119..a5fd20fef307d0 100644
--- a/metadata-ingestion/src/datahub/utilities/logging_manager.py
+++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py
@@ -130,9 +130,9 @@ def _formatMessageColor(self, record: logging.LogRecord) -> str:
# Mimic our default format, but with color.
message_fg = self.MESSAGE_COLORS.get(record.levelname)
return (
- f'{click.style(f"[{self.formatTime(record, self.datefmt)}]", fg="green", dim=True)} '
+ f"{click.style(f'[{self.formatTime(record, self.datefmt)}]', fg='green', dim=True)} "
f"{click.style(f'{record.levelname:8}', fg=message_fg)} "
- f'{click.style(f"{{{record.name}:{record.lineno}}}", fg="blue", dim=True)} - '
+ f"{click.style(f'{{{record.name}:{record.lineno}}}', fg='blue', dim=True)} - "
f"{click.style(record.getMessage(), fg=message_fg)}"
)
diff --git a/metadata-ingestion/src/datahub/utilities/lossy_collections.py b/metadata-ingestion/src/datahub/utilities/lossy_collections.py
index f71aad51ab0b6b..31d6d0eb842d04 100644
--- a/metadata-ingestion/src/datahub/utilities/lossy_collections.py
+++ b/metadata-ingestion/src/datahub/utilities/lossy_collections.py
@@ -151,9 +151,9 @@ def __str__(self) -> str:
def as_obj(self) -> Dict[Union[_KT, str], Union[_VT, str]]:
base_dict: Dict[Union[_KT, str], Union[_VT, str]] = super().copy() # type: ignore
if self.sampled:
- base_dict[
- "sampled"
- ] = f"{len(self.keys())} sampled of at most {self.total_key_count()} entries."
+ base_dict["sampled"] = (
+ f"{len(self.keys())} sampled of at most {self.total_key_count()} entries."
+ )
return base_dict
def total_key_count(self) -> int:
diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py
index 17023c7b388e76..96870fc6fcd378 100644
--- a/metadata-ingestion/src/datahub/utilities/mapping.py
+++ b/metadata-ingestion/src/datahub/utilities/mapping.py
@@ -349,9 +349,9 @@ def convert_to_aspects(self, operation_map: Dict[str, list]) -> Dict[str, Any]:
elements=[institutional_memory_element]
)
- aspect_map[
- Constants.ADD_DOC_LINK_OPERATION
- ] = institutional_memory_aspect
+ aspect_map[Constants.ADD_DOC_LINK_OPERATION] = (
+ institutional_memory_aspect
+ )
else:
raise Exception(
f"Expected 1 item of type list for the documentation_link meta_mapping config,"
diff --git a/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py b/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py
index b5f490720340ce..bdfe4285065522 100644
--- a/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py
+++ b/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py
@@ -41,7 +41,9 @@ def decorator(func: Callable[_F, _T]) -> Callable[_F, _T]:
def wrapper(*args: _F.args, **kwargs: _F.kwargs) -> _T:
# We need a type ignore here because there's no way for us to require that
# the args and kwargs are hashable while using ParamSpec.
- key: _Key = cachetools.keys.hashkey(*args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k}) # type: ignore
+ key: _Key = cachetools.keys.hashkey(
+ *args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k}
+ ) # type: ignore
with cache_lock:
if key in cache:
diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py
index 11c04082ee7ad5..cf92336c68cdf6 100644
--- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py
+++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py
@@ -160,12 +160,12 @@ class SQLAlchemyQueryCombiner:
_greenlets_by_thread_lock: threading.Lock = dataclasses.field(
default_factory=lambda: threading.Lock()
)
- _queries_by_thread: Dict[
- greenlet.greenlet, Dict[str, _QueryFuture]
- ] = dataclasses.field(default_factory=lambda: collections.defaultdict(dict))
- _greenlets_by_thread: Dict[
- greenlet.greenlet, Set[greenlet.greenlet]
- ] = dataclasses.field(default_factory=lambda: collections.defaultdict(set))
+ _queries_by_thread: Dict[greenlet.greenlet, Dict[str, _QueryFuture]] = (
+ dataclasses.field(default_factory=lambda: collections.defaultdict(dict))
+ )
+ _greenlets_by_thread: Dict[greenlet.greenlet, Set[greenlet.greenlet]] = (
+ dataclasses.field(default_factory=lambda: collections.defaultdict(set))
+ )
@staticmethod
def _generate_sql_safe_identifier() -> str:
diff --git a/metadata-ingestion/src/datahub/utilities/stats_collections.py b/metadata-ingestion/src/datahub/utilities/stats_collections.py
index 09a9490abc0fbe..c0bd9d058e5d37 100644
--- a/metadata-ingestion/src/datahub/utilities/stats_collections.py
+++ b/metadata-ingestion/src/datahub/utilities/stats_collections.py
@@ -48,7 +48,9 @@ def as_obj(self) -> Dict[_KT, _VT]:
total_value: Union[_VT, str] = sum(trimmed_dict.values()) # type: ignore
except Exception:
total_value = ""
- trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = total_value # type: ignore
+ trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = ( # type: ignore
+ total_value # type: ignore
+ )
return trimmed_dict
diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
index f0e4c6f5ee14a1..d792e0bba649dd 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
@@ -21,7 +21,7 @@ def _add_prefix_to_paths(
def list_urns_with_path(
- model: Union[DictWrapper, MetadataChangeProposalWrapper]
+ model: Union[DictWrapper, MetadataChangeProposalWrapper],
) -> List[Tuple[str, _Path]]:
"""List urns in the given model with their paths.
@@ -145,7 +145,7 @@ def lowercase_dataset_urns(
MetadataChangeEventClass,
MetadataChangeProposalClass,
MetadataChangeProposalWrapper,
- ]
+ ],
) -> None:
def modify_urn(urn: str) -> str:
if guess_entity_type(urn) == "dataset":
diff --git a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py
index 024bb62bbe9ce9..5bb078a368dd50 100644
--- a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py
+++ b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py
@@ -98,7 +98,9 @@ def load_test_resources(test_resources_dir):
with azure_ad_nested_group_json_file.open() as azure_ad_nested_group_json:
reference_nested_group = json.loads(azure_ad_nested_group_json.read())
- with azure_ad_nested_groups_members_json_file.open() as azure_ad_nested_groups_users_json:
+ with (
+ azure_ad_nested_groups_members_json_file.open()
+ ) as azure_ad_nested_groups_users_json:
reference_nested_groups_users = json.loads(
azure_ad_nested_groups_users_json.read()
)
diff --git a/metadata-ingestion/tests/integration/dremio/test_dremio.py b/metadata-ingestion/tests/integration/dremio/test_dremio.py
index 401f487d8a14b8..c286746c68b79d 100644
--- a/metadata-ingestion/tests/integration/dremio/test_dremio.py
+++ b/metadata-ingestion/tests/integration/dremio/test_dremio.py
@@ -190,9 +190,9 @@ def create_mysql_source(headers):
"type": "MYSQL",
}
response = requests.post(url, headers=headers, data=json.dumps(payload))
- assert (
- response.status_code == 200
- ), f"Failed to add mysql datasource: {response.text}"
+ assert response.status_code == 200, (
+ f"Failed to add mysql datasource: {response.text}"
+ )
def upload_dataset(headers):
@@ -537,9 +537,9 @@ def test_dremio_platform_instance_urns(
# Check dataset URN structure
if mce["entityType"] == "dataset" and "entityUrn" in mce:
- assert (
- "test-platform.dremio" in mce["entityUrn"]
- ), f"Platform instance missing in dataset URN: {mce['entityUrn']}"
+ assert "test-platform.dremio" in mce["entityUrn"], (
+ f"Platform instance missing in dataset URN: {mce['entityUrn']}"
+ )
# Check aspects for both datasets and containers
if "aspectName" in mce:
@@ -558,9 +558,9 @@ def test_dremio_platform_instance_urns(
instance = aspect_json["instance"]
expected_instance = "urn:li:dataPlatformInstance:(urn:li:dataPlatform:dremio,test-platform)"
- assert (
- instance == expected_instance
- ), f"Invalid platform instance format: {instance}"
+ assert instance == expected_instance, (
+ f"Invalid platform instance format: {instance}"
+ )
# Verify against golden file
mce_helpers.check_golden_file(
diff --git a/metadata-ingestion/tests/integration/grafana/test_grafana.py b/metadata-ingestion/tests/integration/grafana/test_grafana.py
index 6eb6b0b8509263..cbac965884365d 100644
--- a/metadata-ingestion/tests/integration/grafana/test_grafana.py
+++ b/metadata-ingestion/tests/integration/grafana/test_grafana.py
@@ -120,7 +120,7 @@ def test_grafana_dashboard(loaded_grafana, pytestconfig, tmp_path, test_resource
time.sleep(5)
resp = requests.get(url)
if resp.status_code == 200:
- logging.info(f"Grafana started after waiting {i*5} seconds")
+ logging.info(f"Grafana started after waiting {i * 5} seconds")
break
else:
pytest.fail("Grafana did not start in time")
@@ -131,12 +131,12 @@ def test_grafana_dashboard(loaded_grafana, pytestconfig, tmp_path, test_resource
assert resp.status_code == 200, "Failed to load default dashboard"
dashboard = resp.json()
- assert (
- dashboard["dashboard"]["title"] == "Default Dashboard"
- ), "Default dashboard title mismatch"
- assert any(
- panel["type"] == "text" for panel in dashboard["dashboard"]["panels"]
- ), "Default dashboard missing text panel"
+ assert dashboard["dashboard"]["title"] == "Default Dashboard", (
+ "Default dashboard title mismatch"
+ )
+ assert any(panel["type"] == "text" for panel in dashboard["dashboard"]["panels"]), (
+ "Default dashboard missing text panel"
+ )
# Verify the output. (You can add further checks here if needed)
logging.info("Default dashboard verified successfully")
@@ -153,7 +153,7 @@ def test_grafana_ingest(
time.sleep(5)
resp = requests.get(url)
if resp.status_code == 200:
- logging.info(f"Grafana started after waiting {i*5} seconds")
+ logging.info(f"Grafana started after waiting {i * 5} seconds")
break
else:
pytest.fail("Grafana did not start in time")
diff --git a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
index d0f4fc35fc03eb..d8c98b12951f5d 100644
--- a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
+++ b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
@@ -482,9 +482,9 @@ def test_kafka_connect_ingest_stateful(
"mysql_source1",
"mysql_source2",
]
- pipeline_run1_config["sink"]["config"][
- "filename"
- ] = f"{tmp_path}/{output_file_name}"
+ pipeline_run1_config["sink"]["config"]["filename"] = (
+ f"{tmp_path}/{output_file_name}"
+ )
pipeline_run1 = Pipeline.create(pipeline_run1_config)
pipeline_run1.run()
pipeline_run1.raise_from_status()
@@ -506,14 +506,16 @@ def test_kafka_connect_ingest_stateful(
mock_datahub_graph,
) as mock_checkpoint:
mock_checkpoint.return_value = mock_datahub_graph
- pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore
+ pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(
+ base_pipeline_config # type: ignore
+ )
# Set the special properties for this run
pipeline_run1_config["source"]["config"]["connector_patterns"]["allow"] = [
"mysql_source1",
]
- pipeline_run2_config["sink"]["config"][
- "filename"
- ] = f"{tmp_path}/{output_file_deleted_name}"
+ pipeline_run2_config["sink"]["config"]["filename"] = (
+ f"{tmp_path}/{output_file_deleted_name}"
+ )
pipeline_run2 = Pipeline.create(pipeline_run2_config)
pipeline_run2.run()
pipeline_run2.raise_from_status()
diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py
index c96bcc729a95da..bbcc6332539c02 100644
--- a/metadata-ingestion/tests/integration/looker/test_looker.py
+++ b/metadata-ingestion/tests/integration/looker/test_looker.py
@@ -1096,9 +1096,9 @@ def test_file_path_in_view_naming_pattern(
):
mocked_client = mock.MagicMock()
new_recipe = get_default_recipe(output_file_path=f"{tmp_path}/looker_mces.json")
- new_recipe["source"]["config"][
- "view_naming_pattern"
- ] = "{project}.{file_path}.view.{name}"
+ new_recipe["source"]["config"]["view_naming_pattern"] = (
+ "{project}.{file_path}.view.{name}"
+ )
with mock.patch(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py
index 940e7f36675f79..d803b8498104fd 100644
--- a/metadata-ingestion/tests/integration/lookml/test_lookml.py
+++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py
@@ -101,13 +101,13 @@ def test_lookml_refinement_ingest(pytestconfig, tmp_path, mock_time):
)
new_recipe["source"]["config"]["process_refinements"] = True
- new_recipe["source"]["config"][
- "view_naming_pattern"
- ] = "{project}.{file_path}.view.{name}"
+ new_recipe["source"]["config"]["view_naming_pattern"] = (
+ "{project}.{file_path}.view.{name}"
+ )
- new_recipe["source"]["config"][
- "view_browse_pattern"
- ] = "/{env}/{platform}/{project}/{file_path}/views"
+ new_recipe["source"]["config"]["view_browse_pattern"] = (
+ "/{env}/{platform}/{project}/{file_path}/views"
+ )
pipeline = Pipeline.create(new_recipe)
pipeline.run()
diff --git a/metadata-ingestion/tests/integration/nifi/test_nifi.py b/metadata-ingestion/tests/integration/nifi/test_nifi.py
index b992de058879ef..924e854a47e4eb 100644
--- a/metadata-ingestion/tests/integration/nifi/test_nifi.py
+++ b/metadata-ingestion/tests/integration/nifi/test_nifi.py
@@ -72,7 +72,7 @@ def test_nifi_ingest_standalone(
status = next(s for s in statuses if s["name"] == "FetchS3Object")
if status["aggregateSnapshot"]["flowFilesOut"] >= 1:
- logging.info(f"Waited for time {i*5} seconds")
+ logging.info(f"Waited for time {i * 5} seconds")
break
# Run the metadata ingestion pipeline.
@@ -124,7 +124,7 @@ def test_nifi_ingest_cluster(loaded_nifi, pytestconfig, tmp_path, test_resources
statuses = [pg["status"] for pg in pgs]
status = next(s for s in statuses if s["name"] == "Cluster_Site_S3_to_S3")
if status["aggregateSnapshot"]["flowFilesSent"] >= 1:
- logging.info(f"Waited for time {i*5} seconds")
+ logging.info(f"Waited for time {i * 5} seconds")
break
test_resources_dir = pytestconfig.rootpath / "tests/integration/nifi"
# Run the metadata ingestion pipeline.
diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
index 6f7a9c7833ba1a..0d85d370265cae 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
@@ -765,14 +765,14 @@ def test_sqlglot_parser():
}
)
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = parser.get_upstream_tables(
- table,
- reporter,
- ctx=ctx,
- config=config,
- platform_instance_resolver=platform_instance_resolver,
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ parser.get_upstream_tables(
+ table,
+ reporter,
+ ctx=ctx,
+ config=config,
+ platform_instance_resolver=platform_instance_resolver,
+ )
)
data_platform_tables: List[DataPlatformTable] = lineage[0].upstreams
@@ -814,9 +814,9 @@ def test_sqlglot_parser():
def test_databricks_multi_cloud():
q = M_QUERIES[25]
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = get_data_platform_tables_with_dummy_table(q=q)
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ get_data_platform_tables_with_dummy_table(q=q)
+ )
assert len(lineage) == 1
@@ -833,9 +833,9 @@ def test_databricks_multi_cloud():
def test_databricks_catalog_pattern_1():
q = M_QUERIES[26]
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = get_data_platform_tables_with_dummy_table(q=q)
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ get_data_platform_tables_with_dummy_table(q=q)
+ )
assert len(lineage) == 1
@@ -904,14 +904,14 @@ def test_sqlglot_parser_2():
}
)
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = parser.get_upstream_tables(
- table,
- reporter,
- ctx=ctx,
- config=config,
- platform_instance_resolver=platform_instance_resolver,
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ parser.get_upstream_tables(
+ table,
+ reporter,
+ ctx=ctx,
+ config=config,
+ platform_instance_resolver=platform_instance_resolver,
+ )
)
data_platform_tables: List[DataPlatformTable] = lineage[0].upstreams
@@ -965,9 +965,9 @@ def test_databricks_regular_case_with_view():
def test_snowflake_double_double_quotes():
q = M_QUERIES[30]
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = get_data_platform_tables_with_dummy_table(q=q)
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ get_data_platform_tables_with_dummy_table(q=q)
+ )
assert len(lineage) == 1
@@ -984,9 +984,9 @@ def test_snowflake_double_double_quotes():
def test_databricks_multicloud():
q = M_QUERIES[31]
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = get_data_platform_tables_with_dummy_table(q=q)
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ get_data_platform_tables_with_dummy_table(q=q)
+ )
assert len(lineage) == 1
@@ -1003,9 +1003,9 @@ def test_databricks_multicloud():
def test_snowflake_multi_function_call():
q = M_QUERIES[32]
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = get_data_platform_tables_with_dummy_table(q=q)
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ get_data_platform_tables_with_dummy_table(q=q)
+ )
assert len(lineage) == 1
@@ -1022,9 +1022,9 @@ def test_snowflake_multi_function_call():
def test_mssql_drop_with_select():
q = M_QUERIES[33]
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = get_data_platform_tables_with_dummy_table(q=q)
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ get_data_platform_tables_with_dummy_table(q=q)
+ )
assert len(lineage) == 1
@@ -1075,18 +1075,18 @@ def test_unsupported_data_platform():
is_entry_present = True
break
- assert (
- is_entry_present
- ), 'Info message "Non-Data Platform Expression" should be present in reporter'
+ assert is_entry_present, (
+ 'Info message "Non-Data Platform Expression" should be present in reporter'
+ )
def test_empty_string_in_m_query():
# TRIM(TRIM(TRIM(AGENT_NAME, '\"\"'), '+'), '\\'') is in Query
q = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu10758.ap-unknown-2.fakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TRIM(TRIM(TRIM(AGENT_NAME, '\"\"'), '+'), '\\'') AS TRIM_AGENT_NAME,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS inner join OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT #(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source"
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = get_data_platform_tables_with_dummy_table(q=q)
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ get_data_platform_tables_with_dummy_table(q=q)
+ )
assert len(lineage) == 1
@@ -1108,9 +1108,9 @@ def test_double_quotes_in_alias():
# SELECT CAST(sales_date AS DATE) AS \"\"Date\"\" in query
q = 'let \n Source = Sql.Database("abc.com", "DB", [Query="SELECT CAST(sales_date AS DATE) AS ""Date"",#(lf) SUM(cshintrpret) / 60.0 AS ""Total Order All Items"",#(lf)#(tab)#(tab)#(tab) SUM(cshintrpret) / 60.0 - LAG(SUM(cshintrpret) / 60.0, 1) OVER (ORDER BY CAST(sales_date AS DATE)) AS ""Total minute difference"",#(lf)#(tab)#(tab)#(tab) SUM(sale_price) / 60.0 - LAG(SUM(sale_price) / 60.0, 1) OVER (ORDER BY CAST(sales_date AS DATE)) AS ""Normal minute difference""#(lf) FROM [DB].[dbo].[sales_t]#(lf) WHERE sales_date >= GETDATE() - 365#(lf) GROUP BY CAST(sales_date AS DATE),#(lf)#(tab)#(tab)CAST(sales_date AS TIME);"]) \n in \n Source'
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = get_data_platform_tables_with_dummy_table(q=q)
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ get_data_platform_tables_with_dummy_table(q=q)
+ )
assert len(lineage) == 1
@@ -1168,9 +1168,9 @@ def test_m_query_timeout(mock_get_lark_parser):
is_entry_present = True
break
- assert (
- is_entry_present
- ), 'Warning message "M-Query Parsing Timeout" should be present in reporter'
+ assert is_entry_present, (
+ 'Warning message "M-Query Parsing Timeout" should be present in reporter'
+ )
def test_comments_in_m_query():
diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
index 911d8a9f35139f..7f62e433bc8014 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
@@ -828,9 +828,9 @@ def dataset_type_mapping_set_to_all_platform(pipeline: Pipeline) -> None:
# Generate default dataset_type_mapping and compare it with source_config.dataset_type_mapping
default_dataset_type_mapping: dict = {}
for item in SupportedDataPlatform:
- default_dataset_type_mapping[
- item.value.powerbi_data_platform_name
- ] = item.value.datahub_data_platform_name
+ default_dataset_type_mapping[item.value.powerbi_data_platform_name] = (
+ item.value.datahub_data_platform_name
+ )
assert default_dataset_type_mapping == source_config.dataset_type_mapping
@@ -1443,9 +1443,9 @@ def test_powerbi_cross_workspace_reference_info_message(
is_entry_present = True
break
- assert (
- is_entry_present
- ), 'Info message "Missing Lineage For Tile" should be present in reporter'
+ assert is_entry_present, (
+ 'Info message "Missing Lineage For Tile" should be present in reporter'
+ )
test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
@@ -1568,6 +1568,6 @@ def test_powerbi_app_ingest_info_message(
is_entry_present = True
break
- assert (
- is_entry_present
- ), "The extract_app flag should be set to false by default. We need to keep this flag as false until all GMS instances are updated to the latest release."
+ assert is_entry_present, (
+ "The extract_app flag should be set to false by default. We need to keep this flag as false until all GMS instances are updated to the latest release."
+ )
diff --git a/metadata-ingestion/tests/integration/salesforce/test_salesforce.py b/metadata-ingestion/tests/integration/salesforce/test_salesforce.py
index 89a37a372df843..9e68ff22a767e2 100644
--- a/metadata-ingestion/tests/integration/salesforce/test_salesforce.py
+++ b/metadata-ingestion/tests/integration/salesforce/test_salesforce.py
@@ -89,15 +89,15 @@ def test_latest_version(mock_sdk):
)
SalesforceSource(config=config, ctx=Mock())
calls = mock_sf._call_salesforce.mock_calls
- assert (
- len(calls) == 1
- ), "We didn't specify version but source didn't call SF API to get the latest one"
- assert calls[0].ends_with(
- "/services/data"
- ), "Source didn't call proper SF API endpoint to get all versions"
- assert (
- mock_sf.sf_version == "54.0"
- ), "API version was not correctly set (see versions_responses.json)"
+ assert len(calls) == 1, (
+ "We didn't specify version but source didn't call SF API to get the latest one"
+ )
+ assert calls[0].ends_with("/services/data"), (
+ "Source didn't call proper SF API endpoint to get all versions"
+ )
+ assert mock_sf.sf_version == "54.0", (
+ "API version was not correctly set (see versions_responses.json)"
+ )
@mock.patch("datahub.ingestion.source.salesforce.Salesforce")
@@ -133,12 +133,12 @@ def test_custom_version(mock_sdk):
SalesforceSource(config=config, ctx=Mock())
calls = mock_sf._call_salesforce.mock_calls
- assert (
- len(calls) == 0
- ), "Source called API to get all versions even though we specified proper version"
- assert (
- mock_sdk.call_args.kwargs["version"] == "46.0"
- ), "API client object was not correctly initialized with the custom version"
+ assert len(calls) == 0, (
+ "Source called API to get all versions even though we specified proper version"
+ )
+ assert mock_sdk.call_args.kwargs["version"] == "46.0", (
+ "API client object was not correctly initialized with the custom version"
+ )
@freeze_time(FROZEN_TIME)
diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
index b969f77b4c3c18..7fab5fc7dae1ba 100644
--- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
+++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
@@ -57,7 +57,7 @@ def test_mssql_ingest(mssql_runner, pytestconfig, tmp_path, mock_time, config_fi
pytestconfig,
output_path=tmp_path / "mssql_mces.json",
golden_path=test_resources_dir
- / f"golden_files/golden_mces_{config_file.replace('yml','json')}",
+ / f"golden_files/golden_mces_{config_file.replace('yml', 'json')}",
ignore_paths=[
r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['job_id'\]",
r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['date_created'\]",
diff --git a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py
index b8b0563a1d24e5..9c7b86a275f6d0 100644
--- a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py
+++ b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py
@@ -205,55 +205,57 @@ def register_mock_data(workspace_client):
),
]
- workspace_client.tables.get = lambda *args, **kwargs: databricks.sdk.service.catalog.TableInfo.from_dict(
- {
- "name": "quickstart_table",
- "catalog_name": "quickstart_catalog",
- "schema_name": "quickstart_schema",
- "table_type": "MANAGED",
- "data_source_format": "DELTA",
- "columns": [
- {
- "name": "columnA",
- "type_text": "int",
- "type_json": '{"name":"columnA","type":"integer","nullable":true,"metadata":{}}',
- "type_name": "INT",
- "type_precision": 0,
- "type_scale": 0,
- "position": 0,
- "nullable": True,
- },
- {
- "name": "columnB",
- "type_text": "string",
- "type_json": '{"name":"columnB","type":"string","nullable":true,"metadata":{}}',
- "type_name": "STRING",
- "type_precision": 0,
- "type_scale": 0,
- "position": 1,
- "nullable": True,
+ workspace_client.tables.get = (
+ lambda *args, **kwargs: databricks.sdk.service.catalog.TableInfo.from_dict(
+ {
+ "name": "quickstart_table",
+ "catalog_name": "quickstart_catalog",
+ "schema_name": "quickstart_schema",
+ "table_type": "MANAGED",
+ "data_source_format": "DELTA",
+ "columns": [
+ {
+ "name": "columnA",
+ "type_text": "int",
+ "type_json": '{"name":"columnA","type":"integer","nullable":true,"metadata":{}}',
+ "type_name": "INT",
+ "type_precision": 0,
+ "type_scale": 0,
+ "position": 0,
+ "nullable": True,
+ },
+ {
+ "name": "columnB",
+ "type_text": "string",
+ "type_json": '{"name":"columnB","type":"string","nullable":true,"metadata":{}}',
+ "type_name": "STRING",
+ "type_precision": 0,
+ "type_scale": 0,
+ "position": 1,
+ "nullable": True,
+ },
+ ],
+ "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896",
+ "owner": "account users",
+ "properties": {
+ "delta.lastCommitTimestamp": "1666185711000",
+ "delta.lastUpdateVersion": "1",
+ "delta.minReaderVersion": "1",
+ "delta.minWriterVersion": "2",
+ "spark.sql.statistics.numRows": "10",
+ "spark.sql.statistics.totalSize": "512",
},
- ],
- "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896",
- "owner": "account users",
- "properties": {
- "delta.lastCommitTimestamp": "1666185711000",
- "delta.lastUpdateVersion": "1",
- "delta.minReaderVersion": "1",
- "delta.minWriterVersion": "2",
- "spark.sql.statistics.numRows": "10",
- "spark.sql.statistics.totalSize": "512",
- },
- "generation": 2,
- "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736",
- "full_name": "quickstart_catalog.quickstart_schema.quickstart_table",
- "data_access_configuration_id": "00000000-0000-0000-0000-000000000000",
- "created_at": 1666185698688,
- "created_by": "abc@acryl.io",
- "updated_at": 1666186049633,
- "updated_by": "abc@acryl.io",
- "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896",
- }
+ "generation": 2,
+ "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736",
+ "full_name": "quickstart_catalog.quickstart_schema.quickstart_table",
+ "data_access_configuration_id": "00000000-0000-0000-0000-000000000000",
+ "created_at": 1666185698688,
+ "created_by": "abc@acryl.io",
+ "updated_at": 1666186049633,
+ "updated_by": "abc@acryl.io",
+ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896",
+ }
+ )
)
workspace_client.service_principals.list.return_value = [
@@ -437,9 +439,7 @@ def test_ingestion(pytestconfig, tmp_path, requests_mock):
"datahub.ingestion.source.unity.proxy.WorkspaceClient"
) as mock_client, patch.object(
HiveMetastoreProxy, "get_inspector"
- ) as get_inspector, patch.object(
- HiveMetastoreProxy, "_execute_sql"
- ) as execute_sql:
+ ) as get_inspector, patch.object(HiveMetastoreProxy, "_execute_sql") as execute_sql:
workspace_client: mock.MagicMock = mock.MagicMock()
mock_client.return_value = workspace_client
register_mock_data(workspace_client)
diff --git a/metadata-ingestion/tests/performance/databricks/generator.py b/metadata-ingestion/tests/performance/databricks/generator.py
index 29df325d856a1a..b11771e55b2c9e 100644
--- a/metadata-ingestion/tests/performance/databricks/generator.py
+++ b/metadata-ingestion/tests/performance/databricks/generator.py
@@ -167,7 +167,7 @@ def _generate_insert_lineage(table: Table, upstream: Table) -> str:
def _generate_view_definition(view: View) -> str:
from_statement = f"FROM {_quote_table(view.upstreams[0])} t0"
join_statement = " ".join(
- f"JOIN {_quote_table(upstream)} t{i+1} ON t0.id = t{i+1}.id"
+ f"JOIN {_quote_table(upstream)} t{i + 1} ON t0.id = t{i + 1}.id"
for i, upstream in enumerate(view.upstreams[1:])
)
return f"CREATE VIEW {_quote_table(view)} AS SELECT * {from_statement} {join_statement} {view.definition}"
diff --git a/metadata-ingestion/tests/test_helpers/mce_helpers.py b/metadata-ingestion/tests/test_helpers/mce_helpers.py
index 0105e6d596970b..d70a440dab0657 100644
--- a/metadata-ingestion/tests/test_helpers/mce_helpers.py
+++ b/metadata-ingestion/tests/test_helpers/mce_helpers.py
@@ -300,9 +300,9 @@ def assert_for_each_entity(
for urn, aspect_val in aspect_map.items():
if aspect_val is not None:
for f in aspect_field_matcher:
- assert aspect_field_matcher[f] == _get_element(
- aspect_val, [f]
- ), f"urn: {urn} -> Field {f} must match value {aspect_field_matcher[f]}, found {_get_element(aspect_val, [f])}"
+ assert aspect_field_matcher[f] == _get_element(aspect_val, [f]), (
+ f"urn: {urn} -> Field {f} must match value {aspect_field_matcher[f]}, found {_get_element(aspect_val, [f])}"
+ )
success.append(urn)
elif urn not in exception_urns:
print(f"Adding {urn} to failures")
@@ -361,9 +361,9 @@ def assert_entity_mcp_aspect(
assert mcp.aspect
aspect_val = mcp.aspect.to_obj()
for f in aspect_field_matcher:
- assert aspect_field_matcher[f] == _get_element(
- aspect_val, [f]
- ), f"urn: {mcp.entityUrn} -> Field {f} must match value {aspect_field_matcher[f]}, found {_get_element(aspect_val, [f])}"
+ assert aspect_field_matcher[f] == _get_element(aspect_val, [f]), (
+ f"urn: {mcp.entityUrn} -> Field {f} must match value {aspect_field_matcher[f]}, found {_get_element(aspect_val, [f])}"
+ )
matches = matches + 1
return matches
diff --git a/metadata-ingestion/tests/test_helpers/state_helpers.py b/metadata-ingestion/tests/test_helpers/state_helpers.py
index f68aef742fc730..c469db6ce8cf80 100644
--- a/metadata-ingestion/tests/test_helpers/state_helpers.py
+++ b/metadata-ingestion/tests/test_helpers/state_helpers.py
@@ -104,7 +104,7 @@ def monkey_patch_get_latest_timeseries_value(
@pytest.fixture
def mock_datahub_graph_instance(
- mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph]
+ mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph],
) -> DataHubGraph:
return mock_datahub_graph(DatahubClientConfig(server="http://fake.domain.local"))
diff --git a/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py b/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py
index e796f0b3f37219..dad7662d9ad00b 100644
--- a/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py
+++ b/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py
@@ -26,7 +26,7 @@ def base_entity_metadata():
@pytest.fixture
def base_mock_graph(
- base_entity_metadata: Dict[str, Dict[str, Any]]
+ base_entity_metadata: Dict[str, Dict[str, Any]],
) -> MockDataHubGraph:
return MockDataHubGraph(entity_graph=base_entity_metadata)
diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_ensure_aspect_size.py b/metadata-ingestion/tests/unit/api/source_helpers/test_ensure_aspect_size.py
index bdf1e0a2e0e860..8a45efb46893ae 100644
--- a/metadata-ingestion/tests/unit/api/source_helpers/test_ensure_aspect_size.py
+++ b/metadata-ingestion/tests/unit/api/source_helpers/test_ensure_aspect_size.py
@@ -159,9 +159,9 @@ def test_ensure_size_of_proper_dataset_profile(processor):
processor.ensure_dataset_profile_size(
"urn:li:dataset:(s3, dummy_dataset, DEV)", profile
)
- assert orig_repr == json.dumps(
- profile.to_obj()
- ), "Aspect was modified in case where workunit processor should have been no-op"
+ assert orig_repr == json.dumps(profile.to_obj()), (
+ "Aspect was modified in case where workunit processor should have been no-op"
+ )
@freeze_time("2023-01-02 00:00:00")
@@ -177,9 +177,9 @@ def test_ensure_size_of_too_big_schema_metadata(processor):
# +100kb is completely arbitrary, but we are truncating the aspect based on schema fields size only, not total taken
# by other parameters of the aspect - it is reasonable approach though - schema fields is the only field in schema
# metadata which can be expected to grow out of control
- assert (
- len(json.dumps(schema.to_obj())) < INGEST_MAX_PAYLOAD_BYTES + 100000
- ), "Aspect exceeded acceptable size"
+ assert len(json.dumps(schema.to_obj())) < INGEST_MAX_PAYLOAD_BYTES + 100000, (
+ "Aspect exceeded acceptable size"
+ )
@freeze_time("2023-01-02 00:00:00")
@@ -189,9 +189,9 @@ def test_ensure_size_of_proper_schema_metadata(processor):
processor.ensure_schema_metadata_size(
"urn:li:dataset:(s3, dummy_dataset, DEV)", schema
)
- assert orig_repr == json.dumps(
- schema.to_obj()
- ), "Aspect was modified in case where workunit processor should have been no-op"
+ assert orig_repr == json.dumps(schema.to_obj()), (
+ "Aspect was modified in case where workunit processor should have been no-op"
+ )
@freeze_time("2023-01-02 00:00:00")
@@ -214,9 +214,9 @@ def test_ensure_size_of_too_big_dataset_profile(processor):
)
assert expected_profile.fieldProfiles
expected_profile.fieldProfiles.insert(4, reduced_field)
- assert json.dumps(profile.to_obj()) == json.dumps(
- expected_profile.to_obj()
- ), "Field 'big' was not properly removed from aspect due to its size"
+ assert json.dumps(profile.to_obj()) == json.dumps(expected_profile.to_obj()), (
+ "Field 'big' was not properly removed from aspect due to its size"
+ )
@freeze_time("2023-01-02 00:00:00")
diff --git a/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py
index a1981ccf767916..f494ed78211dcf 100644
--- a/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py
+++ b/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py
@@ -232,9 +232,9 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass:
else []
for fine_grained_lineage in upstream_lineage.fineGrainedLineages
]
- assert all(
- urn in expected_schema_field_urns for urn in actual_schema_field_urns
- ), "Some expected column URNs are missing from fine grained lineage."
+ assert all(urn in expected_schema_field_urns for urn in actual_schema_field_urns), (
+ "Some expected column URNs are missing from fine grained lineage."
+ )
def test_lineage_for_external_bq_table_no_column_lineage(mock_datahub_graph_instance):
@@ -286,9 +286,9 @@ def fake_schema_metadata(entity_urn: str) -> Optional[models.SchemaMetadataClass
assert len(upstream_lineage.upstreams) == 3
# Extracting dataset URNs from upstream_lineage.upstreams
actual_dataset_urns = [upstream.dataset for upstream in upstream_lineage.upstreams]
- assert all(
- urn in actual_dataset_urns for urn in expected_dataset_urns
- ), "Some expected dataset URNs are missing from upstream lineage."
+ assert all(urn in actual_dataset_urns for urn in expected_dataset_urns), (
+ "Some expected dataset URNs are missing from upstream lineage."
+ )
assert upstream_lineage.fineGrainedLineages is None
diff --git a/metadata-ingestion/tests/unit/cli/assertion/test_compile.py b/metadata-ingestion/tests/unit/cli/assertion/test_compile.py
index 47253b5b0d71ea..0a1870d83212e8 100644
--- a/metadata-ingestion/tests/unit/cli/assertion/test_compile.py
+++ b/metadata-ingestion/tests/unit/cli/assertion/test_compile.py
@@ -37,6 +37,6 @@ def test_compile_assertion_config_spec_for_snowflake(pytestconfig, tmp_path):
for file_name in output_file_names:
assert os.path.exists(tmp_path / file_name)
- assert filecmp.cmp(
- golden_file_path / file_name, tmp_path / file_name
- ), f"{file_name} is not as expected"
+ assert filecmp.cmp(golden_file_path / file_name, tmp_path / file_name), (
+ f"{file_name} is not as expected"
+ )
diff --git a/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py b/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py
index 941d13be0a6139..27045dfc656cbe 100644
--- a/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py
+++ b/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py
@@ -262,8 +262,7 @@ def test_collapse_temp_lineage():
lineage_item: LineageItem = lineage_extractor._lineage_map[target_urn]
assert list(lineage_item.upstreams)[0].urn == (
- "urn:li:dataset:(urn:li:dataPlatform:redshift,"
- "test.public.player_activity,PROD)"
+ "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.player_activity,PROD)"
)
assert lineage_item.cll is not None
@@ -276,8 +275,7 @@ def test_collapse_temp_lineage():
assert lineage_item.cll[0].downstream.column == "price"
assert lineage_item.cll[0].upstreams[0].table == (
- "urn:li:dataset:(urn:li:dataPlatform:redshift,"
- "test.public.player_activity,PROD)"
+ "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.player_activity,PROD)"
)
assert lineage_item.cll[0].upstreams[0].column == "price"
@@ -441,8 +439,7 @@ def test_collapse_temp_recursive_cll_lineage():
)
assert target_dataset_cll[0].upstreams[0].table == (
- "urn:li:dataset:(urn:li:dataPlatform:redshift,"
- "dev.public.player_activity,PROD)"
+ "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)"
)
assert target_dataset_cll[0].upstreams[0].column == "price"
@@ -638,8 +635,7 @@ def test_collapse_temp_recursive_with_compex_column_cll_lineage():
)
assert target_dataset_cll[0].upstreams[0].table == (
- "urn:li:dataset:(urn:li:dataPlatform:redshift,"
- "dev.public.player_activity,PROD)"
+ "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)"
)
assert target_dataset_cll[0].upstreams[0].column == "price"
assert target_dataset_cll[0].upstreams[1].column == "tax"
diff --git a/metadata-ingestion/tests/unit/serde/test_codegen.py b/metadata-ingestion/tests/unit/serde/test_codegen.py
index b49f7153129136..13fcf3d919cc03 100644
--- a/metadata-ingestion/tests/unit/serde/test_codegen.py
+++ b/metadata-ingestion/tests/unit/serde/test_codegen.py
@@ -156,9 +156,9 @@ def _err(msg: str) -> None:
f"entity {entity_type}: aspect {aspect_name} is missing from the entity registry"
)
- assert (
- not errors
- ), f'To fix these errors, run "UPDATE_ENTITY_REGISTRY=true pytest {__file__}"'
+ assert not errors, (
+ f'To fix these errors, run "UPDATE_ENTITY_REGISTRY=true pytest {__file__}"'
+ )
def test_enum_options():
diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py
index 96ab8f7a01a386..e69727f73b6bf4 100644
--- a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py
+++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py
@@ -226,9 +226,9 @@ def test_stateful_ingestion(pytestconfig, tmp_path, mock_time):
pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore
base_pipeline_config # type: ignore
)
- pipeline_run1_config["sink"]["config"][
- "filename"
- ] = f"{tmp_path}/{output_file_name}"
+ pipeline_run1_config["sink"]["config"]["filename"] = (
+ f"{tmp_path}/{output_file_name}"
+ )
pipeline_run1 = Pipeline.create(pipeline_run1_config)
pipeline_run1.run()
pipeline_run1.raise_from_status()
@@ -254,16 +254,18 @@ def test_stateful_ingestion(pytestconfig, tmp_path, mock_time):
) as mock_state:
mock_state.return_value = GenericCheckpointState(serde="utf-8")
pipeline_run2 = None
- pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore
+ pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(
+ base_pipeline_config # type: ignore
+ )
pipeline_run2_config["source"]["config"]["dataset_patterns"] = {
"allow": ["dummy_dataset1", "dummy_dataset2"],
}
pipeline_run2_config["source"]["config"]["dpi_id_to_ingest"] = "job2"
pipeline_run2_config["source"]["config"]["query_id_to_ingest"] = "query2"
- pipeline_run2_config["sink"]["config"][
- "filename"
- ] = f"{tmp_path}/{output_file_name_after_deleted}"
+ pipeline_run2_config["sink"]["config"]["filename"] = (
+ f"{tmp_path}/{output_file_name_after_deleted}"
+ )
pipeline_run2 = Pipeline.create(pipeline_run2_config)
pipeline_run2.run()
pipeline_run2.raise_from_status()
@@ -370,9 +372,9 @@ def test_stateful_ingestion_failure(pytestconfig, tmp_path, mock_time):
pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore
base_pipeline_config # type: ignore
)
- pipeline_run1_config["sink"]["config"][
- "filename"
- ] = f"{tmp_path}/{output_file_name}"
+ pipeline_run1_config["sink"]["config"]["filename"] = (
+ f"{tmp_path}/{output_file_name}"
+ )
pipeline_run1 = Pipeline.create(pipeline_run1_config)
pipeline_run1.run()
pipeline_run1.raise_from_status()
@@ -398,14 +400,16 @@ def test_stateful_ingestion_failure(pytestconfig, tmp_path, mock_time):
) as mock_state:
mock_state.return_value = GenericCheckpointState(serde="utf-8")
pipeline_run2 = None
- pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore
+ pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(
+ base_pipeline_config # type: ignore
+ )
pipeline_run2_config["source"]["config"]["dataset_patterns"] = {
"allow": ["dummy_dataset1", "dummy_dataset2"],
}
pipeline_run2_config["source"]["config"]["report_failure"] = True
- pipeline_run2_config["sink"]["config"][
- "filename"
- ] = f"{tmp_path}/{output_file_name_after_deleted}"
+ pipeline_run2_config["sink"]["config"]["filename"] = (
+ f"{tmp_path}/{output_file_name_after_deleted}"
+ )
pipeline_run2 = Pipeline.create(pipeline_run2_config)
pipeline_run2.run()
pipeline_run2.pretty_print_summary()
diff --git a/metadata-ingestion/tests/unit/test_confluent_schema_registry.py b/metadata-ingestion/tests/unit/test_confluent_schema_registry.py
index 3500636f00eddf..effa6ba85acaeb 100644
--- a/metadata-ingestion/tests/unit/test_confluent_schema_registry.py
+++ b/metadata-ingestion/tests/unit/test_confluent_schema_registry.py
@@ -85,16 +85,18 @@ def new_get_latest_version(subject_name: str) -> RegisteredSchema:
"get_latest_version",
new_get_latest_version,
):
- schema_str = confluent_schema_registry.get_schema_str_replace_confluent_ref_avro(
- # The external reference would match by name.
- schema=Schema(
- schema_str=schema_str_orig,
- schema_type="AVRO",
- references=[
- SchemaReference(
- name="TestTopic1", subject="schema_subject_1", version=1
- )
- ],
+ schema_str = (
+ confluent_schema_registry.get_schema_str_replace_confluent_ref_avro(
+ # The external reference would match by name.
+ schema=Schema(
+ schema_str=schema_str_orig,
+ schema_type="AVRO",
+ references=[
+ SchemaReference(
+ name="TestTopic1", subject="schema_subject_1", version=1
+ )
+ ],
+ )
)
)
assert schema_str == ConfluentSchemaRegistry._compact_schema(
@@ -106,16 +108,18 @@ def new_get_latest_version(subject_name: str) -> RegisteredSchema:
"get_latest_version",
new_get_latest_version,
):
- schema_str = confluent_schema_registry.get_schema_str_replace_confluent_ref_avro(
- # The external reference would match by subject.
- schema=Schema(
- schema_str=schema_str_orig,
- schema_type="AVRO",
- references=[
- SchemaReference(
- name="schema_subject_1", subject="TestTopic1", version=1
- )
- ],
+ schema_str = (
+ confluent_schema_registry.get_schema_str_replace_confluent_ref_avro(
+ # The external reference would match by subject.
+ schema=Schema(
+ schema_str=schema_str_orig,
+ schema_type="AVRO",
+ references=[
+ SchemaReference(
+ name="schema_subject_1", subject="TestTopic1", version=1
+ )
+ ],
+ )
)
)
assert schema_str == ConfluentSchemaRegistry._compact_schema(
diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py
index ff22ffedc6228f..d7899af69f8405 100644
--- a/metadata-ingestion/tests/unit/test_dbt_source.py
+++ b/metadata-ingestion/tests/unit/test_dbt_source.py
@@ -475,9 +475,9 @@ def test_get_column_type_redshift():
# Test 'super' type which should not show any warnings/errors
result_super = get_column_type(report, dataset_name, "super", "redshift")
assert isinstance(result_super.type, NullTypeClass)
- assert (
- len(report.infos) == 0
- ), "No warnings should be generated for known SUPER type"
+ assert len(report.infos) == 0, (
+ "No warnings should be generated for known SUPER type"
+ )
# Test unknown type, which generates a warning but resolves to NullTypeClass
unknown_type = "unknown_type"
diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py
index 3afa26b35dfe9f..48524450caf36e 100644
--- a/metadata-ingestion/tests/unit/test_iceberg.py
+++ b/metadata-ingestion/tests/unit/test_iceberg.py
@@ -88,15 +88,15 @@ def assert_field(
expected_nullable: bool,
expected_type: Any,
) -> None:
- assert (
- schema_field.description == expected_description
- ), f"Field description '{schema_field.description}' is different from expected description '{expected_description}'"
- assert (
- schema_field.nullable == expected_nullable
- ), f"Field nullable '{schema_field.nullable}' is different from expected nullable '{expected_nullable}'"
- assert isinstance(
- schema_field.type.type, expected_type
- ), f"Field type {schema_field.type.type} is different from expected type {expected_type}"
+ assert schema_field.description == expected_description, (
+ f"Field description '{schema_field.description}' is different from expected description '{expected_description}'"
+ )
+ assert schema_field.nullable == expected_nullable, (
+ f"Field nullable '{schema_field.nullable}' is different from expected nullable '{expected_nullable}'"
+ )
+ assert isinstance(schema_field.type.type, expected_type), (
+ f"Field type {schema_field.type.type} is different from expected type {expected_type}"
+ )
def test_config_no_catalog():
@@ -219,9 +219,9 @@ def test_iceberg_primitive_type_to_schema_field(
]:
schema = Schema(column)
schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
- assert (
- len(schema_fields) == 1
- ), f"Expected 1 field, but got {len(schema_fields)}"
+ assert len(schema_fields) == 1, (
+ f"Expected 1 field, but got {len(schema_fields)}"
+ )
assert_field(
schema_fields[0],
column.doc,
@@ -300,19 +300,19 @@ def test_iceberg_list_to_schema_field(
iceberg_source_instance = with_iceberg_source()
schema = Schema(list_column)
schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
- assert (
- len(schema_fields) == 1
- ), f"Expected 1 field, but got {len(schema_fields)}"
+ assert len(schema_fields) == 1, (
+ f"Expected 1 field, but got {len(schema_fields)}"
+ )
assert_field(
schema_fields[0], list_column.doc, list_column.optional, ArrayTypeClass
)
- assert isinstance(
- schema_fields[0].type.type, ArrayType
- ), f"Field type {schema_fields[0].type.type} was expected to be {ArrayType}"
+ assert isinstance(schema_fields[0].type.type, ArrayType), (
+ f"Field type {schema_fields[0].type.type} was expected to be {ArrayType}"
+ )
arrayType: ArrayType = schema_fields[0].type.type
- assert arrayType.nestedType == [
- expected_array_nested_type
- ], f"List Field nested type {arrayType.nestedType} was expected to be {expected_array_nested_type}"
+ assert arrayType.nestedType == [expected_array_nested_type], (
+ f"List Field nested type {arrayType.nestedType} was expected to be {expected_array_nested_type}"
+ )
@pytest.mark.parametrize(
@@ -387,9 +387,9 @@ def test_iceberg_map_to_schema_field(
schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema)
# Converting an Iceberg Map type will be done by creating an array of struct(key, value) records.
# The first field will be the array.
- assert (
- len(schema_fields) == 3
- ), f"Expected 3 fields, but got {len(schema_fields)}"
+ assert len(schema_fields) == 3, (
+ f"Expected 3 fields, but got {len(schema_fields)}"
+ )
assert_field(
schema_fields[0], map_column.doc, map_column.optional, ArrayTypeClass
)
diff --git a/metadata-ingestion/tests/unit/test_postgres_source.py b/metadata-ingestion/tests/unit/test_postgres_source.py
index 91a62b603bb584..25140cf1b997f8 100644
--- a/metadata-ingestion/tests/unit/test_postgres_source.py
+++ b/metadata-ingestion/tests/unit/test_postgres_source.py
@@ -21,9 +21,7 @@ def test_initial_database(create_engine_mock):
@patch("datahub.ingestion.source.sql.postgres.create_engine")
def test_get_inspectors_multiple_databases(create_engine_mock):
- execute_mock = (
- create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute
- )
+ execute_mock = create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute
execute_mock.return_value = [{"datname": "db1"}, {"datname": "db2"}]
config = PostgresConfig.parse_obj({**_base_config(), "initial_database": "db0"})
@@ -37,9 +35,7 @@ def test_get_inspectors_multiple_databases(create_engine_mock):
@patch("datahub.ingestion.source.sql.postgres.create_engine")
def tests_get_inspectors_with_database_provided(create_engine_mock):
- execute_mock = (
- create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute
- )
+ execute_mock = create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute
execute_mock.return_value = [{"datname": "db1"}, {"datname": "db2"}]
config = PostgresConfig.parse_obj({**_base_config(), "database": "custom_db"})
@@ -51,9 +47,7 @@ def tests_get_inspectors_with_database_provided(create_engine_mock):
@patch("datahub.ingestion.source.sql.postgres.create_engine")
def tests_get_inspectors_with_sqlalchemy_uri_provided(create_engine_mock):
- execute_mock = (
- create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute
- )
+ execute_mock = create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute
execute_mock.return_value = [{"datname": "db1"}, {"datname": "db2"}]
config = PostgresConfig.parse_obj(
diff --git a/metadata-ingestion/tests/unit/test_rest_sink.py b/metadata-ingestion/tests/unit/test_rest_sink.py
index a76f96039c2c71..564cf613c04464 100644
--- a/metadata-ingestion/tests/unit/test_rest_sink.py
+++ b/metadata-ingestion/tests/unit/test_rest_sink.py
@@ -283,9 +283,9 @@
def test_datahub_rest_emitter(requests_mock, record, path, snapshot):
def match_request_text(request: requests.Request) -> bool:
requested_snapshot = request.json()
- assert (
- requested_snapshot == snapshot
- ), f"Expected snapshot to be {json.dumps(snapshot)}, got {json.dumps(requested_snapshot)}"
+ assert requested_snapshot == snapshot, (
+ f"Expected snapshot to be {json.dumps(snapshot)}, got {json.dumps(requested_snapshot)}"
+ )
return True
requests_mock.post(
diff --git a/metadata-ingestion/tests/unit/utilities/test_lossy_collections.py b/metadata-ingestion/tests/unit/utilities/test_lossy_collections.py
index 43967367dff389..e137d671e95d71 100644
--- a/metadata-ingestion/tests/unit/utilities/test_lossy_collections.py
+++ b/metadata-ingestion/tests/unit/utilities/test_lossy_collections.py
@@ -34,7 +34,7 @@ def test_lossyset_sampling(length, sampling):
assert len(lossy_set) == min(10, length)
assert lossy_set.sampled is sampling
if sampling:
- assert f"... sampled with at most {length-10} elements missing" in str(
+ assert f"... sampled with at most {length - 10} elements missing" in str(
lossy_set
)
else:
@@ -66,7 +66,7 @@ def test_lossydict_sampling(length, sampling, sub_length):
element_length_map[i] = len(lossy_dict[i])
current_list = lossy_dict.get(i, LossyList())
- current_list.append(f"{i}:{round(time.time(),2)} Hello World")
+ current_list.append(f"{i}:{round(time.time(), 2)} Hello World")
lossy_dict[i] = current_list
element_length_map[i] += 1
diff --git a/metadata-ingestion/tests/unit/utilities/test_partition_executor.py b/metadata-ingestion/tests/unit/utilities/test_partition_executor.py
index ce211c2d618062..89e95d185e8028 100644
--- a/metadata-ingestion/tests/unit/utilities/test_partition_executor.py
+++ b/metadata-ingestion/tests/unit/utilities/test_partition_executor.py
@@ -37,9 +37,9 @@ def task(key: str, id: str) -> None:
saw_keys_in_parallel = False
while executing_tasks or not done_tasks:
keys_executing = [key for key, _ in executing_tasks]
- assert list(sorted(keys_executing)) == list(
- sorted(set(keys_executing))
- ), "partitioning not working"
+ assert list(sorted(keys_executing)) == list(sorted(set(keys_executing))), (
+ "partitioning not working"
+ )
if len(keys_executing) == 2:
saw_keys_in_parallel = True
diff --git a/metadata-ingestion/tests/unit/utilities/test_threaded_iterator_executor.py b/metadata-ingestion/tests/unit/utilities/test_threaded_iterator_executor.py
index 35c44c7b4a8479..fb7e2266e1c9d3 100644
--- a/metadata-ingestion/tests/unit/utilities/test_threaded_iterator_executor.py
+++ b/metadata-ingestion/tests/unit/utilities/test_threaded_iterator_executor.py
@@ -4,7 +4,7 @@
def test_threaded_iterator_executor():
def table_of(i):
for j in range(1, 11):
- yield f"{i}x{j}={i*j}"
+ yield f"{i}x{j}={i * j}"
assert {
res
diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle
index f3dc1de830ccef..c38468ca8cd8b0 100644
--- a/smoke-test/build.gradle
+++ b/smoke-test/build.gradle
@@ -73,16 +73,16 @@ task installDev(type: Exec) {
task pythonLint(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
- "black --check --diff tests/ && " +
"ruff check tests/ && " +
+ "ruff format --check tests/ && " +
"mypy tests/"
}
task pythonLintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
- "black tests/ && " +
"ruff check --fix tests/ && " +
+ "ruff format tests/ && " +
"mypy tests/"
}
@@ -154,3 +154,19 @@ task lint {
task lintFix {
dependsOn pythonLintFix
}
+
+task cleanPythonCache(type: Exec) {
+ commandLine 'bash', '-c',
+ "find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
+}
+
+
+clean {
+ delete venv_name
+ delete 'build'
+ delete 'dist'
+ delete '.ruff_cache'
+ delete '.mypy_cache'
+ delete '.pytest_cache'
+}
+clean.dependsOn cleanPythonCache
\ No newline at end of file
diff --git a/smoke-test/pyproject.toml b/smoke-test/pyproject.toml
index 55f037db2effea..55e286c73c01b9 100644
--- a/smoke-test/pyproject.toml
+++ b/smoke-test/pyproject.toml
@@ -7,20 +7,21 @@ name = "smoke-test"
version = "0.0.0"
description = ""
authors = [
- { name="Acryl Data", email="eng@acryl.io" },
+ { name="Acryl Data", email="eng@acryl.io" },
]
requires-python = ">=3.9"
+[tool.ruff]
+# Enable ruff format
+target-version = "py310"
+line-length = 88
+extend-exclude = ["tmp", "venv"]
-[tool.black]
-extend-exclude = '''
-# A regex preceded with ^/ will apply only to files and directories
-# in the root of the project.
-tmp
-venv
-'''
-include = '\.pyi?$'
-target-version = ['py310']
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
[tool.ruff.lint.isort]
combine-as-imports = true
@@ -40,19 +41,19 @@ required-imports = []
classes = ["typing"]
[tool.ruff.lint]
-select = [
- "B",
- "C90",
- "E",
- "F",
- "I", # For isort
- "TID",
+extend-select = [
+ "B", # flake8-bugbear
+ "C90", # mccabe complexity
+ "E", # pycodestyle errors
+ "F", # pyflakes
+ "G010", # logging.warn -> logging.warning
+ "I", # isort
+ "TID", # flake8-tidy-imports
]
ignore = [
- 'E501', # Ignore line length, since black handles that.
- 'D203', # Ignore 1 blank line required before class docstring.
- 'B904', # exception with `raise ... from err` or `raise ... from None` to distinguish
- 'TID252', # Prefer absolute imports over relative imports
+ "E501", # Line length violations (handled by formatter)
+ "B904", # exception with `raise ... from err` or `raise ... from None` to distinguish
+ "TID252",# Prefer absolute imports over relative imports
]
[tool.ruff.lint.mccabe]
@@ -74,5 +75,4 @@ disallow_untyped_decorators = true
warn_unused_configs = true
# eventually we'd like to enable these
disallow_incomplete_defs = false
-disallow_untyped_defs = false
-
+disallow_untyped_defs = false
\ No newline at end of file
diff --git a/smoke-test/requirements.txt b/smoke-test/requirements.txt
index 6779733a850bad..fadc3dbec1f2b5 100644
--- a/smoke-test/requirements.txt
+++ b/smoke-test/requirements.txt
@@ -9,7 +9,6 @@ joblib
pytest-xdist
networkx
 # libraries for linting below this
-black==23.7.0
mypy==1.5.1
ruff==0.9.2
# stub version are copied from metadata-ingestion/setup.py and that should be the source of truth
diff --git a/smoke-test/tests/data_process_instance/test_data_process_instance.py b/smoke-test/tests/data_process_instance/test_data_process_instance.py
index f1c532af515cfa..a68db03cf8cf16 100644
--- a/smoke-test/tests/data_process_instance/test_data_process_instance.py
+++ b/smoke-test/tests/data_process_instance/test_data_process_instance.py
@@ -250,14 +250,14 @@ def test_search_dpi(auth_session, ingest_cleanup_data):
assert res_data, "Response should not be empty"
assert "data" in res_data, "Response should contain 'data' field"
print("RESPONSE DATA:" + str(res_data))
- assert (
- "scrollAcrossEntities" in res_data["data"]
- ), "Response should contain 'scrollAcrossEntities' field"
+ assert "scrollAcrossEntities" in res_data["data"], (
+ "Response should contain 'scrollAcrossEntities' field"
+ )
search_results = res_data["data"]["scrollAcrossEntities"]
- assert (
- "searchResults" in search_results
- ), "Response should contain 'searchResults' field"
+ assert "searchResults" in search_results, (
+ "Response should contain 'searchResults' field"
+ )
results = search_results["searchResults"]
assert len(results) > 0, "Should find at least one result"
diff --git a/smoke-test/tests/dataproduct/test_dataproduct.py b/smoke-test/tests/dataproduct/test_dataproduct.py
index 0aa66984b394c8..8d484820d1ed45 100644
--- a/smoke-test/tests/dataproduct/test_dataproduct.py
+++ b/smoke-test/tests/dataproduct/test_dataproduct.py
@@ -135,9 +135,9 @@ def validate_relationships(
urn_match[dataset_urn] = True
urns_missing = [k for k in urn_match if urn_match[k] is False]
- assert (
- urns_missing == []
- ), "All dataset urns should have a DataProductContains relationship to the data product"
+ assert urns_missing == [], (
+ "All dataset urns should have a DataProductContains relationship to the data product"
+ )
dataset_urns_matched = set()
for e in graph_client.get_related_entities(
@@ -147,9 +147,9 @@ def validate_relationships(
):
dataset_urns_matched.add(e.urn)
- assert (
- set(dataset_urns) == dataset_urns_matched
- ), "All dataset urns should be navigable from the data product"
+ assert set(dataset_urns) == dataset_urns_matched, (
+ "All dataset urns should be navigable from the data product"
+ )
@tenacity.retry(
@@ -247,6 +247,6 @@ def test_create_data_product(graph_client, ingest_cleanup_data):
urn_match[dataset_urn] = True
urns_missing = [k for k in urn_match if urn_match[k] is False]
- assert set(urns_missing) == set(
- dataset_urns
- ), f"All dataset urns should no longer have a DataProductContains relationship to the data product {data_product_urn}"
+ assert set(urns_missing) == set(dataset_urns), (
+ f"All dataset urns should no longer have a DataProductContains relationship to the data product {data_product_urn}"
+ )
diff --git a/smoke-test/tests/lineage/test_lineage.py b/smoke-test/tests/lineage/test_lineage.py
index 771085043926dd..dd5309667806c5 100644
--- a/smoke-test/tests/lineage/test_lineage.py
+++ b/smoke-test/tests/lineage/test_lineage.py
@@ -92,7 +92,7 @@ def _explain_sal_result(result: dict) -> str:
explain += "Entities: "
try:
for e in entities:
- explain += f"\t{e.replace('urn:li:','')}\n"
+ explain += f"\t{e.replace('urn:li:', '')}\n"
for entity in entities:
paths = [
x["paths"][0]["path"]
@@ -349,9 +349,9 @@ def get_expectation_for_query(self, query: ImpactQuery) -> LineageExpectation:
lineage_expectation.impacted_entities[impacted_entity]
)
else:
- entries_to_add[
- impacted_dataset_entity
- ] = lineage_expectation.impacted_entities[impacted_entity]
+ entries_to_add[impacted_dataset_entity] = (
+ lineage_expectation.impacted_entities[impacted_entity]
+ )
entries_to_remove.append(impacted_entity)
for impacted_entity in entries_to_remove:
del lineage_expectation.impacted_entities[impacted_entity]
@@ -756,9 +756,9 @@ def test_expectation(self, graph: DataHubGraph) -> bool:
]
)
try:
- assert (
- impacted_entities == impacted_entities_expectation
- ), f"Expected impacted entities to be {impacted_entities_expectation}, found {impacted_entities}"
+ assert impacted_entities == impacted_entities_expectation, (
+ f"Expected impacted entities to be {impacted_entities_expectation}, found {impacted_entities}"
+ )
except Exception:
# breakpoint()
raise
@@ -783,10 +783,14 @@ def test_expectation(self, graph: DataHubGraph) -> bool:
try:
assert len(impacted_entity_paths) == len(
expectation.impacted_entities[impacted_entity]
- ), f"Expected length of impacted entity paths to be {len(expectation.impacted_entities[impacted_entity])}, found {len(impacted_entity_paths)}"
+ ), (
+ f"Expected length of impacted entity paths to be {len(expectation.impacted_entities[impacted_entity])}, found {len(impacted_entity_paths)}"
+ )
assert set(impacted_entity_paths) == set(
expectation.impacted_entities[impacted_entity]
- ), f"Expected impacted entity paths to be {expectation.impacted_entities[impacted_entity]}, found {impacted_entity_paths}"
+ ), (
+ f"Expected impacted entity paths to be {expectation.impacted_entities[impacted_entity]}, found {impacted_entity_paths}"
+ )
except Exception:
# breakpoint()
raise
diff --git a/smoke-test/tests/managed_ingestion/managed_ingestion_test.py b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py
index 5d6179de6be644..7fe9421af85b1a 100644
--- a/smoke-test/tests/managed_ingestion/managed_ingestion_test.py
+++ b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py
@@ -489,9 +489,9 @@ def test_create_list_get_ingestion_execution_request(auth_session):
assert res_data
assert res_data["data"]
- assert (
- res_data["data"]["createIngestionExecutionRequest"] is not None
- ), f"res_data was {res_data}"
+ assert res_data["data"]["createIngestionExecutionRequest"] is not None, (
+ f"res_data was {res_data}"
+ )
assert "errors" not in res_data
execution_request_urn = res_data["data"]["createIngestionExecutionRequest"]
diff --git a/smoke-test/tests/read_only/test_search.py b/smoke-test/tests/read_only/test_search.py
index 36ecf68395f919..66bbeb408d0529 100644
--- a/smoke-test/tests/read_only/test_search.py
+++ b/smoke-test/tests/read_only/test_search.py
@@ -153,6 +153,6 @@ def test_openapi_v3_entity(auth_session, entity_type):
expected_data = {"urn": first_urn}
- assert (
- actual_data["urn"] == expected_data["urn"]
- ), f"Mismatch: expected {expected_data}, got {actual_data}"
+ assert actual_data["urn"] == expected_data["urn"], (
+ f"Mismatch: expected {expected_data}, got {actual_data}"
+ )
diff --git a/smoke-test/tests/read_only/test_services_up.py b/smoke-test/tests/read_only/test_services_up.py
index 12ff04965548f0..79812b46476fa8 100644
--- a/smoke-test/tests/read_only/test_services_up.py
+++ b/smoke-test/tests/read_only/test_services_up.py
@@ -27,6 +27,6 @@ def test_gms_config_accessible(auth_session) -> None:
default_cli_version: str = gms_config["managedIngestion"]["defaultCliVersion"]
print(f"Default CLI version: {default_cli_version}")
assert not default_cli_version.startswith("@")
- assert "." in default_cli_version or looks_like_a_short_sha(
- default_cli_version
- ), "Default CLI version does not look like a version string"
+ assert "." in default_cli_version or looks_like_a_short_sha(default_cli_version), (
+ "Default CLI version does not look like a version string"
+ )
diff --git a/smoke-test/tests/utilities/file_emitter.py b/smoke-test/tests/utilities/file_emitter.py
index ddbcff8db31d8b..d5539d143af737 100644
--- a/smoke-test/tests/utilities/file_emitter.py
+++ b/smoke-test/tests/utilities/file_emitter.py
@@ -7,7 +7,7 @@
class FileEmitter:
def __init__(
- self, filename: str, run_id: str = f"test_{int(time.time()*1000.0)}"
+ self, filename: str, run_id: str = f"test_{int(time.time() * 1000.0)}"
) -> None:
self.sink: FileSink = FileSink(
ctx=PipelineContext(run_id=run_id),