From 262dd76518049c25c977b97e7144fcae6c270a80 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Sat, 18 Jan 2025 15:06:20 +0530 Subject: [PATCH] dev: remove black in favor of ruff for formatting (#12378) --- .../airflow-plugin/build.gradle | 10 +- .../airflow-plugin/pyproject.toml | 48 ++++----- .../airflow-plugin/setup.py | 1 - .../src/datahub_airflow_plugin/_extractors.py | 6 +- .../datahub_listener.py | 6 +- .../datahub_plugin_v22.py | 8 +- .../example_dags/lineage_emission_dag.py | 1 + .../tests/integration/test_plugin.py | 22 ++-- .../airflow-plugin/tests/unit/test_airflow.py | 9 +- .../dagster-plugin/build.gradle | 12 ++- .../examples/advanced_ops_jobs.py | 2 +- .../dagster-plugin/examples/assets_job.py | 2 +- .../dagster-plugin/examples/basic_setup.py | 2 +- .../dagster-plugin/examples/ops_job.py | 2 +- .../dagster-plugin/pyproject.toml | 48 ++++----- .../dagster-plugin/setup.py | 1 - .../client/dagster_generator.py | 8 +- .../sensors/datahub_sensors.py | 1 - .../gx-plugin/build.gradle | 12 ++- .../gx-plugin/pyproject.toml | 48 ++++----- metadata-ingestion-modules/gx-plugin/setup.py | 3 +- .../gx-plugin/src/datahub_gx_plugin/action.py | 5 +- .../prefect-plugin/build.gradle | 10 +- .../prefect-plugin/pyproject.toml | 48 ++++----- .../prefect-plugin/setup.py | 3 +- .../src/prefect_datahub/datahub_emitter.py | 6 +- metadata-ingestion/build.gradle | 7 +- metadata-ingestion/developing.md | 3 +- .../examples/library/run_assertion.py | 2 +- metadata-ingestion/pyproject.toml | 40 +++---- metadata-ingestion/setup.py | 1 - .../entities/assertion/assertion_operator.py | 8 +- .../datacontract/assertion_operator.py | 8 +- .../api/entities/dataproduct/dataproduct.py | 6 +- .../datahub/api/entities/dataset/dataset.py | 3 +- .../structuredproperties.py | 12 +-- .../src/datahub/cli/cli_utils.py | 2 +- .../src/datahub/cli/docker_cli.py | 12 +-- .../src/datahub/cli/lite_cli.py | 4 +- metadata-ingestion/src/datahub/cli/migrate.py | 6 +- .../datahub/cli/specific/assertions_cli.py | 6 +- .../src/datahub/cli/timeline_cli.py | 2 +- .../src/datahub/configuration/common.py | 3 +- .../src/datahub/configuration/git.py | 4 +- .../configuration/time_window_config.py | 15 ++- .../src/datahub/emitter/mce_builder.py | 12 +-- .../src/datahub/emitter/mcp_patch_builder.py | 3 +- .../api/incremental_lineage_helper.py | 10 +- .../src/datahub/ingestion/api/report.py | 3 +- .../datahub/ingestion/api/source_helpers.py | 2 +- .../ingestion/extractor/json_schema_util.py | 6 +- .../ingestion/extractor/schema_util.py | 8 +- .../src/datahub/ingestion/fs/s3_fs.py | 6 +- .../ingestion/glossary/datahub_classifier.py | 10 +- .../src/datahub/ingestion/graph/client.py | 10 +- .../src/datahub/ingestion/run/pipeline.py | 10 +- .../datahub/ingestion/run/pipeline_config.py | 6 +- .../source/abs/datalake_profiler_config.py | 6 +- .../datahub/ingestion/source/abs/source.py | 25 +++-- .../src/datahub/ingestion/source/aws/glue.py | 22 ++-- .../sagemaker_processors/feature_groups.py | 2 +- .../source/aws/sagemaker_processors/models.py | 4 +- .../ingestion/source/bigquery_v2/bigquery.py | 6 +- .../source/bigquery_v2/bigquery_audit.py | 6 +- .../source/bigquery_v2/bigquery_config.py | 12 +-- .../bigquery_platform_resource_helper.py | 12 ++- .../source/bigquery_v2/bigquery_schema_gen.py | 24 +++-- .../ingestion/source/bigquery_v2/lineage.py | 18 ++-- .../ingestion/source/bigquery_v2/queries.py | 4 +- .../source/bigquery_v2/queries_extractor.py | 6 +- .../ingestion/source/bigquery_v2/usage.py | 6 +- .../ingestion/source/cassandra/cassandra.py | 1 
- .../source/cassandra/cassandra_utils.py | 8 +- .../source/confluent_schema_registry.py | 12 +-- .../datahub/ingestion/source/csv_enricher.py | 58 +++++----- .../source/datahub/datahub_database_reader.py | 6 +- .../datahub/ingestion/source/dbt/dbt_cloud.py | 26 ++--- .../ingestion/source/dbt/dbt_common.py | 16 +-- .../ingestion/source/dremio/dremio_api.py | 6 +- .../dremio/dremio_datahub_source_mapping.py | 6 +- .../ingestion/source/elastic_search.py | 8 +- .../source/gc/soft_deleted_entity_cleanup.py | 6 +- .../ingestion/source/gcs/gcs_source.py | 5 +- .../ingestion/source/ge_data_profiler.py | 6 +- .../ingestion/source/ge_profiling_config.py | 6 +- .../ingestion/source/iceberg/iceberg.py | 6 +- .../ingestion/source/identity/azure_ad.py | 6 +- .../datahub/ingestion/source/identity/okta.py | 6 +- .../datahub/ingestion/source/kafka/kafka.py | 20 ++-- .../source/kafka_connect/kafka_connect.py | 5 +- .../source/kafka_connect/sink_connectors.py | 6 +- .../source/kafka_connect/source_connectors.py | 6 +- .../ingestion/source/looker/looker_common.py | 38 +++---- .../ingestion/source/looker/looker_config.py | 6 +- .../ingestion/source/looker/looker_source.py | 50 ++++----- .../source/looker/looker_template_language.py | 6 +- .../ingestion/source/looker/looker_usage.py | 12 +-- .../source/looker/lookml_concept_context.py | 12 +-- .../ingestion/source/looker/lookml_source.py | 28 +++-- .../ingestion/source/looker/view_upstream.py | 10 +- .../src/datahub/ingestion/source/mlflow.py | 8 +- .../src/datahub/ingestion/source/mongodb.py | 10 +- .../src/datahub/ingestion/source/nifi.py | 50 +++++---- .../src/datahub/ingestion/source/openapi.py | 18 ++-- .../ingestion/source/powerbi/config.py | 24 ++--- .../source/powerbi/m_query/parser.py | 22 ++-- .../source/powerbi/m_query/pattern_handler.py | 50 ++++----- .../source/powerbi/m_query/resolver.py | 26 ++--- .../ingestion/source/powerbi/powerbi.py | 12 +-- .../powerbi/rest_api_wrapper/data_resolver.py | 18 ++-- .../ingestion/source/qlik_sense/qlik_api.py | 2 +- .../ingestion/source/redshift/config.py | 6 +- .../ingestion/source/redshift/redshift.py | 24 ++--- .../ingestion/source/redshift/usage.py | 16 +-- .../source/s3/datalake_profiler_config.py | 6 +- .../datahub/ingestion/source/salesforce.py | 51 ++++----- .../datahub/ingestion/source/sigma/sigma.py | 6 +- .../ingestion/source/sigma/sigma_api.py | 22 ++-- .../source/snowflake/snowflake_config.py | 16 +-- .../source/snowflake/snowflake_connection.py | 12 +-- .../source/snowflake/snowflake_queries.py | 4 +- .../source/snowflake/snowflake_schema.py | 6 +- .../source/snowflake/snowflake_schema_gen.py | 12 +-- .../source/snowflake/snowflake_tag.py | 14 +-- .../source/snowflake/snowflake_usage_v2.py | 6 +- .../source/snowflake/snowflake_utils.py | 3 +- .../source/snowflake/snowflake_v2.py | 17 ++- .../datahub/ingestion/source/sql/athena.py | 4 +- .../ingestion/source/sql/clickhouse.py | 20 ++-- .../datahub/ingestion/source/sql/oracle.py | 4 +- .../source/sql/sql_generic_profiler.py | 3 +- .../datahub/ingestion/source/sql/teradata.py | 19 +++- .../source/state/profiling_state_handler.py | 6 +- .../state/redundant_run_skip_handler.py | 12 +-- .../state/stale_entity_removal_handler.py | 6 +- ...atahub_ingestion_checkpointing_provider.py | 18 ++-- .../file_ingestion_checkpointing_provider.py | 2 +- .../ingestion/source/tableau/tableau.py | 95 ++++++++-------- .../datahub/ingestion/source/unity/config.py | 4 +- .../datahub/ingestion/source/unity/proxy.py | 2 +- .../datahub/ingestion/source/unity/source.py | 6 +- 
.../datahub/ingestion/source/unity/usage.py | 4 +- .../source/usage/clickhouse_usage.py | 8 +- .../source/usage/starburst_trino_usage.py | 6 +- .../transformer/add_dataset_dataproduct.py | 8 +- .../transformer/add_dataset_properties.py | 6 +- .../transformer/add_dataset_schema_tags.py | 6 +- .../transformer/add_dataset_schema_terms.py | 6 +- .../dataset_domain_based_on_tags.py | 8 +- .../extract_ownership_from_tags.py | 6 +- .../ingestion/transformer/tags_to_terms.py | 14 +-- .../assertion/snowflake/compiler.py | 20 ++-- .../src/datahub/lite/duckdb_lite.py | 22 ++-- .../aspect_helpers/custom_properties.py | 3 +- .../datahub/sql_parsing/schema_resolver.py | 15 +-- .../sql_parsing/sql_parsing_aggregator.py | 32 +++--- .../datahub/sql_parsing/sqlglot_lineage.py | 6 +- .../src/datahub/sql_parsing/sqlglot_utils.py | 2 +- .../src/datahub/telemetry/stats.py | 3 +- .../utilities/file_backed_collections.py | 20 ++-- .../datahub/utilities/hive_schema_to_avro.py | 4 +- .../src/datahub/utilities/logging_manager.py | 4 +- .../datahub/utilities/lossy_collections.py | 6 +- .../src/datahub/utilities/mapping.py | 6 +- .../datahub/utilities/serialized_lru_cache.py | 4 +- .../utilities/sqlalchemy_query_combiner.py | 12 +-- .../datahub/utilities/stats_collections.py | 4 +- .../src/datahub/utilities/urns/urn_iter.py | 4 +- .../integration/azure_ad/test_azure_ad.py | 4 +- .../tests/integration/dremio/test_dremio.py | 18 ++-- .../tests/integration/grafana/test_grafana.py | 16 +-- .../kafka-connect/test_kafka_connect.py | 16 +-- .../tests/integration/looker/test_looker.py | 6 +- .../tests/integration/lookml/test_lookml.py | 12 +-- .../tests/integration/nifi/test_nifi.py | 4 +- .../integration/powerbi/test_m_parser.py | 92 ++++++++-------- .../tests/integration/powerbi/test_powerbi.py | 18 ++-- .../integration/salesforce/test_salesforce.py | 30 +++--- .../integration/sql_server/test_sql_server.py | 2 +- .../unity/test_unity_catalog_ingest.py | 102 +++++++++--------- .../tests/performance/databricks/generator.py | 2 +- .../tests/test_helpers/mce_helpers.py | 12 +-- .../tests/test_helpers/state_helpers.py | 2 +- .../entities/dataproducts/test_dataproduct.py | 2 +- .../source_helpers/test_ensure_aspect_size.py | 24 ++--- .../unit/bigquery/test_bigquery_lineage.py | 12 +-- .../tests/unit/cli/assertion/test_compile.py | 6 +- .../unit/redshift/test_redshift_lineage.py | 12 +-- .../tests/unit/serde/test_codegen.py | 6 +- .../state/test_stateful_ingestion.py | 32 +++--- .../unit/test_confluent_schema_registry.py | 44 ++++---- .../tests/unit/test_dbt_source.py | 6 +- metadata-ingestion/tests/unit/test_iceberg.py | 48 ++++----- .../tests/unit/test_postgres_source.py | 12 +-- .../tests/unit/test_rest_sink.py | 6 +- .../unit/utilities/test_lossy_collections.py | 4 +- .../unit/utilities/test_partition_executor.py | 6 +- .../test_threaded_iterator_executor.py | 2 +- smoke-test/build.gradle | 20 +++- smoke-test/pyproject.toml | 46 ++++---- smoke-test/requirements.txt | 1 - .../test_data_process_instance.py | 12 +-- .../tests/dataproduct/test_dataproduct.py | 18 ++-- smoke-test/tests/lineage/test_lineage.py | 22 ++-- .../managed_ingestion_test.py | 6 +- smoke-test/tests/read_only/test_search.py | 6 +- .../tests/read_only/test_services_up.py | 6 +- smoke-test/tests/utilities/file_emitter.py | 2 +- 208 files changed, 1366 insertions(+), 1326 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index 1bcb58e6b7c543..95b4ee3118f03e 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -73,16 +73,15 @@ task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " + "source ${venv_name}/bin/activate && set -x && " + - "black --check --diff src/ tests/ && " + "ruff check src/ tests/ && " + + "ruff format --check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "black src/ tests/ && " + - "ruff check --fix src/ tests/" - "mypy src/ tests/ " + "ruff check --fix src/ tests/ && " + + "ruff format src/ tests/ " } // HACK: Some of the Airflow constraint files conflict with packages that we install (e.g. black). @@ -119,5 +118,8 @@ clean { delete venv_name delete 'build' delete 'dist' + delete '.ruff_cache' + delete '.mypy_cache' + delete '.pytest_cache' } clean.dependsOn cleanPythonCache diff --git a/metadata-ingestion-modules/airflow-plugin/pyproject.toml b/metadata-ingestion-modules/airflow-plugin/pyproject.toml index 7d03c2a14bf078..d1e1d0ad479442 100644 --- a/metadata-ingestion-modules/airflow-plugin/pyproject.toml +++ b/metadata-ingestion-modules/airflow-plugin/pyproject.toml @@ -2,13 +2,21 @@ build-backend = "setuptools.build_meta" requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"] -[tool.black] -extend-exclude = ''' -# A regex preceded with ^/ will apply only to files and directories -# in the root of the project. -^/tmp -''' -include = '\.pyi?$' +[tool.ruff] +line-length = 88 +target-version = "py38" +exclude = [ + ".git", + "venv", + ".tox", + "__pycache__", +] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" [tool.ruff.lint.isort] combine-as-imports = true @@ -28,31 +36,23 @@ required-imports = [] classes = ["typing"] [tool.ruff.lint] -select = [ - "B", - "C90", - "E", - "F", - "I", # For isort - "TID", +extend-select = [ + "B", # flake8-bugbear + "C90", # mccabe complexity + "E", # pycodestyle errors + "F", # pyflakes + "G010", # logging.warn -> logging.warning + "I", # isort + "TID", # flake8-tidy-imports ] ignore = [ - # Ignore line length violations (handled by Black) - "E501", - # Ignore whitespace before ':' (matches Black) - "E203", - "E203", - # Allow usages of functools.lru_cache - "B019", - # Allow function call in argument defaults - "B008", + "E501", # Line length violations (handled by formatter) ] [tool.ruff.lint.mccabe] max-complexity = 15 [tool.ruff.lint.flake8-tidy-imports] -# Disallow all relative imports. 
ban-relative-imports = "all" [tool.ruff.lint.per-file-ignores] diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 79c18a5188dd84..58c04158957ccd 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -71,7 +71,6 @@ def get_long_description(): dev_requirements = { *base_requirements, *mypy_stubs, - "black==22.12.0", "coverage>=5.1", "ruff==0.9.2", "mypy==1.10.1", diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py index fd01ac10f98de9..5904ce1e9e978c 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py @@ -63,9 +63,9 @@ def __init__(self): self.task_to_extractor.extractors["AthenaOperator"] = AthenaOperatorExtractor - self.task_to_extractor.extractors[ - "BigQueryInsertJobOperator" - ] = BigQueryInsertJobOperatorExtractor + self.task_to_extractor.extractors["BigQueryInsertJobOperator"] = ( + BigQueryInsertJobOperatorExtractor + ) self._graph: Optional["DataHubGraph"] = None diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index 9de44811f60a48..b2ca61e3de3bf5 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -286,9 +286,9 @@ def _extract_lineage( if sql_parsing_result: if error := sql_parsing_result.debug_info.error: logger.info(f"SQL parsing error: {error}", exc_info=error) - datajob.properties[ - "datahub_sql_parser_error" - ] = f"{type(error).__name__}: {error}" + datajob.properties["datahub_sql_parser_error"] = ( + f"{type(error).__name__}: {error}" + ) if not sql_parsing_result.debug_info.table_error: input_urns.extend(sql_parsing_result.in_tables) output_urns.extend(sql_parsing_result.out_tables) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index 4bf050d41473e4..99b0a40fd3c13e 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -44,11 +44,9 @@ def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]: if task_inlets and isinstance(task_inlets, list): inlets = [] - task_ids = ( - {o for o in task_inlets if isinstance(o, str)} - .union(op.task_id for op in task_inlets if isinstance(op, BaseOperator)) - .intersection(task.get_flat_relative_ids(upstream=True)) - ) + task_ids = {o for o in task_inlets if isinstance(o, str)}.union( + op.task_id for op in task_inlets if isinstance(op, BaseOperator) + ).intersection(task.get_flat_relative_ids(upstream=True)) from airflow.lineage import AUTO from cattr import structure diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py index 4351f40fe7e3ad..24e89211dd3c5b 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py @@ -2,6 +2,7 @@ This example demonstrates how to emit lineage to DataHub within an Airflow DAG. """ + from datetime import timedelta from airflow import DAG diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index d2c9821295419c..2744c26021cde3 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -273,13 +273,21 @@ def _run_airflow( subprocess.check_call( [ # fmt: off - "airflow", "users", "create", - "--username", "airflow", - "--password", "airflow", - "--firstname", "admin", - "--lastname", "admin", - "--role", "Admin", - "--email", "airflow@example.com", + "airflow", + "users", + "create", + "--username", + "airflow", + "--password", + "airflow", + "--firstname", + "admin", + "--lastname", + "admin", + "--role", + "Admin", + "--email", + "airflow@example.com", # fmt: on ], env=environment, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py index 1dc8e14a425dfc..4219c5fb9cefb3 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py @@ -242,9 +242,7 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions): }, ), mock.patch("airflow.models.BaseOperator.xcom_pull"), mock.patch( "airflow.models.BaseOperator.xcom_push" - ), patch_airflow_connection( - datahub_rest_connection_config - ): + ), patch_airflow_connection(datahub_rest_connection_config): func = mock.Mock() func.__name__ = "foo" @@ -275,7 +273,10 @@ def test_lineage_backend(mock_emit, inlets, outlets, capture_executions): if AIRFLOW_VERSION < packaging.version.parse("2.2.0"): ti = TaskInstance(task=op2, execution_date=DEFAULT_DATE) # Ignoring type here because DagRun state is just a string at Airflow 1 - dag_run = DagRun(state="success", run_id=f"scheduled_{DEFAULT_DATE.isoformat()}") # type: ignore + dag_run = DagRun( + state="success", # type: ignore[arg-type] + run_id=f"scheduled_{DEFAULT_DATE.isoformat()}", + ) else: from airflow.utils.state import DagRunState diff --git a/metadata-ingestion-modules/dagster-plugin/build.gradle b/metadata-ingestion-modules/dagster-plugin/build.gradle index 503b3556a41bfe..7dd7036e276151 100644 --- a/metadata-ingestion-modules/dagster-plugin/build.gradle +++ b/metadata-ingestion-modules/dagster-plugin/build.gradle @@ -54,16 +54,15 @@ task installDev(type: Exec, dependsOn: [install]) { task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "black --check --diff src/ tests/ examples/ && " + - "ruff check src/ tests/ && " + + "ruff check src/ tests/ examples/ && " + + "ruff format --check src/ tests/ examples/ && " + "mypy --show-traceback --show-error-codes src/ tests/ examples/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && " + - "black src/ tests/ examples/ && " + - "ruff check --fix src/ tests/" - "mypy src/ tests/ examples/" + "ruff check --fix src/ tests/ examples/ && " + + "ruff format src/ tests/
examples/ " } task installDevTest(type: Exec, dependsOn: [installDev]) { @@ -105,5 +104,8 @@ clean { delete venv_name delete 'build' delete 'dist' + delete '.ruff_cache' + delete '.mypy_cache' + delete '.pytest_cache' } clean.dependsOn cleanPythonCache diff --git a/metadata-ingestion-modules/dagster-plugin/examples/advanced_ops_jobs.py b/metadata-ingestion-modules/dagster-plugin/examples/advanced_ops_jobs.py index 7b7616b1ec11de..75cab237b05a3e 100644 --- a/metadata-ingestion-modules/dagster-plugin/examples/advanced_ops_jobs.py +++ b/metadata-ingestion-modules/dagster-plugin/examples/advanced_ops_jobs.py @@ -9,9 +9,9 @@ job, op, ) + from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.utilities.urns.dataset_urn import DatasetUrn - from datahub_dagster_plugin.client.dagster_generator import ( DagsterGenerator, DatasetLineage, diff --git a/metadata-ingestion-modules/dagster-plugin/examples/assets_job.py b/metadata-ingestion-modules/dagster-plugin/examples/assets_job.py index 1ed3f2f915061b..9b26b502d770f2 100644 --- a/metadata-ingestion-modules/dagster-plugin/examples/assets_job.py +++ b/metadata-ingestion-modules/dagster-plugin/examples/assets_job.py @@ -7,9 +7,9 @@ define_asset_job, multi_asset, ) + from datahub.ingestion.graph.config import DatahubClientConfig from datahub.utilities.urns.dataset_urn import DatasetUrn - from datahub_dagster_plugin.sensors.datahub_sensors import ( DatahubDagsterSourceConfig, make_datahub_sensor, diff --git a/metadata-ingestion-modules/dagster-plugin/examples/basic_setup.py b/metadata-ingestion-modules/dagster-plugin/examples/basic_setup.py index 300cf9df022c66..2eeff225697261 100644 --- a/metadata-ingestion-modules/dagster-plugin/examples/basic_setup.py +++ b/metadata-ingestion-modules/dagster-plugin/examples/basic_setup.py @@ -1,6 +1,6 @@ from dagster import Definitions -from datahub.ingestion.graph.client import DatahubClientConfig +from datahub.ingestion.graph.client import DatahubClientConfig from datahub_dagster_plugin.sensors.datahub_sensors import ( DatahubDagsterSourceConfig, make_datahub_sensor, diff --git a/metadata-ingestion-modules/dagster-plugin/examples/ops_job.py b/metadata-ingestion-modules/dagster-plugin/examples/ops_job.py index a17fc89e6922df..aa2902ee5c708b 100644 --- a/metadata-ingestion-modules/dagster-plugin/examples/ops_job.py +++ b/metadata-ingestion-modules/dagster-plugin/examples/ops_job.py @@ -1,7 +1,7 @@ from dagster import Definitions, In, Out, PythonObjectDagsterType, job, op + from datahub.ingestion.graph.config import DatahubClientConfig from datahub.utilities.urns.dataset_urn import DatasetUrn - from datahub_dagster_plugin.sensors.datahub_sensors import ( DatahubDagsterSourceConfig, make_datahub_sensor, diff --git a/metadata-ingestion-modules/dagster-plugin/pyproject.toml b/metadata-ingestion-modules/dagster-plugin/pyproject.toml index 7d03c2a14bf078..d1e1d0ad479442 100644 --- a/metadata-ingestion-modules/dagster-plugin/pyproject.toml +++ b/metadata-ingestion-modules/dagster-plugin/pyproject.toml @@ -2,13 +2,21 @@ build-backend = "setuptools.build_meta" requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"] -[tool.black] -extend-exclude = ''' -# A regex preceded with ^/ will apply only to files and directories -# in the root of the project. 
-^/tmp -''' -include = '\.pyi?$' +[tool.ruff] +line-length = 88 +target-version = "py38" +exclude = [ + ".git", + "venv", + ".tox", + "__pycache__", +] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" [tool.ruff.lint.isort] combine-as-imports = true @@ -28,31 +36,23 @@ required-imports = [] classes = ["typing"] [tool.ruff.lint] -select = [ - "B", - "C90", - "E", - "F", - "I", # For isort - "TID", +extend-select = [ + "B", # flake8-bugbear + "C90", # mccabe complexity + "E", # pycodestyle errors + "F", # pyflakes + "G010", # logging.warn -> logging.warning + "I", # isort + "TID", # flake8-tidy-imports ] ignore = [ - # Ignore line length violations (handled by Black) - "E501", - # Ignore whitespace before ':' (matches Black) - "E203", - "E203", - # Allow usages of functools.lru_cache - "B019", - # Allow function call in argument defaults - "B008", + "E501", # Line length violations (handled by formatter) ] [tool.ruff.lint.mccabe] max-complexity = 15 [tool.ruff.lint.flake8-tidy-imports] -# Disallow all relative imports. ban-relative-imports = "all" [tool.ruff.lint.per-file-ignores] diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py index b15f3716b28d0a..09859b6c4344e3 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.py +++ b/metadata-ingestion-modules/dagster-plugin/setup.py @@ -51,7 +51,6 @@ def get_long_description(): "dagster-aws >= 0.11.0", "dagster-snowflake >= 0.11.0", "dagster-snowflake-pandas >= 0.11.0", - "black==22.12.0", "coverage>=5.1", "ruff==0.9.2", "mypy>=1.4.0", diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py index 9a0a9a1b3a75ed..033d3967145017 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py @@ -507,7 +507,7 @@ def generate_datajob( job_property_bag: Dict[str, str] = {} if input_datasets: self.logger.info( - f"Input datasets for {op_def_snap.name} are { list(input_datasets.get(op_def_snap.name, []))}" + f"Input datasets for {op_def_snap.name} are {list(input_datasets.get(op_def_snap.name, []))}" ) inlets.update(input_datasets.get(op_def_snap.name, [])) @@ -515,7 +515,7 @@ def generate_datajob( if output_datasets: self.logger.info( - f"Output datasets for {op_def_snap.name} are { list(output_datasets.get(op_def_snap.name, []))}" + f"Output datasets for {op_def_snap.name} are {list(output_datasets.get(op_def_snap.name, []))}" ) datajob.outlets = list(output_datasets.get(op_def_snap.name, [])) @@ -606,7 +606,7 @@ def emit_job_run( if run.status not in status_result_map: raise Exception( f"Job run status should be either complete, failed or cancelled and it was " - f"{run.status }" + f"{run.status}" ) if run_stats.start_time is not None: @@ -673,7 +673,7 @@ def emit_op_run( if run_step_stats.status not in status_result_map: raise Exception( f"Step run status should be either complete, failed or cancelled and it was " - f"{run_step_stats.status }" + f"{run_step_stats.status}" ) if run_step_stats.start_time is not None: diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py 
b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py index b91a9cfa56d398..5f049d55c16a12 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py @@ -262,7 +262,6 @@ def get_dagster_environment( and context.dagster_run.job_code_origin.repository_origin and context.dagster_run.job_code_origin.repository_origin.code_pointer ): - code_pointer = ( context.dagster_run.job_code_origin.repository_origin.code_pointer ) diff --git a/metadata-ingestion-modules/gx-plugin/build.gradle b/metadata-ingestion-modules/gx-plugin/build.gradle index a0604215426bf7..57a1ed0b2169d3 100644 --- a/metadata-ingestion-modules/gx-plugin/build.gradle +++ b/metadata-ingestion-modules/gx-plugin/build.gradle @@ -25,7 +25,7 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { outputs.file(sentinel_file) commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && " + - "${venv_name}/bin/python -m pip install --upgrade uv && " + + "${venv_name}/bin/pip install --upgrade uv && " + "touch ${sentinel_file}" } @@ -54,16 +54,15 @@ task installDev(type: Exec, dependsOn: [install]) { task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "black --check --diff src/ tests/ && " + "ruff check src/ tests/ && " + + "ruff format --check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && " + - "black src/ tests/ && " + - "ruff check --fix src/ tests/" - "mypy src/ tests/" + "ruff check --fix src/ tests/ && " + + "ruff format src/ tests/ " } task installDevTest(type: Exec, dependsOn: [installDev]) { @@ -105,5 +104,8 @@ clean { delete venv_name delete 'build' delete 'dist' + delete '.ruff_cache' + delete '.mypy_cache' + delete '.pytest_cache' } clean.dependsOn cleanPythonCache diff --git a/metadata-ingestion-modules/gx-plugin/pyproject.toml b/metadata-ingestion-modules/gx-plugin/pyproject.toml index 7d03c2a14bf078..d1e1d0ad479442 100644 --- a/metadata-ingestion-modules/gx-plugin/pyproject.toml +++ b/metadata-ingestion-modules/gx-plugin/pyproject.toml @@ -2,13 +2,21 @@ build-backend = "setuptools.build_meta" requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"] -[tool.black] -extend-exclude = ''' -# A regex preceded with ^/ will apply only to files and directories -# in the root of the project. 
-^/tmp -''' -include = '\.pyi?$' +[tool.ruff] +line-length = 88 +target-version = "py38" +exclude = [ + ".git", + "venv", + ".tox", + "__pycache__", +] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" [tool.ruff.lint.isort] combine-as-imports = true @@ -28,31 +36,23 @@ required-imports = [] classes = ["typing"] [tool.ruff.lint] -select = [ - "B", - "C90", - "E", - "F", - "I", # For isort - "TID", +extend-select = [ + "B", # flake8-bugbear + "C90", # mccabe complexity + "E", # pycodestyle errors + "F", # pyflakes + "G010", # logging.warn -> logging.warning + "I", # isort + "TID", # flake8-tidy-imports ] ignore = [ - # Ignore line length violations (handled by Black) - "E501", - # Ignore whitespace before ':' (matches Black) - "E203", - "E203", - # Allow usages of functools.lru_cache - "B019", - # Allow function call in argument defaults - "B008", + "E501", # Line length violations (handled by formatter) ] [tool.ruff.lint.mccabe] max-complexity = 15 [tool.ruff.lint.flake8-tidy-imports] -# Disallow all relative imports. ban-relative-imports = "all" [tool.ruff.lint.per-file-ignores] diff --git a/metadata-ingestion-modules/gx-plugin/setup.py b/metadata-ingestion-modules/gx-plugin/setup.py index d114a4130ca4f2..fbc4097388993f 100644 --- a/metadata-ingestion-modules/gx-plugin/setup.py +++ b/metadata-ingestion-modules/gx-plugin/setup.py @@ -58,9 +58,8 @@ def get_long_description(): base_dev_requirements = { *base_requirements, *mypy_stubs, - "black==22.12.0", "coverage>=5.1", - "ruff==0.9.1", + "ruff==0.9.2", "mypy>=1.4.0", # pydantic 1.8.2 is incompatible with mypy 0.910. # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. diff --git a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py index 4f2aee52c3319f..1070d4d3d5d66d 100644 --- a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py +++ b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py @@ -108,7 +108,6 @@ def __init__( convert_urns_to_lowercase: bool = False, name: str = "DataHubValidationAction", ): - if has_name_positional_arg: if len(args) >= 1 and isinstance(args[0], str): name = args[0] @@ -164,9 +163,7 @@ def _run( if isinstance( validation_result_suite_identifier, ValidationResultIdentifier ): - expectation_suite_name = ( - validation_result_suite_identifier.expectation_suite_identifier.expectation_suite_name - ) + expectation_suite_name = validation_result_suite_identifier.expectation_suite_identifier.expectation_suite_name run_id = validation_result_suite_identifier.run_id batch_identifier = validation_result_suite_identifier.batch_identifier diff --git a/metadata-ingestion-modules/prefect-plugin/build.gradle b/metadata-ingestion-modules/prefect-plugin/build.gradle index d16201834a0ff0..d13c9fe3c9abe7 100644 --- a/metadata-ingestion-modules/prefect-plugin/build.gradle +++ b/metadata-ingestion-modules/prefect-plugin/build.gradle @@ -54,16 +54,15 @@ task installDev(type: Exec, dependsOn: [install]) { task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "black --check --diff src/ tests/ && " + "ruff check src/ tests/ && " + + "ruff format --check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate
&& " + - "black src/ tests/ && " + - "ruff check --fix src/ tests/" - "mypy src/ tests/ " + "ruff check --fix src/ tests/ && " + + "ruff format src/ tests/ " } task installDevTest(type: Exec, dependsOn: [installDev]) { @@ -111,5 +110,8 @@ clean { delete venv_name delete 'build' delete 'dist' + delete '.ruff_cache' + delete '.mypy_cache' + delete '.pytest_cache' } clean.dependsOn cleanPythonCache diff --git a/metadata-ingestion-modules/prefect-plugin/pyproject.toml b/metadata-ingestion-modules/prefect-plugin/pyproject.toml index 7d03c2a14bf078..d1e1d0ad479442 100644 --- a/metadata-ingestion-modules/prefect-plugin/pyproject.toml +++ b/metadata-ingestion-modules/prefect-plugin/pyproject.toml @@ -2,13 +2,21 @@ build-backend = "setuptools.build_meta" requires = ["setuptools>=54.0.0", "wheel", "pip>=21.0.0"] -[tool.black] -extend-exclude = ''' -# A regex preceded with ^/ will apply only to files and directories -# in the root of the project. -^/tmp -''' -include = '\.pyi?$' +[tool.ruff] +line-length = 88 +target-version = "py38" +exclude = [ + ".git", + "venv", + ".tox", + "__pycache__", +] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" [tool.ruff.lint.isort] combine-as-imports = true @@ -28,31 +36,23 @@ required-imports = [] classes = ["typing"] [tool.ruff.lint] -select = [ - "B", - "C90", - "E", - "F", - "I", # For isort - "TID", +extend-select = [ + "B", # flake8-bugbear + "C90", # mccabe complexity + "E", # pycodestyle errors + "F", # pyflakes + "G010", # logging.warn -> logging.warning + "I", # isort + "TID", # flake8-tidy-imports ] ignore = [ - # Ignore line length violations (handled by Black) - "E501", - # Ignore whitespace before ':' (matches Black) - "E203", - "E203", - # Allow usages of functools.lru_cache - "B019", - # Allow function call in argument defaults - "B008", + "E501", # Line length violations (handled by formatter) ] [tool.ruff.lint.mccabe] max-complexity = 15 [tool.ruff.lint.flake8-tidy-imports] -# Disallow all relative imports. ban-relative-imports = "all" [tool.ruff.lint.per-file-ignores] diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py index 9587f0ed73780b..1d56cae8d938a2 100644 --- a/metadata-ingestion-modules/prefect-plugin/setup.py +++ b/metadata-ingestion-modules/prefect-plugin/setup.py @@ -57,9 +57,8 @@ def get_long_description(): dev_requirements = { *base_requirements, *mypy_stubs, - "black==22.12.0", "coverage>=5.1", - "ruff==0.9.1", + "ruff==0.9.2", "mypy>=1.4.0", # pydantic 1.8.2 is incompatible with mypy 0.910. # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. 
diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py index fcab6b6fd91430..190a249a912d1a 100644 --- a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/datahub_emitter.py @@ -351,9 +351,9 @@ def _emit_tasks( for prefect_future in flow_run_ctx.task_run_futures: if prefect_future.task_run is not None: - task_run_key_map[ - str(prefect_future.task_run.id) - ] = prefect_future.task_run.task_key + task_run_key_map[str(prefect_future.task_run.id)] = ( + prefect_future.task_run.task_key + ) for node in graph_json: datajob_urn = DataJobUrn.create_from_ids( diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 16a6704949c875..be9d69a2f0e4b6 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -110,16 +110,16 @@ task modelDocUpload(type: Exec, dependsOn: [modelDocGen]) { task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "black --check --diff src/ tests/ examples/ && " + "ruff check src/ tests/ examples/ && " + + "ruff format --check src/ tests/ examples/ && " + "mypy --show-traceback --show-error-codes src/ tests/ examples/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "black src/ tests/ examples/ && " + - "ruff check --fix src/ tests/ examples/" + "ruff check --fix src/ tests/ examples/ && " + + "ruff format src/ tests/ examples/ " } def pytest_default_env = "PYTHONDEVMODE=1" @@ -216,6 +216,7 @@ clean { delete 'src/datahub/metadata' delete '../docs/generated' delete 'generated' + delete '.ruff_cache' delete '.mypy_cache' delete '.pytest_cache' delete '.preflight_sentinel' diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md index ebe1cd3df81990..005b0427a5e6ad 100644 --- a/metadata-ingestion/developing.md +++ b/metadata-ingestion/developing.md @@ -177,11 +177,10 @@ The architecture of this metadata ingestion framework is heavily inspired by [Ap ## Code style -We use black, ruff, and mypy to ensure consistent code style and quality. +We use ruff and mypy to ensure consistent code style and quality. ```shell # Assumes: pip install -e '.[dev]' and venv is activated -black src/ tests/ ruff check src/ tests/ mypy src/ tests/ ``` diff --git a/metadata-ingestion/examples/library/run_assertion.py b/metadata-ingestion/examples/library/run_assertion.py index 414e5f46cc7f91..e7c717837eed3c 100644 --- a/metadata-ingestion/examples/library/run_assertion.py +++ b/metadata-ingestion/examples/library/run_assertion.py @@ -16,5 +16,5 @@ assertion_result = graph.run_assertion(urn=assertion_urn, save_result=True) log.info( - f'Assertion result (SUCCESS / FAILURE / ERROR): {assertion_result.get("type")}' + f"Assertion result (SUCCESS / FAILURE / ERROR): {assertion_result.get('type')}" ) diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml index 07f2010fde25f0..1d434eb8c3a94f 100644 --- a/metadata-ingestion/pyproject.toml +++ b/metadata-ingestion/pyproject.toml @@ -2,15 +2,6 @@ build-backend = "setuptools.build_meta" requires = ["setuptools>=63.0.0", "wheel"] -[tool.black] -extend-exclude = ''' -# A regex preceded with ^/ will apply only to files and directories -# in the root of the project.
-^/tmp -''' -include = '\.pyi?$' -target-version = ['py38', 'py39', 'py310', 'py311'] - [tool.ruff.lint.isort] section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"] sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] } @@ -31,23 +22,22 @@ exclude = [ [tool.ruff.lint] extend-select = [ - "B", # Bugbear - "C90", - "E", - "F", - "G010", # logging.warn -> logging.warning - "I", # Import sorting - "TID", # Tidy imports + "B", # flake8-bugbear + "C90", # mccabe complexity + "E", # pycodestyle errors + "F", # pyflakes + "G010", # logging.warn -> logging.warning + "I", # isort + "TID", # flake8-tidy-imports ] extend-ignore = [ - # Ignore line length violations (handled by Black) - "E501", - # Ignore whitespace before ':' (matches Black) - "E203", - # Allow usages of functools.lru_cache - "B019", - # Allow function call in argument defaults - "B008", + "E501", # Handled by formatter + "E111", # Handled by formatter + "E114", # Handled by formatter + "E117", # Handled by formatter + "E203", # Ignore whitespace before ':' (matches Black) + "B019", # Allow usages of functools.lru_cache + "B008", # Allow function call in argument defaults # TODO: Enable these later "B006", # Mutable args "B017", # Do not assert blind exception @@ -61,4 +51,4 @@ max-complexity = 20 ban-relative-imports = "all" [tool.ruff.lint.per-file-ignores] -"__init__.py" = ["F401"] \ No newline at end of file +"__init__.py" = ["F401"] diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index ea1b0ad1582576..2cfdf9837f45ad 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -592,7 +592,6 @@ lint_requirements = { # This is pinned only to avoid spurious errors in CI. # We should make an effort to keep it up to date. - "black==23.3.0", "ruff==0.9.2", "mypy==1.10.1", } diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py index 8704ed13cb6c30..a05386798495de 100644 --- a/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py +++ b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py @@ -20,15 +20,13 @@ class Operator(Protocol): operator: str - def id(self) -> str: - ... + def id(self) -> str: ... - def generate_parameters(self) -> AssertionStdParametersClass: - ... + def generate_parameters(self) -> AssertionStdParametersClass: ... def _generate_assertion_std_parameter( - value: Union[str, int, float, list] + value: Union[str, int, float, list], ) -> AssertionStdParameterClass: if isinstance(value, str): return AssertionStdParameterClass( diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py index dc0c97d1c74e56..145a6097d7336c 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py @@ -19,15 +19,13 @@ class Operator(Protocol): operator: str - def id(self) -> str: - ... + def id(self) -> str: ... - def generate_parameters(self) -> AssertionStdParametersClass: - ... + def generate_parameters(self) -> AssertionStdParametersClass: ... 
def _generate_assertion_std_parameter( - value: Union[str, int, float] + value: Union[str, int, float], ) -> AssertionStdParameterClass: if isinstance(value, str): return AssertionStdParameterClass( diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 39de4d7f80558e..d2035d560716ae 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -321,9 +321,9 @@ def from_yaml( @classmethod def from_datahub(cls, graph: DataHubGraph, id: str) -> DataProduct: - data_product_properties: Optional[ - DataProductPropertiesClass - ] = graph.get_aspect(id, DataProductPropertiesClass) + data_product_properties: Optional[DataProductPropertiesClass] = ( + graph.get_aspect(id, DataProductPropertiesClass) + ) domains: Optional[DomainsClass] = graph.get_aspect(id, DomainsClass) assert domains, "Data Product must have an associated domain. Found none." owners: Optional[OwnershipClass] = graph.get_aspect(id, OwnershipClass) diff --git a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py index 315f2249d2e5cd..bf824a11a77b5d 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py +++ b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py @@ -266,7 +266,8 @@ def generate_mcp( if self.schema_metadata.fields: for field in self.schema_metadata.fields: field_urn = field.urn or make_schema_field_urn( - self.urn, field.id # type: ignore[arg-type] + self.urn, # type: ignore[arg-type] + field.id, # type: ignore[arg-type] ) assert field_urn.startswith("urn:li:schemaField:") diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index 179dbdb231c912..b0b434751ad2cc 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -118,9 +118,9 @@ def fqn(self) -> str: id = StructuredPropertyUrn.from_string(self.urn).id if self.qualified_name is not None: # ensure that qualified name and ID match - assert ( - self.qualified_name == id - ), "ID in the urn and the qualified_name must match" + assert self.qualified_name == id, ( + "ID in the urn and the qualified_name must match" + ) return id @validator("urn", pre=True, always=True) @@ -184,9 +184,9 @@ def create(file: str, graph: DataHubGraph) -> None: @classmethod def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties": - structured_property: Optional[ - StructuredPropertyDefinitionClass - ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass) + structured_property: Optional[StructuredPropertyDefinitionClass] = ( + graph.get_aspect(urn, StructuredPropertyDefinitionClass) + ) if structured_property is None: raise Exception( "StructuredPropertyDefinition aspect is None. Unable to create structured property." 
diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index f6b5ba6176c59d..1f13391644c6c8 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -412,7 +412,7 @@ def generate_access_token( def ensure_has_system_metadata( event: Union[ MetadataChangeProposal, MetadataChangeProposalWrapper, MetadataChangeEvent - ] + ], ) -> None: if event.systemMetadata is None: event.systemMetadata = SystemMetadataClass() diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 86bcd7eff1cbfc..b744ac573aed6e 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -296,9 +296,9 @@ def _restore( restore_indices: Optional[bool], primary_restore_file: Optional[str], ) -> int: - assert ( - restore_primary or restore_indices - ), "Either restore_primary or restore_indices must be set" + assert restore_primary or restore_indices, ( + "Either restore_primary or restore_indices must be set" + ) msg = "datahub> " if restore_primary: msg += f"Will restore primary database from {primary_restore_file}. " @@ -314,9 +314,9 @@ def _restore( assert primary_restore_file resolved_restore_file = os.path.expanduser(primary_restore_file) logger.info(f"Restoring primary db from backup at {resolved_restore_file}") - assert os.path.exists( - resolved_restore_file - ), f"File {resolved_restore_file} does not exist" + assert os.path.exists(resolved_restore_file), ( + f"File {resolved_restore_file} does not exist" + ) with open(resolved_restore_file) as fp: result = subprocess.run( [ diff --git a/metadata-ingestion/src/datahub/cli/lite_cli.py b/metadata-ingestion/src/datahub/cli/lite_cli.py index 957ee16245dd81..90bbb353deab18 100644 --- a/metadata-ingestion/src/datahub/cli/lite_cli.py +++ b/metadata-ingestion/src/datahub/cli/lite_cli.py @@ -176,7 +176,7 @@ def get( ) ) end_time = time.time() - logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis") + logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis") @lite.command() @@ -228,7 +228,7 @@ def ls(path: Optional[str]) -> None: try: browseables = lite.ls(path) end_time = time.time() - logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis") + logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis") auto_complete: List[AutoComplete] = [ b.auto_complete for b in browseables if b.auto_complete is not None ] diff --git a/metadata-ingestion/src/datahub/cli/migrate.py b/metadata-ingestion/src/datahub/cli/migrate.py index 1bf1211674f596..3bd1b6fc4dc124 100644 --- a/metadata-ingestion/src/datahub/cli/migrate.py +++ b/metadata-ingestion/src/datahub/cli/migrate.py @@ -426,9 +426,9 @@ def batch_get_ids( entities_yielded += 1 log.debug(f"yielding {x}") yield x - assert ( - entities_yielded == num_entities - ), "Did not delete all entities, try running this command again!" + assert entities_yielded == num_entities, ( + "Did not delete all entities, try running this command again!" 
+ ) else: log.error(f"Failed to execute batch get with {str(response.content)}") response.raise_for_status() diff --git a/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py b/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py index dad724bfe11157..c0d93af90ada00 100644 --- a/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py @@ -136,9 +136,9 @@ def extras_list_to_dict(extras: List[str]) -> Dict[str, str]: extra_properties: Dict[str, str] = dict() for x in extras: parts = x.split("=") - assert ( - len(parts) == 2 - ), f"Invalid value for extras {x}, should be in format key=value" + assert len(parts) == 2, ( + f"Invalid value for extras {x}, should be in format key=value" + ) extra_properties[parts[0]] = parts[1] return extra_properties diff --git a/metadata-ingestion/src/datahub/cli/timeline_cli.py b/metadata-ingestion/src/datahub/cli/timeline_cli.py index 37089e6f051f0d..174ce63e84ef4c 100644 --- a/metadata-ingestion/src/datahub/cli/timeline_cli.py +++ b/metadata-ingestion/src/datahub/cli/timeline_cli.py @@ -50,7 +50,7 @@ def pretty_id(id: Optional[str]) -> str: if id.startswith("urn:li:dataset"): dataset_key = dataset_urn_to_key(id) if dataset_key: - return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:'):], fg='white')}:{click.style(dataset_key.name, fg='white')}" + return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:') :], fg='white')}:{click.style(dataset_key.name, fg='white')}" # failed to prettify, return original return id diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 08817d9d5fdb93..8052de1b0669c4 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -200,8 +200,7 @@ class IgnorableError(MetaError): @runtime_checkable class ExceptionWithProps(Protocol): - def get_telemetry_props(self) -> Dict[str, Any]: - ... + def get_telemetry_props(self) -> Dict[str, Any]: ... def should_show_stack_trace(exc: Exception) -> bool: diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index e7e9bfd43adca5..7e68e9f80da4ff 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -121,9 +121,9 @@ def infer_repo_ssh_locator( repo: str = values["repo"] if repo.startswith(_GITHUB_PREFIX): - return f"git@github.com:{repo[len(_GITHUB_PREFIX):]}.git" + return f"git@github.com:{repo[len(_GITHUB_PREFIX) :]}.git" elif repo.startswith(_GITLAB_PREFIX): - return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX):]}.git" + return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX) :]}.git" else: raise ValueError( "Unable to infer repo_ssh_locator from repo. Please set repo_ssh_locator manually." diff --git a/metadata-ingestion/src/datahub/configuration/time_window_config.py b/metadata-ingestion/src/datahub/configuration/time_window_config.py index b3cc0316091173..5fabcf904d3219 100644 --- a/metadata-ingestion/src/datahub/configuration/time_window_config.py +++ b/metadata-ingestion/src/datahub/configuration/time_window_config.py @@ -47,7 +47,10 @@ class BaseTimeWindowConfig(ConfigModel): default_factory=lambda: datetime.now(tz=timezone.utc), description="Latest date of lineage/usage to consider. 
Default: Current time in UTC", ) - start_time: datetime = Field(default=None, description="Earliest date of lineage/usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`). You can also specify relative time with respect to end_time such as '-7 days' Or '-7d'.") # type: ignore + start_time: datetime = Field( + default=None, + description="Earliest date of lineage/usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`). You can also specify relative time with respect to end_time such as '-7 days' Or '-7d'.", + ) # type: ignore @pydantic.validator("start_time", pre=True, always=True) def default_start_time( @@ -63,12 +66,14 @@ def default_start_time( # This is where start_time str is resolved to datetime try: delta = parse_relative_timespan(v) - assert delta < timedelta( - 0 - ), "Relative start time should start with minus sign (-) e.g. '-2 days'." + assert delta < timedelta(0), ( + "Relative start time should start with minus sign (-) e.g. '-2 days'." + ) assert abs(delta) >= get_bucket_duration_delta( values["bucket_duration"] - ), "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'." + ), ( + "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'." + ) # The end_time's default value is not yet populated, in which case # we can just manually generate it here. diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index f095fffbaea6b4..f5da90a86c9ef6 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -88,13 +88,11 @@ def get_sys_time() -> int: @overload -def make_ts_millis(ts: None) -> None: - ... +def make_ts_millis(ts: None) -> None: ... @overload -def make_ts_millis(ts: datetime) -> int: - ... +def make_ts_millis(ts: datetime) -> int: ... def make_ts_millis(ts: Optional[datetime]) -> Optional[int]: @@ -105,13 +103,11 @@ def make_ts_millis(ts: Optional[datetime]) -> Optional[int]: @overload -def parse_ts_millis(ts: float) -> datetime: - ... +def parse_ts_millis(ts: float) -> datetime: ... @overload -def parse_ts_millis(ts: None) -> None: - ... +def parse_ts_millis(ts: None) -> None: ... def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]: diff --git a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py index 17026a4114c128..e51c37d96e90f0 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py @@ -33,8 +33,7 @@ @runtime_checkable class SupportsToObj(Protocol): - def to_obj(self) -> Any: - ... + def to_obj(self) -> Any: ... 
def _recursive_to_obj(obj: Any) -> Any: diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py index 78a091f1ffe689..92ee158661d3d4 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py +++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py @@ -55,15 +55,9 @@ def convert_chart_info_to_patch( aspect.externalUrl ).set_type(aspect.type).set_title(aspect.title).set_access( aspect.access - ).set_last_modified( - aspect.lastModified - ).set_last_refreshed( + ).set_last_modified(aspect.lastModified).set_last_refreshed( aspect.lastRefreshed - ).set_description( - aspect.description - ).add_inputs( - aspect.inputs - ) + ).set_description(aspect.description).add_inputs(aspect.inputs) values = patch_builder.build() if values: diff --git a/metadata-ingestion/src/datahub/ingestion/api/report.py b/metadata-ingestion/src/datahub/ingestion/api/report.py index 32810189acd00b..8cfca5782bee40 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/report.py +++ b/metadata-ingestion/src/datahub/ingestion/api/report.py @@ -21,8 +21,7 @@ @runtime_checkable class SupportsAsObj(Protocol): - def as_obj(self) -> dict: - ... + def as_obj(self) -> dict: ... @dataclass diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index f3e5b1db6a1c85..08af39cd24982a 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -48,7 +48,7 @@ def auto_workunit( - stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]] + stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]], ) -> Iterable[MetadataWorkUnit]: """Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s.""" diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py index 88d1fcc52e2196..1c440642e06d8b 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py @@ -131,9 +131,9 @@ def get_recursive(self, schema: Dict) -> Optional[str]: for i, schema_type in enumerate(p.schema_types): if schema_type == schema_str: # return the corresponding type for the schema that's a match - assert ( - len(p.type) > i - ), f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length" + assert len(p.type) > i, ( + f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length" + ) return p.type[i] return None diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py index d5af4f7a2389c0..dbb851c74e7e34 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py @@ -263,15 +263,13 @@ def _get_type_annotation(schema: SchemaOrField) -> str: @overload def _get_underlying_type_if_option_as_union( schema: SchemaOrField, default: SchemaOrField - ) -> SchemaOrField: - ... + ) -> SchemaOrField: ... 
@staticmethod @overload def _get_underlying_type_if_option_as_union( schema: SchemaOrField, default: Optional[SchemaOrField] = None - ) -> Optional[SchemaOrField]: - ... + ) -> Optional[SchemaOrField]: ... @staticmethod def _get_underlying_type_if_option_as_union( @@ -386,7 +384,7 @@ def emit(self) -> Iterable[SchemaField]: if "deprecated" in merged_props: description = ( - f"DEPRECATED: {merged_props['deprecated']}\n" + f'DEPRECATED: {merged_props["deprecated"]}\n' + description if description else "" diff --git a/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py b/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py index 9c34c4f83b0a93..beec42724529e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py +++ b/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py @@ -17,9 +17,9 @@ def parse_s3_path(path: str) -> "S3Path": def assert_ok_status(s3_response): is_ok = s3_response["ResponseMetadata"]["HTTPStatusCode"] == 200 - assert ( - is_ok - ), f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}" + assert is_ok, ( + f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}" + ) @dataclass diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py index 50268768d0ce9f..ba03083854e785 100644 --- a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py +++ b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py @@ -148,9 +148,9 @@ def input_config_selectively_overrides_default_config(cls, info_types_config): weight, ) in custom_infotype_config.Prediction_Factors_and_Weights.dict().items(): if weight > 0: - assert ( - getattr(custom_infotype_config, factor) is not None - ), f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}" + assert getattr(custom_infotype_config, factor) is not None, ( + f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}" + ) # Custom infotype supports only regex based prediction for column values if custom_infotype_config.Prediction_Factors_and_Weights.Values > 0: @@ -158,7 +158,9 @@ def input_config_selectively_overrides_default_config(cls, info_types_config): assert ( custom_infotype_config.Values.prediction_type == ValuePredictionType.REGEX - ), f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported." + ), ( + f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported." + ) return info_types_config diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 8c5f894a072d93..48a008536ed1ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -519,9 +519,9 @@ def get_aspects_for_entity( :return: Optionally, a map of aspect_name to aspect_value as a dictionary if present, aspect_value will be set to None if that aspect was not found. Returns None on HTTP status 404. 
:raises HttpError: if the HTTP response is not a 200 """ - assert len(aspects) == len( - aspect_types - ), f"number of aspects requested ({len(aspects)}) should be the same as number of aspect types provided ({len(aspect_types)})" + assert len(aspects) == len(aspect_types), ( + f"number of aspects requested ({len(aspects)}) should be the same as number of aspect types provided ({len(aspect_types)})" + ) # TODO: generate aspects list from type classes response_json = self.get_entity_raw(entity_urn, aspects) @@ -1576,9 +1576,7 @@ def run_assertion( ... assertionResult } } - """ % ( - self._assertion_result_shared() - ) + """ % (self._assertion_result_shared()) variables = { "assertionUrn": urn, diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index ef59ba7a3b58b4..25cbd340c9674b 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -109,9 +109,9 @@ def on_failure( mcp.systemMetadata.properties = {} if "workunit_id" not in mcp.systemMetadata.properties: # update the workunit id - mcp.systemMetadata.properties[ - "workunit_id" - ] = record_envelope.metadata["workunit_id"] + mcp.systemMetadata.properties["workunit_id"] = ( + record_envelope.metadata["workunit_id"] + ) record_envelope.record = mcp self.file_sink.write_record_async(record_envelope, self.logging_callback) @@ -701,7 +701,7 @@ def pretty_print_summary( num_failures_sink = len(self.sink.get_report().failures) click.secho( message_template.format( - status=f"with at least {num_failures_source+num_failures_sink} failures" + status=f"with at least {num_failures_source + num_failures_sink} failures" ), fg=self._get_text_color( running=currently_running, failures=True, warnings=False @@ -719,7 +719,7 @@ def pretty_print_summary( num_warn_global = len(global_warnings) click.secho( message_template.format( - status=f"with at least {num_warn_source+num_warn_sink+num_warn_global} warnings" + status=f"with at least {num_warn_source + num_warn_sink + num_warn_global} warnings" ), fg=self._get_text_color( running=currently_running, failures=False, warnings=True diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py index 7a4e7ec52a8e96..53e31aa2ea96e1 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py @@ -92,9 +92,9 @@ class PipelineConfig(ConfigModel): pipeline_name: Optional[str] = None failure_log: FailureLoggingConfig = FailureLoggingConfig() - _raw_dict: Optional[ - dict - ] = None # the raw dict that was parsed to construct this config + _raw_dict: Optional[dict] = ( + None # the raw dict that was parsed to construct this config + ) @validator("run_id", pre=True, always=True) def run_id_should_be_semantic( diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py b/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py index 9f6d13a08b182e..d12ff7415faefc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py @@ -85,8 +85,8 @@ def ensure_field_level_settings_are_normalized( if field_level_metric.startswith("include_field_"): values.setdefault(field_level_metric, False) - assert ( - max_num_fields_to_profile is None - ), 
f"{max_num_fields_to_profile_key} should be set to None" + assert max_num_fields_to_profile is None, ( + f"{max_num_fields_to_profile_key} should be set to None" + ) return values diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py index e4f9cd0ee7e018..586e7a3af3bcd1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py @@ -508,7 +508,12 @@ def abs_browser( ): abs_path = self.create_abs_path(obj.name) logger.debug(f"Sampling file: {abs_path}") - yield abs_path, obj.name, obj.last_modified, obj.size, + yield ( + abs_path, + obj.name, + obj.last_modified, + obj.size, + ) except Exception as e: # This odd check if being done because boto does not have a proper exception to catch # The exception that appears in stacktrace cannot actually be caught without a lot more work @@ -552,9 +557,12 @@ def local_browser( if os.path.isfile(prefix): logger.debug(f"Scanning single local file: {prefix}") file_name = prefix - yield prefix, file_name, datetime.utcfromtimestamp( - os.path.getmtime(prefix) - ), os.path.getsize(prefix) + yield ( + prefix, + file_name, + datetime.utcfromtimestamp(os.path.getmtime(prefix)), + os.path.getsize(prefix), + ) else: logger.debug(f"Scanning files under local folder: {prefix}") for root, dirs, files in os.walk(prefix): @@ -565,9 +573,12 @@ def local_browser( full_path = PurePath( os.path.normpath(os.path.join(root, file)) ).as_posix() - yield full_path, file, datetime.utcfromtimestamp( - os.path.getmtime(full_path) - ), os.path.getsize(full_path) + yield ( + full_path, + file, + datetime.utcfromtimestamp(os.path.getmtime(full_path)), + os.path.getsize(full_path), + ) def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.container_WU_creator = ContainerWUCreator( diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 30e81643837375..2509927854d4a0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -521,7 +521,7 @@ def process_dataflow_node( # otherwise, a node represents a transformation else: node_urn = mce_builder.make_data_job_urn_with_flow( - flow_urn, job_id=f'{node["NodeType"]}-{node["Id"]}' + flow_urn, job_id=f"{node['NodeType']}-{node['Id']}" ) return { @@ -679,7 +679,7 @@ def get_datajob_wu(self, node: Dict[str, Any], job_name: str) -> MetadataWorkUni ) ) - return MetadataWorkUnit(id=f'{job_name}-{node["Id"]}', mce=mce) + return MetadataWorkUnit(id=f"{job_name}-{node['Id']}", mce=mce) def get_all_databases(self) -> Iterable[Mapping[str, Any]]: logger.debug("Getting all databases") @@ -750,13 +750,13 @@ def get_lineage_if_enabled( ) -> Optional[MetadataWorkUnit]: if self.source_config.emit_s3_lineage: # extract dataset properties aspect - dataset_properties: Optional[ - DatasetPropertiesClass - ] = mce_builder.get_aspect_if_available(mce, DatasetPropertiesClass) + dataset_properties: Optional[DatasetPropertiesClass] = ( + mce_builder.get_aspect_if_available(mce, DatasetPropertiesClass) + ) # extract dataset schema aspect - schema_metadata: Optional[ - SchemaMetadataClass - ] = mce_builder.get_aspect_if_available(mce, SchemaMetadataClass) + schema_metadata: Optional[SchemaMetadataClass] = ( + mce_builder.get_aspect_if_available(mce, SchemaMetadataClass) + ) if dataset_properties and "Location" in 
dataset_properties.customProperties: location = dataset_properties.customProperties["Location"] @@ -765,9 +765,9 @@ def get_lineage_if_enabled( location, self.source_config.env ) assert self.ctx.graph - schema_metadata_for_s3: Optional[ - SchemaMetadataClass - ] = self.ctx.graph.get_schema_metadata(s3_dataset_urn) + schema_metadata_for_s3: Optional[SchemaMetadataClass] = ( + self.ctx.graph.get_schema_metadata(s3_dataset_urn) + ) if self.source_config.glue_s3_lineage_direction == "upstream": fine_grained_lineages = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py index c4561b9d9e676a..d46d1c099383fe 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py @@ -257,7 +257,7 @@ def get_feature_wu( mce = MetadataChangeEvent(proposedSnapshot=feature_snapshot) return MetadataWorkUnit( - id=f'{feature_group_details["FeatureGroupName"]}-{feature["FeatureName"]}', + id=f"{feature_group_details['FeatureGroupName']}-{feature['FeatureName']}", mce=mce, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index 0f433aaecf2d96..f1374117af775f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -212,7 +212,7 @@ def get_endpoint_wu( mce = MetadataChangeEvent(proposedSnapshot=endpoint_snapshot) return MetadataWorkUnit( - id=f'{endpoint_details["EndpointName"]}', + id=f"{endpoint_details['EndpointName']}", mce=mce, ) @@ -503,7 +503,7 @@ def get_model_wu( mce = MetadataChangeEvent(proposedSnapshot=model_snapshot) return MetadataWorkUnit( - id=f'{model_details["ModelName"]}', + id=f"{model_details['ModelName']}", mce=mce, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 508b4bbaa277dc..ceb010a7f0675f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -132,9 +132,9 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): self.filters = BigQueryFilter(self.config, self.report) self.identifiers = BigQueryIdentifierBuilder(self.config, self.report) - redundant_lineage_run_skip_handler: Optional[ - RedundantLineageRunSkipHandler - ] = None + redundant_lineage_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = ( + None + ) if self.config.enable_stateful_lineage_ingestion: redundant_lineage_run_skip_handler = RedundantLineageRunSkipHandler( source=self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py index 42f82704c81b99..d35c5265878c03 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py @@ -37,9 +37,9 @@ class BigqueryTableIdentifier: # Note: this regex may get overwritten by the sharded_table_pattern config. # The class-level constant, however, will not be overwritten. 
- _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[ - str - ] = _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX + _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[str] = ( + _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX + ) _BIGQUERY_WILDCARD_REGEX: ClassVar[str] = "((_(\\d+)?)\\*$)|\\*$" _BQ_SHARDED_TABLE_SUFFIX: str = "_yyyymmdd" diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index afbe919df4dcae..57bfa2e3090d31 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -137,9 +137,9 @@ class BigQueryCredential(ConfigModel): @root_validator(skip_on_failure=True) def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: if values.get("client_x509_cert_url") is None: - values[ - "client_x509_cert_url" - ] = f'https://www.googleapis.com/robot/v1/metadata/x509/{values["client_email"]}' + values["client_x509_cert_url"] = ( + f"https://www.googleapis.com/robot/v1/metadata/x509/{values['client_email']}" + ) return values def create_credential_temp_file(self) -> str: @@ -611,9 +611,9 @@ def validate_bigquery_audit_metadata_datasets( cls, v: Optional[List[str]], values: Dict ) -> Optional[List[str]]: if values.get("use_exported_bigquery_audit_metadata"): - assert ( - v and len(v) > 0 - ), "`bigquery_audit_metadata_datasets` should be set if using `use_exported_bigquery_audit_metadata: True`." + assert v and len(v) > 0, ( + "`bigquery_audit_metadata_datasets` should be set if using `use_exported_bigquery_audit_metadata: True`." + ) return v diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py index 9da2aceb19220a..7dc0e4195d5dc9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py @@ -87,9 +87,9 @@ def get_platform_resource( key=platform_resource_key, graph_client=self.graph ) if platform_resource: - self.platform_resource_cache[ - platform_resource_key.primary_key - ] = platform_resource + self.platform_resource_cache[platform_resource_key.primary_key] = ( + platform_resource + ) return platform_resource return None @@ -115,7 +115,11 @@ def generate_label_platform_resource( and platform_resource.resource_info.value ): try: - existing_info: Optional[BigQueryLabelInfo] = platform_resource.resource_info.value.as_pydantic_object(BigQueryLabelInfo) # type: ignore + existing_info: Optional[BigQueryLabelInfo] = ( + platform_resource.resource_info.value.as_pydantic_object( # type: ignore + BigQueryLabelInfo + ) + ) except ValidationError as e: logger.error( f"Error converting existing value to BigQueryLabelInfo: {e}. Creating new one. Maybe this is because of a non backward compatible schema change." 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index 56e930dfb811f1..ebfbbf0639c38c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -311,8 +311,10 @@ def gen_dataset_containers( platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource( label, tag_urn, managed_by_datahub=False ) - label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore - BigQueryLabelInfo + label_info: BigQueryLabelInfo = ( + platform_resource.resource_info.value.as_pydantic_object( # type: ignore + BigQueryLabelInfo + ) ) tag_urn = TagUrn.from_string(label_info.datahub_urn) @@ -820,8 +822,10 @@ def gen_table_dataset_workunits( platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource( label, tag_urn, managed_by_datahub=False ) - label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore - BigQueryLabelInfo + label_info: BigQueryLabelInfo = ( + platform_resource.resource_info.value.as_pydantic_object( # type: ignore + BigQueryLabelInfo + ) ) tag_urn = TagUrn.from_string(label_info.datahub_urn) @@ -860,8 +864,10 @@ def gen_view_dataset_workunits( platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource( label, tag_urn, managed_by_datahub=False ) - label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object( # type: ignore - BigQueryLabelInfo + label_info: BigQueryLabelInfo = ( + platform_resource.resource_info.value.as_pydantic_object( # type: ignore + BigQueryLabelInfo + ) ) tag_urn = TagUrn.from_string(label_info.datahub_urn) @@ -1203,9 +1209,9 @@ def get_tables_for_dataset( report=self.report, ) - self.report.metadata_extraction_sec[ - f"{project_id}.{dataset.name}" - ] = timer.elapsed_seconds(digits=2) + self.report.metadata_extraction_sec[f"{project_id}.{dataset.name}"] = ( + timer.elapsed_seconds(digits=2) + ) def get_core_table_details( self, dataset_name: str, project_id: str, temp_table_dataset_prefix: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 433282a21fdb66..da82c6a06f0395 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -697,7 +697,7 @@ def _create_lineage_map( if parsed_queries[-1]: query = f"""create table `{destination_table.get_sanitized_table_ref().table_identifier.get_table_name()}` AS ( - {parsed_queries[-1].sql(dialect='bigquery')} + {parsed_queries[-1].sql(dialect="bigquery")} )""" else: query = e.query @@ -809,11 +809,11 @@ def get_upstream_tables( upstream_lineage, temp_table_upstream ) - upstreams[ - ref_temp_table_upstream - ] = _merge_lineage_edge_columns( - upstreams.get(ref_temp_table_upstream), - collapsed_lineage, + upstreams[ref_temp_table_upstream] = ( + _merge_lineage_edge_columns( + upstreams.get(ref_temp_table_upstream), + collapsed_lineage, + ) ) else: upstreams[upstream_table_ref] = _merge_lineage_edge_columns( @@ -1004,9 +1004,9 @@ def get_lineage_for_external_table( dataset_urn ) for gcs_dataset_urn in gcs_urns: - 
schema_metadata_for_gcs: Optional[ - SchemaMetadataClass - ] = graph.get_schema_metadata(gcs_dataset_urn) + schema_metadata_for_gcs: Optional[SchemaMetadataClass] = ( + graph.get_schema_metadata(gcs_dataset_urn) + ) if schema_metadata and schema_metadata_for_gcs: fine_grained_lineage = self.get_fine_grained_lineages_with_gcs( dataset_urn, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py index 2ac40a48de4cc7..8a558d7736a389 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py @@ -387,9 +387,7 @@ class BigqueryQuery: OR protoPayload.metadata.tableDataRead.reason = "JOB" ) -""".strip( - "\t \n" -) +""".strip("\t \n") def bigquery_audit_metadata_query_template_lineage( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py index 08c9beaa73c53b..0f9471219c6590 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py @@ -271,9 +271,9 @@ def get_workunits_internal( # Preprocessing stage that deduplicates the queries using query hash per usage bucket # Note: FileBackedDict is an ordered dictionary, so the order of execution of # queries is inherently maintained - queries_deduped: FileBackedDict[ - Dict[int, ObservedQuery] - ] = self.deduplicate_queries(queries) + queries_deduped: FileBackedDict[Dict[int, ObservedQuery]] = ( + self.deduplicate_queries(queries) + ) self.report.num_unique_queries = len(queries_deduped) logger.info(f"Found {self.report.num_unique_queries} unique queries") diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index f2f6cc731858d1..c2b849e58fc6dc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -763,9 +763,9 @@ def _create_operational_custom_properties( ) if event.query_event.default_dataset: - custom_properties[ - "defaultDatabase" - ] = event.query_event.default_dataset + custom_properties["defaultDatabase"] = ( + event.query_event.default_dataset + ) if event.read_event: if event.read_event.readReason: custom_properties["readReason"] = event.read_event.readReason diff --git a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py index dcdccc08ce0483..062c64d45767fc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py +++ b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py @@ -91,7 +91,6 @@ class KeyspaceKey(ContainerKey): supported=True, ) class CassandraSource(StatefulIngestionSourceBase): - """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py index 75a0ba0c617734..b467ca0aca6be4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py @@ -107,10 +107,10 @@ class CassandraToSchemaFieldConverter: 
@staticmethod def get_column_type(cassandra_column_type: str) -> SchemaFieldDataType: - type_class: Optional[ - Type - ] = CassandraToSchemaFieldConverter._field_type_to_schema_field_type.get( - cassandra_column_type + type_class: Optional[Type] = ( + CassandraToSchemaFieldConverter._field_type_to_schema_field_type.get( + cassandra_column_type + ) ) if type_class is None: logger.warning( diff --git a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py index 2b75d0dca53cb7..5ba4dd13fb2ac9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py @@ -293,9 +293,9 @@ def _get_schema_and_fields( def _load_json_schema_with_resolved_references( self, schema: Schema, name: str, subject: str ) -> dict: - imported_json_schemas: List[ - JsonSchemaWrapper - ] = self.get_schemas_from_confluent_ref_json(schema, name=name, subject=subject) + imported_json_schemas: List[JsonSchemaWrapper] = ( + self.get_schemas_from_confluent_ref_json(schema, name=name, subject=subject) + ) schema_dict = json.loads(schema.schema_str) reference_map = {} for imported_schema in imported_json_schemas: @@ -332,9 +332,9 @@ def _get_schema_fields( ) elif schema.schema_type == "PROTOBUF": - imported_schemas: List[ - ProtobufSchema - ] = self.get_schemas_from_confluent_ref_protobuf(schema) + imported_schemas: List[ProtobufSchema] = ( + self.get_schemas_from_confluent_ref_protobuf(schema) + ) base_name: str = topic.replace(".", "_") fields = protobuf_util.protobuf_schema_to_mce_fields( ProtobufSchema( diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index 42e025073b534e..8ebb7b9ef7fbdf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -371,11 +371,11 @@ def get_resource_workunits( domain: Optional[str], description: Optional[str], ) -> Iterable[MetadataWorkUnit]: - maybe_terms_wu: Optional[ - MetadataWorkUnit - ] = self.get_resource_glossary_terms_work_unit( - entity_urn=entity_urn, - term_associations=term_associations, + maybe_terms_wu: Optional[MetadataWorkUnit] = ( + self.get_resource_glossary_terms_work_unit( + entity_urn=entity_urn, + term_associations=term_associations, + ) ) if maybe_terms_wu: self.report.num_glossary_term_workunits_produced += 1 @@ -389,31 +389,31 @@ def get_resource_workunits( self.report.num_tag_workunits_produced += 1 yield maybe_tags_wu - maybe_owners_wu: Optional[ - MetadataWorkUnit - ] = self.get_resource_owners_work_unit( - entity_urn=entity_urn, - owners=owners, + maybe_owners_wu: Optional[MetadataWorkUnit] = ( + self.get_resource_owners_work_unit( + entity_urn=entity_urn, + owners=owners, + ) ) if maybe_owners_wu: self.report.num_owners_workunits_produced += 1 yield maybe_owners_wu - maybe_domain_wu: Optional[ - MetadataWorkUnit - ] = self.get_resource_domain_work_unit( - entity_urn=entity_urn, - domain=domain, + maybe_domain_wu: Optional[MetadataWorkUnit] = ( + self.get_resource_domain_work_unit( + entity_urn=entity_urn, + domain=domain, + ) ) if maybe_domain_wu: self.report.num_domain_workunits_produced += 1 yield maybe_domain_wu - maybe_description_wu: Optional[ - MetadataWorkUnit - ] = self.get_resource_description_work_unit( - entity_urn=entity_urn, - description=description, + 
maybe_description_wu: Optional[MetadataWorkUnit] = ( + self.get_resource_description_work_unit( + entity_urn=entity_urn, + description=description, + ) ) if maybe_description_wu: self.report.num_description_workunits_produced += 1 @@ -426,9 +426,9 @@ def process_sub_resource_row( needs_write: bool, ) -> Tuple[EditableSchemaMetadataClass, bool]: field_path: str = sub_resource_row.field_path - term_associations: List[ - GlossaryTermAssociationClass - ] = sub_resource_row.term_associations + term_associations: List[GlossaryTermAssociationClass] = ( + sub_resource_row.term_associations + ) tag_associations: List[TagAssociationClass] = sub_resource_row.tag_associations description: Optional[str] = sub_resource_row.description has_terms: bool = len(term_associations) > 0 @@ -517,9 +517,9 @@ def get_sub_resource_work_units(self) -> Iterable[MetadataWorkUnit]: # Boolean field to tell whether we need to write an MCPW. needs_write = False - current_editable_schema_metadata: Optional[ - EditableSchemaMetadataClass - ] = None + current_editable_schema_metadata: Optional[EditableSchemaMetadataClass] = ( + None + ) if self.ctx.graph and not self.should_overwrite: # Fetch the current editable schema metadata current_editable_schema_metadata = self.ctx.graph.get_aspect( @@ -655,9 +655,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: entity_urn = row["resource"] entity_type = Urn.from_string(row["resource"]).get_type() - term_associations: List[ - GlossaryTermAssociationClass - ] = self.maybe_extract_glossary_terms(row) + term_associations: List[GlossaryTermAssociationClass] = ( + self.maybe_extract_glossary_terms(row) + ) tag_associations: List[TagAssociationClass] = self.maybe_extract_tags(row) owners: List[OwnerClass] = self.maybe_extract_owners(row, is_resource_row) diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py index ee105f4862caba..51a25829d21dba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py @@ -152,7 +152,9 @@ def execute_server_cursor( ) -> Iterable[Dict[str, Any]]: with self.engine.connect() as conn: if self.engine.dialect.name in ["postgresql", "mysql", "mariadb"]: - with conn.begin(): # Transaction required for PostgreSQL server-side cursor + with ( + conn.begin() + ): # Transaction required for PostgreSQL server-side cursor # Note that stream_results=True is mainly supported by PostgreSQL and MySQL-based dialects. 
# https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.execution_options.params.stream_results conn = conn.execution_options( @@ -222,7 +224,7 @@ def _parse_row( ) except Exception as e: logger.warning( - f'Failed to parse metadata for {row["urn"]}: {e}', exc_info=True + f"Failed to parse metadata for {row['urn']}: {e}", exc_info=True ) self.report.num_database_parse_errors += 1 self.report.database_parse_errors.setdefault( diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index 5042f6d69b261a..41b59a9c8b892c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -194,20 +194,20 @@ def infer_metadata_endpoint(access_url: str) -> Optional[str]: _DBT_FIELDS_BY_TYPE = { "models": f""" - { _DBT_GRAPHQL_COMMON_FIELDS } - { _DBT_GRAPHQL_NODE_COMMON_FIELDS } - { _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS } + {_DBT_GRAPHQL_COMMON_FIELDS} + {_DBT_GRAPHQL_NODE_COMMON_FIELDS} + {_DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS} dependsOn materializedType """, "seeds": f""" - { _DBT_GRAPHQL_COMMON_FIELDS } - { _DBT_GRAPHQL_NODE_COMMON_FIELDS } - { _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS } + {_DBT_GRAPHQL_COMMON_FIELDS} + {_DBT_GRAPHQL_NODE_COMMON_FIELDS} + {_DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS} """, "sources": f""" - { _DBT_GRAPHQL_COMMON_FIELDS } - { _DBT_GRAPHQL_NODE_COMMON_FIELDS } + {_DBT_GRAPHQL_COMMON_FIELDS} + {_DBT_GRAPHQL_NODE_COMMON_FIELDS} identifier sourceName sourceDescription @@ -218,9 +218,9 @@ def infer_metadata_endpoint(access_url: str) -> Optional[str]: loader """, "snapshots": f""" - { _DBT_GRAPHQL_COMMON_FIELDS } - { _DBT_GRAPHQL_NODE_COMMON_FIELDS } - { _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS } + {_DBT_GRAPHQL_COMMON_FIELDS} + {_DBT_GRAPHQL_NODE_COMMON_FIELDS} + {_DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS} parentsSources {{ uniqueId }} @@ -229,7 +229,7 @@ def infer_metadata_endpoint(access_url: str) -> Optional[str]: }} """, "tests": f""" - { _DBT_GRAPHQL_COMMON_FIELDS } + {_DBT_GRAPHQL_COMMON_FIELDS} state columnName status @@ -315,7 +315,7 @@ def _send_graphql_query( res = response.json() if "errors" in res: raise ValueError( - f'Unable to fetch metadata from dbt Cloud: {res["errors"]}' + f"Unable to fetch metadata from dbt Cloud: {res['errors']}" ) data = res["data"] except JSONDecodeError as e: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 499e7e1231d050..fa85308b325979 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -506,16 +506,18 @@ class DBTNode: materialization: Optional[str] # table, view, ephemeral, incremental, snapshot # see https://docs.getdbt.com/reference/artifacts/manifest-json catalog_type: Optional[str] - missing_from_catalog: bool # indicates if the node was missing from the catalog.json + missing_from_catalog: ( + bool # indicates if the node was missing from the catalog.json + ) owner: Optional[str] columns: List[DBTColumn] = field(default_factory=list) upstream_nodes: List[str] = field(default_factory=list) # list of upstream dbt_name upstream_cll: List[DBTColumnLineageInfo] = field(default_factory=list) - raw_sql_parsing_result: Optional[ - SqlParsingResult - ] = None # only set for nodes that don't depend on ephemeral models + 
raw_sql_parsing_result: Optional[SqlParsingResult] = ( + None # only set for nodes that don't depend on ephemeral models + ) cll_debug_info: Optional[SqlParsingDebugInfo] = None meta: Dict[str, Any] = field(default_factory=dict) @@ -869,10 +871,10 @@ def create_test_entity_mcps( "platform": DBT_PLATFORM, "name": node.dbt_name, "instance": self.config.platform_instance, + # Ideally we'd include the env unconditionally. However, we started out + # not including env in the guid, so we need to maintain backwards compatibility + # with existing PROD assertions. **( - # Ideally we'd include the env unconditionally. However, we started out - # not including env in the guid, so we need to maintain backwards compatibility - # with existing PROD assertions. {"env": self.config.env} if self.config.env != mce_builder.DEFAULT_ENV and self.config.include_env_in_assertion_guid diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index 072995c10ebcef..cf2d9670400ca5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -191,9 +191,9 @@ def authenticate(self, connection_args: "DremioSourceConfig") -> None: ) return else: - assert ( - connection_args.username and connection_args.password - ), "Username and password are required for authentication" + assert connection_args.username and connection_args.password, ( + "Username and password are required for authentication" + ) host = connection_args.hostname port = connection_args.port protocol = "https" if connection_args.tls else "http" diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py index e5d6b8e40fb3d8..482647f8d77da1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py @@ -101,9 +101,9 @@ def add_mapping( Add a new source type if not in the map (e.g., Dremio ARP). 
""" dremio_source_type = dremio_source_type.upper() - DremioToDataHubSourceTypeMapping.SOURCE_TYPE_MAPPING[ - dremio_source_type - ] = datahub_source_type + DremioToDataHubSourceTypeMapping.SOURCE_TYPE_MAPPING[dremio_source_type] = ( + datahub_source_type + ) if category: if category.lower() == "file_object_storage": diff --git a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py index 99aa5f54f6a576..ce1c60dcafdd46 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py +++ b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py @@ -111,10 +111,10 @@ class ElasticToSchemaFieldConverter: @staticmethod def get_column_type(elastic_column_type: str) -> SchemaFieldDataType: - type_class: Optional[ - Type - ] = ElasticToSchemaFieldConverter._field_type_to_schema_field_type.get( - elastic_column_type + type_class: Optional[Type] = ( + ElasticToSchemaFieldConverter._field_type_to_schema_field_type.get( + elastic_column_type + ) ) if type_class is None: logger.warning( diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py index 02b29051dd2ebe..ffcd9218a2103c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py @@ -155,9 +155,9 @@ def _update_report(self, urn: str, entity_type: str) -> None: current_count = self.report.num_hard_deleted_by_type.get(entity_type, 0) self.report.num_hard_deleted_by_type[entity_type] = current_count + 1 if entity_type not in self.report.sample_hard_deleted_aspects_by_type: - self.report.sample_hard_deleted_aspects_by_type[ - entity_type - ] = LossyList() + self.report.sample_hard_deleted_aspects_by_type[entity_type] = ( + LossyList() + ) self.report.sample_hard_deleted_aspects_by_type[entity_type].append(urn) def delete_entity(self, urn: str) -> None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py index 18838af9bdf85f..5196c8ec5b998b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py @@ -141,8 +141,9 @@ def s3_source_overrides(self, source: S3Source) -> S3Source: source.source_config.platform = PLATFORM_GCS source.is_s3_platform = lambda: True # type: ignore - source.create_s3_path = lambda bucket_name, key: unquote(f"s3://{bucket_name}/{key}") # type: ignore - + source.create_s3_path = lambda bucket_name, key: unquote( # type: ignore + f"s3://{bucket_name}/{key}" + ) return source def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index aba0deebd356c5..bde26f97bf271f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -327,7 +327,7 @@ def _is_single_row_query_method(query: Any) -> bool: def _run_with_query_combiner( - method: Callable[Concatenate["_SingleDatasetProfiler", P], None] + method: Callable[Concatenate["_SingleDatasetProfiler", P], None], ) -> Callable[Concatenate["_SingleDatasetProfiler", P], None]: 
@functools.wraps(method) def inner( @@ -1537,9 +1537,7 @@ def create_bigquery_temp_table( query_job: Optional["google.cloud.bigquery.job.query.QueryJob"] = ( # In google-cloud-bigquery 3.15.0, the _query_job attribute was # made public and renamed to query_job. - cursor.query_job - if hasattr(cursor, "query_job") - else cursor._query_job # type: ignore[attr-defined] + cursor.query_job if hasattr(cursor, "query_job") else cursor._query_job # type: ignore[attr-defined] ) assert query_job temp_destination_table = query_job.destination diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py index 42d0def0a46e7d..93142a347ca0e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py @@ -220,9 +220,9 @@ def ensure_field_level_settings_are_normalized( ) values[field_level_metric] = False - assert ( - max_num_fields_to_profile is None - ), f"{max_num_fields_to_profile_key} should be set to None" + assert max_num_fields_to_profile is None, ( + f"{max_num_fields_to_profile_key} should be set to None" + ) # Disable expensive queries. if values.get("turn_off_expensive_profiling_metrics"): diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py index 8101f0110509e3..9a62ee2dab52f4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py @@ -296,9 +296,9 @@ def _create_iceberg_workunit( custom_properties["snapshot-id"] = str( table.current_snapshot().snapshot_id ) - custom_properties[ - "manifest-list" - ] = table.current_snapshot().manifest_list + custom_properties["manifest-list"] = ( + table.current_snapshot().manifest_list + ) dataset_properties = DatasetPropertiesClass( name=table.name()[-1], description=table.metadata.properties.get("comment", None), diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py index 885b6514779cc4..edb9b7b8bd5264 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py @@ -354,9 +354,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield MetadataWorkUnit(id=group_status_wu_id, mcp=group_status_mcp) # Populate GroupMembership Aspects for CorpUsers - datahub_corp_user_urn_to_group_membership: Dict[ - str, GroupMembershipClass - ] = defaultdict(lambda: GroupMembershipClass(groups=[])) + datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = ( + defaultdict(lambda: GroupMembershipClass(groups=[])) + ) if ( self.config.ingest_group_membership and len(self.selected_azure_ad_groups) > 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py index dda81b0e34a8d2..5452fbcd3f053b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py @@ -344,9 +344,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ).as_workunit() # Step 2: Populate GroupMembership Aspects for CorpUsers - datahub_corp_user_urn_to_group_membership: Dict[ - str, GroupMembershipClass - ] = 
defaultdict(lambda: GroupMembershipClass(groups=[])) + datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = ( + defaultdict(lambda: GroupMembershipClass(groups=[])) + ) if self.config.ingest_group_membership and okta_groups is not None: # Fetch membership for each group. for okta_group in okta_groups: diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py index fa842a15ba7328..9f15eda1501f11 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py @@ -419,10 +419,10 @@ def _extract_record( custom_props = self.build_custom_properties( topic, topic_detail, extra_topic_config ) - schema_name: Optional[ - str - ] = self.schema_registry_client._get_subject_for_topic( - topic, is_key_schema=False + schema_name: Optional[str] = ( + self.schema_registry_client._get_subject_for_topic( + topic, is_key_schema=False + ) ) if schema_name is not None: custom_props["Schema Name"] = schema_name @@ -610,11 +610,13 @@ def fetch_extra_topic_details(self, topics: List[str]) -> Dict[str, dict]: def fetch_topic_configurations(self, topics: List[str]) -> Dict[str, dict]: logger.info("Fetching config details for all topics") - configs: Dict[ - ConfigResource, concurrent.futures.Future - ] = self.admin_client.describe_configs( - resources=[ConfigResource(ConfigResource.Type.TOPIC, t) for t in topics], - request_timeout=self.source_config.connection.client_timeout_seconds, + configs: Dict[ConfigResource, concurrent.futures.Future] = ( + self.admin_client.describe_configs( + resources=[ + ConfigResource(ConfigResource.Type.TOPIC, t) for t in topics + ], + request_timeout=self.source_config.connection.client_timeout_seconds, + ) ) logger.debug("Waiting for config details futures to complete") concurrent.futures.wait(configs.values()) diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py index 72be864fc30a1c..9edfce5855f430 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py @@ -110,9 +110,8 @@ def get_connectors_manifest(self) -> Iterable[ConnectorManifest]: connector_manifest = self._get_connector_manifest( connector_name, connector_url ) - if ( - connector_manifest is None - or not self.config.connector_patterns.allowed(connector_manifest.name) + if connector_manifest is None or not self.config.connector_patterns.allowed( + connector_manifest.name ): self.report.report_dropped(connector_name) continue diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py index 2790460c8e6019..10255ed544b812 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/sink_connectors.py @@ -199,9 +199,9 @@ def get_parser( transforms.append(transform) for key in self.connector_manifest.config.keys(): if key.startswith(f"transforms.{name}."): - transform[ - key.replace(f"transforms.{name}.", "") - ] = self.connector_manifest.config[key] + transform[key.replace(f"transforms.{name}.", "")] = ( + self.connector_manifest.config[key] + ) if "defaultDataset" in connector_manifest.config: 
defaultDataset = connector_manifest.config["defaultDataset"] diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py index 7b3b6e551a0a1f..5e64d4e161e3ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py @@ -123,9 +123,9 @@ def get_parser( transforms.append(transform) for key in self.connector_manifest.config.keys(): if key.startswith(f"transforms.{name}."): - transform[ - key.replace(f"transforms.{name}.", "") - ] = self.connector_manifest.config[key] + transform[key.replace(f"transforms.{name}.", "")] = ( + self.connector_manifest.config[key] + ) return self.JdbcParser( db_connection_url, diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 1183916e9b3fef..abe9b5684f8f1f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -596,9 +596,9 @@ class LookerUtil: @staticmethod def _extract_view_from_field(field: str) -> str: - assert ( - field.count(".") == 1 - ), f"Error: A field must be prefixed by a view name, field is: {field}" + assert field.count(".") == 1, ( + f"Error: A field must be prefixed by a view name, field is: {field}" + ) return field.split(".")[0] @staticmethod @@ -815,9 +815,9 @@ class LookerExplore: project_name: Optional[str] = None label: Optional[str] = None description: Optional[str] = None - upstream_views: Optional[ - List[ProjectInclude] - ] = None # captures the view name(s) this explore is derived from + upstream_views: Optional[List[ProjectInclude]] = ( + None # captures the view name(s) this explore is derived from + ) upstream_views_file_path: Dict[str, Optional[str]] = dataclasses_field( default_factory=dict ) # view_name is key and file_path is value. A single file may contains multiple views @@ -889,7 +889,7 @@ def from_dict( upstream_views.extend(parsed_explore.upstream_views or []) else: logger.warning( - f'Could not find extended explore {extended_explore} for explore {dict["name"]} in model {model_name}' + f"Could not find extended explore {extended_explore} for explore {dict['name']} in model {model_name}" ) else: # we only fallback to the view_names list if this is not an extended explore @@ -903,7 +903,7 @@ def from_dict( ) if not info: logger.warning( - f'Could not resolve view {view_name} for explore {dict["name"]} in model {model_name}' + f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}" ) else: upstream_views.append( @@ -935,9 +935,9 @@ def from_api( # noqa: C901 try: explore = client.lookml_model_explore(model, explore_name) views: Set[str] = set() - lkml_fields: List[ - LookmlModelExploreField - ] = explore_field_set_to_lkml_fields(explore) + lkml_fields: List[LookmlModelExploreField] = ( + explore_field_set_to_lkml_fields(explore) + ) if explore.view_name is not None and explore.view_name != explore.name: # explore is not named after a view and is instead using a from field, which is modeled as view_name. 
@@ -1034,9 +1034,9 @@ def from_api( # noqa: C901 if measure_field.name is None: continue else: - field_name_vs_raw_explore_field[ - measure_field.name - ] = measure_field + field_name_vs_raw_explore_field[measure_field.name] = ( + measure_field + ) view_fields.append( ViewField( @@ -1072,11 +1072,11 @@ def from_api( # noqa: C901 if view_project_map: logger.debug(f"views and their projects: {view_project_map}") - upstream_views_file_path: Dict[ - str, Optional[str] - ] = create_upstream_views_file_path_map( - lkml_fields=lkml_fields, - view_names=views, + upstream_views_file_path: Dict[str, Optional[str]] = ( + create_upstream_views_file_path_map( + lkml_fields=lkml_fields, + view_names=views, + ) ) if upstream_views_file_path: logger.debug(f"views and their file-paths: {upstream_views_file_path}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 4e9d0f68928a45..3ed3186399588e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -166,9 +166,9 @@ def _get_generic_definition( # e.g. spark1 or hive2 or druid_18 platform = re.sub(r"[0-9]+", "", dialect_name.split("_")[0]) - assert ( - platform is not None - ), f"Failed to extract a valid platform from connection {looker_connection}" + assert platform is not None, ( + f"Failed to extract a valid platform from connection {looker_connection}" + ) db = looker_connection.database schema = looker_connection.schema # ok for this to be None return platform, db, schema diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 8487d5113bc1d3..2f1fcd378d40fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -250,9 +250,9 @@ def _set_test_connection_capability( @staticmethod def _extract_view_from_field(field: str) -> str: - assert ( - field.count(".") == 1 - ), f"Error: A field must be prefixed by a view name, field is: {field}" + assert field.count(".") == 1, ( + f"Error: A field must be prefixed by a view name, field is: {field}" + ) return field.split(".")[0] def _get_views_from_fields(self, fields: List[str]) -> List[str]: @@ -610,12 +610,12 @@ def _get_folder_browse_path_v2_entries( def _create_platform_instance_aspect( self, ) -> DataPlatformInstance: - assert ( - self.source_config.platform_name - ), "Platform name is not set in the configuration." - assert ( - self.source_config.platform_instance - ), "Platform instance is not set in the configuration." + assert self.source_config.platform_name, ( + "Platform name is not set in the configuration." + ) + assert self.source_config.platform_instance, ( + "Platform instance is not set in the configuration." + ) return DataPlatformInstance( platform=builder.make_data_platform_urn(self.source_config.platform_name), @@ -1016,9 +1016,9 @@ def _make_dashboard_and_chart_mces( yield from chart_events # Step 2: Emit metadata events for the Dashboard itself. - chart_urns: Set[ - str - ] = set() # Collect the unique child chart urns for dashboard input lineage. + chart_urns: Set[str] = ( + set() + ) # Collect the unique child chart urns for dashboard input lineage. 
for chart_event in chart_events: chart_event_urn = self._extract_event_urn(chart_event) if chart_event_urn: @@ -1538,20 +1538,20 @@ def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]: } ) - dashboard_element: Optional[ - LookerDashboardElement - ] = self._get_looker_dashboard_element( - DashboardElement( - id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes), - # we add the "looks_" prefix to look.id. - title=look.title, - subtitle_text=look.description, - look_id=look.id, - dashboard_id=None, # As this is an independent look - look=LookWithQuery( - query=query, folder=look.folder, user_id=look.user_id + dashboard_element: Optional[LookerDashboardElement] = ( + self._get_looker_dashboard_element( + DashboardElement( + id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes), + # we add the "looks_" prefix to look.id. + title=look.title, + subtitle_text=look.description, + look_id=look.id, + dashboard_id=None, # As this is an independent look + look=LookWithQuery( + query=query, folder=look.folder, user_id=look.user_id + ), ), - ), + ) ) if dashboard_element is not None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py index 6d49d57e077435..2bcae4d46b8d52 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py @@ -33,9 +33,9 @@ class SpecialVariable: - SPECIAL_VARIABLE_PATTERN: ClassVar[ - str - ] = r"\b\w+(\.\w+)*\._(is_selected|in_query|is_filtered)\b" + SPECIAL_VARIABLE_PATTERN: ClassVar[str] = ( + r"\b\w+(\.\w+)*\._(is_selected|in_query|is_filtered)\b" + ) liquid_variable: dict def __init__(self, liquid_variable): diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py index 098d7d73a3da84..05806840b5c954 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py @@ -257,9 +257,9 @@ def _process_entity_timeseries_rows( for row in rows: logger.debug(row) - entity_stat_aspect[ - self.get_entity_stat_key(row) - ] = self.to_entity_timeseries_stat_aspect(row) + entity_stat_aspect[self.get_entity_stat_key(row)] = ( + self.to_entity_timeseries_stat_aspect(row) + ) return entity_stat_aspect @@ -385,10 +385,8 @@ def generate_usage_stat_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: entity_rows: List[Dict] = self._execute_query( entity_query_with_filters, "entity_query" ) - entity_usage_stat: Dict[ - Tuple[str, str], Any - ] = self._process_entity_timeseries_rows( - entity_rows + entity_usage_stat: Dict[Tuple[str, str], Any] = ( + self._process_entity_timeseries_rows(entity_rows) ) # Any type to pass mypy unbound Aspect type error user_wise_query_with_filters: LookerQuery = self._append_filters( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py index 103f4175a9ccff..4e38165bb56286 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py @@ -38,16 +38,16 @@ def merge_parent_and_child_fields( # Create a 
map field-name vs field child_field_map: dict = {} for field in child_fields: - assert ( - NAME in field - ), "A lookml view must have a name field" # name is required field of lookml field array + assert NAME in field, ( + "A lookml view must have a name field" + ) # name is required field of lookml field array child_field_map[field[NAME]] = field for field in parent_fields: - assert ( - NAME in field - ), "A lookml view must have a name field" # name is required field of lookml field array + assert NAME in field, ( + "A lookml view must have a name field" + ) # name is required field of lookml field array if field[NAME] in child_field_map: # Fields defined in the child view take higher precedence. diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index c7d3724472d3c8..a8575c84b510d5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -482,14 +482,14 @@ def get_project_name(self, model_name: str) -> str: if self.source_config.project_name is not None: return self.source_config.project_name - assert ( - self.looker_client is not None - ), "Failed to find a configured Looker API client" + assert self.looker_client is not None, ( + "Failed to find a configured Looker API client" + ) try: model = self.looker_client.lookml_model(model_name, fields="project_name") - assert ( - model.project_name is not None - ), f"Failed to find a project name for model {model_name}" + assert model.project_name is not None, ( + f"Failed to find a project name for model {model_name}" + ) return model.project_name except SDKError: raise ValueError( @@ -541,9 +541,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.reporter.git_clone_latency = datetime.now() - start_time self.source_config.base_folder = checkout_dir.resolve() - self.base_projects_folder[ - BASE_PROJECT_NAME - ] = self.source_config.base_folder + self.base_projects_folder[BASE_PROJECT_NAME] = ( + self.source_config.base_folder + ) visited_projects: Set[str] = set() @@ -641,9 +641,9 @@ def _recursively_check_manifests( repo_url=remote_project.url, ) - self.base_projects_folder[ - remote_project.name - ] = p_checkout_dir.resolve() + self.base_projects_folder[remote_project.name] = ( + p_checkout_dir.resolve() + ) repo = p_cloner.get_last_repo_cloned() assert repo remote_git_info = GitInfo( @@ -930,9 +930,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 logger.warning( f"view {maybe_looker_view.id.view_name} from model {model_name}, connection {model.connection} was previously processed via model {prev_model_name}, connection {prev_model_connection} and will likely lead to incorrect lineage to the underlying tables" ) - if ( - not self.source_config.emit_reachable_views_only - ): + if not self.source_config.emit_reachable_views_only: logger.warning( "Consider enabling the `emit_reachable_views_only` flag to handle this case." 
) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py index 971181e4300d69..f77eebb3cdd8cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py @@ -484,11 +484,11 @@ def __init__( ) def __get_upstream_dataset_urn(self) -> List[str]: - current_view_id: Optional[ - LookerViewId - ] = self.looker_view_id_cache.get_looker_view_id( - view_name=self.view_context.name(), - base_folder_path=self.view_context.base_folder_path, + current_view_id: Optional[LookerViewId] = ( + self.looker_view_id_cache.get_looker_view_id( + view_name=self.view_context.name(), + base_folder_path=self.view_context.base_folder_path, + ) ) # Current view will always be present in cache. assert will silence the lint diff --git a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py index b0b04dff20bffc..02125db83d2582 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py @@ -172,10 +172,10 @@ def _get_mlflow_registered_models(self) -> Iterable[RegisteredModel]: """ Get all Registered Models in MLflow Model Registry. """ - registered_models: Iterable[ - RegisteredModel - ] = self._traverse_mlflow_search_func( - search_func=self.client.search_registered_models, + registered_models: Iterable[RegisteredModel] = ( + self._traverse_mlflow_search_func( + search_func=self.client.search_registered_models, + ) ) return registered_models diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index bbc4897d227bac..ad8487c1a759ec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -288,7 +288,9 @@ def __init__(self, ctx: PipelineContext, config: MongoDBConfig): # See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes self.mongo_client = MongoClient( - self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options # type: ignore + self.config.connect_uri, + datetime_conversion="DATETIME_AUTO", + **options, # type: ignore ) # This cheaply tests the connection. 
For details, see @@ -470,9 +472,9 @@ def _infer_schema_metadata( ) # Add this information to the custom properties so user can know they are looking at downsampled schema dataset_properties.customProperties["schema.downsampled"] = "True" - dataset_properties.customProperties[ - "schema.totalFields" - ] = f"{collection_schema_size}" + dataset_properties.customProperties["schema.totalFields"] = ( + f"{collection_schema_size}" + ) logger.debug(f"Size of collection fields = {len(collection_fields)}") # append each schema field (sort so output is consistent) diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index 7f446f6d1c2718..52b1386e21d85a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -184,9 +184,9 @@ def validator_site_url_to_site_name(cls, values): @validator("site_url") def validator_site_url(cls, site_url: str) -> str: - assert site_url.startswith( - ("http://", "https://") - ), "site_url must start with http:// or https://" + assert site_url.startswith(("http://", "https://")), ( + "site_url must start with http:// or https://" + ) if not site_url.endswith("/"): site_url = site_url + "/" @@ -487,9 +487,7 @@ def rest_api_base_url(self): def get_report(self) -> SourceReport: return self.report - def update_flow( - self, pg_flow_dto: Dict, recursion_level: int = 0 - ) -> None: # noqa: C901 + def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None: # noqa: C901 """ Update self.nifi_flow with contents of the input process group `pg_flow_dto` """ @@ -548,16 +546,16 @@ def update_flow( for inputPort in flow_dto.get("inputPorts", []): component = inputPort.get("component") if inputPort.get("allowRemoteAccess"): - self.nifi_flow.remotely_accessible_ports[ - component.get("id") - ] = NifiComponent( - component.get("id"), - component.get("name"), - component.get("type"), - component.get("parentGroupId"), - NifiType.INPUT_PORT, - comments=component.get("comments"), - status=component.get("status", {}).get("runStatus"), + self.nifi_flow.remotely_accessible_ports[component.get("id")] = ( + NifiComponent( + component.get("id"), + component.get("name"), + component.get("type"), + component.get("parentGroupId"), + NifiType.INPUT_PORT, + comments=component.get("comments"), + status=component.get("status", {}).get("runStatus"), + ) ) logger.debug(f"Adding remotely accessible port {component.get('id')}") else: @@ -576,16 +574,16 @@ def update_flow( for outputPort in flow_dto.get("outputPorts", []): component = outputPort.get("component") if outputPort.get("allowRemoteAccess"): - self.nifi_flow.remotely_accessible_ports[ - component.get("id") - ] = NifiComponent( - component.get("id"), - component.get("name"), - component.get("type"), - component.get("parentGroupId"), - NifiType.OUTPUT_PORT, - comments=component.get("comments"), - status=component.get("status", {}).get("runStatus"), + self.nifi_flow.remotely_accessible_ports[component.get("id")] = ( + NifiComponent( + component.get("id"), + component.get("name"), + component.get("type"), + component.get("parentGroupId"), + NifiType.OUTPUT_PORT, + comments=component.get("comments"), + status=component.get("status", {}).get("runStatus"), + ) ) logger.debug(f"Adding remotely accessible port {component.get('id')}") else: diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi.py b/metadata-ingestion/src/datahub/ingestion/source/openapi.py index 
8289265483d598..2075e999ea1d0e 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi.py @@ -101,16 +101,16 @@ def get_swagger(self) -> Dict: # details there once, and then use that session for all requests. self.token = f"Bearer {self.bearer_token}" else: - assert ( - "url_complement" in self.get_token.keys() - ), "When 'request_type' is set to 'get', an url_complement is needed for the request." + assert "url_complement" in self.get_token.keys(), ( + "When 'request_type' is set to 'get', an url_complement is needed for the request." + ) if self.get_token["request_type"] == "get": - assert ( - "{username}" in self.get_token["url_complement"] - ), "we expect the keyword {username} to be present in the url" - assert ( - "{password}" in self.get_token["url_complement"] - ), "we expect the keyword {password} to be present in the url" + assert "{username}" in self.get_token["url_complement"], ( + "we expect the keyword {username} to be present in the url" + ) + assert "{password}" in self.get_token["url_complement"], ( + "we expect the keyword {password} to be present in the url" + ) url4req = self.get_token["url_complement"].replace( "{username}", self.username ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index b49d40a0c7eb6a..14beab6bc9391e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -225,9 +225,9 @@ def report_charts_dropped(self, view: str) -> None: def default_for_dataset_type_mapping() -> Dict[str, str]: dict_: dict = {} for item in SupportedDataPlatform: - dict_[ - item.value.powerbi_data_platform_name - ] = item.value.datahub_data_platform_name + dict_[item.value.powerbi_data_platform_name] = ( + item.value.datahub_data_platform_name + ) return dict_ @@ -303,15 +303,15 @@ class PowerBiDashboardSourceConfig( # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on. - dataset_type_mapping: Union[ - Dict[str, str], Dict[str, PlatformDetail] - ] = pydantic.Field( - default_factory=default_for_dataset_type_mapping, - description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to " - "DataHub supported datasources." - "You can configured platform instance for dataset lineage. " - "See Quickstart Recipe for mapping", - hidden_from_docs=True, + dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = ( + pydantic.Field( + default_factory=default_for_dataset_type_mapping, + description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to " + "DataHub supported datasources." + "You can configured platform instance for dataset lineage. 
" + "See Quickstart Recipe for mapping", + hidden_from_docs=True, + ) ) # PowerBI datasource's server to platform instance mapping server_to_platform_instance: Dict[ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 2a5de7494920b2..759fc6d7dadfba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -128,17 +128,17 @@ def get_upstream_tables( reporter.m_query_parse_successes += 1 try: - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = resolver.MQueryResolver( - table=table, - parse_tree=parse_tree, - reporter=reporter, - parameters=parameters, - ).resolve_to_lineage( - ctx=ctx, - config=config, - platform_instance_resolver=platform_instance_resolver, + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + resolver.MQueryResolver( + table=table, + parse_tree=parse_tree, + reporter=reporter, + parameters=parameters, + ).resolve_to_lineage( + ctx=ctx, + config=config, + platform_instance_resolver=platform_instance_resolver, + ) ) if lineage: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py index 63520bd731de86..54b810650f5854 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py @@ -170,8 +170,7 @@ def create_reference_table( logger.debug(f"Processing arguments {arguments}") if ( - len(arguments) - >= 4 # [0] is warehouse FQDN. + len(arguments) >= 4 # [0] is warehouse FQDN. # [1] is endpoint, we are not using it. 
# [2] is "Catalog" key # [3] is catalog's value @@ -215,16 +214,16 @@ def parse_custom_sql( native_sql_parser.remove_special_characters(query) ) - parsed_result: Optional[ - "SqlParsingResult" - ] = native_sql_parser.parse_custom_sql( - ctx=self.ctx, - query=query, - platform=self.get_platform_pair().datahub_data_platform_name, - platform_instance=platform_detail.platform_instance, - env=platform_detail.env, - database=database, - schema=schema, + parsed_result: Optional["SqlParsingResult"] = ( + native_sql_parser.parse_custom_sql( + ctx=self.ctx, + query=query, + platform=self.get_platform_pair().datahub_data_platform_name, + platform_instance=platform_detail.platform_instance, + env=platform_detail.env, + database=database, + schema=schema, + ) ) if parsed_result is None: @@ -410,9 +409,9 @@ def create_lineage( f"Processing Databrick data-access function detail {data_access_func_detail}" ) table_detail: Dict[str, str] = {} - temp_accessor: Optional[ - IdentifierAccessor - ] = data_access_func_detail.identifier_accessor + temp_accessor: Optional[IdentifierAccessor] = ( + data_access_func_detail.identifier_accessor + ) while temp_accessor: # Condition to handle databricks M-query pattern where table, schema and database all are present in @@ -647,11 +646,13 @@ def create_lineage( db_name: str = data_access_func_detail.identifier_accessor.items["Name"] # type: ignore # Second is schema name schema_name: str = cast( - IdentifierAccessor, data_access_func_detail.identifier_accessor.next # type: ignore + IdentifierAccessor, + data_access_func_detail.identifier_accessor.next, # type: ignore ).items["Name"] # Third is table name table_name: str = cast( - IdentifierAccessor, data_access_func_detail.identifier_accessor.next.next # type: ignore + IdentifierAccessor, + data_access_func_detail.identifier_accessor.next.next, # type: ignore ).items["Name"] qualified_table_name: str = f"{db_name}.{schema_name}.{table_name}" @@ -768,10 +769,13 @@ def get_db_name(self, data_access_tokens: List[str]) -> Optional[str]: ): # database name is explicitly set return database - return get_next_item( # database name is set in Name argument - data_access_tokens, "Name" - ) or get_next_item( # If both above arguments are not available, then try Catalog - data_access_tokens, "Catalog" + return ( + get_next_item( # database name is set in Name argument + data_access_tokens, "Name" + ) + or get_next_item( # If both above arguments are not available, then try Catalog + data_access_tokens, "Catalog" + ) ) def create_lineage( @@ -819,9 +823,7 @@ def create_lineage( values=tree_function.remove_whitespaces_from_list( tree_function.token_values(flat_argument_list[1]) ), - )[ - 0 - ] # Remove any whitespaces and double quotes character + )[0] # Remove any whitespaces and double quotes character server = tree_function.strip_char_from_list([data_access_tokens[2]])[0] diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index 2756a113d1ef0c..42963c08d992d1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -188,9 +188,9 @@ def _process_invoke_expression( # - The inner function Table.TransformColumnTypes takes #"Removed Columns1" # (a table reference) as its first argument # - Its result is then passed as the first argument to Table.SplitColumn - second_invoke_expression: Optional[ - Tree - ] = 
tree_function.first_invoke_expression_func(first_argument) + second_invoke_expression: Optional[Tree] = ( + tree_function.first_invoke_expression_func(first_argument) + ) if second_invoke_expression: # 1. The First argument is function call # 2. That function's first argument references next table variable @@ -304,14 +304,14 @@ def internal( logger.debug(v_statement.pretty()) return None - invoke_expression: Optional[ - Tree - ] = tree_function.first_invoke_expression_func(rh_tree) + invoke_expression: Optional[Tree] = ( + tree_function.first_invoke_expression_func(rh_tree) + ) if invoke_expression is not None: - result: Union[ - DataAccessFunctionDetail, List[str], None - ] = self._process_invoke_expression(invoke_expression) + result: Union[DataAccessFunctionDetail, List[str], None] = ( + self._process_invoke_expression(invoke_expression) + ) if result is None: return None # No need to process some un-expected grammar found while processing invoke_expression if isinstance(result, DataAccessFunctionDetail): @@ -368,9 +368,9 @@ def resolve_to_lineage( return lineage # Parse M-Query and use output_variable as root of tree and create instance of DataAccessFunctionDetail - table_links: List[ - DataAccessFunctionDetail - ] = self.create_data_access_functional_detail(output_variable) + table_links: List[DataAccessFunctionDetail] = ( + self.create_data_access_functional_detail(output_variable) + ) # Each item is data-access function for f_detail in table_links: @@ -390,7 +390,7 @@ def resolve_to_lineage( # From supported_resolver enum get respective handler like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it # & also pass additional information that will be need to generate lineage - pattern_handler: (AbstractLineage) = supported_resolver.handler()( + pattern_handler: AbstractLineage = supported_resolver.handler()( ctx=ctx, table=self.table, config=config, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 044946a5d308d1..5e5636f2d50fe3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -945,9 +945,9 @@ def to_datahub_work_units( # Convert tiles to charts ds_mcps, chart_mcps = self.to_datahub_chart(dashboard.tiles, workspace) # Lets convert dashboard to datahub dashboard - dashboard_mcps: List[ - MetadataChangeProposalWrapper - ] = self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps) + dashboard_mcps: List[MetadataChangeProposalWrapper] = ( + self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps) + ) # Now add MCPs in sequence mcps.extend(ds_mcps) @@ -1472,9 +1472,9 @@ def get_workspace_workunit( def _get_dashboard_patch_work_unit( self, work_unit: MetadataWorkUnit ) -> Optional[MetadataWorkUnit]: - dashboard_info_aspect: Optional[ - DashboardInfoClass - ] = work_unit.get_aspect_of_type(DashboardInfoClass) + dashboard_info_aspect: Optional[DashboardInfoClass] = ( + work_unit.get_aspect_of_type(DashboardInfoClass) + ) if dashboard_info_aspect and self.source_config.patch_metadata: return convert_dashboard_info_to_patch( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py index 161975fa635fdb..927840c44bf0b0 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py @@ -425,9 +425,9 @@ def itr_pages( response.raise_for_status() - assert ( - Constant.VALUE in response.json() - ), "'value' key is not present in paginated response" + assert Constant.VALUE in response.json(), ( + "'value' key is not present in paginated response" + ) if not response.json()[Constant.VALUE]: # if it is an empty list then break break @@ -447,13 +447,13 @@ def get_app( if raw_app is None: return None - assert ( - Constant.ID in raw_app - ), f"{Constant.ID} is required field not present in server response" + assert Constant.ID in raw_app, ( + f"{Constant.ID} is required field not present in server response" + ) - assert ( - Constant.NAME in raw_app - ), f"{Constant.NAME} is required field not present in server response" + assert Constant.NAME in raw_app, ( + f"{Constant.NAME} is required field not present in server response" + ) return App( id=raw_app[Constant.ID], diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py index 31b0731aaa751c..10b062c98c147f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py @@ -156,7 +156,7 @@ def _get_sheet( ) if chart: if not chart.title: - chart.title = f"Object {i+1} of Sheet '{sheet.title}'" + chart.title = f"Object {i + 1} of Sheet '{sheet.title}'" sheet.charts.append(chart) websocket_connection.handle.pop() return sheet diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index cad48eaf1c2375..932ada0a908b28 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -178,9 +178,9 @@ class RedshiftConfig( @root_validator(pre=True) def check_email_is_set_on_usage(cls, values): if values.get("include_usage_statistics"): - assert ( - "email_domain" in values and values["email_domain"] - ), "email_domain needs to be set if usage is enabled" + assert "email_domain" in values and values["email_domain"], ( + "email_domain needs to be set if usage is enabled" + ) return values @root_validator(skip_on_failure=True) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index 9bfca941ce48fb..cce282c71056a2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -305,13 +305,13 @@ def test_connection(config_dict: dict) -> TestConnectionReport: test_report.capability_report = {} try: RedshiftDataDictionary.get_schemas(connection, database=config.database) - test_report.capability_report[ - SourceCapability.SCHEMA_METADATA - ] = CapabilityReport(capable=True) + test_report.capability_report[SourceCapability.SCHEMA_METADATA] = ( + CapabilityReport(capable=True) + ) except Exception as e: - test_report.capability_report[ - SourceCapability.SCHEMA_METADATA - ] = CapabilityReport(capable=False, failure_reason=str(e)) + test_report.capability_report[SourceCapability.SCHEMA_METADATA] = ( + CapabilityReport(capable=False, failure_reason=str(e)) + ) except Exception as e: 
test_report.basic_connectivity = CapabilityReport( @@ -947,9 +947,9 @@ def cache_tables_and_views(self, connection, database): def get_all_tables( self, ) -> Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]]: - all_tables: Dict[ - str, Dict[str, List[Union[RedshiftView, RedshiftTable]]] - ] = defaultdict(dict) + all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]] = ( + defaultdict(dict) + ) for db in set().union(self.db_tables, self.db_views): tables = self.db_tables.get(db, {}) views = self.db_views.get(db, {}) @@ -967,9 +967,9 @@ def extract_usage( all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]], ) -> Iterable[MetadataWorkUnit]: with PerfTimer() as timer: - redundant_usage_run_skip_handler: Optional[ - RedundantUsageRunSkipHandler - ] = None + redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = ( + None + ) if self.config.enable_stateful_usage_ingestion: redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler( source=self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py index d66a1ee18be40f..a5758bdd825702 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py @@ -199,10 +199,10 @@ def _get_workunits_internal( end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT), database=self.config.database, ) - access_events_iterable: Iterable[ - RedshiftAccessEvent - ] = self._gen_access_events_from_history_query( - query, connection=self.connection, all_tables=all_tables + access_events_iterable: Iterable[RedshiftAccessEvent] = ( + self._gen_access_events_from_history_query( + query, connection=self.connection, all_tables=all_tables + ) ) aggregated_events: AggregatedAccessEvents = self._aggregate_access_events( @@ -225,10 +225,10 @@ def _gen_operation_aspect_workunits( start_time=self.start_time.strftime(REDSHIFT_DATETIME_FORMAT), end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT), ) - access_events_iterable: Iterable[ - RedshiftAccessEvent - ] = self._gen_access_events_from_history_query( - query, connection, all_tables=all_tables + access_events_iterable: Iterable[RedshiftAccessEvent] = ( + self._gen_access_events_from_history_query( + query, connection, all_tables=all_tables + ) ) # Generate operation aspect work units from the access events diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/datalake_profiler_config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/datalake_profiler_config.py index 89c092875e4490..58e930eb6e809c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/datalake_profiler_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/datalake_profiler_config.py @@ -85,8 +85,8 @@ def ensure_field_level_settings_are_normalized( if field_level_metric.startswith("include_field_"): values.setdefault(field_level_metric, False) - assert ( - max_num_fields_to_profile is None - ), f"{max_num_fields_to_profile_key} should be set to None" + assert max_num_fields_to_profile is None, ( + f"{max_num_fields_to_profile_key} should be set to None" + ) return values diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 88679efdf5fc31..66e0e6b741d1ff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -236,12 +236,12 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: try: if self.config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN: logger.debug("Access Token Provided in Config") - assert ( - self.config.access_token is not None - ), "Config access_token is required for DIRECT_ACCESS_TOKEN auth" - assert ( - self.config.instance_url is not None - ), "Config instance_url is required for DIRECT_ACCESS_TOKEN auth" + assert self.config.access_token is not None, ( + "Config access_token is required for DIRECT_ACCESS_TOKEN auth" + ) + assert self.config.instance_url is not None, ( + "Config instance_url is required for DIRECT_ACCESS_TOKEN auth" + ) self.sf = Salesforce( instance_url=self.config.instance_url, @@ -250,15 +250,15 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: ) elif self.config.auth is SalesforceAuthType.USERNAME_PASSWORD: logger.debug("Username/Password Provided in Config") - assert ( - self.config.username is not None - ), "Config username is required for USERNAME_PASSWORD auth" - assert ( - self.config.password is not None - ), "Config password is required for USERNAME_PASSWORD auth" - assert ( - self.config.security_token is not None - ), "Config security_token is required for USERNAME_PASSWORD auth" + assert self.config.username is not None, ( + "Config username is required for USERNAME_PASSWORD auth" + ) + assert self.config.password is not None, ( + "Config password is required for USERNAME_PASSWORD auth" + ) + assert self.config.security_token is not None, ( + "Config security_token is required for USERNAME_PASSWORD auth" + ) self.sf = Salesforce( username=self.config.username, @@ -269,15 +269,15 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: elif self.config.auth is SalesforceAuthType.JSON_WEB_TOKEN: logger.debug("Json Web Token provided in the config") - assert ( - self.config.username is not None - ), "Config username is required for JSON_WEB_TOKEN auth" - assert ( - self.config.consumer_key is not None - ), "Config consumer_key is required for JSON_WEB_TOKEN auth" - assert ( - self.config.private_key is not None - ), "Config private_key is required for JSON_WEB_TOKEN auth" + assert self.config.username is not None, ( + "Config username is required for JSON_WEB_TOKEN auth" + ) + assert self.config.consumer_key is not None, ( + "Config consumer_key is required for JSON_WEB_TOKEN auth" + ) + assert self.config.private_key is not None, ( + "Config private_key is required for JSON_WEB_TOKEN auth" + ) self.sf = Salesforce( username=self.config.username, @@ -439,7 +439,8 @@ def get_platform_instance_workunit(self, datasetUrn: str) -> MetadataWorkUnit: dataPlatformInstance = DataPlatformInstanceClass( builder.make_data_platform_urn(self.platform), instance=builder.make_dataplatform_instance_urn( - self.platform, self.config.platform_instance # type:ignore + self.platform, + self.config.platform_instance, # type:ignore ), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py index e96eeb58d96efe..0468792f44aabb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py @@ -477,9 +477,9 @@ def _gen_elements_workunit( upstream_dataset_urns and dataset_urn not in self.dataset_upstream_urn_mapping ): - self.dataset_upstream_urn_mapping[ - dataset_urn 
- ] = upstream_dataset_urns + self.dataset_upstream_urn_mapping[dataset_urn] = ( + upstream_dataset_urns + ) element_input_fields = [ InputFieldClass( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py index 3e88f43142ede6..6762302ebe57c7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py @@ -126,9 +126,9 @@ def fill_workspaces(self) -> None: response.raise_for_status() response_dict = response.json() for workspace_dict in response_dict[Constant.ENTRIES]: - self.workspaces[ - workspace_dict[Constant.WORKSPACEID] - ] = Workspace.parse_obj(workspace_dict) + self.workspaces[workspace_dict[Constant.WORKSPACEID]] = ( + Workspace.parse_obj(workspace_dict) + ) if response_dict[Constant.NEXTPAGE]: url = f"{workspace_url}&page={response_dict[Constant.NEXTPAGE]}" else: @@ -147,9 +147,9 @@ def _get_users(self) -> Dict[str, str]: response.raise_for_status() response_dict = response.json() for user_dict in response_dict[Constant.ENTRIES]: - users[ - user_dict[Constant.MEMBERID] - ] = f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}" + users[user_dict[Constant.MEMBERID]] = ( + f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}" + ) if response_dict[Constant.NEXTPAGE]: url = f"{members_url}&page={response_dict[Constant.NEXTPAGE]}" else: @@ -327,10 +327,12 @@ def get_page_elements(self, workbook: Workbook, page: Page) -> List[Element]: response.raise_for_status() for i, element_dict in enumerate(response.json()[Constant.ENTRIES]): if not element_dict.get(Constant.NAME): - element_dict[Constant.NAME] = f"Element {i+1} of Page '{page.name}'" - element_dict[ - Constant.URL - ] = f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true" + element_dict[Constant.NAME] = ( + f"Element {i + 1} of Page '{page.name}'" + ) + element_dict[Constant.URL] = ( + f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true" + ) element = Element.parse_obj(element_dict) if ( self.config.extract_lineage diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index b14e51a982082c..5f732e2621656f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -384,18 +384,20 @@ def validate_shares( assert all( consumer.platform_instance != share_details.platform_instance for consumer in share_details.consumers - ), "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake." + ), ( + "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake." + ) databases_included_in_share.append(shared_db) databases_created_from_share.extend(share_details.consumers) for db_from_share in databases_created_from_share: - assert ( - db_from_share not in databases_included_in_share - ), "Database included in a share can not be present as consumer in any share." - assert ( - databases_created_from_share.count(db_from_share) == 1 - ), "Same database can not be present as consumer in more than one share." + assert db_from_share not in databases_included_in_share, ( + "Database included in a share can not be present as consumer in any share." 
+ ) + assert databases_created_from_share.count(db_from_share) == 1, ( + "Same database can not be present as consumer in more than one share." + ) return shares diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py index 2239338972d9be..2854a99198d62b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py @@ -250,9 +250,9 @@ def get_connect_args(self) -> dict: if self.private_key is not None: pkey_bytes = self.private_key.replace("\\n", "\n").encode() else: - assert ( - self.private_key_path - ), "missing required private key path to read key from" + assert self.private_key_path, ( + "missing required private key path to read key from" + ) with open(self.private_key_path, "rb") as key: pkey_bytes = key.read() @@ -284,9 +284,9 @@ def get_options(self) -> dict: return self.options def get_oauth_connection(self) -> NativeSnowflakeConnection: - assert ( - self.oauth_config - ), "oauth_config should be provided if using oauth based authentication" + assert self.oauth_config, ( + "oauth_config should be provided if using oauth based authentication" + ) generator = OAuthTokenGenerator( client_id=self.oauth_config.client_id, authority_url=self.oauth_config.authority_url, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py index b82734cbbe84ea..69d0b62a8edfdf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py @@ -623,7 +623,7 @@ def _build_enriched_query_log_query( query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3) AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3) AND execution_status = 'SUCCESS' - AND {users_filter or 'TRUE'} + AND {users_filter or "TRUE"} ) , deduplicated_queries as ( SELECT @@ -651,7 +651,7 @@ def _build_enriched_query_log_query( WHERE query_start_time >= to_timestamp_ltz({start_time_millis}, 3) AND query_start_time < to_timestamp_ltz({end_time_millis}, 3) - AND {users_filter or 'TRUE'} + AND {users_filter or "TRUE"} AND query_id IN ( SELECT query_id FROM deduplicated_queries ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index d165be3f3cc656..173024aec0cf38 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -142,9 +142,9 @@ def __init__(self) -> None: ) # self._table_tags[][][] = list of tags applied to table - self._table_tags: Dict[ - str, Dict[str, Dict[str, List[SnowflakeTag]]] - ] = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) + self._table_tags: Dict[str, Dict[str, Dict[str, List[SnowflakeTag]]]] = ( + defaultdict(lambda: defaultdict(lambda: defaultdict(list))) + ) # self._column_tags[][][][] = list of tags applied to column self._column_tags: Dict[ diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 393e4d3c96d51f..a2d69d9e552916 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -194,9 +194,9 @@ def __init__( config, self.data_dictionary, self.report ) self.profiler: Optional[SnowflakeProfiler] = profiler - self.snowsight_url_builder: Optional[ - SnowsightUrlBuilder - ] = snowsight_url_builder + self.snowsight_url_builder: Optional[SnowsightUrlBuilder] = ( + snowsight_url_builder + ) # These are populated as side-effects of get_workunits_internal. self.databases: List[SnowflakeDatabase] = [] @@ -267,9 +267,9 @@ def get_databases(self) -> Optional[List[SnowflakeDatabase]]: ) return None else: - ischema_databases: List[ - SnowflakeDatabase - ] = self.get_databases_from_ischema(databases) + ischema_databases: List[SnowflakeDatabase] = ( + self.get_databases_from_ischema(databases) + ) if len(ischema_databases) == 0: self.structured_reporter.failure( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py index 75567cc3da8830..597e7bee4d4cc0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py @@ -38,9 +38,9 @@ def _get_tags_on_object_without_propagation( table_name: Optional[str], ) -> List[SnowflakeTag]: if db_name not in self.tag_cache: - self.tag_cache[ - db_name - ] = self.data_dictionary.get_tags_for_database_without_propagation(db_name) + self.tag_cache[db_name] = ( + self.data_dictionary.get_tags_for_database_without_propagation(db_name) + ) if domain == SnowflakeObjectDomain.DATABASE: return self.tag_cache[db_name].get_database_tags(db_name) @@ -130,10 +130,10 @@ def get_column_tags_for_table( temp_column_tags: Dict[str, List[SnowflakeTag]] = {} if self.config.extract_tags == TagOption.without_lineage: if db_name not in self.tag_cache: - self.tag_cache[ - db_name - ] = self.data_dictionary.get_tags_for_database_without_propagation( - db_name + self.tag_cache[db_name] = ( + self.data_dictionary.get_tags_for_database_without_propagation( + db_name + ) ) temp_column_tags = self.tag_cache[db_name].get_column_tags_for_table( table_name, schema_name, db_name diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index 85e4071aec07df..edd13ee48326bb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -549,9 +549,9 @@ def parse_event_objects(self, event_dict: Dict) -> None: ): # NOTE: Generated emails may be incorrect, as email may be different than # username@email_domain - event_dict[ - "EMAIL" - ] = f'{event_dict["USER_NAME"]}@{self.config.email_domain}'.lower() + event_dict["EMAIL"] = ( + f"{event_dict['USER_NAME']}@{self.config.email_domain}".lower() + ) if not event_dict["EMAIL"]: self.report.rows_missing_email += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index 885bee1ccdb908..030edfde4ca1da 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -21,8 +21,7 @@ class 
SnowflakeStructuredReportMixin(abc.ABC): @property @abc.abstractmethod - def structured_reporter(self) -> SourceReport: - ... + def structured_reporter(self) -> SourceReport: ... class SnowsightUrlBuilder: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index b8afd145727400..b4ef2180d71d45 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -211,9 +211,9 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): self.usage_extractor: Optional[SnowflakeUsageExtractor] = None if self.config.include_usage_stats or self.config.include_operational_stats: - redundant_usage_run_skip_handler: Optional[ - RedundantUsageRunSkipHandler - ] = None + redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = ( + None + ) if self.config.enable_stateful_usage_ingestion: redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler( source=self, @@ -296,7 +296,16 @@ class SnowflakePrivilege: _report: Dict[Union[SourceCapability, str], CapabilityReport] = dict() privileges: List[SnowflakePrivilege] = [] - capabilities: List[SourceCapability] = [c.capability for c in SnowflakeV2Source.get_capabilities() if c.capability not in (SourceCapability.PLATFORM_INSTANCE, SourceCapability.DOMAINS, SourceCapability.DELETION_DETECTION)] # type: ignore + capabilities: List[SourceCapability] = [ + c.capability + for c in SnowflakeV2Source.get_capabilities() # type: ignore + if c.capability + not in ( + SourceCapability.PLATFORM_INSTANCE, + SourceCapability.DOMAINS, + SourceCapability.DELETION_DETECTION, + ) + ] cur = conn.query("select current_role()") current_role = [row["CURRENT_ROLE()"] for row in cur][0] diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 6f7decc79b1df2..cfc43454b51fad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -104,9 +104,7 @@ def get_view_definition(self, connection, view_name, schema=None, **kw): return "\n".join([r for r in res]) @typing.no_type_check - def _get_column_type( - self, type_: Union[str, Dict[str, Any]] - ) -> TypeEngine: # noqa: C901 + def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine: # noqa: C901 """Derives the data type of the Athena column. This method is overwritten to extend the behavior of PyAthena. 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 2899bcc2de37b0..a8208ca807ed02 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -218,9 +218,7 @@ def _get_all_table_comments_and_properties(self, connection, **kw): , comment , {properties_clause} AS properties FROM system.tables - WHERE name NOT LIKE '.inner%'""".format( - properties_clause=properties_clause - ) + WHERE name NOT LIKE '.inner%'""".format(properties_clause=properties_clause) ) all_table_comments: Dict[Tuple[str, str], Dict[str, Any]] = {} @@ -301,9 +299,7 @@ def _get_schema_column_info(self, connection, schema=None, **kw): , comment FROM system.columns WHERE {schema_clause} - ORDER BY database, table, position""".format( - schema_clause=schema_clause - ) + ORDER BY database, table, position""".format(schema_clause=schema_clause) ) ) ) @@ -474,7 +470,7 @@ def _get_all_tables(self) -> Set[str]: logger.debug(f"sql_alchemy_url={url}") engine = create_engine(url, **self.config.options) for db_row in engine.execute(text(all_tables_query)): - all_tables_set.add(f'{db_row["database"]}.{db_row["table_name"]}') + all_tables_set.add(f"{db_row['database']}.{db_row['table_name']}") return all_tables_set @@ -503,7 +499,7 @@ def _populate_lineage_map( try: for db_row in engine.execute(text(query)): - dataset_name = f'{db_row["target_schema"]}.{db_row["target_table"]}' + dataset_name = f"{db_row['target_schema']}.{db_row['target_table']}" if not self.config.database_pattern.allowed( db_row["target_schema"] ) or not self.config.table_pattern.allowed(dataset_name): @@ -512,7 +508,7 @@ def _populate_lineage_map( # Target target_path = ( - f'{self.config.platform_instance+"." if self.config.platform_instance else ""}' + f"{self.config.platform_instance + '.' 
if self.config.platform_instance else ''}" f"{dataset_name}" ) target = LineageItem( @@ -525,7 +521,7 @@ def _populate_lineage_map( # Source platform = LineageDatasetPlatform.CLICKHOUSE - path = f'{db_row["source_schema"]}.{db_row["source_table"]}' + path = f"{db_row['source_schema']}.{db_row['source_table']}" sources = [ LineageDataset( @@ -552,9 +548,7 @@ def _populate_lineage_map( target.dataset.path ].upstreams = self._lineage_map[ target.dataset.path - ].upstreams.union( - target.upstreams - ) + ].upstreams.union(target.upstreams) else: self._lineage_map[target.dataset.path] = target diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 52db3cd11a759d..ac568c58af6c68 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -234,9 +234,7 @@ def get_columns( WHERE col.table_name = id.table_name AND col.column_name = id.column_name AND col.owner = id.owner - ) AS identity_options""".format( - dblink=dblink - ) + ) AS identity_options""".format(dblink=dblink) else: identity_cols = "NULL as default_on_null, NULL as identity_options" diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py index c91be9b494c006..664735053f1852 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py @@ -278,8 +278,7 @@ def is_dataset_eligible_for_profiling( if self.config.profiling.profile_table_size_limit is not None and ( size_in_bytes is not None - and size_in_bytes / (2**30) - > self.config.profiling.profile_table_size_limit + and size_in_bytes / (2**30) > self.config.profiling.profile_table_size_limit ): self.report.profiling_skipped_size_limit[schema_name] += 1 logger.debug( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index 5b76fe41d92e97..84b65d6635e9d4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -599,7 +599,12 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext): setattr( # noqa: B010 TeradataDialect, "get_columns", - lambda self, connection, table_name, schema=None, use_qvci=self.config.use_qvci, **kw: optimized_get_columns( + lambda self, + connection, + table_name, + schema=None, + use_qvci=self.config.use_qvci, + **kw: optimized_get_columns( self, connection, table_name, @@ -613,7 +618,11 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext): setattr( # noqa: B010 TeradataDialect, "get_pk_constraint", - lambda self, connection, table_name, schema=None, **kw: optimized_get_pk_constraint( + lambda self, + connection, + table_name, + schema=None, + **kw: optimized_get_pk_constraint( self, connection, table_name, schema, **kw ), ) @@ -621,7 +630,11 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext): setattr( # noqa: B010 TeradataDialect, "get_foreign_keys", - lambda self, connection, table_name, schema=None, **kw: optimized_get_foreign_keys( + lambda self, + connection, + table_name, + schema=None, + **kw: optimized_get_foreign_keys( self, connection, table_name, schema, **kw ), ) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/state/profiling_state_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/profiling_state_handler.py index 9883bc2b8e9b0b..6080ddadb65e40 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/profiling_state_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/profiling_state_handler.py @@ -41,9 +41,9 @@ def __init__( run_id: str, ): self.state_provider = source.state_provider - self.stateful_ingestion_config: Optional[ - ProfilingStatefulIngestionConfig - ] = config.stateful_ingestion + self.stateful_ingestion_config: Optional[ProfilingStatefulIngestionConfig] = ( + config.stateful_ingestion + ) self.pipeline_name = pipeline_name self.run_id = run_id self.checkpointing_enabled: bool = ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py index 8630a959d3f6a3..e4a2646f6ccd3c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py @@ -48,9 +48,9 @@ def __init__( ): self.source = source self.state_provider = source.state_provider - self.stateful_ingestion_config: Optional[ - StatefulIngestionConfig - ] = config.stateful_ingestion + self.stateful_ingestion_config: Optional[StatefulIngestionConfig] = ( + config.stateful_ingestion + ) self.pipeline_name = pipeline_name self.run_id = run_id self._job_id = self._init_job_id() @@ -145,8 +145,7 @@ def should_skip_this_run( ) logger.debug( - f"{self.job_id} : Last run start, end times:" - f"({last_run_time_window})" + f"{self.job_id} : Last run start, end times:({last_run_time_window})" ) # If current run's time window is subset of last run's time window, then skip. 
@@ -212,8 +211,7 @@ def suggest_run_time_window( ) self.log( - "Adjusted start, end times: " - f"({suggested_start_time}, {suggested_end_time})" + f"Adjusted start, end times: ({suggested_start_time}, {suggested_end_time})" ) return (suggested_start_time, suggested_end_time) diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py index d4fcbf09924e9e..017d78bc1abf8d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py @@ -111,9 +111,9 @@ def __init__( self.state_type_class = state_type_class self.pipeline_name = pipeline_name self.run_id = run_id - self.stateful_ingestion_config: Optional[ - StatefulStaleMetadataRemovalConfig - ] = config.stateful_ingestion + self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = ( + config.stateful_ingestion + ) self.checkpointing_enabled: bool = ( True if ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py b/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py index 8f4a53ffc3ed58..1f5a651fc64a79 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py @@ -70,20 +70,20 @@ def get_latest_checkpoint( self.orchestrator_name, pipeline_name, job_name ) - latest_checkpoint: Optional[ - DatahubIngestionCheckpointClass - ] = self.graph.get_latest_timeseries_value( - entity_urn=data_job_urn, - aspect_type=DatahubIngestionCheckpointClass, - filter_criteria_map={ - "pipelineName": pipeline_name, - }, + latest_checkpoint: Optional[DatahubIngestionCheckpointClass] = ( + self.graph.get_latest_timeseries_value( + entity_urn=data_job_urn, + aspect_type=DatahubIngestionCheckpointClass, + filter_criteria_map={ + "pipelineName": pipeline_name, + }, + ) ) if latest_checkpoint: logger.debug( f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}'," f" job_name:'{job_name}' found with start_time:" - f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}" + f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}" ) return latest_checkpoint else: diff --git a/metadata-ingestion/src/datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py b/metadata-ingestion/src/datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py index a37774773b84d7..55f0903b9c91c7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py @@ -67,7 +67,7 @@ def get_latest_checkpoint( logger.debug( f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}'," f" job_name:'{job_name}' found with start_time:" - f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}" + f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}" ) return latest_checkpoint else: diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 
8187fff559208e..f961bd8ecba604 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -281,9 +281,9 @@ def get_tableau_auth( return authentication def make_tableau_client(self, site: str) -> Server: - authentication: Union[ - TableauAuth, PersonalAccessTokenAuth - ] = self.get_tableau_auth(site) + authentication: Union[TableauAuth, PersonalAccessTokenAuth] = ( + self.get_tableau_auth(site) + ) try: server = Server( self.connect_uri, @@ -635,7 +635,7 @@ def projects_backward_compatibility(cls, values: Dict) -> Dict: project_path_pattern = values.get("project_path_pattern") if project_pattern is None and project_path_pattern is None and projects: logger.warning( - "projects is deprecated, please use " "project_path_pattern instead." + "projects is deprecated, please use project_path_pattern instead." ) logger.info("Initializing project_pattern from projects") values["project_pattern"] = AllowDenyPattern( @@ -708,18 +708,18 @@ class DatabaseTable: """ urn: str - id: Optional[ - str - ] = None # is not None only for tables that came from Tableau metadata + id: Optional[str] = ( + None # is not None only for tables that came from Tableau metadata + ) num_cols: Optional[int] = None - paths: Optional[ - Set[str] - ] = None # maintains all browse paths encountered for this table + paths: Optional[Set[str]] = ( + None # maintains all browse paths encountered for this table + ) - parsed_columns: Optional[ - Set[str] - ] = None # maintains all columns encountered for this table during parsing SQL queries + parsed_columns: Optional[Set[str]] = ( + None # maintains all columns encountered for this table during parsing SQL queries + ) def update_table( self, @@ -2310,8 +2310,7 @@ def _get_datasource_project_luid(self, ds: dict) -> Optional[str]: c.EMBEDDED_DATA_SOURCE, ): logger.debug( - f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is " - f"unsupported" + f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is unsupported" ) return None @@ -2493,9 +2492,9 @@ def parse_custom_sql( def _enrich_database_tables_with_parsed_schemas( self, parsing_result: SqlParsingResult ) -> None: - in_tables_schemas: Dict[ - str, Set[str] - ] = transform_parsing_result_to_in_tables_schemas(parsing_result) + in_tables_schemas: Dict[str, Set[str]] = ( + transform_parsing_result_to_in_tables_schemas(parsing_result) + ) if not in_tables_schemas: logger.info("Unable to extract table schema from parsing result") @@ -3559,25 +3558,25 @@ def emit_project_in_topological_order( generated_project_keys.add(project_key.guid()) - parent_project_key: Optional[ - Union[ProjectKey, SiteKey] - ] = None # It is going + parent_project_key: Optional[Union[ProjectKey, SiteKey]] = ( + None # It is going + ) # to be used as a parent container key for the current tableau project if project_.parent_id is not None: # Go to the parent project as we need to generate container first for parent parent_project_key = self.gen_project_key(project_.parent_id) - parent_tableau_project: Optional[ - TableauProject - ] = self.tableau_project_registry.get(project_.parent_id) + parent_tableau_project: Optional[TableauProject] = ( + self.tableau_project_registry.get(project_.parent_id) + ) if ( parent_tableau_project is None ): # It is not in project registry because of project_pattern - assert ( - project_.parent_name - ), f"project {project_.name} should not be null" + assert project_.parent_name, ( + f"project {project_.name} should not be null" 
+ ) parent_tableau_project = TableauProject( id=project_.parent_id, name=project_.parent_name, @@ -3669,16 +3668,16 @@ def ingest_tableau_site(self): if self.config.extract_usage_stats: with PerfTimer() as timer: self._populate_usage_stat_registry() - self.report.extract_usage_stats_timer[ - self.site_content_url - ] = timer.elapsed_seconds(digits=2) + self.report.extract_usage_stats_timer[self.site_content_url] = ( + timer.elapsed_seconds(digits=2) + ) if self.config.permission_ingestion: with PerfTimer() as timer: self._fetch_groups() - self.report.fetch_groups_timer[ - self.site_content_url - ] = timer.elapsed_seconds(digits=2) + self.report.fetch_groups_timer[self.site_content_url] = ( + timer.elapsed_seconds(digits=2) + ) # Populate the map of database names and database hostnames to be used later to map # databases to platform instances. @@ -3691,9 +3690,9 @@ def ingest_tableau_site(self): with PerfTimer() as timer: self._populate_projects_registry() - self.report.populate_projects_registry_timer[ - self.site_content_url - ] = timer.elapsed_seconds(digits=2) + self.report.populate_projects_registry_timer[self.site_content_url] = ( + timer.elapsed_seconds(digits=2) + ) if self.config.add_site_container: yield from self.emit_site_container() @@ -3701,23 +3700,23 @@ def ingest_tableau_site(self): with PerfTimer() as timer: yield from self.emit_workbooks() - self.report.emit_workbooks_timer[ - self.site_content_url - ] = timer.elapsed_seconds(digits=2) + self.report.emit_workbooks_timer[self.site_content_url] = ( + timer.elapsed_seconds(digits=2) + ) if self.sheet_ids: with PerfTimer() as timer: yield from self.emit_sheets() - self.report.emit_sheets_timer[ - self.site_content_url - ] = timer.elapsed_seconds(digits=2) + self.report.emit_sheets_timer[self.site_content_url] = ( + timer.elapsed_seconds(digits=2) + ) if self.dashboard_ids: with PerfTimer() as timer: yield from self.emit_dashboards() - self.report.emit_dashboards_timer[ - self.site_content_url - ] = timer.elapsed_seconds(digits=2) + self.report.emit_dashboards_timer[self.site_content_url] = ( + timer.elapsed_seconds(digits=2) + ) if self.embedded_datasource_ids_being_used: with PerfTimer() as timer: @@ -3743,6 +3742,6 @@ def ingest_tableau_site(self): if self.database_tables: with PerfTimer() as timer: yield from self.emit_upstream_tables() - self.report.emit_upstream_tables_timer[ - self.site_content_url - ] = timer.elapsed_seconds(digits=2) + self.report.emit_upstream_tables_timer[self.site_content_url] = ( + timer.elapsed_seconds(digits=2) + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 1fdce3aa1e2d34..6c3f7a51294797 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -254,7 +254,9 @@ class UnityCatalogSourceConfig( ) # TODO: Remove `type:ignore` by refactoring config - profiling: Union[UnityCatalogGEProfilerConfig, UnityCatalogAnalyzeProfilerConfig] = Field( # type: ignore + profiling: Union[ + UnityCatalogGEProfilerConfig, UnityCatalogAnalyzeProfilerConfig + ] = Field( # type: ignore default=UnityCatalogGEProfilerConfig(), description="Data profiling configuration", discriminator="method", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 9b96953794dcd5..fd6fa8a50f707b 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -363,7 +363,7 @@ def _escape_sequence(value: str) -> str: @staticmethod def _create_metastore( - obj: Union[GetMetastoreSummaryResponse, MetastoreInfo] + obj: Union[GetMetastoreSummaryResponse, MetastoreInfo], ) -> Optional[Metastore]: if not obj.name: return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 43bd788f809c3e..29562eaf3ce5b1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -205,9 +205,9 @@ def __init__(self, ctx: PipelineContext, config: UnityCatalogSourceConfig): self.table_refs: Set[TableReference] = set() self.view_refs: Set[TableReference] = set() self.notebooks: FileBackedDict[Notebook] = FileBackedDict() - self.view_definitions: FileBackedDict[ - Tuple[TableReference, str] - ] = FileBackedDict() + self.view_definitions: FileBackedDict[Tuple[TableReference, str]] = ( + FileBackedDict() + ) # Global map of tables, for profiling self.tables: FileBackedDict[Table] = FileBackedDict() diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py index 718818d9b347bf..2e9f7fc00c8784 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py @@ -103,7 +103,9 @@ def _get_workunits_internal( query, table_info ) for source_table in table_info.source_tables: - with self.report.usage_perf_report.aggregator_add_event_timer: + with ( + self.report.usage_perf_report.aggregator_add_event_timer + ): self.usage_aggregator.aggregate_event( resource=source_table, start_time=query.start_time, diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py index 4c2e4d42c440e8..2e1e315c4df956 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py @@ -213,15 +213,15 @@ def _get_joined_access_event(self, events): def _aggregate_access_events( self, events: List[ClickHouseJoinedAccessEvent] ) -> Dict[datetime, Dict[ClickHouseTableRef, AggregatedDataset]]: - datasets: Dict[ - datetime, Dict[ClickHouseTableRef, AggregatedDataset] - ] = collections.defaultdict(dict) + datasets: Dict[datetime, Dict[ClickHouseTableRef, AggregatedDataset]] = ( + collections.defaultdict(dict) + ) for event in events: floored_ts = get_time_bucket(event.starttime, self.config.bucket_duration) resource = ( - f'{self.config.platform_instance+"." if self.config.platform_instance else ""}' + f"{self.config.platform_instance + '.' 
if self.config.platform_instance else ''}" f"{event.database}.{event.table}" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py index 6ded11027c83a8..e4138696186416 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py @@ -235,9 +235,9 @@ def _get_joined_access_event(self, events): def _aggregate_access_events( self, events: List[TrinoJoinedAccessEvent] ) -> Dict[datetime, Dict[TrinoTableRef, AggregatedDataset]]: - datasets: Dict[ - datetime, Dict[TrinoTableRef, AggregatedDataset] - ] = collections.defaultdict(dict) + datasets: Dict[datetime, Dict[TrinoTableRef, AggregatedDataset]] = ( + collections.defaultdict(dict) + ) for event in events: floored_ts = get_time_bucket(event.starttime, self.config.bucket_duration) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py index bb1c297513de10..b4dc8835f9fba9 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py @@ -80,10 +80,10 @@ def handle_end_of_stream( ).add_asset(container_urn) data_products_container[data_product_urn] = container_product else: - data_products_container[ - data_product_urn - ] = data_products_container[data_product_urn].add_asset( - container_urn + data_products_container[data_product_urn] = ( + data_products_container[data_product_urn].add_asset( + container_urn + ) ) mcps: List[ diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_properties.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_properties.py index 668f6ed7abe074..4b9b4c9e6f5da6 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_properties.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_properties.py @@ -61,9 +61,9 @@ def _merge_with_server_properties( ) -> Optional[DatasetPropertiesClass]: assert dataset_properties_aspect - server_dataset_properties_aspect: Optional[ - DatasetPropertiesClass - ] = graph.get_dataset_properties(entity_urn) + server_dataset_properties_aspect: Optional[DatasetPropertiesClass] = ( + graph.get_dataset_properties(entity_urn) + ) # No need to take any action if server properties is None or there is not customProperties in server properties if ( server_dataset_properties_aspect is None diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_tags.py index ba3b6508daaecd..d2687ebc5e76f6 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_tags.py @@ -89,9 +89,9 @@ def transform_aspect( server_field_map: dict = {} if self.config.semantics == TransformerSemantics.PATCH: assert self.ctx.graph - server_schema_metadata_aspect: Optional[ - SchemaMetadataClass - ] = self.ctx.graph.get_schema_metadata(entity_urn=entity_urn) + server_schema_metadata_aspect: Optional[SchemaMetadataClass] = ( + self.ctx.graph.get_schema_metadata(entity_urn=entity_urn) + ) if server_schema_metadata_aspect is not None: if not schema_metadata_aspect: 
schema_metadata_aspect = server_schema_metadata_aspect diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py index a7e92d4bd7edbd..d17a39bee6cfbf 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py @@ -108,9 +108,9 @@ def transform_aspect( ] = {} # Map to cache server field objects, where fieldPath is key if self.config.semantics == TransformerSemantics.PATCH: assert self.ctx.graph - server_schema_metadata_aspect: Optional[ - SchemaMetadataClass - ] = self.ctx.graph.get_schema_metadata(entity_urn=entity_urn) + server_schema_metadata_aspect: Optional[SchemaMetadataClass] = ( + self.ctx.graph.get_schema_metadata(entity_urn=entity_urn) + ) if server_schema_metadata_aspect is not None: if not schema_metadata_aspect: schema_metadata_aspect = server_schema_metadata_aspect diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py index 7be8069e1b0852..bb2f318dcac8b8 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py @@ -60,10 +60,10 @@ def transform_aspect( domain_aspect.domains.extend(mapped_domains.domains) if self.config.semantics == TransformerSemantics.PATCH: # Try merging with server-side domains - patch_domain_aspect: Optional[ - DomainsClass - ] = AddDatasetDomain._merge_with_server_domains( - self.ctx.graph, entity_urn, domain_aspect + patch_domain_aspect: Optional[DomainsClass] = ( + AddDatasetDomain._merge_with_server_domains( + self.ctx.graph, entity_urn, domain_aspect + ) ) return cast(Optional[Aspect], patch_domain_aspect) return cast(Optional[Aspect], domain_aspect) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index 212e018dd64fb7..32707dcd3a372f 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -141,9 +141,9 @@ def transform_aspect( else: owner_type = get_owner_type(self.config.owner_type) if owner_type == OwnershipTypeClass.CUSTOM: - assert ( - self.config.owner_type_urn is not None - ), "owner_type_urn must be set if owner_type is CUSTOM" + assert self.config.owner_type_urn is not None, ( + "owner_type_urn must be set if owner_type is CUSTOM" + ) owners.append( OwnerClass( diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/tags_to_terms.py b/metadata-ingestion/src/datahub/ingestion/transformer/tags_to_terms.py index 7e6125079f16e3..65cf2ac3614ae0 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/tags_to_terms.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/tags_to_terms.py @@ -92,9 +92,9 @@ def transform_aspect( in_global_tags_aspect: Optional[GlobalTagsClass] = self.ctx.graph.get_tags( entity_urn ) - in_schema_metadata_aspect: Optional[ - SchemaMetadataClass - ] = self.ctx.graph.get_schema_metadata(entity_urn) + in_schema_metadata_aspect: Optional[SchemaMetadataClass] = ( + self.ctx.graph.get_schema_metadata(entity_urn) + ) if in_global_tags_aspect is 
None and in_schema_metadata_aspect is None: return cast(Aspect, in_glossary_terms) @@ -134,10 +134,10 @@ def transform_aspect( ) if self.config.semantics == TransformerSemantics.PATCH: - patch_glossary_terms: Optional[ - GlossaryTermsClass - ] = TagsToTermMapper._merge_with_server_glossary_terms( - self.ctx.graph, entity_urn, out_glossary_terms + patch_glossary_terms: Optional[GlossaryTermsClass] = ( + TagsToTermMapper._merge_with_server_glossary_terms( + self.ctx.graph, entity_urn, out_glossary_terms + ) ) return cast(Optional[Aspect], patch_glossary_terms) else: diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py index 8d2ae2960ebd05..e32f1ddc3943ae 100644 --- a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py +++ b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py @@ -61,17 +61,17 @@ def __init__(self, output_dir: str, extras: Dict[str, str]) -> None: def create( cls, output_dir: str, extras: Dict[str, str] ) -> "SnowflakeAssertionCompiler": - assert os.path.exists( - output_dir - ), f"Specified location {output_dir} does not exist." + assert os.path.exists(output_dir), ( + f"Specified location {output_dir} does not exist." + ) - assert os.path.isdir( - output_dir - ), f"Specified location {output_dir} is not a folder." + assert os.path.isdir(output_dir), ( + f"Specified location {output_dir} is not a folder." + ) - assert any( - x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras - ), "Must specify value for DMF schema using -x DMF_SCHEMA=" + assert any(x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras), ( + "Must specify value for DMF schema using -x DMF_SCHEMA=" + ) return SnowflakeAssertionCompiler(output_dir, extras) @@ -232,6 +232,6 @@ def get_dmf_schedule(trigger: AssertionTrigger) -> str: elif isinstance(trigger.trigger, CronTrigger): return f"USING CRON {trigger.trigger.cron} {trigger.trigger.timezone}" elif isinstance(trigger.trigger, IntervalTrigger): - return f"{trigger.trigger.interval.seconds/60} MIN" + return f"{trigger.trigger.interval.seconds / 60} MIN" else: raise ValueError(f"Unsupported trigger type {type(trigger.trigger)}") diff --git a/metadata-ingestion/src/datahub/lite/duckdb_lite.py b/metadata-ingestion/src/datahub/lite/duckdb_lite.py index 89317383520923..fe025842822b13 100644 --- a/metadata-ingestion/src/datahub/lite/duckdb_lite.py +++ b/metadata-ingestion/src/datahub/lite/duckdb_lite.py @@ -163,9 +163,9 @@ def write( if "properties" not in writeable_dict["systemMetadata"]: writeable_dict["systemMetadata"]["properties"] = {} - writeable_dict["systemMetadata"]["properties"][ - "sysVersion" - ] = new_version + writeable_dict["systemMetadata"]["properties"]["sysVersion"] = ( + new_version + ) if needs_write: self.duckdb_client.execute( query="INSERT INTO metadata_aspect_v2 VALUES (?, ?, ?, ?, ?, ?)", @@ -208,9 +208,9 @@ def write( "lastObserved": writeable.systemMetadata.lastObserved } else: - system_metadata[ - "lastObserved" - ] = writeable.systemMetadata.lastObserved + system_metadata["lastObserved"] = ( + writeable.systemMetadata.lastObserved + ) self.duckdb_client.execute( query="UPDATE metadata_aspect_v2 SET system_metadata = ? WHERE urn = ? AND aspect_name = ? 
AND version = 0", parameters=[ @@ -497,9 +497,9 @@ def get_all_entities( aspect_name = r[1] aspect_payload = json.loads(r[2]) if typed: - assert ( - aspect_name in ASPECT_MAP - ), f"Missing aspect name {aspect_name} in the registry" + assert aspect_name in ASPECT_MAP, ( + f"Missing aspect name {aspect_name} in the registry" + ) try: aspect_payload = ASPECT_MAP[aspect_name].from_obj( post_json_transform(aspect_payload) @@ -531,7 +531,9 @@ def get_all_aspects(self) -> Iterable[MetadataChangeProposalWrapper]: for r in results.fetchall(): urn = r[0] aspect_name = r[1] - aspect_metadata = ASPECT_MAP[aspect_name].from_obj(post_json_transform(json.loads(r[2]))) # type: ignore + aspect_metadata = ASPECT_MAP[aspect_name].from_obj( + post_json_transform(json.loads(r[2])) + ) # type: ignore system_metadata = SystemMetadataClass.from_obj(json.loads(r[3])) mcp = MetadataChangeProposalWrapper( entityUrn=urn, diff --git a/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py b/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py index 1fd1585a913581..4b8b4d0bc99bc0 100644 --- a/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py +++ b/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py @@ -9,8 +9,7 @@ class HasCustomPropertiesPatch(MetadataPatchProposal): @classmethod @abstractmethod - def _custom_properties_location(self) -> Tuple[str, PatchPath]: - ... + def _custom_properties_location(self) -> Tuple[str, PatchPath]: ... def add_custom_property(self, key: str, value: str) -> Self: """Add a custom property to the entity. diff --git a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py index 6aa10381a883ef..55b026a144c6d5 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py +++ b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py @@ -33,14 +33,11 @@ class GraphQLSchemaMetadata(TypedDict): class SchemaResolverInterface(Protocol): @property - def platform(self) -> str: - ... + def platform(self) -> str: ... - def includes_temp_tables(self) -> bool: - ... + def includes_temp_tables(self) -> bool: ... - def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: - ... + def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: ... def __hash__(self) -> int: # Mainly to make lru_cache happy in methods that accept a schema resolver. @@ -232,8 +229,7 @@ def convert_graphql_schema_metadata_to_info( return { get_simple_field_path_from_v2_field_path(field["fieldPath"]): ( # The actual types are more of a "nice to have". - field["nativeDataType"] - or "str" + field["nativeDataType"] or "str" ) for field in schema["fields"] # TODO: We can't generate lineage to columns nested within structs yet. @@ -289,8 +285,7 @@ def _convert_schema_field_list_to_info( return { get_simple_field_path_from_v2_field_path(col.fieldPath): ( # The actual types are more of a "nice to have". - col.nativeDataType - or "str" + col.nativeDataType or "str" ) for col in schema_fields # TODO: We can't generate lineage to columns nested within structs yet. 
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index e1deeaec5ba826..8637802f6b9fee 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -682,10 +682,10 @@ def add_known_lineage_mapping( query_id = self._known_lineage_query_id() # Generate CLL if schema of downstream is known - column_lineage: List[ - ColumnLineageInfo - ] = self._generate_identity_column_lineage( - upstream_urn=upstream_urn, downstream_urn=downstream_urn + column_lineage: List[ColumnLineageInfo] = ( + self._generate_identity_column_lineage( + upstream_urn=upstream_urn, downstream_urn=downstream_urn + ) ) # Register the query. @@ -1044,9 +1044,9 @@ def _make_schema_resolver_for_session( temp_table_schemas: Dict[str, Optional[List[models.SchemaFieldClass]]] = {} for temp_table_urn, query_ids in self._temp_lineage_map[session_id].items(): for query_id in query_ids: - temp_table_schemas[ - temp_table_urn - ] = self._inferred_temp_schemas.get(query_id) + temp_table_schemas[temp_table_urn] = ( + self._inferred_temp_schemas.get(query_id) + ) if temp_table_schemas: break @@ -1073,9 +1073,9 @@ def _process_view_definition( schema_resolver=self._schema_resolver, ) if parsed.debug_info.error: - self.report.views_parse_failures[ - view_urn - ] = f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}" + self.report.views_parse_failures[view_urn] = ( + f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}" + ) if parsed.debug_info.table_error: self.report.num_views_failed += 1 return # we can't do anything with this query @@ -1583,9 +1583,9 @@ def _recurse_into_query( temp_query_lineage_info ) else: - temp_upstream_queries[ - upstream - ] = temp_query_lineage_info + temp_upstream_queries[upstream] = ( + temp_query_lineage_info + ) # Compute merged upstreams. 
new_upstreams = OrderedSet[UrnStr]() @@ -1665,9 +1665,9 @@ def _recurse_into_query( composed_of_queries_truncated: LossyList[str] = LossyList() for query_id in composed_of_queries: composed_of_queries_truncated.append(query_id) - self.report.queries_with_temp_upstreams[ - composite_query_id - ] = composed_of_queries_truncated + self.report.queries_with_temp_upstreams[composite_query_id] = ( + composed_of_queries_truncated + ) merged_query_text = ";\n\n".join( [q.formatted_query_string for q in ordered_queries] diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index bf28ab0e7b229b..c825deeccd9592 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -442,9 +442,9 @@ def _create_table_ddl_cll( ) -> List[_ColumnLineageInfo]: column_lineage: List[_ColumnLineageInfo] = [] - assert ( - output_table is not None - ), "output_table must be set for create DDL statements" + assert output_table is not None, ( + "output_table must be set for create DDL statements" + ) create_schema: sqlglot.exp.Schema = statement.this sqlglot_columns = create_schema.expressions diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index 57a5cc3c9a6574..5b12c64a831666 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -404,7 +404,7 @@ def replace_cte_refs(node: sqlglot.exp.Expression) -> sqlglot.exp.Expression: if new_statement == statement: if iteration > 1: logger.debug( - f"Required {iteration+1} iterations to detach and eliminate all CTEs" + f"Required {iteration + 1} iterations to detach and eliminate all CTEs" ) break statement = new_statement diff --git a/metadata-ingestion/src/datahub/telemetry/stats.py b/metadata-ingestion/src/datahub/telemetry/stats.py index bf98bd72b574ce..d6835e49de56aa 100644 --- a/metadata-ingestion/src/datahub/telemetry/stats.py +++ b/metadata-ingestion/src/datahub/telemetry/stats.py @@ -5,8 +5,7 @@ class SupportsLT(Protocol): - def __lt__(self, __other: Any) -> Any: - ... + def __lt__(self, __other: Any) -> Any: ... 
_SupportsComparisonT = TypeVar("_SupportsComparisonT", bound=SupportsLT) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index fb028605c35b77..79da90ba20ea9f 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -224,9 +224,9 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]): _use_sqlite_on_conflict: bool = field(repr=False, default=True) def __post_init__(self) -> None: - assert ( - self.cache_eviction_batch_size > 0 - ), "cache_eviction_batch_size must be positive" + assert self.cache_eviction_batch_size > 0, ( + "cache_eviction_batch_size must be positive" + ) for reserved_column in ("key", "value", "rowid"): if reserved_column in self.extra_columns: @@ -261,7 +261,7 @@ def __post_init__(self) -> None: rowid INTEGER PRIMARY KEY AUTOINCREMENT, key TEXT UNIQUE, value BLOB - {''.join(f', {column_name} BLOB' for column_name in self.extra_columns.keys())} + {"".join(f", {column_name} BLOB" for column_name in self.extra_columns.keys())} )""" ) @@ -316,12 +316,12 @@ def _prune_cache(self, num_items_to_prune: int) -> None: f"""INSERT INTO {self.tablename} ( key, value - {''.join(f', {column_name}' for column_name in self.extra_columns.keys())} + {"".join(f", {column_name}" for column_name in self.extra_columns.keys())} ) - VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))}) + VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))}) ON CONFLICT (key) DO UPDATE SET value = excluded.value - {''.join(f', {column_name} = excluded.{column_name}' for column_name in self.extra_columns.keys())} + {"".join(f", {column_name} = excluded.{column_name}" for column_name in self.extra_columns.keys())} """, items_to_write, ) @@ -332,16 +332,16 @@ def _prune_cache(self, num_items_to_prune: int) -> None: f"""INSERT INTO {self.tablename} ( key, value - {''.join(f', {column_name}' for column_name in self.extra_columns.keys())} + {"".join(f", {column_name}" for column_name in self.extra_columns.keys())} ) - VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))})""", + VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})""", item, ) except sqlite3.IntegrityError: self._conn.execute( f"""UPDATE {self.tablename} SET value = ? - {''.join(f', {column_name} = ?' for column_name in self.extra_columns.keys())} + {"".join(f", {column_name} = ?" 
for column_name in self.extra_columns.keys())} WHERE key = ?""", (*item[1:], item[0]), ) diff --git a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py index e98fe42c1d56ce..fccd8dd8a60c35 100644 --- a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py +++ b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py @@ -142,10 +142,10 @@ def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]: fields.append({"name": field_name, "type": field_type}) if kwargs.get("ustruct_seqn") is not None: - struct_name = f'__structn_{kwargs["ustruct_seqn"]}_{str(uuid.uuid4()).replace("-", "")}' + struct_name = f"__structn_{kwargs['ustruct_seqn']}_{str(uuid.uuid4()).replace('-', '')}" else: - struct_name = f'__struct_{str(uuid.uuid4()).replace("-", "")}' + struct_name = f"__struct_{str(uuid.uuid4()).replace('-', '')}" return { "type": "record", "name": struct_name, diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py index 926b8782fbf119..a5fd20fef307d0 100644 --- a/metadata-ingestion/src/datahub/utilities/logging_manager.py +++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py @@ -130,9 +130,9 @@ def _formatMessageColor(self, record: logging.LogRecord) -> str: # Mimic our default format, but with color. message_fg = self.MESSAGE_COLORS.get(record.levelname) return ( - f'{click.style(f"[{self.formatTime(record, self.datefmt)}]", fg="green", dim=True)} ' + f"{click.style(f'[{self.formatTime(record, self.datefmt)}]', fg='green', dim=True)} " f"{click.style(f'{record.levelname:8}', fg=message_fg)} " - f'{click.style(f"{{{record.name}:{record.lineno}}}", fg="blue", dim=True)} - ' + f"{click.style(f'{{{record.name}:{record.lineno}}}', fg='blue', dim=True)} - " f"{click.style(record.getMessage(), fg=message_fg)}" ) diff --git a/metadata-ingestion/src/datahub/utilities/lossy_collections.py b/metadata-ingestion/src/datahub/utilities/lossy_collections.py index f71aad51ab0b6b..31d6d0eb842d04 100644 --- a/metadata-ingestion/src/datahub/utilities/lossy_collections.py +++ b/metadata-ingestion/src/datahub/utilities/lossy_collections.py @@ -151,9 +151,9 @@ def __str__(self) -> str: def as_obj(self) -> Dict[Union[_KT, str], Union[_VT, str]]: base_dict: Dict[Union[_KT, str], Union[_VT, str]] = super().copy() # type: ignore if self.sampled: - base_dict[ - "sampled" - ] = f"{len(self.keys())} sampled of at most {self.total_key_count()} entries." + base_dict["sampled"] = ( + f"{len(self.keys())} sampled of at most {self.total_key_count()} entries." 
+ ) return base_dict def total_key_count(self) -> int: diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 17023c7b388e76..96870fc6fcd378 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -349,9 +349,9 @@ def convert_to_aspects(self, operation_map: Dict[str, list]) -> Dict[str, Any]: elements=[institutional_memory_element] ) - aspect_map[ - Constants.ADD_DOC_LINK_OPERATION - ] = institutional_memory_aspect + aspect_map[Constants.ADD_DOC_LINK_OPERATION] = ( + institutional_memory_aspect + ) else: raise Exception( f"Expected 1 item of type list for the documentation_link meta_mapping config," diff --git a/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py b/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py index b5f490720340ce..bdfe4285065522 100644 --- a/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py +++ b/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py @@ -41,7 +41,9 @@ def decorator(func: Callable[_F, _T]) -> Callable[_F, _T]: def wrapper(*args: _F.args, **kwargs: _F.kwargs) -> _T: # We need a type ignore here because there's no way for us to require that # the args and kwargs are hashable while using ParamSpec. - key: _Key = cachetools.keys.hashkey(*args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k}) # type: ignore + key: _Key = cachetools.keys.hashkey( + *args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k} + ) # type: ignore with cache_lock: if key in cache: diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py index 11c04082ee7ad5..cf92336c68cdf6 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py +++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py @@ -160,12 +160,12 @@ class SQLAlchemyQueryCombiner: _greenlets_by_thread_lock: threading.Lock = dataclasses.field( default_factory=lambda: threading.Lock() ) - _queries_by_thread: Dict[ - greenlet.greenlet, Dict[str, _QueryFuture] - ] = dataclasses.field(default_factory=lambda: collections.defaultdict(dict)) - _greenlets_by_thread: Dict[ - greenlet.greenlet, Set[greenlet.greenlet] - ] = dataclasses.field(default_factory=lambda: collections.defaultdict(set)) + _queries_by_thread: Dict[greenlet.greenlet, Dict[str, _QueryFuture]] = ( + dataclasses.field(default_factory=lambda: collections.defaultdict(dict)) + ) + _greenlets_by_thread: Dict[greenlet.greenlet, Set[greenlet.greenlet]] = ( + dataclasses.field(default_factory=lambda: collections.defaultdict(set)) + ) @staticmethod def _generate_sql_safe_identifier() -> str: diff --git a/metadata-ingestion/src/datahub/utilities/stats_collections.py b/metadata-ingestion/src/datahub/utilities/stats_collections.py index 09a9490abc0fbe..c0bd9d058e5d37 100644 --- a/metadata-ingestion/src/datahub/utilities/stats_collections.py +++ b/metadata-ingestion/src/datahub/utilities/stats_collections.py @@ -48,7 +48,9 @@ def as_obj(self) -> Dict[_KT, _VT]: total_value: Union[_VT, str] = sum(trimmed_dict.values()) # type: ignore except Exception: total_value = "" - trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = total_value # type: ignore + trimmed_dict[f"... 
top {self.top_k} of total {len(self)} entries"] = ( # type: ignore + total_value # type: ignore + ) return trimmed_dict diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py index f0e4c6f5ee14a1..d792e0bba649dd 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py @@ -21,7 +21,7 @@ def _add_prefix_to_paths( def list_urns_with_path( - model: Union[DictWrapper, MetadataChangeProposalWrapper] + model: Union[DictWrapper, MetadataChangeProposalWrapper], ) -> List[Tuple[str, _Path]]: """List urns in the given model with their paths. @@ -145,7 +145,7 @@ def lowercase_dataset_urns( MetadataChangeEventClass, MetadataChangeProposalClass, MetadataChangeProposalWrapper, - ] + ], ) -> None: def modify_urn(urn: str) -> str: if guess_entity_type(urn) == "dataset": diff --git a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py index 024bb62bbe9ce9..5bb078a368dd50 100644 --- a/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py +++ b/metadata-ingestion/tests/integration/azure_ad/test_azure_ad.py @@ -98,7 +98,9 @@ def load_test_resources(test_resources_dir): with azure_ad_nested_group_json_file.open() as azure_ad_nested_group_json: reference_nested_group = json.loads(azure_ad_nested_group_json.read()) - with azure_ad_nested_groups_members_json_file.open() as azure_ad_nested_groups_users_json: + with ( + azure_ad_nested_groups_members_json_file.open() + ) as azure_ad_nested_groups_users_json: reference_nested_groups_users = json.loads( azure_ad_nested_groups_users_json.read() ) diff --git a/metadata-ingestion/tests/integration/dremio/test_dremio.py b/metadata-ingestion/tests/integration/dremio/test_dremio.py index 401f487d8a14b8..c286746c68b79d 100644 --- a/metadata-ingestion/tests/integration/dremio/test_dremio.py +++ b/metadata-ingestion/tests/integration/dremio/test_dremio.py @@ -190,9 +190,9 @@ def create_mysql_source(headers): "type": "MYSQL", } response = requests.post(url, headers=headers, data=json.dumps(payload)) - assert ( - response.status_code == 200 - ), f"Failed to add mysql datasource: {response.text}" + assert response.status_code == 200, ( + f"Failed to add mysql datasource: {response.text}" + ) def upload_dataset(headers): @@ -537,9 +537,9 @@ def test_dremio_platform_instance_urns( # Check dataset URN structure if mce["entityType"] == "dataset" and "entityUrn" in mce: - assert ( - "test-platform.dremio" in mce["entityUrn"] - ), f"Platform instance missing in dataset URN: {mce['entityUrn']}" + assert "test-platform.dremio" in mce["entityUrn"], ( + f"Platform instance missing in dataset URN: {mce['entityUrn']}" + ) # Check aspects for both datasets and containers if "aspectName" in mce: @@ -558,9 +558,9 @@ def test_dremio_platform_instance_urns( instance = aspect_json["instance"] expected_instance = "urn:li:dataPlatformInstance:(urn:li:dataPlatform:dremio,test-platform)" - assert ( - instance == expected_instance - ), f"Invalid platform instance format: {instance}" + assert instance == expected_instance, ( + f"Invalid platform instance format: {instance}" + ) # Verify against golden file mce_helpers.check_golden_file( diff --git a/metadata-ingestion/tests/integration/grafana/test_grafana.py b/metadata-ingestion/tests/integration/grafana/test_grafana.py index 6eb6b0b8509263..cbac965884365d 100644 --- 
a/metadata-ingestion/tests/integration/grafana/test_grafana.py +++ b/metadata-ingestion/tests/integration/grafana/test_grafana.py @@ -120,7 +120,7 @@ def test_grafana_dashboard(loaded_grafana, pytestconfig, tmp_path, test_resource time.sleep(5) resp = requests.get(url) if resp.status_code == 200: - logging.info(f"Grafana started after waiting {i*5} seconds") + logging.info(f"Grafana started after waiting {i * 5} seconds") break else: pytest.fail("Grafana did not start in time") @@ -131,12 +131,12 @@ def test_grafana_dashboard(loaded_grafana, pytestconfig, tmp_path, test_resource assert resp.status_code == 200, "Failed to load default dashboard" dashboard = resp.json() - assert ( - dashboard["dashboard"]["title"] == "Default Dashboard" - ), "Default dashboard title mismatch" - assert any( - panel["type"] == "text" for panel in dashboard["dashboard"]["panels"] - ), "Default dashboard missing text panel" + assert dashboard["dashboard"]["title"] == "Default Dashboard", ( + "Default dashboard title mismatch" + ) + assert any(panel["type"] == "text" for panel in dashboard["dashboard"]["panels"]), ( + "Default dashboard missing text panel" + ) # Verify the output. (You can add further checks here if needed) logging.info("Default dashboard verified successfully") @@ -153,7 +153,7 @@ def test_grafana_ingest( time.sleep(5) resp = requests.get(url) if resp.status_code == 200: - logging.info(f"Grafana started after waiting {i*5} seconds") + logging.info(f"Grafana started after waiting {i * 5} seconds") break else: pytest.fail("Grafana did not start in time") diff --git a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py index d0f4fc35fc03eb..d8c98b12951f5d 100644 --- a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py +++ b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py @@ -482,9 +482,9 @@ def test_kafka_connect_ingest_stateful( "mysql_source1", "mysql_source2", ] - pipeline_run1_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_name}" + pipeline_run1_config["sink"]["config"]["filename"] = ( + f"{tmp_path}/{output_file_name}" + ) pipeline_run1 = Pipeline.create(pipeline_run1_config) pipeline_run1.run() pipeline_run1.raise_from_status() @@ -506,14 +506,16 @@ def test_kafka_connect_ingest_stateful( mock_datahub_graph, ) as mock_checkpoint: mock_checkpoint.return_value = mock_datahub_graph - pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore + pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( + base_pipeline_config # type: ignore + ) # Set the special properties for this run pipeline_run1_config["source"]["config"]["connector_patterns"]["allow"] = [ "mysql_source1", ] - pipeline_run2_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_deleted_name}" + pipeline_run2_config["sink"]["config"]["filename"] = ( + f"{tmp_path}/{output_file_deleted_name}" + ) pipeline_run2 = Pipeline.create(pipeline_run2_config) pipeline_run2.run() pipeline_run2.raise_from_status() diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index c96bcc729a95da..bbcc6332539c02 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -1096,9 +1096,9 @@ def test_file_path_in_view_naming_pattern( ): mocked_client = 
mock.MagicMock() new_recipe = get_default_recipe(output_file_path=f"{tmp_path}/looker_mces.json") - new_recipe["source"]["config"][ - "view_naming_pattern" - ] = "{project}.{file_path}.view.{name}" + new_recipe["source"]["config"]["view_naming_pattern"] = ( + "{project}.{file_path}.view.{name}" + ) with mock.patch( "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index 940e7f36675f79..d803b8498104fd 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -101,13 +101,13 @@ def test_lookml_refinement_ingest(pytestconfig, tmp_path, mock_time): ) new_recipe["source"]["config"]["process_refinements"] = True - new_recipe["source"]["config"][ - "view_naming_pattern" - ] = "{project}.{file_path}.view.{name}" + new_recipe["source"]["config"]["view_naming_pattern"] = ( + "{project}.{file_path}.view.{name}" + ) - new_recipe["source"]["config"][ - "view_browse_pattern" - ] = "/{env}/{platform}/{project}/{file_path}/views" + new_recipe["source"]["config"]["view_browse_pattern"] = ( + "/{env}/{platform}/{project}/{file_path}/views" + ) pipeline = Pipeline.create(new_recipe) pipeline.run() diff --git a/metadata-ingestion/tests/integration/nifi/test_nifi.py b/metadata-ingestion/tests/integration/nifi/test_nifi.py index b992de058879ef..924e854a47e4eb 100644 --- a/metadata-ingestion/tests/integration/nifi/test_nifi.py +++ b/metadata-ingestion/tests/integration/nifi/test_nifi.py @@ -72,7 +72,7 @@ def test_nifi_ingest_standalone( status = next(s for s in statuses if s["name"] == "FetchS3Object") if status["aggregateSnapshot"]["flowFilesOut"] >= 1: - logging.info(f"Waited for time {i*5} seconds") + logging.info(f"Waited for time {i * 5} seconds") break # Run the metadata ingestion pipeline. @@ -124,7 +124,7 @@ def test_nifi_ingest_cluster(loaded_nifi, pytestconfig, tmp_path, test_resources statuses = [pg["status"] for pg in pgs] status = next(s for s in statuses if s["name"] == "Cluster_Site_S3_to_S3") if status["aggregateSnapshot"]["flowFilesSent"] >= 1: - logging.info(f"Waited for time {i*5} seconds") + logging.info(f"Waited for time {i * 5} seconds") break test_resources_dir = pytestconfig.rootpath / "tests/integration/nifi" # Run the metadata ingestion pipeline. 
diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 6f7a9c7833ba1a..0d85d370265cae 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -765,14 +765,14 @@ def test_sqlglot_parser(): } ) - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = parser.get_upstream_tables( - table, - reporter, - ctx=ctx, - config=config, - platform_instance_resolver=platform_instance_resolver, + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + parser.get_upstream_tables( + table, + reporter, + ctx=ctx, + config=config, + platform_instance_resolver=platform_instance_resolver, + ) ) data_platform_tables: List[DataPlatformTable] = lineage[0].upstreams @@ -814,9 +814,9 @@ def test_sqlglot_parser(): def test_databricks_multi_cloud(): q = M_QUERIES[25] - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = get_data_platform_tables_with_dummy_table(q=q) + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + get_data_platform_tables_with_dummy_table(q=q) + ) assert len(lineage) == 1 @@ -833,9 +833,9 @@ def test_databricks_multi_cloud(): def test_databricks_catalog_pattern_1(): q = M_QUERIES[26] - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = get_data_platform_tables_with_dummy_table(q=q) + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + get_data_platform_tables_with_dummy_table(q=q) + ) assert len(lineage) == 1 @@ -904,14 +904,14 @@ def test_sqlglot_parser_2(): } ) - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = parser.get_upstream_tables( - table, - reporter, - ctx=ctx, - config=config, - platform_instance_resolver=platform_instance_resolver, + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + parser.get_upstream_tables( + table, + reporter, + ctx=ctx, + config=config, + platform_instance_resolver=platform_instance_resolver, + ) ) data_platform_tables: List[DataPlatformTable] = lineage[0].upstreams @@ -965,9 +965,9 @@ def test_databricks_regular_case_with_view(): def test_snowflake_double_double_quotes(): q = M_QUERIES[30] - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = get_data_platform_tables_with_dummy_table(q=q) + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + get_data_platform_tables_with_dummy_table(q=q) + ) assert len(lineage) == 1 @@ -984,9 +984,9 @@ def test_snowflake_double_double_quotes(): def test_databricks_multicloud(): q = M_QUERIES[31] - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = get_data_platform_tables_with_dummy_table(q=q) + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + get_data_platform_tables_with_dummy_table(q=q) + ) assert len(lineage) == 1 @@ -1003,9 +1003,9 @@ def test_databricks_multicloud(): def test_snowflake_multi_function_call(): q = M_QUERIES[32] - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = get_data_platform_tables_with_dummy_table(q=q) + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + get_data_platform_tables_with_dummy_table(q=q) + ) assert len(lineage) == 1 @@ -1022,9 +1022,9 @@ def 
test_snowflake_multi_function_call(): def test_mssql_drop_with_select(): q = M_QUERIES[33] - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = get_data_platform_tables_with_dummy_table(q=q) + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + get_data_platform_tables_with_dummy_table(q=q) + ) assert len(lineage) == 1 @@ -1075,18 +1075,18 @@ def test_unsupported_data_platform(): is_entry_present = True break - assert ( - is_entry_present - ), 'Info message "Non-Data Platform Expression" should be present in reporter' + assert is_entry_present, ( + 'Info message "Non-Data Platform Expression" should be present in reporter' + ) def test_empty_string_in_m_query(): # TRIM(TRIM(TRIM(AGENT_NAME, '\"\"'), '+'), '\\'') is in Query q = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu10758.ap-unknown-2.fakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TRIM(TRIM(TRIM(AGENT_NAME, '\"\"'), '+'), '\\'') AS TRIM_AGENT_NAME,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS inner join OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT #(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = get_data_platform_tables_with_dummy_table(q=q) + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + get_data_platform_tables_with_dummy_table(q=q) + ) assert len(lineage) == 1 @@ -1108,9 +1108,9 @@ def test_double_quotes_in_alias(): # SELECT CAST(sales_date AS DATE) AS \"\"Date\"\" in query q = 'let \n Source = Sql.Database("abc.com", "DB", [Query="SELECT CAST(sales_date AS DATE) AS ""Date"",#(lf) SUM(cshintrpret) / 60.0 AS ""Total Order All Items"",#(lf)#(tab)#(tab)#(tab) SUM(cshintrpret) / 60.0 - LAG(SUM(cshintrpret) / 60.0, 1) OVER (ORDER BY CAST(sales_date AS DATE)) AS ""Total minute difference"",#(lf)#(tab)#(tab)#(tab) SUM(sale_price) / 60.0 - LAG(SUM(sale_price) / 60.0, 1) OVER (ORDER BY CAST(sales_date AS DATE)) AS ""Normal minute difference""#(lf) FROM [DB].[dbo].[sales_t]#(lf) WHERE sales_date >= GETDATE() - 365#(lf) GROUP BY CAST(sales_date AS DATE),#(lf)#(tab)#(tab)CAST(sales_date AS TIME);"]) \n in \n Source' - lineage: List[ - datahub.ingestion.source.powerbi.m_query.data_classes.Lineage - ] = get_data_platform_tables_with_dummy_table(q=q) + lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = ( + get_data_platform_tables_with_dummy_table(q=q) + ) assert len(lineage) == 1 @@ -1168,9 +1168,9 @@ def test_m_query_timeout(mock_get_lark_parser): is_entry_present = True break - assert ( - is_entry_present - ), 'Warning message "M-Query Parsing Timeout" should be present in reporter' + assert is_entry_present, ( + 'Warning message "M-Query Parsing Timeout" should be present in reporter' + ) def test_comments_in_m_query(): diff --git 
a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 911d8a9f35139f..7f62e433bc8014 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -828,9 +828,9 @@ def dataset_type_mapping_set_to_all_platform(pipeline: Pipeline) -> None: # Generate default dataset_type_mapping and compare it with source_config.dataset_type_mapping default_dataset_type_mapping: dict = {} for item in SupportedDataPlatform: - default_dataset_type_mapping[ - item.value.powerbi_data_platform_name - ] = item.value.datahub_data_platform_name + default_dataset_type_mapping[item.value.powerbi_data_platform_name] = ( + item.value.datahub_data_platform_name + ) assert default_dataset_type_mapping == source_config.dataset_type_mapping @@ -1443,9 +1443,9 @@ def test_powerbi_cross_workspace_reference_info_message( is_entry_present = True break - assert ( - is_entry_present - ), 'Info message "Missing Lineage For Tile" should be present in reporter' + assert is_entry_present, ( + 'Info message "Missing Lineage For Tile" should be present in reporter' + ) test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" @@ -1568,6 +1568,6 @@ def test_powerbi_app_ingest_info_message( is_entry_present = True break - assert ( - is_entry_present - ), "The extract_app flag should be set to false by default. We need to keep this flag as false until all GMS instances are updated to the latest release." + assert is_entry_present, ( + "The extract_app flag should be set to false by default. We need to keep this flag as false until all GMS instances are updated to the latest release." + ) diff --git a/metadata-ingestion/tests/integration/salesforce/test_salesforce.py b/metadata-ingestion/tests/integration/salesforce/test_salesforce.py index 89a37a372df843..9e68ff22a767e2 100644 --- a/metadata-ingestion/tests/integration/salesforce/test_salesforce.py +++ b/metadata-ingestion/tests/integration/salesforce/test_salesforce.py @@ -89,15 +89,15 @@ def test_latest_version(mock_sdk): ) SalesforceSource(config=config, ctx=Mock()) calls = mock_sf._call_salesforce.mock_calls - assert ( - len(calls) == 1 - ), "We didn't specify version but source didn't call SF API to get the latest one" - assert calls[0].ends_with( - "/services/data" - ), "Source didn't call proper SF API endpoint to get all versions" - assert ( - mock_sf.sf_version == "54.0" - ), "API version was not correctly set (see versions_responses.json)" + assert len(calls) == 1, ( + "We didn't specify version but source didn't call SF API to get the latest one" + ) + assert calls[0].ends_with("/services/data"), ( + "Source didn't call proper SF API endpoint to get all versions" + ) + assert mock_sf.sf_version == "54.0", ( + "API version was not correctly set (see versions_responses.json)" + ) @mock.patch("datahub.ingestion.source.salesforce.Salesforce") @@ -133,12 +133,12 @@ def test_custom_version(mock_sdk): SalesforceSource(config=config, ctx=Mock()) calls = mock_sf._call_salesforce.mock_calls - assert ( - len(calls) == 0 - ), "Source called API to get all versions even though we specified proper version" - assert ( - mock_sdk.call_args.kwargs["version"] == "46.0" - ), "API client object was not correctly initialized with the custom version" + assert len(calls) == 0, ( + "Source called API to get all versions even though we specified proper version" + ) + assert mock_sdk.call_args.kwargs["version"] == "46.0", ( + "API 
client object was not correctly initialized with the custom version" + ) @freeze_time(FROZEN_TIME) diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index b969f77b4c3c18..7fab5fc7dae1ba 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -57,7 +57,7 @@ def test_mssql_ingest(mssql_runner, pytestconfig, tmp_path, mock_time, config_fi pytestconfig, output_path=tmp_path / "mssql_mces.json", golden_path=test_resources_dir - / f"golden_files/golden_mces_{config_file.replace('yml','json')}", + / f"golden_files/golden_mces_{config_file.replace('yml', 'json')}", ignore_paths=[ r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['job_id'\]", r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['date_created'\]", diff --git a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py index b8b0563a1d24e5..9c7b86a275f6d0 100644 --- a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py +++ b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py @@ -205,55 +205,57 @@ def register_mock_data(workspace_client): ), ] - workspace_client.tables.get = lambda *args, **kwargs: databricks.sdk.service.catalog.TableInfo.from_dict( - { - "name": "quickstart_table", - "catalog_name": "quickstart_catalog", - "schema_name": "quickstart_schema", - "table_type": "MANAGED", - "data_source_format": "DELTA", - "columns": [ - { - "name": "columnA", - "type_text": "int", - "type_json": '{"name":"columnA","type":"integer","nullable":true,"metadata":{}}', - "type_name": "INT", - "type_precision": 0, - "type_scale": 0, - "position": 0, - "nullable": True, - }, - { - "name": "columnB", - "type_text": "string", - "type_json": '{"name":"columnB","type":"string","nullable":true,"metadata":{}}', - "type_name": "STRING", - "type_precision": 0, - "type_scale": 0, - "position": 1, - "nullable": True, + workspace_client.tables.get = ( + lambda *args, **kwargs: databricks.sdk.service.catalog.TableInfo.from_dict( + { + "name": "quickstart_table", + "catalog_name": "quickstart_catalog", + "schema_name": "quickstart_schema", + "table_type": "MANAGED", + "data_source_format": "DELTA", + "columns": [ + { + "name": "columnA", + "type_text": "int", + "type_json": '{"name":"columnA","type":"integer","nullable":true,"metadata":{}}', + "type_name": "INT", + "type_precision": 0, + "type_scale": 0, + "position": 0, + "nullable": True, + }, + { + "name": "columnB", + "type_text": "string", + "type_json": '{"name":"columnB","type":"string","nullable":true,"metadata":{}}', + "type_name": "STRING", + "type_precision": 0, + "type_scale": 0, + "position": 1, + "nullable": True, + }, + ], + "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", + "owner": "account users", + "properties": { + "delta.lastCommitTimestamp": "1666185711000", + "delta.lastUpdateVersion": "1", + "delta.minReaderVersion": "1", + "delta.minWriterVersion": "2", + "spark.sql.statistics.numRows": "10", + "spark.sql.statistics.totalSize": "512", }, - ], - "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", - "owner": 
"account users", - "properties": { - "delta.lastCommitTimestamp": "1666185711000", - "delta.lastUpdateVersion": "1", - "delta.minReaderVersion": "1", - "delta.minWriterVersion": "2", - "spark.sql.statistics.numRows": "10", - "spark.sql.statistics.totalSize": "512", - }, - "generation": 2, - "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736", - "full_name": "quickstart_catalog.quickstart_schema.quickstart_table", - "data_access_configuration_id": "00000000-0000-0000-0000-000000000000", - "created_at": 1666185698688, - "created_by": "abc@acryl.io", - "updated_at": 1666186049633, - "updated_by": "abc@acryl.io", - "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", - } + "generation": 2, + "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736", + "full_name": "quickstart_catalog.quickstart_schema.quickstart_table", + "data_access_configuration_id": "00000000-0000-0000-0000-000000000000", + "created_at": 1666185698688, + "created_by": "abc@acryl.io", + "updated_at": 1666186049633, + "updated_by": "abc@acryl.io", + "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", + } + ) ) workspace_client.service_principals.list.return_value = [ @@ -437,9 +439,7 @@ def test_ingestion(pytestconfig, tmp_path, requests_mock): "datahub.ingestion.source.unity.proxy.WorkspaceClient" ) as mock_client, patch.object( HiveMetastoreProxy, "get_inspector" - ) as get_inspector, patch.object( - HiveMetastoreProxy, "_execute_sql" - ) as execute_sql: + ) as get_inspector, patch.object(HiveMetastoreProxy, "_execute_sql") as execute_sql: workspace_client: mock.MagicMock = mock.MagicMock() mock_client.return_value = workspace_client register_mock_data(workspace_client) diff --git a/metadata-ingestion/tests/performance/databricks/generator.py b/metadata-ingestion/tests/performance/databricks/generator.py index 29df325d856a1a..b11771e55b2c9e 100644 --- a/metadata-ingestion/tests/performance/databricks/generator.py +++ b/metadata-ingestion/tests/performance/databricks/generator.py @@ -167,7 +167,7 @@ def _generate_insert_lineage(table: Table, upstream: Table) -> str: def _generate_view_definition(view: View) -> str: from_statement = f"FROM {_quote_table(view.upstreams[0])} t0" join_statement = " ".join( - f"JOIN {_quote_table(upstream)} t{i+1} ON t0.id = t{i+1}.id" + f"JOIN {_quote_table(upstream)} t{i + 1} ON t0.id = t{i + 1}.id" for i, upstream in enumerate(view.upstreams[1:]) ) return f"CREATE VIEW {_quote_table(view)} AS SELECT * {from_statement} {join_statement} {view.definition}" diff --git a/metadata-ingestion/tests/test_helpers/mce_helpers.py b/metadata-ingestion/tests/test_helpers/mce_helpers.py index 0105e6d596970b..d70a440dab0657 100644 --- a/metadata-ingestion/tests/test_helpers/mce_helpers.py +++ b/metadata-ingestion/tests/test_helpers/mce_helpers.py @@ -300,9 +300,9 @@ def assert_for_each_entity( for urn, aspect_val in aspect_map.items(): if aspect_val is not None: for f in aspect_field_matcher: - assert aspect_field_matcher[f] == _get_element( - aspect_val, [f] - ), f"urn: {urn} -> Field {f} must match value {aspect_field_matcher[f]}, found {_get_element(aspect_val, [f])}" + assert aspect_field_matcher[f] == _get_element(aspect_val, [f]), ( + f"urn: {urn} -> Field {f} must match value {aspect_field_matcher[f]}, found {_get_element(aspect_val, [f])}" + ) success.append(urn) elif urn not in exception_urns: print(f"Adding {urn} to failures") @@ -361,9 +361,9 @@ def assert_entity_mcp_aspect( assert mcp.aspect aspect_val = mcp.aspect.to_obj() for f in aspect_field_matcher: - assert aspect_field_matcher[f] == 
_get_element( - aspect_val, [f] - ), f"urn: {mcp.entityUrn} -> Field {f} must match value {aspect_field_matcher[f]}, found {_get_element(aspect_val, [f])}" + assert aspect_field_matcher[f] == _get_element(aspect_val, [f]), ( + f"urn: {mcp.entityUrn} -> Field {f} must match value {aspect_field_matcher[f]}, found {_get_element(aspect_val, [f])}" + ) matches = matches + 1 return matches diff --git a/metadata-ingestion/tests/test_helpers/state_helpers.py b/metadata-ingestion/tests/test_helpers/state_helpers.py index f68aef742fc730..c469db6ce8cf80 100644 --- a/metadata-ingestion/tests/test_helpers/state_helpers.py +++ b/metadata-ingestion/tests/test_helpers/state_helpers.py @@ -104,7 +104,7 @@ def monkey_patch_get_latest_timeseries_value( @pytest.fixture def mock_datahub_graph_instance( - mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph] + mock_datahub_graph: Callable[[DatahubClientConfig], DataHubGraph], ) -> DataHubGraph: return mock_datahub_graph(DatahubClientConfig(server="http://fake.domain.local")) diff --git a/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py b/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py index e796f0b3f37219..dad7662d9ad00b 100644 --- a/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py +++ b/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py @@ -26,7 +26,7 @@ def base_entity_metadata(): @pytest.fixture def base_mock_graph( - base_entity_metadata: Dict[str, Dict[str, Any]] + base_entity_metadata: Dict[str, Dict[str, Any]], ) -> MockDataHubGraph: return MockDataHubGraph(entity_graph=base_entity_metadata) diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_ensure_aspect_size.py b/metadata-ingestion/tests/unit/api/source_helpers/test_ensure_aspect_size.py index bdf1e0a2e0e860..8a45efb46893ae 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_ensure_aspect_size.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_ensure_aspect_size.py @@ -159,9 +159,9 @@ def test_ensure_size_of_proper_dataset_profile(processor): processor.ensure_dataset_profile_size( "urn:li:dataset:(s3, dummy_dataset, DEV)", profile ) - assert orig_repr == json.dumps( - profile.to_obj() - ), "Aspect was modified in case where workunit processor should have been no-op" + assert orig_repr == json.dumps(profile.to_obj()), ( + "Aspect was modified in case where workunit processor should have been no-op" + ) @freeze_time("2023-01-02 00:00:00") @@ -177,9 +177,9 @@ def test_ensure_size_of_too_big_schema_metadata(processor): # +100kb is completely arbitrary, but we are truncating the aspect based on schema fields size only, not total taken # by other parameters of the aspect - it is reasonable approach though - schema fields is the only field in schema # metadata which can be expected to grow out of control - assert ( - len(json.dumps(schema.to_obj())) < INGEST_MAX_PAYLOAD_BYTES + 100000 - ), "Aspect exceeded acceptable size" + assert len(json.dumps(schema.to_obj())) < INGEST_MAX_PAYLOAD_BYTES + 100000, ( + "Aspect exceeded acceptable size" + ) @freeze_time("2023-01-02 00:00:00") @@ -189,9 +189,9 @@ def test_ensure_size_of_proper_schema_metadata(processor): processor.ensure_schema_metadata_size( "urn:li:dataset:(s3, dummy_dataset, DEV)", schema ) - assert orig_repr == json.dumps( - schema.to_obj() - ), "Aspect was modified in case where workunit processor should have been no-op" + assert orig_repr == json.dumps(schema.to_obj()), ( + "Aspect was modified 
in case where workunit processor should have been no-op" + ) @freeze_time("2023-01-02 00:00:00") @@ -214,9 +214,9 @@ def test_ensure_size_of_too_big_dataset_profile(processor): ) assert expected_profile.fieldProfiles expected_profile.fieldProfiles.insert(4, reduced_field) - assert json.dumps(profile.to_obj()) == json.dumps( - expected_profile.to_obj() - ), "Field 'big' was not properly removed from aspect due to its size" + assert json.dumps(profile.to_obj()) == json.dumps(expected_profile.to_obj()), ( + "Field 'big' was not properly removed from aspect due to its size" + ) @freeze_time("2023-01-02 00:00:00") diff --git a/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py index a1981ccf767916..f494ed78211dcf 100644 --- a/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py +++ b/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py @@ -232,9 +232,9 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: else [] for fine_grained_lineage in upstream_lineage.fineGrainedLineages ] - assert all( - urn in expected_schema_field_urns for urn in actual_schema_field_urns - ), "Some expected column URNs are missing from fine grained lineage." + assert all(urn in expected_schema_field_urns for urn in actual_schema_field_urns), ( + "Some expected column URNs are missing from fine grained lineage." + ) def test_lineage_for_external_bq_table_no_column_lineage(mock_datahub_graph_instance): @@ -286,9 +286,9 @@ def fake_schema_metadata(entity_urn: str) -> Optional[models.SchemaMetadataClass assert len(upstream_lineage.upstreams) == 3 # Extracting dataset URNs from upstream_lineage.upstreams actual_dataset_urns = [upstream.dataset for upstream in upstream_lineage.upstreams] - assert all( - urn in actual_dataset_urns for urn in expected_dataset_urns - ), "Some expected dataset URNs are missing from upstream lineage." + assert all(urn in actual_dataset_urns for urn in expected_dataset_urns), ( + "Some expected dataset URNs are missing from upstream lineage." 
+ ) assert upstream_lineage.fineGrainedLineages is None diff --git a/metadata-ingestion/tests/unit/cli/assertion/test_compile.py b/metadata-ingestion/tests/unit/cli/assertion/test_compile.py index 47253b5b0d71ea..0a1870d83212e8 100644 --- a/metadata-ingestion/tests/unit/cli/assertion/test_compile.py +++ b/metadata-ingestion/tests/unit/cli/assertion/test_compile.py @@ -37,6 +37,6 @@ def test_compile_assertion_config_spec_for_snowflake(pytestconfig, tmp_path): for file_name in output_file_names: assert os.path.exists(tmp_path / file_name) - assert filecmp.cmp( - golden_file_path / file_name, tmp_path / file_name - ), f"{file_name} is not as expected" + assert filecmp.cmp(golden_file_path / file_name, tmp_path / file_name), ( + f"{file_name} is not as expected" + ) diff --git a/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py b/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py index 941d13be0a6139..27045dfc656cbe 100644 --- a/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py +++ b/metadata-ingestion/tests/unit/redshift/test_redshift_lineage.py @@ -262,8 +262,7 @@ def test_collapse_temp_lineage(): lineage_item: LineageItem = lineage_extractor._lineage_map[target_urn] assert list(lineage_item.upstreams)[0].urn == ( - "urn:li:dataset:(urn:li:dataPlatform:redshift," - "test.public.player_activity,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.player_activity,PROD)" ) assert lineage_item.cll is not None @@ -276,8 +275,7 @@ def test_collapse_temp_lineage(): assert lineage_item.cll[0].downstream.column == "price" assert lineage_item.cll[0].upstreams[0].table == ( - "urn:li:dataset:(urn:li:dataPlatform:redshift," - "test.public.player_activity,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.player_activity,PROD)" ) assert lineage_item.cll[0].upstreams[0].column == "price" @@ -441,8 +439,7 @@ def test_collapse_temp_recursive_cll_lineage(): ) assert target_dataset_cll[0].upstreams[0].table == ( - "urn:li:dataset:(urn:li:dataPlatform:redshift," - "dev.public.player_activity,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)" ) assert target_dataset_cll[0].upstreams[0].column == "price" @@ -638,8 +635,7 @@ def test_collapse_temp_recursive_with_compex_column_cll_lineage(): ) assert target_dataset_cll[0].upstreams[0].table == ( - "urn:li:dataset:(urn:li:dataPlatform:redshift," - "dev.public.player_activity,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)" ) assert target_dataset_cll[0].upstreams[0].column == "price" assert target_dataset_cll[0].upstreams[1].column == "tax" diff --git a/metadata-ingestion/tests/unit/serde/test_codegen.py b/metadata-ingestion/tests/unit/serde/test_codegen.py index b49f7153129136..13fcf3d919cc03 100644 --- a/metadata-ingestion/tests/unit/serde/test_codegen.py +++ b/metadata-ingestion/tests/unit/serde/test_codegen.py @@ -156,9 +156,9 @@ def _err(msg: str) -> None: f"entity {entity_type}: aspect {aspect_name} is missing from the entity registry" ) - assert ( - not errors - ), f'To fix these errors, run "UPDATE_ENTITY_REGISTRY=true pytest {__file__}"' + assert not errors, ( + f'To fix these errors, run "UPDATE_ENTITY_REGISTRY=true pytest {__file__}"' + ) def test_enum_options(): diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py index 96ab8f7a01a386..e69727f73b6bf4 100644 --- 
a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py @@ -226,9 +226,9 @@ def test_stateful_ingestion(pytestconfig, tmp_path, mock_time): pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore base_pipeline_config # type: ignore ) - pipeline_run1_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_name}" + pipeline_run1_config["sink"]["config"]["filename"] = ( + f"{tmp_path}/{output_file_name}" + ) pipeline_run1 = Pipeline.create(pipeline_run1_config) pipeline_run1.run() pipeline_run1.raise_from_status() @@ -254,16 +254,18 @@ def test_stateful_ingestion(pytestconfig, tmp_path, mock_time): ) as mock_state: mock_state.return_value = GenericCheckpointState(serde="utf-8") pipeline_run2 = None - pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore + pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( + base_pipeline_config # type: ignore + ) pipeline_run2_config["source"]["config"]["dataset_patterns"] = { "allow": ["dummy_dataset1", "dummy_dataset2"], } pipeline_run2_config["source"]["config"]["dpi_id_to_ingest"] = "job2" pipeline_run2_config["source"]["config"]["query_id_to_ingest"] = "query2" - pipeline_run2_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_name_after_deleted}" + pipeline_run2_config["sink"]["config"]["filename"] = ( + f"{tmp_path}/{output_file_name_after_deleted}" + ) pipeline_run2 = Pipeline.create(pipeline_run2_config) pipeline_run2.run() pipeline_run2.raise_from_status() @@ -370,9 +372,9 @@ def test_stateful_ingestion_failure(pytestconfig, tmp_path, mock_time): pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore base_pipeline_config # type: ignore ) - pipeline_run1_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_name}" + pipeline_run1_config["sink"]["config"]["filename"] = ( + f"{tmp_path}/{output_file_name}" + ) pipeline_run1 = Pipeline.create(pipeline_run1_config) pipeline_run1.run() pipeline_run1.raise_from_status() @@ -398,14 +400,16 @@ def test_stateful_ingestion_failure(pytestconfig, tmp_path, mock_time): ) as mock_state: mock_state.return_value = GenericCheckpointState(serde="utf-8") pipeline_run2 = None - pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore + pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( + base_pipeline_config # type: ignore + ) pipeline_run2_config["source"]["config"]["dataset_patterns"] = { "allow": ["dummy_dataset1", "dummy_dataset2"], } pipeline_run2_config["source"]["config"]["report_failure"] = True - pipeline_run2_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_name_after_deleted}" + pipeline_run2_config["sink"]["config"]["filename"] = ( + f"{tmp_path}/{output_file_name_after_deleted}" + ) pipeline_run2 = Pipeline.create(pipeline_run2_config) pipeline_run2.run() pipeline_run2.pretty_print_summary() diff --git a/metadata-ingestion/tests/unit/test_confluent_schema_registry.py b/metadata-ingestion/tests/unit/test_confluent_schema_registry.py index 3500636f00eddf..effa6ba85acaeb 100644 --- a/metadata-ingestion/tests/unit/test_confluent_schema_registry.py +++ b/metadata-ingestion/tests/unit/test_confluent_schema_registry.py @@ -85,16 +85,18 @@ def new_get_latest_version(subject_name: str) -> RegisteredSchema: "get_latest_version", new_get_latest_version, 
): - schema_str = confluent_schema_registry.get_schema_str_replace_confluent_ref_avro( - # The external reference would match by name. - schema=Schema( - schema_str=schema_str_orig, - schema_type="AVRO", - references=[ - SchemaReference( - name="TestTopic1", subject="schema_subject_1", version=1 - ) - ], + schema_str = ( + confluent_schema_registry.get_schema_str_replace_confluent_ref_avro( + # The external reference would match by name. + schema=Schema( + schema_str=schema_str_orig, + schema_type="AVRO", + references=[ + SchemaReference( + name="TestTopic1", subject="schema_subject_1", version=1 + ) + ], + ) ) ) assert schema_str == ConfluentSchemaRegistry._compact_schema( @@ -106,16 +108,18 @@ def new_get_latest_version(subject_name: str) -> RegisteredSchema: "get_latest_version", new_get_latest_version, ): - schema_str = confluent_schema_registry.get_schema_str_replace_confluent_ref_avro( - # The external reference would match by subject. - schema=Schema( - schema_str=schema_str_orig, - schema_type="AVRO", - references=[ - SchemaReference( - name="schema_subject_1", subject="TestTopic1", version=1 - ) - ], + schema_str = ( + confluent_schema_registry.get_schema_str_replace_confluent_ref_avro( + # The external reference would match by subject. + schema=Schema( + schema_str=schema_str_orig, + schema_type="AVRO", + references=[ + SchemaReference( + name="schema_subject_1", subject="TestTopic1", version=1 + ) + ], + ) ) ) assert schema_str == ConfluentSchemaRegistry._compact_schema( diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index ff22ffedc6228f..d7899af69f8405 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -475,9 +475,9 @@ def test_get_column_type_redshift(): # Test 'super' type which should not show any warnings/errors result_super = get_column_type(report, dataset_name, "super", "redshift") assert isinstance(result_super.type, NullTypeClass) - assert ( - len(report.infos) == 0 - ), "No warnings should be generated for known SUPER type" + assert len(report.infos) == 0, ( + "No warnings should be generated for known SUPER type" + ) # Test unknown type, which generates a warning but resolves to NullTypeClass unknown_type = "unknown_type" diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py index 3afa26b35dfe9f..48524450caf36e 100644 --- a/metadata-ingestion/tests/unit/test_iceberg.py +++ b/metadata-ingestion/tests/unit/test_iceberg.py @@ -88,15 +88,15 @@ def assert_field( expected_nullable: bool, expected_type: Any, ) -> None: - assert ( - schema_field.description == expected_description - ), f"Field description '{schema_field.description}' is different from expected description '{expected_description}'" - assert ( - schema_field.nullable == expected_nullable - ), f"Field nullable '{schema_field.nullable}' is different from expected nullable '{expected_nullable}'" - assert isinstance( - schema_field.type.type, expected_type - ), f"Field type {schema_field.type.type} is different from expected type {expected_type}" + assert schema_field.description == expected_description, ( + f"Field description '{schema_field.description}' is different from expected description '{expected_description}'" + ) + assert schema_field.nullable == expected_nullable, ( + f"Field nullable '{schema_field.nullable}' is different from expected nullable '{expected_nullable}'" + ) + assert isinstance(schema_field.type.type, 
expected_type), ( + f"Field type {schema_field.type.type} is different from expected type {expected_type}" + ) def test_config_no_catalog(): @@ -219,9 +219,9 @@ def test_iceberg_primitive_type_to_schema_field( ]: schema = Schema(column) schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema) - assert ( - len(schema_fields) == 1 - ), f"Expected 1 field, but got {len(schema_fields)}" + assert len(schema_fields) == 1, ( + f"Expected 1 field, but got {len(schema_fields)}" + ) assert_field( schema_fields[0], column.doc, @@ -300,19 +300,19 @@ def test_iceberg_list_to_schema_field( iceberg_source_instance = with_iceberg_source() schema = Schema(list_column) schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema) - assert ( - len(schema_fields) == 1 - ), f"Expected 1 field, but got {len(schema_fields)}" + assert len(schema_fields) == 1, ( + f"Expected 1 field, but got {len(schema_fields)}" + ) assert_field( schema_fields[0], list_column.doc, list_column.optional, ArrayTypeClass ) - assert isinstance( - schema_fields[0].type.type, ArrayType - ), f"Field type {schema_fields[0].type.type} was expected to be {ArrayType}" + assert isinstance(schema_fields[0].type.type, ArrayType), ( + f"Field type {schema_fields[0].type.type} was expected to be {ArrayType}" + ) arrayType: ArrayType = schema_fields[0].type.type - assert arrayType.nestedType == [ - expected_array_nested_type - ], f"List Field nested type {arrayType.nestedType} was expected to be {expected_array_nested_type}" + assert arrayType.nestedType == [expected_array_nested_type], ( + f"List Field nested type {arrayType.nestedType} was expected to be {expected_array_nested_type}" + ) @pytest.mark.parametrize( @@ -387,9 +387,9 @@ def test_iceberg_map_to_schema_field( schema_fields = iceberg_source_instance._get_schema_fields_for_schema(schema) # Converting an Iceberg Map type will be done by creating an array of struct(key, value) records. # The first field will be the array. 
- assert ( - len(schema_fields) == 3 - ), f"Expected 3 fields, but got {len(schema_fields)}" + assert len(schema_fields) == 3, ( + f"Expected 3 fields, but got {len(schema_fields)}" + ) assert_field( schema_fields[0], map_column.doc, map_column.optional, ArrayTypeClass ) diff --git a/metadata-ingestion/tests/unit/test_postgres_source.py b/metadata-ingestion/tests/unit/test_postgres_source.py index 91a62b603bb584..25140cf1b997f8 100644 --- a/metadata-ingestion/tests/unit/test_postgres_source.py +++ b/metadata-ingestion/tests/unit/test_postgres_source.py @@ -21,9 +21,7 @@ def test_initial_database(create_engine_mock): @patch("datahub.ingestion.source.sql.postgres.create_engine") def test_get_inspectors_multiple_databases(create_engine_mock): - execute_mock = ( - create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute - ) + execute_mock = create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute execute_mock.return_value = [{"datname": "db1"}, {"datname": "db2"}] config = PostgresConfig.parse_obj({**_base_config(), "initial_database": "db0"}) @@ -37,9 +35,7 @@ def test_get_inspectors_multiple_databases(create_engine_mock): @patch("datahub.ingestion.source.sql.postgres.create_engine") def tests_get_inspectors_with_database_provided(create_engine_mock): - execute_mock = ( - create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute - ) + execute_mock = create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute execute_mock.return_value = [{"datname": "db1"}, {"datname": "db2"}] config = PostgresConfig.parse_obj({**_base_config(), "database": "custom_db"}) @@ -51,9 +47,7 @@ def tests_get_inspectors_with_database_provided(create_engine_mock): @patch("datahub.ingestion.source.sql.postgres.create_engine") def tests_get_inspectors_with_sqlalchemy_uri_provided(create_engine_mock): - execute_mock = ( - create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute - ) + execute_mock = create_engine_mock.return_value.connect.return_value.__enter__.return_value.execute execute_mock.return_value = [{"datname": "db1"}, {"datname": "db2"}] config = PostgresConfig.parse_obj( diff --git a/metadata-ingestion/tests/unit/test_rest_sink.py b/metadata-ingestion/tests/unit/test_rest_sink.py index a76f96039c2c71..564cf613c04464 100644 --- a/metadata-ingestion/tests/unit/test_rest_sink.py +++ b/metadata-ingestion/tests/unit/test_rest_sink.py @@ -283,9 +283,9 @@ def test_datahub_rest_emitter(requests_mock, record, path, snapshot): def match_request_text(request: requests.Request) -> bool: requested_snapshot = request.json() - assert ( - requested_snapshot == snapshot - ), f"Expected snapshot to be {json.dumps(snapshot)}, got {json.dumps(requested_snapshot)}" + assert requested_snapshot == snapshot, ( + f"Expected snapshot to be {json.dumps(snapshot)}, got {json.dumps(requested_snapshot)}" + ) return True requests_mock.post( diff --git a/metadata-ingestion/tests/unit/utilities/test_lossy_collections.py b/metadata-ingestion/tests/unit/utilities/test_lossy_collections.py index 43967367dff389..e137d671e95d71 100644 --- a/metadata-ingestion/tests/unit/utilities/test_lossy_collections.py +++ b/metadata-ingestion/tests/unit/utilities/test_lossy_collections.py @@ -34,7 +34,7 @@ def test_lossyset_sampling(length, sampling): assert len(lossy_set) == min(10, length) assert lossy_set.sampled is sampling if sampling: - assert f"... sampled with at most {length-10} elements missing" in str( + assert f"... 
sampled with at most {length - 10} elements missing" in str( lossy_set ) else: @@ -66,7 +66,7 @@ def test_lossydict_sampling(length, sampling, sub_length): element_length_map[i] = len(lossy_dict[i]) current_list = lossy_dict.get(i, LossyList()) - current_list.append(f"{i}:{round(time.time(),2)} Hello World") + current_list.append(f"{i}:{round(time.time(), 2)} Hello World") lossy_dict[i] = current_list element_length_map[i] += 1 diff --git a/metadata-ingestion/tests/unit/utilities/test_partition_executor.py b/metadata-ingestion/tests/unit/utilities/test_partition_executor.py index ce211c2d618062..89e95d185e8028 100644 --- a/metadata-ingestion/tests/unit/utilities/test_partition_executor.py +++ b/metadata-ingestion/tests/unit/utilities/test_partition_executor.py @@ -37,9 +37,9 @@ def task(key: str, id: str) -> None: saw_keys_in_parallel = False while executing_tasks or not done_tasks: keys_executing = [key for key, _ in executing_tasks] - assert list(sorted(keys_executing)) == list( - sorted(set(keys_executing)) - ), "partitioning not working" + assert list(sorted(keys_executing)) == list(sorted(set(keys_executing))), ( + "partitioning not working" + ) if len(keys_executing) == 2: saw_keys_in_parallel = True diff --git a/metadata-ingestion/tests/unit/utilities/test_threaded_iterator_executor.py b/metadata-ingestion/tests/unit/utilities/test_threaded_iterator_executor.py index 35c44c7b4a8479..fb7e2266e1c9d3 100644 --- a/metadata-ingestion/tests/unit/utilities/test_threaded_iterator_executor.py +++ b/metadata-ingestion/tests/unit/utilities/test_threaded_iterator_executor.py @@ -4,7 +4,7 @@ def test_threaded_iterator_executor(): def table_of(i): for j in range(1, 11): - yield f"{i}x{j}={i*j}" + yield f"{i}x{j}={i * j}" assert { res diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index f3dc1de830ccef..c38468ca8cd8b0 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -73,16 +73,16 @@ task installDev(type: Exec) { task pythonLint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "black --check --diff tests/ && " + "ruff check tests/ && " + + "ruff format --check tests/ && " + "mypy tests/" } task pythonLintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "black tests/ && " + "ruff check --fix tests/ && " + + "ruff format tests/ && " + "mypy tests/" } @@ -154,3 +154,19 @@ task lint { task lintFix { dependsOn pythonLintFix } + +task cleanPythonCache(type: Exec) { + commandLine 'bash', '-c', + "find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" +} + + +clean { + delete venv_name + delete 'build' + delete 'dist' + delete '.ruff_cache' + delete '.mypy_cache' + delete '.pytest_cache' +} +clean.dependsOn cleanPythonCache \ No newline at end of file diff --git a/smoke-test/pyproject.toml b/smoke-test/pyproject.toml index 55f037db2effea..55e286c73c01b9 100644 --- a/smoke-test/pyproject.toml +++ b/smoke-test/pyproject.toml @@ -7,20 +7,21 @@ name = "smoke-test" version = "0.0.0" description = "" authors = [ - { name="Acryl Data", email="eng@acryl.io" }, + { name="Acryl Data", email="eng@acryl.io" }, ] requires-python = ">=3.9" +[tool.ruff] +# Enable ruff format +target-version = "py310" +line-length = 88 +extend-exclude = ["tmp", "venv"] -[tool.black] -extend-exclude = ''' -# A regex preceded with ^/ will apply only to files and directories -# in the root of the project. 
-tmp -venv -''' -include = '\.pyi?$' -target-version = ['py310'] +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" [tool.ruff.lint.isort] combine-as-imports = true @@ -40,19 +41,19 @@ required-imports = [] classes = ["typing"] [tool.ruff.lint] -select = [ - "B", - "C90", - "E", - "F", - "I", # For isort - "TID", +extend-select = [ + "B", # flake8-bugbear + "C90", # mccabe complexity + "E", # pycodestyle errors + "F", # pyflakes + "G010", # logging.warn -> logging.warning + "I", # isort + "TID", # flake8-tidy-imports ] ignore = [ - 'E501', # Ignore line length, since black handles that. - 'D203', # Ignore 1 blank line required before class docstring. - 'B904', # exception with `raise ... from err` or `raise ... from None` to distinguish - 'TID252', # Prefer absolute imports over relative imports + "E501", # Line length violations (handled by formatter) + "B904", # exception with `raise ... from err` or `raise ... from None` to distinguish + "TID252",# Prefer absolute imports over relative imports ] [tool.ruff.lint.mccabe] @@ -74,5 +75,4 @@ disallow_untyped_decorators = true warn_unused_configs = true # eventually we'd like to enable these disallow_incomplete_defs = false -disallow_untyped_defs = false - +disallow_untyped_defs = false \ No newline at end of file diff --git a/smoke-test/requirements.txt b/smoke-test/requirements.txt index 6779733a850bad..fadc3dbec1f2b5 100644 --- a/smoke-test/requirements.txt +++ b/smoke-test/requirements.txt @@ -9,7 +9,6 @@ joblib pytest-xdist networkx # libaries for linting below this -black==23.7.0 mypy==1.5.1 ruff==0.9.2 # stub version are copied from metadata-ingestion/setup.py and that should be the source of truth diff --git a/smoke-test/tests/data_process_instance/test_data_process_instance.py b/smoke-test/tests/data_process_instance/test_data_process_instance.py index f1c532af515cfa..a68db03cf8cf16 100644 --- a/smoke-test/tests/data_process_instance/test_data_process_instance.py +++ b/smoke-test/tests/data_process_instance/test_data_process_instance.py @@ -250,14 +250,14 @@ def test_search_dpi(auth_session, ingest_cleanup_data): assert res_data, "Response should not be empty" assert "data" in res_data, "Response should contain 'data' field" print("RESPONSE DATA:" + str(res_data)) - assert ( - "scrollAcrossEntities" in res_data["data"] - ), "Response should contain 'scrollAcrossEntities' field" + assert "scrollAcrossEntities" in res_data["data"], ( + "Response should contain 'scrollAcrossEntities' field" + ) search_results = res_data["data"]["scrollAcrossEntities"] - assert ( - "searchResults" in search_results - ), "Response should contain 'searchResults' field" + assert "searchResults" in search_results, ( + "Response should contain 'searchResults' field" + ) results = search_results["searchResults"] assert len(results) > 0, "Should find at least one result" diff --git a/smoke-test/tests/dataproduct/test_dataproduct.py b/smoke-test/tests/dataproduct/test_dataproduct.py index 0aa66984b394c8..8d484820d1ed45 100644 --- a/smoke-test/tests/dataproduct/test_dataproduct.py +++ b/smoke-test/tests/dataproduct/test_dataproduct.py @@ -135,9 +135,9 @@ def validate_relationships( urn_match[dataset_urn] = True urns_missing = [k for k in urn_match if urn_match[k] is False] - assert ( - urns_missing == [] - ), "All dataset urns should have a DataProductContains relationship to the data product" + assert urns_missing == [], ( + "All dataset urns should have a DataProductContains relationship 
to the data product" + ) dataset_urns_matched = set() for e in graph_client.get_related_entities( @@ -147,9 +147,9 @@ def validate_relationships( ): dataset_urns_matched.add(e.urn) - assert ( - set(dataset_urns) == dataset_urns_matched - ), "All dataset urns should be navigable from the data product" + assert set(dataset_urns) == dataset_urns_matched, ( + "All dataset urns should be navigable from the data product" + ) @tenacity.retry( @@ -247,6 +247,6 @@ def test_create_data_product(graph_client, ingest_cleanup_data): urn_match[dataset_urn] = True urns_missing = [k for k in urn_match if urn_match[k] is False] - assert set(urns_missing) == set( - dataset_urns - ), f"All dataset urns should no longer have a DataProductContains relationship to the data product {data_product_urn}" + assert set(urns_missing) == set(dataset_urns), ( + f"All dataset urns should no longer have a DataProductContains relationship to the data product {data_product_urn}" + ) diff --git a/smoke-test/tests/lineage/test_lineage.py b/smoke-test/tests/lineage/test_lineage.py index 771085043926dd..dd5309667806c5 100644 --- a/smoke-test/tests/lineage/test_lineage.py +++ b/smoke-test/tests/lineage/test_lineage.py @@ -92,7 +92,7 @@ def _explain_sal_result(result: dict) -> str: explain += "Entities: " try: for e in entities: - explain += f"\t{e.replace('urn:li:','')}\n" + explain += f"\t{e.replace('urn:li:', '')}\n" for entity in entities: paths = [ x["paths"][0]["path"] @@ -349,9 +349,9 @@ def get_expectation_for_query(self, query: ImpactQuery) -> LineageExpectation: lineage_expectation.impacted_entities[impacted_entity] ) else: - entries_to_add[ - impacted_dataset_entity - ] = lineage_expectation.impacted_entities[impacted_entity] + entries_to_add[impacted_dataset_entity] = ( + lineage_expectation.impacted_entities[impacted_entity] + ) entries_to_remove.append(impacted_entity) for impacted_entity in entries_to_remove: del lineage_expectation.impacted_entities[impacted_entity] @@ -756,9 +756,9 @@ def test_expectation(self, graph: DataHubGraph) -> bool: ] ) try: - assert ( - impacted_entities == impacted_entities_expectation - ), f"Expected impacted entities to be {impacted_entities_expectation}, found {impacted_entities}" + assert impacted_entities == impacted_entities_expectation, ( + f"Expected impacted entities to be {impacted_entities_expectation}, found {impacted_entities}" + ) except Exception: # breakpoint() raise @@ -783,10 +783,14 @@ def test_expectation(self, graph: DataHubGraph) -> bool: try: assert len(impacted_entity_paths) == len( expectation.impacted_entities[impacted_entity] - ), f"Expected length of impacted entity paths to be {len(expectation.impacted_entities[impacted_entity])}, found {len(impacted_entity_paths)}" + ), ( + f"Expected length of impacted entity paths to be {len(expectation.impacted_entities[impacted_entity])}, found {len(impacted_entity_paths)}" + ) assert set(impacted_entity_paths) == set( expectation.impacted_entities[impacted_entity] - ), f"Expected impacted entity paths to be {expectation.impacted_entities[impacted_entity]}, found {impacted_entity_paths}" + ), ( + f"Expected impacted entity paths to be {expectation.impacted_entities[impacted_entity]}, found {impacted_entity_paths}" + ) except Exception: # breakpoint() raise diff --git a/smoke-test/tests/managed_ingestion/managed_ingestion_test.py b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py index 5d6179de6be644..7fe9421af85b1a 100644 --- a/smoke-test/tests/managed_ingestion/managed_ingestion_test.py +++ 
b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py @@ -489,9 +489,9 @@ def test_create_list_get_ingestion_execution_request(auth_session): assert res_data assert res_data["data"] - assert ( - res_data["data"]["createIngestionExecutionRequest"] is not None - ), f"res_data was {res_data}" + assert res_data["data"]["createIngestionExecutionRequest"] is not None, ( + f"res_data was {res_data}" + ) assert "errors" not in res_data execution_request_urn = res_data["data"]["createIngestionExecutionRequest"] diff --git a/smoke-test/tests/read_only/test_search.py b/smoke-test/tests/read_only/test_search.py index 36ecf68395f919..66bbeb408d0529 100644 --- a/smoke-test/tests/read_only/test_search.py +++ b/smoke-test/tests/read_only/test_search.py @@ -153,6 +153,6 @@ def test_openapi_v3_entity(auth_session, entity_type): expected_data = {"urn": first_urn} - assert ( - actual_data["urn"] == expected_data["urn"] - ), f"Mismatch: expected {expected_data}, got {actual_data}" + assert actual_data["urn"] == expected_data["urn"], ( + f"Mismatch: expected {expected_data}, got {actual_data}" + ) diff --git a/smoke-test/tests/read_only/test_services_up.py b/smoke-test/tests/read_only/test_services_up.py index 12ff04965548f0..79812b46476fa8 100644 --- a/smoke-test/tests/read_only/test_services_up.py +++ b/smoke-test/tests/read_only/test_services_up.py @@ -27,6 +27,6 @@ def test_gms_config_accessible(auth_session) -> None: default_cli_version: str = gms_config["managedIngestion"]["defaultCliVersion"] print(f"Default CLI version: {default_cli_version}") assert not default_cli_version.startswith("@") - assert "." in default_cli_version or looks_like_a_short_sha( - default_cli_version - ), "Default CLI version does not look like a version string" + assert "." in default_cli_version or looks_like_a_short_sha(default_cli_version), ( + "Default CLI version does not look like a version string" + ) diff --git a/smoke-test/tests/utilities/file_emitter.py b/smoke-test/tests/utilities/file_emitter.py index ddbcff8db31d8b..d5539d143af737 100644 --- a/smoke-test/tests/utilities/file_emitter.py +++ b/smoke-test/tests/utilities/file_emitter.py @@ -7,7 +7,7 @@ class FileEmitter: def __init__( - self, filename: str, run_id: str = f"test_{int(time.time()*1000.0)}" + self, filename: str, run_id: str = f"test_{int(time.time() * 1000.0)}" ) -> None: self.sink: FileSink = FileSink( ctx=PipelineContext(run_id=run_id),