Skip to content

Commit

Permalink
fix(ingest): simplify + fix ruff config (datahub-project#12382)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Jan 18, 2025
1 parent 94e9665 commit f06ad1a
Show file tree
Hide file tree
Showing 9 changed files with 26 additions and 33 deletions.
6 changes: 4 additions & 2 deletions metadata-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
inputs.file file('setup.py')
outputs.file(sentinel_file)
commandLine 'bash', '-c',
"${python_executable} -m venv ${venv_name} && set -x && " +
"${venv_name}/bin/python -m pip install --upgrade uv && " +
"if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_name}; fi && " +
"set -x && " +
// If we already have uv available, use it to upgrade uv. Otherwise, install it with pip.
"if [ ! -f ${venv_name}/bin/uv ]; then ${venv_name}/bin/python -m pip install --upgrade uv; else ${venv_name}/bin/python -m uv pip install --upgrade uv; fi && " +
"touch ${sentinel_file}"
}

Expand Down
31 changes: 9 additions & 22 deletions metadata-ingestion/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,13 @@ extend-exclude = '''
include = '\.pyi?$'
target-version = ['py38', 'py39', 'py310', 'py311']


[tool.ruff.lint.isort]
section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"]
sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] }
combine-as-imports = true
known-first-party = ["datahub"]
extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
force-sort-within-sections = false
force-wrap-aliases = false
split-on-trailing-comma = false
order-by-type = true
relative-imports-order = "closest-to-furthest"
force-single-line = false
single-line-exclusions = ["typing"]
length-sort = false
from-first = false
required-imports = []
classes = ["typing"]

[tool.ruff]
target-version = "py38"
# Same as Black.
line-length = 88
# Exclude directories matching these patterns.
Expand All @@ -42,15 +30,16 @@ exclude = [
]

[tool.ruff.lint]
select = [
"B",
extend-select = [
"B", # Bugbear
"C90",
"E",
"F",
"I", # For isort
"TID",
"G010", # logging.warn -> logging.warning
"I", # Import sorting
"TID", # Tidy imports
]
ignore = [
extend-ignore = [
# Ignore line length violations (handled by Black)
"E501",
# Ignore whitespace before ':' (matches Black)
Expand All @@ -69,9 +58,7 @@ ignore = [
max-complexity = 20

[tool.ruff.lint.flake8-tidy-imports]
# Disallow all relative imports.
ban-relative-imports = "all"


[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def generate_mcp(
)
urns_created.add(m.urn)
else:
logger.warn(
logger.warning(
f"Suppressing emission of member {m.urn} before we already emitted metadata for it"
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def get_s3_tags(
]
)
except s3.meta.client.exceptions.ClientError:
logger.warn(f"No tags found for bucket={bucket_name}")
logger.warning(f"No tags found for bucket={bucket_name}")

if use_s3_object_tags and key_name is not None:
s3_client = aws_config.get_s3_client()
Expand All @@ -53,7 +53,7 @@ def get_s3_tags(
else:
# Unlike bucket tags, an object that has no tags does not raise an exception;
# it just returns an empty array.
logger.warn(f"No tags found for bucket={bucket_name} key={key_name}")
logger.warning(f"No tags found for bucket={bucket_name} key={key_name}")
if len(tags_to_add) == 0:
return None
if ctx.graph is not None:
Expand All @@ -65,7 +65,7 @@ def get_s3_tags(
if current_tags:
tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags])
else:
logger.warn("Could not connect to DatahubApi. No current tags to maintain")
logger.warning("Could not connect to DatahubApi. No current tags to maintain")
# Remove duplicate tags
tags_to_add = sorted(list(set(tags_to_add)))
new_tags = GlobalTagsClass(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED

import collections
import concurrent.futures
import contextlib
Expand All @@ -10,7 +12,6 @@
import traceback
import unittest.mock
import uuid
from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
from functools import lru_cache
from typing import (
TYPE_CHECKING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def make_usage_workunit(
top_sql_queries: Optional[List[str]] = None
if query_freq is not None:
if top_n_queries < len(query_freq):
logger.warn(
logger.warning(
f"Top N query limit exceeded on {str(resource)}. Max number of queries {top_n_queries} < {len(query_freq)}. Truncating top queries to {top_n_queries}."
)
query_freq = query_freq[0:top_n_queries]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

import dataclasses
import functools
import logging
import traceback
from collections import defaultdict
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union

import pydantic.dataclasses
Expand Down
3 changes: 2 additions & 1 deletion metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

import functools
import hashlib
import logging
import re
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
from typing import Dict, Iterable, Optional, Tuple, Union

import sqlglot
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import time
from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED

import time

import pytest
import sqlglot
import sqlglot.errors
Expand Down

0 comments on commit f06ad1a

Please sign in to comment.