diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index e44e6b11c6d057..a751d24dc18310 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -205,7 +205,7 @@ jobs: ignore-unfixed: true vuln-type: "os,library" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -276,7 +276,7 @@ jobs: ignore-unfixed: true vuln-type: "os,library" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -347,7 +347,7 @@ jobs: ignore-unfixed: true vuln-type: "os,library" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -418,7 +418,7 @@ jobs: ignore-unfixed: true vuln-type: "os,library" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -489,7 +489,7 @@ jobs: ignore-unfixed: true vuln-type: "os,library" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -550,7 +550,7 @@ jobs: ignore-unfixed: true vuln-type: "os,library" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -611,7 +611,7 @@ jobs: ignore-unfixed: true vuln-type: "os,library" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -672,7 +672,7 @@ jobs: ignore-unfixed: true vuln-type: "os,library" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -901,7 +901,7 @@ jobs: vuln-type: "os,library" timeout: 15m - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" @@ -1000,7 +1000,7 @@ jobs: vuln-type: "os,library" timeout: 15m - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v2 + uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "trivy-results.sarif" diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml index de7ad21b3e67bb..f519db20a73e10 100644 --- a/.github/workflows/pr-labeler.yml +++ b/.github/workflows/pr-labeler.yml @@ -10,11 +10,11 @@ jobs: pull-requests: write runs-on: ubuntu-latest steps: - - uses: actions/labeler@v4 + - uses: actions/labeler@v5 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" configuration-path: ".github/pr-labeler-config.yml" - - uses: actions-ecosystem/action-add-labels@v1.1.0 + - uses: actions-ecosystem/action-add-labels@v1.1.3 # only add names of Acryl Data team members here if: ${{ @@ -55,7 +55,7 @@ jobs: github_token: ${{ github.token }} labels: | community-contribution - - uses: actions-ecosystem/action-add-labels@v1.1.0 + - uses: actions-ecosystem/action-add-labels@v1.1.3 # only add names of champions here. Confirm with DevRel Team if: ${{ diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 14e467e27316e3..16a6704949c875 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -23,8 +23,10 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { inputs.file file('setup.py') outputs.file(sentinel_file) commandLine 'bash', '-c', - "${python_executable} -m venv ${venv_name} && set -x && " + - "${venv_name}/bin/python -m pip install --upgrade uv && " + + "if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_name}; fi && " + + "set -x && " + + // If we already have uv available, use it to upgrade uv. Otherwise, install it with pip. + "if [ ! -f ${venv_name}/bin/uv ]; then ${venv_name}/bin/python -m pip install --upgrade uv; else ${venv_name}/bin/python -m uv pip install --upgrade uv; fi && " + "touch ${sentinel_file}" } diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml index 745547f88bcb93..07f2010fde25f0 100644 --- a/metadata-ingestion/pyproject.toml +++ b/metadata-ingestion/pyproject.toml @@ -11,25 +11,13 @@ extend-exclude = ''' include = '\.pyi?$' target-version = ['py38', 'py39', 'py310', 'py311'] - [tool.ruff.lint.isort] +section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"] +sections = { "patch" = ["datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] } combine-as-imports = true -known-first-party = ["datahub"] -extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] -section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] -force-sort-within-sections = false -force-wrap-aliases = false -split-on-trailing-comma = false -order-by-type = true -relative-imports-order = "closest-to-furthest" -force-single-line = false -single-line-exclusions = ["typing"] -length-sort = false -from-first = false -required-imports = [] -classes = ["typing"] [tool.ruff] +target-version = "py38" # Same as Black. line-length = 88 # Exclude directories matching these patterns. @@ -42,15 +30,16 @@ exclude = [ ] [tool.ruff.lint] -select = [ - "B", +extend-select = [ + "B", # Bugbear "C90", "E", "F", - "I", # For isort - "TID", + "G010", # logging.warn -> logging.warning + "I", # Import sorting + "TID", # Tidy imports ] -ignore = [ +extend-ignore = [ # Ignore line length violations (handled by Black) "E501", # Ignore whitespace before ':' (matches Black) @@ -69,9 +58,7 @@ ignore = [ max-complexity = 20 [tool.ruff.lint.flake8-tidy-imports] -# Disallow all relative imports. ban-relative-imports = "all" - [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py b/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py index a898e35bb810ec..bf58d2fbbda913 100644 --- a/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py +++ b/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py @@ -114,7 +114,7 @@ def generate_mcp( ) urns_created.add(m.urn) else: - logger.warn( + logger.warning( f"Suppressing emission of member {m.urn} before we already emitted metadata for it" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_boto_utils.py b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_boto_utils.py index 682cddca8f4154..87a6f8a5baf2e3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_boto_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_boto_utils.py @@ -40,7 +40,7 @@ def get_s3_tags( ] ) except s3.meta.client.exceptions.ClientError: - logger.warn(f"No tags found for bucket={bucket_name}") + logger.warning(f"No tags found for bucket={bucket_name}") if use_s3_object_tags and key_name is not None: s3_client = aws_config.get_s3_client() @@ -53,7 +53,7 @@ def get_s3_tags( else: # Unlike bucket tags, if an object does not have tags, it will just return an empty array # as opposed to an exception. - logger.warn(f"No tags found for bucket={bucket_name} key={key_name}") + logger.warning(f"No tags found for bucket={bucket_name} key={key_name}") if len(tags_to_add) == 0: return None if ctx.graph is not None: @@ -65,7 +65,7 @@ def get_s3_tags( if current_tags: tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags]) else: - logger.warn("Could not connect to DatahubApi. No current tags to maintain") + logger.warning("Could not connect to DatahubApi. No current tags to maintain") # Remove duplicate tags tags_to_add = sorted(list(set(tags_to_add))) new_tags = GlobalTagsClass( diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index be182c70eafec1..aba0deebd356c5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -1,3 +1,5 @@ +from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED + import collections import concurrent.futures import contextlib @@ -10,7 +12,6 @@ import traceback import unittest.mock import uuid -from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED from functools import lru_cache from typing import ( TYPE_CHECKING, diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py index 95c2345232a1ee..73e7e415e2b9eb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py @@ -89,7 +89,7 @@ def make_usage_workunit( top_sql_queries: Optional[List[str]] = None if query_freq is not None: if top_n_queries < len(query_freq): - logger.warn( + logger.warning( f"Top N query limit exceeded on {str(resource)}. Max number of queries {top_n_queries} < {len(query_freq)}. Truncating top queries to {top_n_queries}." ) query_freq = query_freq[0:top_n_queries] diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index d3149fec970b59..bf28ab0e7b229b 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -1,9 +1,10 @@ +from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED + import dataclasses import functools import logging import traceback from collections import defaultdict -from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union import pydantic.dataclasses diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index fd2c68266624f9..57a5cc3c9a6574 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -1,8 +1,9 @@ +from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED + import functools import hashlib import logging import re -from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED from typing import Dict, Iterable, Optional, Tuple, Union import sqlglot diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_patch.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_patch.py index 31f87b8a150eb2..dee6d9630c12eb 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_patch.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_patch.py @@ -1,6 +1,7 @@ -import time from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED +import time + import pytest import sqlglot import sqlglot.errors