From ef42e6ea5e89bab562fafe1a5f2afe9288ddd47f Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 16 Jan 2024 08:21:45 -0600 Subject: [PATCH] type fixing, hatch updates --- .github/workflows/ci_code_quality.yml | 2 +- .github/workflows/ci_tests.yml | 2 +- .gitignore | 3 + .pre-commit-config.yaml | 41 ++------- Makefile | 32 ------- dbt_common/__about__.py | 1 + dbt_common/clients/agate_helper.py | 4 +- pyproject.toml | 123 ++++++++++++++++++------- tests/unit/test_agate_helper.py | 1 - tests/unit/test_core_dbt_utils.py | 32 +++---- third-party-stubs/agate/__init__.pyi | 89 ++++++++++++++++++ third-party-stubs/agate/data_types.pyi | 71 ++++++++++++++ 12 files changed, 281 insertions(+), 120 deletions(-) delete mode 100644 Makefile create mode 100644 dbt_common/__about__.py create mode 100644 third-party-stubs/agate/__init__.pyi create mode 100644 third-party-stubs/agate/data_types.pyi diff --git a/.github/workflows/ci_code_quality.yml b/.github/workflows/ci_code_quality.yml index 7b12e3d5..153f803e 100644 --- a/.github/workflows/ci_code_quality.yml +++ b/.github/workflows/ci_code_quality.yml @@ -50,4 +50,4 @@ jobs: run: pip3 install hatch - name: Run Pre-commit Hooks - run: hatch run dev-env:pre-commit run --show-diff-on-failure --color=always --all-files + run: hatch run lint-all diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 03cdffb8..d0da8a56 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -55,7 +55,7 @@ jobs: run: pip3 install hatch - name: "Run Tests" - run: hatch run dev-env:pytest tests + run: hatch run unit-tests - name: "Get current date" if: always() diff --git a/.gitignore b/.gitignore index 28030861..f69e6aa9 100644 --- a/.gitignore +++ b/.gitignore @@ -143,6 +143,9 @@ venv.bak/ .dmypy.json dmypy.json +# ruff +.ruff_cache/ + # Pyre type checker .pyre/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b2efe56b..c79373dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,37 +18,10 @@ repos: exclude_types: - "markdown" - id: check-case-conflict -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - - id: black - alias: black-check - stages: [manual] - args: - - "--check" - - "--diff" -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.4.1 - hooks: - - id: mypy - # N.B.: Mypy is... a bit fragile. - # - # By using `language: system` we run this hook in the local - # environment instead of a pre-commit isolated one. This is needed - # to ensure mypy correctly parses the project. - - # It may cause trouble - # in that it adds environmental variables out of our control to the - # mix. Unfortunately, there's nothing we can do about per pre-commit's - # author. - # See https://github.com/pre-commit/pre-commit/issues/730 for details. - args: [--show-error-codes] - files: ^dbt_common/ - language: system - - id: mypy - alias: mypy-check - stages: [manual] - args: [--show-error-codes, --pretty] - files: ^dbt_common - language: system + - id: format + name: format + entry: hatch run lint-all + language: python + types: [python] + pass_filenames: false + verbose: true \ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index ed1b9d70..00000000 --- a/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -.DEFAULT_GOAL:=help - - -.PHONY: run install-hatch overwrite-pre-commit install test lint json_schema - -run: - export FORMAT_JSON_LOGS="1" - -install-hatch: - pip3 install hatch - -# This edits your local pre-commit hook file to use Hatch when executing. -overwrite-pre-commit: - hatch run dev-env:pre-commit install - hatch run dev-env:sed -i -e "s/exec /exec hatch run dev-env:/g" .git/hooks/pre-commit - -test: - export FORMAT_JSON_LOGS="1" && hatch -v run dev-env:pytest -n auto tests - -lint: - hatch run dev-env:pre-commit run --show-diff-on-failure --color=always --all-files - -.PHONY: proto_types -proto_types: ## generates google protobuf python file from types.proto - hatch run dev-env:protoc -I=./dbt_common/events --python_out=./dbt_common/events ./dbt_common/events/types.proto - -.PHONY: help -help: ## Show this help message. - @echo 'usage: make [target]' - @echo - @echo 'targets:' - @grep -E '^[8+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' diff --git a/dbt_common/__about__.py b/dbt_common/__about__.py new file mode 100644 index 00000000..5820a729 --- /dev/null +++ b/dbt_common/__about__.py @@ -0,0 +1 @@ +version = "0.0.1" diff --git a/dbt_common/clients/agate_helper.py b/dbt_common/clients/agate_helper.py index d5aa1b2a..3aade66d 100644 --- a/dbt_common/clients/agate_helper.py +++ b/dbt_common/clients/agate_helper.py @@ -17,7 +17,7 @@ def cast(self, d): # by default agate will cast none as a Number # but we need to cast it as an Integer to preserve # the type when merging and unioning tables - if isinstance(d, int) or d is None: + if type(d) == int or d is None: # noqa [E721] return d else: raise agate.exceptions.CastError('Can not parse value "%s" as Integer.' % d) @@ -30,7 +30,7 @@ class Number(agate.data_types.Number): # undo the change in https://github.com/wireservice/agate/pull/733 # i.e. do not cast True and False to numeric 1 and 0 def cast(self, d): - if isinstance(d, bool): + if type(d) == bool: # noqa [E721] raise agate.exceptions.CastError("Do not cast True to 1 or False to 0.") else: return super().cast(d) diff --git a/pyproject.toml b/pyproject.toml index b74190b5..aa460944 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dbt-common" -version = "0.0.1" +dynamic = ["version"] description = "The shared common utilities that dbt-core and adapter implementations use" readme = "README.md" requires-python = ">=3.8" @@ -9,8 +9,15 @@ keywords = [] authors = [ { name = "dbt Labs", email = "info@dbtlabs.com" }, ] +maintainers = [ + { name = "dbt Labs", email = "info@dbtlabs.com" }, +] classifiers = [ - "Development Status :: 4 - Beta", + "Development Status :: 2 - Pre-Alpha", + "License :: OSI Approved :: Apache Software License", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", "Programming Language :: Python", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", @@ -32,6 +39,33 @@ dependencies = [ "requests<3.0.0", "typing-extensions~=4.4", ] +[project.optional-dependencies] +lint = [ + "black~=23.3", + "flake8", + "Flake8-pyproject", + "mypy~=1.3", + "ruff==0.1.11", + "types-Jinja2~=2.11", + "types-jsonschema~=4.17", + "types-protobuf~=4.24.0", + "types-python-dateutil~=2.8", + "types-PyYAML~=6.0", + "types-requests<2.31.0" # types-requests 2.31.0.8 requires urllib3>=2, but we pin urllib3 ~= 1.0 because of openssl requirement for requests + +] +test = [ + "pytest~=7.3", +# "pytest-dotenv", + "pytest-xdist~=3.2", + "hypothesis~=6.87" +] + +[project.urls] +Homepage = "https://github.com/dbt-labs/dbt-common" # TODO: should this be dbt's homepage? +Repository = "https://github.com/dbt-labs/dbt-common.git" +Issues = "https://github.com/dbt-labs/dbt-common/issues" +Changelog = "https://github.com/dbt-labs/dbt-common/blob/main/CHANGELOG.md" [build-system] requires = ["hatchling"] @@ -45,36 +79,40 @@ exclude = [ ".gitignore", ".pre-commit-config.yaml", "CONTRIBUTING.md", - "MAKEFILE", + "MAKEFILE", # TODO: remove once we remove the makefile "/tests", ] [tool.hatch.build.targets.wheel] packages = ["dbt_common"] -[tool.hatch.envs.dev-env.scripts] -all = ["pre-commit run --all-files"] +[tool.hatch.version] +path = "dbt_common/__about__.py" -[tool.hatch.envs.dev-env] -description = "Env for running development commands like pytest / pre-commit" -dependencies = [ - "pytest~=7.3", - "pytest-xdist~=3.2", - "httpx~=0.24", - "hypothesis~=6.87", - "pre-commit~=3.2", - "isort~=5.12", - "black~=23.3", - "ruff==0.1.11", - "mypy~=1.3", - "pytest~=7.3", - "types-Jinja2~=2.11", - "types-jsonschema~=4.17", - "types-protobuf~=4.24.0", - "types-python-dateutil~=2.8", - "types-PyYAML~=6.0", - "types-requests<2.31.0" # types-requests 2.31.0.8 requires urllib3>=2, but we pin urllib3 ~= 1.0 because of openssl requirement for requests +# [tool.hatch.envs.dev.scripts] +# all = ["pre-commit run --all-files"] + +[tool.hatch.envs.default] +description = "Env for running development commands like pytest & linting" +features = ["lint", "test"] + +# these are the commands that you run with `hatch run ` for local dev & CI +[tool.hatch.envs.default.scripts] +# This edits your local pre-commit hook file to use Hatch when executing. +# TODO: move to it's own env that only installs pre-commit - may need to live here though? +setup-pre-commit = 'sed -i -e "s/exec /exec hatch run /g" .git/hooks/pre-commit' +unit-tests = "- python -m pytest {args:tests/unit}" +lint-all = [ + "- lint-black", + "- lint-flake8", + "- lint-mypy", ] +lint-black = "python -m black ." +lint-flake8 = "python -m flake8 ." +lint-mypy = "python -m mypy ." +proto = "protoc -I=./dbt_common/events --python_out=./dbt_common/events ./dbt_common/events/types.proto" +lint = ["black --check --diff {args:.}", "ruff check {args:.}", "mypy {args:.}"] +format = ["black {args:.}", "ruff --fix --exit-non-zero-on-fix {args:.}"] # ruff replaces flake8! [tool.ruff] @@ -108,6 +146,11 @@ ignore = [ # E501 - Line too long. # W292 - No newline at end of file. fixable = ["F401", "E501", "W292"] +exclude = [ + "dbt_common/events/types_pb2.py", + "env*", + "third-party-stubs/*", +] [tool.ruff.per-file-ignores] "types_pb2.py" = ["E501", "E712", "F821"] @@ -115,6 +158,22 @@ fixable = ["F401", "E501", "W292"] [tool.ruff.pydocstyle] convention = "google" +[tool.black] +extend-exclude = "dbt_common/events/types_pb2.py" +line-length = 99 +target-version = ['py38'] + +[tool.flake8] +select = ["E", "W", "F"] +ignore = ["E203", "E501", "E741", "W503", "W504"] +exclude = [ + "dbt_common/events/types_pb2.py", + "tests", + "venv", + "env*" +] +per-file-ignores = ["*/__init__.py: F401"] + [tool.mypy] mypy_path = "third-party-stubs/" namespace_packages = true @@ -123,14 +182,12 @@ show_error_codes = true disable_error_code = "attr-defined" # TODO: revisit once other mypy errors resolved disallow_untyped_defs = false # TODO: add type annotations everywhere warn_redundant_casts = true +exclude = [ + "dbt_common/events/types_pb2.py", + "env*", + "third-party-stubs/*", +] -# Don't run the extensive mypy checks on custom stubs [[tool.mypy.overrides]] -module = ["logbook.*"] -disallow_untyped_defs = false - -[tool.isort] -profile = "black" - -[tool.black] -line-length = 99 +module = ["dbt_common.events.types_pb2.py"] +follow_imports = "skip" diff --git a/tests/unit/test_agate_helper.py b/tests/unit/test_agate_helper.py index 2384c13e..4c12bcd8 100644 --- a/tests/unit/test_agate_helper.py +++ b/tests/unit/test_agate_helper.py @@ -222,6 +222,5 @@ def test_nocast_bool_01(self): [True, Decimal(1)], [False, Decimal(0)], ] - for i, row in enumerate(tbl): self.assertEqual(list(row), expected[i]) diff --git a/tests/unit/test_core_dbt_utils.py b/tests/unit/test_core_dbt_utils.py index 3a31c60d..8a0e836e 100644 --- a/tests/unit/test_core_dbt_utils.py +++ b/tests/unit/test_core_dbt_utils.py @@ -8,29 +8,29 @@ class TestCommonDbtUtils(unittest.TestCase): def test_connection_exception_retry_none(self): - Counter._reset() - connection_exception_retry(lambda: Counter._add(), 5) + Counter._reset(self) + connection_exception_retry(lambda: Counter._add(self), 5) self.assertEqual(1, counter) def test_connection_exception_retry_success_requests_exception(self): - Counter._reset() - connection_exception_retry(lambda: Counter._add_with_requests_exception(), 5) + Counter._reset(self) + connection_exception_retry(lambda: Counter._add_with_requests_exception(self), 5) self.assertEqual(2, counter) # 2 = original attempt returned None, plus 1 retry def test_connection_exception_retry_max(self): - Counter._reset() + Counter._reset(self) with self.assertRaises(ConnectionError): - connection_exception_retry(lambda: Counter._add_with_exception(), 5) + connection_exception_retry(lambda: Counter._add_with_exception(self), 5) self.assertEqual(6, counter) # 6 = original attempt plus 5 retries def test_connection_exception_retry_success_failed_untar(self): - Counter._reset() - connection_exception_retry(lambda: Counter._add_with_untar_exception(), 5) + Counter._reset(self) + connection_exception_retry(lambda: Counter._add_with_untar_exception(self), 5) self.assertEqual(2, counter) # 2 = original attempt returned ReadError, plus 1 retry def test_connection_exception_retry_success_failed_eofexception(self): - Counter._reset() - connection_exception_retry(lambda: Counter._add_with_eof_exception(), 5) + Counter._reset(self) + connection_exception_retry(lambda: Counter._add_with_eof_exception(self), 5) self.assertEqual(2, counter) # 2 = original attempt returned EOFError, plus 1 retry @@ -38,36 +38,36 @@ def test_connection_exception_retry_success_failed_eofexception(self): class Counter: - def _add(): + def _add(self): global counter counter += 1 # All exceptions that Requests explicitly raises inherit from # requests.exceptions.RequestException so we want to make sure that raises plus one exception # that inherit from it for sanity - def _add_with_requests_exception(): + def _add_with_requests_exception(self): global counter counter += 1 if counter < 2: raise requests.exceptions.RequestException - def _add_with_exception(): + def _add_with_exception(self): global counter counter += 1 raise requests.exceptions.ConnectionError - def _add_with_untar_exception(): + def _add_with_untar_exception(self): global counter counter += 1 if counter < 2: raise tarfile.ReadError - def _add_with_eof_exception(): + def _add_with_eof_exception(self): global counter counter += 1 if counter < 2: raise EOFError - def _reset(): + def _reset(self): global counter counter = 0 diff --git a/third-party-stubs/agate/__init__.pyi b/third-party-stubs/agate/__init__.pyi new file mode 100644 index 00000000..c773cc7d --- /dev/null +++ b/third-party-stubs/agate/__init__.pyi @@ -0,0 +1,89 @@ +from collections.abc import Sequence + +from typing import Any, Optional, Callable, Iterable, Dict, Union + +from . import data_types as data_types +from .data_types import ( + Text as Text, + Number as Number, + Boolean as Boolean, + DateTime as DateTime, + Date as Date, + TimeDelta as TimeDelta, +) + +class MappedSequence(Sequence): + def __init__(self, values: Any, keys: Optional[Any] = ...) -> None: ... + def __unicode__(self): ... + def __getitem__(self, key: Any): ... + def __setitem__(self, key: Any, value: Any) -> None: ... + def __iter__(self): ... + def __len__(self): ... + def __eq__(self, other: Any): ... + def __ne__(self, other: Any): ... + def __contains__(self, value: Any): ... + def keys(self): ... + def values(self): ... + def items(self): ... + def get(self, key: Any, default: Optional[Any] = ...): ... + def dict(self): ... + +class Row(MappedSequence): ... + +class Table: + def __init__( + self, + rows: Any, + column_names: Optional[Any] = ..., + column_types: Optional[Any] = ..., + row_names: Optional[Any] = ..., + _is_fork: bool = ..., + ) -> None: ... + def __len__(self): ... + def __iter__(self): ... + def __getitem__(self, key: Any): ... + @property + def column_types(self): ... + @property + def column_names(self): ... + @property + def row_names(self): ... + @property + def columns(self): ... + @property + def rows(self): ... + def print_csv(self, **kwargs: Any) -> None: ... + def print_json(self, **kwargs: Any) -> None: ... + def where(self, test: Callable[[Row], bool]) -> "Table": ... + def select(self, key: Union[Iterable[str], str]) -> "Table": ... + # these definitions are much narrower than what's actually accepted + @classmethod + def from_object( + cls, obj: Iterable[Dict[str, Any]], *, column_types: Optional["TypeTester"] = None + ) -> "Table": ... + @classmethod + def from_csv( + cls, path: Iterable[str], *, column_types: Optional["TypeTester"] = None + ) -> "Table": ... + @classmethod + def merge(cls, tables: Iterable["Table"]) -> "Table": ... + def rename( + self, + column_names: Optional[Iterable[str]] = None, + row_names: Optional[Any] = None, + slug_columns: bool = False, + slug_rows: bool = False, + **kwargs: Any, + ) -> "Table": ... + +class TypeTester: + def __init__( + self, force: Any = ..., limit: Optional[Any] = ..., types: Optional[Any] = ... + ) -> None: ... + def run(self, rows: Any, column_names: Any): ... + +class MaxPrecision: + def __init__(self, column_name: Any) -> None: ... + +# this is not strictly true, but it's all we care about. +def aggregate(self, aggregations: MaxPrecision) -> int: ... diff --git a/third-party-stubs/agate/data_types.pyi b/third-party-stubs/agate/data_types.pyi new file mode 100644 index 00000000..8114f7b5 --- /dev/null +++ b/third-party-stubs/agate/data_types.pyi @@ -0,0 +1,71 @@ +from typing import Any, Optional + +DEFAULT_NULL_VALUES: Any + +class DataType: + null_values: Any = ... + def __init__(self, null_values: Any = ...) -> None: ... + def test(self, d: Any): ... + def cast(self, d: Any) -> None: ... + def csvify(self, d: Any): ... + def jsonify(self, d: Any): ... + +DEFAULT_TRUE_VALUES: Any +DEFAULT_FALSE_VALUES: Any + +class Boolean(DataType): + true_values: Any = ... + false_values: Any = ... + def __init__( + self, true_values: Any = ..., false_values: Any = ..., null_values: Any = ... + ) -> None: ... + def cast(self, d: Any): ... + def jsonify(self, d: Any): ... + +ZERO_DT: Any + +class Date(DataType): + date_format: Any = ... + parser: Any = ... + def __init__(self, date_format: Optional[Any] = ..., **kwargs: Any) -> None: ... + def cast(self, d: Any): ... + def csvify(self, d: Any): ... + def jsonify(self, d: Any): ... + +class DateTime(DataType): + datetime_format: Any = ... + timezone: Any = ... + def __init__( + self, datetime_format: Optional[Any] = ..., timezone: Optional[Any] = ..., **kwargs: Any + ) -> None: ... + def cast(self, d: Any): ... + def csvify(self, d: Any): ... + def jsonify(self, d: Any): ... + +DEFAULT_CURRENCY_SYMBOLS: Any +POSITIVE: Any +NEGATIVE: Any + +class Number(DataType): + locale: Any = ... + currency_symbols: Any = ... + group_symbol: Any = ... + decimal_symbol: Any = ... + def __init__( + self, + locale: str = ..., + group_symbol: Optional[Any] = ..., + decimal_symbol: Optional[Any] = ..., + currency_symbols: Any = ..., + **kwargs: Any, + ) -> None: ... + def cast(self, d: Any): ... + def jsonify(self, d: Any): ... + +class TimeDelta(DataType): + def cast(self, d: Any): ... + +class Text(DataType): + cast_nulls: Any = ... + def __init__(self, cast_nulls: bool = ..., **kwargs: Any) -> None: ... + def cast(self, d: Any): ...