From 72c657d85e42bc83a244e4f26b0ec8940d63e93e Mon Sep 17 00:00:00 2001
From: Gaetan Lepage
Date: Mon, 7 Oct 2024 15:30:38 +0200
Subject: [PATCH 1/4] python312Packages.databricks-sdk: init at 0.34.0

---
 .../python-modules/databricks-sdk/default.nix | 86 +++++++++++++++++++
 pkgs/top-level/python-packages.nix            |  2 +
 2 files changed, 88 insertions(+)
 create mode 100644 pkgs/development/python-modules/databricks-sdk/default.nix

diff --git a/pkgs/development/python-modules/databricks-sdk/default.nix b/pkgs/development/python-modules/databricks-sdk/default.nix
new file mode 100644
index 0000000000000..93292f7eec1e2
--- /dev/null
+++ b/pkgs/development/python-modules/databricks-sdk/default.nix
@@ -0,0 +1,86 @@
+{
+  lib,
+  stdenv,
+  buildPythonPackage,
+  fetchFromGitHub,
+
+  # build-system
+  setuptools,
+
+  # dependencies
+  google-auth,
+  requests,
+
+  # tests
+  pyfakefs,
+  pytestCheckHook,
+  pytest-mock,
+  requests-mock,
+}:
+
+buildPythonPackage rec {
+  pname = "databricks-sdk";
+  version = "0.34.0";
+  pyproject = true;
+
+  src = fetchFromGitHub {
+    owner = "databricks";
+    repo = "databricks-sdk-py";
+    rev = "refs/tags/v${version}";
+    hash = "sha256-pbOm1aTHtIAwk/TJ5CCT9/CqSTuHTWkRgJuflObkU54=";
+  };
+
+  build-system = [
+    setuptools
+  ];
+
+  dependencies = [
+    google-auth
+    requests
+  ];
+
+  pythonImportsCheck = [
+    "databricks.sdk"
+  ];
+
+  nativeCheckInputs = [
+    pyfakefs
+    pytestCheckHook
+    pytest-mock
+    requests-mock
+  ];
+
+  disabledTests =
+    [
+      # Require internet access
+      # ValueError: default auth: cannot configure default credentials, please chec...
+      "test_azure_cli_does_not_specify_tenant_id_with_msi"
+      "test_azure_cli_fallback"
+      "test_azure_cli_user_no_management_access"
+      "test_azure_cli_user_with_management_access"
+      "test_azure_cli_with_warning_on_stderr"
+      "test_azure_cli_workspace_header_present"
+      "test_config_azure_cli_host"
+      "test_config_azure_cli_host_and_resource_id"
+      "test_config_azure_cli_host_and_resource_i_d_configuration_precedence"
+      "test_load_azure_tenant_id_404"
+      "test_load_azure_tenant_id_happy_path"
+      "test_load_azure_tenant_id_no_location_header"
+      "test_load_azure_tenant_id_unparsable_location_header"
+    ]
+    ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isx86_64) [
+      # requests.exceptions.ChunkedEncodingError: ("Connection broken: ConnectionResetError(54, 'Connection reset by peer')", ConnectionResetError(54, 'Connection reset by peer'))
+      "test_github_oidc_flow_works_with_azure"
+    ];
+
+  # Let the test suite open local network connections inside the Darwin sandbox
+  __darwinAllowLocalNetworking = true;
+
+  meta = {
+    description = "Databricks SDK for Python";
+    homepage = "https://github.com/databricks/databricks-sdk-py";
+    changelog = "https://github.com/databricks/databricks-sdk-py/blob/${src.rev}/CHANGELOG.md";
+    license = lib.licenses.asl20;
+    maintainers = with lib.maintainers; [ GaetanLepage ];
+  };
+}
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index f7d6020211ec6..b399ea5c2ea5a 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -2876,6 +2876,8 @@ self: super: with self; {
 
+  databricks-sdk = callPackage ../development/python-modules/databricks-sdk { };
+
   databricks-sql-connector = callPackage ../development/python-modules/databricks-sql-connector { };
 
   dataclass-factory = callPackage ../development/python-modules/dataclass-factory { };
 
   dataclass-wizard = callPackage ../development/python-modules/dataclass-wizard { };

From 8f7e9e238be4a54e7621331a33f092905241bd8c Mon Sep 17 00:00:00 2001
From: Gaetan Lepage
Date: Mon, 7 Oct 2024 15:07:03 +0200
Subject: [PATCH 2/4] python312Packages.mlflow: 2.14.3 -> 2.16.2

Changelog: https://github.com/mlflow/mlflow/blob/v2.16.2/CHANGELOG.md
---
 .../python-modules/mlflow/default.nix         | 158 +++++++++++++-----
 1 file changed, 115 insertions(+), 43 deletions(-)

diff --git a/pkgs/development/python-modules/mlflow/default.nix b/pkgs/development/python-modules/mlflow/default.nix
index 7032d0e91d517..b04eddabc5ffd 100644
--- a/pkgs/development/python-modules/mlflow/default.nix
+++ b/pkgs/development/python-modules/mlflow/default.nix
@@ -1,20 +1,24 @@
 {
   lib,
+  fetchFromGitHub,
+
+  # build-system
+  setuptools,
+
+  # dependencies
   alembic,
   buildPythonPackage,
   cachetools,
   click,
   cloudpickle,
-  databricks-cli,
+  databricks-sdk,
   docker,
-  entrypoints,
-  fetchPypi,
   flask,
   gitpython,
-  gorilla,
   graphene,
   gunicorn,
   importlib-metadata,
+  jinja2,
   markdown,
   matplotlib,
   numpy,
@@ -22,60 +26,77 @@
   opentelemetry-sdk,
   packaging,
   pandas,
-  prometheus-flask-exporter,
   protobuf,
-  python-dateutil,
-  pythonOlder,
   pyarrow,
-  pytz,
   pyyaml,
-  querystring-parser,
   requests,
-  setuptools,
   scikit-learn,
   scipy,
-  simplejson,
   sqlalchemy,
   sqlparse,
+
+  # tests
+  aiohttp,
+  azure-core,
+  azure-storage-blob,
+  azure-storage-file,
+  boto3,
+  botocore,
+  catboost,
+  datasets,
+  fastapi,
+  google-cloud-storage,
+  httpx,
+  jwt,
+  keras,
+  langchain,
+  librosa,
+  moto,
+  opentelemetry-exporter-otlp,
+  optuna,
+  pydantic,
+  pyspark,
+  pytestCheckHook,
+  pytorch-lightning,
+  sentence-transformers,
+  starlette,
+  statsmodels,
+  tensorflow,
+  torch,
+  transformers,
+  uvicorn,
+  xgboost,
 }:
 
 buildPythonPackage rec {
   pname = "mlflow";
-  version = "2.14.3";
+  version = "2.16.2";
   pyproject = true;
 
-  disabled = pythonOlder "3.8";
-
-  src = fetchPypi {
-    inherit pname version;
-    hash = "sha256-KSyuS4NXSgyyIxF+IkyqZ5iTMHivAjNxnCthK+pkVhc=";
+  src = fetchFromGitHub {
+    owner = "mlflow";
+    repo = "mlflow";
+    rev = "refs/tags/v${version}";
+    hash = "sha256-7W1gpVgJSN/iXoW987eCHfcOeE3D/ZJ2W/eilDdzOww=";
   };
 
-  # Remove currently broken dependency `shap`, a model explainability package.
-  # This seems quite unprincipled especially with tests not being enabled,
-  # but not mlflow has a 'skinny' install option which does not require `shap`.
-  pythonRemoveDeps = [ "shap" ];
-  pythonRelaxDeps = [
-    "gunicorn"
-    "packaging"
-    "pytz"
-    "pyarrow"
+  build-system = [
+    setuptools
   ];
 
-  propagatedBuildInputs = [
+  dependencies = [
     alembic
     cachetools
     click
     cloudpickle
-    databricks-cli
+    databricks-sdk
     docker
-    entrypoints
     flask
     gitpython
-    gorilla
     graphene
     gunicorn
     importlib-metadata
+    jinja2
     markdown
     matplotlib
     numpy
@@ -83,36 +104,87 @@
     opentelemetry-sdk
     packaging
     pandas
-    prometheus-flask-exporter
     protobuf
     pyarrow
-    python-dateutil
-    pytz
     pyyaml
-    querystring-parser
     requests
     scikit-learn
     scipy
-    setuptools
-    #shap
-    simplejson
     sqlalchemy
     sqlparse
   ];
 
   pythonImportsCheck = [ "mlflow" ];
 
-  # no tests in PyPI dist
-  # run into https://stackoverflow.com/questions/51203641/attributeerror-module-alembic-context-has-no-attribute-config
-  # also, tests use conda so can't run on NixOS without buildFHSEnv
+  nativeCheckInputs = [
+    aiohttp
+    azure-core
+    azure-storage-blob
+    azure-storage-file
+    boto3
+    botocore
+    catboost
+    datasets
+    fastapi
+    google-cloud-storage
+    httpx
+    jwt
+    keras
+    langchain
+    librosa
+    moto
+    opentelemetry-exporter-otlp
+    optuna
+    pydantic
+    pyspark
+    pytestCheckHook
+    pytorch-lightning
+    sentence-transformers
+    starlette
+    statsmodels
+    tensorflow
+    torch
+    transformers
+    uvicorn
+    xgboost
+  ];
+
+  disabledTestPaths = [
+    # Requires unpackaged `autogen`
+    "tests/autogen/test_autogen_autolog.py"
+
+    # Requires unpackaged `diviner`
+    "tests/diviner/test_diviner_model_export.py"
+
+    # Requires unpackaged `sktime`
+    "examples/sktime/test_sktime_model_export.py"
+
+    # Requires `fastai` which would cause a circular dependency
+    "tests/fastai/test_fastai_autolog.py"
+    "tests/fastai/test_fastai_model_export.py"
+
+    # Requires `spacy` which would cause a circular dependency
+    "tests/spacy/test_spacy_model_export.py"
+
+    # Requires `tensorflow.keras` which is not included in our outdated version of `tensorflow` (2.13.0)
+    "tests/gateway/providers/test_ai21labs.py"
+    "tests/tensorflow/test_keras_model_export.py"
+    "tests/tensorflow/test_keras_pyfunc_model_works_with_all_input_types.py"
+    "tests/tensorflow/test_mlflow_callback.py"
+  ];
+
+  # I (@GaetanLepage) gave up at enabling tests:
+  # - They require a lot of dependencies (some unpackaged);
+  # - Many errors occur at collection time;
+  # - Most (all ?) tests require internet access anyway.
   doCheck = false;
 
-  meta = with lib; {
+  meta = {
     description = "Open source platform for the machine learning lifecycle";
     mainProgram = "mlflow";
     homepage = "https://github.com/mlflow/mlflow";
     changelog = "https://github.com/mlflow/mlflow/blob/v${version}/CHANGELOG.md";
-    license = licenses.asl20;
-    maintainers = with maintainers; [ tbenst ];
+    license = lib.licenses.asl20;
+    maintainers = with lib.maintainers; [ tbenst ];
   };
 }

From 87fa3c4e41ca74dc6e45b6b72124a711bc68c7d9 Mon Sep 17 00:00:00 2001
From: Gaetan Lepage
Date: Mon, 7 Oct 2024 17:10:18 +0200
Subject: [PATCH 3/4] mlflow-server: 2.14.3 -> 2.16.2

---
 pkgs/servers/mlflow-server/default.nix | 36 +++++++++++++-------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/pkgs/servers/mlflow-server/default.nix b/pkgs/servers/mlflow-server/default.nix
index 6916b3b655476..4e0d90e5dd6ba 100644
--- a/pkgs/servers/mlflow-server/default.nix
+++ b/pkgs/servers/mlflow-server/default.nix
@@ -1,32 +1,32 @@
-{ python3, writeText}:
+{ python3Packages, writers}:
 
 let
-  py = python3.pkgs;
+  py = python3Packages;
+
+  gunicornScript = writers.writePython3 "gunicornMlflow" {} ''
+    import re
+    import sys
+    from gunicorn.app.wsgiapp import run
+    if __name__ == '__main__':
+        sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', ''', sys.argv[0])
+        sys.exit(run())
+  '';
 in
 py.toPythonApplication
-  (py.mlflow.overridePythonAttrs(old: rec {
+  (py.mlflow.overridePythonAttrs(old: {
 
-    propagatedBuildInputs = old.propagatedBuildInputs ++ [
+    dependencies = old.dependencies ++ [
       py.boto3
       py.mysqlclient
     ];
 
+    # Launch the copy of the gunicorn script that postInstall puts in $out/bin;
+    # unlike the bare store script, it is covered by the Python wrapping.
     postPatch = (old.postPatch or "") + ''
-      substituteInPlace mlflow/utils/process.py --replace \
-        "child = subprocess.Popen(cmd, env=cmd_env, cwd=cwd, universal_newlines=True," \
-        "cmd[0]='$out/bin/gunicornMlflow'; child = subprocess.Popen(cmd, env=cmd_env, cwd=cwd, universal_newlines=True,"
-    '';
-
-    gunicornScript = writeText "gunicornMlflow"
-    ''
-        #!/usr/bin/env python
-        import re
-        import sys
-        from gunicorn.app.wsgiapp import run
-        if __name__ == '__main__':
-          sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', ''', sys.argv[0])
-          sys.exit(run())
+      substituteInPlace mlflow/utils/process.py --replace-fail \
+        "process = subprocess.Popen(" \
+        "cmd[0]='$out/bin/gunicornMlflow'; process = subprocess.Popen("
     '';
 
     postInstall = ''
       gpath=$out/bin/gunicornMlflow

From 8e03a09b7606eacf4bd3b645da455c126987a695 Mon Sep 17 00:00:00 2001
From: Gaetan Lepage
Date: Tue, 8 Oct 2024 10:05:47 +0200
Subject: [PATCH 4/4] mlflow-server: format

---
 pkgs/servers/mlflow-server/default.nix | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/pkgs/servers/mlflow-server/default.nix b/pkgs/servers/mlflow-server/default.nix
index 4e0d90e5dd6ba..82ac934dc8887 100644
--- a/pkgs/servers/mlflow-server/default.nix
+++ b/pkgs/servers/mlflow-server/default.nix
@@ -1,9 +1,9 @@
-{ python3Packages, writers}:
+{ python3Packages, writers }:
 
 let
   py = python3Packages;
 
-  gunicornScript = writers.writePython3 "gunicornMlflow" {} ''
+  gunicornScript = writers.writePython3 "gunicornMlflow" { } ''
     import re
     import sys
     from gunicorn.app.wsgiapp import run
@@ -12,25 +12,28 @@ let
         sys.exit(run())
   '';
 in
-py.toPythonApplication
-  (py.mlflow.overridePythonAttrs(old: {
+py.toPythonApplication (
+  py.mlflow.overridePythonAttrs (old: {
 
     dependencies = old.dependencies ++ [
       py.boto3
       py.mysqlclient
     ];
 
     # Launch the copy of the gunicorn script that postInstall puts in $out/bin;
     # unlike the bare store script, it is covered by the Python wrapping.
-    postPatch = (old.postPatch or "") + ''
-      substituteInPlace mlflow/utils/process.py --replace-fail \
-        "process = subprocess.Popen(" \
-        "cmd[0]='$out/bin/gunicornMlflow'; process = subprocess.Popen("
-    '';
+    postPatch =
+      (old.postPatch or "")
+      + ''
+        substituteInPlace mlflow/utils/process.py --replace-fail \
+          "process = subprocess.Popen(" \
+          "cmd[0]='$out/bin/gunicornMlflow'; process = subprocess.Popen("
+      '';
 
     postInstall = ''
       gpath=$out/bin/gunicornMlflow
       cp ${gunicornScript} $gpath
       chmod 555 $gpath
     '';
-}))
+  })
+)