diff --git a/pkgs/development/python-modules/databricks-sdk/default.nix b/pkgs/development/python-modules/databricks-sdk/default.nix
new file mode 100644
index 0000000000000..93292f7eec1e2
--- /dev/null
+++ b/pkgs/development/python-modules/databricks-sdk/default.nix
@@ -0,0 +1,85 @@
+{
+  lib,
+  stdenv,
+  buildPythonPackage,
+  fetchFromGitHub,
+
+  # build-system
+  setuptools,
+
+  # dependencies
+  google-auth,
+  requests,
+
+  # tests
+  pyfakefs,
+  pytestCheckHook,
+  pytest-mock,
+  requests-mock,
+}:
+
+buildPythonPackage rec {
+  pname = "databricks-sdk";
+  version = "0.34.0";
+  pyproject = true;
+
+  src = fetchFromGitHub {
+    owner = "databricks";
+    repo = "databricks-sdk-py";
+    rev = "refs/tags/v${version}";
+    hash = "sha256-pbOm1aTHtIAwk/TJ5CCT9/CqSTuHTWkRgJuflObkU54=";
+  };
+
+  build-system = [
+    setuptools
+  ];
+
+  dependencies = [
+    google-auth
+    requests
+  ];
+
+  pythonImportsCheck = [
+    "databricks.sdk"
+  ];
+
+  nativeCheckInputs = [
+    pyfakefs
+    pytestCheckHook
+    pytest-mock
+    requests-mock
+  ];
+
+  disabledTests =
+    [
+      # Require internet access
+      # ValueError: default auth: cannot configure default credentials, please chec...
+      "test_azure_cli_does_not_specify_tenant_id_with_msi"
+      "test_azure_cli_fallback"
+      "test_azure_cli_user_no_management_access"
+      "test_azure_cli_user_with_management_access"
+      "test_azure_cli_with_warning_on_stderr"
+      "test_azure_cli_workspace_header_present"
+      "test_config_azure_cli_host"
+      "test_config_azure_cli_host_and_resource_id"
+      "test_config_azure_cli_host_and_resource_i_d_configuration_precedence"
+      "test_load_azure_tenant_id_404"
+      "test_load_azure_tenant_id_happy_path"
+      "test_load_azure_tenant_id_no_location_header"
+      "test_load_azure_tenant_id_unparsable_location_header"
+    ]
+    ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isx86_64) [
+      # requests.exceptions.ChunkedEncodingError: ("Connection broken: ConnectionResetError(54, 'Connection reset by peer')", ConnectionResetError(54, 'Connection reset by peer'))
+      "test_github_oidc_flow_works_with_azure"
+    ];
+
+  __darwinAllowLocalNetworking = true;
+
+  meta = {
+    description = "Databricks SDK for Python";
+    homepage = "https://github.com/databricks/databricks-sdk-py";
+    changelog = "https://github.com/databricks/databricks-sdk-py/blob/${src.rev}/CHANGELOG.md";
+    license = lib.licenses.asl20;
+    maintainers = with lib.maintainers; [ GaetanLepage ];
+  };
+}
diff --git a/pkgs/development/python-modules/mlflow/default.nix b/pkgs/development/python-modules/mlflow/default.nix
index 7032d0e91d517..b04eddabc5ffd 100644
--- a/pkgs/development/python-modules/mlflow/default.nix
+++ b/pkgs/development/python-modules/mlflow/default.nix
@@ -1,20 +1,24 @@
 {
   lib,
+  fetchFromGitHub,
+
+  # build-system
+  setuptools,
+
+  # dependencies
   alembic,
   buildPythonPackage,
   cachetools,
   click,
   cloudpickle,
-  databricks-cli,
+  databricks-sdk,
   docker,
-  entrypoints,
-  fetchPypi,
   flask,
   gitpython,
-  gorilla,
   graphene,
   gunicorn,
   importlib-metadata,
+  jinja2,
   markdown,
   matplotlib,
   numpy,
@@ -22,60 +26,77 @@
   opentelemetry-sdk,
   packaging,
   pandas,
-  prometheus-flask-exporter,
   protobuf,
-  python-dateutil,
-  pythonOlder,
   pyarrow,
-  pytz,
   pyyaml,
-  querystring-parser,
   requests,
-  setuptools,
   scikit-learn,
   scipy,
-  simplejson,
   sqlalchemy,
   sqlparse,
+
+  # tests
+  aiohttp,
+  azure-core,
+  azure-storage-blob,
+  azure-storage-file,
+  boto3,
+  botocore,
+  catboost,
+  datasets,
+  fastapi,
+  google-cloud-storage,
+  httpx,
+  jwt,
+  keras,
+  langchain,
+  librosa,
+  moto,
+  opentelemetry-exporter-otlp,
+  optuna,
+  pydantic,
+  pyspark,
+  pytestCheckHook,
+  pytorch-lightning,
+  sentence-transformers,
+  starlette,
+  statsmodels,
+  tensorflow,
+  torch,
+  transformers,
+  uvicorn,
+  xgboost,
 }:
 
 buildPythonPackage rec {
   pname = "mlflow";
-  version = "2.14.3";
+  version = "2.16.2";
   pyproject = true;
 
-  disabled = pythonOlder "3.8";
-
-  src = fetchPypi {
-    inherit pname version;
-    hash = "sha256-KSyuS4NXSgyyIxF+IkyqZ5iTMHivAjNxnCthK+pkVhc=";
+  src = fetchFromGitHub {
+    owner = "mlflow";
+    repo = "mlflow";
+    rev = "refs/tags/v${version}";
+    hash = "sha256-7W1gpVgJSN/iXoW987eCHfcOeE3D/ZJ2W/eilDdzOww=";
   };
 
-  # Remove currently broken dependency `shap`, a model explainability package.
-  # This seems quite unprincipled especially with tests not being enabled,
-  # but not mlflow has a 'skinny' install option which does not require `shap`.
-  pythonRemoveDeps = [ "shap" ];
-  pythonRelaxDeps = [
-    "gunicorn"
-    "packaging"
-    "pytz"
-    "pyarrow"
+  build-system = [
+    setuptools
   ];
 
-  propagatedBuildInputs = [
+  dependencies = [
     alembic
     cachetools
     click
     cloudpickle
-    databricks-cli
+    databricks-sdk
     docker
-    entrypoints
     flask
     gitpython
-    gorilla
     graphene
     gunicorn
     importlib-metadata
+    jinja2
     markdown
     matplotlib
     numpy
@@ -83,36 +104,87 @@ buildPythonPackage rec {
     opentelemetry-sdk
     packaging
     pandas
-    prometheus-flask-exporter
     protobuf
     pyarrow
-    python-dateutil
-    pytz
     pyyaml
-    querystring-parser
     requests
     scikit-learn
     scipy
-    setuptools
-    #shap
-    simplejson
     sqlalchemy
     sqlparse
   ];
 
   pythonImportsCheck = [ "mlflow" ];
 
-  # no tests in PyPI dist
-  # run into https://stackoverflow.com/questions/51203641/attributeerror-module-alembic-context-has-no-attribute-config
-  # also, tests use conda so can't run on NixOS without buildFHSEnv
+  nativeCheckInputs = [
+    aiohttp
+    azure-core
+    azure-storage-blob
+    azure-storage-file
+    boto3
+    botocore
+    catboost
+    datasets
+    fastapi
+    google-cloud-storage
+    httpx
+    jwt
+    keras
+    langchain
+    librosa
+    moto
+    opentelemetry-exporter-otlp
+    optuna
+    pydantic
+    pyspark
+    pytestCheckHook
+    pytorch-lightning
+    sentence-transformers
+    starlette
+    statsmodels
+    tensorflow
+    torch
+    transformers
+    uvicorn
+    xgboost
+  ];
+
+  disabledTestPaths = [
+    # Requires unpackaged `autogen`
+    "tests/autogen/test_autogen_autolog.py"
+
+    # Requires unpackaged `diviner`
+    "tests/diviner/test_diviner_model_export.py"
+
+    # Requires unpackaged `sktime`
+    "examples/sktime/test_sktime_model_export.py"
+
+    # Requires `fastai` which would cause a circular dependency
+    "tests/fastai/test_fastai_autolog.py"
+    "tests/fastai/test_fastai_model_export.py"
+
+    # Requires `spacy` which would cause a circular dependency
+    "tests/spacy/test_spacy_model_export.py"
+
+    # Requires `tensorflow.keras` which is not included in our outdated version of `tensorflow` (2.13.0)
+    "tests/gateway/providers/test_ai21labs.py"
+    "tests/tensorflow/test_keras_model_export.py"
+    "tests/tensorflow/test_keras_pyfunc_model_works_with_all_input_types.py"
+    "tests/tensorflow/test_mlflow_callback.py"
+  ];
+
+  # I (@GaetanLepage) gave up on enabling tests:
+  # - They require a lot of dependencies (some unpackaged);
+  # - Many errors occur at collection time;
+  # - Most (all?) tests require internet access anyway.
   doCheck = false;
 
-  meta = with lib; {
+  meta = {
     description = "Open source platform for the machine learning lifecycle";
     mainProgram = "mlflow";
     homepage = "https://github.com/mlflow/mlflow";
     changelog = "https://github.com/mlflow/mlflow/blob/v${version}/CHANGELOG.md";
-    license = licenses.asl20;
-    maintainers = with maintainers; [ tbenst ];
+    license = lib.licenses.asl20;
+    maintainers = with lib.maintainers; [ tbenst ];
   };
 }
diff --git a/pkgs/servers/mlflow-server/default.nix b/pkgs/servers/mlflow-server/default.nix
index 6916b3b655476..82ac934dc8887 100644
--- a/pkgs/servers/mlflow-server/default.nix
+++ b/pkgs/servers/mlflow-server/default.nix
@@ -1,31 +1,34 @@
-{ python3, writeText}:
+{ python3Packages, writers }:
 
 let
-  py = python3.pkgs;
+  py = python3Packages;
+
+  # Small launcher that calls gunicorn's `wsgiapp.run`; the postPatch below makes mlflow spawn it.
+  gunicornScript = writers.writePython3 "gunicornMlflow" { } ''
+    import re
+    import sys
+    from gunicorn.app.wsgiapp import run
+    if __name__ == '__main__':
+      sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', ''', sys.argv[0])
+      sys.exit(run())
+  '';
 in
-py.toPythonApplication
-  (py.mlflow.overridePythonAttrs(old: rec {
+py.toPythonApplication (
+  py.mlflow.overridePythonAttrs (old: {
 
-    propagatedBuildInputs = old.propagatedBuildInputs ++ [
+    # Extend `dependencies` (the attribute py.mlflow declares) rather than re-declaring it as propagatedBuildInputs.
+    dependencies = old.dependencies ++ [
       py.boto3
       py.mysqlclient
     ];
 
-    postPatch = (old.postPatch or "") + ''
-      substituteInPlace mlflow/utils/process.py --replace \
-        "child = subprocess.Popen(cmd, env=cmd_env, cwd=cwd, universal_newlines=True," \
-        "cmd[0]='$out/bin/gunicornMlflow'; child = subprocess.Popen(cmd, env=cmd_env, cwd=cwd, universal_newlines=True,"
-    '';
-
-    gunicornScript = writeText "gunicornMlflow"
-    ''
-      #!/usr/bin/env python
-      import re
-      import sys
-      from gunicorn.app.wsgiapp import run
-      if __name__ == '__main__':
-        sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', ''', sys.argv[0])
-        sys.exit(run())
+    # Overwrite cmd[0] right before the Popen call so the launcher above is what gets executed.
+    postPatch =
+      (old.postPatch or "")
+      + ''
+        substituteInPlace mlflow/utils/process.py --replace-fail \
+          "process = subprocess.Popen(" \
+          "cmd[0]='${gunicornScript}'; process = subprocess.Popen("
     '';
 
     postInstall = ''
@@ -33,4 +36,5 @@ py.toPythonApplication
       cp ${gunicornScript} $gpath
       chmod 555 $gpath
     '';
-}))
+  })
+)
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index f7d6020211ec6..b399ea5c2ea5a 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -2876,6 +2876,8 @@ self: super: with self; {
 
+  databricks-sdk = callPackage ../development/python-modules/databricks-sdk { };
+
   databricks-sql-connector = callPackage ../development/python-modules/databricks-sql-connector { };
 
   dataclass-factory = callPackage ../development/python-modules/dataclass-factory { };
 
   dataclass-wizard = callPackage ../development/python-modules/dataclass-wizard { };