diff --git a/.all-contributorsrc b/.all-contributorsrc new file mode 100644 index 000000000..b61be8539 --- /dev/null +++ b/.all-contributorsrc @@ -0,0 +1,162 @@ +{ + "projectName": "datalad-next", + "projectOwner": "datalad", + "repoType": "github", + "commitConvention": "angular", + "contributors": [ + { + "login": "mih", + "name": "Michael Hanke", + "avatar_url": "https://avatars.githubusercontent.com/u/136479?v=4", + "profile": "http://psychoinformatics.de/", + "contributions": [ + "bug", + "code", + "content", + "design", + "doc", + "financial", + "fundingFinding", + "ideas", + "infra", + "maintenance", + "mentoring", + "platform", + "projectManagement", + "review", + "talk", + "test", + "tool", + "userTesting" + ] + }, + { + "login": "catetrai", + "name": "catetrai", + "avatar_url": "https://avatars.githubusercontent.com/u/18424941?v=4", + "profile": "https://github.com/catetrai", + "contributions": [ + "code", + "design", + "doc", + "ideas", + "test" + ] + }, + { + "login": "effigies", + "name": "Chris Markiewicz", + "avatar_url": "https://avatars.githubusercontent.com/u/83442?v=4", + "profile": "https://github.com/effigies", + "contributions": [ + "maintenance", + "code" + ] + }, + { + "login": "mslw", + "name": "Michał Szczepanik", + "avatar_url": "https://avatars.githubusercontent.com/u/11985212?v=4", + "profile": "https://github.com/mslw", + "contributions": [ + "bug", + "code", + "content", + "doc", + "example", + "ideas", + "infra", + "maintenance", + "review", + "talk", + "test", + "tutorial", + "userTesting" + ] + }, + { + "login": "jsheunis", + "name": "Stephan Heunis", + "avatar_url": "https://avatars.githubusercontent.com/u/10141237?v=4", + "profile": "https://jsheunis.github.io/", + "contributions": [ + "bug", + "code", + "doc", + "ideas", + "maintenance", + "talk", + "userTesting" + ] + }, + { + "login": "bpoldrack", + "name": "Benjamin Poldrack", + "avatar_url": "https://avatars.githubusercontent.com/u/10498301?v=4", + "profile": "https://github.com/bpoldrack", + "contributions": [ + "bug", + "code" + ] + }, + { + "login": "yarikoptic", + "name": "Yaroslav Halchenko", + "avatar_url": "https://avatars.githubusercontent.com/u/39889?v=4", + "profile": "https://github.com/yarikoptic", + "contributions": [ + "bug", + "code", + "infra", + "maintenance", + "tool" + ] + }, + { + "login": "christian-monch", + "name": "Christian Mönch", + "avatar_url": "https://avatars.githubusercontent.com/u/17925232?v=4", + "profile": "https://github.com/christian-monch", + "contributions": [ + "code", + "design", + "doc", + "ideas", + "review", + "test", + "userTesting" + ] + }, + { + "login": "adswa", + "name": "Adina Wagner", + "avatar_url": "https://avatars.githubusercontent.com/u/29738718?v=4", + "profile": "https://github.com/adswa", + "contributions": [ + "a11y", + "bug", + "code", + "doc", + "example", + "maintenance", + "projectManagement", + "review", + "talk", + "test", + "tutorial", + "userTesting" + ] + }, + { + "login": "jwodder", + "name": "John T. 
Wodder II", + "avatar_url": "https://avatars.githubusercontent.com/u/98207?v=4", + "profile": "https://github.com/jwodder", + "contributions": [ + "code", + "infra", + "test" + ] + } + ] +} \ No newline at end of file diff --git a/.appveyor.yml b/.appveyor.yml index 4dec8e42b..2624fcbae 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -99,7 +99,7 @@ environment: # to have `.overrides` be uniformly limited to instance overrides KEYWORDS: not test_gh1811 and not test_librarymode APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -120,7 +120,7 @@ environment: # because MIH does not know better KEYWORDS: not test_gh1811 and not test_fake_gitlab and not test_dryrun APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -130,7 +130,7 @@ environment: datalad.distribution KEYWORDS: not test_invalid_args APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -139,7 +139,7 @@ environment: DTS: > datalad.local APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -151,7 +151,7 @@ environment: datalad.tests datalad.ui APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -285,7 +285,7 @@ test_script: # run test selecion (--traverse-namespace needed from Python 3.8 onwards) - cmd: python -m pytest -s -v -m "not (turtle)" -k "%KEYWORDS%" --cov=datalad_next --pyargs %DTS% # also add --cov datalad, because some core test runs may not touch -next code - - sh: PATH=$PWD/../tools/coverage-bin:$PATH python -m pytest -s -v -m "not (turtle)" -k "$KEYWORDS" --cov=datalad_next --cov datalad --pyargs ${DTS} + - sh: python -m pytest -s -v -m "not (turtle)" -k "$KEYWORDS" --cov=datalad_next --cov datalad --cov-config=../.coveragerc --pyargs ${DTS} after_test: diff --git a/.coveragerc b/.coveragerc index 0ed61f69c..47273f876 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,7 +1,16 @@ +[run] +parallel = True +branch = True +data_file = ${COVERAGE_ROOT-.}/.coverage +omit = + # versioneer + */_version.py + [paths] source = datalad_next/ */datalad_next/ + [report] # show lines missing coverage in output show_missing = True diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml new file mode 100644 index 000000000..8b3d9be0c --- /dev/null +++ b/.github/workflows/mypy.yml @@ -0,0 +1,39 @@ +name: Type annotation + +on: + pull_request: + paths: + - 'datalad_next/**.py' + - '!**/tests/**.py' + +jobs: + static-type-check: + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + architecture: x64 + - name: Checkout + uses: actions/checkout@v3 + - name: Install mypy + run: python -m pip install mypy # you can pin your preferred version + - name: Get Python changed files + id: changed-py-files + uses: tj-actions/changed-files@v23 + with: + files: | + *.py + **/*.py + - name: Type check changed files + if: steps.changed-py-files.outputs.any_changed == 'true' + run: | + # get any type stubs that 
mypy thinks it needs + mypy --install-types --non-interactive --follow-imports skip --ignore-missing-imports ${{ steps.changed-py-files.outputs.all_changed_files }} + # run mypy on the modified files only, and do not even follow imports. + # this results in a fairly superficial test, but given the overall + # state of annotations, we strive to become more correct incrementally + # with focused error reports, rather than barfing a huge complaint + # that is unrelated to the changeset someone has been working on + mypy --follow-imports skip --ignore-missing-imports --pretty --show-error-context ${{ steps.changed-py-files.outputs.all_changed_files }} diff --git a/.github/workflows/update-contributors.yml b/.github/workflows/update-contributors.yml new file mode 100644 index 000000000..261981c3f --- /dev/null +++ b/.github/workflows/update-contributors.yml @@ -0,0 +1,86 @@ +name: allcontributors-auto-detect + +on: + push: + branches: + - main + +jobs: + Update: + name: Generate + runs-on: ubuntu-latest + if: contains(github.repository, 'datalad/datalad-next') + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Tributors Update + uses: con/tributors@0.1.1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + + # Single text list (space separated) of parsers, leave unset to auto-detect + parsers: unset + + # Update lookup with GitHub metadata + update_lookup: github + + # Skip these users (example) + skip_users: + + # INFO, DEBUG, ERROR, WARNING, etc. + log_level: DEBUG + + # If files already exist and an init is done, force overwrite + force: true + + # the minimum number of contributions required to add a user + threshold: 1 + + - name: Checkout New Branch + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BRANCH_AGAINST: "main" + run: | + printf "GitHub Actor: ${GITHUB_ACTOR}\n" + export BRANCH_FROM="contributors/update-$(date '+%Y-%m-%d')" + git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" + + BRANCH_EXISTS=$(git ls-remote --heads origin ${BRANCH_FROM}) + if [[ -z ${BRANCH_EXISTS} ]]; then + printf "Branch does not exist in remote.\n" + else + printf "Branch already exists in remote.\n" + exit 1 + fi + git branch + git checkout -b "${BRANCH_FROM}" || git checkout "${BRANCH_FROM}" + git branch + + git config --global user.name "github-actions" + git config --global user.email "github-actions@users.noreply.github.com" + git status + + if git diff-index --quiet HEAD --; then + export OPEN_PULL_REQUEST=0 + printf "No changes\n" + else + export OPEN_PULL_REQUEST=1 + printf "Changes\n" + git commit -a -m "Automated deployment to update contributors $(date '+%Y-%m-%d') + + [skip ci]" + git push origin "${BRANCH_FROM}" + fi + echo "OPEN_PULL_REQUEST=${OPEN_PULL_REQUEST}" >> $GITHUB_ENV + echo "PULL_REQUEST_FROM_BRANCH=${BRANCH_FROM}" >> $GITHUB_ENV + echo "PULL_REQUEST_TITLE=[tributors] ${BRANCH_FROM}" >> $GITHUB_ENV + echo "PULL_REQUEST_BODY='Tributors update automated pull request.\n\n[skip ci]'" >> $GITHUB_ENV + + - name: Open Pull Request + uses: vsoch/pull-request-action@1.0.24 + if: ${{ env.OPEN_PULL_REQUEST == '1' }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PULL_REQUEST_BRANCH: "main" \ No newline at end of file diff --git a/.zenodo.json b/.zenodo.json index 904bc1534..b05453614 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -42,7 +42,12 @@ "orcid": "0000-0003-2917-3450" }, { - "name": "Wodder II, John T." 
+ "name": "Wodder II, John T.", + "orcid": "0000-0001-7106-2661" + }, + { + "name": "Trainito, Caterina", + "orcid": "0000-0002-1713-8343" } ], "keywords": [ @@ -52,4 +57,4 @@ "access_right": "open", "license": "MIT", "upload_type": "software" -} +} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f51801bb8..7fe490fcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,191 @@ +# 1.0.2 (2023-10-23) -- Debianize! + +## 🏠 Internal + +- The `www-authenticate` dependencies is dropped. The functionality is + replaced by a `requests`-based implementation of an alternative parser. + This trims the dependency footprint and facilitates Debian-packaging. + The previous test cases are kept and further extended. + Fixes https://github.com/datalad/datalad-next/issues/493 via + https://github.com/datalad/datalad-next/pull/495 (by @mih) + +## πŸ›‘ Tests + +- The test battery now honors the `DATALAD_TESTS_NONETWORK` environment + variable and downgrades by skipping any tests that require external + network access. (by @mih) + +# 1.0.1 (2023-10-18) + +## πŸ› Bug Fixes + +- Fix f-string syntax in error message of the `uncurl` remote. + https://github.com/datalad/datalad-next/pull/455 (by @christian-monch) + +- `FileSystemItem.from_path()` now honors its `link_target` parameter, and + resolves a target for any symlink item conditional on this setting. + Previously, a symlink target was always resolved. + Fixes https://github.com/datalad/datalad-next/issues/462 via + https://github.com/datalad/datalad-next/pull/464 (by @mih) + +- Update the vendor installation of versioneer to v0.29. This + resolves an installation failure with Python 3.12 due to + the removal of an ancient class. + Fixes https://github.com/datalad/datalad-next/issues/475 via + https://github.com/datalad/datalad-next/pull/483 (by @mih) + +- Bump dependency on Python to 3.8. This is presently the oldest version + still supported upstream. However, some functionality already used + 3.8 features, so this is also a bug fix. + Fixes https://github.com/datalad/datalad-next/issues/481 via + https://github.com/datalad/datalad-next/pull/486 (by @mih) + +## πŸ’« Enhancements and new features + +- Patch datalad-core's `run` command to honor configuration defaults + for substitutions. This enables placeholders like `{python}` that + point to `sys.executable` by default, and need not be explicitly + defined in system/user/dataset configuration. + Fixes https://github.com/datalad/datalad-next/issues/478 via + https://github.com/datalad/datalad-next/pull/485 (by @mih) + +## πŸ“ Documentation + +- Include `gitworktree` among the available file collection types + listed in `ls-file-collection`'s docstring. Fixes + https://github.com/datalad/datalad-next/issues/470 via + https://github.com/datalad/datalad-next/pull/471 (by @mslw) + +- The renderer API documentation now includes an entrypoint for the + runner-related functionality and documentation at + https://docs.datalad.org/projects/next/en/latest/generated/datalad_next.runners.html + Fixes https://github.com/datalad/datalad-next/issues/466 via + https://github.com/datalad/datalad-next/pull/467 (by @mih) + +## πŸ›‘ Tests + +- Simplified setup for subprocess test-coverage reporting. Standard + pytest-cov features are not employed, rather than the previous + approach that was adopted from datalad-core, which originated + in a time when testing was performed via nose. 
+ Fixes https://github.com/datalad/datalad-next/issues/453 via + https://github.com/datalad/datalad-next/pull/457 (by @mih) + + +# 1.0.0 (2023-09-25) + +This release represents a milestone in the development of the extension. +The package is reorganized to be a collection of more self-contained +mini-packages, each with its own set of tests. + +Developer documentation and guidelines have been added to aid further +development. One particular goal is to establish datalad-next as a proxy +for importing datalad-core functionality for other extensions. Direct imports +from datalad-core can be minimized in favor of imports from datalad-next. +This helps identify functionality needed outside the core package, +and guides efforts for future improvements. + +The 1.0 release marks the switch to a more standard approach to semantic +versioning. However, although substantial improvements have been made, +the 1.0 version in no way indicates a slowdown of development or a change in the +likelihood of (breaking) changes. They will merely become more easily +discoverable from the version label alone. + +Notable high-level features introduced by this major release are: + +- The new `UrlOperations` framework to provide a set of basic operations like + `download`, `upload`, `stat` for different protocols. This framework can be + thought of as a replacement for the "downloaders" functionality in + datalad-core -- although the feature list is not 100% overlapping. This new + framework is more easily extensible by 3rd-party code. + +- The `Constraints` framework elevates parameter/input validation to the next + level. In contrast to datalad-core, declarative input validation is no longer + limited to the CLI. Instead, command parameters can now be validated regardless + of the entrypoint through which a command is used. They can be validated + individually, but also sets of parameters can be validated jointly to implement + particular interaction checks. All parameter validations can now be performed + exhaustively, to present a user with a complete list of validation errors, rather + than the fail-on-first-error method implemented exclusively in datalad-core. + Validation errors are now reported using a dedicated structured data type to aid + their communication via non-console interfaces. + +- The `Credentials` system has been further refined with more homogenized + workflows and deeper integration into other subsystems. This release merely + represents a snapshot of continued development towards a standardization of + credential handling workflows. + +- The annex remotes `uncurl` and `archivist` are replacements for the + datalad-core implementations `datalad` and `datalad-archive`. They offer + substantially improved configurability and leaner operation -- built on the + `UrlOperations` framework. + +- A growing collection of iterators (see `iter_collections`) aims to provide + fast (and more Pythonic) operations on common data structures (Git worktrees, + directories, archives). They can be used as an alternative to the traditional + `Repo` classes (`GitRepo`, `AnnexRepo`) from datalad-core. + +- Analogous to `UrlOperations`, the `ArchiveOperations` framework aims to provide + an abstraction for operations on different archive types (e.g., TAR). They + represent an alternative to the traditional implementations of + `ExtractedArchive` and `ArchivesCache` from datalad-core, and aim at leaner + resource footprints. + +- The collection of runtime patches for datalad-core has been further expanded. 
+ All patches are now individually documented, and applied using a set of standard + helpers (see http://docs.datalad.org/projects/next/en/latest/patches.html). + +For details, please see the changelogs of the 1.0.0 beta releases below. + +## 💫 Enhancements and new features + +- `TarArchiveOperations` is the first implementation of the `ArchiveOperations` + abstraction, providing archive handlers with a set of standard operations: + - `open` to get a file object for a particular archive member + - `__contains__` to check for the presence of a particular archive member + - `__iter__` to get an iterator for processing all archive members + https://github.com/datalad/datalad-next/pull/415 (by @mih) + +## 🐛 Bug Fixes + +- Make `TarfileItem.name` be of type `PurePosixPath` to reflect the fact + that a TAR archive can contain members with names that cannot be represented + unmodified on a non-POSIX file system. + https://github.com/datalad/datalad-next/pull/422 (by @mih) + An analogous change was made for `ZipfileItem.name`. + https://github.com/datalad/datalad-next/pull/409 (by @christian-monch) + +- Fix `git ls-files` parsing in `iter_gitworktree()` to be compatible with + file names that start with a `tab` character. + https://github.com/datalad/datalad-next/pull/421 (by @christian-monch) + +## 📝 Documentation + +- Expanded guidelines on test implementations. + +- Add missing and fix wrong docstrings for HTTP/WebDAV server related fixtures. + https://github.com/datalad/datalad-next/pull/445 (by @adswa) + +## 🏠 Internal + +- Deduplicate configuration handling code in annex remotes. + https://github.com/datalad/datalad-next/pull/440 (by @adswa) + +## 🛡 Tests + +- New test fixtures have been introduced to replace traditional test helpers + from datalad-core: + + - `datalad_interactive_ui` and `datalad_noninteractive_ui` for testing + user interactions. They replace `with_testsui`. + https://github.com/datalad/datalad-next/pull/427 (by @mih) + +- Expand test coverage for `create_sibling_webdav` to include recursive + operation. + https://github.com/datalad/datalad-next/pull/434 (by @adswa) + + # 1.0.0b3 (2023-06-09) ## 🐛 Bug Fixes @@ -134,7 +321,7 @@ https://github.com/datalad/datalad-next/pull/391 (by @mih) - The `main()` entrypoint of the `datalad-annex` Git remote helper has - be generalized to be more re-usable by other (derived) remote helper + be generalized to be more reusable by other (derived) remote helper implementations. https://github.com/datalad/datalad-next/pull/411 (by @mih) @@ -246,7 +433,7 @@ - The CredentialManager was elevated to a top-level module ([#229](https://github.com/datalad/datalad-next/pull/220) by @mih) - Dataset-lookup behavior of the ``credentials`` command became identical to - ``downlad`` ([#256](https://github.com/datalad/datalad-next/pull/256) by + ``download`` ([#256](https://github.com/datalad/datalad-next/pull/256) by @mih) - The DataLad runner performance patch and all patches to clone functionality diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3980676a8..4a3284f4b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -100,3 +100,19 @@ The following components of the `datalad` package must not be used (directly) in #### `require_dataset()` Commands must use `datalad_next.constraints.dataset.EnsureDataset` instead. + +#### nose-style decorators in test implementations + +The use of decorators like `with_tempfile` is not allowed. +`pytest` fixtures have to be used instead. 
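+For illustration, a minimal sketch of the preferred pattern (a hypothetical test; `tmp_path` is pytest's built-in temporary-directory fixture):
+
+```python
+# nose-style (disallowed):
+#
+#   @with_tempfile
+#   def test_probe(path=None):
+#       ...
+
+# pytest-style (preferred): request the built-in `tmp_path` fixture,
+# which provides a unique temporary directory as a `pathlib.Path`
+def test_probe(tmp_path):
+    probe = tmp_path / 'probe.txt'
+    probe.write_text('data')
+    assert probe.read_text() == 'data'
+```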
+A *temporary* exception *may* be the helpers that are imported in `datalad_next.tests.utils`. +However, these will be reduced and removed over time, and additional usage only adds to the necessary refactoring effort. +Therefore new usage is highly discouraged. + +#### nose-style assertion helpers in test implementations + +The use of helpers like `assert_equal` is not allowed. +`pytest` constructs have to be used instead -- this typically means plain `assert` statements. +A *temporary* exception *may* be the helpers that are imported in `datalad_next.tests.utils`. +However, these will be reduced and removed over time, and additional usage only adds to the necessary refactoring effort. +Therefore new usage is highly discouraged. diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 8f1765a7a..d7ac0d167 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -1,3 +1 @@ -The following people have contributed to this project: - -Michael Hanke +See README.md for a comprehensive list of contributors diff --git a/README.md b/README.md index c4446b41b..dc020d7d2 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # DataLad NEXT extension +[![All Contributors](https://img.shields.io/github/all-contributors/datalad/datalad-next?color=ee8449&style=flat-square)](#contributors) [![Build status](https://ci.appveyor.com/api/projects/status/dxomp8wysjb7x2os/branch/main?svg=true)](https://ci.appveyor.com/project/mih/datalad-next/branch/main) [![codecov](https://codecov.io/gh/datalad/datalad-next/branch/main/graph/badge.svg?token=2P8rak7lSX)](https://codecov.io/gh/datalad/datalad-next) [![crippled-filesystems](https://github.com/datalad/datalad-next/workflows/crippled-filesystems/badge.svg)](https://github.com/datalad/datalad-next/actions?query=workflow%3Acrippled-filesystems) @@ -146,3 +147,33 @@ available at http://docs.datalad.org/projects/next/en/latest/#datalad-patches This DataLad extension was developed with funding from the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under grant SFB 1451 ([431549029](https://gepris.dfg.de/gepris/projekt/431549029), INF project). + + +## Contributors + + + + + + + + + + + + + + + + + + + + + +
+| Contributor | Contributions |
+| --- | --- |
+| [Michael Hanke](http://psychoinformatics.de/) | 🐛 💻 🖋 🎨 📖 💵 🔍 🤔 🚇 🚧 🧑‍🏫 📦 📆 👀 📢 ⚠️ 🔧 📓 |
+| [catetrai](https://github.com/catetrai) | 💻 🎨 📖 🤔 ⚠️ |
+| [Chris Markiewicz](https://github.com/effigies) | 🚧 💻 |
+| [Michał Szczepanik](https://github.com/mslw) | 🐛 💻 🖋 📖 💡 🤔 🚇 🚧 👀 📢 ⚠️ ✅ 📓 |
+| [Stephan Heunis](https://jsheunis.github.io/) | 🐛 💻 📖 🤔 🚧 📢 📓 |
+| [Benjamin Poldrack](https://github.com/bpoldrack) | 🐛 💻 |
+| [Yaroslav Halchenko](https://github.com/yarikoptic) | 🐛 💻 🚇 🚧 🔧 |
+| [Christian Mönch](https://github.com/christian-monch) | 💻 🎨 📖 🤔 👀 ⚠️ 📓 |
+| [Adina Wagner](https://github.com/adswa) | ♿️ 🐛 💻 📖 💡 🚧 📆 👀 📢 ⚠️ ✅ 📓 |
+| [John T. Wodder II](https://github.com/jwodder) | 💻 🚇 ⚠️ |
diff --git a/changelog.d/20231021_102012_michael.hanke_ensurehash.md b/changelog.d/20231021_102012_michael.hanke_ensurehash.md new file mode 100644 index 000000000..dcf0d5fdf --- /dev/null +++ b/changelog.d/20231021_102012_michael.hanke_ensurehash.md @@ -0,0 +1,6 @@ +### 💫 Enhancements and new features + +- New `EnsureHashAlgorithm` constraint to automatically expose + and verify algorithm labels from `hashlib.algorithms_guaranteed`. + Fixes https://github.com/datalad/datalad-next/issues/346 via + https://github.com/datalad/datalad-next/pull/492 (by @mslw @adswa) diff --git a/changelog.d/20231023_064405_michael.hanke_www_auth.md b/changelog.d/20231023_064405_michael.hanke_www_auth.md new file mode 100644 index 000000000..f4752d524 --- /dev/null +++ b/changelog.d/20231023_064405_michael.hanke_www_auth.md @@ -0,0 +1,8 @@ +### 🏠 Internal + +- The `www-authenticate` dependency is dropped. The functionality is + replaced by a `requests`-based implementation of an alternative parser. + This trims the dependency footprint and facilitates Debian-packaging. + The previous test cases are kept and further extended. + Fixes https://github.com/datalad/datalad-next/issues/493 via + https://github.com/datalad/datalad-next/pull/495 (by @mih) diff --git a/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md b/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md new file mode 100644 index 000000000..f225fce82 --- /dev/null +++ b/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md @@ -0,0 +1,6 @@ +### 🐛 Bug Fixes + +- Add patch to fix `update`'s target detection for adjusted mode datasets, + which could crash under some circumstances. + See https://github.com/datalad/datalad/issues/7507, fixed via + https://github.com/datalad/datalad-next/pull/509 (by @mih) diff --git a/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md b/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md new file mode 100644 index 000000000..f41dadca2 --- /dev/null +++ b/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md @@ -0,0 +1,6 @@ +### 💫 Enhancements and new features + +- The `archivist` remote now supports archive type detection + from `*E`-type annex keys for `.tgz` archives too. + Fixes https://github.com/datalad/datalad-next/issues/517 via + https://github.com/datalad/datalad-next/pull/518 (by @mih) diff --git a/datalad_next/__init__.py b/datalad_next/__init__.py index 36df189a5..116df896c 100644 --- a/datalad_next/__init__.py +++ b/datalad_next/__init__.py @@ -105,6 +105,5 @@ ) -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions +from . import _version +__version__ = _version.get_versions()['version'] diff --git a/datalad_next/_version.py b/datalad_next/_version.py index 2edb50eb5..1677391dc 100644 --- a/datalad_next/_version.py +++ b/datalad_next/_version.py @@ -5,8 +5,9 @@ # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. 
+# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -15,9 +16,11 @@ import re import subprocess import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must @@ -33,8 +36,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool -def get_config(): + +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -52,13 +62,13 @@ class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} @@ -67,22 +77,35 @@ def decorate(f): return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -93,18 +116,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, process.returncode 
-def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -113,15 +138,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % @@ -130,41 +154,48 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. - keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -177,11 +208,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -190,7 +221,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -199,6 +230,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %s" % r) return {"version": r, @@ -214,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -225,8 +266,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["--git-dir=.git", "rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -234,24 +282,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["--git-dir=.git", "describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["--git-dir=.git", "rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -293,26 +374,27 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["--git-dir=.git", "rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["--git-dir=.git", "show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -337,23 +419,71 @@ def render_pep440(pieces): return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] + rendered = "0.post0.dev%d" % pieces["distance"] return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -380,12 +510,41 @@ def render_pep440_post(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -402,7 +561,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -422,7 +581,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. @@ -442,7 +601,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -456,10 +615,14 @@ def render(pieces, style): if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -474,7 +637,7 @@ def render(pieces, style): "date": pieces.get("date")} -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some @@ -495,7 +658,7 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, diff --git a/datalad_next/annexremotes/archivist.py b/datalad_next/annexremotes/archivist.py index d37a2d0f3..a91606084 100644 --- a/datalad_next/annexremotes/archivist.py +++ b/datalad_next/annexremotes/archivist.py @@ -119,12 +119,6 @@ class ArchivistRemote(SpecialRemote): """ def __init__(self, annex): super().__init__(annex) - # the following members will be initialized on prepare() - # as they require access to the underlying repository - self._repo = None - # name of the (git) remote archivist is operating under - # (for querying the correct configuration) - self._remotename = None # central archive handler cache, initialized on-prepare self._ahandlers = None # a potential instance of the legacy datalad-archives implementation @@ -162,8 +156,11 @@ def prepare(self): subsequent operations will be processed by the ``datalad-archives`` special remote implementation! """ + # we have to do this here, because the base class `.repo` will only give + # us a `LeanAnnexRepo`. + # TODO it is unclear to MIH what is actually needed API-wise of the legacy + # interface. Needs research. self._repo = LegacyAnnexRepo(self.annex.getgitdir()) - self._remotename = self.annex.getgitremotename() # are we in legacy mode? # let remote-specific setting take priority (there could be # multiple archivist-type remotes configured), and use unspecific switch @@ -185,7 +182,7 @@ def prepare(self): # central archive key handler coordination self._ahandlers = _ArchiveHandlers( - self._repo, + self.repo, # TODO #cache_mode=self._getcfg( # 'archive-cache-mode', @@ -272,7 +269,7 @@ def checkpresent(self, key: str) -> bool: # So let's do a two-pass approach, first check local availability # for any archive key, and only if that does not find us an archive # go for the remotes - if any(_get_key_contentpath(self._repo, akey) for akey in akeys): + if any(_get_key_contentpath(self.repo, akey) for akey in akeys): # any one is good enough # TODO here we could actually look into the archive and # verify member presence without relatively little cost @@ -283,7 +280,7 @@ def checkpresent(self, key: str) -> bool: try: # if it exits clean, the key is still present at at least one # remote - self._repo.call_annex(['checkpresentkey', akey]) + self.repo.call_annex(['checkpresentkey', akey]) return True except CommandError: self.message( diff --git a/datalad_next/annexremotes/uncurl.py b/datalad_next/annexremotes/uncurl.py index cd40378af..51dade904 100644 --- a/datalad_next/annexremotes/uncurl.py +++ b/datalad_next/annexremotes/uncurl.py @@ -45,7 +45,7 @@ for a dataset (as shown above):: $ echo '[{"url":"ssh://my.server.org/home/me/file", "file":"dummy"}]' \\ - | datalad addurls - '{url}' {'file'} + | datalad addurls - '{url}' '{file}' This makes legacy commands (e.g., ``datalad download-url``), unnecessary, and facilitates the use of more advanced ``datalad addurls`` features (e.g., @@ -68,7 +68,7 @@ password (repeat): Enter a name to save the credential (for accessing http://httpbin.org/basic-auth/myuser/mypassword) securely for future - re-use, or 'skip' to not save the credential + reuse, or 'skip' to not save the credential name: httpbin-dummy addurl http://httpbin.org/basic-auth/myuser/mypassword (from uncurl) (to ...) 
@@ -223,12 +223,6 @@ from pathlib import Path import re -# we intentionally limit ourselves to the most basic interface -# and even that we only need to get a `ConfigManager` instance. -# If that class would support a plain path argument, we could -# avoid it entirely -from datalad_next.datasets import LeanAnnexRepo - from datalad_next.exceptions import ( CapturedException, UrlOperationsRemoteError, @@ -411,7 +405,7 @@ def remove(self, key): ) except UrlOperationsResourceUnknown: self.message( - 'f{key} not found at the remote, skipping', type='debug') + f'{key!r} not found at the remote, skipping', type='debug') # # helpers @@ -424,7 +418,7 @@ def get_key_urls(self, key) -> list[str]: # this will also work within checkurl() for a temporary key # generated by git-annex after claimurl() urls = self.annex.geturls(key, prefix='') - self.message(f"Known urls for {key!r}: {urls}", type='debug') + self.message(f'Known urls for {key!r}: {urls}', type='debug') if self.url_tmpl: # we have a rewriting template. extract all properties # from all known URLs and instantiate the template diff --git a/datalad_next/archive_operations/tests/test_tarfile.py b/datalad_next/archive_operations/tests/test_tarfile.py index 4a493db49..52b4ea62d 100644 --- a/datalad_next/archive_operations/tests/test_tarfile.py +++ b/datalad_next/archive_operations/tests/test_tarfile.py @@ -10,6 +10,7 @@ import pytest from datalad_next.iter_collections.utils import FileSystemItemType +from datalad_next.tests.marker import skipif_no_network from ..tarfile import TarArchiveOperations @@ -37,6 +38,7 @@ def structured_sample_tar_xz( ) +@skipif_no_network def test_tararchive_basics(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz # this is intentionally a hard-coded POSIX relpath @@ -48,6 +50,7 @@ def test_tararchive_basics(structured_sample_tar_xz: TestArchive): assert member.read() == spec.content +@skipif_no_network def test_tararchive_contain(structured_sample_tar_xz: TestArchive): # this is intentionally a hard-coded POSIX relpath member_name = 'test-archive/onetwothree.txt' @@ -59,6 +62,7 @@ def test_tararchive_contain(structured_sample_tar_xz: TestArchive): assert 'bogus' not in archive_ops +@skipif_no_network def test_tararchive_iterator(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz with TarArchiveOperations(spec.path) as archive_ops: @@ -68,6 +72,7 @@ def test_tararchive_iterator(structured_sample_tar_xz: TestArchive): assert item.name in archive_ops +@skipif_no_network def test_open(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz file_pointer = set() diff --git a/datalad_next/commands/download.py b/datalad_next/commands/download.py index 4f61e2110..927079b3f 100644 --- a/datalad_next/commands/download.py +++ b/datalad_next/commands/download.py @@ -73,7 +73,7 @@ class Download(ValidatedInterface): In contrast to other downloader tools, this command integrates with the DataLad credential management and is able to auto-discover credentials. If no credential is available, it automatically prompts for them, and - offers to store them for re-use after a successful authentication. + offers to store them for reuse after a successful authentication. Simultaneous hashing (checksumming) of downloaded content is supported with user-specified algorithms. 
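As an illustration of the documented behavior, a minimal sketch of a download call via the Python API follows; the exact `spec` and `hash` parameter shapes here are assumptions for this example and should be checked against the command's reference documentation:

```python
# sketch only: URL and target path are placeholders
from datalad.api import download  # registered by datalad-next

results = download(
    # map a source URL to a local target path
    {'https://example.com/file.dat': 'file.dat'},
    # request simultaneous checksumming with a user-specified algorithm
    hash=['sha256'],
)
```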
diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py index cbc0aa867..57462c49f 100644 --- a/datalad_next/commands/ls_file_collection.py +++ b/datalad_next/commands/ls_file_collection.py @@ -39,6 +39,8 @@ EnsureChoice, EnsurePath, EnsureURL, + EnsureHashAlgorithm, + EnsureListOf, ) from datalad_next.uis import ( ansi_colors as ac, @@ -93,9 +95,7 @@ def __init__(self): param_constraints=dict( type=self._collection_types, collection=EnsurePath(lexists=True) | EnsureURL(), - # TODO EnsureHashAlgorithm - # https://github.com/datalad/datalad-next/issues/346 - #hash=None, + hash=EnsureHashAlgorithm() | EnsureListOf(EnsureHashAlgorithm()), ), joint_constraints={ ParameterConstraintContext(('type', 'collection', 'hash'), @@ -226,7 +226,7 @@ class LsFileCollection(ValidatedInterface): ``directory`` Reports on the content of a given directory (non-recursively). The collection identifier is the path of the directory. Item identifiers - are the name of a file within that directory. Standard properties like + are the names of items within that directory. Standard properties like ``size``, ``mtime``, or ``link_target`` are included in the report. [PY: When hashes are computed, an ``fp`` property with a file-like is provided. Reading file data from it requires a ``seek(0)`` in most cases. This file handle is only open when items are yielded directly by this command (``return_type='generator'``) and only until the next result is yielded. PY] + + ``gitworktree`` + Reports on all tracked and untracked content of a Git repository's + work tree. The collection identifier is a path of a directory in a Git + repository (which can, but need not, be its root). Item identifiers + are the relative paths of items within that directory. Reported + properties include ``gitsha`` and ``gittype``; note that the + ``gitsha`` is not equivalent to a SHA1 hash of a file's content, but + is the SHA-type blob identifier as reported and used by Git. + [PY: When hashes are computed, an ``fp`` property with a file-like is + provided. Reading file data from it requires a ``seek(0)`` in most + cases. This file handle is only open when items are yielded directly + by this command (``return_type='generator'``) and only until the next + result is yielded. PY] + + ``tarfile`` Reports on members of a TAR archive. The collection identifier is the path of the TAR file. 
Item identifiers are the relative paths diff --git a/datalad_next/commands/tests/test_create_sibling_webdav.py b/datalad_next/commands/tests/test_create_sibling_webdav.py index c303a3d76..d140edcab 100644 --- a/datalad_next/commands/tests/test_create_sibling_webdav.py +++ b/datalad_next/commands/tests/test_create_sibling_webdav.py @@ -138,6 +138,32 @@ def check_common_workflow( assert_status('ok', dsclone.get('.', **ca)) # verify testfile content eq_('dummy', (dsclone.pathobj / 'testfile.dat').read_text()) + # ensure that recursive operations succeed + # create a subdataset + subds = ds.create('mysubds') + targetdir_name = 'recursiontest' + subtargetdir = Path(webdav_server.path) / targetdir_name / 'mysubds' + url = f'{webdav_server.url}/{targetdir_name}' + + with chpwd(ds.path): + res = create_sibling_webdav( + url, + credential=webdav_credential['name'] + if declare_credential else None, + name='recursive-sibling', + mode=mode, + recursive=True, + **ca) + assert len(res) == 4 # 2 for create-sibling-webdav, 2 for storage + assert_in_results( + res, + action='create_sibling_webdav.storage', + status='ok', + type='sibling', + path=subds.path, + name='recursive-sibling-storage', + ) + ok_(subtargetdir.exists()) def test_bad_url_catching(existing_dataset): diff --git a/datalad_next/commands/tests/test_ls_file_collection.py b/datalad_next/commands/tests/test_ls_file_collection.py index 2455af7f4..dee6247f2 100644 --- a/datalad_next/commands/tests/test_ls_file_collection.py +++ b/datalad_next/commands/tests/test_ls_file_collection.py @@ -7,6 +7,7 @@ from datalad.api import ls_file_collection from datalad_next.constraints.exceptions import CommandParametrizationError +from datalad_next.tests.marker import skipif_no_network from ..ls_file_collection import LsFileCollectionParamValidator @@ -30,6 +31,7 @@ def test_ls_file_collection_insufficient_args(): ls_file_collection('bogus', 'http://example.com') +@skipif_no_network def test_ls_file_collection_tarfile(sample_tar_xz): kwa = dict(result_renderer='disabled') # smoke test first @@ -84,6 +86,7 @@ def test_ls_file_collection_validator(): val.get_collection_iter(type='bogus', collection='any', hash=None) +@skipif_no_network def test_replace_add_archive_content(sample_tar_xz, existing_dataset): kwa = dict(result_renderer='disabled') diff --git a/datalad_next/constraints/__init__.py b/datalad_next/constraints/__init__.py index 05442fd94..e6f013983 100644 --- a/datalad_next/constraints/__init__.py +++ b/datalad_next/constraints/__init__.py @@ -59,6 +59,7 @@ EnsureCallable, EnsureChoice, EnsureFloat, + EnsureHashAlgorithm, EnsureInt, EnsureKeyChoice, EnsureNone, diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index 0d9c56bc2..af81bee45 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -12,8 +12,10 @@ __docformat__ = 'restructuredtext' +from hashlib import algorithms_guaranteed as hash_algorithms_guaranteed from pathlib import Path import re +from typing import Callable from datalad_next.datasets import resolve_path @@ -274,6 +276,9 @@ def long_description(self): def short_description(self): return '{%s}' % ', '.join([repr(c) for c in self._allowed]) + def __str__(self): + return f"one of {self.short_description()}" + class EnsureKeyChoice(EnsureChoice): """Ensure value under a key in an input is in a set of possible values""" @@ -364,7 +369,7 @@ def __init__(self, path_type: type = Path, is_format: str | None = None, lexists: bool | None = None, - is_mode: callable | None = 
None, + is_mode: Callable | None = None, ref: Path | None = None, ref_is: str = 'parent-or-same-as', dsarg: DatasetParameter | None = None): @@ -497,3 +502,12 @@ def short_description(self): if self._ref else '', ) + + +class EnsureHashAlgorithm(EnsureChoice): + """Ensure an input matches a name of a ``hashlib`` algorithm + + Specifically the item must be in the ``algorithms_guaranteed`` collection. + """ + def __init__(self): + super().__init__(*hash_algorithms_guaranteed) diff --git a/datalad_next/constraints/compound.py b/datalad_next/constraints/compound.py index 99fe8d66d..bb0d87fd0 100644 --- a/datalad_next/constraints/compound.py +++ b/datalad_next/constraints/compound.py @@ -77,10 +77,12 @@ def __call__(self, value): iter = self._iter_type( self._item_constraint(i) for i in value ) - except TypeError as e: + except (ConstraintError, TypeError) as e: self.raise_for( value, - "cannot coerce to target (item) type", + "{itertype} item is not {itype}", + itertype=self._iter_type.__name__, + itype=self._item_constraint, __caused_by__=e, ) if self._min_len is not None or self._max_len is not None: diff --git a/datalad_next/constraints/exceptions.py b/datalad_next/constraints/exceptions.py index 2955c5025..d75b08ee5 100644 --- a/datalad_next/constraints/exceptions.py +++ b/datalad_next/constraints/exceptions.py @@ -105,7 +105,7 @@ def caused_by(self) -> Tuple[Exception] | None: """ cb = self.context.get('__caused_by__', None) if cb is None: - return + return None elif isinstance(cb, Exception): return (cb,) else: diff --git a/datalad_next/constraints/tests/test_basic.py b/datalad_next/constraints/tests/test_basic.py index 2748a158e..b1301ba5e 100644 --- a/datalad_next/constraints/tests/test_basic.py +++ b/datalad_next/constraints/tests/test_basic.py @@ -11,6 +11,7 @@ EnsureNone, EnsureCallable, EnsureChoice, + EnsureHashAlgorithm, EnsureKeyChoice, EnsureRange, EnsurePath, @@ -65,14 +66,14 @@ def test_bool(): # this should always work assert c(True) is True assert c(False) is False - # all that resuls in True + # all that results in True assert c('True') is True assert c('true') is True assert c('1') is True assert c('yes') is True assert c('on') is True assert c('enable') is True - # all that resuls in False + # all that results in False assert c('false') is False assert c('False') is False assert c('0') is False @@ -188,6 +189,7 @@ def test_choice(): assert i in descr # short is a "set" or repr()s assert c.short_description() == "{'choice1', 'choice2', None}" + assert str(c) == "one of {'choice1', 'choice2', None}" # this should always work assert c('choice1') == 'choice1' assert c(None) is None @@ -317,3 +319,27 @@ def test_EnsurePath_fordataset(existing_dataset): # 2. 
dataset is given as a dataset object tc = c.for_dataset(DatasetParameter(ds, ds)) assert tc('relpath') == (ds.pathobj / 'relpath') + + +def test_EnsureHashAlgorithm(): + c = EnsureHashAlgorithm() + # simple cases that should pass + hashes = [ + 'sha3_256', 'shake_256', 'sha3_384', 'md5', 'shake_128', 'sha384', + 'sha3_224', 'blake2s', 'sha1', 'blake2b', 'sha224', 'sha512', 'sha256', + 'sha3_512' + ] + for hash in hashes: + c(hash) + # a few bogus ones: + bad_hashes = [ + 'md17', 'McGyver', 'sha2', 'bogus' + ] + for baddie in bad_hashes: + with pytest.raises(ConstraintError): + c(baddie) + + # check messaging + for i in ('md5', 'shake_256', 'sha3_512'): + assert i in c.short_description() + assert i in c.long_description() diff --git a/datalad_next/credman/manager.py b/datalad_next/credman/manager.py index 60124e2ff..dfe0f4fe2 100644 --- a/datalad_next/credman/manager.py +++ b/datalad_next/credman/manager.py @@ -15,6 +15,7 @@ __all__ = ['CredentialManager'] +from collections.abc import Set from datetime import datetime import logging import re @@ -294,7 +295,7 @@ def set(self, prompt = 'Enter a name to save the credential' if _context: prompt = f'{prompt} ({_context})' - prompt = f"{prompt} securely for future re-use, " \ + prompt = f"{prompt} securely for future reuse, " \ "or 'skip' to not save the credential" if _suggested_name: prompt = f'{prompt}, or leave empty to accept the name ' \ @@ -823,7 +824,7 @@ def _get_credential_from_cfg(self, name: str) -> Dict: if k.startswith(var_prefix) } - def _get_known_credential_names(self) -> set: + def _get_known_credential_names(self) -> Set[str]: known_credentials = set( '.'.join(k.split('.')[2:-1]) for k in self._cfg.keys() if k.startswith('datalad.credential.') @@ -898,7 +899,7 @@ def _get_legacy_credential_from_keyring( type_hint = dict(_yield_legacy_credential_types()).get(name) if not type_hint or type_hint not in self._cred_types: - return + return None cred = {} lc = self._cred_types[type_hint] @@ -952,7 +953,7 @@ def _get_secret( return secret # no secret found anywhere - return + return None @property def _cfg(self): diff --git a/datalad_next/datasets/__init__.py b/datalad_next/datasets/__init__.py index e2f6bdbc4..7c8baf334 100644 --- a/datalad_next/datasets/__init__.py +++ b/datalad_next/datasets/__init__.py @@ -3,18 +3,18 @@ Two sets of repository abstractions are available :class:`LeanGitRepo` and :class:`LeanAnnexRepo` vs. :class:`LegacyGitRepo` and :class:`LegacyAnnexRepo`. -The latter are the classic classes providing a, now legacy, low-level API to -repository operations. This functionality stems from the earliest days of -DataLad and implements paradigms and behaviors that are no longer common to -the rest of the DataLad API. :class:`LegacyGitRepo` and -:class:`LegacyAnnexRepo` should no longer be used in new developments. - -:class:`LeanGitRepo` and :class:`LeanAnnexRepo` on the other hand provide -a more modern, substantially restricted API and represent the present -standard API for low-level repository operations. They are geared towards -interacting with Git and git-annex more directly, and are more suitable -for generator-like implementations, promoting low response latencies, and -a leaner processing footprint. +:class:`LeanGitRepo` and :class:`LeanAnnexRepo` provide a more modern, +small-ish interface and represent the present standard API for low-level +repository operations. 
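For orientation, a hedged sketch of the lean API in action (the repository path is hypothetical, and the shown ``call_git*()`` helpers are assumed to be part of the restricted method set)::

    from datalad_next.datasets import LeanGitRepo

    repo = LeanGitRepo('/path/to/repo')
    # one-line output helper for quick queries
    head = repo.call_git_oneline(['rev-parse', 'HEAD'])
    # generator-style item access keeps response latency low
    for line in repo.call_git_items_(['status', '--porcelain']):
        print(line)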
They are geared towards interacting with Git and +git-annex more directly, and are more suitable for generator-like +implementations, promoting low response latencies, and a leaner processing +footprint. + +The ``Legacy*Repo`` classes provide a, now legacy, low-level API to repository +operations. This functionality stems from the earliest days of DataLad and +implements paradigms and behaviors that are no longer common to the rest of the +DataLad API. :class:`LegacyGitRepo` and :class:`LegacyAnnexRepo` should no +longer be used in new developments, and are not documented here. """ from pathlib import Path @@ -38,18 +38,33 @@ class LeanAnnexRepo(LegacyAnnexRepo): """git-annex repository representation with a minimized API This is a companion of :class:`LeanGitRepo`. In the same spirit, it - restricts its API to a limited set of method that primarily extend - :class:`LeanGitRepo` with a set of ``call_annex*()`` methods. + restricts its API to a limited set of methods that extend + :class:`LeanGitRepo`. + """ + #CA .. autosummary:: + + #CA call_annex + #CA call_annex_oneline + #CA call_annex_success # list of attributes permitted in the "lean" API. This list extends # the API of LeanGitRepo - # TODO extend whitelist of attributed as necessary + # TODO extend whitelist of attributes as necessary _lean_attrs = [ + #CA # these are the ones we intend to provide + #CA 'call_annex', + #CA 'call_annex_oneline', + #CA 'call_annex_success', + # and here are the ones that we need to permit in order to get them + # to run '_check_git_version', + #CA '_check_git_annex_version', # used by AnnexRepo.__init__() -- should be using `is_valid()` 'is_valid_git', 'is_valid_annex', '_is_direct_mode_from_config', + #CA '_call_annex', + #CA 'call_annex_items_', ] # intentionally limiting to just `path` as the only constructor argument @@ -65,5 +80,5 @@ def __new__(cls, path: Path): return obj -def _unsupported_method(self): +def _unsupported_method(self, *args, **kwargs): raise NotImplementedError('method unsupported by LeanAnnexRepo') diff --git a/datalad_next/gitremotes/datalad_annex.py b/datalad_next/gitremotes/datalad_annex.py index 55c86074c..135a64c49 100755 --- a/datalad_next/gitremotes/datalad_annex.py +++ b/datalad_next/gitremotes/datalad_annex.py @@ -1146,7 +1146,7 @@ def make_export_tree(repo): ID of the tree object, suitable for `git-annex export`.
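A hedged usage sketch for this helper (the special remote name is hypothetical, and a ``call_annex()``-capable repository object is assumed)::

    # synthesize the export tree and hand its ID to git-annex
    tree_id = make_export_tree(repo)
    repo.call_annex(['export', tree_id, '--to', 'dl-remote'])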
""" here = repo.config.get('annex.uuid') - # re-use existing, or go with fixed random one + # reuse existing, or go with fixed random one origin = repo.config.get('remote.origin.annex-uuid', '8249ffce-770a-11ec-9578-5f6af5e76eaa') assert here, "No 'here'" diff --git a/datalad_next/iter_collections/tests/test_itertar.py b/datalad_next/iter_collections/tests/test_itertar.py index 7f76b2985..23f393a48 100644 --- a/datalad_next/iter_collections/tests/test_itertar.py +++ b/datalad_next/iter_collections/tests/test_itertar.py @@ -3,6 +3,8 @@ from datalad.api import download +from datalad_next.tests.marker import skipif_no_network + from ..tarfile import ( TarfileItem, FileSystemItemType, @@ -42,6 +44,7 @@ def sample_tar_xz(tmp_path_factory): tfpath.unlink() +@skipif_no_network def test_iter_tar(sample_tar_xz): target_hash = {'SHA1': 'a8fdc205a9f19cc1c7507a60c4f01b13d11d7fd0', 'md5': 'ba1f2511fc30423bdbb183fe33f3dd0f'} diff --git a/datalad_next/iter_collections/tests/test_utils.py b/datalad_next/iter_collections/tests/test_utils.py new file mode 100644 index 000000000..1393431e9 --- /dev/null +++ b/datalad_next/iter_collections/tests/test_utils.py @@ -0,0 +1,32 @@ +from datalad_next.tests.utils import skip_wo_symlink_capability + +from ..utils import FileSystemItem + + +def test_FileSystemItem(tmp_path): + testfile = tmp_path / 'file1.txt' + testfile_content = 'content' + testfile.write_text(testfile_content) + + item = FileSystemItem.from_path(testfile) + assert item.size == len(testfile_content) + assert item.link_target is None + + +@skip_wo_symlink_capability +def test_FileSystemItem_linktarget(tmp_path): + testfile = tmp_path / 'file1.txt' + testfile_content = 'short' + testfile.write_text(testfile_content) + testlink = tmp_path / 'link' + testlink.symlink_to(testfile) + + item = FileSystemItem.from_path(testlink) + assert testfile.samefile(item.link_target) + # size of the link file does not anyhow propagate the size of the + # link target + assert item.size != len(testfile_content) + + # we can disable link resolution + item = FileSystemItem.from_path(testlink, link_target=False) + assert item.link_target is None diff --git a/datalad_next/iter_collections/utils.py b/datalad_next/iter_collections/utils.py index 0f00a2e5d..91fcdc4c9 100644 --- a/datalad_next/iter_collections/utils.py +++ b/datalad_next/iter_collections/utils.py @@ -96,7 +96,7 @@ def from_path( uid=cstat.st_uid, gid=cstat.st_gid, ) - if ctype == FileSystemItemType.symlink: + if link_target and ctype == FileSystemItemType.symlink: # could be p.readlink() from PY3.9+ item.link_target = PurePath(os.readlink(path)) return item diff --git a/datalad_next/patches/cli_configoverrides.py b/datalad_next/patches/cli_configoverrides.py index 59276b5d3..b553854e1 100644 --- a/datalad_next/patches/cli_configoverrides.py +++ b/datalad_next/patches/cli_configoverrides.py @@ -1,3 +1,10 @@ +"""Post DataLad config overrides CLI/ENV as GIT_CONFIG items in process ENV + +This enables their propagation to any subprocess. This includes the +specification of overrides via the ``datalad -c ...`` option of the +main CLI entrypoint. 
+""" + from datalad.config import _update_from_env as _update_from_datalad_env from datalad.cli.helpers import _parse_overrides_from_cmdline diff --git a/datalad_next/patches/commanderror.py b/datalad_next/patches/commanderror.py index daef7dc69..eb42182a0 100644 --- a/datalad_next/patches/commanderror.py +++ b/datalad_next/patches/commanderror.py @@ -1,3 +1,11 @@ +"""Improve ``CommandError`` rendering + +Without this patch that overwrites ``__repr__``, it would use +``RuntimeError``'s variant and ignore all additional structured information +except for ``.msg`` -- which is frequently empty and confuses with a +`CommandError('')` display. +""" + from datalad.runner.exception import CommandError @@ -5,7 +13,4 @@ def commanderror_repr(self) -> str: return self.to_str() -# without overwriting __repr__ it would use RuntimeError's variant -# with ignore all info but `.msg` which will be empty frequently -# and confuse people with `CommandError('')` CommandError.__repr__ = commanderror_repr diff --git a/datalad_next/patches/configuration.py b/datalad_next/patches/configuration.py index e718141ee..77c66e655 100644 --- a/datalad_next/patches/configuration.py +++ b/datalad_next/patches/configuration.py @@ -56,7 +56,7 @@ def __call__( raise ValueError( 'Scope selection is not supported for dumping') - # normalize variable specificatons + # normalize variable specifications specs = [] for s in ensure_list(spec): if isinstance(s, tuple): diff --git a/datalad_next/patches/create_sibling_gitlab.py b/datalad_next/patches/create_sibling_gitlab.py index c4c8974fe..971953dc3 100644 --- a/datalad_next/patches/create_sibling_gitlab.py +++ b/datalad_next/patches/create_sibling_gitlab.py @@ -1,5 +1,7 @@ -""" +"""Streamline user experience +Discontinue advertizing the ``hierarchy`` layout, and better explain +limitations of the command. """ import datalad.distributed.create_sibling_gitlab as mod_gitlab diff --git a/datalad_next/patches/distribution_dataset.py b/datalad_next/patches/distribution_dataset.py index f637006d4..4a56113d8 100644 --- a/datalad_next/patches/distribution_dataset.py +++ b/datalad_next/patches/distribution_dataset.py @@ -33,5 +33,5 @@ def resolve_path(path, ds=None, ds_resolved=None): resolve_path, msg='Apply datalad-next patch to distribution.dataset:resolve_path') -# re-use docs +# reuse docs resolve_path.__doc__ = orig_resolve_path.__doc__ diff --git a/datalad_next/patches/enabled.py b/datalad_next/patches/enabled.py index 7fe207ca1..c705c0f60 100644 --- a/datalad_next/patches/enabled.py +++ b/datalad_next/patches/enabled.py @@ -12,4 +12,6 @@ test_keyring, customremotes_main, create_sibling_gitlab, + run, + update, ) diff --git a/datalad_next/patches/push_optimize.py b/datalad_next/patches/push_optimize.py index 67f915911..b2887665c 100644 --- a/datalad_next/patches/push_optimize.py +++ b/datalad_next/patches/push_optimize.py @@ -266,12 +266,12 @@ def _get_push_target(repo, target_arg): ------- str or None, str, str or None, list or None Target label, if determined; status label; optional message; - git-push-dryrun result for re-use or None, if no dry-run was + git-push-dryrun result for reuse or None, if no dry-run was attempted. 
""" # verified or auto-detected target = None - # for re-use + # for reuse wannabe_gitpush = None if not target_arg: # let Git figure out what needs doing diff --git a/datalad_next/patches/run.py b/datalad_next/patches/run.py new file mode 100644 index 000000000..b7672d7ac --- /dev/null +++ b/datalad_next/patches/run.py @@ -0,0 +1,90 @@ +"""Enhance ``run()`` placeholder substitutions to honor configuration defaults + +Previously, ``run()`` would not recognize configuration defaults for +placeholder substitution. This means that any placeholders globally declared in +``datalad.interface.common_cfg``, or via ``register_config()`` in DataLad +extensions would not be effective. + +This patch makes run's ``format_command()`` helper include such defaults +explicitly, and thereby enable the global declaration of substitution defaults. + +Moreoever a ``{python}`` placeholder is now defined via this mechanism, and +points to the value of ``sys.executable`` by default. This particular +placeholder was found to be valuable for improving the portability of +run-recording across (specific) Python versions, or across different (virtual) +environments. See https://github.com/datalad/datalad-container/issues/224 for +an example use case. + +https://github.com/datalad/datalad/pull/7509 +""" + +from itertools import filterfalse +import sys + +from datalad.core.local.run import ( + GlobbedPaths, + SequenceFormatter, + normalize_command, + quote_cmdlinearg, +) +from datalad.interface.common_cfg import definitions as cfg_defs +from datalad.support.constraints import EnsureStr +from datalad.support.extensions import register_config + +from . import apply_patch + + +# This function is taken from datalad-core@a96c51c0b2794b2a2b4432ec7bd51f260cb91a37 +# datalad/core/local/run.py +# The change has been proposed in https://github.com/datalad/datalad/pull/7509 +def format_command(dset, command, **kwds): + """Plug in placeholders in `command`. + + Parameters + ---------- + dset : Dataset + command : str or list + + `kwds` is passed to the `format` call. `inputs` and `outputs` are converted + to GlobbedPaths if necessary. + + Returns + ------- + formatted command (str) + """ + command = normalize_command(command) + sfmt = SequenceFormatter() + cprefix = 'datalad.run.substitutions.' 
+ + def not_subst(x): + return not x.startswith(cprefix) + + for k in set(filterfalse(not_subst, cfg_defs.keys())).union( + filterfalse(not_subst, dset.config.keys())): + v = dset.config.get( + k, + # pull a default from the config definitions + # if we have no value, but a key + cfg_defs.get(k, {}).get('default', None)) + sub_key = k.replace(cprefix, "") + if sub_key not in kwds: + kwds[sub_key] = v + + for name in ["inputs", "outputs"]: + io_val = kwds.pop(name, None) + if not isinstance(io_val, GlobbedPaths): + io_val = GlobbedPaths(io_val, pwd=kwds.get("pwd")) + kwds[name] = list(map(quote_cmdlinearg, io_val.expand(dot=False))) + return sfmt.format(command, **kwds) + + +apply_patch( + 'datalad.core.local.run', None, 'format_command', format_command) +register_config( + 'datalad.run.substitutions.python', + 'Substitution for {python} placeholder', + description='Path to a Python interpreter executable', + type=EnsureStr(), + default=sys.executable, + dialog='question', +) diff --git a/datalad_next/patches/tests/test_annex_progress_logging.py b/datalad_next/patches/tests/test_annex_progress_logging.py index 82e211474..6f575f305 100644 --- a/datalad_next/patches/tests/test_annex_progress_logging.py +++ b/datalad_next/patches/tests/test_annex_progress_logging.py @@ -1,4 +1,7 @@ +from datalad_next.tests.marker import skipif_no_network + +@skipif_no_network def test_uncurl_progress_reporting_to_annex(existing_dataset, monkeypatch): """Set up a repo that is used to download a key, check that we see progress reports diff --git a/datalad_next/patches/tests/test_run.py b/datalad_next/patches/tests/test_run.py new file mode 100644 index 000000000..721e6de96 --- /dev/null +++ b/datalad_next/patches/tests/test_run.py @@ -0,0 +1,25 @@ +import pytest + +from datalad_next.exceptions import IncompleteResultsError +from datalad_next.tests.utils import ( + SkipTest, + assert_result_count, +) + + +def test_substitution_config_default(existing_dataset): + ds = existing_dataset + + if ds.config.get('datalad.run.substitutions.python') is not None: + # we want to test default handling when no config is set + raise SkipTest( + 'Test assumptions conflict with effective configuration') + + # the {python} placeholder is not explicitly defined, but it has + # a default, which run() should discover and use + res = ds.run('{python} -c "True"', result_renderer='disabled') + assert_result_count(res, 1, action='run', status='ok') + + # make sure we could actually detect breakage with the check above + with pytest.raises(IncompleteResultsError): + ds.run('{python} -c "breakage"', result_renderer='disabled') diff --git a/datalad_next/patches/update.py b/datalad_next/patches/update.py new file mode 100644 index 000000000..d0f7be85b --- /dev/null +++ b/datalad_next/patches/update.py @@ -0,0 +1,58 @@ +"""Robustify ``update()`` target detection for adjusted mode datasets + +The true cause of the problem is not well understood. +https://github.com/datalad/datalad/issues/7507 documents that it is not +easy to capture the breakage in a test. +""" + +from . import apply_patch + + +# This function is taken from datalad-core@cdc0ceb30ae04265c5369186acf2ab2683a8ec96 +# datalad/distribution/update.py +# The change has been proposed in https://github.com/datalad/datalad/pull/7522 +def _choose_update_target(repo, branch, remote, cfg_remote): + """Select a target to update `repo` from. + + Note: This function is not concerned with _how_ the update is done (e.g., + merge, reset, ...). 
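For illustration (hedged; branch names are hypothetical): in an adjusted-mode checkout the active branch may be ``adjusted/main(unlocked)`` while the upstream configuration lives with the corresponding branch ``main``, so the patched lookup below effectively resolves::

    repo.call_git_oneline(
        ['rev-parse', '--symbolic-full-name', '--abbrev-ref=strict',
         'main@{upstream}'],
        read_only=True)

rather than pointing ``@{upstream}`` at the adjusted branch itself.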
+ + Parameters + ---------- + repo : Repo instance + branch : str + The current branch. + remote : str + The remote which updates are coming from. + cfg_remote : str + The configured upstream remote. + + Returns + ------- + str (the target) or None if a choice wasn't made. + """ + target = None + if cfg_remote and remote == cfg_remote: + # Use the configured cfg_remote branch as the target. + # + # In this scenario, it's tempting to use FETCH_HEAD as the target. For + # a merge, that would be the equivalent of 'git pull REMOTE'. But doing + # so would be problematic when the GitRepo.fetch() call was passed + # all_=True. Given we can't use FETCH_HEAD, it's tempting to use the + # branch.*.merge value, but that assumes a value for remote.*.fetch. + target = repo.call_git_oneline( + ["rev-parse", "--symbolic-full-name", "--abbrev-ref=strict", + # THIS IS THE PATCH: prefix @{upstream} with the branch name + # of the corresponding branch + f"{repo.get_corresponding_branch(branch) or ''}" "@{upstream}"], + read_only=True) + elif branch: + remote_branch = "{}/{}".format(remote, branch) + if repo.commit_exists(remote_branch): + target = remote_branch + return target + + +apply_patch( + 'datalad.distribution.update', None, '_choose_update_target', + _choose_update_target) diff --git a/datalad_next/runners/__init__.py b/datalad_next/runners/__init__.py index ce3fa932c..cca244f9c 100644 --- a/datalad_next/runners/__init__.py +++ b/datalad_next/runners/__init__.py @@ -1,6 +1,44 @@ """Execution of subprocesses -This module import all relevant components for subprocess execution. +This module provides all relevant components for subprocess execution. + +.. currentmodule:: datalad_next.runners + +Low-level tooling +----------------- + +Two essential process execution/management utilities are provided, for +generic command execution, and for executing commands in the context +of a Git repository. + +.. autosummary:: + :toctree: generated + + GitRunner + Runner + +Additional information on the design of the subprocess execution tooling +is available from https://docs.datalad.org/design/threaded_runner.html + +A standard exception type is used to communicate any process termination +with a non-zero exit code. + +.. autosummary:: + :toctree: generated + + CommandError + +Command output can be processed via "protocol" implementations that are +inspired by ``asyncio.SubprocessProtocol``. + +.. 
autosummary:: + :toctree: generated + + KillOutput + NoCapture + StdOutCapture + StdErrCapture + StdOutErrCapture """ # runners diff --git a/datalad_next/tests/fixtures.py b/datalad_next/tests/fixtures.py index 71d9dbbfb..254a36157 100644 --- a/datalad_next/tests/fixtures.py +++ b/datalad_next/tests/fixtures.py @@ -221,6 +221,8 @@ def existing_noannex_dataset(dataset): @pytest.fixture(autouse=False, scope="session") def webdav_credential(): + """Provides HTTP Basic authentication credential necessary to access the + server provided by the ``webdav_server`` fixture.""" yield dict( name='dltest-my&=webdav', user='datalad', @@ -256,6 +258,8 @@ def webdav_server(tmp_path_factory, webdav_credential): @pytest.fixture(autouse=False, scope="session") def http_credential(): + """Provides the HTTP Basic authentication credential necessary to access the + HTTP server provided by the ``http_server_with_basicauth`` fixture.""" yield dict( name='dltest-my&=http', user='datalad', @@ -273,9 +277,6 @@ def http_server(tmp_path_factory): - ``path``: ``Path`` instance of the served temporary directory - ``url``: HTTP URL to access the HTTP server - - Server access requires HTTP Basic authentication with the credential - provided by the ``webdav_credential`` fixture. """ # must use the factory to get a unique path even when a concrete # test also uses `tmp_path` @@ -289,7 +290,7 @@ def http_server(tmp_path_factory): @pytest.fixture(autouse=False, scope="function") def http_server_with_basicauth(tmp_path_factory, http_credential): - """Like ``http_server`` but requiring authenticat with ``http_credential`` + """Like ``http_server`` but requiring authentication via ``http_credential`` """ path = tmp_path_factory.mktemp("webdav") server = HTTPPath( @@ -367,6 +368,10 @@ def httpbin(httpbin_service): raises ``SkipTest`` whenever any of these undesired conditions is detected. Otherwise it just relays ``httpbin_service``. """ + if os.environ.get('DATALAD_TESTS_NONETWORK'): + raise SkipTest( + 'Not running httpbin-based test: NONETWORK flag set' + ) if 'APPVEYOR' in os.environ and 'DEPLOY_HTTPBIN_IMAGE' not in os.environ: raise SkipTest( "Not running httpbin-based test on appveyor without " diff --git a/datalad_next/tests/marker.py b/datalad_next/tests/marker.py new file mode 100644 index 000000000..819970628 --- /dev/null +++ b/datalad_next/tests/marker.py @@ -0,0 +1,7 @@ +import os +import pytest + +skipif_no_network = pytest.mark.skipif( + 'DATALAD_TESTS_NONETWORK' in os.environ, + reason='DATALAD_TESTS_NONETWORK is set' +) diff --git a/datalad_next/tests/utils.py b/datalad_next/tests/utils.py index a92a87176..7339c5694 100644 --- a/datalad_next/tests/utils.py +++ b/datalad_next/tests/utils.py @@ -74,7 +74,7 @@ def __enter__(self): from cheroot import wsgi from wsgidav.wsgidav_app import WsgiDAVApp except ImportError as e: - raise SkipTest('No WSGI capabilities') from e + raise SkipTest('No WSGI capabilities. 
Install cheroot and/or wsgidav') from e if self.auth: auth = {self.auth[0]: {'password': self.auth[1]}} diff --git a/datalad_next/types/archivist.py b/datalad_next/types/archivist.py index 12e9b2b32..17c538dbe 100644 --- a/datalad_next/types/archivist.py +++ b/datalad_next/types/archivist.py @@ -74,7 +74,7 @@ class ArchivistLocator: """ akey: AnnexKey member: PurePosixPath - size: int + size: int | None = None # datalad-archives did not have the type info, we want to be # able to handle those too, make optional atype: ArchiveType | None = None @@ -91,21 +91,21 @@ def __str__(self) -> str: @classmethod def from_str(cls, url: str): """Return ``ArchivistLocator`` from ``str`` form""" - url_matched = _recognized_urls.match(url) - if not url_matched: + url_match = _recognized_urls.match(url) + if not url_match: raise ValueError('Unrecognized dl+archives locator syntax') - url_matched = url_matched.groupdict() + url_matched = url_match.groupdict() # convert to desired type akey = AnnexKey.from_str(url_matched['key']) # archive member properties - props_matched = _archive_member_props.match(url_matched['props']) - if not props_matched: + props_match = _archive_member_props.match(url_matched['props']) + if not props_match: # without at least a 'path' there is nothing we can do here raise ValueError( 'dl+archives locator contains invalid archive member ' f'specification: {url_matched["props"]!r}') - props_matched = props_matched.groupdict() + props_matched = props_match.groupdict() amember_path = PurePosixPath(props_matched['path']) if amember_path.is_absolute(): raise ValueError( @@ -116,6 +116,8 @@ def from_str(cls, url: str): # size is optional, regex ensure that it is an int size = props_matched.get('size') + if size is not None: + size = int(size) # archive type, could be None atype = props_matched.get('atype') @@ -134,6 +136,8 @@ def from_str(cls, url: str): atype = ArchiveType.zip elif '.tar' in suf: atype = ArchiveType.tar + elif '.tgz' in suf: + atype = ArchiveType.tar return cls( akey=akey, diff --git a/datalad_next/types/tests/test_archivist.py b/datalad_next/types/tests/test_archivist.py index 8f781633d..b3d03ac06 100644 --- a/datalad_next/types/tests/test_archivist.py +++ b/datalad_next/types/tests/test_archivist.py @@ -23,6 +23,12 @@ def test_archivistlocator(): assert ArchivistLocator.from_str( 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tar#path=f.txt' ).atype == ArchiveType.tar + assert ArchivistLocator.from_str( + 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tgz#path=f.txt' + ).atype == ArchiveType.tar + assert ArchivistLocator.from_str( + 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tar.gz#path=f.txt' + ).atype == ArchiveType.tar assert ArchivistLocator.from_str( 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.zip#path=f.txt' ).atype == ArchiveType.zip diff --git a/datalad_next/url_operations/http.py b/datalad_next/url_operations/http.py index 854677c4e..11eaddc13 100644 --- a/datalad_next/url_operations/http.py +++ b/datalad_next/url_operations/http.py @@ -9,11 +9,13 @@ from typing import Dict import requests from requests_toolbelt import user_agent -import www_authenticate import datalad -from datalad_next.utils.requests_auth import DataladAuth +from datalad_next.utils.requests_auth import ( + DataladAuth, + parse_www_authenticate, +) from . 
import ( UrlOperations, UrlOperationsRemoteError, @@ -233,7 +235,7 @@ def probe_url(self, url, timeout=10.0, headers=None): headers=headers, ) if 'www-authenticate' in req.headers: - props['auth'] = www_authenticate.parse( + props['auth'] = parse_www_authenticate( req.headers['www-authenticate']) props['is_redirect'] = True if req.history else False props['status_code'] = req.status_code @@ -244,16 +246,16 @@ def _stream_download_from_request( from_url = r.url hasher = self._get_hasher(hash) progress_id = self._get_progress_id(from_url, to_path) - # get download size, but not every server provides it + # try to get download size, it might not be provided, e.g. if + # chunked transport encoding is used try: # for compressed downloads the content length refers to the # compressed content expected_size = int(r.headers.get('content-length')) except (ValueError, TypeError): - # some responses do not have a `content-length` header, - # even though they HTTP200 and deliver the content. - # example: - # https://github.com/datalad/datalad-next/pull/365#issuecomment-1557114109 + # some HTTP-200 responses do not have a `content-length` header, + # e.g. if chunked transport encoding is used. in this case, set + # up everything to calculate size by ourselves expected_size = None self._progress_report_start( progress_id, @@ -264,7 +266,7 @@ def _stream_download_from_request( ) fp = None - props = {} + props: Dict[str, str] = {} try: # we can only write to file-likes opened in bytes mode fp = sys.stdout.buffer if to_path is None else open(to_path, 'wb') @@ -274,14 +276,16 @@ def _stream_download_from_request( # TODO make chunksize a config item, 65536 is the default in # requests_toolbelt for chunk in r.raw.stream(amt=65536, decode_content=True): - # update how much data was transferred from the remote server, - # but we cannot use the size of the chunk for that, - # because content might be downloaded with transparent - # (de)compression. ask the download stream itself for its - # "position" + # update how much data was transferred from the remote server. if expected_size: + # if we have an expected size, we don't use the size of the + # chunk for that because content might be downloaded with + # transparent (de)compression. instead we ask the download + # stream itself for its "position". tell = r.raw.tell() else: + # if we do not have an expected size, all we can use is + # the size of the downloaded chunk. 
tell = downloaded_bytes + len(chunk) self._progress_report_update( progress_id, diff --git a/datalad_next/url_operations/tests/test_http.py b/datalad_next/url_operations/tests/test_http.py index fe7de2ecb..a04ec085c 100644 --- a/datalad_next/url_operations/tests/test_http.py +++ b/datalad_next/url_operations/tests/test_http.py @@ -1,5 +1,10 @@ +from __future__ import annotations + import gzip import pytest +import requests + +from datalad_next.tests.marker import skipif_no_network from ..any import AnyUrlOperations from ..http import ( @@ -59,6 +64,7 @@ def test_custom_http_headers_via_config(datalad_cfg): assert huo._headers['X-Funky'] == 'Stuff' +@skipif_no_network def test_transparent_decompression(tmp_path): # this file is offered with transparent compression/decompression # by the github webserver @@ -73,6 +79,7 @@ def test_transparent_decompression(tmp_path): '[build-system]\nrequires = ["setuptools >= 43.0.0", "wheel"]\n' +@skipif_no_network def test_compressed_file_stay_compressed(tmp_path): # this file is offered with transparent compression/decompression # by the github webserver, but is also actually gzip'ed @@ -98,6 +105,29 @@ def test_compressed_file_stay_compressed(tmp_path): f.read(1000) +def test_size_less_progress_reporting(http_server, monkeypatch): + test_file = (http_server.path / 'test.bin').open('wb') + test_file.seek(100000) + test_file.write(b'a') + test_file.close() + + r = requests.get(http_server.url + '/test.bin', stream=True) + del r.headers['content-length'] + + logs = [] + # patch the log_progress() used in http.py + def catch_progress(*_, **kwargs): + logs.append(kwargs) + + import datalad_next.url_operations + monkeypatch.setattr(datalad_next.url_operations, 'log_progress', catch_progress) + + http_handler = HttpUrlOperations() + http_handler._stream_download_from_request(r, None) + assert any('update' in kwargs for kwargs in logs) + assert any(('total', None) in kwargs.items() for kwargs in logs) + + def test_header_adding(): default_headers = dict(key_1='value_1') added_headers = dict(key_2='value_2') diff --git a/datalad_next/utils/requests_auth.py b/datalad_next/utils/requests_auth.py index 742e1d1a2..089055995 100644 --- a/datalad_next/utils/requests_auth.py +++ b/datalad_next/utils/requests_auth.py @@ -7,7 +7,6 @@ from typing import Dict from urllib.parse import urlparse import requests -import www_authenticate from datalad_next.config import ConfigManager from datalad_next.utils import CredentialManager @@ -16,7 +15,77 @@ lgr = logging.getLogger('datalad.ext.next.utils.requests_auth') -__all__ = ['DataladAuth', 'HTTPBearerTokenAuth'] +__all__ = ['DataladAuth', 'HTTPBearerTokenAuth', 'parse_www_authenticate'] + + +def parse_www_authenticate(hdr: str) -> dict: + """Parse HTTP www-authenticate header + + This helper uses ``requests`` utilities to parse the ``www-authenticate`` + header as represented in a ``requests.Response`` instance. The header may + contain any number of challenge specifications. + + The implementation follows RFC7235, where a challenge parameters set is + specified as: either a comma-separated list of parameters, or a single + sequence of characters capable of holding base64-encoded information, + and parameters are name=value pairs, where the name token is matched + case-insensitively, and each parameter name MUST only occur once + per challenge. + + Returns + ------- + dict + Keys are casefolded challenge labels (e.g., 'basic', 'digest'). 
+ Values are: ``None`` (no parameter), ``str`` (a token68), or + ``dict`` (name/value mapping of challenge parameters) + """ + plh = requests.utils.parse_list_header + pdh = requests.utils.parse_dict_header + challenges = {} + challenge = None + # challenges as well as their properties are in a single + # comma-separated list + for item in plh(hdr): + # parse the item into a key/value set + # the value will be `None` if this item is not a mapping + k, v = pdh(item).popitem() + # split the key to check for a challenge spec start + key_split = k.split(' ', maxsplit=1) + if len(key_split) > 1 or v is None: + item_suffix = item[len(key_split[0]) + 1:] + challenge = [item[len(key_split[0]) + 1:]] if item_suffix else None + challenges[key_split[0].casefold()] = challenge + else: + # implementation logic assumes that the above conditional + # was triggered before we ever get here + assert challenge + challenge.append(item) + + return { + challenge: _convert_www_authenticate_items(items) + for challenge, items in challenges.items() + } + + +def _convert_www_authenticate_items(items: list) -> None | str | dict: + pdh = requests.utils.parse_dict_header + # according to RFC7235, items can be: + # either a comma-separated list of parameters + # or a single sequence of characters capable of holding base64-encoded + # information. + # parameters are name=value pairs, where the name token is matched + # case-insensitively, and each parameter name MUST only occur once + # per challenge. + if items is None: + return None + elif len(items) == 1 and pdh(items[0].rstrip('=')).popitem()[1] is None: + # this item matches the token68 appearance (no name/value + # pair remains after potential base64 padding is removed) + return items[0] + else: + return { + k.casefold(): v for i in items for k, v in pdh(i).items() + } class DataladAuth(requests.auth.AuthBase): @@ -64,7 +133,7 @@ def save_entered_credential(self, suggested_name: str | None = None, """ if self._entered_credential is None: # nothing to do - return + return None return self._credman.set( name=None, _lastused=True, @@ -178,7 +247,7 @@ def handle_401(self, r, **kwargs): header is ignored. Server-provided 'www-authenticated' challenges are inspected, and - corresponding credentials are looked-up (if needed) and subequently + corresponding credentials are looked-up (if needed) and subsequently tried in a re-request to the original URL after performing any necessary actions to meet a given challenge. Such a re-request is then using the same connection as the original request. @@ -201,7 +270,7 @@ def handle_401(self, r, **kwargs): # www-authenticate with e.g. 403s return r # which auth schemes does the server support?
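        # Editorial illustration (hedged) of the parser output, mirroring
        # cases from the new test_parse_www_authenticate.py:
        #   parse_www_authenticate('Bearer realm=example.com')
        #     -> {'bearer': {'realm': 'example.com'}}
        #   parse_www_authenticate('Negotiate abcdef=')
        #     -> {'negotiate': 'abcdef='}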
- auth_schemes = www_authenticate.parse(r.headers['www-authenticate']) + auth_schemes = parse_www_authenticate(r.headers['www-authenticate']) ascheme, credname, cred = self._get_credential(r.url, auth_schemes) if cred is None or 'secret' not in cred: diff --git a/datalad_next/utils/tests/test_parse_www_authenticate.py b/datalad_next/utils/tests/test_parse_www_authenticate.py new file mode 100644 index 000000000..d69fcd67b --- /dev/null +++ b/datalad_next/utils/tests/test_parse_www_authenticate.py @@ -0,0 +1,45 @@ + +from ..requests_auth import parse_www_authenticate + + +challenges = ( + # just challenge type + ('Negotiate', + [('negotiate', None)]), + # challenge and just a token, tolerate any base64 padding + ('Negotiate abcdef', + [('negotiate', 'abcdef')]), + ('Negotiate abcdef=', + [('negotiate', 'abcdef=')]), + ('Negotiate abcdef==', + [('negotiate', 'abcdef==')]), + # standard bearer + ('Bearer realm=example.com', + [('bearer', {'realm': 'example.com'})]), + # standard digest + ('Digest realm="example.com", qop="auth,auth-int", nonce="abcdef", ' + 'opaque="ghijkl"', + [('digest', {'realm': 'example.com', 'qop': 'auth,auth-int', + 'nonce': 'abcdef', 'opaque': 'ghijkl'})]), + # multi challenge + ('Basic speCial="paf ram", realm="basIC", ' + 'Bearer, ' + 'Digest realm="http-auth@example.org", qop="auth, auth-int", ' + 'algorithm=MD5', + [('basic', {'special': 'paf ram', 'realm': 'basIC'}), + ('bearer', None), + ('digest', {'realm': "http-auth@example.org", 'qop': "auth, auth-int", + 'algorithm': 'MD5'})]), + # same challenge, multiple times, last one wins + ('Basic realm="basIC", ' + 'Basic realm="complex"', + [('basic', {'realm': 'complex'})]), +) + + +def test_parse_www_authenticate(): + for hdr, targets in challenges: + res = parse_www_authenticate(hdr) + for ctype, props in targets: + assert ctype in res + assert res[ctype] == props diff --git a/docs/CODEOWNERS b/docs/CODEOWNERS index 4e97cc044..09997217e 100644 --- a/docs/CODEOWNERS +++ b/docs/CODEOWNERS @@ -10,3 +10,5 @@ # Merge requests are accepted (automatically) when all (relevant) # status checks have passed, and RT approval was given. * michael.hanke@gmail.com +/iter_collections/ christian.moench@web.de +/runners/ christian.moench@web.de diff --git a/docs/source/conf.py b/docs/source/conf.py index e4b0ea7ca..aa0645d3b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,6 +26,12 @@ import datalad_next +# this cheats sphinx into thinking that LeanGit repo is not +# merely imported, and convinces it to document it +import datalad_next.datasets as dnd +dnd.LeanGitRepo.__module__ = dnd.__name__ +dnd.LeanGitRepo.__name__ = 'LeanGitRepo' + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. diff --git a/docs/source/developer_guide/index.rst b/docs/source/developer_guide/index.rst index 1ae91c6a1..25c398788 100644 --- a/docs/source/developer_guide/index.rst +++ b/docs/source/developer_guide/index.rst @@ -1,7 +1,7 @@ .. _devguide: -The developer's guide to datalad-next -************************************* +Developer Guide +=============== This guide sheds light on new and reusable subsystems developed in ``datalad-next``. The target audience are developers that intend to build up on or use functionality provided by this extension. 
@@ -10,4 +10,4 @@ The target audience are developers that intend to build up on or use functionali :maxdepth: 2 constraints.rst - contributing.rst \ No newline at end of file + contributing.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 86db9be4c..fea428125 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,82 +29,24 @@ extension has to be enabled for auto-loading by executing:: Doing so will enable the extension to also alter the behavior the core DataLad package and its commands. -API -=== -High-level API commands ------------------------ +Provided functionality +====================== .. toctree:: - :maxdepth: 2 + :maxdepth: 1 api.rst - -Command line reference ----------------------- - -.. toctree:: - :maxdepth: 2 - cmd.rst - - -Python tooling --------------- - -``datalad-next`` comprises a number of more-or-less self-contained -mini-packages providing particular functionality. - -.. toctree:: - :maxdepth: 1 - Infrastructure classes and utilities - - -Git remote helpers ------------------- - -.. toctree:: - :maxdepth: 2 - git-remote-helpers.rst - - -Git-annex backends ------------------- - -.. toctree:: - :maxdepth: 2 - annex-backends.rst - - - -Git-annex special remotes -------------------------- - - -.. toctree:: - :maxdepth: 2 - annex-specialremotes.rst - - - -DataLad patches ---------------- - -Patches that are automatically applied to DataLad when loading the -``datalad-next`` extension package. - -.. toctree:: - :maxdepth: 2 - patches.rst -Developer Guide ---------------- +Contributor information +======================= .. toctree:: :maxdepth: 2 diff --git a/docs/source/patches.rst b/docs/source/patches.rst index a25c1aab2..336545132 100644 --- a/docs/source/patches.rst +++ b/docs/source/patches.rst @@ -1,18 +1,26 @@ DataLad patches *************** +Patches that are automatically applied to DataLad when loading the +``datalad-next`` extension package. + .. currentmodule:: datalad_next.patches .. 
autosummary:: :toctree: generated annexrepo + cli_configoverrides + commanderror common_cfg configuration create_sibling_ghlike + create_sibling_gitlab customremotes_main distribution_dataset interface_utils push_optimize push_to_export_remote - test_keyring + run siblings + test_keyring + update diff --git a/setup.cfg b/setup.cfg index 7b4b01f00..3f6897aed 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,11 +12,10 @@ classifiers = Programming Language :: Python :: 3 [options] -python_requires = >= 3.7 +python_requires = >= 3.8 install_requires = annexremote datalad >= 0.18.4 - www-authenticate humanize packages = find_namespace: include_package_data = True diff --git a/tools/appveyor/submit-coverage b/tools/appveyor/submit-coverage index 7ea560f0c..f8cef4134 100755 --- a/tools/appveyor/submit-coverage +++ b/tools/appveyor/submit-coverage @@ -2,8 +2,6 @@ set -e -u -# grab coverage reports from subprocesses, see tools/coverage-bin -python -m coverage combine -a /tmp/.coverage-entrypoints-*; python -m coverage xml curl -Os $CODECOV_BINARY chmod +x codecov diff --git a/tools/coverage-bin/datalad b/tools/coverage-bin/datalad deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/datalad +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-backend-XDLRA b/tools/coverage-bin/git-annex-backend-XDLRA deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-backend-XDLRA +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-archivist b/tools/coverage-bin/git-annex-remote-archivist deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-archivist +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-datalad b/tools/coverage-bin/git-annex-remote-datalad deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-datalad +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-datalad-archives b/tools/coverage-bin/git-annex-remote-datalad-archives deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-datalad-archives +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-ora b/tools/coverage-bin/git-annex-remote-ora deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-ora +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-uncurl b/tools/coverage-bin/git-annex-remote-uncurl deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-uncurl +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-remote-datalad-annex b/tools/coverage-bin/git-remote-datalad-annex deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-remote-datalad-annex +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/sitecustomize.py b/tools/coverage-bin/sitecustomize.py deleted file mode 100755 index c1ba919b9..000000000 --- a/tools/coverage-bin/sitecustomize.py +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env python -import coverage -coverage.process_startup() diff --git 
a/tools/coverage-bin/with_coverage b/tools/coverage-bin/with_coverage deleted file mode 100755 index 82c79d23e..000000000 --- a/tools/coverage-bin/with_coverage +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# -# A little helper to overload executables with a coverage harness - -set -eu - -# what script is actually being called -bin=$(basename $0) -# where does this script live -curbin=$(which "$bin") -# this seems to determine where the full package puts it binaries -# in -core this is using `datalad` as the reference binary, -# here explicitly, and less confusingly use the name of the coverage -# wrapper -curdatalad=$(which with_coverage) -curdir=$(dirname $curdatalad) - -COVERAGE_RUN="-m coverage run" -export COVERAGE_PROCESS_START=$PWD/../.coveragerc -export PYTHONPATH="$PWD/../tools/coverage-bin/" -# remove the coverage wrapper binary location from the PATH -export PATH=${PATH//$curdir:/} -# check where the datalad binary is to -# - figure out which Python to call -# - to verify that we are in the right/different env/location -# and not where the coverage wrapper is coming from -newdatalad=$(which datalad) -newbin=$(which $bin) -newpython=$(sed -ne '1s/#!//gp' $newdatalad) - -if [ $(dirname $newdatalad) = $curdir ]; then - echo "E: binary remained the same: $newdatalad" >&2 - exit 1 -fi - -touch /tmp/coverages -export COVERAGE_FILE=/tmp/.coverage-entrypoints-$RANDOM -echo "Running now $newpython $COVERAGE_RUN -a $newbin $@" >> /tmp/coverages -$newpython $COVERAGE_RUN -a $newbin "$@" diff --git a/versioneer.py b/versioneer.py index 51ca8182e..1e3753e63 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,5 +1,5 @@ -# Version: 0.18 +# Version: 0.29 """The Versioneer - like a rocketeer, but for versions. @@ -7,18 +7,14 @@ ============== * like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer +* https://github.com/python-versioneer/python-versioneer * Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based +* License: Public Domain (Unlicense) +* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in setuptools-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control @@ -27,9 +23,38 @@ ## Quick Install -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results +Versioneer provides two installation modes. The "classic" vendored mode installs +a copy of versioneer into your repository. The experimental build-time dependency mode +is intended to allow you to skip this step and simplify the process of upgrading. 
+ +### Vendored mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) + * Note that you will need to add `tomli; python_version < "3.11"` to your + build-time dependencies if you use `pyproject.toml` +* run `versioneer install --vendor` in your source tree, commit the results +* verify version information with `python setup.py version` + +### Build-time dependency mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) +* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) + to the `requires` key of the `build-system` table in `pyproject.toml`: + ```toml + [build-system] + requires = ["setuptools", "versioneer[toml]"] + build-backend = "setuptools.build_meta" + ``` +* run `versioneer install --no-vendor` in your source tree, commit the results +* verify version information with `python setup.py version` ## Version Identifiers @@ -61,7 +86,7 @@ for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. +uncommitted changes). The version identifier is used for multiple purposes: @@ -166,7 +191,7 @@ Some situations are known to cause problems for Versioneer. This details the most significant ones. More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). +[issues page](https://github.com/python-versioneer/python-versioneer/issues). ### Subprojects @@ -180,7 +205,7 @@ `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI distributions (and upload multiple independently-installable tarballs). * Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other langauges) in subdirectories. + provide bindings to Python (and perhaps other languages) in subdirectories. Versioneer will look for `.git` in parent directories, and most operations should get the right version string. However `pip` and `setuptools` have bugs @@ -194,9 +219,9 @@ Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. [pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve @@ -224,31 +249,20 @@ cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. 
-[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. -### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. -Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. - ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace +* edit `setup.cfg` and `pyproject.toml`, if necessary, + to include any new configuration settings indicated by the release notes. + See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install --[no-]vendor` in your source tree, to replace `SRC/_version.py` * commit any changed files @@ -265,35 +279,70 @@ direction and include code from all supported VCS systems, reducing the number of intermediate scripts. +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools + plugin ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . +Specifically, both are released under the "Unlicense", as described in +https://unlicense.org/. 
+ +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer """ +# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring +# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements +# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error +# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with +# pylint:disable=attribute-defined-outside-init,too-many-arguments -from __future__ import print_function -try: - import configparser -except ImportError: - import ConfigParser as configparser +import configparser import errno import json import os import re import subprocess import sys +from pathlib import Path +from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union +from typing import NoReturn +import functools + +have_tomllib = True +if sys.version_info >= (3, 11): + import tomllib +else: + try: + import tomli as tomllib + except ImportError: + have_tomllib = False class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + versionfile_source: str + versionfile_build: Optional[str] + parentdir_prefix: Optional[str] + verbose: Optional[bool] + -def get_root(): +def get_root() -> str: """Get the project root directory. We require that all commands are run from the project root, i.e. the @@ -301,13 +350,23 @@ def get_root(): """ root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " @@ -321,43 +380,62 @@ def get_root(): # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. 
-        me = os.path.realpath(os.path.abspath(__file__))
-        me_dir = os.path.normcase(os.path.splitext(me)[0])
+        my_path = os.path.realpath(os.path.abspath(__file__))
+        me_dir = os.path.normcase(os.path.splitext(my_path)[0])
         vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
-        if me_dir != vsr_dir:
+        if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals():
             print("Warning: build in %s is using versioneer.py from %s"
-                  % (os.path.dirname(me), versioneer_py))
+                  % (os.path.dirname(my_path), versioneer_py))
     except NameError:
         pass
     return root
 
 
-def get_config_from_root(root):
+def get_config_from_root(root: str) -> VersioneerConfig:
     """Read the project setup.cfg file to determine Versioneer config."""
-    # This might raise EnvironmentError (if setup.cfg is missing), or
+    # This might raise OSError (if setup.cfg is missing), or
     # configparser.NoSectionError (if it lacks a [versioneer] section), or
     # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
     # the top of versioneer.py for instructions on writing your setup.cfg .
-    setup_cfg = os.path.join(root, "setup.cfg")
-    parser = configparser.SafeConfigParser()
-    with open(setup_cfg, "r") as f:
-        parser.readfp(f)
-    VCS = parser.get("versioneer", "VCS")  # mandatory
-
-    def get(parser, name):
-        if parser.has_option("versioneer", name):
-            return parser.get("versioneer", name)
-        return None
+    root_pth = Path(root)
+    pyproject_toml = root_pth / "pyproject.toml"
+    setup_cfg = root_pth / "setup.cfg"
+    section: Union[Dict[str, Any], configparser.SectionProxy, None] = None
+    if pyproject_toml.exists() and have_tomllib:
+        try:
+            with open(pyproject_toml, 'rb') as fobj:
+                pp = tomllib.load(fobj)
+            section = pp['tool']['versioneer']
+        except (tomllib.TOMLDecodeError, KeyError) as e:
+            print(f"Failed to load config from {pyproject_toml}: {e}")
+            print("Trying to load it from setup.cfg")
+    if not section:
+        parser = configparser.ConfigParser()
+        with open(setup_cfg) as cfg_file:
+            parser.read_file(cfg_file)
+        parser.get("versioneer", "VCS")  # raise error if missing
+
+        section = parser["versioneer"]
+
+    # `cast` really shouldn't be used, but it's simplest for the
+    # common VersioneerConfig users at the moment.
We verify against + # `None` values elsewhere where it matters + cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): + cfg.VCS = section['VCS'] + cfg.style = section.get("style", "") + cfg.versionfile_source = cast(str, section.get("versionfile_source")) + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = cast(str, section.get("tag_prefix")) + if cfg.tag_prefix in ("''", '""', None): cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") + cfg.parentdir_prefix = section.get("parentdir_prefix") + if isinstance(section, configparser.SectionProxy): + # Make sure configparser translates to bool + cfg.verbose = section.getboolean("verbose") + else: + cfg.verbose = section.get("verbose") + return cfg @@ -366,37 +444,48 @@ class NotThisMethod(Exception): # these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f + HANDLERS.setdefault(vcs, {})[method] = f return f return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -407,26 +496,25 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) - return None, p.returncode - return 
stdout, p.returncode + return None, process.returncode + return stdout, process.returncode -LONG_VERSION_PY['git'] = ''' +LONG_VERSION_PY['git'] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -435,9 +523,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, import re import subprocess import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must @@ -453,8 +543,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + -def get_config(): +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -472,13 +569,13 @@ class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} @@ -487,22 +584,35 @@ def decorate(f): return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + 
else None), **popen_kwargs) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -513,18 +623,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, process.returncode -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -533,15 +645,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% @@ -550,41 +661,48 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
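The console-window suppression that the reworked `run_command()` gains can be tried in isolation; this standalone sketch mirrors the patch (it assumes a `git` executable on `PATH` and is illustrative, not code from the diff):

```python
# Under pythonw.exe on Windows, STARTF_USESHOWWINDOW keeps the spawned
# git process from flashing a console window; elsewhere it is a no-op.
import subprocess
import sys
from typing import Any, Dict

popen_kwargs: Dict[str, Any] = {}
if sys.platform == "win32":
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    popen_kwargs["startupinfo"] = startupinfo

proc = subprocess.Popen(["git", "--version"], stdout=subprocess.PIPE,
                        **popen_kwargs)
# With Python 2 gone, stdout can be decoded unconditionally, which is
# exactly the simplification the patch makes.
print(proc.communicate()[0].strip().decode(), proc.returncode)
```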
- keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -597,11 +715,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d @@ -610,7 +728,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: @@ -619,6 +737,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %%s" %% r) return {"version": r, @@ -634,7 +757,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -645,8 +773,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) @@ -654,24 +789,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. 
+            branches = [branch[2:] for branch in branches]
+            if "master" in branches:
+                branch_name = "master"
+            elif not branches:
+                branch_name = None
+            else:
+                # Pick the first branch that is returned. Good or bad.
+                branch_name = branches[0]
+
+    pieces["branch"] = branch_name
+
     # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
     # TAG might have hyphens.
     git_describe = describe_out
@@ -688,7 +856,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
         # TAG-NUM-gHEX
         mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
         if not mo:
-            # unparseable. Maybe git-describe is misbehaving?
+            # unparsable. Maybe git-describe is misbehaving?
             pieces["error"] = ("unable to parse git-describe output: '%%s'"
                                %% describe_out)
             return pieces
@@ -713,26 +881,27 @@
     else:
         # HEX: no tags
         pieces["closest-tag"] = None
-        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
-                                    cwd=root)
-        pieces["distance"] = int(count_out)  # total number of commits
+        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+        pieces["distance"] = len(out.split())  # total number of commits
 
     # commit date: see ISO-8601 comment in git_versions_from_keywords()
-    date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"],
-                       cwd=root)[0].strip()
+    date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip()
+    # Use only the last line. Previous lines may contain GPG signature
+    # information.
+    date = date.splitlines()[-1]
     pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
 
     return pieces
 
 
-def plus_or_dot(pieces):
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
     """Return a + if we don't already have one, else return a ."""
     if "+" in pieces.get("closest-tag", ""):
         return "."
     return "+"
 
 
-def render_pep440(pieces):
+def render_pep440(pieces: Dict[str, Any]) -> str:
     """Build up version string, with post-release "local version identifier".
 
     Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
@@ -757,23 +926,71 @@
     return rendered
 
 
-def render_pep440_pre(pieces):
-    """TAG[.post.devDISTANCE] -- No -dirty.
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
+    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+    The ".dev0" means not master branch. Note that .dev0 sorts backwards
+    (a feature branch will appear "older" than the master branch).
 
     Exceptions:
-    1: no tags. 0.post.devDISTANCE
+    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
     """
     if pieces["closest-tag"]:
         rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0"
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+untagged.%%d.g%%s" %% (pieces["distance"],
+                                             pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
+    """Split pep440 version string at the post-release segment.
+
+    Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment is present).
+ """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] + rendered = "0.post0.dev%%d" %% pieces["distance"] return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -800,12 +1017,41 @@ def render_pep440_post(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -822,7 +1068,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -842,7 +1088,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. 
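The net effect of the reworked pre-release style is easiest to see with concrete numbers. In this worked example `pep440_split_post()` is copied from the patch; the tag and distance values are invented:

```python
# Worked example of the new ".postN.devDISTANCE" scheme used by
# render_pep440_pre(); pep440_split_post is copied from the patch.
from typing import Optional, Tuple

def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
    vc = ver.split(".post")
    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None

# Five commits past a tag "1.2.post3": bump the post segment and attach
# the distance as a dev segment.
tag_version, post_version = pep440_split_post("1.2.post3")
if post_version is not None:
    rendered = "%s.post%d.dev%d" % (tag_version, post_version + 1, 5)
else:
    rendered = "%s.post0.dev%d" % (tag_version, 5)
print(rendered)  # 1.2.post4.dev5; versioneer 0.18 rendered 1.2.post3.post.dev5
```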
@@ -862,7 +1108,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -876,10 +1122,14 @@ def render(pieces, style): if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -894,7 +1144,7 @@ def render(pieces, style): "date": pieces.get("date")} -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some @@ -915,7 +1165,7 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, @@ -942,41 +1192,48 @@ def get_versions(): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. - keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. 
Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -989,11 +1246,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1002,7 +1259,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1011,6 +1268,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %s" % r) return {"version": r, @@ -1026,7 +1288,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -1037,8 +1304,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -1046,24 +1320,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -1080,7 +1387,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? + # unparsable. Maybe git-describe is misbehaving? 
pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces @@ -1105,19 +1412,20 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces -def do_vcs_install(manifest_in, versionfile_source, ipy): +def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py @@ -1126,36 +1434,40 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] + files = [versionfile_source] if ipy: files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) + if "VERSIONEER_PEP518" not in globals(): + try: + my_path = __file__ + if my_path.endswith((".pyc", ".pyo")): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: pass if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. 
Source tarballs conventionally unpack into a directory that includes both @@ -1164,15 +1476,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % @@ -1181,7 +1492,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from +# This file was generated by 'versioneer.py' (0.29) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. @@ -1198,12 +1509,12 @@ def get_versions(): """ -def versions_from_file(filename): +def versions_from_file(filename: str) -> Dict[str, Any]: """Try to determine the version from _version.py if present.""" try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) @@ -1215,9 +1526,8 @@ def versions_from_file(filename): return json.loads(mo.group(1)) -def write_to_version_file(filename, versions): +def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: """Write the given version number to the given _version.py file.""" - os.unlink(filename) contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: @@ -1226,14 +1536,14 @@ def write_to_version_file(filename, versions): print("set %s to '%s'" % (filename, versions["version"])) -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -1258,23 +1568,71 @@ def render_pep440(pieces): return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
     """
     if pieces["closest-tag"]:
         rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0"
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
+    """Split pep440 version string at the post-release segment.
+
+    Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment is present).
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
         if pieces["distance"]:
-            rendered += ".post.dev%d" % pieces["distance"]
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%d" % (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
     else:
         # exception #1
-        rendered = "0.post.dev%d" % pieces["distance"]
+        rendered = "0.post0.dev%d" % pieces["distance"]
     return rendered
 
 
-def render_pep440_post(pieces):
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
     """TAG[.postDISTANCE[.dev0]+gHEX] .
 
     The ".dev0" means dirty. Note that .dev0 sorts backwards
@@ -1301,12 +1659,41 @@
     return rendered
 
 
-def render_pep440_old(pieces):
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
     """TAG[.postDISTANCE[.dev0]] .
 
     The ".dev0" means dirty.
 
-    Eexceptions:
+    Exceptions:
     1: no tags. 0.postDISTANCE[.dev0]
     """
     if pieces["closest-tag"]:
@@ -1323,7 +1710,7 @@
     return rendered
 
 
-def render_git_describe(pieces):
+def render_git_describe(pieces: Dict[str, Any]) -> str:
     """TAG[-DISTANCE-gHEX][-dirty].
 
     Like 'git describe --tags --dirty --always'.
@@ -1343,7 +1730,7 @@
     return rendered
 
 
-def render_git_describe_long(pieces):
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
     """TAG-DISTANCE-gHEX[-dirty].
 
     Like 'git describe --tags --dirty --always -long'.
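To make the new branch-aware styles concrete, the sketch below condenses the tagged-branch path of `render_pep440_branch()` from the patch; the `pieces` values are invented:

```python
# Tagged-case-only condensation of render_pep440_branch(); a non-master
# branch gains ".dev0" so its builds sort before the master build.
from typing import Any, Dict

def plus_or_dot(pieces: Dict[str, Any]) -> str:
    return "." if "+" in (pieces.get("closest-tag") or "") else "+"

def render_branch_tagged(pieces: Dict[str, Any]) -> str:
    rendered = pieces["closest-tag"]
    if pieces["distance"] or pieces["dirty"]:
        if pieces["branch"] != "master":
            rendered += ".dev0"
        rendered += plus_or_dot(pieces)
        rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered

pieces = {"closest-tag": "1.2", "distance": 3, "short": "abc1234",
          "dirty": False, "branch": "feature-x"}
print(render_branch_tagged(pieces))  # 1.2.dev0+3.gabc1234
```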
@@ -1363,7 +1750,7 @@
     return rendered
 
 
-def render(pieces, style):
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
     """Render the given version pieces into the requested style."""
     if pieces["error"]:
         return {"version": "unknown",
@@ -1377,10 +1764,14 @@
 
     if style == "pep440":
         rendered = render_pep440(pieces)
+    elif style == "pep440-branch":
+        rendered = render_pep440_branch(pieces)
     elif style == "pep440-pre":
         rendered = render_pep440_pre(pieces)
     elif style == "pep440-post":
         rendered = render_pep440_post(pieces)
+    elif style == "pep440-post-branch":
+        rendered = render_pep440_post_branch(pieces)
     elif style == "pep440-old":
         rendered = render_pep440_old(pieces)
     elif style == "git-describe":
@@ -1399,7 +1790,7 @@ class VersioneerBadRootError(Exception):
     """The project root directory is unknown or missing key files."""
 
 
-def get_versions(verbose=False):
+def get_versions(verbose: bool = False) -> Dict[str, Any]:
     """Get the project version from whatever source is available.
 
     Returns dict with two keys: 'version' and 'full'.
@@ -1414,7 +1805,7 @@
     assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
     handlers = HANDLERS.get(cfg.VCS)
     assert handlers, "unrecognized VCS '%s'" % cfg.VCS
-    verbose = verbose or cfg.verbose
+    verbose = verbose or bool(cfg.verbose)  # `bool()` used to avoid `None`
     assert cfg.versionfile_source is not None, \
         "please set versioneer.versionfile_source"
     assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"
@@ -1475,13 +1866,17 @@
             "date": None}
 
 
-def get_version():
+def get_version() -> str:
     """Get the short version string for this project."""
     return get_versions()["version"]
 
 
-def get_cmdclass():
-    """Get the custom setuptools/distutils subclasses used by Versioneer."""
+def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None):
+    """Get the custom setuptools subclasses used by Versioneer.
+
+    If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+    """
     if "versioneer" in sys.modules:
         del sys.modules["versioneer"]
         # this fixes the "python setup.py develop" case (also 'install' and
@@ -1495,25 +1890,25 @@
         # parent is protected against the child's "import versioneer". By
         # removing ourselves from sys.modules here, before the child build
         # happens, we protect the child from the parent's versioneer too.
- # Also see https://github.com/warner/python-versioneer/issues/52 + # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - cmds = {} + cmds = {} if cmdclass is None else cmdclass.copy() - # we add "version" to both distutils and setuptools + # we add "version" to setuptools from setuptools import Command class cmd_version(Command): description = "report generated version string" - user_options = [] - boolean_options = [] + user_options: List[Tuple[str, str, str]] = [] + boolean_options: List[str] = [] - def initialize_options(self): + def initialize_options(self) -> None: pass - def finalize_options(self): + def finalize_options(self) -> None: pass - def run(self): + def run(self) -> None: vers = get_versions(verbose=True) print("Version: %s" % vers["version"]) print(" full-revisionid: %s" % vers.get("full-revisionid")) @@ -1523,7 +1918,7 @@ def run(self): print(" error: %s" % vers["error"]) cmds["version"] = cmd_version - # we override "build_py" in both distutils and setuptools + # we override "build_py" in setuptools # # most invocation pathways end up running build_py: # distutils/build -> build_py @@ -1538,18 +1933,25 @@ def run(self): # then does setup.py bdist_wheel, or sometimes setup.py install # setup.py egg_info -> ? + # pip install -e . and setuptool/editable_wheel will invoke build_py + # but the build_py command is not expected to copy any files. + # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py + if 'build_py' in cmds: + _build_py: Any = cmds['build_py'] else: - from distutils.command.build_py import build_py as _build_py + from setuptools.command.build_py import build_py as _build_py class cmd_build_py(_build_py): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) + if getattr(self, "editable_mode", False): + # During editable installs `.py` and data files are + # not copied to build_lib + return # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: @@ -1559,8 +1961,40 @@ def run(self): write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py + if 'build_ext' in cmds: + _build_ext: Any = cmds['build_ext'] + else: + from setuptools.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self) -> None: + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if not cfg.versionfile_build: + return + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + if not os.path.exists(target_versionfile): + print(f"Warning: {target_versionfile} does not exist, skipping " + "version update. This can happen if you are running build_ext " + "without first running build_py.") + return + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + if "cx_Freeze" in sys.modules: # cx_freeze enabled? 
- from cx_Freeze.dist import build_exe as _build_exe + from cx_Freeze.dist import build_exe as _build_exe # type: ignore # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ @@ -1569,7 +2003,7 @@ def run(self): # ... class cmd_build_exe(_build_exe): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1593,12 +2027,12 @@ def run(self): if 'py2exe' in sys.modules: # py2exe enabled? try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 + from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 + from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore class cmd_py2exe(_py2exe): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1619,14 +2053,51 @@ def run(self): }) cmds["py2exe"] = cmd_py2exe + # sdist farms its file list building out to egg_info + if 'egg_info' in cmds: + _egg_info: Any = cmds['egg_info'] + else: + from setuptools.command.egg_info import egg_info as _egg_info + + class cmd_egg_info(_egg_info): + def find_sources(self) -> None: + # egg_info.find_sources builds the manifest list and writes it + # in one shot + super().find_sources() + + # Modify the filelist and normalize it + root = get_root() + cfg = get_config_from_root(root) + self.filelist.append('versioneer.py') + if cfg.versionfile_source: + # There are rare cases where versionfile_source might not be + # included by default, so we must be explicit + self.filelist.append(cfg.versionfile_source) + self.filelist.sort() + self.filelist.remove_duplicates() + + # The write method is hidden in the manifest_maker instance that + # generated the filelist and was thrown away + # We will instead replicate their final normalization (to unicode, + # and POSIX-style paths) + from setuptools import unicode_utils + normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') + for f in self.filelist.files] + + manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') + with open(manifest_filename, 'w') as fobj: + fobj.write('\n'.join(normalized)) + + cmds['egg_info'] = cmd_egg_info + # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist + if 'sdist' in cmds: + _sdist: Any = cmds['sdist'] else: - from distutils.command.sdist import sdist as _sdist + from setuptools.command.sdist import sdist as _sdist class cmd_sdist(_sdist): - def run(self): + def run(self) -> None: versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old @@ -1634,7 +2105,7 @@ def run(self): self.distribution.metadata.version = versions["version"] return _sdist.run(self) - def make_release_tree(self, base_dir, files): + def make_release_tree(self, base_dir: str, files: List[str]) -> None: root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) @@ -1687,21 +2158,26 @@ def make_release_tree(self, base_dir, files): """ -INIT_PY_SNIPPET = """ +OLD_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" + +def do_setup() -> int: + """Do main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + if isinstance(e, (OSError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: @@ -1721,62 +2197,37 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + maybe_ipy: Optional[str] = ipy if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() - except EnvironmentError: + except OSError: old = "" - if INIT_PY_SNIPPET not in old: + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) + f.write(snippet) else: print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") + maybe_ipy = None # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + do_vcs_install(cfg.versionfile_source, maybe_ipy) return 0 -def scan_setup_py(): +def scan_setup_py() -> int: """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False @@ -1813,10 +2264,14 @@ def scan_setup_py(): return errors +def setup_command() -> NoReturn: + """Set up Versioneer and exit with appropriate error code.""" + errors = do_setup() + errors += scan_setup_py() + sys.exit(1 if errors else 0) + + if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) + setup_command()
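For reference, a project that vendors this file wires it up in `setup.py` roughly as follows (standard Versioneer usage; the project name is a placeholder). The optional `cmdclass` argument to `get_cmdclass()` is the 0.29 addition that merges an existing command-class dictionary instead of discarding it:

```python
# Consumer-side sketch, assuming versioneer.py sits next to setup.py.
import versioneer
from setuptools import setup

setup(
    name="example-project",  # placeholder name
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),  # pass an existing cmdclass dict to merge
)
```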