diff --git a/.all-contributorsrc b/.all-contributorsrc
new file mode 100644
index 000000000..b61be8539
--- /dev/null
+++ b/.all-contributorsrc
@@ -0,0 +1,162 @@
+{
+ "projectName": "datalad-next",
+ "projectOwner": "datalad",
+ "repoType": "github",
+ "commitConvention": "angular",
+ "contributors": [
+ {
+ "login": "mih",
+ "name": "Michael Hanke",
+ "avatar_url": "https://avatars.githubusercontent.com/u/136479?v=4",
+ "profile": "http://psychoinformatics.de/",
+ "contributions": [
+ "bug",
+ "code",
+ "content",
+ "design",
+ "doc",
+ "financial",
+ "fundingFinding",
+ "ideas",
+ "infra",
+ "maintenance",
+ "mentoring",
+ "platform",
+ "projectManagement",
+ "review",
+ "talk",
+ "test",
+ "tool",
+ "userTesting"
+ ]
+ },
+ {
+ "login": "catetrai",
+ "name": "catetrai",
+ "avatar_url": "https://avatars.githubusercontent.com/u/18424941?v=4",
+ "profile": "https://github.com/catetrai",
+ "contributions": [
+ "code",
+ "design",
+ "doc",
+ "ideas",
+ "test"
+ ]
+ },
+ {
+ "login": "effigies",
+ "name": "Chris Markiewicz",
+ "avatar_url": "https://avatars.githubusercontent.com/u/83442?v=4",
+ "profile": "https://github.com/effigies",
+ "contributions": [
+ "maintenance",
+ "code"
+ ]
+ },
+ {
+ "login": "mslw",
+ "name": "MichaΕ Szczepanik",
+ "avatar_url": "https://avatars.githubusercontent.com/u/11985212?v=4",
+ "profile": "https://github.com/mslw",
+ "contributions": [
+ "bug",
+ "code",
+ "content",
+ "doc",
+ "example",
+ "ideas",
+ "infra",
+ "maintenance",
+ "review",
+ "talk",
+ "test",
+ "tutorial",
+ "userTesting"
+ ]
+ },
+ {
+ "login": "jsheunis",
+ "name": "Stephan Heunis",
+ "avatar_url": "https://avatars.githubusercontent.com/u/10141237?v=4",
+ "profile": "https://jsheunis.github.io/",
+ "contributions": [
+ "bug",
+ "code",
+ "doc",
+ "ideas",
+ "maintenance",
+ "talk",
+ "userTesting"
+ ]
+ },
+ {
+ "login": "bpoldrack",
+ "name": "Benjamin Poldrack",
+ "avatar_url": "https://avatars.githubusercontent.com/u/10498301?v=4",
+ "profile": "https://github.com/bpoldrack",
+ "contributions": [
+ "bug",
+ "code"
+ ]
+ },
+ {
+ "login": "yarikoptic",
+ "name": "Yaroslav Halchenko",
+ "avatar_url": "https://avatars.githubusercontent.com/u/39889?v=4",
+ "profile": "https://github.com/yarikoptic",
+ "contributions": [
+ "bug",
+ "code",
+ "infra",
+ "maintenance",
+ "tool"
+ ]
+ },
+ {
+ "login": "christian-monch",
+ "name": "Christian MΓΆnch",
+ "avatar_url": "https://avatars.githubusercontent.com/u/17925232?v=4",
+ "profile": "https://github.com/christian-monch",
+ "contributions": [
+ "code",
+ "design",
+ "doc",
+ "ideas",
+ "review",
+ "test",
+ "userTesting"
+ ]
+ },
+ {
+ "login": "adswa",
+ "name": "Adina Wagner",
+ "avatar_url": "https://avatars.githubusercontent.com/u/29738718?v=4",
+ "profile": "https://github.com/adswa",
+ "contributions": [
+ "a11y",
+ "bug",
+ "code",
+ "doc",
+ "example",
+ "maintenance",
+ "projectManagement",
+ "review",
+ "talk",
+ "test",
+ "tutorial",
+ "userTesting"
+ ]
+ },
+ {
+ "login": "jwodder",
+ "name": "John T. Wodder II",
+ "avatar_url": "https://avatars.githubusercontent.com/u/98207?v=4",
+ "profile": "https://github.com/jwodder",
+ "contributions": [
+ "code",
+ "infra",
+ "test"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/.appveyor.yml b/.appveyor.yml
index 4dec8e42b..2624fcbae 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -99,7 +99,7 @@ environment:
# to have `.overrides` be uniformly limited to instance overrides
KEYWORDS: not test_gh1811 and not test_librarymode
APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004
- PY: 3.7
+ PY: 3.8
INSTALL_SYSPKGS:
# datalad-annex git remote needs something after git-annex_8.20211x
INSTALL_GITANNEX: git-annex -m snapshot
@@ -120,7 +120,7 @@ environment:
# because MIH does not know better
KEYWORDS: not test_gh1811 and not test_fake_gitlab and not test_dryrun
APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004
- PY: 3.7
+ PY: 3.8
INSTALL_SYSPKGS:
# datalad-annex git remote needs something after git-annex_8.20211x
INSTALL_GITANNEX: git-annex -m snapshot
@@ -130,7 +130,7 @@ environment:
datalad.distribution
KEYWORDS: not test_invalid_args
APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004
- PY: 3.7
+ PY: 3.8
INSTALL_SYSPKGS:
# datalad-annex git remote needs something after git-annex_8.20211x
INSTALL_GITANNEX: git-annex -m snapshot
@@ -139,7 +139,7 @@ environment:
DTS: >
datalad.local
APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004
- PY: 3.7
+ PY: 3.8
INSTALL_SYSPKGS:
# datalad-annex git remote needs something after git-annex_8.20211x
INSTALL_GITANNEX: git-annex -m snapshot
@@ -151,7 +151,7 @@ environment:
datalad.tests
datalad.ui
APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004
- PY: 3.7
+ PY: 3.8
INSTALL_SYSPKGS:
# datalad-annex git remote needs something after git-annex_8.20211x
INSTALL_GITANNEX: git-annex -m snapshot
@@ -285,7 +285,7 @@ test_script:
# run test selection (--traverse-namespace needed from Python 3.8 onwards)
- cmd: python -m pytest -s -v -m "not (turtle)" -k "%KEYWORDS%" --cov=datalad_next --pyargs %DTS%
# also add --cov datalad, because some core test runs may not touch -next code
- - sh: PATH=$PWD/../tools/coverage-bin:$PATH python -m pytest -s -v -m "not (turtle)" -k "$KEYWORDS" --cov=datalad_next --cov datalad --pyargs ${DTS}
+ - sh: python -m pytest -s -v -m "not (turtle)" -k "$KEYWORDS" --cov=datalad_next --cov datalad --cov-config=../.coveragerc --pyargs ${DTS}
after_test:
diff --git a/.coveragerc b/.coveragerc
index 0ed61f69c..47273f876 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,7 +1,16 @@
+[run]
+parallel = True
+branch = True
+data_file = ${COVERAGE_ROOT-.}/.coverage
+omit =
+ # versioneer
+ */_version.py
+
[paths]
source =
datalad_next/
*/datalad_next/
+
[report]
# show lines missing coverage in output
show_missing = True
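
The new `[run]` section enables parallel, branch-aware collection with a
data-file location that subprocesses can redirect via `COVERAGE_ROOT`. As a
minimal sketch (paths are illustrative), the parallel data files produced by
such a setup can be merged with coverage.py's Python API:

```python
# a minimal sketch: merge parallel coverage data files after a test run
from coverage import Coverage

cov = Coverage(config_file='.coveragerc')
cov.combine()  # merge the .coverage.* files written by parallel runs
cov.load()
cov.report(show_missing=True)  # mirrors the [report] setting above
```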
diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
new file mode 100644
index 000000000..8b3d9be0c
--- /dev/null
+++ b/.github/workflows/mypy.yml
@@ -0,0 +1,39 @@
+name: Type annotation
+
+on:
+ pull_request:
+ paths:
+ - 'datalad_next/**.py'
+ - '!**/tests/**.py'
+
+jobs:
+ static-type-check:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Setup Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: 3.8
+ architecture: x64
+ - name: Checkout
+ uses: actions/checkout@v3
+ - name: Install mypy
+ run: python -m pip install mypy # you can pin your preferred version
+ - name: Get Python changed files
+ id: changed-py-files
+ uses: tj-actions/changed-files@v23
+ with:
+ files: |
+ *.py
+ **/*.py
+ - name: Type check changed files
+ if: steps.changed-py-files.outputs.any_changed == 'true'
+ run: |
+ # get any type stubs that mypy thinks it needs
+ mypy --install-types --non-interactive --follow-imports skip --ignore-missing-imports ${{ steps.changed-py-files.outputs.all_changed_files }}
+ # run mypy on the modified files only, and do not even follow imports.
# this results in a fairly superficial test, but given the overall
+ # state of annotations, we strive to become more correct incrementally
+ # with focused error reports, rather than barfing a huge complaint
+ # that is unrelated to the changeset someone has been working on
+ mypy --follow-imports skip --ignore-missing-imports --pretty --show-error-context ${{ steps.changed-py-files.outputs.all_changed_files }}
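
For illustration, the same per-file check can be invoked through mypy's Python
API (a sketch; the workflow above uses the CLI, and the file name is merely an
example):

```python
# a minimal sketch of the equivalent check via mypy's Python API
from mypy import api

stdout, stderr, exit_status = api.run([
    '--follow-imports', 'skip',
    '--ignore-missing-imports',
    'datalad_next/commands/download.py',  # example changed file
])
print(stdout)
```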
diff --git a/.github/workflows/update-contributors.yml b/.github/workflows/update-contributors.yml
new file mode 100644
index 000000000..261981c3f
--- /dev/null
+++ b/.github/workflows/update-contributors.yml
@@ -0,0 +1,86 @@
+name: allcontributors-auto-detect
+
+on:
+ push:
+ branches:
+ - main
+
+jobs:
+ Update:
+ name: Generate
+ runs-on: ubuntu-latest
+ if: contains(github.repository, 'datalad/datalad-next')
+ steps:
+ - name: Checkout Repository
+ uses: actions/checkout@v4
+
+ - name: Tributors Update
+ uses: con/tributors@0.1.1
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ with:
+
+ # Single text list (space separated) of parsers, leave unset to auto-detect
+ parsers: unset
+
+ # Update lookup with GitHub metadata
+ update_lookup: github
+
+ # Skip these users (example)
+ skip_users:
+
+ # INFO, DEBUG, ERROR, WARNING, etc.
+ log_level: DEBUG
+
+ # If files already exist and an init is done, force overwrite
+ force: true
+
+ # the minimum number of contributions required to add a user
+ threshold: 1
+
+ - name: Checkout New Branch
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ BRANCH_AGAINST: "main"
+ run: |
+ printf "GitHub Actor: ${GITHUB_ACTOR}\n"
+ export BRANCH_FROM="contributors/update-$(date '+%Y-%m-%d')"
+ git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
+
+ BRANCH_EXISTS=$(git ls-remote --heads origin ${BRANCH_FROM})
+ if [[ -z ${BRANCH_EXISTS} ]]; then
+ printf "Branch does not exist in remote.\n"
+ else
+ printf "Branch already exists in remote.\n"
+ exit 1
+ fi
+ git branch
+ git checkout -b "${BRANCH_FROM}" || git checkout "${BRANCH_FROM}"
+ git branch
+
+ git config --global user.name "github-actions"
+ git config --global user.email "github-actions@users.noreply.github.com"
+ git status
+
+ if git diff-index --quiet HEAD --; then
+ export OPEN_PULL_REQUEST=0
+ printf "No changes\n"
+ else
+ export OPEN_PULL_REQUEST=1
+ printf "Changes\n"
+ git commit -a -m "Automated deployment to update contributors $(date '+%Y-%m-%d')
+
+ [skip ci]"
+ git push origin "${BRANCH_FROM}"
+ fi
+ echo "OPEN_PULL_REQUEST=${OPEN_PULL_REQUEST}" >> $GITHUB_ENV
+ echo "PULL_REQUEST_FROM_BRANCH=${BRANCH_FROM}" >> $GITHUB_ENV
+ echo "PULL_REQUEST_TITLE=[tributors] ${BRANCH_FROM}" >> $GITHUB_ENV
+ echo "PULL_REQUEST_BODY='Tributors update automated pull request.\n\n[skip ci]'" >> $GITHUB_ENV
+
+ - name: Open Pull Request
+ uses: vsoch/pull-request-action@1.0.24
+ if: ${{ env.OPEN_PULL_REQUEST == '1' }}
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ PULL_REQUEST_BRANCH: "main"
\ No newline at end of file
diff --git a/.zenodo.json b/.zenodo.json
index 904bc1534..b05453614 100644
--- a/.zenodo.json
+++ b/.zenodo.json
@@ -42,7 +42,12 @@
"orcid": "0000-0003-2917-3450"
},
{
- "name": "Wodder II, John T."
+ "name": "Wodder II, John T.",
+ "orcid": "0000-0001-7106-2661"
+ },
+ {
+ "name": "Trainito, Caterina",
+ "orcid": "0000-0002-1713-8343"
}
],
"keywords": [
@@ -52,4 +57,4 @@
"access_right": "open",
"license": "MIT",
"upload_type": "software"
-}
+}
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f51801bb8..7fe490fcc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,191 @@
+# 1.0.2 (2023-10-23) -- Debianize!
+
+## 🏠 Internal
+
+- The `www-authenticate` dependency is dropped. The functionality is
+ replaced by a `requests`-based implementation of an alternative parser.
+ This trims the dependency footprint and facilitates Debian-packaging.
+ The previous test cases are kept and further extended.
+ Fixes https://github.com/datalad/datalad-next/issues/493 via
+ https://github.com/datalad/datalad-next/pull/495 (by @mih)
+
+## 🛡 Tests
+
+- The test battery now honors the `DATALAD_TESTS_NONETWORK` environment
+ variable, and degrades by skipping any tests that require external
+ network access. (by @mih)
+
+# 1.0.1 (2023-10-18)
+
+## 🐛 Bug Fixes
+
+- Fix f-string syntax in error message of the `uncurl` remote.
+ https://github.com/datalad/datalad-next/pull/455 (by @christian-monch)
+
+- `FileSystemItem.from_path()` now honors its `link_target` parameter, and
+ resolves a target for any symlink item conditional on this setting.
+ Previously, a symlink target was always resolved (see the sketch at the
+ end of this section).
+ Fixes https://github.com/datalad/datalad-next/issues/462 via
+ https://github.com/datalad/datalad-next/pull/464 (by @mih)
+
+- Update the vendor installation of versioneer to v0.29. This
+ resolves an installation failure with Python 3.12 due to
+ the removal of an ancient class.
+ Fixes https://github.com/datalad/datalad-next/issues/475 via
+ https://github.com/datalad/datalad-next/pull/483 (by @mih)
+
+- Bump dependency on Python to 3.8. This is presently the oldest version
+ still supported upstream. However, some functionality already used
+ 3.8 features, so this is also a bug fix.
+ Fixes https://github.com/datalad/datalad-next/issues/481 via
+ https://github.com/datalad/datalad-next/pull/486 (by @mih)
+
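+For illustration, a sketch of the fixed `from_path()` behavior (the import
+path is an assumption based on this extension's layout):
+
+```python
+from pathlib import Path
+from datalad_next.iter_collections.utils import FileSystemItem
+
+# with link_target=False, a symlink's target is no longer resolved
+item = FileSystemItem.from_path(Path('some-symlink'), link_target=False)
+assert item.link_target is None
+```
+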
+## 💫 Enhancements and new features
+
+- Patch datalad-core's `run` command to honor configuration defaults
+ for substitutions. This enables placeholders like `{python}` that
+ point to `sys.executable` by default, and need not be explicitly
+ defined in system/user/dataset configuration.
+ Fixes https://github.com/datalad/datalad-next/issues/478 via
+ https://github.com/datalad/datalad-next/pull/485 (by @mih)
+
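+For illustration, a sketch of the patched behavior (assumes datalad-next is
+installed and enabled; the command is illustrative):
+
+```python
+from datalad.api import run
+
+# '{python}' now expands to sys.executable by default,
+# without any explicit configuration of the substitution
+run('{python} -c "print(42)"', dataset='.')
+```
+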
+## 📝 Documentation
+
+- Include `gitworktree` among the available file collection types
+ listed in `ls-file-collection`'s docstring. Fixes
+ https://github.com/datalad/datalad-next/issues/470 via
+ https://github.com/datalad/datalad-next/pull/471 (by @mslw)
+
+- The rendered API documentation now includes an entrypoint for the
+ runner-related functionality and documentation at
+ https://docs.datalad.org/projects/next/en/latest/generated/datalad_next.runners.html
+ Fixes https://github.com/datalad/datalad-next/issues/466 via
+ https://github.com/datalad/datalad-next/pull/467 (by @mih)
+
+## 🛡 Tests
+
+- Simplified setup for subprocess test-coverage reporting. Standard
+ pytest-cov features are now employed, rather than the previous
+ approach that was adopted from datalad-core, which originated
+ at a time when testing was performed via nose.
+ Fixes https://github.com/datalad/datalad-next/issues/453 via
+ https://github.com/datalad/datalad-next/pull/457 (by @mih)
+
+
+# 1.0.0 (2023-09-25)
+
+This release represents a milestone in the development of the extension.
+The package is reorganized to be a collection of more self-contained
+mini-packages, each with its own set of tests.
+
+Developer documentation and guidelines have been added to aid further
+development. One particular goal is to establish datalad-next as a proxy
+for importing datalad-core functionality for other extensions. Direct imports
+from datalad-core can be minimized in favor of imports from datalad-next.
+This helps identify functionality needed outside the core package,
+and guides efforts for future improvements.
+
+The 1.0 release marks the switch to a more standard approach to semantic
+versioning. However, although substantial improvements have been made,
+the 1.0 version in no way indicates a slowdown of development or a change in the
+likelihood of (breaking) changes. Such changes will merely become more easily
+discoverable from the version label alone.
+
+Notable high-level features introduced by this major release are:
+
+- The new `UrlOperations` framework to provide a set of basic operations like
+ `download`, `upload`, `stat` for different protocols. This framework can be
+ thought of as a replacement for the "downloaders" functionality in
+ datalad-core -- although the feature list is not 100% overlapping. This new
+ framework is more easily extensible by 3rd-party code (see the usage
+ sketch after this list).
+
+- The `Constraints` framework elevates parameter/input validation to the next
+ level. In contrast to datalad-core, declarative input validation is no longer
+ limited to the CLI. Instead, command parameters can now be validated regardless
+ of the entrypoint through which a command is used. They can be validated
+ individually, but also sets of parameters can be validated jointly to implement
+ particular interaction checks. All parameter validations can now be performed
+ exhaustively, to present a user with a complete list of validation errors, rather
+ than the fail-on-first-error approach implemented exclusively in datalad-core.
+ Validation errors are now reported using a dedicated structured data type to aid
+ their communication via non-console interfaces.
+
+- The `Credentials` system has been further refined with more homogenized
+ workflows and deeper integration into other subsystems. This release merely
+ represents a snapshot of continued development towards a standardization of
+ credential handling workflows.
+
+- The annex remotes `uncurl` and `archivist` are replacements for the
+ datalad-core implementations `datalad` and `datalad-archive`. They offer
+ substantially improved configurability and leaner operation -- built on the
+ `UrlOperations` framework.
+
+- A growing collection of iterators (see `iter_collections`) aims to provide
+ fast (and more Pythonic) operations on common data structures (Git worktrees,
+ directories, archives). They can be used as an alternative to the traditional
+ `Repo` classes (`GitRepo`, `AnnexRepo`) from datalad-core.
+
+- Analogous to `UrlOperations`, the `ArchiveOperations` framework aims to provide
+ an abstraction for operations on different archive types (e.g., TAR). They
+ represent an alternative to the traditional implementations of
+ `ExtractedArchive` and `ArchivesCache` from datalad-core, and aim at leaner
+ resource footprints.
+
+- The collection of runtime patches for datalad-core has been further expanded.
+ All patches are now individually documented, and applied using a set of standard
+ helpers (see http://docs.datalad.org/projects/next/en/latest/patches.html).
+
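+A usage sketch for the `UrlOperations` framework (class and import path as
+provided by this extension; the URL is illustrative):
+
+```python
+from datalad_next.url_operations.any import AnyUrlOperations
+
+ops = AnyUrlOperations()
+# a matching protocol handler is selected based on the URL
+props = ops.stat('https://example.com/data.csv')
+ops.download('https://example.com/data.csv', 'data.csv')
+```
+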
+For details, please see the changelogs of the 1.0.0 beta releases below.
+
+## 💫 Enhancements and new features
+
+- `TarArchiveOperations` is the first implementation of the `ArchiveOperations`
+ abstraction, providing archive handlers with a set of standard operations:
+ - `open` to get a file object for a particular archive member
+ - `__contains__` to check for the presence of a particular archive member
+ - `__iter__` to get an iterator for processing all archive members
+ https://github.com/datalad/datalad-next/pull/415 (by @mih)
+
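+For illustration, a sketch of these standard operations (the import path is
+an assumption; archive path and member name are illustrative):
+
+```python
+from datalad_next.archive_operations import TarArchiveOperations
+
+with TarArchiveOperations('data.tar.xz') as archive:
+    for item in archive:               # __iter__: process all members
+        print(item.name)
+    if 'dir/file.txt' in archive:      # __contains__: member presence
+        with archive.open('dir/file.txt') as fp:  # open: file object
+            content = fp.read()
+```
+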
+## 🐛 Bug Fixes
+
+- Make `TarfileItem.name` be of type `PurePosixPath` to reflect the fact
+ that a TAR archive can contain members with names that cannot be represented
+ unmodified on a non-POSIX file system.
+ https://github.com/datalad/datalad-next/pull/422 (by @mih)
+ An analogous change was made for `ZipfileItem.name`.
+ https://github.com/datalad/datalad-next/pull/409 (by @christian-monch)
+
+- Fix `git ls-files` parsing in `iter_gitworktree()` to be compatible with
+ file names that start with a `tab` character.
+ https://github.com/datalad/datalad-next/pull/421 (by @christian-monch)
+
+## 📝 Documentation
+
+- Expanded guidelines on test implementations.
+
+- Add missing and fix wrong docstrings for HTTP/WebDAV server-related fixtures.
+ https://github.com/datalad/datalad-next/pull/445 (by @adswa)
+
+## 🏠 Internal
+
+- Deduplicate configuration handling code in annex remotes.
+ https://github.com/datalad/datalad-next/pull/440 (by @adswa)
+
+## 🛡 Tests
+
+- New test fixtures have been introduced to replace traditional test helpers
+ from datalad-core:
+
+ - `datalad_interactive_ui` and `datalad_noninteractive_ui` for testing
+ user interactions. They replace `with_testsui`.
+ https://github.com/datalad/datalad-next/pull/427 (by @mih)
+
+- Expand test coverage for `create_sibling_webdav` to include recursive
+ operation.
+ https://github.com/datalad/datalad-next/pull/434 (by @adswa)
+
+
# 1.0.0b3 (2023-06-09)
## 🐛 Bug Fixes
@@ -134,7 +321,7 @@
https://github.com/datalad/datalad-next/pull/391 (by @mih)
- The `main()` entrypoint of the `datalad-annex` Git remote helper has
- be generalized to be more re-usable by other (derived) remote helper
+ be generalized to be more reusable by other (derived) remote helper
implementations.
https://github.com/datalad/datalad-next/pull/411 (by @mih)
@@ -246,7 +433,7 @@
- The CredentialManager was elevated to a top-level module
([#229](https://github.com/datalad/datalad-next/pull/220) by @mih)
- Dataset-lookup behavior of the ``credentials`` command became identical to
- ``downlad`` ([#256](https://github.com/datalad/datalad-next/pull/256) by
+ ``download`` ([#256](https://github.com/datalad/datalad-next/pull/256) by
@mih)
- The DataLad runner performance patch and all patches to clone functionality
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3980676a8..4a3284f4b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -100,3 +100,19 @@ The following components of the `datalad` package must not be used (directly) in
#### `require_dataset()`
Commands must use `datalad_next.constraints.dataset.EnsureDataset` instead.
+
+#### nose-style decorators in test implementations
+
+The use of decorators like `with_tempfile` is not allowed.
+`pytest` fixtures have to be used instead.
+A *temporary* exception *may* be the helpers that are imported in `datalad_next.tests.utils`.
+However, these will be reduced and removed over time, and additional usage only adds to the necessary refactoring effort.
+Therefore new usage is highly discouraged.
+
+#### nose-style assertion helpers in test implementations
+
+The use of helpers like `assert_equal` is not allowed.
+`pytest` constructs have to be used instead -- this typically means plain `assert` statements.
+A *temporary* exception *may* be the helpers that are imported in `datalad_next.tests.utils`.
+However, these will be reduced and removed over time, and additional usage only adds to the necessary refactoring effort.
+Therefore new usage is highly discouraged.
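+
+For illustration, a test that previously relied on nose-style helpers can use
+pytest's built-in `tmp_path` fixture and plain `assert` statements (a minimal
+sketch):
+
+```python
+def test_write_probe(tmp_path):
+    # tmp_path is a pathlib.Path to a fresh temporary directory
+    probe = tmp_path / 'probe.txt'
+    probe.write_text('content')
+    assert probe.read_text() == 'content'
+```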
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 8f1765a7a..d7ac0d167 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -1,3 +1 @@
-The following people have contributed to this project:
-
-Michael Hanke
+See README.md for a comprehensive list of contributors
diff --git a/README.md b/README.md
index c4446b41b..dc020d7d2 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
# DataLad NEXT extension
+[![All Contributors](https://img.shields.io/github/all-contributors/datalad/datalad-next?color=ee8449&style=flat-square)](#contributors)
[![Build status](https://ci.appveyor.com/api/projects/status/dxomp8wysjb7x2os/branch/main?svg=true)](https://ci.appveyor.com/project/mih/datalad-next/branch/main)
[![codecov](https://codecov.io/gh/datalad/datalad-next/branch/main/graph/badge.svg?token=2P8rak7lSX)](https://codecov.io/gh/datalad/datalad-next)
[![crippled-filesystems](https://github.com/datalad/datalad-next/workflows/crippled-filesystems/badge.svg)](https://github.com/datalad/datalad-next/actions?query=workflow%3Acrippled-filesystems)
@@ -146,3 +147,33 @@ available at http://docs.datalad.org/projects/next/en/latest/#datalad-patches
This DataLad extension was developed with funding from the Deutsche
Forschungsgemeinschaft (DFG, German Research Foundation) under grant SFB 1451
([431549029](https://gepris.dfg.de/gepris/projekt/431549029), INF project).
+
+
+## Contributors
+
+<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
+<!-- ALL-CONTRIBUTORS-LIST:END -->
diff --git a/changelog.d/20231021_102012_michael.hanke_ensurehash.md b/changelog.d/20231021_102012_michael.hanke_ensurehash.md
new file mode 100644
index 000000000..dcf0d5fdf
--- /dev/null
+++ b/changelog.d/20231021_102012_michael.hanke_ensurehash.md
@@ -0,0 +1,6 @@
+### 💫 Enhancements and new features
+
+- New `EnsureHashAlgorithm` constraint to automatically expose
+ and verify algorithm labels from `hashlib.algorithms_guaranteed`.
+ Fixes https://github.com/datalad/datalad-next/issues/346 via
+ https://github.com/datalad/datalad-next/pull/492 (by @mslw @adswa)
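+
+For illustration, a sketch of the new constraint in action:
+
+```python
+from datalad_next.constraints import EnsureHashAlgorithm
+
+constraint = EnsureHashAlgorithm()
+assert constraint('sha256') == 'sha256'  # a guaranteed algorithm passes
+constraint('sha2')  # raises a ConstraintError: unknown algorithm label
+```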
diff --git a/changelog.d/20231023_064405_michael.hanke_www_auth.md b/changelog.d/20231023_064405_michael.hanke_www_auth.md
new file mode 100644
index 000000000..f4752d524
--- /dev/null
+++ b/changelog.d/20231023_064405_michael.hanke_www_auth.md
@@ -0,0 +1,8 @@
+### 🏠 Internal
+
+- The `www-authenticate` dependency is dropped. The functionality is
+ replaced by a `requests`-based implementation of an alternative parser.
+ This trims the dependency footprint and facilitates Debian-packaging.
+ The previous test cases are kept and further extended.
+ Fixes https://github.com/datalad/datalad-next/issues/493 via
+ https://github.com/datalad/datalad-next/pull/495 (by @mih)
diff --git a/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md b/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md
new file mode 100644
index 000000000..f225fce82
--- /dev/null
+++ b/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md
@@ -0,0 +1,6 @@
+### 🐛 Bug Fixes
+
+- Add patch to fix `update`'s target detection for adjusted-mode datasets,
+ which could crash under some circumstances.
+ See https://github.com/datalad/datalad/issues/7507, fixed via
+ https://github.com/datalad/datalad-next/pull/509 (by @mih)
diff --git a/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md b/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md
new file mode 100644
index 000000000..f41dadca2
--- /dev/null
+++ b/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md
@@ -0,0 +1,6 @@
+### 💫 Enhancements and new features
+
+- The `archivist` remote now supports archive type detection
+ from `*E`-type annex keys for `.tgz` archives too.
+ Fixes https://github.com/datalad/datalad-next/issues/517 via
+ https://github.com/datalad/datalad-next/pull/518 (by @mih)
diff --git a/datalad_next/__init__.py b/datalad_next/__init__.py
index 36df189a5..116df896c 100644
--- a/datalad_next/__init__.py
+++ b/datalad_next/__init__.py
@@ -105,6 +105,5 @@
)
-from ._version import get_versions
-__version__ = get_versions()['version']
-del get_versions
+from . import _version
+__version__ = _version.get_versions()['version']
diff --git a/datalad_next/_version.py b/datalad_next/_version.py
index 2edb50eb5..1677391dc 100644
--- a/datalad_next/_version.py
+++ b/datalad_next/_version.py
@@ -5,8 +5,9 @@
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
-# This file is released into the public domain. Generated by
-# versioneer-0.18 (https://github.com/warner/python-versioneer)
+# This file is released into the public domain.
+# Generated by versioneer-0.29
+# https://github.com/python-versioneer/python-versioneer
"""Git implementation of _version.py."""
@@ -15,9 +16,11 @@
import re
import subprocess
import sys
+from typing import Any, Callable, Dict, List, Optional, Tuple
+import functools
-def get_keywords():
+def get_keywords() -> Dict[str, str]:
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
@@ -33,8 +36,15 @@ def get_keywords():
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
+ VCS: str
+ style: str
+ tag_prefix: str
+ parentdir_prefix: str
+ versionfile_source: str
+ verbose: bool
-def get_config():
+
+def get_config() -> VersioneerConfig:
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
@@ -52,13 +62,13 @@ class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""
-LONG_VERSION_PY = {}
-HANDLERS = {}
+LONG_VERSION_PY: Dict[str, str] = {}
+HANDLERS: Dict[str, Dict[str, Callable]] = {}
-def register_vcs_handler(vcs, method): # decorator
- """Decorator to mark a method as the handler for a particular VCS."""
- def decorate(f):
+def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator
+ """Create decorator to mark a method as the handler of a VCS."""
+ def decorate(f: Callable) -> Callable:
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
@@ -67,22 +77,35 @@ def decorate(f):
return decorate
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
- env=None):
+def run_command(
+ commands: List[str],
+ args: List[str],
+ cwd: Optional[str] = None,
+ verbose: bool = False,
+ hide_stderr: bool = False,
+ env: Optional[Dict[str, str]] = None,
+) -> Tuple[Optional[str], Optional[int]]:
"""Call the given command(s)."""
assert isinstance(commands, list)
- p = None
- for c in commands:
+ process = None
+
+ popen_kwargs: Dict[str, Any] = {}
+ if sys.platform == "win32":
+ # This hides the console window if pythonw.exe is used
+ startupinfo = subprocess.STARTUPINFO()
+ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+ popen_kwargs["startupinfo"] = startupinfo
+
+ for command in commands:
try:
- dispcmd = str([c] + args)
+ dispcmd = str([command] + args)
# remember shell=False, so use git.cmd on windows, not just git
- p = subprocess.Popen([c] + args, cwd=cwd, env=env,
- stdout=subprocess.PIPE,
- stderr=(subprocess.PIPE if hide_stderr
- else None))
+ process = subprocess.Popen([command] + args, cwd=cwd, env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr
+ else None), **popen_kwargs)
break
- except EnvironmentError:
- e = sys.exc_info()[1]
+ except OSError as e:
if e.errno == errno.ENOENT:
continue
if verbose:
@@ -93,18 +116,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
if verbose:
print("unable to find command, tried %s" % (commands,))
return None, None
- stdout = p.communicate()[0].strip()
- if sys.version_info[0] >= 3:
- stdout = stdout.decode()
- if p.returncode != 0:
+ stdout = process.communicate()[0].strip().decode()
+ if process.returncode != 0:
if verbose:
print("unable to run %s (error)" % dispcmd)
print("stdout was %s" % stdout)
- return None, p.returncode
- return stdout, p.returncode
+ return None, process.returncode
+ return stdout, process.returncode
-def versions_from_parentdir(parentdir_prefix, root, verbose):
+def versions_from_parentdir(
+ parentdir_prefix: str,
+ root: str,
+ verbose: bool,
+) -> Dict[str, Any]:
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both
@@ -113,15 +138,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
"""
rootdirs = []
- for i in range(3):
+ for _ in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
- else:
- rootdirs.append(root)
- root = os.path.dirname(root) # up a level
+ rootdirs.append(root)
+ root = os.path.dirname(root) # up a level
if verbose:
print("Tried directories %s but none started with prefix %s" %
@@ -130,41 +154,48 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
@register_vcs_handler("git", "get_keywords")
-def git_get_keywords(versionfile_abs):
+def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
- keywords = {}
+ keywords: Dict[str, str] = {}
try:
- f = open(versionfile_abs, "r")
- for line in f.readlines():
- if line.strip().startswith("git_refnames ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["refnames"] = mo.group(1)
- if line.strip().startswith("git_full ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["full"] = mo.group(1)
- if line.strip().startswith("git_date ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["date"] = mo.group(1)
- f.close()
- except EnvironmentError:
+ with open(versionfile_abs, "r") as fobj:
+ for line in fobj:
+ if line.strip().startswith("git_refnames ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["refnames"] = mo.group(1)
+ if line.strip().startswith("git_full ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["full"] = mo.group(1)
+ if line.strip().startswith("git_date ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["date"] = mo.group(1)
+ except OSError:
pass
return keywords
@register_vcs_handler("git", "keywords")
-def git_versions_from_keywords(keywords, tag_prefix, verbose):
+def git_versions_from_keywords(
+ keywords: Dict[str, str],
+ tag_prefix: str,
+ verbose: bool,
+) -> Dict[str, Any]:
"""Get version information from git keywords."""
- if not keywords:
- raise NotThisMethod("no keywords at all, weird")
+ if "refnames" not in keywords:
+ raise NotThisMethod("Short version file found")
date = keywords.get("date")
if date is not None:
+ # Use only the last line. Previous lines may contain GPG signature
+ # information.
+ date = date.splitlines()[-1]
+
# git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
@@ -177,11 +208,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
- refs = set([r.strip() for r in refnames.strip("()").split(",")])
+ refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
- tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
@@ -190,7 +221,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
- tags = set([r for r in refs if re.search(r'\d', r)])
+ tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
@@ -199,6 +230,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
+ # Filter out refs that exactly match prefix or that don't start
+ # with a number once the prefix is stripped (mostly a concern
+ # when prefix is '')
+ if not re.match(r'\d', r):
+ continue
if verbose:
print("picking %s" % r)
return {"version": r,
@@ -214,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
@register_vcs_handler("git", "pieces_from_vcs")
-def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+def git_pieces_from_vcs(
+ tag_prefix: str,
+ root: str,
+ verbose: bool,
+ runner: Callable = run_command
+) -> Dict[str, Any]:
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
@@ -225,8 +266,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
- out, rc = run_command(GITS, ["--git-dir=.git", "rev-parse", "--git-dir"], cwd=root,
- hide_stderr=True)
+ # GIT_DIR can interfere with correct operation of Versioneer.
+ # It may be intended to be passed to the Versioneer-versioned project,
+ # but that should not change where we get our version from.
+ env = os.environ.copy()
+ env.pop("GIT_DIR", None)
+ runner = functools.partial(runner, env=env)
+
+ _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
+ hide_stderr=not verbose)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
@@ -234,24 +282,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
- describe_out, rc = run_command(GITS, ["--git-dir=.git", "describe", "--tags", "--dirty",
- "--always", "--long",
- "--match", "%s*" % tag_prefix],
- cwd=root)
+ describe_out, rc = runner(GITS, [
+ "describe", "--tags", "--dirty", "--always", "--long",
+ "--match", f"{tag_prefix}[[:digit:]]*"
+ ], cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
- full_out, rc = run_command(GITS, ["--git-dir=.git", "rev-parse", "HEAD"], cwd=root)
+ full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
- pieces = {}
+ pieces: Dict[str, Any] = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
+ branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
+ cwd=root)
+ # --abbrev-ref was added in git-1.6.3
+ if rc != 0 or branch_name is None:
+ raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
+ branch_name = branch_name.strip()
+
+ if branch_name == "HEAD":
+ # If we aren't exactly on a branch, pick a branch which represents
+ # the current commit. If all else fails, we are on a branchless
+ # commit.
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
+ # --contains was added in git-1.5.4
+ if rc != 0 or branches is None:
+ raise NotThisMethod("'git branch --contains' returned error")
+ branches = branches.split("\n")
+
+ # Remove the first line if we're running detached
+ if "(" in branches[0]:
+ branches.pop(0)
+
+ # Strip off the leading "* " from the list of branches.
+ branches = [branch[2:] for branch in branches]
+ if "master" in branches:
+ branch_name = "master"
+ elif not branches:
+ branch_name = None
+ else:
+ # Pick the first branch that is returned. Good or bad.
+ branch_name = branches[0]
+
+ pieces["branch"] = branch_name
+
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
@@ -293,26 +374,27 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
else:
# HEX: no tags
pieces["closest-tag"] = None
- count_out, rc = run_command(GITS, ["--git-dir=.git", "rev-list", "HEAD", "--count"],
- cwd=root)
- pieces["distance"] = int(count_out) # total number of commits
+ out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+ pieces["distance"] = len(out.split()) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
- date = run_command(GITS, ["--git-dir=.git", "show", "-s", "--format=%ci", "HEAD"],
- cwd=root)[0].strip()
+ date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
+ # Use only the last line. Previous lines may contain GPG signature
+ # information.
+ date = date.splitlines()[-1]
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
return pieces
-def plus_or_dot(pieces):
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
-def render_pep440(pieces):
+def render_pep440(pieces: Dict[str, Any]) -> str:
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
@@ -337,23 +419,71 @@ def render_pep440(pieces):
return rendered
-def render_pep440_pre(pieces):
- """TAG[.post.devDISTANCE] -- No -dirty.
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
+ """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+ The ".dev0" means not master branch. Note that .dev0 sorts backwards
+ (a feature branch will appear "older" than the master branch).
Exceptions:
- 1: no tags. 0.post.devDISTANCE
+ 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0"
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += "+untagged.%d.g%s" % (pieces["distance"],
+ pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
+
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
+ """Split pep440 version string at the post-release segment.
+
+ Returns the release segments before the post-release and the
+ post-release version number (or -1 if no post-release segment is present).
+ """
+ vc = str.split(ver, ".post")
+ return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
+ """TAG[.postN.devDISTANCE] -- No -dirty.
+
+ Exceptions:
+ 1: no tags. 0.post0.devDISTANCE
+ """
+ if pieces["closest-tag"]:
if pieces["distance"]:
- rendered += ".post.dev%d" % pieces["distance"]
+ # update the post release segment
+ tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+ rendered = tag_version
+ if post_version is not None:
+ rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
+ else:
+ rendered += ".post0.dev%d" % (pieces["distance"])
+ else:
+ # no commits, use the tag as the version
+ rendered = pieces["closest-tag"]
else:
# exception #1
- rendered = "0.post.dev%d" % pieces["distance"]
+ rendered = "0.post0.dev%d" % pieces["distance"]
return rendered
-def render_pep440_post(pieces):
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
@@ -380,12 +510,41 @@ def render_pep440_post(pieces):
return rendered
-def render_pep440_old(pieces):
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
+ """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+ The ".dev0" means not master branch.
+
+ Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%d" % pieces["distance"]
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "g%s" % pieces["short"]
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0.post%d" % pieces["distance"]
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += "+g%s" % pieces["short"]
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
+
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
- Eexceptions:
+ Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
@@ -402,7 +561,7 @@ def render_pep440_old(pieces):
return rendered
-def render_git_describe(pieces):
+def render_git_describe(pieces: Dict[str, Any]) -> str:
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
@@ -422,7 +581,7 @@ def render_git_describe(pieces):
return rendered
-def render_git_describe_long(pieces):
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always -long'.
@@ -442,7 +601,7 @@ def render_git_describe_long(pieces):
return rendered
-def render(pieces, style):
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
@@ -456,10 +615,14 @@ def render(pieces, style):
if style == "pep440":
rendered = render_pep440(pieces)
+ elif style == "pep440-branch":
+ rendered = render_pep440_branch(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
+ elif style == "pep440-post-branch":
+ rendered = render_pep440_post_branch(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
@@ -474,7 +637,7 @@ def render(pieces, style):
"date": pieces.get("date")}
-def get_versions():
+def get_versions() -> Dict[str, Any]:
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
@@ -495,7 +658,7 @@ def get_versions():
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
- for i in cfg.versionfile_source.split('/'):
+ for _ in cfg.versionfile_source.split('/'):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
diff --git a/datalad_next/annexremotes/archivist.py b/datalad_next/annexremotes/archivist.py
index d37a2d0f3..a91606084 100644
--- a/datalad_next/annexremotes/archivist.py
+++ b/datalad_next/annexremotes/archivist.py
@@ -119,12 +119,6 @@ class ArchivistRemote(SpecialRemote):
"""
def __init__(self, annex):
super().__init__(annex)
- # the following members will be initialized on prepare()
- # as they require access to the underlying repository
- self._repo = None
- # name of the (git) remote archivist is operating under
- # (for querying the correct configuration)
- self._remotename = None
# central archive handler cache, initialized on-prepare
self._ahandlers = None
# a potential instance of the legacy datalad-archives implementation
@@ -162,8 +156,11 @@ def prepare(self):
subsequent operations will be processed by the ``datalad-archives``
special remote implementation!
"""
+ # we have to do this here, because the base class `.repo` will only give
+ # us a `LeanAnnexRepo`.
+ # TODO it is unclear to MIH what is actually needed API-wise of the legacy
+ # interface. Needs research.
self._repo = LegacyAnnexRepo(self.annex.getgitdir())
- self._remotename = self.annex.getgitremotename()
# are we in legacy mode?
# let remote-specific setting take priority (there could be
# multiple archivist-type remotes configured), and use unspecific switch
@@ -185,7 +182,7 @@ def prepare(self):
# central archive key handler coordination
self._ahandlers = _ArchiveHandlers(
- self._repo,
+ self.repo,
# TODO
#cache_mode=self._getcfg(
# 'archive-cache-mode',
@@ -272,7 +269,7 @@ def checkpresent(self, key: str) -> bool:
# So let's do a two-pass approach, first check local availability
# for any archive key, and only if that does not find us an archive
# go for the remotes
- if any(_get_key_contentpath(self._repo, akey) for akey in akeys):
+ if any(_get_key_contentpath(self.repo, akey) for akey in akeys):
# any one is good enough
# TODO here we could actually look into the archive and
# verify member presence at relatively little cost
@@ -283,7 +280,7 @@ def checkpresent(self, key: str) -> bool:
try:
# if it exits cleanly, the key is still present at at least one
# remote
- self._repo.call_annex(['checkpresentkey', akey])
+ self.repo.call_annex(['checkpresentkey', akey])
return True
except CommandError:
self.message(
diff --git a/datalad_next/annexremotes/uncurl.py b/datalad_next/annexremotes/uncurl.py
index cd40378af..51dade904 100644
--- a/datalad_next/annexremotes/uncurl.py
+++ b/datalad_next/annexremotes/uncurl.py
@@ -45,7 +45,7 @@
for a dataset (as shown above)::
$ echo '[{"url":"ssh://my.server.org/home/me/file", "file":"dummy"}]' \\
- | datalad addurls - '{url}' {'file'}
+ | datalad addurls - '{url}' '{file}'
This makes legacy commands (e.g., ``datalad download-url``) unnecessary, and
facilitates the use of more advanced ``datalad addurls`` features (e.g.,
@@ -68,7 +68,7 @@
password (repeat):
Enter a name to save the credential
(for accessing http://httpbin.org/basic-auth/myuser/mypassword) securely for future
- re-use, or 'skip' to not save the credential
+ reuse, or 'skip' to not save the credential
name: httpbin-dummy
addurl http://httpbin.org/basic-auth/myuser/mypassword (from uncurl) (to ...)
@@ -223,12 +223,6 @@
from pathlib import Path
import re
-# we intentionally limit ourselves to the most basic interface
-# and even that we only need to get a `ConfigManager` instance.
-# If that class would support a plain path argument, we could
-# avoid it entirely
-from datalad_next.datasets import LeanAnnexRepo
-
from datalad_next.exceptions import (
CapturedException,
UrlOperationsRemoteError,
@@ -411,7 +405,7 @@ def remove(self, key):
)
except UrlOperationsResourceUnknown:
self.message(
- 'f{key} not found at the remote, skipping', type='debug')
+ f'{key!r} not found at the remote, skipping', type='debug')
#
# helpers
@@ -424,7 +418,7 @@ def get_key_urls(self, key) -> list[str]:
# this will also work within checkurl() for a temporary key
# generated by git-annex after claimurl()
urls = self.annex.geturls(key, prefix='')
- self.message(f"Known urls for {key!r}: {urls}", type='debug')
+ self.message(f'Known urls for {key!r}: {urls}', type='debug')
if self.url_tmpl:
# we have a rewriting template. extract all properties
# from all known URLs and instantiate the template
diff --git a/datalad_next/archive_operations/tests/test_tarfile.py b/datalad_next/archive_operations/tests/test_tarfile.py
index 4a493db49..52b4ea62d 100644
--- a/datalad_next/archive_operations/tests/test_tarfile.py
+++ b/datalad_next/archive_operations/tests/test_tarfile.py
@@ -10,6 +10,7 @@
import pytest
from datalad_next.iter_collections.utils import FileSystemItemType
+from datalad_next.tests.marker import skipif_no_network
from ..tarfile import TarArchiveOperations
@@ -37,6 +38,7 @@ def structured_sample_tar_xz(
)
+@skipif_no_network
def test_tararchive_basics(structured_sample_tar_xz: TestArchive):
spec = structured_sample_tar_xz
# this is intentionally a hard-coded POSIX relpath
@@ -48,6 +50,7 @@ def test_tararchive_basics(structured_sample_tar_xz: TestArchive):
assert member.read() == spec.content
+@skipif_no_network
def test_tararchive_contain(structured_sample_tar_xz: TestArchive):
# this is intentionally a hard-coded POSIX relpath
member_name = 'test-archive/onetwothree.txt'
@@ -59,6 +62,7 @@ def test_tararchive_contain(structured_sample_tar_xz: TestArchive):
assert 'bogus' not in archive_ops
+@skipif_no_network
def test_tararchive_iterator(structured_sample_tar_xz: TestArchive):
spec = structured_sample_tar_xz
with TarArchiveOperations(spec.path) as archive_ops:
@@ -68,6 +72,7 @@ def test_tararchive_iterator(structured_sample_tar_xz: TestArchive):
assert item.name in archive_ops
+@skipif_no_network
def test_open(structured_sample_tar_xz: TestArchive):
spec = structured_sample_tar_xz
file_pointer = set()
diff --git a/datalad_next/commands/download.py b/datalad_next/commands/download.py
index 4f61e2110..927079b3f 100644
--- a/datalad_next/commands/download.py
+++ b/datalad_next/commands/download.py
@@ -73,7 +73,7 @@ class Download(ValidatedInterface):
In contrast to other downloader tools, this command integrates with the
DataLad credential management and is able to auto-discover credentials.
If no credential is available, it automatically prompts for them, and
- offers to store them for re-use after a successful authentication.
+ offers to store them for reuse after a successful authentication.
Simultaneous hashing (checksumming) of downloaded content is supported
with user-specified algorithms.
diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py
index cbc0aa867..57462c49f 100644
--- a/datalad_next/commands/ls_file_collection.py
+++ b/datalad_next/commands/ls_file_collection.py
@@ -39,6 +39,8 @@
EnsureChoice,
EnsurePath,
EnsureURL,
+ EnsureHashAlgorithm,
+ EnsureListOf,
)
from datalad_next.uis import (
ansi_colors as ac,
@@ -93,9 +95,7 @@ def __init__(self):
param_constraints=dict(
type=self._collection_types,
collection=EnsurePath(lexists=True) | EnsureURL(),
- # TODO EnsureHashAlgorithm
- # https://github.com/datalad/datalad-next/issues/346
- #hash=None,
+ hash=EnsureHashAlgorithm() | EnsureListOf(EnsureHashAlgorithm()),
),
joint_constraints={
ParameterConstraintContext(('type', 'collection', 'hash'),
@@ -226,7 +226,7 @@ class LsFileCollection(ValidatedInterface):
``directory``
Reports on the content of a given directory (non-recursively). The
collection identifier is the path of the directory. Item identifiers
- are the name of a file within that directory. Standard properties like
+ are the names of items within that directory. Standard properties like
``size``, ``mtime``, or ``link_target`` are included in the report.
[PY: When hashes are computed, an ``fp`` property with a file-like
is provided. Reading file data from it requires a ``seek(0)`` in most
@@ -234,6 +234,20 @@ class LsFileCollection(ValidatedInterface):
by this command (``return_type='generator'``) and only until the next
result is yielded. PY]
+ ``gitworktree``
+ Reports on all tracked and untracked content of a Git repository's
+ work tree. The collection identifier is a path of a directory in a Git
+ repository (which can, but need not, be its root). Item identifiers
+ are the relative paths of items within that directory. Reported
+ properties include ``gitsha`` and ``gittype``; note that the
+ ``gitsha`` is not equivalent to a SHA1 hash of a file's content, but
+ is the SHA-type blob identifier as reported and used by Git.
+ [PY: When hashes are computed, an ``fp`` property with a file-like is
+ provided. Reading file data from it requires a ``seek(0)`` in most
+ cases. This file handle is only open when items are yielded directly
+ by this command (``return_type='generator'``) and only until the next
+ result is yielded. PY]
+
``tarfile``
Reports on members of a TAR archive. The collection identifier is the
path of the TAR file. Item identifiers are the relative paths
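
For illustration, a sketch of using the newly documented `gitworktree`
collection type (repository path and hash choice are illustrative):

```python
from datalad.api import ls_file_collection

# report on tracked and untracked content of a repository's worktree
for rec in ls_file_collection(
        'gitworktree', 'path/to/repo',
        hash='sha256',
        result_renderer='disabled',
        return_type='generator'):
    print(rec['path'], rec.get('gitsha'))
```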
diff --git a/datalad_next/commands/tests/test_create_sibling_webdav.py b/datalad_next/commands/tests/test_create_sibling_webdav.py
index c303a3d76..d140edcab 100644
--- a/datalad_next/commands/tests/test_create_sibling_webdav.py
+++ b/datalad_next/commands/tests/test_create_sibling_webdav.py
@@ -138,6 +138,32 @@ def check_common_workflow(
assert_status('ok', dsclone.get('.', **ca))
# verify testfile content
eq_('dummy', (dsclone.pathobj / 'testfile.dat').read_text())
+ # ensure that recursive operations succeed
+ # create a subdataset
+ subds = ds.create('mysubds')
+ targetdir_name = 'recursiontest'
+ subtargetdir = Path(webdav_server.path) / targetdir_name / 'mysubds'
+ url = f'{webdav_server.url}/{targetdir_name}'
+
+ with chpwd(ds.path):
+ res = create_sibling_webdav(
+ url,
+ credential=webdav_credential['name']
+ if declare_credential else None,
+ name='recursive-sibling',
+ mode=mode,
+ recursive=True,
+ **ca)
+ assert len(res) == 4 # 2 for create-sibling-webdav, 2 for storage
+ assert_in_results(
+ res,
+ action='create_sibling_webdav.storage',
+ status='ok',
+ type='sibling',
+ path=subds.path,
+ name='recursive-sibling-storage',
+ )
+ ok_(subtargetdir.exists())
def test_bad_url_catching(existing_dataset):
diff --git a/datalad_next/commands/tests/test_ls_file_collection.py b/datalad_next/commands/tests/test_ls_file_collection.py
index 2455af7f4..dee6247f2 100644
--- a/datalad_next/commands/tests/test_ls_file_collection.py
+++ b/datalad_next/commands/tests/test_ls_file_collection.py
@@ -7,6 +7,7 @@
from datalad.api import ls_file_collection
from datalad_next.constraints.exceptions import CommandParametrizationError
+from datalad_next.tests.marker import skipif_no_network
from ..ls_file_collection import LsFileCollectionParamValidator
@@ -30,6 +31,7 @@ def test_ls_file_collection_insufficient_args():
ls_file_collection('bogus', 'http://example.com')
+@skipif_no_network
def test_ls_file_collection_tarfile(sample_tar_xz):
kwa = dict(result_renderer='disabled')
# smoke test first
@@ -84,6 +86,7 @@ def test_ls_file_collection_validator():
val.get_collection_iter(type='bogus', collection='any', hash=None)
+@skipif_no_network
def test_replace_add_archive_content(sample_tar_xz, existing_dataset):
kwa = dict(result_renderer='disabled')
diff --git a/datalad_next/constraints/__init__.py b/datalad_next/constraints/__init__.py
index 05442fd94..e6f013983 100644
--- a/datalad_next/constraints/__init__.py
+++ b/datalad_next/constraints/__init__.py
@@ -59,6 +59,7 @@
EnsureCallable,
EnsureChoice,
EnsureFloat,
+ EnsureHashAlgorithm,
EnsureInt,
EnsureKeyChoice,
EnsureNone,
diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py
index 0d9c56bc2..af81bee45 100644
--- a/datalad_next/constraints/basic.py
+++ b/datalad_next/constraints/basic.py
@@ -12,8 +12,10 @@
__docformat__ = 'restructuredtext'
+from hashlib import algorithms_guaranteed as hash_algorithms_guaranteed
from pathlib import Path
import re
+from typing import Callable
from datalad_next.datasets import resolve_path
@@ -274,6 +276,9 @@ def long_description(self):
def short_description(self):
return '{%s}' % ', '.join([repr(c) for c in self._allowed])
+ def __str__(self):
+ return f"one of {self.short_description()}"
+
class EnsureKeyChoice(EnsureChoice):
"""Ensure value under a key in an input is in a set of possible values"""
@@ -364,7 +369,7 @@ def __init__(self,
path_type: type = Path,
is_format: str | None = None,
lexists: bool | None = None,
- is_mode: callable | None = None,
+ is_mode: Callable | None = None,
ref: Path | None = None,
ref_is: str = 'parent-or-same-as',
dsarg: DatasetParameter | None = None):
@@ -497,3 +502,12 @@ def short_description(self):
if self._ref
else '',
)
+
+
+class EnsureHashAlgorithm(EnsureChoice):
+    """Ensure an input matches the name of a ``hashlib`` algorithm
+
+    Specifically, the item must be in the ``algorithms_guaranteed`` collection.
+ """
+ def __init__(self):
+ super().__init__(*hash_algorithms_guaranteed)
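
A minimal usage sketch of the new constraint (hedged; it only assumes the
import added to ``datalad_next/constraints/__init__.py`` above):

```python
from datalad_next.constraints import EnsureHashAlgorithm
from datalad_next.constraints.exceptions import ConstraintError

c = EnsureHashAlgorithm()
# names from hashlib.algorithms_guaranteed pass through unchanged
assert c('sha256') == 'sha256'
# anything else raises, as with any EnsureChoice
try:
    c('md17')
except ConstraintError as e:
    print(e)  # the message lists the permitted algorithm names
```
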
diff --git a/datalad_next/constraints/compound.py b/datalad_next/constraints/compound.py
index 99fe8d66d..bb0d87fd0 100644
--- a/datalad_next/constraints/compound.py
+++ b/datalad_next/constraints/compound.py
@@ -77,10 +77,12 @@ def __call__(self, value):
iter = self._iter_type(
self._item_constraint(i) for i in value
)
- except TypeError as e:
+ except (ConstraintError, TypeError) as e:
self.raise_for(
value,
- "cannot coerce to target (item) type",
+ "{itertype} item is not {itype}",
+ itertype=self._iter_type.__name__,
+ itype=self._item_constraint,
__caused_by__=e,
)
if self._min_len is not None or self._max_len is not None:
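
The effect of the widened ``except`` clause can be sketched as follows
(hedged; assumes ``EnsureIterableOf`` keeps its ``(iter_type,
item_constraint)`` signature):

```python
from datalad_next.constraints import EnsureInt
from datalad_next.constraints.compound import EnsureIterableOf

c = EnsureIterableOf(list, EnsureInt())
assert c(('1', '2')) == [1, 2]
try:
    # the second item violates the item constraint
    c(('1', 'not-a-number'))
except Exception as e:
    # the error now names the iterable type and the item constraint,
    # roughly "list item is not int", with the original ConstraintError
    # attached as __caused_by__
    print(e)
```
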
diff --git a/datalad_next/constraints/exceptions.py b/datalad_next/constraints/exceptions.py
index 2955c5025..d75b08ee5 100644
--- a/datalad_next/constraints/exceptions.py
+++ b/datalad_next/constraints/exceptions.py
@@ -105,7 +105,7 @@ def caused_by(self) -> Tuple[Exception] | None:
"""
cb = self.context.get('__caused_by__', None)
if cb is None:
- return
+ return None
elif isinstance(cb, Exception):
return (cb,)
else:
diff --git a/datalad_next/constraints/tests/test_basic.py b/datalad_next/constraints/tests/test_basic.py
index 2748a158e..b1301ba5e 100644
--- a/datalad_next/constraints/tests/test_basic.py
+++ b/datalad_next/constraints/tests/test_basic.py
@@ -11,6 +11,7 @@
EnsureNone,
EnsureCallable,
EnsureChoice,
+ EnsureHashAlgorithm,
EnsureKeyChoice,
EnsureRange,
EnsurePath,
@@ -65,14 +66,14 @@ def test_bool():
# this should always work
assert c(True) is True
assert c(False) is False
- # all that resuls in True
+ # all that results in True
assert c('True') is True
assert c('true') is True
assert c('1') is True
assert c('yes') is True
assert c('on') is True
assert c('enable') is True
- # all that resuls in False
+ # all that results in False
assert c('false') is False
assert c('False') is False
assert c('0') is False
@@ -188,6 +189,7 @@ def test_choice():
assert i in descr
# short is a "set" or repr()s
assert c.short_description() == "{'choice1', 'choice2', None}"
+ assert str(c) == "one of {'choice1', 'choice2', None}"
# this should always work
assert c('choice1') == 'choice1'
assert c(None) is None
@@ -317,3 +319,27 @@ def test_EnsurePath_fordataset(existing_dataset):
# 2. dataset is given as a dataset object
tc = c.for_dataset(DatasetParameter(ds, ds))
assert tc('relpath') == (ds.pathobj / 'relpath')
+
+
+def test_EnsureHashAlgorithm():
+ c = EnsureHashAlgorithm()
+ # simple cases that should pass
+ hashes = [
+ 'sha3_256', 'shake_256', 'sha3_384', 'md5', 'shake_128', 'sha384',
+ 'sha3_224', 'blake2s', 'sha1', 'blake2b', 'sha224', 'sha512', 'sha256',
+ 'sha3_512'
+ ]
+    for algo in hashes:
+        c(algo)
+ # a few bogus ones:
+ bad_hashes = [
+ 'md17', 'McGyver', 'sha2', 'bogus'
+ ]
+ for baddie in bad_hashes:
+ with pytest.raises(ConstraintError):
+ c(baddie)
+
+ # check messaging
+ for i in ('md5', 'shake_256', 'sha3_512'):
+ assert i in c.short_description()
+ assert i in c.long_description()
diff --git a/datalad_next/credman/manager.py b/datalad_next/credman/manager.py
index 60124e2ff..dfe0f4fe2 100644
--- a/datalad_next/credman/manager.py
+++ b/datalad_next/credman/manager.py
@@ -15,6 +15,7 @@
__all__ = ['CredentialManager']
+from collections.abc import Set
from datetime import datetime
import logging
import re
@@ -294,7 +295,7 @@ def set(self,
prompt = 'Enter a name to save the credential'
if _context:
prompt = f'{prompt} ({_context})'
- prompt = f"{prompt} securely for future re-use, " \
+ prompt = f"{prompt} securely for future reuse, " \
"or 'skip' to not save the credential"
if _suggested_name:
prompt = f'{prompt}, or leave empty to accept the name ' \
@@ -823,7 +824,7 @@ def _get_credential_from_cfg(self, name: str) -> Dict:
if k.startswith(var_prefix)
}
- def _get_known_credential_names(self) -> set:
+ def _get_known_credential_names(self) -> Set[str]:
known_credentials = set(
'.'.join(k.split('.')[2:-1]) for k in self._cfg.keys()
if k.startswith('datalad.credential.')
@@ -898,7 +899,7 @@ def _get_legacy_credential_from_keyring(
type_hint = dict(_yield_legacy_credential_types()).get(name)
if not type_hint or type_hint not in self._cred_types:
- return
+ return None
cred = {}
lc = self._cred_types[type_hint]
@@ -952,7 +953,7 @@ def _get_secret(
return secret
# no secret found anywhere
- return
+ return None
@property
def _cfg(self):
diff --git a/datalad_next/datasets/__init__.py b/datalad_next/datasets/__init__.py
index e2f6bdbc4..7c8baf334 100644
--- a/datalad_next/datasets/__init__.py
+++ b/datalad_next/datasets/__init__.py
@@ -3,18 +3,18 @@
Two sets of repository abstractions are available :class:`LeanGitRepo` and
:class:`LeanAnnexRepo` vs. :class:`LegacyGitRepo` and :class:`LegacyAnnexRepo`.
-The latter are the classic classes providing a, now legacy, low-level API to
-repository operations. This functionality stems from the earliest days of
-DataLad and implements paradigms and behaviors that are no longer common to
-the rest of the DataLad API. :class:`LegacyGitRepo` and
-:class:`LegacyAnnexRepo` should no longer be used in new developments.
-
-:class:`LeanGitRepo` and :class:`LeanAnnexRepo` on the other hand provide
-a more modern, substantially restricted API and represent the present
-standard API for low-level repository operations. They are geared towards
-interacting with Git and git-annex more directly, and are more suitable
-for generator-like implementations, promoting low response latencies, and
-a leaner processing footprint.
+:class:`LeanGitRepo` and :class:`LeanAnnexRepo` provide a more modern,
+small-ish interface and represent the present standard API for low-level
+repository operations. They are geared towards interacting with Git and
+git-annex more directly, and are more suitable for generator-like
+implementations, promoting low response latencies, and a leaner processing
+footprint.
+
+The ``Legacy*Repo`` classes provide a, now legacy, low-level API to repository
+operations. This functionality stems from the earliest days of DataLad and
+implements paradigms and behaviors that are no longer common to the rest of the
+DataLad API. :class:`LegacyGitRepo` and :class:`LegacyAnnexRepo` should no
+longer be used in new developments, and are not documented here.
"""
from pathlib import Path
@@ -38,18 +38,33 @@ class LeanAnnexRepo(LegacyAnnexRepo):
"""git-annex repository representation with a minimized API
This is a companion of :class:`LeanGitRepo`. In the same spirit, it
- restricts its API to a limited set of method that primarily extend
- :class:`LeanGitRepo` with a set of ``call_annex*()`` methods.
+    restricts its API to a limited set of methods that extend
+ :class:`LeanGitRepo`.
+
"""
+ #CA .. autosummary::
+
+ #CA call_annex
+ #CA call_annex_oneline
+ #CA call_annex_success
# list of attributes permitted in the "lean" API. This list extends
# the API of LeanGitRepo
- # TODO extend whitelist of attributed as necessary
+ # TODO extend whitelist of attributes as necessary
_lean_attrs = [
+ #CA # these are the ones we intend to provide
+ #CA 'call_annex',
+ #CA 'call_annex_oneline',
+ #CA 'call_annex_success',
+ # and here are the ones that we need to permit in order to get them
+ # to run
'_check_git_version',
+ #CA '_check_git_annex_version',
# used by AnnexRepo.__init__() -- should be using `is_valid()`
'is_valid_git',
'is_valid_annex',
'_is_direct_mode_from_config',
+ #CA '_call_annex',
+ #CA 'call_annex_items_',
]
# intentionally limiting to just `path` as the only constructor argument
@@ -65,5 +80,5 @@ def __new__(cls, path: Path):
return obj
-def _unsupported_method(self):
+def _unsupported_method(self, *args, **kwargs):
raise NotImplementedError('method unsupported by LeanAnnexRepo')
diff --git a/datalad_next/gitremotes/datalad_annex.py b/datalad_next/gitremotes/datalad_annex.py
index 55c86074c..135a64c49 100755
--- a/datalad_next/gitremotes/datalad_annex.py
+++ b/datalad_next/gitremotes/datalad_annex.py
@@ -1146,7 +1146,7 @@ def make_export_tree(repo):
ID of the tree object, suitable for `git-annex export`.
"""
here = repo.config.get('annex.uuid')
- # re-use existing, or go with fixed random one
+    # reuse existing, or go with a fixed random one
origin = repo.config.get('remote.origin.annex-uuid',
'8249ffce-770a-11ec-9578-5f6af5e76eaa')
assert here, "No 'here'"
diff --git a/datalad_next/iter_collections/tests/test_itertar.py b/datalad_next/iter_collections/tests/test_itertar.py
index 7f76b2985..23f393a48 100644
--- a/datalad_next/iter_collections/tests/test_itertar.py
+++ b/datalad_next/iter_collections/tests/test_itertar.py
@@ -3,6 +3,8 @@
from datalad.api import download
+from datalad_next.tests.marker import skipif_no_network
+
from ..tarfile import (
TarfileItem,
FileSystemItemType,
@@ -42,6 +44,7 @@ def sample_tar_xz(tmp_path_factory):
tfpath.unlink()
+@skipif_no_network
def test_iter_tar(sample_tar_xz):
target_hash = {'SHA1': 'a8fdc205a9f19cc1c7507a60c4f01b13d11d7fd0',
'md5': 'ba1f2511fc30423bdbb183fe33f3dd0f'}
diff --git a/datalad_next/iter_collections/tests/test_utils.py b/datalad_next/iter_collections/tests/test_utils.py
new file mode 100644
index 000000000..1393431e9
--- /dev/null
+++ b/datalad_next/iter_collections/tests/test_utils.py
@@ -0,0 +1,32 @@
+from datalad_next.tests.utils import skip_wo_symlink_capability
+
+from ..utils import FileSystemItem
+
+
+def test_FileSystemItem(tmp_path):
+ testfile = tmp_path / 'file1.txt'
+ testfile_content = 'content'
+ testfile.write_text(testfile_content)
+
+ item = FileSystemItem.from_path(testfile)
+ assert item.size == len(testfile_content)
+ assert item.link_target is None
+
+
+@skip_wo_symlink_capability
+def test_FileSystemItem_linktarget(tmp_path):
+ testfile = tmp_path / 'file1.txt'
+ testfile_content = 'short'
+ testfile.write_text(testfile_content)
+ testlink = tmp_path / 'link'
+ testlink.symlink_to(testfile)
+
+ item = FileSystemItem.from_path(testlink)
+ assert testfile.samefile(item.link_target)
+ # size of the link file does not anyhow propagate the size of the
+ # link target
+ assert item.size != len(testfile_content)
+
+ # we can disable link resolution
+ item = FileSystemItem.from_path(testlink, link_target=False)
+ assert item.link_target is None
diff --git a/datalad_next/iter_collections/utils.py b/datalad_next/iter_collections/utils.py
index 0f00a2e5d..91fcdc4c9 100644
--- a/datalad_next/iter_collections/utils.py
+++ b/datalad_next/iter_collections/utils.py
@@ -96,7 +96,7 @@ def from_path(
uid=cstat.st_uid,
gid=cstat.st_gid,
)
- if ctype == FileSystemItemType.symlink:
+ if link_target and ctype == FileSystemItemType.symlink:
# could be p.readlink() from PY3.9+
item.link_target = PurePath(os.readlink(path))
return item
diff --git a/datalad_next/patches/cli_configoverrides.py b/datalad_next/patches/cli_configoverrides.py
index 59276b5d3..b553854e1 100644
--- a/datalad_next/patches/cli_configoverrides.py
+++ b/datalad_next/patches/cli_configoverrides.py
@@ -1,3 +1,10 @@
+"""Post DataLad config overrides CLI/ENV as GIT_CONFIG items in process ENV
+
+This enables their propagation to any subprocess. This includes the
+specification of overrides via the ``datalad -c ...`` option of the
+main CLI entrypoint.
+"""
+
from datalad.config import _update_from_env as _update_from_datalad_env
from datalad.cli.helpers import _parse_overrides_from_cmdline
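
For reference, the ``GIT_CONFIG`` items mentioned above are Git's documented
environment-based configuration scheme; a hedged illustration with a
hypothetical key (requires a reasonably recent Git):

```python
import os
import subprocess

# one override, expressed via GIT_CONFIG_COUNT/GIT_CONFIG_KEY_n/VALUE_n
os.environ.update({
    'GIT_CONFIG_COUNT': '1',
    'GIT_CONFIG_KEY_0': 'datalad.example.override',  # hypothetical key
    'GIT_CONFIG_VALUE_0': 'some-value',
})
# any subprocess now sees the override as regular Git configuration
print(subprocess.run(
    ['git', 'config', 'datalad.example.override'],
    capture_output=True, text=True,
).stdout)  # -> some-value
```
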
diff --git a/datalad_next/patches/commanderror.py b/datalad_next/patches/commanderror.py
index daef7dc69..eb42182a0 100644
--- a/datalad_next/patches/commanderror.py
+++ b/datalad_next/patches/commanderror.py
@@ -1,3 +1,11 @@
+"""Improve ``CommandError`` rendering
+
+Without this patch, which overwrites ``__repr__``, ``CommandError`` would use
+``RuntimeError``'s variant and ignore all additional structured information
+except for ``.msg`` -- which is frequently empty and yields a confusing
+``CommandError('')`` display.
+"""
+
from datalad.runner.exception import CommandError
@@ -5,7 +13,4 @@ def commanderror_repr(self) -> str:
return self.to_str()
-# without overwriting __repr__ it would use RuntimeError's variant
-# with ignore all info but `.msg` which will be empty frequently
-# and confuse people with `CommandError('')`
CommandError.__repr__ = commanderror_repr
diff --git a/datalad_next/patches/configuration.py b/datalad_next/patches/configuration.py
index e718141ee..77c66e655 100644
--- a/datalad_next/patches/configuration.py
+++ b/datalad_next/patches/configuration.py
@@ -56,7 +56,7 @@ def __call__(
raise ValueError(
'Scope selection is not supported for dumping')
- # normalize variable specificatons
+ # normalize variable specifications
specs = []
for s in ensure_list(spec):
if isinstance(s, tuple):
diff --git a/datalad_next/patches/create_sibling_gitlab.py b/datalad_next/patches/create_sibling_gitlab.py
index c4c8974fe..971953dc3 100644
--- a/datalad_next/patches/create_sibling_gitlab.py
+++ b/datalad_next/patches/create_sibling_gitlab.py
@@ -1,5 +1,7 @@
-"""
+"""Streamline user experience
+Discontinue advertising the ``hierarchy`` layout, and better explain
+limitations of the command.
"""
import datalad.distributed.create_sibling_gitlab as mod_gitlab
diff --git a/datalad_next/patches/distribution_dataset.py b/datalad_next/patches/distribution_dataset.py
index f637006d4..4a56113d8 100644
--- a/datalad_next/patches/distribution_dataset.py
+++ b/datalad_next/patches/distribution_dataset.py
@@ -33,5 +33,5 @@ def resolve_path(path, ds=None, ds_resolved=None):
resolve_path,
msg='Apply datalad-next patch to distribution.dataset:resolve_path')
-# re-use docs
+# reuse docs
resolve_path.__doc__ = orig_resolve_path.__doc__
diff --git a/datalad_next/patches/enabled.py b/datalad_next/patches/enabled.py
index 7fe207ca1..c705c0f60 100644
--- a/datalad_next/patches/enabled.py
+++ b/datalad_next/patches/enabled.py
@@ -12,4 +12,6 @@
test_keyring,
customremotes_main,
create_sibling_gitlab,
+ run,
+ update,
)
diff --git a/datalad_next/patches/push_optimize.py b/datalad_next/patches/push_optimize.py
index 67f915911..b2887665c 100644
--- a/datalad_next/patches/push_optimize.py
+++ b/datalad_next/patches/push_optimize.py
@@ -266,12 +266,12 @@ def _get_push_target(repo, target_arg):
-------
str or None, str, str or None, list or None
Target label, if determined; status label; optional message;
- git-push-dryrun result for re-use or None, if no dry-run was
+ git-push-dryrun result for reuse or None, if no dry-run was
attempted.
"""
# verified or auto-detected
target = None
- # for re-use
+ # for reuse
wannabe_gitpush = None
if not target_arg:
# let Git figure out what needs doing
diff --git a/datalad_next/patches/run.py b/datalad_next/patches/run.py
new file mode 100644
index 000000000..b7672d7ac
--- /dev/null
+++ b/datalad_next/patches/run.py
@@ -0,0 +1,90 @@
+"""Enhance ``run()`` placeholder substitutions to honor configuration defaults
+
+Previously, ``run()`` would not recognize configuration defaults for
+placeholder substitution. This means that any placeholders globally declared in
+``datalad.interface.common_cfg``, or via ``register_config()`` in DataLad
+extensions would not be effective.
+
+This patch makes run's ``format_command()`` helper include such defaults
+explicitly, and thereby enables the global declaration of substitution defaults.
+
+Moreover, a ``{python}`` placeholder is now defined via this mechanism, and
+points to the value of ``sys.executable`` by default. This particular
+placeholder was found to be valuable for improving the portability of
+run-recording across (specific) Python versions, or across different (virtual)
+environments. See https://github.com/datalad/datalad-container/issues/224 for
+an example use case.
+
+https://github.com/datalad/datalad/pull/7509
+"""
+
+from itertools import filterfalse
+import sys
+
+from datalad.core.local.run import (
+ GlobbedPaths,
+ SequenceFormatter,
+ normalize_command,
+ quote_cmdlinearg,
+)
+from datalad.interface.common_cfg import definitions as cfg_defs
+from datalad.support.constraints import EnsureStr
+from datalad.support.extensions import register_config
+
+from . import apply_patch
+
+
+# This function is taken from datalad-core@a96c51c0b2794b2a2b4432ec7bd51f260cb91a37
+# datalad/core/local/run.py
+# The change has been proposed in https://github.com/datalad/datalad/pull/7509
+def format_command(dset, command, **kwds):
+ """Plug in placeholders in `command`.
+
+ Parameters
+ ----------
+ dset : Dataset
+ command : str or list
+
+ `kwds` is passed to the `format` call. `inputs` and `outputs` are converted
+ to GlobbedPaths if necessary.
+
+ Returns
+ -------
+ formatted command (str)
+ """
+ command = normalize_command(command)
+ sfmt = SequenceFormatter()
+ cprefix = 'datalad.run.substitutions.'
+
+ def not_subst(x):
+ return not x.startswith(cprefix)
+
+ for k in set(filterfalse(not_subst, cfg_defs.keys())).union(
+ filterfalse(not_subst, dset.config.keys())):
+ v = dset.config.get(
+ k,
+ # pull a default from the config definitions
+ # if we have no value, but a key
+ cfg_defs.get(k, {}).get('default', None))
+ sub_key = k.replace(cprefix, "")
+ if sub_key not in kwds:
+ kwds[sub_key] = v
+
+ for name in ["inputs", "outputs"]:
+ io_val = kwds.pop(name, None)
+ if not isinstance(io_val, GlobbedPaths):
+ io_val = GlobbedPaths(io_val, pwd=kwds.get("pwd"))
+ kwds[name] = list(map(quote_cmdlinearg, io_val.expand(dot=False)))
+ return sfmt.format(command, **kwds)
+
+
+apply_patch(
+ 'datalad.core.local.run', None, 'format_command', format_command)
+register_config(
+ 'datalad.run.substitutions.python',
+ 'Substitution for {python} placeholder',
+ description='Path to a Python interpreter executable',
+ type=EnsureStr(),
+ default=sys.executable,
+ dialog='question',
+)
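
A hedged usage sketch of the resulting behavior (dataset path and commands
are illustrative only):

```python
import sys
from datalad.api import Dataset

ds = Dataset('/tmp/run-demo').create(result_renderer='disabled')
# nothing configures {python} explicitly, so the registered default
# (sys.executable) is picked up by the patched format_command()
ds.run('{python} -c "print(42)"', result_renderer='disabled')
# an explicit configuration still takes precedence over the default
ds.config.set('datalad.run.substitutions.python', sys.executable,
              scope='branch')
```
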
diff --git a/datalad_next/patches/tests/test_annex_progress_logging.py b/datalad_next/patches/tests/test_annex_progress_logging.py
index 82e211474..6f575f305 100644
--- a/datalad_next/patches/tests/test_annex_progress_logging.py
+++ b/datalad_next/patches/tests/test_annex_progress_logging.py
@@ -1,4 +1,7 @@
+from datalad_next.tests.marker import skipif_no_network
+
+@skipif_no_network
def test_uncurl_progress_reporting_to_annex(existing_dataset, monkeypatch):
"""Set up a repo that is used to download a key,
check that we see progress reports
diff --git a/datalad_next/patches/tests/test_run.py b/datalad_next/patches/tests/test_run.py
new file mode 100644
index 000000000..721e6de96
--- /dev/null
+++ b/datalad_next/patches/tests/test_run.py
@@ -0,0 +1,25 @@
+import pytest
+
+from datalad_next.exceptions import IncompleteResultsError
+from datalad_next.tests.utils import (
+ SkipTest,
+ assert_result_count,
+)
+
+
+def test_substitution_config_default(existing_dataset):
+ ds = existing_dataset
+
+ if ds.config.get('datalad.run.substitutions.python') is not None:
+ # we want to test default handling when no config is set
+ raise SkipTest(
+ 'Test assumptions conflict with effective configuration')
+
+ # the {python} placeholder is not explicitly defined, but it has
+ # a default, which run() should discover and use
+ res = ds.run('{python} -c "True"', result_renderer='disabled')
+ assert_result_count(res, 1, action='run', status='ok')
+
+ # make sure we could actually detect breakage with the check above
+ with pytest.raises(IncompleteResultsError):
+ ds.run('{python} -c "breakage"', result_renderer='disabled')
diff --git a/datalad_next/patches/update.py b/datalad_next/patches/update.py
new file mode 100644
index 000000000..d0f7be85b
--- /dev/null
+++ b/datalad_next/patches/update.py
@@ -0,0 +1,58 @@
+"""Robustify ``update()`` target detection for adjusted mode datasets
+
+The true cause of the problem is not well understood.
+https://github.com/datalad/datalad/issues/7507 documents that it is not
+easy to capture the breakage in a test.
+"""
+
+from . import apply_patch
+
+
+# This function is taken from datalad-core@cdc0ceb30ae04265c5369186acf2ab2683a8ec96
+# datalad/distribution/update.py
+# The change has been proposed in https://github.com/datalad/datalad/pull/7522
+def _choose_update_target(repo, branch, remote, cfg_remote):
+ """Select a target to update `repo` from.
+
+ Note: This function is not concerned with _how_ the update is done (e.g.,
+ merge, reset, ...).
+
+ Parameters
+ ----------
+ repo : Repo instance
+ branch : str
+ The current branch.
+ remote : str
+ The remote which updates are coming from.
+ cfg_remote : str
+ The configured upstream remote.
+
+ Returns
+ -------
+ str (the target) or None if a choice wasn't made.
+ """
+ target = None
+ if cfg_remote and remote == cfg_remote:
+ # Use the configured cfg_remote branch as the target.
+ #
+ # In this scenario, it's tempting to use FETCH_HEAD as the target. For
+ # a merge, that would be the equivalent of 'git pull REMOTE'. But doing
+ # so would be problematic when the GitRepo.fetch() call was passed
+ # all_=True. Given we can't use FETCH_HEAD, it's tempting to use the
+ # branch.*.merge value, but that assumes a value for remote.*.fetch.
+ target = repo.call_git_oneline(
+ ["rev-parse", "--symbolic-full-name", "--abbrev-ref=strict",
+ # THIS IS THE PATCH: prefix @{upstream} with the branch name
+ # of the corresponding branch
+ f"{repo.get_corresponding_branch(branch) or ''}" "@{upstream}"],
+ read_only=True)
+ elif branch:
+ remote_branch = "{}/{}".format(remote, branch)
+ if repo.commit_exists(remote_branch):
+ target = remote_branch
+ return target
+
+
+apply_patch(
+ 'datalad.distribution.update', None, '_choose_update_target',
+ _choose_update_target)
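
In isolation, the effect of the patched invocation looks like this (branch
names are hypothetical):

```python
# hypothetical corresponding branch of an adjusted-mode checkout
corresponding = 'main'  # what repo.get_corresponding_branch() would return

args = [
    "rev-parse", "--symbolic-full-name", "--abbrev-ref=strict",
    f"{corresponding or ''}" "@{upstream}",
]
print(args[-1])  # -> main@{upstream}
# bare '@{upstream}' would resolve relative to HEAD, which points at the
# adjusted branch and typically has no upstream configured
```
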
diff --git a/datalad_next/runners/__init__.py b/datalad_next/runners/__init__.py
index ce3fa932c..cca244f9c 100644
--- a/datalad_next/runners/__init__.py
+++ b/datalad_next/runners/__init__.py
@@ -1,6 +1,44 @@
"""Execution of subprocesses
-This module import all relevant components for subprocess execution.
+This module provides all relevant components for subprocess execution.
+
+.. currentmodule:: datalad_next.runners
+
+Low-level tooling
+-----------------
+
+Two essential process execution/management utilities are provided: one for
+generic command execution, and one for executing commands in the context
+of a Git repository.
+
+.. autosummary::
+ :toctree: generated
+
+ GitRunner
+ Runner
+
+Additional information on the design of the subprocess execution tooling
+is available from https://docs.datalad.org/design/threaded_runner.html
+
+A standard exception type is used to communicate any process termination
+with a non-zero exit code.
+
+.. autosummary::
+ :toctree: generated
+
+ CommandError
+
+Command output can be processed via "protocol" implementations that are
+inspired by ``asyncio.SubprocessProtocol``.
+
+.. autosummary::
+ :toctree: generated
+
+ KillOutput
+ NoCapture
+ StdOutCapture
+ StdErrCapture
+ StdOutErrCapture
"""
# runners
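
A brief usage sketch of the documented tooling (hedged; assumes a ``git``
executable on ``PATH`` and the re-exports listed above):

```python
from datalad_next.runners import (
    CommandError,
    Runner,
    StdOutCapture,
)

try:
    res = Runner().run(['git', '--version'], protocol=StdOutCapture)
    print(res['stdout'].strip())
except CommandError as e:
    # raised for any process termination with a non-zero exit code
    print(e)
```
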
diff --git a/datalad_next/tests/fixtures.py b/datalad_next/tests/fixtures.py
index 71d9dbbfb..254a36157 100644
--- a/datalad_next/tests/fixtures.py
+++ b/datalad_next/tests/fixtures.py
@@ -221,6 +221,8 @@ def existing_noannex_dataset(dataset):
@pytest.fixture(autouse=False, scope="session")
def webdav_credential():
+    """Provides the HTTP Basic authentication credential necessary to access
+    the server provided by the ``webdav_server`` fixture."""
yield dict(
name='dltest-my&=webdav',
user='datalad',
@@ -256,6 +258,8 @@ def webdav_server(tmp_path_factory, webdav_credential):
@pytest.fixture(autouse=False, scope="session")
def http_credential():
+ """Provides the HTTP Basic authentication credential necessary to access the
+ HTTP server provided by the ``http_server_with_basicauth`` fixture."""
yield dict(
name='dltest-my&=http',
user='datalad',
@@ -273,9 +277,6 @@ def http_server(tmp_path_factory):
- ``path``: ``Path`` instance of the served temporary directory
- ``url``: HTTP URL to access the HTTP server
-
- Server access requires HTTP Basic authentication with the credential
- provided by the ``webdav_credential`` fixture.
"""
# must use the factory to get a unique path even when a concrete
# test also uses `tmp_path`
@@ -289,7 +290,7 @@ def http_server(tmp_path_factory):
@pytest.fixture(autouse=False, scope="function")
def http_server_with_basicauth(tmp_path_factory, http_credential):
- """Like ``http_server`` but requiring authenticat with ``http_credential``
+ """Like ``http_server`` but requiring authentication via ``http_credential``
"""
path = tmp_path_factory.mktemp("webdav")
server = HTTPPath(
@@ -367,6 +368,10 @@ def httpbin(httpbin_service):
raises ``SkipTest`` whenever any of these undesired conditions is
detected. Otherwise it just relays ``httpbin_service``.
"""
+ if os.environ.get('DATALAD_TESTS_NONETWORK'):
+ raise SkipTest(
+ 'Not running httpbin-based test: NONETWORK flag set'
+ )
if 'APPVEYOR' in os.environ and 'DEPLOY_HTTPBIN_IMAGE' not in os.environ:
raise SkipTest(
"Not running httpbin-based test on appveyor without "
diff --git a/datalad_next/tests/marker.py b/datalad_next/tests/marker.py
new file mode 100644
index 000000000..819970628
--- /dev/null
+++ b/datalad_next/tests/marker.py
@@ -0,0 +1,7 @@
+import os
+import pytest
+
+skipif_no_network = pytest.mark.skipif(
+ 'DATALAD_TESTS_NONETWORK' in os.environ,
+ reason='DATALAD_TESTS_NONETWORK is set'
+)
diff --git a/datalad_next/tests/utils.py b/datalad_next/tests/utils.py
index a92a87176..7339c5694 100644
--- a/datalad_next/tests/utils.py
+++ b/datalad_next/tests/utils.py
@@ -74,7 +74,7 @@ def __enter__(self):
from cheroot import wsgi
from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError as e:
- raise SkipTest('No WSGI capabilities') from e
+ raise SkipTest('No WSGI capabilities. Install cheroot and/or wsgidav') from e
if self.auth:
auth = {self.auth[0]: {'password': self.auth[1]}}
diff --git a/datalad_next/types/archivist.py b/datalad_next/types/archivist.py
index 12e9b2b32..17c538dbe 100644
--- a/datalad_next/types/archivist.py
+++ b/datalad_next/types/archivist.py
@@ -74,7 +74,7 @@ class ArchivistLocator:
"""
akey: AnnexKey
member: PurePosixPath
- size: int
+ size: int | None = None
# datalad-archives did not have the type info, we want to be
# able to handle those too, make optional
atype: ArchiveType | None = None
@@ -91,21 +91,21 @@ def __str__(self) -> str:
@classmethod
def from_str(cls, url: str):
"""Return ``ArchivistLocator`` from ``str`` form"""
- url_matched = _recognized_urls.match(url)
- if not url_matched:
+ url_match = _recognized_urls.match(url)
+ if not url_match:
raise ValueError('Unrecognized dl+archives locator syntax')
- url_matched = url_matched.groupdict()
+ url_matched = url_match.groupdict()
# convert to desired type
akey = AnnexKey.from_str(url_matched['key'])
# archive member properties
- props_matched = _archive_member_props.match(url_matched['props'])
- if not props_matched:
+ props_match = _archive_member_props.match(url_matched['props'])
+ if not props_match:
# without at least a 'path' there is nothing we can do here
raise ValueError(
'dl+archives locator contains invalid archive member '
f'specification: {url_matched["props"]!r}')
- props_matched = props_matched.groupdict()
+ props_matched = props_match.groupdict()
amember_path = PurePosixPath(props_matched['path'])
if amember_path.is_absolute():
raise ValueError(
@@ -116,6 +116,8 @@ def from_str(cls, url: str):
# size is optional, the regex ensures that it is an int
size = props_matched.get('size')
+ if size is not None:
+ size = int(size)
# archive type, could be None
atype = props_matched.get('atype')
@@ -134,6 +136,8 @@ def from_str(cls, url: str):
atype = ArchiveType.zip
elif '.tar' in suf:
atype = ArchiveType.tar
+ elif '.tgz' in suf:
+ atype = ArchiveType.tar
return cls(
akey=akey,
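
A hedged usage sketch of the more lenient locator (key and member values are
illustrative only):

```python
from datalad_next.types.archivist import ArchivistLocator

loc = ArchivistLocator.from_str(
    'dl+archive:MD5E-s234--e9f624eb778e6f945771c543b6e9c7b2.tgz'
    '#path=dir/file.csv&size=234'
)
assert loc.size == 234         # now converted to int, or None when absent
assert loc.atype is not None   # '.tgz' is recognized as a TAR archive
```
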
diff --git a/datalad_next/types/tests/test_archivist.py b/datalad_next/types/tests/test_archivist.py
index 8f781633d..b3d03ac06 100644
--- a/datalad_next/types/tests/test_archivist.py
+++ b/datalad_next/types/tests/test_archivist.py
@@ -23,6 +23,12 @@ def test_archivistlocator():
assert ArchivistLocator.from_str(
'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tar#path=f.txt'
).atype == ArchiveType.tar
+ assert ArchivistLocator.from_str(
+ 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tgz#path=f.txt'
+ ).atype == ArchiveType.tar
+ assert ArchivistLocator.from_str(
+ 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tar.gz#path=f.txt'
+ ).atype == ArchiveType.tar
assert ArchivistLocator.from_str(
'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.zip#path=f.txt'
).atype == ArchiveType.zip
diff --git a/datalad_next/url_operations/http.py b/datalad_next/url_operations/http.py
index 854677c4e..11eaddc13 100644
--- a/datalad_next/url_operations/http.py
+++ b/datalad_next/url_operations/http.py
@@ -9,11 +9,13 @@
from typing import Dict
import requests
from requests_toolbelt import user_agent
-import www_authenticate
import datalad
-from datalad_next.utils.requests_auth import DataladAuth
+from datalad_next.utils.requests_auth import (
+ DataladAuth,
+ parse_www_authenticate,
+)
from . import (
UrlOperations,
UrlOperationsRemoteError,
@@ -233,7 +235,7 @@ def probe_url(self, url, timeout=10.0, headers=None):
headers=headers,
)
if 'www-authenticate' in req.headers:
- props['auth'] = www_authenticate.parse(
+ props['auth'] = parse_www_authenticate(
req.headers['www-authenticate'])
props['is_redirect'] = True if req.history else False
props['status_code'] = req.status_code
@@ -244,16 +246,16 @@ def _stream_download_from_request(
from_url = r.url
hasher = self._get_hasher(hash)
progress_id = self._get_progress_id(from_url, to_path)
- # get download size, but not every server provides it
+        # try to get the download size; it might not be provided, e.g. if
+        # chunked transfer encoding is used
try:
# for compressed downloads the content length refers to the
# compressed content
expected_size = int(r.headers.get('content-length'))
except (ValueError, TypeError):
- # some responses do not have a `content-length` header,
- # even though they HTTP200 and deliver the content.
- # example:
- # https://github.com/datalad/datalad-next/pull/365#issuecomment-1557114109
+ # some HTTP-200 responses do not have a `content-length` header,
+            # e.g. if chunked transfer encoding is used. In this case, set
+            # up everything to calculate the size ourselves
expected_size = None
self._progress_report_start(
progress_id,
@@ -264,7 +266,7 @@ def _stream_download_from_request(
)
fp = None
- props = {}
+ props: Dict[str, str] = {}
try:
# we can only write to file-likes opened in bytes mode
fp = sys.stdout.buffer if to_path is None else open(to_path, 'wb')
@@ -274,14 +276,16 @@ def _stream_download_from_request(
# TODO make chunksize a config item, 65536 is the default in
# requests_toolbelt
for chunk in r.raw.stream(amt=65536, decode_content=True):
- # update how much data was transferred from the remote server,
- # but we cannot use the size of the chunk for that,
- # because content might be downloaded with transparent
- # (de)compression. ask the download stream itself for its
- # "position"
+ # update how much data was transferred from the remote server.
if expected_size:
+ # if we have an expected size, we don't use the size of the
+ # chunk for that because content might be downloaded with
+                # transparent (de)compression. Instead, we ask the download
+ # stream itself for its "position".
tell = r.raw.tell()
else:
+ # if we do not have an expected size, all we can use is
+ # the size of the downloaded chunk.
tell = downloaded_bytes + len(chunk)
self._progress_report_update(
progress_id,
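
Distilled, the progress-position logic now reads (a sketch of the branching
above, not the actual helper):

```python
def progress_position(raw, chunk, downloaded_bytes, expected_size):
    # mirrors the conditional in _stream_download_from_request()
    if expected_size:
        # with a content-length, ask the stream for its position so that
        # transparent (de)compression does not skew the report
        return raw.tell()
    # without one (e.g. chunked transfer encoding), the decoded chunk
    # size is the only available measure
    return downloaded_bytes + len(chunk)
```
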
diff --git a/datalad_next/url_operations/tests/test_http.py b/datalad_next/url_operations/tests/test_http.py
index fe7de2ecb..a04ec085c 100644
--- a/datalad_next/url_operations/tests/test_http.py
+++ b/datalad_next/url_operations/tests/test_http.py
@@ -1,5 +1,10 @@
+from __future__ import annotations
+
import gzip
import pytest
+import requests
+
+from datalad_next.tests.marker import skipif_no_network
from ..any import AnyUrlOperations
from ..http import (
@@ -59,6 +64,7 @@ def test_custom_http_headers_via_config(datalad_cfg):
assert huo._headers['X-Funky'] == 'Stuff'
+@skipif_no_network
def test_transparent_decompression(tmp_path):
# this file is offered with transparent compression/decompression
# by the github webserver
@@ -73,6 +79,7 @@ def test_transparent_decompression(tmp_path):
'[build-system]\nrequires = ["setuptools >= 43.0.0", "wheel"]\n'
+@skipif_no_network
def test_compressed_file_stay_compressed(tmp_path):
# this file is offered with transparent compression/decompression
# by the github webserver, but is also actually gzip'ed
@@ -98,6 +105,29 @@ def test_compressed_file_stay_compressed(tmp_path):
f.read(1000)
+def test_size_less_progress_reporting(http_server, monkeypatch):
+ test_file = (http_server.path / 'test.bin').open('wb')
+ test_file.seek(100000)
+ test_file.write(b'a')
+ test_file.close()
+
+ r = requests.get(http_server.url + '/test.bin', stream=True)
+ del r.headers['content-length']
+
+ logs = []
+ # patch the log_progress() used in http.py
+ def catch_progress(*_, **kwargs):
+ logs.append(kwargs)
+
+ import datalad_next.url_operations
+ monkeypatch.setattr(datalad_next.url_operations, 'log_progress', catch_progress)
+
+ http_handler = HttpUrlOperations()
+ http_handler._stream_download_from_request(r, None)
+ assert any('update' in kwargs for kwargs in logs)
+ assert any(('total', None) in kwargs.items() for kwargs in logs)
+
+
def test_header_adding():
default_headers = dict(key_1='value_1')
added_headers = dict(key_2='value_2')
diff --git a/datalad_next/utils/requests_auth.py b/datalad_next/utils/requests_auth.py
index 742e1d1a2..089055995 100644
--- a/datalad_next/utils/requests_auth.py
+++ b/datalad_next/utils/requests_auth.py
@@ -7,7 +7,6 @@
from typing import Dict
from urllib.parse import urlparse
import requests
-import www_authenticate
from datalad_next.config import ConfigManager
from datalad_next.utils import CredentialManager
@@ -16,7 +15,77 @@
lgr = logging.getLogger('datalad.ext.next.utils.requests_auth')
-__all__ = ['DataladAuth', 'HTTPBearerTokenAuth']
+__all__ = ['DataladAuth', 'HTTPBearerTokenAuth', 'parse_www_authenticate']
+
+
+def parse_www_authenticate(hdr: str) -> dict:
+ """Parse HTTP www-authenticate header
+
+ This helper uses ``requests`` utilities to parse the ``www-authenticate``
+ header as represented in a ``requests.Response`` instance. The header may
+ contain any number of challenge specifications.
+
+    The implementation follows RFC 7235, where a challenge's parameters are
+    given either as a comma-separated list of parameters, or as a single
+    sequence of characters capable of holding base64-encoded information
+    (a token68). Parameters are name=value pairs, where the name token is
+    matched case-insensitively, and each parameter name MUST only occur
+    once per challenge.
+
+ Returns
+ -------
+ dict
+ Keys are casefolded challenge labels (e.g., 'basic', 'digest').
+ Values are: ``None`` (no parameter), ``str`` (a token68), or
+ ``dict`` (name/value mapping of challenge parameters)
+ """
+ plh = requests.utils.parse_list_header
+ pdh = requests.utils.parse_dict_header
+ challenges = {}
+ challenge = None
+ # challenges as well as their properties are in a single
+ # comma-separated list
+ for item in plh(hdr):
+ # parse the item into a key/value set
+ # the value will be `None` if this item was no mapping
+ k, v = pdh(item).popitem()
+ # split the key to check for a challenge spec start
+ key_split = k.split(' ', maxsplit=1)
+ if len(key_split) > 1 or v is None:
+ item_suffix = item[len(key_split[0]) + 1:]
+        challenge = [item_suffix] if item_suffix else None
+ challenges[key_split[0].casefold()] = challenge
+ else:
+ # implementation logic assumes that the above conditional
+ # was triggered before we ever get here
+ assert challenge
+ challenge.append(item)
+
+ return {
+ challenge: _convert_www_authenticate_items(items)
+ for challenge, items in challenges.items()
+ }
+
+
+def _convert_www_authenticate_items(items: list) -> None | str | dict:
+ pdh = requests.utils.parse_dict_header
+ # according to RFC7235, items can be:
+ # either a comma-separated list of parameters
+ # or a single sequence of characters capable of holding base64-encoded
+ # information.
+ # parameters are name=value pairs, where the name token is matched
+ # case-insensitively, and each parameter name MUST only occur once
+ # per challenge.
+ if items is None:
+ return None
+ elif len(items) == 1 and pdh(items[0].rstrip('=')).popitem()[1] is None:
+        # this item matches the token68 appearance (no name/value
+        # pair remains after potential base64 padding is removed)
+ return items[0]
+ else:
+ return {
+ k.casefold(): v for i in items for k, v in pdh(i).items()
+ }
class DataladAuth(requests.auth.AuthBase):
@@ -64,7 +133,7 @@ def save_entered_credential(self, suggested_name: str | None = None,
"""
if self._entered_credential is None:
# nothing to do
- return
+ return None
return self._credman.set(
name=None,
_lastused=True,
@@ -178,7 +247,7 @@ def handle_401(self, r, **kwargs):
header is ignored.
Server-provided 'www-authenticate' challenges are inspected, and
- corresponding credentials are looked-up (if needed) and subequently
+ corresponding credentials are looked-up (if needed) and subsequently
tried in a re-request to the original URL after performing any
necessary actions to meet a given challenge. Such a re-request
is then using the same connection as the original request.
@@ -201,7 +270,7 @@ def handle_401(self, r, **kwargs):
# www-authenticate with e.g. 403s
return r
# which auth schemes does the server support?
- auth_schemes = www_authenticate.parse(r.headers['www-authenticate'])
+ auth_schemes = parse_www_authenticate(r.headers['www-authenticate'])
ascheme, credname, cred = self._get_credential(r.url, auth_schemes)
if cred is None or 'secret' not in cred:
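
A quick sketch of the parser's output shape (values follow the docstring
above; see also the dedicated tests below):

```python
from datalad_next.utils.requests_auth import parse_www_authenticate

parsed = parse_www_authenticate('Basic realm="example.com", Bearer')
# challenge labels are casefolded; a parameter-less challenge maps to None
assert parsed == {'basic': {'realm': 'example.com'}, 'bearer': None}
```
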
diff --git a/datalad_next/utils/tests/test_parse_www_authenticate.py b/datalad_next/utils/tests/test_parse_www_authenticate.py
new file mode 100644
index 000000000..d69fcd67b
--- /dev/null
+++ b/datalad_next/utils/tests/test_parse_www_authenticate.py
@@ -0,0 +1,45 @@
+
+from ..requests_auth import parse_www_authenticate
+
+
+challenges = (
+ # just challenge type
+ ('Negotiate',
+ [('negotiate', None)]),
+ # challenge and just a token, tolerate any base64 padding
+ ('Negotiate abcdef',
+ [('negotiate', 'abcdef')]),
+ ('Negotiate abcdef=',
+ [('negotiate', 'abcdef=')]),
+ ('Negotiate abcdef==',
+ [('negotiate', 'abcdef==')]),
+ # standard bearer
+ ('Bearer realm=example.com',
+ [('bearer', {'realm': 'example.com'})]),
+ # standard digest
+ ('Digest realm="example.com", qop="auth,auth-int", nonce="abcdef", '
+ 'opaque="ghijkl"',
+ [('digest', {'realm': 'example.com', 'qop': 'auth,auth-int',
+ 'nonce': 'abcdef', 'opaque': 'ghijkl'})]),
+ # multi challenge
+ ('Basic speCial="paf ram", realm="basIC", '
+ 'Bearer, '
+ 'Digest realm="http-auth@example.org", qop="auth, auth-int", '
+ 'algorithm=MD5',
+ [('basic', {'special': 'paf ram', 'realm': 'basIC'}),
+ ('bearer', None),
+ ('digest', {'realm': "http-auth@example.org", 'qop': "auth, auth-int",
+ 'algorithm': 'MD5'})]),
+ # same challenge, multiple times, last one wins
+ ('Basic realm="basIC", '
+ 'Basic realm="complex"',
+ [('basic', {'realm': 'complex'})]),
+)
+
+
+def test_parse_www_authenticate():
+ for hdr, targets in challenges:
+ res = parse_www_authenticate(hdr)
+ for ctype, props in targets:
+ assert ctype in res
+ assert res[ctype] == props
diff --git a/docs/CODEOWNERS b/docs/CODEOWNERS
index 4e97cc044..09997217e 100644
--- a/docs/CODEOWNERS
+++ b/docs/CODEOWNERS
@@ -10,3 +10,5 @@
# Merge requests are accepted (automatically) when all (relevant)
# status checks have passed, and RT approval was given.
* michael.hanke@gmail.com
+/iter_collections/ christian.moench@web.de
+/runners/ christian.moench@web.de
diff --git a/docs/source/conf.py b/docs/source/conf.py
index e4b0ea7ca..aa0645d3b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -26,6 +26,12 @@
import datalad_next
+# this cheats sphinx into thinking that LeanGitRepo is not
+# merely imported, and convinces it to document it
+import datalad_next.datasets as dnd
+dnd.LeanGitRepo.__module__ = dnd.__name__
+dnd.LeanGitRepo.__name__ = 'LeanGitRepo'
+
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
diff --git a/docs/source/developer_guide/index.rst b/docs/source/developer_guide/index.rst
index 1ae91c6a1..25c398788 100644
--- a/docs/source/developer_guide/index.rst
+++ b/docs/source/developer_guide/index.rst
@@ -1,7 +1,7 @@
.. _devguide:
-The developer's guide to datalad-next
-*************************************
+Developer Guide
+===============
This guide sheds light on new and reusable subsystems developed in ``datalad-next``.
The target audience is developers who intend to build on or use functionality provided by this extension.
@@ -10,4 +10,4 @@ The target audience are developers that intend to build up on or use functionali
:maxdepth: 2
constraints.rst
- contributing.rst
\ No newline at end of file
+ contributing.rst
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 86db9be4c..fea428125 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -29,82 +29,24 @@ extension has to be enabled for auto-loading by executing::
Doing so will enable the extension to also alter the behavior of the core DataLad
package and its commands.
-API
-===
-High-level API commands
------------------------
+Provided functionality
+======================
.. toctree::
- :maxdepth: 2
+ :maxdepth: 1
api.rst
-
-Command line reference
-----------------------
-
-.. toctree::
- :maxdepth: 2
-
cmd.rst
-
-
-Python tooling
---------------
-
-``datalad-next`` comprises a number of more-or-less self-contained
-mini-packages providing particular functionality.
-
-.. toctree::
- :maxdepth: 1
-
Infrastructure classes and utilities
-
-
-Git remote helpers
-------------------
-
-.. toctree::
- :maxdepth: 2
-
git-remote-helpers.rst
-
-
-Git-annex backends
-------------------
-
-.. toctree::
- :maxdepth: 2
-
annex-backends.rst
-
-
-
-Git-annex special remotes
--------------------------
-
-
-.. toctree::
- :maxdepth: 2
-
annex-specialremotes.rst
-
-
-
-DataLad patches
----------------
-
-Patches that are automatically applied to DataLad when loading the
-``datalad-next`` extension package.
-
-.. toctree::
- :maxdepth: 2
-
patches.rst
-Developer Guide
----------------
+Contributor information
+=======================
.. toctree::
:maxdepth: 2
diff --git a/docs/source/patches.rst b/docs/source/patches.rst
index a25c1aab2..336545132 100644
--- a/docs/source/patches.rst
+++ b/docs/source/patches.rst
@@ -1,18 +1,26 @@
DataLad patches
***************
+Patches that are automatically applied to DataLad when loading the
+``datalad-next`` extension package.
+
.. currentmodule:: datalad_next.patches
.. autosummary::
:toctree: generated
annexrepo
+ cli_configoverrides
+ commanderror
common_cfg
configuration
create_sibling_ghlike
+ create_sibling_gitlab
customremotes_main
distribution_dataset
interface_utils
push_optimize
push_to_export_remote
- test_keyring
+ run
siblings
+ test_keyring
+ update
diff --git a/setup.cfg b/setup.cfg
index 7b4b01f00..3f6897aed 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -12,11 +12,10 @@ classifiers =
Programming Language :: Python :: 3
[options]
-python_requires = >= 3.7
+python_requires = >= 3.8
install_requires =
annexremote
datalad >= 0.18.4
- www-authenticate
humanize
packages = find_namespace:
include_package_data = True
diff --git a/tools/appveyor/submit-coverage b/tools/appveyor/submit-coverage
index 7ea560f0c..f8cef4134 100755
--- a/tools/appveyor/submit-coverage
+++ b/tools/appveyor/submit-coverage
@@ -2,8 +2,6 @@
set -e -u
-# grab coverage reports from subprocesses, see tools/coverage-bin
-python -m coverage combine -a /tmp/.coverage-entrypoints-*;
python -m coverage xml
curl -Os $CODECOV_BINARY
chmod +x codecov
diff --git a/tools/coverage-bin/datalad b/tools/coverage-bin/datalad
deleted file mode 120000
index 3e0139186..000000000
--- a/tools/coverage-bin/datalad
+++ /dev/null
@@ -1 +0,0 @@
-with_coverage
\ No newline at end of file
diff --git a/tools/coverage-bin/git-annex-backend-XDLRA b/tools/coverage-bin/git-annex-backend-XDLRA
deleted file mode 120000
index 3e0139186..000000000
--- a/tools/coverage-bin/git-annex-backend-XDLRA
+++ /dev/null
@@ -1 +0,0 @@
-with_coverage
\ No newline at end of file
diff --git a/tools/coverage-bin/git-annex-remote-archivist b/tools/coverage-bin/git-annex-remote-archivist
deleted file mode 120000
index 3e0139186..000000000
--- a/tools/coverage-bin/git-annex-remote-archivist
+++ /dev/null
@@ -1 +0,0 @@
-with_coverage
\ No newline at end of file
diff --git a/tools/coverage-bin/git-annex-remote-datalad b/tools/coverage-bin/git-annex-remote-datalad
deleted file mode 120000
index 3e0139186..000000000
--- a/tools/coverage-bin/git-annex-remote-datalad
+++ /dev/null
@@ -1 +0,0 @@
-with_coverage
\ No newline at end of file
diff --git a/tools/coverage-bin/git-annex-remote-datalad-archives b/tools/coverage-bin/git-annex-remote-datalad-archives
deleted file mode 120000
index 3e0139186..000000000
--- a/tools/coverage-bin/git-annex-remote-datalad-archives
+++ /dev/null
@@ -1 +0,0 @@
-with_coverage
\ No newline at end of file
diff --git a/tools/coverage-bin/git-annex-remote-ora b/tools/coverage-bin/git-annex-remote-ora
deleted file mode 120000
index 3e0139186..000000000
--- a/tools/coverage-bin/git-annex-remote-ora
+++ /dev/null
@@ -1 +0,0 @@
-with_coverage
\ No newline at end of file
diff --git a/tools/coverage-bin/git-annex-remote-uncurl b/tools/coverage-bin/git-annex-remote-uncurl
deleted file mode 120000
index 3e0139186..000000000
--- a/tools/coverage-bin/git-annex-remote-uncurl
+++ /dev/null
@@ -1 +0,0 @@
-with_coverage
\ No newline at end of file
diff --git a/tools/coverage-bin/git-remote-datalad-annex b/tools/coverage-bin/git-remote-datalad-annex
deleted file mode 120000
index 3e0139186..000000000
--- a/tools/coverage-bin/git-remote-datalad-annex
+++ /dev/null
@@ -1 +0,0 @@
-with_coverage
\ No newline at end of file
diff --git a/tools/coverage-bin/sitecustomize.py b/tools/coverage-bin/sitecustomize.py
deleted file mode 100755
index c1ba919b9..000000000
--- a/tools/coverage-bin/sitecustomize.py
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env python
-import coverage
-coverage.process_startup()
diff --git a/tools/coverage-bin/with_coverage b/tools/coverage-bin/with_coverage
deleted file mode 100755
index 82c79d23e..000000000
--- a/tools/coverage-bin/with_coverage
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-#
-# A little helper to overload executables with a coverage harness
-
-set -eu
-
-# what script is actually being called
-bin=$(basename $0)
-# where does this script live
-curbin=$(which "$bin")
-# this seems to determine where the full package puts it binaries
-# in -core this is using `datalad` as the reference binary,
-# here explicitly, and less confusingly use the name of the coverage
-# wrapper
-curdatalad=$(which with_coverage)
-curdir=$(dirname $curdatalad)
-
-COVERAGE_RUN="-m coverage run"
-export COVERAGE_PROCESS_START=$PWD/../.coveragerc
-export PYTHONPATH="$PWD/../tools/coverage-bin/"
-# remove the coverage wrapper binary location from the PATH
-export PATH=${PATH//$curdir:/}
-# check where the datalad binary is to
-# - figure out which Python to call
-# - to verify that we are in the right/different env/location
-# and not where the coverage wrapper is coming from
-newdatalad=$(which datalad)
-newbin=$(which $bin)
-newpython=$(sed -ne '1s/#!//gp' $newdatalad)
-
-if [ $(dirname $newdatalad) = $curdir ]; then
- echo "E: binary remained the same: $newdatalad" >&2
- exit 1
-fi
-
-touch /tmp/coverages
-export COVERAGE_FILE=/tmp/.coverage-entrypoints-$RANDOM
-echo "Running now $newpython $COVERAGE_RUN -a $newbin $@" >> /tmp/coverages
-$newpython $COVERAGE_RUN -a $newbin "$@"
diff --git a/versioneer.py b/versioneer.py
index 51ca8182e..1e3753e63 100644
--- a/versioneer.py
+++ b/versioneer.py
@@ -1,5 +1,5 @@
-# Version: 0.18
+# Version: 0.29
"""The Versioneer - like a rocketeer, but for versions.
@@ -7,18 +7,14 @@
==============
* like a rocketeer, but for versions!
-* https://github.com/warner/python-versioneer
+* https://github.com/python-versioneer/python-versioneer
* Brian Warner
-* License: Public Domain
-* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy
-* [![Latest Version]
-(https://pypip.in/version/versioneer/badge.svg?style=flat)
-](https://pypi.python.org/pypi/versioneer/)
-* [![Build Status]
-(https://travis-ci.org/warner/python-versioneer.png?branch=master)
-](https://travis-ci.org/warner/python-versioneer)
-
-This is a tool for managing a recorded version number in distutils-based
+* License: Public Domain (Unlicense)
+* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3
+* [![Latest Version][pypi-image]][pypi-url]
+* [![Build Status][travis-image]][travis-url]
+
+This is a tool for managing a recorded version number in setuptools-based
python projects. The goal is to remove the tedious and error-prone "update
the embedded version string" step from your release process. Making a new
release should be as easy as recording a new tag in your version-control
@@ -27,9 +23,38 @@
## Quick Install
-* `pip install versioneer` to somewhere to your $PATH
-* add a `[versioneer]` section to your setup.cfg (see below)
-* run `versioneer install` in your source tree, commit the results
+Versioneer provides two installation modes. The "classic" vendored mode installs
+a copy of versioneer into your repository. The experimental build-time dependency mode
+is intended to allow you to skip this step and simplify the process of upgrading.
+
+### Vendored mode
+
+* `pip install versioneer` to somewhere in your $PATH
+ * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is
+ available, so you can also use `conda install -c conda-forge versioneer`
+* add a `[tool.versioneer]` section to your `pyproject.toml` or a
+ `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md))
+ * Note that you will need to add `tomli; python_version < "3.11"` to your
+ build-time dependencies if you use `pyproject.toml`
+* run `versioneer install --vendor` in your source tree, commit the results
+* verify version information with `python setup.py version`
+
+### Build-time dependency mode
+
+* `pip install versioneer` to somewhere in your $PATH
+ * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is
+ available, so you can also use `conda install -c conda-forge versioneer`
+* add a `[tool.versioneer]` section to your `pyproject.toml` or a
+ `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md))
+* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`)
+ to the `requires` key of the `build-system` table in `pyproject.toml`:
+ ```toml
+ [build-system]
+ requires = ["setuptools", "versioneer[toml]"]
+ build-backend = "setuptools.build_meta"
+ ```
+* run `versioneer install --no-vendor` in your source tree, commit the results
+* verify version information with `python setup.py version`
## Version Identifiers
@@ -61,7 +86,7 @@
for example `git describe --tags --dirty --always` reports things like
"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
-uncommitted changes.
+uncommitted changes).
The version identifier is used for multiple purposes:
@@ -166,7 +191,7 @@
Some situations are known to cause problems for Versioneer. This details the
most significant ones. More can be found on Github
-[issues page](https://github.com/warner/python-versioneer/issues).
+[issues page](https://github.com/python-versioneer/python-versioneer/issues).
### Subprojects
@@ -180,7 +205,7 @@
`setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
distributions (and upload multiple independently-installable tarballs).
* Source trees whose main purpose is to contain a C library, but which also
- provide bindings to Python (and perhaps other langauges) in subdirectories.
+ provide bindings to Python (and perhaps other languages) in subdirectories.
Versioneer will look for `.git` in parent directories, and most operations
should get the right version string. However `pip` and `setuptools` have bugs
@@ -194,9 +219,9 @@
Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
some later version.
-[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
+[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
this issue. The discussion in
-[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
+[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
issue from the Versioneer side in more detail.
[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
@@ -224,31 +249,20 @@
cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
a different virtualenv), so this can be surprising.
-[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes
+[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
this one, but upgrading to a newer version of setuptools should probably
resolve it.
-### Unicode version strings
-
-While Versioneer works (and is continually tested) with both Python 2 and
-Python 3, it is not entirely consistent with bytes-vs-unicode distinctions.
-Newer releases probably generate unicode version strings on py2. It's not
-clear that this is wrong, but it may be surprising for applications when then
-write these strings to a network connection or include them in bytes-oriented
-APIs like cryptographic checksums.
-
-[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates
-this question.
-
## Updating Versioneer
To upgrade your project to a new release of Versioneer, do the following:
* install the new Versioneer (`pip install -U versioneer` or equivalent)
-* edit `setup.cfg`, if necessary, to include any new configuration settings
- indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
-* re-run `versioneer install` in your source tree, to replace
+* edit `setup.cfg` and `pyproject.toml`, if necessary,
+ to include any new configuration settings indicated by the release notes.
+ See [UPGRADING](./UPGRADING.md) for details.
+* re-run `versioneer install --[no-]vendor` in your source tree, to replace
`SRC/_version.py`
* commit any changed files
@@ -265,35 +279,70 @@
direction and include code from all supported VCS systems, reducing the
number of intermediate scripts.
+## Similar projects
+
+* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time
+ dependency
+* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of
+ versioneer
+* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools
+ plugin
## License
To make Versioneer easier to embed, all its code is dedicated to the public
domain. The `_version.py` that it creates is also in the public domain.
-Specifically, both are released under the Creative Commons "Public Domain
-Dedication" license (CC0-1.0), as described in
-https://creativecommons.org/publicdomain/zero/1.0/ .
+Specifically, both are released under the "Unlicense", as described in
+https://unlicense.org/.
+
+[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
+[pypi-url]: https://pypi.python.org/pypi/versioneer/
+[travis-image]:
+https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
+[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer
"""
+# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring
+# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements
+# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error
+# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with
+# pylint:disable=attribute-defined-outside-init,too-many-arguments
-from __future__ import print_function
-try:
- import configparser
-except ImportError:
- import ConfigParser as configparser
+import configparser
import errno
import json
import os
import re
import subprocess
import sys
+from pathlib import Path
+from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union
+from typing import NoReturn
+import functools
+
+have_tomllib = True
+if sys.version_info >= (3, 11):
+ import tomllib
+else:
+ try:
+ import tomli as tomllib
+ except ImportError:
+ have_tomllib = False
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
+ VCS: str
+ style: str
+ tag_prefix: str
+ versionfile_source: str
+ versionfile_build: Optional[str]
+ parentdir_prefix: Optional[str]
+ verbose: Optional[bool]
+
-def get_root():
+def get_root() -> str:
"""Get the project root directory.
We require that all commands are run from the project root, i.e. the
@@ -301,13 +350,23 @@ def get_root():
"""
root = os.path.realpath(os.path.abspath(os.getcwd()))
setup_py = os.path.join(root, "setup.py")
+ pyproject_toml = os.path.join(root, "pyproject.toml")
versioneer_py = os.path.join(root, "versioneer.py")
- if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+ if not (
+ os.path.exists(setup_py)
+ or os.path.exists(pyproject_toml)
+ or os.path.exists(versioneer_py)
+ ):
# allow 'python path/to/setup.py COMMAND'
root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
setup_py = os.path.join(root, "setup.py")
+ pyproject_toml = os.path.join(root, "pyproject.toml")
versioneer_py = os.path.join(root, "versioneer.py")
- if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+ if not (
+ os.path.exists(setup_py)
+ or os.path.exists(pyproject_toml)
+ or os.path.exists(versioneer_py)
+ ):
err = ("Versioneer was unable to run the project root directory. "
"Versioneer requires setup.py to be executed from "
"its immediate directory (like 'python setup.py COMMAND'), "
@@ -321,43 +380,62 @@ def get_root():
# module-import table will cache the first one. So we can't use
# os.path.dirname(__file__), as that will find whichever
# versioneer.py was first imported, even in later projects.
- me = os.path.realpath(os.path.abspath(__file__))
- me_dir = os.path.normcase(os.path.splitext(me)[0])
+ my_path = os.path.realpath(os.path.abspath(__file__))
+ me_dir = os.path.normcase(os.path.splitext(my_path)[0])
vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
- if me_dir != vsr_dir:
+ if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals():
print("Warning: build in %s is using versioneer.py from %s"
- % (os.path.dirname(me), versioneer_py))
+ % (os.path.dirname(my_path), versioneer_py))
except NameError:
pass
return root
-def get_config_from_root(root):
+def get_config_from_root(root: str) -> VersioneerConfig:
"""Read the project setup.cfg file to determine Versioneer config."""
- # This might raise EnvironmentError (if setup.cfg is missing), or
+ # This might raise OSError (if setup.cfg is missing), or
# configparser.NoSectionError (if it lacks a [versioneer] section), or
# configparser.NoOptionError (if it lacks "VCS="). See the docstring at
# the top of versioneer.py for instructions on writing your setup.cfg .
- setup_cfg = os.path.join(root, "setup.cfg")
- parser = configparser.SafeConfigParser()
- with open(setup_cfg, "r") as f:
- parser.readfp(f)
- VCS = parser.get("versioneer", "VCS") # mandatory
-
- def get(parser, name):
- if parser.has_option("versioneer", name):
- return parser.get("versioneer", name)
- return None
+ root_pth = Path(root)
+ pyproject_toml = root_pth / "pyproject.toml"
+ setup_cfg = root_pth / "setup.cfg"
+ section: Union[Dict[str, Any], configparser.SectionProxy, None] = None
+ if pyproject_toml.exists() and have_tomllib:
+ try:
+ with open(pyproject_toml, 'rb') as fobj:
+ pp = tomllib.load(fobj)
+ section = pp['tool']['versioneer']
+ except (tomllib.TOMLDecodeError, KeyError) as e:
+ print(f"Failed to load config from {pyproject_toml}: {e}")
+            print("Trying to load it from setup.cfg instead")
+ if not section:
+ parser = configparser.ConfigParser()
+ with open(setup_cfg) as cfg_file:
+ parser.read_file(cfg_file)
+ parser.get("versioneer", "VCS") # raise error if missing
+
+ section = parser["versioneer"]
+
+    # `cast` really shouldn't be used, but it's the simplest approach for
+    # the common VersioneerConfig users at the moment. We verify against
+    # `None` values elsewhere where it matters.
+
cfg = VersioneerConfig()
- cfg.VCS = VCS
- cfg.style = get(parser, "style") or ""
- cfg.versionfile_source = get(parser, "versionfile_source")
- cfg.versionfile_build = get(parser, "versionfile_build")
- cfg.tag_prefix = get(parser, "tag_prefix")
- if cfg.tag_prefix in ("''", '""'):
+ cfg.VCS = section['VCS']
+ cfg.style = section.get("style", "")
+ cfg.versionfile_source = cast(str, section.get("versionfile_source"))
+ cfg.versionfile_build = section.get("versionfile_build")
+ cfg.tag_prefix = cast(str, section.get("tag_prefix"))
+ if cfg.tag_prefix in ("''", '""', None):
cfg.tag_prefix = ""
- cfg.parentdir_prefix = get(parser, "parentdir_prefix")
- cfg.verbose = get(parser, "verbose")
+ cfg.parentdir_prefix = section.get("parentdir_prefix")
+ if isinstance(section, configparser.SectionProxy):
+ # Make sure configparser translates to bool
+ cfg.verbose = section.getboolean("verbose")
+ else:
+ cfg.verbose = section.get("verbose")
+
return cfg
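+
+# For orientation (illustrative, not consumed by the code): pyproject.toml is
+# consulted first, where a hypothetical project would declare
+#
+#     [tool.versioneer]
+#     VCS = "git"
+#     style = "pep440"
+#     versionfile_source = "src/mypkg/_version.py"
+#     versionfile_build = "mypkg/_version.py"
+#     tag_prefix = "v"
+#
+# with the same keys accepted under a [versioneer] section in setup.cfg.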
@@ -366,37 +444,48 @@ class NotThisMethod(Exception):
# these dictionaries contain VCS-specific tools
-LONG_VERSION_PY = {}
-HANDLERS = {}
+LONG_VERSION_PY: Dict[str, str] = {}
+HANDLERS: Dict[str, Dict[str, Callable]] = {}
-def register_vcs_handler(vcs, method): # decorator
- """Decorator to mark a method as the handler for a particular VCS."""
- def decorate(f):
+def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator
+ """Create decorator to mark a method as the handler of a VCS."""
+ def decorate(f: Callable) -> Callable:
"""Store f in HANDLERS[vcs][method]."""
- if vcs not in HANDLERS:
- HANDLERS[vcs] = {}
- HANDLERS[vcs][method] = f
+ HANDLERS.setdefault(vcs, {})[method] = f
return f
return decorate
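+
+# Usage sketch (mirroring the real registrations below): a decorated
+# function is stored under its VCS and method name,
+#
+#     @register_vcs_handler("git", "get_keywords")
+#     def git_get_keywords(versionfile_abs): ...
+#
+# and is later retrieved via HANDLERS["git"]["get_keywords"].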
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
- env=None):
+def run_command(
+ commands: List[str],
+ args: List[str],
+ cwd: Optional[str] = None,
+ verbose: bool = False,
+ hide_stderr: bool = False,
+ env: Optional[Dict[str, str]] = None,
+) -> Tuple[Optional[str], Optional[int]]:
"""Call the given command(s)."""
assert isinstance(commands, list)
- p = None
- for c in commands:
+ process = None
+
+ popen_kwargs: Dict[str, Any] = {}
+ if sys.platform == "win32":
+ # This hides the console window if pythonw.exe is used
+ startupinfo = subprocess.STARTUPINFO()
+ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+ popen_kwargs["startupinfo"] = startupinfo
+
+ for command in commands:
try:
- dispcmd = str([c] + args)
+ dispcmd = str([command] + args)
# remember shell=False, so use git.cmd on windows, not just git
- p = subprocess.Popen([c] + args, cwd=cwd, env=env,
- stdout=subprocess.PIPE,
- stderr=(subprocess.PIPE if hide_stderr
- else None))
+ process = subprocess.Popen([command] + args, cwd=cwd, env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr
+ else None), **popen_kwargs)
break
- except EnvironmentError:
- e = sys.exc_info()[1]
+ except OSError as e:
if e.errno == errno.ENOENT:
continue
if verbose:
@@ -407,26 +496,25 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
if verbose:
print("unable to find command, tried %s" % (commands,))
return None, None
- stdout = p.communicate()[0].strip()
- if sys.version_info[0] >= 3:
- stdout = stdout.decode()
- if p.returncode != 0:
+ stdout = process.communicate()[0].strip().decode()
+ if process.returncode != 0:
if verbose:
print("unable to run %s (error)" % dispcmd)
print("stdout was %s" % stdout)
- return None, p.returncode
- return stdout, p.returncode
+ return None, process.returncode
+ return stdout, process.returncode
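+
+# Typical call pattern (illustrative): the first entry in `commands` that
+# exists is used; a missing tool or failed run yields (None, returncode):
+#
+#     out, rc = run_command(["git"], ["rev-parse", "HEAD"], cwd=root)
+#     if out is None:
+#         ...  # git absent, or the command returned non-zero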
-LONG_VERSION_PY['git'] = '''
+LONG_VERSION_PY['git'] = r'''
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
-# This file is released into the public domain. Generated by
-# versioneer-0.18 (https://github.com/warner/python-versioneer)
+# This file is released into the public domain.
+# Generated by versioneer-0.29
+# https://github.com/python-versioneer/python-versioneer
"""Git implementation of _version.py."""
@@ -435,9 +523,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
import re
import subprocess
import sys
+from typing import Any, Callable, Dict, List, Optional, Tuple
+import functools
-def get_keywords():
+def get_keywords() -> Dict[str, str]:
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
@@ -453,8 +543,15 @@ def get_keywords():
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
+ VCS: str
+ style: str
+ tag_prefix: str
+ parentdir_prefix: str
+ versionfile_source: str
+ verbose: bool
+
-def get_config():
+def get_config() -> VersioneerConfig:
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
@@ -472,13 +569,13 @@ class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""
-LONG_VERSION_PY = {}
-HANDLERS = {}
+LONG_VERSION_PY: Dict[str, str] = {}
+HANDLERS: Dict[str, Dict[str, Callable]] = {}
-def register_vcs_handler(vcs, method): # decorator
- """Decorator to mark a method as the handler for a particular VCS."""
- def decorate(f):
+def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator
+ """Create decorator to mark a method as the handler of a VCS."""
+ def decorate(f: Callable) -> Callable:
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
@@ -487,22 +584,35 @@ def decorate(f):
return decorate
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
- env=None):
+def run_command(
+ commands: List[str],
+ args: List[str],
+ cwd: Optional[str] = None,
+ verbose: bool = False,
+ hide_stderr: bool = False,
+ env: Optional[Dict[str, str]] = None,
+) -> Tuple[Optional[str], Optional[int]]:
"""Call the given command(s)."""
assert isinstance(commands, list)
- p = None
- for c in commands:
+ process = None
+
+ popen_kwargs: Dict[str, Any] = {}
+ if sys.platform == "win32":
+ # This hides the console window if pythonw.exe is used
+ startupinfo = subprocess.STARTUPINFO()
+ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+ popen_kwargs["startupinfo"] = startupinfo
+
+ for command in commands:
try:
- dispcmd = str([c] + args)
+ dispcmd = str([command] + args)
# remember shell=False, so use git.cmd on windows, not just git
- p = subprocess.Popen([c] + args, cwd=cwd, env=env,
- stdout=subprocess.PIPE,
- stderr=(subprocess.PIPE if hide_stderr
- else None))
+ process = subprocess.Popen([command] + args, cwd=cwd, env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr
+ else None), **popen_kwargs)
break
- except EnvironmentError:
- e = sys.exc_info()[1]
+ except OSError as e:
if e.errno == errno.ENOENT:
continue
if verbose:
@@ -513,18 +623,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
if verbose:
print("unable to find command, tried %%s" %% (commands,))
return None, None
- stdout = p.communicate()[0].strip()
- if sys.version_info[0] >= 3:
- stdout = stdout.decode()
- if p.returncode != 0:
+ stdout = process.communicate()[0].strip().decode()
+ if process.returncode != 0:
if verbose:
print("unable to run %%s (error)" %% dispcmd)
print("stdout was %%s" %% stdout)
- return None, p.returncode
- return stdout, p.returncode
+ return None, process.returncode
+ return stdout, process.returncode
-def versions_from_parentdir(parentdir_prefix, root, verbose):
+def versions_from_parentdir(
+ parentdir_prefix: str,
+ root: str,
+ verbose: bool,
+) -> Dict[str, Any]:
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both
@@ -533,15 +645,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
"""
rootdirs = []
- for i in range(3):
+ for _ in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
- else:
- rootdirs.append(root)
- root = os.path.dirname(root) # up a level
+ rootdirs.append(root)
+ root = os.path.dirname(root) # up a level
if verbose:
print("Tried directories %%s but none started with prefix %%s" %%
@@ -550,41 +661,48 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
@register_vcs_handler("git", "get_keywords")
-def git_get_keywords(versionfile_abs):
+def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
- keywords = {}
+ keywords: Dict[str, str] = {}
try:
- f = open(versionfile_abs, "r")
- for line in f.readlines():
- if line.strip().startswith("git_refnames ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["refnames"] = mo.group(1)
- if line.strip().startswith("git_full ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["full"] = mo.group(1)
- if line.strip().startswith("git_date ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["date"] = mo.group(1)
- f.close()
- except EnvironmentError:
+ with open(versionfile_abs, "r") as fobj:
+ for line in fobj:
+ if line.strip().startswith("git_refnames ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["refnames"] = mo.group(1)
+ if line.strip().startswith("git_full ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["full"] = mo.group(1)
+ if line.strip().startswith("git_date ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["date"] = mo.group(1)
+ except OSError:
pass
return keywords
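+
+# After `git archive`, the expanded keyword lines this function greps for
+# look roughly like (illustrative values):
+#
+#     git_refnames = " (HEAD -> main, tag: v1.2.0)"
+#     git_full = "0123456789abcdef0123456789abcdef01234567"
+#     git_date = "2023-01-01 12:00:00 +0000"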
@register_vcs_handler("git", "keywords")
-def git_versions_from_keywords(keywords, tag_prefix, verbose):
+def git_versions_from_keywords(
+ keywords: Dict[str, str],
+ tag_prefix: str,
+ verbose: bool,
+) -> Dict[str, Any]:
"""Get version information from git keywords."""
- if not keywords:
- raise NotThisMethod("no keywords at all, weird")
+ if "refnames" not in keywords:
+ raise NotThisMethod("Short version file found")
date = keywords.get("date")
if date is not None:
+ # Use only the last line. Previous lines may contain GPG signature
+ # information.
+ date = date.splitlines()[-1]
+
# git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
@@ -597,11 +715,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
- refs = set([r.strip() for r in refnames.strip("()").split(",")])
+ refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
- tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %%d
@@ -610,7 +728,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
- tags = set([r for r in refs if re.search(r'\d', r)])
+ tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '%%s', no digits" %% ",".join(refs - tags))
if verbose:
@@ -619,6 +737,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
+ # Filter out refs that exactly match prefix or that don't start
+ # with a number once the prefix is stripped (mostly a concern
+ # when prefix is '')
+ if not re.match(r'\d', r):
+ continue
if verbose:
print("picking %%s" %% r)
return {"version": r,
@@ -634,7 +757,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
@register_vcs_handler("git", "pieces_from_vcs")
-def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+def git_pieces_from_vcs(
+ tag_prefix: str,
+ root: str,
+ verbose: bool,
+ runner: Callable = run_command
+) -> Dict[str, Any]:
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
@@ -645,8 +773,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
- out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
- hide_stderr=True)
+ # GIT_DIR can interfere with correct operation of Versioneer.
+ # It may be intended to be passed to the Versioneer-versioned project,
+ # but that should not change where we get our version from.
+ env = os.environ.copy()
+ env.pop("GIT_DIR", None)
+ runner = functools.partial(runner, env=env)
+
+ _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
+ hide_stderr=not verbose)
if rc != 0:
if verbose:
print("Directory %%s not under git control" %% root)
@@ -654,24 +789,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
- describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
- "--always", "--long",
- "--match", "%%s*" %% tag_prefix],
- cwd=root)
+ describe_out, rc = runner(GITS, [
+ "describe", "--tags", "--dirty", "--always", "--long",
+ "--match", f"{tag_prefix}[[:digit:]]*"
+ ], cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
- full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+ full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
- pieces = {}
+ pieces: Dict[str, Any] = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
+ branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
+ cwd=root)
+ # --abbrev-ref was added in git-1.6.3
+ if rc != 0 or branch_name is None:
+ raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
+ branch_name = branch_name.strip()
+
+ if branch_name == "HEAD":
+ # If we aren't exactly on a branch, pick a branch which represents
+ # the current commit. If all else fails, we are on a branchless
+ # commit.
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
+ # --contains was added in git-1.5.4
+ if rc != 0 or branches is None:
+ raise NotThisMethod("'git branch --contains' returned error")
+ branches = branches.split("\n")
+
+ # Remove the first line if we're running detached
+ if "(" in branches[0]:
+ branches.pop(0)
+
+ # Strip off the leading "* " from the list of branches.
+ branches = [branch[2:] for branch in branches]
+ if "master" in branches:
+ branch_name = "master"
+ elif not branches:
+ branch_name = None
+ else:
+ # Pick the first branch that is returned. Good or bad.
+ branch_name = branches[0]
+
+ pieces["branch"] = branch_name
+
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
@@ -688,7 +856,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
- # unparseable. Maybe git-describe is misbehaving?
+ # unparsable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%%s'"
%% describe_out)
return pieces
@@ -713,26 +881,27 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
else:
# HEX: no tags
pieces["closest-tag"] = None
- count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
- cwd=root)
- pieces["distance"] = int(count_out) # total number of commits
+ out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+ pieces["distance"] = len(out.split()) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
- date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"],
- cwd=root)[0].strip()
+ date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip()
+ # Use only the last line. Previous lines may contain GPG signature
+ # information.
+ date = date.splitlines()[-1]
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
return pieces
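+
+# Shape of the result (illustrative): for describe output
+# "v1.2.0-3-g0123abc-dirty" with tag_prefix "v", pieces comes out roughly as
+#
+#     {"long": "<full hex sha>", "short": "0123abc", "error": None,
+#      "branch": "main", "closest-tag": "1.2.0", "distance": 3,
+#      "dirty": True, "date": "2023-01-01T12:00:00+0000"}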
-def plus_or_dot(pieces):
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
-def render_pep440(pieces):
+def render_pep440(pieces: Dict[str, Any]) -> str:
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
@@ -757,23 +926,71 @@ def render_pep440(pieces):
return rendered
-def render_pep440_pre(pieces):
- """TAG[.post.devDISTANCE] -- No -dirty.
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
+ """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+ The ".dev0" means not master branch. Note that .dev0 sorts backwards
+ (a feature branch will appear "older" than the master branch).
Exceptions:
- 1: no tags. 0.post.devDISTANCE
+ 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0"
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += "+untagged.%%d.g%%s" %% (pieces["distance"],
+ pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
+
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
+ """Split pep440 version string at the post-release segment.
+
+ Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment is present).
+ """
+    vc = ver.split(".post")
+ return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
+ """TAG[.postN.devDISTANCE] -- No -dirty.
+
+ Exceptions:
+ 1: no tags. 0.post0.devDISTANCE
+ """
+ if pieces["closest-tag"]:
if pieces["distance"]:
- rendered += ".post.dev%%d" %% pieces["distance"]
+ # update the post release segment
+ tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+ rendered = tag_version
+ if post_version is not None:
+ rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"])
+ else:
+ rendered += ".post0.dev%%d" %% (pieces["distance"])
+ else:
+ # no commits, use the tag as the version
+ rendered = pieces["closest-tag"]
else:
# exception #1
- rendered = "0.post.dev%%d" %% pieces["distance"]
+ rendered = "0.post0.dev%%d" %% pieces["distance"]
return rendered
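+
+# Worked examples (tag_prefix already stripped from closest-tag):
+#
+#     closest-tag "1.2",       distance 2  ->  "1.2.post0.dev2"
+#     closest-tag "1.2.post3", distance 2  ->  "1.2.post4.dev2"
+#     no tags,                 distance 7  ->  "0.post0.dev7"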
-def render_pep440_post(pieces):
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
@@ -800,12 +1017,41 @@ def render_pep440_post(pieces):
return rendered
-def render_pep440_old(pieces):
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
+ """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+ The ".dev0" means not master branch.
+
+ Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%%d" %% pieces["distance"]
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "g%%s" %% pieces["short"]
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0.post%%d" %% pieces["distance"]
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += "+g%%s" %% pieces["short"]
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
+
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
- Eexceptions:
+ Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
@@ -822,7 +1068,7 @@ def render_pep440_old(pieces):
return rendered
-def render_git_describe(pieces):
+def render_git_describe(pieces: Dict[str, Any]) -> str:
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
@@ -842,7 +1088,7 @@ def render_git_describe(pieces):
return rendered
-def render_git_describe_long(pieces):
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always -long'.
@@ -862,7 +1108,7 @@ def render_git_describe_long(pieces):
return rendered
-def render(pieces, style):
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
@@ -876,10 +1122,14 @@ def render(pieces, style):
if style == "pep440":
rendered = render_pep440(pieces)
+ elif style == "pep440-branch":
+ rendered = render_pep440_branch(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
+ elif style == "pep440-post-branch":
+ rendered = render_pep440_post_branch(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
@@ -894,7 +1144,7 @@ def render(pieces, style):
"date": pieces.get("date")}
-def get_versions():
+def get_versions() -> Dict[str, Any]:
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
@@ -915,7 +1165,7 @@ def get_versions():
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
- for i in cfg.versionfile_source.split('/'):
+ for _ in cfg.versionfile_source.split('/'):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
@@ -942,41 +1192,48 @@ def get_versions():
@register_vcs_handler("git", "get_keywords")
-def git_get_keywords(versionfile_abs):
+def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
- keywords = {}
+ keywords: Dict[str, str] = {}
try:
- f = open(versionfile_abs, "r")
- for line in f.readlines():
- if line.strip().startswith("git_refnames ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["refnames"] = mo.group(1)
- if line.strip().startswith("git_full ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["full"] = mo.group(1)
- if line.strip().startswith("git_date ="):
- mo = re.search(r'=\s*"(.*)"', line)
- if mo:
- keywords["date"] = mo.group(1)
- f.close()
- except EnvironmentError:
+ with open(versionfile_abs, "r") as fobj:
+ for line in fobj:
+ if line.strip().startswith("git_refnames ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["refnames"] = mo.group(1)
+ if line.strip().startswith("git_full ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["full"] = mo.group(1)
+ if line.strip().startswith("git_date ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["date"] = mo.group(1)
+ except OSError:
pass
return keywords
@register_vcs_handler("git", "keywords")
-def git_versions_from_keywords(keywords, tag_prefix, verbose):
+def git_versions_from_keywords(
+ keywords: Dict[str, str],
+ tag_prefix: str,
+ verbose: bool,
+) -> Dict[str, Any]:
"""Get version information from git keywords."""
- if not keywords:
- raise NotThisMethod("no keywords at all, weird")
+ if "refnames" not in keywords:
+ raise NotThisMethod("Short version file found")
date = keywords.get("date")
if date is not None:
+ # Use only the last line. Previous lines may contain GPG signature
+ # information.
+ date = date.splitlines()[-1]
+
# git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
@@ -989,11 +1246,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
- refs = set([r.strip() for r in refnames.strip("()").split(",")])
+ refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
- tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
@@ -1002,7 +1259,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
- tags = set([r for r in refs if re.search(r'\d', r)])
+ tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
@@ -1011,6 +1268,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
+ # Filter out refs that exactly match prefix or that don't start
+ # with a number once the prefix is stripped (mostly a concern
+ # when prefix is '')
+ if not re.match(r'\d', r):
+ continue
if verbose:
print("picking %s" % r)
return {"version": r,
@@ -1026,7 +1288,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
@register_vcs_handler("git", "pieces_from_vcs")
-def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+def git_pieces_from_vcs(
+ tag_prefix: str,
+ root: str,
+ verbose: bool,
+ runner: Callable = run_command
+) -> Dict[str, Any]:
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
@@ -1037,8 +1304,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
- out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
- hide_stderr=True)
+ # GIT_DIR can interfere with correct operation of Versioneer.
+ # It may be intended to be passed to the Versioneer-versioned project,
+ # but that should not change where we get our version from.
+ env = os.environ.copy()
+ env.pop("GIT_DIR", None)
+ runner = functools.partial(runner, env=env)
+
+ _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
+ hide_stderr=not verbose)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
@@ -1046,24 +1320,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
- describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
- "--always", "--long",
- "--match", "%s*" % tag_prefix],
- cwd=root)
+ describe_out, rc = runner(GITS, [
+ "describe", "--tags", "--dirty", "--always", "--long",
+ "--match", f"{tag_prefix}[[:digit:]]*"
+ ], cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
- full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+ full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
- pieces = {}
+ pieces: Dict[str, Any] = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
+ branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
+ cwd=root)
+ # --abbrev-ref was added in git-1.6.3
+ if rc != 0 or branch_name is None:
+ raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
+ branch_name = branch_name.strip()
+
+ if branch_name == "HEAD":
+ # If we aren't exactly on a branch, pick a branch which represents
+ # the current commit. If all else fails, we are on a branchless
+ # commit.
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
+ # --contains was added in git-1.5.4
+ if rc != 0 or branches is None:
+ raise NotThisMethod("'git branch --contains' returned error")
+ branches = branches.split("\n")
+
+ # Remove the first line if we're running detached
+ if "(" in branches[0]:
+ branches.pop(0)
+
+ # Strip off the leading "* " from the list of branches.
+ branches = [branch[2:] for branch in branches]
+ if "master" in branches:
+ branch_name = "master"
+ elif not branches:
+ branch_name = None
+ else:
+ # Pick the first branch that is returned. Good or bad.
+ branch_name = branches[0]
+
+ pieces["branch"] = branch_name
+
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
@@ -1080,7 +1387,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
- # unparseable. Maybe git-describe is misbehaving?
+ # unparsable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out)
return pieces
@@ -1105,19 +1412,20 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
else:
# HEX: no tags
pieces["closest-tag"] = None
- count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
- cwd=root)
- pieces["distance"] = int(count_out) # total number of commits
+ out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+ pieces["distance"] = len(out.split()) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
- date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
- cwd=root)[0].strip()
+ date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
+ # Use only the last line. Previous lines may contain GPG signature
+ # information.
+ date = date.splitlines()[-1]
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
return pieces
-def do_vcs_install(manifest_in, versionfile_source, ipy):
+def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None:
"""Git-specific installation logic for Versioneer.
For Git, this means creating/changing .gitattributes to mark _version.py
@@ -1126,36 +1434,40 @@ def do_vcs_install(manifest_in, versionfile_source, ipy):
GITS = ["git"]
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
- files = [manifest_in, versionfile_source]
+ files = [versionfile_source]
if ipy:
files.append(ipy)
- try:
- me = __file__
- if me.endswith(".pyc") or me.endswith(".pyo"):
- me = os.path.splitext(me)[0] + ".py"
- versioneer_file = os.path.relpath(me)
- except NameError:
- versioneer_file = "versioneer.py"
- files.append(versioneer_file)
+ if "VERSIONEER_PEP518" not in globals():
+ try:
+ my_path = __file__
+ if my_path.endswith((".pyc", ".pyo")):
+ my_path = os.path.splitext(my_path)[0] + ".py"
+ versioneer_file = os.path.relpath(my_path)
+ except NameError:
+ versioneer_file = "versioneer.py"
+ files.append(versioneer_file)
present = False
try:
- f = open(".gitattributes", "r")
- for line in f.readlines():
- if line.strip().startswith(versionfile_source):
- if "export-subst" in line.strip().split()[1:]:
- present = True
- f.close()
- except EnvironmentError:
+ with open(".gitattributes", "r") as fobj:
+ for line in fobj:
+ if line.strip().startswith(versionfile_source):
+ if "export-subst" in line.strip().split()[1:]:
+ present = True
+ break
+ except OSError:
pass
if not present:
- f = open(".gitattributes", "a+")
- f.write("%s export-subst\n" % versionfile_source)
- f.close()
+ with open(".gitattributes", "a+") as fobj:
+ fobj.write(f"{versionfile_source} export-subst\n")
files.append(".gitattributes")
run_command(GITS, ["add", "--"] + files)
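+
+# Net effect (illustrative): .gitattributes gains a line such as
+#
+#     src/mypkg/_version.py export-subst
+#
+# so that `git archive` substitutes the keywords in _version.py.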
-def versions_from_parentdir(parentdir_prefix, root, verbose):
+def versions_from_parentdir(
+ parentdir_prefix: str,
+ root: str,
+ verbose: bool,
+) -> Dict[str, Any]:
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both
@@ -1164,15 +1476,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
"""
rootdirs = []
- for i in range(3):
+ for _ in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
- else:
- rootdirs.append(root)
- root = os.path.dirname(root) # up a level
+ rootdirs.append(root)
+ root = os.path.dirname(root) # up a level
if verbose:
print("Tried directories %s but none started with prefix %s" %
@@ -1181,7 +1492,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
SHORT_VERSION_PY = """
-# This file was generated by 'versioneer.py' (0.18) from
+# This file was generated by 'versioneer.py' (0.29) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.
@@ -1198,12 +1509,12 @@ def get_versions():
"""
-def versions_from_file(filename):
+def versions_from_file(filename: str) -> Dict[str, Any]:
"""Try to determine the version from _version.py if present."""
try:
with open(filename) as f:
contents = f.read()
- except EnvironmentError:
+ except OSError:
raise NotThisMethod("unable to read _version.py")
mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON",
contents, re.M | re.S)
@@ -1215,9 +1526,8 @@ def versions_from_file(filename):
return json.loads(mo.group(1))
-def write_to_version_file(filename, versions):
+def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None:
"""Write the given version number to the given _version.py file."""
- os.unlink(filename)
contents = json.dumps(versions, sort_keys=True,
indent=1, separators=(",", ": "))
with open(filename, "w") as f:
@@ -1226,14 +1536,14 @@ def write_to_version_file(filename, versions):
print("set %s to '%s'" % (filename, versions["version"]))
-def plus_or_dot(pieces):
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
-def render_pep440(pieces):
+def render_pep440(pieces: Dict[str, Any]) -> str:
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
@@ -1258,23 +1568,71 @@ def render_pep440(pieces):
return rendered
-def render_pep440_pre(pieces):
- """TAG[.post.devDISTANCE] -- No -dirty.
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
+ """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+ The ".dev0" means not master branch. Note that .dev0 sorts backwards
+ (a feature branch will appear "older" than the master branch).
Exceptions:
- 1: no tags. 0.post.devDISTANCE
+ 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0"
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += "+untagged.%d.g%s" % (pieces["distance"],
+ pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
+
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
+ """Split pep440 version string at the post-release segment.
+
+ Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment is present).
+ """
+    vc = ver.split(".post")
+ return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
+ """TAG[.postN.devDISTANCE] -- No -dirty.
+
+ Exceptions:
+ 1: no tags. 0.post0.devDISTANCE
+ """
+ if pieces["closest-tag"]:
if pieces["distance"]:
- rendered += ".post.dev%d" % pieces["distance"]
+ # update the post release segment
+ tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+ rendered = tag_version
+ if post_version is not None:
+ rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
+ else:
+ rendered += ".post0.dev%d" % (pieces["distance"])
+ else:
+ # no commits, use the tag as the version
+ rendered = pieces["closest-tag"]
else:
# exception #1
- rendered = "0.post.dev%d" % pieces["distance"]
+ rendered = "0.post0.dev%d" % pieces["distance"]
return rendered
-def render_pep440_post(pieces):
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
@@ -1301,12 +1659,41 @@ def render_pep440_post(pieces):
return rendered
-def render_pep440_old(pieces):
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
+ """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+ The ".dev0" means not master branch.
+
+ Exceptions:
+ 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+ """
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".post%d" % pieces["distance"]
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "g%s" % pieces["short"]
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0.post%d" % pieces["distance"]
+ if pieces["branch"] != "master":
+ rendered += ".dev0"
+ rendered += "+g%s" % pieces["short"]
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
+
+
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
- Eexceptions:
+ Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
@@ -1323,7 +1710,7 @@ def render_pep440_old(pieces):
return rendered
-def render_git_describe(pieces):
+def render_git_describe(pieces: Dict[str, Any]) -> str:
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
@@ -1343,7 +1730,7 @@ def render_git_describe(pieces):
return rendered
-def render_git_describe_long(pieces):
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always -long'.
@@ -1363,7 +1750,7 @@ def render_git_describe_long(pieces):
return rendered
-def render(pieces, style):
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
@@ -1377,10 +1764,14 @@ def render(pieces, style):
if style == "pep440":
rendered = render_pep440(pieces)
+ elif style == "pep440-branch":
+ rendered = render_pep440_branch(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
+ elif style == "pep440-post-branch":
+ rendered = render_pep440_post_branch(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
@@ -1399,7 +1790,7 @@ class VersioneerBadRootError(Exception):
"""The project root directory is unknown or missing key files."""
-def get_versions(verbose=False):
+def get_versions(verbose: bool = False) -> Dict[str, Any]:
"""Get the project version from whatever source is available.
Returns dict with two keys: 'version' and 'full'.
@@ -1414,7 +1805,7 @@ def get_versions(verbose=False):
assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
handlers = HANDLERS.get(cfg.VCS)
assert handlers, "unrecognized VCS '%s'" % cfg.VCS
- verbose = verbose or cfg.verbose
+ verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None`
assert cfg.versionfile_source is not None, \
"please set versioneer.versionfile_source"
assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"
@@ -1475,13 +1866,17 @@ def get_versions(verbose=False):
"date": None}
-def get_version():
+def get_version() -> str:
"""Get the short version string for this project."""
return get_versions()["version"]
-def get_cmdclass():
- """Get the custom setuptools/distutils subclasses used by Versioneer."""
+def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+ """Get the custom setuptools subclasses used by Versioneer.
+
+ If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+ """
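+    # e.g. (illustrative), merging with a project's existing commands:
+    #     cmdclass = versioneer.get_cmdclass({"build_ext": my_build_ext})
+    # where `my_build_ext` stands in for whatever command class (say, one
+    # from numpy) the project already uses.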
if "versioneer" in sys.modules:
del sys.modules["versioneer"]
# this fixes the "python setup.py develop" case (also 'install' and
@@ -1495,25 +1890,25 @@ def get_cmdclass():
# parent is protected against the child's "import versioneer". By
# removing ourselves from sys.modules here, before the child build
# happens, we protect the child from the parent's versioneer too.
- # Also see https://github.com/warner/python-versioneer/issues/52
+ # Also see https://github.com/python-versioneer/python-versioneer/issues/52
- cmds = {}
+ cmds = {} if cmdclass is None else cmdclass.copy()
- # we add "version" to both distutils and setuptools
+ # we add "version" to setuptools
from setuptools import Command
class cmd_version(Command):
description = "report generated version string"
- user_options = []
- boolean_options = []
+ user_options: List[Tuple[str, str, str]] = []
+ boolean_options: List[str] = []
- def initialize_options(self):
+ def initialize_options(self) -> None:
pass
- def finalize_options(self):
+ def finalize_options(self) -> None:
pass
- def run(self):
+ def run(self) -> None:
vers = get_versions(verbose=True)
print("Version: %s" % vers["version"])
print(" full-revisionid: %s" % vers.get("full-revisionid"))
@@ -1523,7 +1918,7 @@ def run(self):
print(" error: %s" % vers["error"])
cmds["version"] = cmd_version
- # we override "build_py" in both distutils and setuptools
+ # we override "build_py" in setuptools
#
# most invocation pathways end up running build_py:
# distutils/build -> build_py
@@ -1538,18 +1933,25 @@ def run(self):
# then does setup.py bdist_wheel, or sometimes setup.py install
# setup.py egg_info -> ?
+    # pip install -e . and setuptools' editable_wheel will invoke build_py
+ # but the build_py command is not expected to copy any files.
+
# we override different "build_py" commands for both environments
- if "setuptools" in sys.modules:
- from setuptools.command.build_py import build_py as _build_py
+ if 'build_py' in cmds:
+ _build_py: Any = cmds['build_py']
else:
- from distutils.command.build_py import build_py as _build_py
+ from setuptools.command.build_py import build_py as _build_py
class cmd_build_py(_build_py):
- def run(self):
+ def run(self) -> None:
root = get_root()
cfg = get_config_from_root(root)
versions = get_versions()
_build_py.run(self)
+ if getattr(self, "editable_mode", False):
+ # During editable installs `.py` and data files are
+ # not copied to build_lib
+ return
# now locate _version.py in the new build/ directory and replace
# it with an updated value
if cfg.versionfile_build:
@@ -1559,8 +1961,40 @@ def run(self):
write_to_version_file(target_versionfile, versions)
cmds["build_py"] = cmd_build_py
+ if 'build_ext' in cmds:
+ _build_ext: Any = cmds['build_ext']
+ else:
+ from setuptools.command.build_ext import build_ext as _build_ext
+
+ class cmd_build_ext(_build_ext):
+ def run(self) -> None:
+ root = get_root()
+ cfg = get_config_from_root(root)
+ versions = get_versions()
+ _build_ext.run(self)
+ if self.inplace:
+ # build_ext --inplace will only build extensions in
+ # build/lib<..> dir with no _version.py to write to.
+ # As in place builds will already have a _version.py
+                # As in-place builds will already have a _version.py
+ return
+ # now locate _version.py in the new build/ directory and replace
+ # it with an updated value
+ if not cfg.versionfile_build:
+ return
+ target_versionfile = os.path.join(self.build_lib,
+ cfg.versionfile_build)
+ if not os.path.exists(target_versionfile):
+ print(f"Warning: {target_versionfile} does not exist, skipping "
+ "version update. This can happen if you are running build_ext "
+ "without first running build_py.")
+ return
+ print("UPDATING %s" % target_versionfile)
+ write_to_version_file(target_versionfile, versions)
+ cmds["build_ext"] = cmd_build_ext
+
if "cx_Freeze" in sys.modules: # cx_freeze enabled?
- from cx_Freeze.dist import build_exe as _build_exe
+ from cx_Freeze.dist import build_exe as _build_exe # type: ignore
# nczeczulin reports that py2exe won't like the pep440-style string
# as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
# setup(console=[{
@@ -1569,7 +2003,7 @@ def run(self):
# ...
class cmd_build_exe(_build_exe):
- def run(self):
+ def run(self) -> None:
root = get_root()
cfg = get_config_from_root(root)
versions = get_versions()
@@ -1593,12 +2027,12 @@ def run(self):
if 'py2exe' in sys.modules: # py2exe enabled?
try:
- from py2exe.distutils_buildexe import py2exe as _py2exe # py3
+ from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore
except ImportError:
- from py2exe.build_exe import py2exe as _py2exe # py2
+ from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore
class cmd_py2exe(_py2exe):
- def run(self):
+ def run(self) -> None:
root = get_root()
cfg = get_config_from_root(root)
versions = get_versions()
@@ -1619,14 +2053,51 @@ def run(self):
})
cmds["py2exe"] = cmd_py2exe
+ # sdist farms its file list building out to egg_info
+ if 'egg_info' in cmds:
+ _egg_info: Any = cmds['egg_info']
+ else:
+ from setuptools.command.egg_info import egg_info as _egg_info
+
+ class cmd_egg_info(_egg_info):
+ def find_sources(self) -> None:
+ # egg_info.find_sources builds the manifest list and writes it
+ # in one shot
+ super().find_sources()
+
+ # Modify the filelist and normalize it
+ root = get_root()
+ cfg = get_config_from_root(root)
+ self.filelist.append('versioneer.py')
+ if cfg.versionfile_source:
+ # There are rare cases where versionfile_source might not be
+ # included by default, so we must be explicit
+ self.filelist.append(cfg.versionfile_source)
+ self.filelist.sort()
+ self.filelist.remove_duplicates()
+
+ # The write method is hidden in the manifest_maker instance that
+ # generated the filelist and was thrown away
+ # We will instead replicate their final normalization (to unicode,
+ # and POSIX-style paths)
+ from setuptools import unicode_utils
+ normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/')
+ for f in self.filelist.files]
+
+ manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt')
+ with open(manifest_filename, 'w') as fobj:
+ fobj.write('\n'.join(normalized))
+
+ cmds['egg_info'] = cmd_egg_info
+
# we override different "sdist" commands for both environments
- if "setuptools" in sys.modules:
- from setuptools.command.sdist import sdist as _sdist
+ if 'sdist' in cmds:
+ _sdist: Any = cmds['sdist']
else:
- from distutils.command.sdist import sdist as _sdist
+ from setuptools.command.sdist import sdist as _sdist
class cmd_sdist(_sdist):
- def run(self):
+ def run(self) -> None:
versions = get_versions()
self._versioneer_generated_versions = versions
# unless we update this, the command will keep using the old
@@ -1634,7 +2105,7 @@ def run(self):
self.distribution.metadata.version = versions["version"]
return _sdist.run(self)
- def make_release_tree(self, base_dir, files):
+ def make_release_tree(self, base_dir: str, files: List[str]) -> None:
root = get_root()
cfg = get_config_from_root(root)
_sdist.make_release_tree(self, base_dir, files)
@@ -1687,21 +2158,26 @@ def make_release_tree(self, base_dir, files):
"""
-INIT_PY_SNIPPET = """
+OLD_SNIPPET = """
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
"""
+INIT_PY_SNIPPET = """
+from . import {0}
+__version__ = {0}.get_versions()['version']
+"""
-def do_setup():
- """Main VCS-independent setup function for installing Versioneer."""
+
+def do_setup() -> int:
+    """Run the main VCS-independent setup for installing Versioneer."""
root = get_root()
try:
cfg = get_config_from_root(root)
- except (EnvironmentError, configparser.NoSectionError,
+ except (OSError, configparser.NoSectionError,
configparser.NoOptionError) as e:
- if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
+ if isinstance(e, (OSError, configparser.NoSectionError)):
print("Adding sample versioneer config to setup.cfg",
file=sys.stderr)
with open(os.path.join(root, "setup.cfg"), "a") as f:
@@ -1721,62 +2197,37 @@ def do_setup():
ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
"__init__.py")
+ maybe_ipy: Optional[str] = ipy
if os.path.exists(ipy):
try:
with open(ipy, "r") as f:
old = f.read()
- except EnvironmentError:
+ except OSError:
old = ""
- if INIT_PY_SNIPPET not in old:
+ module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0]
+ snippet = INIT_PY_SNIPPET.format(module)
+ if OLD_SNIPPET in old:
+ print(" replacing boilerplate in %s" % ipy)
+ with open(ipy, "w") as f:
+ f.write(old.replace(OLD_SNIPPET, snippet))
+ elif snippet not in old:
print(" appending to %s" % ipy)
with open(ipy, "a") as f:
- f.write(INIT_PY_SNIPPET)
+ f.write(snippet)
else:
print(" %s unmodified" % ipy)
else:
print(" %s doesn't exist, ok" % ipy)
- ipy = None
-
- # Make sure both the top-level "versioneer.py" and versionfile_source
- # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
- # they'll be copied into source distributions. Pip won't be able to
- # install the package without this.
- manifest_in = os.path.join(root, "MANIFEST.in")
- simple_includes = set()
- try:
- with open(manifest_in, "r") as f:
- for line in f:
- if line.startswith("include "):
- for include in line.split()[1:]:
- simple_includes.add(include)
- except EnvironmentError:
- pass
- # That doesn't cover everything MANIFEST.in can do
- # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
- # it might give some false negatives. Appending redundant 'include'
- # lines is safe, though.
- if "versioneer.py" not in simple_includes:
- print(" appending 'versioneer.py' to MANIFEST.in")
- with open(manifest_in, "a") as f:
- f.write("include versioneer.py\n")
- else:
- print(" 'versioneer.py' already in MANIFEST.in")
- if cfg.versionfile_source not in simple_includes:
- print(" appending versionfile_source ('%s') to MANIFEST.in" %
- cfg.versionfile_source)
- with open(manifest_in, "a") as f:
- f.write("include %s\n" % cfg.versionfile_source)
- else:
- print(" versionfile_source already in MANIFEST.in")
+ maybe_ipy = None
# Make VCS-specific changes. For git, this means creating/changing
# .gitattributes to mark _version.py for export-subst keyword
# substitution.
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
+ do_vcs_install(cfg.versionfile_source, maybe_ipy)
return 0
-def scan_setup_py():
+def scan_setup_py() -> int:
"""Validate the contents of setup.py against Versioneer's expectations."""
found = set()
setters = False
@@ -1813,10 +2264,14 @@ def scan_setup_py():
return errors
+def setup_command() -> NoReturn:
+ """Set up Versioneer and exit with appropriate error code."""
+ errors = do_setup()
+ errors += scan_setup_py()
+ sys.exit(1 if errors else 0)
+
+
if __name__ == "__main__":
cmd = sys.argv[1]
if cmd == "setup":
- errors = do_setup()
- errors += scan_setup_py()
- if errors:
- sys.exit(1)
+ setup_command()