From 2fa7b9997128812650eec894051e129488d869ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Mon, 15 May 2023 11:51:39 +0200 Subject: [PATCH 01/65] Create EnsureHashAlgorithm based on EnsureChoice This adds EnsureHashAlgorithm constraint which checks whether the given algorith name is contained in hashlib's algorithms_guaranteed set. It derives from EnsureChoice without further customization. Using `algorithms_guaranteed` covers algorithms that have named constructors in hashlib, i.e. accessible through getattr(hashlib, name) - that's the current usage in MultiHash. Future expansion could see us move to algorithms_available - these will be recognized when passed to (slower) hashlib.new(), but have no named constructors. For details, see: https://docs.python.org/3.11/library/hashlib.html --- datalad_next/commands/ls_file_collection.py | 5 ++--- datalad_next/constraints/__init__.py | 1 + datalad_next/constraints/basic.py | 6 ++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py index b48f04d25..19454e9b5 100644 --- a/datalad_next/commands/ls_file_collection.py +++ b/datalad_next/commands/ls_file_collection.py @@ -39,6 +39,7 @@ EnsureChoice, EnsurePath, EnsureURL, + EnsureHashAlgorithm, ) from datalad_next.uis import ( ansi_colors as ac, @@ -84,9 +85,7 @@ def __init__(self): param_constraints=dict( type=self._collection_types, collection=EnsurePath(lexists=True) | EnsureURL(), - # TODO EnsureHashAlgorithm - # https://github.com/datalad/datalad-next/issues/346 - #hash=None, + hash=EnsureHashAlgorithm(), ), joint_constraints={ ParameterConstraintContext(('type', 'collection', 'hash'), diff --git a/datalad_next/constraints/__init__.py b/datalad_next/constraints/__init__.py index 05442fd94..e6f013983 100644 --- a/datalad_next/constraints/__init__.py +++ b/datalad_next/constraints/__init__.py @@ -59,6 +59,7 @@ EnsureCallable, EnsureChoice, 
EnsureFloat, + EnsureHashAlgorithm, EnsureInt, EnsureKeyChoice, EnsureNone, diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index 0d9c56bc2..237838056 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext' +from hashlib import algorithms_guaranteed from pathlib import Path import re @@ -497,3 +498,8 @@ def short_description(self): if self._ref else '', ) + + +class EnsureHashAlgorithm(EnsureChoice): + def __init__(self): + super(EnsureHashAlgorithm, self).__init__(algorithms_guaranteed) From 632facb290e9f5129f63a7027de66be22ec1e1a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Tue, 16 May 2023 11:50:48 +0200 Subject: [PATCH 02/65] Fix EnsureHashAlgorithm & use EnsureListOf The EnsureHash algorithm now calls its super.__init__ correctly. We use EnsureHashAlgorithm() | EnsureListOf(...) in ls_file_colelction now. This correctly handles incoming values, which are always a list in CLI (argparse behaviour with --append) and can be str or list in Python API. At least when they are OK. When input is not OK, two things are suboptimal: - an incorrect hash (str) proceeds to EnsureListOf, which tries to go letter by letter, failing on first - a hash (list) with an incorrect value would first fail on EnsureHashAlgorithm(list), proceed to EnsureListOf, and fail on EnsureHashAlgorithm(str) As a conequence, in both cases the error message would say "does not match any of 2 alternatives" and print EnsureChoice message ("is not one of ...") twice. 
--- datalad_next/commands/ls_file_collection.py | 3 ++- datalad_next/constraints/basic.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py index 19454e9b5..33aa26921 100644 --- a/datalad_next/commands/ls_file_collection.py +++ b/datalad_next/commands/ls_file_collection.py @@ -40,6 +40,7 @@ EnsurePath, EnsureURL, EnsureHashAlgorithm, + EnsureListOf, ) from datalad_next.uis import ( ansi_colors as ac, @@ -85,7 +86,7 @@ def __init__(self): param_constraints=dict( type=self._collection_types, collection=EnsurePath(lexists=True) | EnsureURL(), - hash=EnsureHashAlgorithm(), + hash=EnsureHashAlgorithm() | EnsureListOf(EnsureHashAlgorithm()), ), joint_constraints={ ParameterConstraintContext(('type', 'collection', 'hash'), diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index 237838056..e60cbae74 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -502,4 +502,4 @@ def short_description(self): class EnsureHashAlgorithm(EnsureChoice): def __init__(self): - super(EnsureHashAlgorithm, self).__init__(algorithms_guaranteed) + super(EnsureHashAlgorithm, self).__init__(*algorithms_guaranteed) From 821ddc539e964e5d3abea20428eb0e01894f21d4 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Mon, 17 Jul 2023 20:22:21 +0200 Subject: [PATCH 03/65] ENH: Add actionable insight to skiptest condition It took me a while to figure out why the tests were being skipped, so I thought I'd add it to spare my future self the search --- datalad_next/tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_next/tests/utils.py b/datalad_next/tests/utils.py index a92a87176..7339c5694 100644 --- a/datalad_next/tests/utils.py +++ b/datalad_next/tests/utils.py @@ -74,7 +74,7 @@ def __enter__(self): from cheroot import wsgi from wsgidav.wsgidav_app import WsgiDAVApp except ImportError as e: - 
raise SkipTest('No WSGI capabilities') from e + raise SkipTest('No WSGI capabilities. Install cheroot and/or wsgidav') from e if self.auth: auth = {self.auth[0]: {'password': self.auth[1]}} From 084bd3a794f2d1a207496565754289373370a955 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Mon, 17 Jul 2023 20:22:45 +0200 Subject: [PATCH 04/65] TST: Add test conditions for recusive webdav sibling creation --- .../tests/test_create_sibling_webdav.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/datalad_next/commands/tests/test_create_sibling_webdav.py b/datalad_next/commands/tests/test_create_sibling_webdav.py index c303a3d76..d140edcab 100644 --- a/datalad_next/commands/tests/test_create_sibling_webdav.py +++ b/datalad_next/commands/tests/test_create_sibling_webdav.py @@ -138,6 +138,32 @@ def check_common_workflow( assert_status('ok', dsclone.get('.', **ca)) # verify testfile content eq_('dummy', (dsclone.pathobj / 'testfile.dat').read_text()) + # ensure that recursive operations succeed + # create a subdataset + subds = ds.create('mysubds') + targetdir_name = 'recursiontest' + subtargetdir = Path(webdav_server.path) / targetdir_name / 'mysubds' + url = f'{webdav_server.url}/{targetdir_name}' + + with chpwd(ds.path): + res = create_sibling_webdav( + url, + credential=webdav_credential['name'] + if declare_credential else None, + name='recursive-sibling', + mode=mode, + recursive=True, + **ca) + assert len(res) == 4 # 2 for create-sibling-webdav, 2 for storage + assert_in_results( + res, + action='create_sibling_webdav.storage', + status='ok', + type='sibling', + path=subds.path, + name='recursive-sibling-storage', + ) + ok_(subtargetdir.exists()) def test_bad_url_catching(existing_dataset): From 351b710ee1552dd73e419f2b92518f20a78ab701 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Tue, 6 Jun 2023 10:29:09 +0200 Subject: [PATCH 05/65] update comments in url_operations/http.py This commit updates the comments in url_oerations/http.py to 
explain why and how we calculate downloaded bytes in the absence of a content-length header. --- datalad_next/url_operations/http.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/datalad_next/url_operations/http.py b/datalad_next/url_operations/http.py index 854677c4e..4dc96a92a 100644 --- a/datalad_next/url_operations/http.py +++ b/datalad_next/url_operations/http.py @@ -244,16 +244,16 @@ def _stream_download_from_request( from_url = r.url hasher = self._get_hasher(hash) progress_id = self._get_progress_id(from_url, to_path) - # get download size, but not every server provides it + # try to get download size, it might not be provided, e.g. if + # chunked transport encoding is used try: # for compressed downloads the content length refers to the # compressed content expected_size = int(r.headers.get('content-length')) except (ValueError, TypeError): - # some responses do not have a `content-length` header, - # even though they HTTP200 and deliver the content. - # example: - # https://github.com/datalad/datalad-next/pull/365#issuecomment-1557114109 + # some HTTP-200 responses do not have a `content-length` header, + # e.g. if chunked transport encoding is used. in this case, set + # up everything to calculate size by ourselves expected_size = None self._progress_report_start( progress_id, @@ -274,14 +274,16 @@ def _stream_download_from_request( # TODO make chunksize a config item, 65536 is the default in # requests_toolbelt for chunk in r.raw.stream(amt=65536, decode_content=True): - # update how much data was transferred from the remote server, - # but we cannot use the size of the chunk for that, - # because content might be downloaded with transparent - # (de)compression. ask the download stream itself for its - # "position" + # update how much data was transferred from the remote server. 
if expected_size: + # if we have an expected size, we don't use the size of the + # chunk for that because content might be downloaded with + # transparent (de)compression. instead we ask the download + # stream itself for its "position". tell = r.raw.tell() else: + # if we do not have an expected size, all we can use is + # the size of the downloaded chunk. tell = downloaded_bytes + len(chunk) self._progress_report_update( progress_id, From 87040e13493c7a73818f1e187dc83364138fb0fe Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Tue, 6 Jun 2023 10:31:29 +0200 Subject: [PATCH 06/65] test progress report for chunked http-download This commit adds a test that verifies that http-download progress is reported, if no content-length header is provided. --- .../url_operations/tests/test_http.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/datalad_next/url_operations/tests/test_http.py b/datalad_next/url_operations/tests/test_http.py index be63c6a3b..47195e730 100644 --- a/datalad_next/url_operations/tests/test_http.py +++ b/datalad_next/url_operations/tests/test_http.py @@ -1,5 +1,6 @@ import gzip import pytest +import requests from ..any import AnyUrlOperations from ..http import ( @@ -98,6 +99,29 @@ def test_compressed_file_stay_compressed(tmp_path): f.read(1000) +def test_size_less_progress_reporting(http_server, monkeypatch): + test_file = (http_server.path / 'test.bin').open('wb') + test_file.seek(100000) + test_file.write(b'a') + test_file.close() + + r = requests.get(http_server.url + '/test.bin', stream=True) + del r.headers['content-length'] + + logs = [] + # patch the log_progress() used in http.py + def catch_progress(*_, **kwargs): + logs.append(kwargs) + + import datalad_next.url_operations + monkeypatch.setattr(datalad_next.url_operations, 'log_progress', catch_progress) + + http_handler = HttpUrlOperations() + http_handler._stream_download_from_request(r, None) + assert any('update' in kwargs for kwargs in logs) + assert 
any(('total', None) in kwargs.items() for kwargs in logs) + + def test_header_adding(): default_headers = dict(key_1='value_1') added_headers = dict(key_2='value_2') From ac11bb47ea7a7b66794618bbad82ebb4a0253d17 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Mon, 4 Sep 2023 14:51:05 +0200 Subject: [PATCH 07/65] Add missing and fix wrong docstrings for HTTP/WebDAV server related fixtures --- datalad_next/tests/fixtures.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datalad_next/tests/fixtures.py b/datalad_next/tests/fixtures.py index 71d9dbbfb..23fdfd753 100644 --- a/datalad_next/tests/fixtures.py +++ b/datalad_next/tests/fixtures.py @@ -221,6 +221,8 @@ def existing_noannex_dataset(dataset): @pytest.fixture(autouse=False, scope="session") def webdav_credential(): + """Provides HTTP Basic authentication credential necessary to access the + server provided by the ``webdav_server`` fixture.""" yield dict( name='dltest-my&=webdav', user='datalad', @@ -256,6 +258,8 @@ def webdav_server(tmp_path_factory, webdav_credential): @pytest.fixture(autouse=False, scope="session") def http_credential(): + """Provides the HTTP Basic authentication credential necessary to access the + HTTP server provided by the ``http_server_with_basicauth`` fixture.""" yield dict( name='dltest-my&=http', user='datalad', @@ -273,9 +277,6 @@ def http_server(tmp_path_factory): - ``path``: ``Path`` instance of the served temporary directory - ``url``: HTTP URL to access the HTTP server - - Server access requires HTTP Basic authentication with the credential - provided by the ``webdav_credential`` fixture. 
""" # must use the factory to get a unique path even when a concrete # test also uses `tmp_path` @@ -289,7 +290,7 @@ def http_server(tmp_path_factory): @pytest.fixture(autouse=False, scope="function") def http_server_with_basicauth(tmp_path_factory, http_credential): - """Like ``http_server`` but requiring authenticat with ``http_credential`` + """Like ``http_server`` but requiring authentication via ``http_credential`` """ path = tmp_path_factory.mktemp("webdav") server = HTTPPath( From f347a542969074cd791d5b20eba3cc52a7cd5bb5 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 14 Sep 2023 08:19:20 +0200 Subject: [PATCH 08/65] More guidelines on test implementations Closes #448 --- CONTRIBUTING.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3980676a8..4a3284f4b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -100,3 +100,19 @@ The following components of the `datalad` package must not be used (directly) in #### `require_dataset()` Commands must use `datalad_next.constraints.dataset.EnsureDataset` instead. + +#### nose-style decorators in test implementations + +The use of decorators like `with_tempfile` is not allowed. +`pytest` fixtures have to be used instead. +A *temporary* exception *may* be the helpers that are imported in `datalad_next.tests.utils`. +However, these will be reduced and removed over time, and additional usage only adds to the necessary refactoring effort. +Therefore new usage is highly discouraged. + +#### nose-style assertion helpers in test implementations + +The use of helpers like `assert_equal` is not allowed. +`pytest` constructs have to be used instead -- this typically means plain `assert` statements. +A *temporary* exception *may* be the helpers that are imported in `datalad_next.tests.utils`. +However, these will be reduced and removed over time, and additional usage only adds to the necessary refactoring effort. +Therefore new usage is highly discouraged. 
From b1f286ff5006296fad5ba8865914187f408f69d3 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Mon, 4 Sep 2023 10:14:24 +0200 Subject: [PATCH 09/65] Add christian.moench@web.de to the release team The initial CODEOWNER scope is focused on iterator implementations and the runners. --- docs/CODEOWNERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/CODEOWNERS b/docs/CODEOWNERS index 4e97cc044..09997217e 100644 --- a/docs/CODEOWNERS +++ b/docs/CODEOWNERS @@ -10,3 +10,5 @@ # Merge requests are accepted (automatically) when all (relevant) # status checks have passed, and RT approval was given. * michael.hanke@gmail.com +/iter_collections/ christian.moench@web.de +/runners/ christian.moench@web.de From 8b1d5f56b19714b21feb29261ee3efe6f10b627f Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 25 Sep 2023 08:58:47 +0200 Subject: [PATCH 10/65] Assemble 1.0 changelog --- CHANGELOG.md | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f51801bb8..96ba12397 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,115 @@ +# 1.0.0 (2023-09-25) + +This release represents a milestone in the development of the extension. +The package is reorganize to be a collection of more self-contained +mini-packages. Each with its own set of tests. + +Developer documentation and guidelines have been added to aid further +development. One particular goal is to establish datalad-next as a proxy +for importing datalad-core functionality for other extensions. Direct imports +from datalad-core can be minimized in favor of imports from datalad-next +to enable the identification of functionality needed outside the core package, +and to aid its maintenance, and guide efforts for improvements. + +The 1.0 release marks the switch to a more standard approach to semantic +versioning. 
However, although a substantial improvements have been made, +the 1.0 version nohow indicates a slowdown of development or a change in the +likelihood of (breaking) changes. They will merely become more easily +discoverable from the version label alone. + +Notable high-level features introduced by this major release are: + +- The new `UrlOperations` framework to provide a set of basic operations like + `download`, `upload`, `stat` for different protocols. This framework can be + thought of as a replacement for the "downloaders" functionality in + datalad-core -- although the feature list is not 100% overlapping. This new + framework is more easily extensible by 3rd-party code. + +- The `Constraints` framework elevates parameter/input validation to the next + level. In contrast to datalad-core, declarative input validation is no longer + limited to the CLI. Instead, command parameters can now be validated regardless + of the entrypoint through which a command is used. They can be validated + individually, but also sets of parameters can be validated jointly to implement + particular interaction checks. All parameter validations can now be performed + exhaustive, to present a user with a complete list of validation errors, rather + then the fail-on-first-error method implemented exclusively in datalad-core. + Validation errors are now reported using dedicated structured data type to aid + their communication via non-console interfaces. + +- The `Credentials` system has been further refined with more homogenized + workflows and deeper integration into other subsystems. This release merely + represents a snapshot of continued development towards a standardization of + credential handling workflows. + +- The annex remotes `uncurl` and `archivist` are replacements for the + datalad-core implementations `datalad` and `datalad-archive`. The offer + substantially improved configurability and leaner operation -- built on the + `UrlOperations` framework. 
+ +- A growing collection of iterator (see `iter_collections`) aims to provide + fast (and more Pythonic) operations on common data structures (Git worktrees, + directories, archives). The can be used as an alternative to the traditional + `Repo` classes (`GitRepo`, `AnnexRepo`) from datalad-core. + +- Analog to `UrlOperations` the `ArchiveOperations` framework aims to provide + an abstraction for operations on different archive types (e.g., TAR). The + represent an alternative to the traditional implementations of + `ExtractedArchive` and `ArchivesCache` from datalad-core, and aim at leaner + resource footprints. + +- The collection of runtime patches for datalad-core has been further expanded. + All patches are now individually documented, and applied using a set of standard + helpers (see http://docs.datalad.org/projects/next/en/latest/patches.html). + +For details, please see the changelogs of the 1.0.0 beta releases below. + +## πŸ’« Enhancements and new features + +- `TarArchiveOperations` is the first implementation of the `ArchiveOperations` + abstraction, providing archive handlers with a set of standard operations: + - `open` to get a file object for a particular archive member + - `__contains__` to check for the presence of a particular archive member + - `__iter__` to get an iterator for processing all archive members + https://github.com/datalad/datalad-next/pull/415 (by @mih) + +## πŸ› Bug Fixes + +- Make `TarfileItem.name` be of type `PurePosixPath` to reflect the fact + that a TAR archive can contain members with names that cannot be represent + unmodified on a non-POSIX file system. + https://github.com/datalad/datalad-next/pull/422 (by @mih) + An analog change is done for `ZipfileItem.name`. + https://github.com/datalad/datalad-next/pull/409 (by @christian-monch) + +- Fix `git ls-file` parsing in `iter_gitworktree()` to be compatible with + file names that start with a `tab` character. 
+ https://github.com/datalad/datalad-next/pull/421 (by @christian-monch) + +## πŸ“ Documentation + +- Expanded guidelines on test implementations. + +- Add missing and fix wrong docstrings for HTTP/WebDAV server related fixtures. + https://github.com/datalad/datalad-next/pull/445 (by @adswa) + +## 🏠 Internal + +- Deduplicate configuration handling code in annex remotes. + https://github.com/datalad/datalad-next/pull/440 (by @adswa) + +## πŸ›‘ Tests + +- New test fixtures have been introduced to replace traditional test helpers + from datalad-core: + + - `datalad_interactive_ui` and `datalad_noninteractive_ui` for testing + user interactions. They replace `with_testsui`. + https://github.com/datalad/datalad-next/pull/427 (by @mih) + +- Expand test coverage for `create_sibling_webdav` to include recursive + operation. + https://github.com/datalad/datalad-next/pull/434 (by @adswa) + # 1.0.0b3 (2023-06-09) From 502fb1095b466ad5a6376ea952c7b1ec47600c18 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 25 Sep 2023 15:19:01 +0200 Subject: [PATCH 11/65] Apply suggestions from review Co-authored-by: Adina Wagner --- CHANGELOG.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96ba12397..25b0ced7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,15 @@ # 1.0.0 (2023-09-25) This release represents a milestone in the development of the extension. -The package is reorganize to be a collection of more self-contained -mini-packages. Each with its own set of tests. +The package is reorganized to be a collection of more self-contained +mini-packages, each with its own set of tests. Developer documentation and guidelines have been added to aid further development. One particular goal is to establish datalad-next as a proxy for importing datalad-core functionality for other extensions. 
Direct imports -from datalad-core can be minimized in favor of imports from datalad-next -to enable the identification of functionality needed outside the core package, -and to aid its maintenance, and guide efforts for improvements. +from datalad-core can be minimized in favor of imports from datalad-next. +This helps identifying functionality needed outside the core package, +and guides efforts for future improvements. The 1.0 release marks the switch to a more standard approach to semantic versioning. However, although a substantial improvements have been made, From 666aac81d296e7416199e74341f99392b708e2d6 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 5 Sep 2023 10:00:50 +0200 Subject: [PATCH 12/65] Update CI runs to use min Python version 3.8 3.7 is EOL. --- .appveyor.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 4dec8e42b..3b2aabf02 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -99,7 +99,7 @@ environment: # to have `.overrides` be uniformly limited to instance overrides KEYWORDS: not test_gh1811 and not test_librarymode APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -120,7 +120,7 @@ environment: # because MIH does not know better KEYWORDS: not test_gh1811 and not test_fake_gitlab and not test_dryrun APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -130,7 +130,7 @@ environment: datalad.distribution KEYWORDS: not test_invalid_args APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -139,7 +139,7 @@ environment: DTS: > datalad.local APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 
- PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot @@ -151,7 +151,7 @@ environment: datalad.tests datalad.ui APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 - PY: 3.7 + PY: 3.8 INSTALL_SYSPKGS: # datalad-annex git remote needs something after git-annex_8.20211x INSTALL_GITANNEX: git-annex -m snapshot From 8c30812240c78a517224cd12a46a4ea39f3ea081 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Thu, 28 Sep 2023 09:32:37 +0200 Subject: [PATCH 13/65] unify quotes in doc-string --- datalad_next/annexremotes/uncurl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_next/annexremotes/uncurl.py b/datalad_next/annexremotes/uncurl.py index cd40378af..9435ee7a7 100644 --- a/datalad_next/annexremotes/uncurl.py +++ b/datalad_next/annexremotes/uncurl.py @@ -45,7 +45,7 @@ for a dataset (as shown above):: $ echo '[{"url":"ssh://my.server.org/home/me/file", "file":"dummy"}]' \\ - | datalad addurls - '{url}' {'file'} + | datalad addurls - '{url}' '{file}' This makes legacy commands (e.g., ``datalad download-url``), unnecessary, and facilitates the use of more advanced ``datalad addurls`` features (e.g., From ab6133196b5128db9c698feb1de00ae40cdff59c Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Thu, 28 Sep 2023 09:40:13 +0200 Subject: [PATCH 14/65] put f-string identifier in correct place --- datalad_next/annexremotes/uncurl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_next/annexremotes/uncurl.py b/datalad_next/annexremotes/uncurl.py index 9435ee7a7..72cc730cf 100644 --- a/datalad_next/annexremotes/uncurl.py +++ b/datalad_next/annexremotes/uncurl.py @@ -411,7 +411,7 @@ def remove(self, key): ) except UrlOperationsResourceUnknown: self.message( - 'f{key} not found at the remote, skipping', type='debug') + f'{key!r} not found at the remote, skipping', type='debug') # # helpers From 603bf01fa9ff9f22f2cd024a9a589186b3a772eb Mon Sep 17 
00:00:00 2001 From: Christian Monch Date: Thu, 28 Sep 2023 09:42:06 +0200 Subject: [PATCH 15/65] unify string delimiters --- datalad_next/annexremotes/uncurl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_next/annexremotes/uncurl.py b/datalad_next/annexremotes/uncurl.py index 72cc730cf..6007daa6a 100644 --- a/datalad_next/annexremotes/uncurl.py +++ b/datalad_next/annexremotes/uncurl.py @@ -424,7 +424,7 @@ def get_key_urls(self, key) -> list[str]: # this will also work within checkurl() for a temporary key # generated by git-annex after claimurl() urls = self.annex.geturls(key, prefix='') - self.message(f"Known urls for {key!r}: {urls}", type='debug') + self.message(f'Known urls for {key!r}: {urls}', type='debug') if self.url_tmpl: # we have a rewriting template. extract all properties # from all known URLs and instantiate the template From b8e837142c010c73f3e506e83c7ebca1f49eeccd Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 28 Sep 2023 12:37:41 +0200 Subject: [PATCH 16/65] Add changelog item missing from #455 --- changelog.d/20230928_123529_michael.hanke_doc_pr455.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelog.d/20230928_123529_michael.hanke_doc_pr455.md diff --git a/changelog.d/20230928_123529_michael.hanke_doc_pr455.md b/changelog.d/20230928_123529_michael.hanke_doc_pr455.md new file mode 100644 index 000000000..24f9ae6a9 --- /dev/null +++ b/changelog.d/20230928_123529_michael.hanke_doc_pr455.md @@ -0,0 +1,4 @@ +### πŸ› Bug Fixes + +- Fix f-string syntax in error message of the `uncurl` remote. + https://github.com/datalad/datalad-next/pull/455 (by @christian-monch) From 34bc1dadf13ccef309d2aab47c92e9ba90eedba2 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 28 Sep 2023 12:55:38 +0200 Subject: [PATCH 17/65] Small cleanup for using existing base class helpers, when possible This code removed here is merely a left-over for when this base class functionality did not exist yet. 
--- datalad_next/annexremotes/archivist.py | 17 +++++++---------- datalad_next/annexremotes/uncurl.py | 6 ------ 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/datalad_next/annexremotes/archivist.py b/datalad_next/annexremotes/archivist.py index d37a2d0f3..a91606084 100644 --- a/datalad_next/annexremotes/archivist.py +++ b/datalad_next/annexremotes/archivist.py @@ -119,12 +119,6 @@ class ArchivistRemote(SpecialRemote): """ def __init__(self, annex): super().__init__(annex) - # the following members will be initialized on prepare() - # as they require access to the underlying repository - self._repo = None - # name of the (git) remote archivist is operating under - # (for querying the correct configuration) - self._remotename = None # central archive handler cache, initialized on-prepare self._ahandlers = None # a potential instance of the legacy datalad-archives implementation @@ -162,8 +156,11 @@ def prepare(self): subsequent operations will be processed by the ``datalad-archives`` special remote implementation! """ + # we have to do this here, because the base class `.repo` will only give + # us a `LeanAnnexRepo`. + # TODO it is unclear to MIH what is actually needed API-wise of the legacy + # interface. Needs research. self._repo = LegacyAnnexRepo(self.annex.getgitdir()) - self._remotename = self.annex.getgitremotename() # are we in legacy mode? 
# let remote-specific setting take priority (there could be # multiple archivist-type remotes configured), and use unspecific switch @@ -185,7 +182,7 @@ def prepare(self): # central archive key handler coordination self._ahandlers = _ArchiveHandlers( - self._repo, + self.repo, # TODO #cache_mode=self._getcfg( # 'archive-cache-mode', @@ -272,7 +269,7 @@ def checkpresent(self, key: str) -> bool: # So let's do a two-pass approach, first check local availability # for any archive key, and only if that does not find us an archive # go for the remotes - if any(_get_key_contentpath(self._repo, akey) for akey in akeys): + if any(_get_key_contentpath(self.repo, akey) for akey in akeys): # any one is good enough # TODO here we could actually look into the archive and # verify member presence without relatively little cost @@ -283,7 +280,7 @@ def checkpresent(self, key: str) -> bool: try: # if it exits clean, the key is still present at at least one # remote - self._repo.call_annex(['checkpresentkey', akey]) + self.repo.call_annex(['checkpresentkey', akey]) return True except CommandError: self.message( diff --git a/datalad_next/annexremotes/uncurl.py b/datalad_next/annexremotes/uncurl.py index cd40378af..3f50a3ed9 100644 --- a/datalad_next/annexremotes/uncurl.py +++ b/datalad_next/annexremotes/uncurl.py @@ -223,12 +223,6 @@ from pathlib import Path import re -# we intentionally limit ourselves to the most basic interface -# and even that we only need to get a `ConfigManager` instance. -# If that class would support a plain path argument, we could -# avoid it entirely -from datalad_next.datasets import LeanAnnexRepo - from datalad_next.exceptions import ( CapturedException, UrlOperationsRemoteError, From a12f6ca7ae4d41572ec94240b1bbc63f5b2a4b42 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 28 Sep 2023 13:19:41 +0200 Subject: [PATCH 18/65] Simplify (subprocess) coverage setup The previous setup was taken from datalad-core. It comes from a time before `pytest`. 
With `pytest-cov` none of this complexity should be needed anymore. A similar setup has been implemented as part of https://github.com/datalad/datalad-ria/pull/6 Closes #453 --- .appveyor.yml | 2 +- .coveragerc | 9 +++++ tools/appveyor/submit-coverage | 2 - tools/coverage-bin/datalad | 1 - tools/coverage-bin/git-annex-backend-XDLRA | 1 - tools/coverage-bin/git-annex-remote-archivist | 1 - tools/coverage-bin/git-annex-remote-datalad | 1 - .../git-annex-remote-datalad-archives | 1 - tools/coverage-bin/git-annex-remote-ora | 1 - tools/coverage-bin/git-annex-remote-uncurl | 1 - tools/coverage-bin/git-remote-datalad-annex | 1 - tools/coverage-bin/sitecustomize.py | 3 -- tools/coverage-bin/with_coverage | 39 ------------------- 13 files changed, 10 insertions(+), 53 deletions(-) delete mode 120000 tools/coverage-bin/datalad delete mode 120000 tools/coverage-bin/git-annex-backend-XDLRA delete mode 120000 tools/coverage-bin/git-annex-remote-archivist delete mode 120000 tools/coverage-bin/git-annex-remote-datalad delete mode 120000 tools/coverage-bin/git-annex-remote-datalad-archives delete mode 120000 tools/coverage-bin/git-annex-remote-ora delete mode 120000 tools/coverage-bin/git-annex-remote-uncurl delete mode 120000 tools/coverage-bin/git-remote-datalad-annex delete mode 100755 tools/coverage-bin/sitecustomize.py delete mode 100755 tools/coverage-bin/with_coverage diff --git a/.appveyor.yml b/.appveyor.yml index 3b2aabf02..2624fcbae 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -285,7 +285,7 @@ test_script: # run test selecion (--traverse-namespace needed from Python 3.8 onwards) - cmd: python -m pytest -s -v -m "not (turtle)" -k "%KEYWORDS%" --cov=datalad_next --pyargs %DTS% # also add --cov datalad, because some core test runs may not touch -next code - - sh: PATH=$PWD/../tools/coverage-bin:$PATH python -m pytest -s -v -m "not (turtle)" -k "$KEYWORDS" --cov=datalad_next --cov datalad --pyargs ${DTS} + - sh: python -m pytest -s -v -m "not (turtle)" -k "$KEYWORDS" 
--cov=datalad_next --cov datalad --cov-config=../.coveragerc --pyargs ${DTS} after_test: diff --git a/.coveragerc b/.coveragerc index 0ed61f69c..47273f876 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,7 +1,16 @@ +[run] +parallel = True +branch = True +data_file = ${COVERAGE_ROOT-.}/.coverage +omit = + # versioneer + */_version.py + [paths] source = datalad_next/ */datalad_next/ + [report] # show lines missing coverage in output show_missing = True diff --git a/tools/appveyor/submit-coverage b/tools/appveyor/submit-coverage index 7ea560f0c..f8cef4134 100755 --- a/tools/appveyor/submit-coverage +++ b/tools/appveyor/submit-coverage @@ -2,8 +2,6 @@ set -e -u -# grab coverage reports from subprocesses, see tools/coverage-bin -python -m coverage combine -a /tmp/.coverage-entrypoints-*; python -m coverage xml curl -Os $CODECOV_BINARY chmod +x codecov diff --git a/tools/coverage-bin/datalad b/tools/coverage-bin/datalad deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/datalad +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-backend-XDLRA b/tools/coverage-bin/git-annex-backend-XDLRA deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-backend-XDLRA +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-archivist b/tools/coverage-bin/git-annex-remote-archivist deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-archivist +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-datalad b/tools/coverage-bin/git-annex-remote-datalad deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-datalad +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-datalad-archives 
b/tools/coverage-bin/git-annex-remote-datalad-archives deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-datalad-archives +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-ora b/tools/coverage-bin/git-annex-remote-ora deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-ora +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-annex-remote-uncurl b/tools/coverage-bin/git-annex-remote-uncurl deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-annex-remote-uncurl +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/git-remote-datalad-annex b/tools/coverage-bin/git-remote-datalad-annex deleted file mode 120000 index 3e0139186..000000000 --- a/tools/coverage-bin/git-remote-datalad-annex +++ /dev/null @@ -1 +0,0 @@ -with_coverage \ No newline at end of file diff --git a/tools/coverage-bin/sitecustomize.py b/tools/coverage-bin/sitecustomize.py deleted file mode 100755 index c1ba919b9..000000000 --- a/tools/coverage-bin/sitecustomize.py +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env python -import coverage -coverage.process_startup() diff --git a/tools/coverage-bin/with_coverage b/tools/coverage-bin/with_coverage deleted file mode 100755 index 82c79d23e..000000000 --- a/tools/coverage-bin/with_coverage +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# -# A little helper to overload executables with a coverage harness - -set -eu - -# what script is actually being called -bin=$(basename $0) -# where does this script live -curbin=$(which "$bin") -# this seems to determine where the full package puts it binaries -# in -core this is using `datalad` as the reference binary, -# here explicitly, and less confusingly use the name of the coverage -# wrapper -curdatalad=$(which with_coverage) 
-curdir=$(dirname $curdatalad) - -COVERAGE_RUN="-m coverage run" -export COVERAGE_PROCESS_START=$PWD/../.coveragerc -export PYTHONPATH="$PWD/../tools/coverage-bin/" -# remove the coverage wrapper binary location from the PATH -export PATH=${PATH//$curdir:/} -# check where the datalad binary is to -# - figure out which Python to call -# - to verify that we are in the right/different env/location -# and not where the coverage wrapper is coming from -newdatalad=$(which datalad) -newbin=$(which $bin) -newpython=$(sed -ne '1s/#!//gp' $newdatalad) - -if [ $(dirname $newdatalad) = $curdir ]; then - echo "E: binary remained the same: $newdatalad" >&2 - exit 1 -fi - -touch /tmp/coverages -export COVERAGE_FILE=/tmp/.coverage-entrypoints-$RANDOM -echo "Running now $newpython $COVERAGE_RUN -a $newbin $@" >> /tmp/coverages -$newpython $COVERAGE_RUN -a $newbin "$@" From 2943ccbe34df57a51fd66748853169c4ccecb161 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Thu, 28 Sep 2023 13:48:09 +0200 Subject: [PATCH 19/65] add annotations import to http-tests --- datalad_next/url_operations/tests/test_http.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datalad_next/url_operations/tests/test_http.py b/datalad_next/url_operations/tests/test_http.py index 47195e730..26e4c066e 100644 --- a/datalad_next/url_operations/tests/test_http.py +++ b/datalad_next/url_operations/tests/test_http.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import gzip import pytest import requests From 12e94fe1a15bc8b728208c7c8e3c92cf1adb6a48 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 3 Oct 2023 10:02:55 +0200 Subject: [PATCH 20/65] Fix a whole bunch of typos --- CHANGELOG.md | 4 ++-- datalad_next/annexremotes/uncurl.py | 2 +- datalad_next/commands/download.py | 2 +- datalad_next/constraints/tests/test_basic.py | 4 ++-- datalad_next/credman/manager.py | 2 +- datalad_next/gitremotes/datalad_annex.py | 2 +- datalad_next/patches/configuration.py | 2 +- 
datalad_next/patches/distribution_dataset.py | 2 +- datalad_next/patches/push_optimize.py | 4 ++-- datalad_next/utils/requests_auth.py | 2 +- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25b0ced7f..55aafdfd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -246,7 +246,7 @@ For details, please see the changelogs of the 1.0.0 beta releases below. https://github.com/datalad/datalad-next/pull/391 (by @mih) - The `main()` entrypoint of the `datalad-annex` Git remote helper has - be generalized to be more re-usable by other (derived) remote helper + be generalized to be more reusable by other (derived) remote helper implementations. https://github.com/datalad/datalad-next/pull/411 (by @mih) @@ -358,7 +358,7 @@ For details, please see the changelogs of the 1.0.0 beta releases below. - The CredentialManager was elevated to a top-level module ([#229](https://github.com/datalad/datalad-next/pull/220) by @mih) - Dataset-lookup behavior of the ``credentials`` command became identical to - ``downlad`` ([#256](https://github.com/datalad/datalad-next/pull/256) by + ``download`` ([#256](https://github.com/datalad/datalad-next/pull/256) by @mih) - The DataLad runner performance patch and all patches to clone functionality diff --git a/datalad_next/annexremotes/uncurl.py b/datalad_next/annexremotes/uncurl.py index 19f27c1c8..51dade904 100644 --- a/datalad_next/annexremotes/uncurl.py +++ b/datalad_next/annexremotes/uncurl.py @@ -68,7 +68,7 @@ password (repeat): Enter a name to save the credential (for accessing http://httpbin.org/basic-auth/myuser/mypassword) securely for future - re-use, or 'skip' to not save the credential + reuse, or 'skip' to not save the credential name: httpbin-dummy addurl http://httpbin.org/basic-auth/myuser/mypassword (from uncurl) (to ...) 
diff --git a/datalad_next/commands/download.py b/datalad_next/commands/download.py index 4f61e2110..927079b3f 100644 --- a/datalad_next/commands/download.py +++ b/datalad_next/commands/download.py @@ -73,7 +73,7 @@ class Download(ValidatedInterface): In contrast to other downloader tools, this command integrates with the DataLad credential management and is able to auto-discover credentials. If no credential is available, it automatically prompts for them, and - offers to store them for re-use after a successful authentication. + offers to store them for reuse after a successful authentication. Simultaneous hashing (checksumming) of downloaded content is supported with user-specified algorithms. diff --git a/datalad_next/constraints/tests/test_basic.py b/datalad_next/constraints/tests/test_basic.py index 2748a158e..9b0d12c4d 100644 --- a/datalad_next/constraints/tests/test_basic.py +++ b/datalad_next/constraints/tests/test_basic.py @@ -65,14 +65,14 @@ def test_bool(): # this should always work assert c(True) is True assert c(False) is False - # all that resuls in True + # all that results in True assert c('True') is True assert c('true') is True assert c('1') is True assert c('yes') is True assert c('on') is True assert c('enable') is True - # all that resuls in False + # all that results in False assert c('false') is False assert c('False') is False assert c('0') is False diff --git a/datalad_next/credman/manager.py b/datalad_next/credman/manager.py index 60124e2ff..5e85c6a12 100644 --- a/datalad_next/credman/manager.py +++ b/datalad_next/credman/manager.py @@ -294,7 +294,7 @@ def set(self, prompt = 'Enter a name to save the credential' if _context: prompt = f'{prompt} ({_context})' - prompt = f"{prompt} securely for future re-use, " \ + prompt = f"{prompt} securely for future reuse, " \ "or 'skip' to not save the credential" if _suggested_name: prompt = f'{prompt}, or leave empty to accept the name ' \ diff --git a/datalad_next/gitremotes/datalad_annex.py 
b/datalad_next/gitremotes/datalad_annex.py index 55c86074c..135a64c49 100755 --- a/datalad_next/gitremotes/datalad_annex.py +++ b/datalad_next/gitremotes/datalad_annex.py @@ -1146,7 +1146,7 @@ def make_export_tree(repo): ID of the tree object, suitable for `git-annex export`. """ here = repo.config.get('annex.uuid') - # re-use existing, or go with fixed random one + # reuse existing, or go with fixed random one origin = repo.config.get('remote.origin.annex-uuid', '8249ffce-770a-11ec-9578-5f6af5e76eaa') assert here, "No 'here'" diff --git a/datalad_next/patches/configuration.py b/datalad_next/patches/configuration.py index e718141ee..77c66e655 100644 --- a/datalad_next/patches/configuration.py +++ b/datalad_next/patches/configuration.py @@ -56,7 +56,7 @@ def __call__( raise ValueError( 'Scope selection is not supported for dumping') - # normalize variable specificatons + # normalize variable specifications specs = [] for s in ensure_list(spec): if isinstance(s, tuple): diff --git a/datalad_next/patches/distribution_dataset.py b/datalad_next/patches/distribution_dataset.py index f637006d4..4a56113d8 100644 --- a/datalad_next/patches/distribution_dataset.py +++ b/datalad_next/patches/distribution_dataset.py @@ -33,5 +33,5 @@ def resolve_path(path, ds=None, ds_resolved=None): resolve_path, msg='Apply datalad-next patch to distribution.dataset:resolve_path') -# re-use docs +# reuse docs resolve_path.__doc__ = orig_resolve_path.__doc__ diff --git a/datalad_next/patches/push_optimize.py b/datalad_next/patches/push_optimize.py index 67f915911..b2887665c 100644 --- a/datalad_next/patches/push_optimize.py +++ b/datalad_next/patches/push_optimize.py @@ -266,12 +266,12 @@ def _get_push_target(repo, target_arg): ------- str or None, str, str or None, list or None Target label, if determined; status label; optional message; - git-push-dryrun result for re-use or None, if no dry-run was + git-push-dryrun result for reuse or None, if no dry-run was attempted. 
""" # verified or auto-detected target = None - # for re-use + # for reuse wannabe_gitpush = None if not target_arg: # let Git figure out what needs doing diff --git a/datalad_next/utils/requests_auth.py b/datalad_next/utils/requests_auth.py index 742e1d1a2..62cb5a491 100644 --- a/datalad_next/utils/requests_auth.py +++ b/datalad_next/utils/requests_auth.py @@ -178,7 +178,7 @@ def handle_401(self, r, **kwargs): header is ignored. Server-provided 'www-authenticated' challenges are inspected, and - corresponding credentials are looked-up (if needed) and subequently + corresponding credentials are looked-up (if needed) and subsequently tried in a re-request to the original URL after performing any necessary actions to meet a given challenge. Such a re-request is then using the same connection as the original request. From 6727efc301726accb78501964400373f7545cb8b Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 3 Oct 2023 11:19:20 +0200 Subject: [PATCH 21/65] `FileSystemItem.from_path()` now honors `link_target=False` Closes #462 --- .../20231003_111547_michael.hanke_bf_462.md | 7 ++++ .../iter_collections/tests/test_utils.py | 32 +++++++++++++++++++ datalad_next/iter_collections/utils.py | 2 +- 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 changelog.d/20231003_111547_michael.hanke_bf_462.md create mode 100644 datalad_next/iter_collections/tests/test_utils.py diff --git a/changelog.d/20231003_111547_michael.hanke_bf_462.md b/changelog.d/20231003_111547_michael.hanke_bf_462.md new file mode 100644 index 000000000..b03c6af74 --- /dev/null +++ b/changelog.d/20231003_111547_michael.hanke_bf_462.md @@ -0,0 +1,7 @@ +### πŸ› Bug Fixes + +- `FileSystemItem.from_path()` now honors its `link_target` parameter, and + resolves a target for any symlink item conditional on this setting. + Previously, a symlink target was always resolved. 
+ Fixes https://github.com/datalad/datalad-next/issues/462 via + https://github.com/datalad/datalad-next/pull/464 (by @mih) diff --git a/datalad_next/iter_collections/tests/test_utils.py b/datalad_next/iter_collections/tests/test_utils.py new file mode 100644 index 000000000..1393431e9 --- /dev/null +++ b/datalad_next/iter_collections/tests/test_utils.py @@ -0,0 +1,32 @@ +from datalad_next.tests.utils import skip_wo_symlink_capability + +from ..utils import FileSystemItem + + +def test_FileSystemItem(tmp_path): + testfile = tmp_path / 'file1.txt' + testfile_content = 'content' + testfile.write_text(testfile_content) + + item = FileSystemItem.from_path(testfile) + assert item.size == len(testfile_content) + assert item.link_target is None + + +@skip_wo_symlink_capability +def test_FileSystemItem_linktarget(tmp_path): + testfile = tmp_path / 'file1.txt' + testfile_content = 'short' + testfile.write_text(testfile_content) + testlink = tmp_path / 'link' + testlink.symlink_to(testfile) + + item = FileSystemItem.from_path(testlink) + assert testfile.samefile(item.link_target) + # size of the link file does not anyhow propagate the size of the + # link target + assert item.size != len(testfile_content) + + # we can disable link resolution + item = FileSystemItem.from_path(testlink, link_target=False) + assert item.link_target is None diff --git a/datalad_next/iter_collections/utils.py b/datalad_next/iter_collections/utils.py index 0f00a2e5d..91fcdc4c9 100644 --- a/datalad_next/iter_collections/utils.py +++ b/datalad_next/iter_collections/utils.py @@ -96,7 +96,7 @@ def from_path( uid=cstat.st_uid, gid=cstat.st_gid, ) - if ctype == FileSystemItemType.symlink: + if link_target and ctype == FileSystemItemType.symlink: # could be p.readlink() from PY3.9+ item.link_target = PurePath(os.readlink(path)) return item From 9b16d706b9336a5cd79c2b272827d5780b0416a8 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 4 Oct 2023 09:01:41 +0200 Subject: [PATCH 22/65] Have a 
documentation entrypoint for runner-tooling Closes #466 --- datalad_next/runners/__init__.py | 40 +++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/datalad_next/runners/__init__.py b/datalad_next/runners/__init__.py index ce3fa932c..cca244f9c 100644 --- a/datalad_next/runners/__init__.py +++ b/datalad_next/runners/__init__.py @@ -1,6 +1,44 @@ """Execution of subprocesses -This module import all relevant components for subprocess execution. +This module provides all relevant components for subprocess execution. + +.. currentmodule:: datalad_next.runners + +Low-level tooling +----------------- + +Two essential process execution/management utilities are provided, for +generic command execution, and for execution command in the context +of a Git repository. + +.. autosummary:: + :toctree: generated + + GitRunner + Runner + +Additional information on the design of the subprocess execution tooling +is available from https://docs.datalad.org/design/threaded_runner.html + +A standard exception type is used to communicate any process termination +with a non-zero exit code + +.. autosummary:: + :toctree: generated + + CommandError + +Command output can be processed via "protocol" implementations that are +inspired by ``asyncio.SubprocessProtocol``. + +.. autosummary:: + :toctree: generated + + KillOutput + NoCapture + StdOutCapture + StdErrCapture + StdOutErrCapture """ # runners From 9a4a6e6dea410bb6821f710a08ae5d44328db11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Wed, 4 Oct 2023 17:40:46 +0200 Subject: [PATCH 23/65] Add gitworktree to ls-file-collection's docstring This adds a description of "gitworktree" as one of the available file collection types. 
--- datalad_next/commands/ls_file_collection.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py index cbc0aa867..ceb9150b2 100644 --- a/datalad_next/commands/ls_file_collection.py +++ b/datalad_next/commands/ls_file_collection.py @@ -234,6 +234,20 @@ class LsFileCollection(ValidatedInterface): by this command (``return_type='generator``) and only until the next result is yielded. PY] + ``gitworktree`` + Reports on all tracked and untracked content of a Git repository's + work tree. The collection identifier is a path of a directory in a Git + repository (which can, but needs not be, its root). Item identifiers + are the relative paths of items within that directory. Reported + properties include ``gitsha`` and ``gittype``; note that the + ``gitsha`` is not equivalent to a SHA1 hash of a file's content, but + is the SHA-type blob identifier as reported and used by Git. + [PY: When hashes are computed, an ``fp`` property with a file-like is + provided. Reading file data from it requires a ``seek(0)`` in most + cases. This file handle is only open when items are yielded directly + by this command (``return_type='generator``) and only until the next + result is yielded. PY] + ``tarfile`` Reports on members of a TAR archive. The collection identifier is the path of the TAR file. Item identifiers are the relative paths From d4a9fee0196feeeadeac67acd4d7ca428f3f67d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Wed, 4 Oct 2023 17:57:12 +0200 Subject: [PATCH 24/65] Tweak docstring wording This changes "(item identifiers are) the name of a file" to "... the names of items", both to make things consistently plural, and to reflect the fact that the identifiers can be names (actually, paths) of directories, not just files. 
--- datalad_next/commands/ls_file_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py index ceb9150b2..aa36bb82d 100644 --- a/datalad_next/commands/ls_file_collection.py +++ b/datalad_next/commands/ls_file_collection.py @@ -226,7 +226,7 @@ class LsFileCollection(ValidatedInterface): ``directory`` Reports on the content of a given directory (non-recursively). The collection identifier is the path of the directory. Item identifiers - are the name of a file within that directory. Standard properties like + are the names of items within that directory. Standard properties like ``size``, ``mtime``, or ``link_target`` are included in the report. [PY: When hashes are computed, an ``fp`` property with a file-like is provided. Reading file data from it requires a ``seek(0)`` in most From 3a415b4a96498a345ae0b77bd38b8fcac5ffd0cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Wed, 4 Oct 2023 18:09:35 +0200 Subject: [PATCH 25/65] Add changelog snippet for docstring update --- changelog.d/20231004_180525_mslw.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changelog.d/20231004_180525_mslw.md diff --git a/changelog.d/20231004_180525_mslw.md b/changelog.d/20231004_180525_mslw.md new file mode 100644 index 000000000..5d7db36f7 --- /dev/null +++ b/changelog.d/20231004_180525_mslw.md @@ -0,0 +1,6 @@ +### πŸ“ Documentation + +- Include `gitworktree` among the available file collection types + listed in `ls-file-collection`'s docstring. Fixes + https://github.com/datalad/datalad-next/issues/470 via + https://github.com/datalad/datalad-next/pull/471 (by @mslw) From c0c46797f8e29996e3db5d52c8c6dd8fa1fc487c Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 13 Oct 2023 13:54:40 +0200 Subject: [PATCH 26/65] More properly present docs on the repo classes Explicitly list the few `call_annex()` methods we aim to support. 
Most others (`annex_records()` etc) are better covered by iterators. --- datalad_next/datasets/__init__.py | 45 ++++++++++++++++++++----------- docs/source/conf.py | 6 +++++ 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/datalad_next/datasets/__init__.py b/datalad_next/datasets/__init__.py index e2f6bdbc4..e22e65df8 100644 --- a/datalad_next/datasets/__init__.py +++ b/datalad_next/datasets/__init__.py @@ -3,18 +3,18 @@ Two sets of repository abstractions are available :class:`LeanGitRepo` and :class:`LeanAnnexRepo` vs. :class:`LegacyGitRepo` and :class:`LegacyAnnexRepo`. -The latter are the classic classes providing a, now legacy, low-level API to -repository operations. This functionality stems from the earliest days of -DataLad and implements paradigms and behaviors that are no longer common to -the rest of the DataLad API. :class:`LegacyGitRepo` and -:class:`LegacyAnnexRepo` should no longer be used in new developments. - -:class:`LeanGitRepo` and :class:`LeanAnnexRepo` on the other hand provide -a more modern, substantially restricted API and represent the present -standard API for low-level repository operations. They are geared towards -interacting with Git and git-annex more directly, and are more suitable -for generator-like implementations, promoting low response latencies, and -a leaner processing footprint. +:class:`LeanGitRepo` and :class:`LeanAnnexRepo` provide a more modern, +small-ish interface and represent the present standard API for low-level +repository operations. They are geared towards interacting with Git and +git-annex more directly, and are more suitable for generator-like +implementations, promoting low response latencies, and a leaner processing +footprint. + +The ``Legacy*Repo`` classes provide a, now legacy, low-level API to repository +operations. This functionality stems from the earliest days of DataLad and +implements paradigms and behaviors that are no longer common to the rest of the +DataLad API. 
:class:`LegacyGitRepo` and :class:`LegacyAnnexRepo` should no +longer be used in new developments, and are not documented here. """ from pathlib import Path @@ -38,18 +38,33 @@ class LeanAnnexRepo(LegacyAnnexRepo): """git-annex repository representation with a minimized API This is a companion of :class:`LeanGitRepo`. In the same spirit, it - restricts its API to a limited set of method that primarily extend + restricts its API to a limited set of method that extend :class:`LeanGitRepo` with a set of ``call_annex*()`` methods. + + .. autosummary:: + + call_annex + call_annex_oneline + call_annex_success """ # list of attributes permitted in the "lean" API. This list extends # the API of LeanGitRepo - # TODO extend whitelist of attributed as necessary + # TODO extend whitelist of attributes as necessary _lean_attrs = [ + # these are the ones we intend to provide + 'call_annex', + 'call_annex_oneline', + 'call_annex_success', + # and here are the ones that we need to permit in order to get them + # to run '_check_git_version', + '_check_git_annex_version', # used by AnnexRepo.__init__() -- should be using `is_valid()` 'is_valid_git', 'is_valid_annex', '_is_direct_mode_from_config', + '_call_annex', + 'call_annex_items_', ] # intentionally limiting to just `path` as the only constructor argument @@ -65,5 +80,5 @@ def __new__(cls, path: Path): return obj -def _unsupported_method(self): +def _unsupported_method(self, *args, **kwargs): raise NotImplementedError('method unsupported by LeanAnnexRepo') diff --git a/docs/source/conf.py b/docs/source/conf.py index e4b0ea7ca..aa0645d3b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,6 +26,12 @@ import datalad_next +# this cheats sphinx into thinking that LeanGit repo is not +# merely imported, and convinces it to document it +import datalad_next.datasets as dnd +dnd.LeanGitRepo.__module__ = dnd.__name__ +dnd.LeanGitRepo.__name__ = 'LeanGitRepo' + # If extensions (or modules to document with autodoc) are in 
another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. From 6cd2fe181570dde93459e127428e7bb5169dedf7 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 13 Oct 2023 14:11:07 +0200 Subject: [PATCH 27/65] Update versioneer to resolve PY3.12 compatibility issue Closes #475 --- datalad_next/__init__.py | 5 +- datalad_next/_version.py | 331 ++++++++--- versioneer.py | 1133 ++++++++++++++++++++++++++------------ 3 files changed, 1043 insertions(+), 426 deletions(-) diff --git a/datalad_next/__init__.py b/datalad_next/__init__.py index 36df189a5..116df896c 100644 --- a/datalad_next/__init__.py +++ b/datalad_next/__init__.py @@ -105,6 +105,5 @@ ) -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions +from . import _version +__version__ = _version.get_versions()['version'] diff --git a/datalad_next/_version.py b/datalad_next/_version.py index 2edb50eb5..1677391dc 100644 --- a/datalad_next/_version.py +++ b/datalad_next/_version.py @@ -5,8 +5,9 @@ # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -15,9 +16,11 @@ import re import subprocess import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. 
# setup.py/versioneer.py will grep for the variable names, so they must @@ -33,8 +36,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool -def get_config(): + +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -52,13 +62,13 @@ class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} @@ -67,22 +77,35 @@ def decorate(f): return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] 
+ args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -93,18 +116,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, process.returncode -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. 
Source tarballs conventionally unpack into a directory that includes both @@ -113,15 +138,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % @@ -130,41 +154,48 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
- keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -177,11 +208,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -190,7 +221,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -199,6 +230,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %s" % r) return {"version": r, @@ -214,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -225,8 +266,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["--git-dir=.git", "rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -234,24 +282,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["--git-dir=.git", "describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["--git-dir=.git", "rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -293,26 +374,27 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["--git-dir=.git", "rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["--git-dir=.git", "show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." 
return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -337,23 +419,71 @@ def render_pep440(pieces): return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] + rendered = "0.post0.dev%d" % pieces["distance"] return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -380,12 +510,41 @@ def render_pep440_post(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 
0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -402,7 +561,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -422,7 +581,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. @@ -442,7 +601,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -456,10 +615,14 @@ def render(pieces, style): if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -474,7 +637,7 @@ def render(pieces, style): "date": pieces.get("date")} -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some @@ -495,7 +658,7 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, diff --git a/versioneer.py b/versioneer.py index 51ca8182e..1e3753e63 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,5 +1,5 @@ -# Version: 0.18 +# Version: 0.29 """The Versioneer - like a rocketeer, but for versions. @@ -7,18 +7,14 @@ ============== * like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer +* https://github.com/python-versioneer/python-versioneer * Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based +* License: Public Domain (Unlicense) +* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in setuptools-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control @@ -27,9 +23,38 @@ ## Quick Install -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results +Versioneer provides two installation modes. The "classic" vendored mode installs +a copy of versioneer into your repository. 
The experimental build-time dependency mode +is intended to allow you to skip this step and simplify the process of upgrading. + +### Vendored mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) + * Note that you will need to add `tomli; python_version < "3.11"` to your + build-time dependencies if you use `pyproject.toml` +* run `versioneer install --vendor` in your source tree, commit the results +* verify version information with `python setup.py version` + +### Build-time dependency mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) +* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) + to the `requires` key of the `build-system` table in `pyproject.toml`: + ```toml + [build-system] + requires = ["setuptools", "versioneer[toml]"] + build-backend = "setuptools.build_meta" + ``` +* run `versioneer install --no-vendor` in your source tree, commit the results +* verify version information with `python setup.py version` ## Version Identifiers @@ -61,7 +86,7 @@ for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. +uncommitted changes). 
The version identifier is used for multiple purposes: @@ -166,7 +191,7 @@ Some situations are known to cause problems for Versioneer. This details the most significant ones. More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). +[issues page](https://github.com/python-versioneer/python-versioneer/issues). ### Subprojects @@ -180,7 +205,7 @@ `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI distributions (and upload multiple independently-installable tarballs). * Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other langauges) in subdirectories. + provide bindings to Python (and perhaps other languages) in subdirectories. Versioneer will look for `.git` in parent directories, and most operations should get the right version string. However `pip` and `setuptools` have bugs @@ -194,9 +219,9 @@ Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. [pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve @@ -224,31 +249,20 @@ cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. -[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. 
-### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. -Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. - ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace +* edit `setup.cfg` and `pyproject.toml`, if necessary, + to include any new configuration settings indicated by the release notes. + See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install --[no-]vendor` in your source tree, to replace `SRC/_version.py` * commit any changed files @@ -265,35 +279,70 @@ direction and include code from all supported VCS systems, reducing the number of intermediate scripts. +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools + plugin ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. 
-Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . +Specifically, both are released under the "Unlicense", as described in +https://unlicense.org/. + +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer """ +# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring +# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements +# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error +# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with +# pylint:disable=attribute-defined-outside-init,too-many-arguments -from __future__ import print_function -try: - import configparser -except ImportError: - import ConfigParser as configparser +import configparser import errno import json import os import re import subprocess import sys +from pathlib import Path +from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union +from typing import NoReturn +import functools + +have_tomllib = True +if sys.version_info >= (3, 11): + import tomllib +else: + try: + import tomli as tomllib + except ImportError: + have_tomllib = False class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + versionfile_source: str + versionfile_build: Optional[str] + parentdir_prefix: Optional[str] + verbose: Optional[bool] + -def get_root(): +def get_root() -> str: """Get the project root directory. We require that all commands are run from the project root, i.e. 
the @@ -301,13 +350,23 @@ def get_root(): """ root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " @@ -321,43 +380,62 @@ def get_root(): # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. 
- me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) + my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: + if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py)) + % (os.path.dirname(my_path), versioneer_py)) except NameError: pass return root -def get_config_from_root(root): +def get_config_from_root(root: str) -> VersioneerConfig: """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or + # This might raise OSError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None + root_pth = Path(root) + pyproject_toml = root_pth / "pyproject.toml" + setup_cfg = root_pth / "setup.cfg" + section: Union[Dict[str, Any], configparser.SectionProxy, None] = None + if pyproject_toml.exists() and have_tomllib: + try: + with open(pyproject_toml, 'rb') as fobj: + pp = tomllib.load(fobj) + section = pp['tool']['versioneer'] + except (tomllib.TOMLDecodeError, KeyError) as e: + print(f"Failed to load config from {pyproject_toml}: {e}") + print("Try to load it from setup.cfg") + if not section: + parser = configparser.ConfigParser() + with open(setup_cfg) as cfg_file: + parser.read_file(cfg_file) + parser.get("versioneer", "VCS") # raise error if missing + + section = parser["versioneer"] + + # `cast`` really shouldn't be used, but its simplest for the + # common VersioneerConfig users at the moment. 
We verify against + # `None` values elsewhere where it matters + cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): + cfg.VCS = section['VCS'] + cfg.style = section.get("style", "") + cfg.versionfile_source = cast(str, section.get("versionfile_source")) + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = cast(str, section.get("tag_prefix")) + if cfg.tag_prefix in ("''", '""', None): cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") + cfg.parentdir_prefix = section.get("parentdir_prefix") + if isinstance(section, configparser.SectionProxy): + # Make sure configparser translates to bool + cfg.verbose = section.getboolean("verbose") + else: + cfg.verbose = section.get("verbose") + return cfg @@ -366,37 +444,48 @@ class NotThisMethod(Exception): # these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f + HANDLERS.setdefault(vcs, {})[method] = f return f return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: 
Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -407,26 +496,25 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, process.returncode -LONG_VERSION_PY['git'] = ''' +LONG_VERSION_PY['git'] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). 
Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -435,9 +523,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, import re import subprocess import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must @@ -453,8 +543,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + -def get_config(): +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -472,13 +569,13 @@ class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in 
HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} @@ -487,22 +584,35 @@ def decorate(f): return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -513,18 +623,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, 
process.returncode -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -533,15 +645,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% @@ -550,41 +661,48 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
- keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. 
However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -597,11 +715,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d @@ -610,7 +728,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: @@ -619,6 +737,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %%s" %% r) return {"version": r, @@ -634,7 +757,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -645,8 +773,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) @@ -654,24 +789,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -688,7 +856,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? + # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces @@ -713,26 +881,27 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -757,23 +926,71 @@ def render_pep440(pieces): return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). 
+ """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] + rendered = "0.post0.dev%%d" %% pieces["distance"] return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -800,12 +1017,41 @@ def render_pep440_post(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -822,7 +1068,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -842,7 +1088,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. 
@@ -862,7 +1108,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -876,10 +1122,14 @@ def render(pieces, style): if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -894,7 +1144,7 @@ def render(pieces, style): "date": pieces.get("date")} -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some @@ -915,7 +1165,7 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, @@ -942,41 +1192,48 @@ def get_versions(): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
- keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -989,11 +1246,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1002,7 +1259,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1011,6 +1268,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %s" % r) return {"version": r, @@ -1026,7 +1288,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -1037,8 +1304,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -1046,24 +1320,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -1080,7 +1387,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? + # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces @@ -1105,19 +1412,20 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces -def do_vcs_install(manifest_in, versionfile_source, ipy): +def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py @@ -1126,36 +1434,40 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] + files = [versionfile_source] if ipy: files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) + if "VERSIONEER_PEP518" not in globals(): + try: + my_path = __file__ + if my_path.endswith((".pyc", ".pyo")): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: pass if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + 
parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -1164,15 +1476,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % @@ -1181,7 +1492,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from +# This file was generated by 'versioneer.py' (0.29) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. 
@@ -1198,12 +1509,12 @@ def get_versions(): """ -def versions_from_file(filename): +def versions_from_file(filename: str) -> Dict[str, Any]: """Try to determine the version from _version.py if present.""" try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) @@ -1215,9 +1526,8 @@ def versions_from_file(filename): return json.loads(mo.group(1)) -def write_to_version_file(filename, versions): +def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: """Write the given version number to the given _version.py file.""" - os.unlink(filename) contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: @@ -1226,14 +1536,14 @@ def write_to_version_file(filename, versions): print("set %s to '%s'" % (filename, versions["version"])) -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -1258,23 +1568,71 @@ def render_pep440(pieces): return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] + rendered = "0.post0.dev%d" % pieces["distance"] return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. 
Note that .dev0 sorts backwards @@ -1301,12 +1659,41 @@ def render_pep440_post(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -1323,7 +1710,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -1343,7 +1730,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. 
@@ -1363,7 +1750,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -1377,10 +1764,14 @@ def render(pieces, style): if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -1399,7 +1790,7 @@ class VersioneerBadRootError(Exception): """The project root directory is unknown or missing key files.""" -def get_versions(verbose=False): +def get_versions(verbose: bool = False) -> Dict[str, Any]: """Get the project version from whatever source is available. Returns dict with two keys: 'version' and 'full'. 
@@ -1414,7 +1805,7 @@ def get_versions(verbose=False): assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose + verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` assert cfg.versionfile_source is not None, \ "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" @@ -1475,13 +1866,17 @@ def get_versions(verbose=False): "date": None} -def get_version(): +def get_version() -> str: """Get the short version string for this project.""" return get_versions()["version"] -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" +def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None): + """Get the custom setuptools subclasses used by Versioneer. + + If the package uses a different cmdclass (e.g. one from numpy), it + should be provide as an argument. + """ if "versioneer" in sys.modules: del sys.modules["versioneer"] # this fixes the "python setup.py develop" case (also 'install' and @@ -1495,25 +1890,25 @@ def get_cmdclass(): # parent is protected against the child's "import versioneer". By # removing ourselves from sys.modules here, before the child build # happens, we protect the child from the parent's versioneer too. 
- # Also see https://github.com/warner/python-versioneer/issues/52 + # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - cmds = {} + cmds = {} if cmdclass is None else cmdclass.copy() - # we add "version" to both distutils and setuptools + # we add "version" to setuptools from setuptools import Command class cmd_version(Command): description = "report generated version string" - user_options = [] - boolean_options = [] + user_options: List[Tuple[str, str, str]] = [] + boolean_options: List[str] = [] - def initialize_options(self): + def initialize_options(self) -> None: pass - def finalize_options(self): + def finalize_options(self) -> None: pass - def run(self): + def run(self) -> None: vers = get_versions(verbose=True) print("Version: %s" % vers["version"]) print(" full-revisionid: %s" % vers.get("full-revisionid")) @@ -1523,7 +1918,7 @@ def run(self): print(" error: %s" % vers["error"]) cmds["version"] = cmd_version - # we override "build_py" in both distutils and setuptools + # we override "build_py" in setuptools # # most invocation pathways end up running build_py: # distutils/build -> build_py @@ -1538,18 +1933,25 @@ def run(self): # then does setup.py bdist_wheel, or sometimes setup.py install # setup.py egg_info -> ? + # pip install -e . and setuptool/editable_wheel will invoke build_py + # but the build_py command is not expected to copy any files. 
+ # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py + if 'build_py' in cmds: + _build_py: Any = cmds['build_py'] else: - from distutils.command.build_py import build_py as _build_py + from setuptools.command.build_py import build_py as _build_py class cmd_build_py(_build_py): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) + if getattr(self, "editable_mode", False): + # During editable installs `.py` and data files are + # not copied to build_lib + return # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: @@ -1559,8 +1961,40 @@ def run(self): write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py + if 'build_ext' in cmds: + _build_ext: Any = cmds['build_ext'] + else: + from setuptools.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self) -> None: + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if not cfg.versionfile_build: + return + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + if not os.path.exists(target_versionfile): + print(f"Warning: {target_versionfile} does not exist, skipping " + "version update. 
This can happen if you are running build_ext " + "without first running build_py.") + return + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe + from cx_Freeze.dist import build_exe as _build_exe # type: ignore # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ @@ -1569,7 +2003,7 @@ def run(self): # ... class cmd_build_exe(_build_exe): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1593,12 +2027,12 @@ def run(self): if 'py2exe' in sys.modules: # py2exe enabled? try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 + from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 + from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore class cmd_py2exe(_py2exe): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1619,14 +2053,51 @@ def run(self): }) cmds["py2exe"] = cmd_py2exe + # sdist farms its file list building out to egg_info + if 'egg_info' in cmds: + _egg_info: Any = cmds['egg_info'] + else: + from setuptools.command.egg_info import egg_info as _egg_info + + class cmd_egg_info(_egg_info): + def find_sources(self) -> None: + # egg_info.find_sources builds the manifest list and writes it + # in one shot + super().find_sources() + + # Modify the filelist and normalize it + root = get_root() + cfg = get_config_from_root(root) + self.filelist.append('versioneer.py') + if cfg.versionfile_source: + # There are rare cases where versionfile_source might not be + # included by default, so we must be explicit + 
self.filelist.append(cfg.versionfile_source) + self.filelist.sort() + self.filelist.remove_duplicates() + + # The write method is hidden in the manifest_maker instance that + # generated the filelist and was thrown away + # We will instead replicate their final normalization (to unicode, + # and POSIX-style paths) + from setuptools import unicode_utils + normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') + for f in self.filelist.files] + + manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') + with open(manifest_filename, 'w') as fobj: + fobj.write('\n'.join(normalized)) + + cmds['egg_info'] = cmd_egg_info + # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist + if 'sdist' in cmds: + _sdist: Any = cmds['sdist'] else: - from distutils.command.sdist import sdist as _sdist + from setuptools.command.sdist import sdist as _sdist class cmd_sdist(_sdist): - def run(self): + def run(self) -> None: versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old @@ -1634,7 +2105,7 @@ def run(self): self.distribution.metadata.version = versions["version"] return _sdist.run(self) - def make_release_tree(self, base_dir, files): + def make_release_tree(self, base_dir: str, files: List[str]) -> None: root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) @@ -1687,21 +2158,26 @@ def make_release_tree(self, base_dir, files): """ -INIT_PY_SNIPPET = """ +OLD_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" + +def do_setup() -> int: + """Do main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + if isinstance(e, (OSError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: @@ -1721,62 +2197,37 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + maybe_ipy: Optional[str] = ipy if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() - except EnvironmentError: + except OSError: old = "" - if INIT_PY_SNIPPET not in old: + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) + f.write(snippet) else: print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. 
- manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") + maybe_ipy = None # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + do_vcs_install(cfg.versionfile_source, maybe_ipy) return 0 -def scan_setup_py(): +def scan_setup_py() -> int: """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False @@ -1813,10 +2264,14 @@ def scan_setup_py(): return errors +def setup_command() -> NoReturn: + """Set up Versioneer and exit with appropriate error code.""" + errors = do_setup() + errors += scan_setup_py() + sys.exit(1 if errors else 0) + + if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) + setup_command() From 5716843f1a700cf1271fcbe6126681ae6b2bd5fa Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 13 Oct 2023 14:38:32 +0200 Subject: [PATCH 28/65] Patch `run()` placeholder substitutions to honor configuration defaults This is the companion of https://github.com/datalad/datalad/pull/7509 Closes #478 --- datalad_next/patches/enabled.py | 1 + datalad_next/patches/run.py | 84 ++++++++++++++++++++++++++ datalad_next/patches/tests/test_run.py | 25 ++++++++ docs/source/patches.rst | 1 + 4 files changed, 111 insertions(+) create mode 100644 datalad_next/patches/run.py create mode 100644 datalad_next/patches/tests/test_run.py diff --git a/datalad_next/patches/enabled.py b/datalad_next/patches/enabled.py index 7fe207ca1..763961c4b 100644 --- a/datalad_next/patches/enabled.py +++ b/datalad_next/patches/enabled.py @@ -12,4 +12,5 @@ test_keyring, customremotes_main, create_sibling_gitlab, + run, ) diff --git a/datalad_next/patches/run.py b/datalad_next/patches/run.py new file mode 100644 index 000000000..fa4e97667 --- /dev/null +++ b/datalad_next/patches/run.py @@ -0,0 +1,84 @@ +"""Enhance ``run()`` placeholder substitutions to honor configuration defaults + +Previously, ``run()`` would not recognize configuration defaults for +placeholder substitution. 
This means that any placeholders globally declared in +``datalad.interface.common_cfg``, or via ``register_config()`` in DataLad +extensions would not be effective. + +This patch makes run's ``format_command()`` helper include such defaults +explicitly, and thereby enable the global declaration of substitution defaults. + +Moreover, a ``{python}`` placeholder is now defined via this mechanism, and +points to the value of ``sys.executable`` by default. This particular +placeholder was found to be valuable for improving the portability of +run-recording across (specific) Python versions, or across different (virtual) +environments. See https://github.com/datalad/datalad-container/issues/224 for +an example use case. + +https://github.com/datalad/datalad/pull/7509 +""" + +import sys + +from datalad.core.local.run import ( + GlobbedPaths, + SequenceFormatter, + normalize_command, + quote_cmdlinearg, +) +from datalad.interface.common_cfg import definitions as cfg_defs +from datalad.support.constraints import EnsureStr +from datalad.support.extensions import register_config + +from . import apply_patch + + +# This function is taken from datalad-core@a96c51c0b2794b2a2b4432ec7bd51f260cb91a37 +# datalad/core/local/run.py +# The change has been proposed in https://github.com/datalad/datalad/pull/7509 +def format_command(dset, command, **kwds): + """Plug in placeholders in `command`. + + Parameters + ---------- + dset : Dataset + command : str or list + + `kwds` is passed to the `format` call. `inputs` and `outputs` are converted + to GlobbedPaths if necessary. 
+ + Returns + ------- + formatted command (str) + """ + command = normalize_command(command) + sfmt = SequenceFormatter() + + for k in set(cfg_defs.keys()).union(dset.config.keys()): + v = dset.config.get( + k, + # pull a default from the config definitions + # if we have no value, but a key + cfg_defs.get(k, {}).get('default', None)) + sub_key = k.replace("datalad.run.substitutions.", "") + if sub_key not in kwds: + kwds[sub_key] = v + + for name in ["inputs", "outputs"]: + io_val = kwds.pop(name, None) + if not isinstance(io_val, GlobbedPaths): + io_val = GlobbedPaths(io_val, pwd=kwds.get("pwd")) + kwds[name] = list(map(quote_cmdlinearg, io_val.expand(dot=False))) + return sfmt.format(command, **kwds) + + +apply_patch( + 'datalad.core.local.run', None, 'format_command', format_command) +register_config( + 'datalad.run.substitutions.python', + 'Substitution for {python} placeholder', + description='Path to a Python interpreter executable', + type=EnsureStr(), + default=sys.executable, + dialog='question', +) diff --git a/datalad_next/patches/tests/test_run.py b/datalad_next/patches/tests/test_run.py new file mode 100644 index 000000000..721e6de96 --- /dev/null +++ b/datalad_next/patches/tests/test_run.py @@ -0,0 +1,25 @@ +import pytest + +from datalad_next.exceptions import IncompleteResultsError +from datalad_next.tests.utils import ( + SkipTest, + assert_result_count, +) + + +def test_substitution_config_default(existing_dataset): + ds = existing_dataset + + if ds.config.get('datalad.run.substitutions.python') is not None: + # we want to test default handling when no config is set + raise SkipTest( + 'Test assumptions conflict with effective configuration') + + # the {python} placeholder is not explicitly defined, but it has + # a default, which run() should discover and use + res = ds.run('{python} -c "True"', result_renderer='disabled') + assert_result_count(res, 1, action='run', status='ok') + + # make sure we could actually detect breakage with the check 
above + with pytest.raises(IncompleteResultsError): + ds.run('{python} -c "breakage"', result_renderer='disabled') diff --git a/docs/source/patches.rst b/docs/source/patches.rst index a25c1aab2..a7553cf47 100644 --- a/docs/source/patches.rst +++ b/docs/source/patches.rst @@ -16,3 +16,4 @@ DataLad patches push_to_export_remote test_keyring siblings + run From c0b4ab3ef30b206ed32463fea4dc8267fa497f0b Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 16 Oct 2023 07:25:49 +0200 Subject: [PATCH 29/65] Bump minimal Python dep to 3.8 This is the oldest Python version not EOL. Moreover, datalad-next happens to already rely on 3.8 API (e.g. `gzip.BadZipFile`). Closes #481 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 7b4b01f00..8e31daaf7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ classifiers = Programming Language :: Python :: 3 [options] -python_requires = >= 3.7 +python_requires = >= 3.8 install_requires = annexremote datalad >= 0.18.4 From ce539d5ff13c0a28163619a1b64a405e0467807b Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 18 Oct 2023 09:08:31 +0200 Subject: [PATCH 30/65] Revert the API extension of `LeanAnnexRepo` This was prematurely done in https://github.com/datalad/datalad-next/pull/479 without an actual need or application as a motivation. The change is likely to come eventually, but should be approached in a less adhoc fashion. This coincidentally avoids the minor version upgrade otherwise implied by #479. --- datalad_next/datasets/__init__.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/datalad_next/datasets/__init__.py b/datalad_next/datasets/__init__.py index e22e65df8..7c8baf334 100644 --- a/datalad_next/datasets/__init__.py +++ b/datalad_next/datasets/__init__.py @@ -39,32 +39,32 @@ class LeanAnnexRepo(LegacyAnnexRepo): This is a companion of :class:`LeanGitRepo`. 
In the same spirit, it restricts its API to a limited set of method that extend - :class:`LeanGitRepo` with a set of ``call_annex*()`` methods. + :class:`LeanGitRepo`. - .. autosummary:: - - call_annex - call_annex_oneline - call_annex_success """ + #CA .. autosummary:: + + #CA call_annex + #CA call_annex_oneline + #CA call_annex_success # list of attributes permitted in the "lean" API. This list extends # the API of LeanGitRepo # TODO extend whitelist of attributes as necessary _lean_attrs = [ - # these are the ones we intend to provide - 'call_annex', - 'call_annex_oneline', - 'call_annex_success', + #CA # these are the ones we intend to provide + #CA 'call_annex', + #CA 'call_annex_oneline', + #CA 'call_annex_success', # and here are the ones that we need to permit in order to get them # to run '_check_git_version', - '_check_git_annex_version', + #CA '_check_git_annex_version', # used by AnnexRepo.__init__() -- should be using `is_valid()` 'is_valid_git', 'is_valid_annex', '_is_direct_mode_from_config', - '_call_annex', - 'call_annex_items_', + #CA '_call_annex', + #CA 'call_annex_items_', ] # intentionally limiting to just `path` as the only constructor argument From bda2fd086b1891d950fef1ad07b1496e53303fb2 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 18 Oct 2023 09:30:41 +0200 Subject: [PATCH 31/65] Fix run substitution patch Reported by @yarikoptic in https://github.com/datalad/datalad/pull/7509/files#r1358426552 This changeset adds protection against processing non-substitution configuration items. It also reduces duplication a bit more. In contrast to the change proposal in https://github.com/datalad/datalad/pull/7509 the (intermediate) set size is minimized by using an early filter. 
--- datalad_next/patches/run.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/datalad_next/patches/run.py b/datalad_next/patches/run.py index fa4e97667..b7672d7ac 100644 --- a/datalad_next/patches/run.py +++ b/datalad_next/patches/run.py @@ -18,6 +18,7 @@ https://github.com/datalad/datalad/pull/7509 """ +from itertools import filterfalse import sys from datalad.core.local.run import ( @@ -53,14 +54,19 @@ def format_command(dset, command, **kwds): """ command = normalize_command(command) sfmt = SequenceFormatter() + cprefix = 'datalad.run.substitutions.' - for k in set(cfg_defs.keys()).union(dset.config.keys()): + def not_subst(x): + return not x.startswith(cprefix) + + for k in set(filterfalse(not_subst, cfg_defs.keys())).union( + filterfalse(not_subst, dset.config.keys())): v = dset.config.get( k, # pull a default from the config definitions # if we have no value, but a key cfg_defs.get(k, {}).get('default', None)) - sub_key = k.replace("datalad.run.substitutions.", "") + sub_key = k.replace(cprefix, "") if sub_key not in kwds: kwds[sub_key] = v From 12e51db4efad54685c794bf1280d66bd92636828 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 18 Oct 2023 09:56:33 +0200 Subject: [PATCH 32/65] Missing changelogs for 1.0.1 --- changelog.d/20231018_094329_michael.hanke_changelogs.md | 8 ++++++++ changelog.d/20231018_094752_michael.hanke_changelogs.md | 7 +++++++ changelog.d/20231018_095054_michael.hanke_changelogs.md | 7 +++++++ changelog.d/20231018_095359_michael.hanke_changelogs.md | 8 ++++++++ changelog.d/20231018_095958_michael.hanke_changelogs.md | 7 +++++++ 5 files changed, 37 insertions(+) create mode 100644 changelog.d/20231018_094329_michael.hanke_changelogs.md create mode 100644 changelog.d/20231018_094752_michael.hanke_changelogs.md create mode 100644 changelog.d/20231018_095054_michael.hanke_changelogs.md create mode 100644 changelog.d/20231018_095359_michael.hanke_changelogs.md create mode 100644 
changelog.d/20231018_095958_michael.hanke_changelogs.md diff --git a/changelog.d/20231018_094329_michael.hanke_changelogs.md b/changelog.d/20231018_094329_michael.hanke_changelogs.md new file mode 100644 index 000000000..3d5b28d20 --- /dev/null +++ b/changelog.d/20231018_094329_michael.hanke_changelogs.md @@ -0,0 +1,8 @@ +### πŸ›‘ Tests + +- Simplified setup for subprocess test-coverage reporting. Standard + pytest-cov features are not employed, rather than the previous + approach that was adopted from datalad-core, which originated + in a time when testing was performed via nose. + Fixes https://github.com/datalad/datalad-next/issues/453 via + https://github.com/datalad/datalad-next/pull/457 (by @mih) diff --git a/changelog.d/20231018_094752_michael.hanke_changelogs.md b/changelog.d/20231018_094752_michael.hanke_changelogs.md new file mode 100644 index 000000000..fca4709b1 --- /dev/null +++ b/changelog.d/20231018_094752_michael.hanke_changelogs.md @@ -0,0 +1,7 @@ +### πŸ“ Documentation + +- The renderer API documentation now includes an entrypoint for the + runner-related functionality and documentation at + https://docs.datalad.org/projects/next/en/latest/generated/datalad_next.runners.html + Fixes https://github.com/datalad/datalad-next/issues/466 via + https://github.com/datalad/datalad-next/pull/467 (by @mih) diff --git a/changelog.d/20231018_095054_michael.hanke_changelogs.md b/changelog.d/20231018_095054_michael.hanke_changelogs.md new file mode 100644 index 000000000..ff51075cc --- /dev/null +++ b/changelog.d/20231018_095054_michael.hanke_changelogs.md @@ -0,0 +1,7 @@ +### πŸ› Bug Fixes + +- Update the vendor installation of versioneer to v0.29. This + resolves an installation failure with Python 3.12 due to + the removal of an ancient class. 
+ Fixes https://github.com/datalad/datalad-next/issues/475 via + https://github.com/datalad/datalad-next/pull/483 (by @mih) diff --git a/changelog.d/20231018_095359_michael.hanke_changelogs.md b/changelog.d/20231018_095359_michael.hanke_changelogs.md new file mode 100644 index 000000000..194688306 --- /dev/null +++ b/changelog.d/20231018_095359_michael.hanke_changelogs.md @@ -0,0 +1,8 @@ +### πŸ’« Enhancements and new features + +- Patch datalad-core's `run` command to honor configuration defaults + for substitutions. This enables placeholders like `{python}` that + point to `sys.executable` by default, and need not be explicitly + defined in system/user/dataset configuration. + Fixes https://github.com/datalad/datalad-next/issues/478 via + https://github.com/datalad/datalad-next/pull/485 (by @mih) diff --git a/changelog.d/20231018_095958_michael.hanke_changelogs.md b/changelog.d/20231018_095958_michael.hanke_changelogs.md new file mode 100644 index 000000000..a204910d3 --- /dev/null +++ b/changelog.d/20231018_095958_michael.hanke_changelogs.md @@ -0,0 +1,7 @@ +### πŸ› Bug Fixes + +- Bump dependency on Python to 3.8. This is presently the oldest version + still supported upstream. However, some functionality already used + 3.8 features, so this is also a bug fix. 
+ Fixes https://github.com/datalad/datalad-next/issues/481 via + https://github.com/datalad/datalad-next/pull/486 (by @mih) From 78216f118d2ea59ffa8c8f38b7d444ebe138b3ea Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 18 Oct 2023 10:08:03 +0200 Subject: [PATCH 33/65] Assemble 1.0.1 changelog --- CHANGELOG.md | 58 +++++++++++++++++++ ...20230928_123529_michael.hanke_doc_pr455.md | 4 -- .../20231003_111547_michael.hanke_bf_462.md | 7 --- changelog.d/20231004_180525_mslw.md | 6 -- ...0231018_094329_michael.hanke_changelogs.md | 8 --- ...0231018_094752_michael.hanke_changelogs.md | 7 --- ...0231018_095054_michael.hanke_changelogs.md | 7 --- ...0231018_095359_michael.hanke_changelogs.md | 8 --- ...0231018_095958_michael.hanke_changelogs.md | 7 --- 9 files changed, 58 insertions(+), 54 deletions(-) delete mode 100644 changelog.d/20230928_123529_michael.hanke_doc_pr455.md delete mode 100644 changelog.d/20231003_111547_michael.hanke_bf_462.md delete mode 100644 changelog.d/20231004_180525_mslw.md delete mode 100644 changelog.d/20231018_094329_michael.hanke_changelogs.md delete mode 100644 changelog.d/20231018_094752_michael.hanke_changelogs.md delete mode 100644 changelog.d/20231018_095054_michael.hanke_changelogs.md delete mode 100644 changelog.d/20231018_095359_michael.hanke_changelogs.md delete mode 100644 changelog.d/20231018_095958_michael.hanke_changelogs.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 55aafdfd2..f5cdd3c6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,61 @@ + +# 1.0.1 (2023-10-18) + +## πŸ› Bug Fixes + +- Fix f-string syntax in error message of the `uncurl` remote. + https://github.com/datalad/datalad-next/pull/455 (by @christian-monch) + +- `FileSystemItem.from_path()` now honors its `link_target` parameter, and + resolves a target for any symlink item conditional on this setting. + Previously, a symlink target was always resolved. 
+ Fixes https://github.com/datalad/datalad-next/issues/462 via + https://github.com/datalad/datalad-next/pull/464 (by @mih) + +- Update the vendor installation of versioneer to v0.29. This + resolves an installation failure with Python 3.12 due to + the removal of an ancient class. + Fixes https://github.com/datalad/datalad-next/issues/475 via + https://github.com/datalad/datalad-next/pull/483 (by @mih) + +- Bump dependency on Python to 3.8. This is presently the oldest version + still supported upstream. However, some functionality already used + 3.8 features, so this is also a bug fix. + Fixes https://github.com/datalad/datalad-next/issues/481 via + https://github.com/datalad/datalad-next/pull/486 (by @mih) + +## πŸ’« Enhancements and new features + +- Patch datalad-core's `run` command to honor configuration defaults + for substitutions. This enables placeholders like `{python}` that + point to `sys.executable` by default, and need not be explicitly + defined in system/user/dataset configuration. + Fixes https://github.com/datalad/datalad-next/issues/478 via + https://github.com/datalad/datalad-next/pull/485 (by @mih) + +## πŸ“ Documentation + +- Include `gitworktree` among the available file collection types + listed in `ls-file-collection`'s docstring. Fixes + https://github.com/datalad/datalad-next/issues/470 via + https://github.com/datalad/datalad-next/pull/471 (by @mslw) + +- The renderer API documentation now includes an entrypoint for the + runner-related functionality and documentation at + https://docs.datalad.org/projects/next/en/latest/generated/datalad_next.runners.html + Fixes https://github.com/datalad/datalad-next/issues/466 via + https://github.com/datalad/datalad-next/pull/467 (by @mih) + +## πŸ›‘ Tests + +- Simplified setup for subprocess test-coverage reporting. 
Standard + pytest-cov features are not employed, rather than the previous + approach that was adopted from datalad-core, which originated + in a time when testing was performed via nose. + Fixes https://github.com/datalad/datalad-next/issues/453 via + https://github.com/datalad/datalad-next/pull/457 (by @mih) + + # 1.0.0 (2023-09-25) This release represents a milestone in the development of the extension. diff --git a/changelog.d/20230928_123529_michael.hanke_doc_pr455.md b/changelog.d/20230928_123529_michael.hanke_doc_pr455.md deleted file mode 100644 index 24f9ae6a9..000000000 --- a/changelog.d/20230928_123529_michael.hanke_doc_pr455.md +++ /dev/null @@ -1,4 +0,0 @@ -### πŸ› Bug Fixes - -- Fix f-string syntax in error message of the `uncurl` remote. - https://github.com/datalad/datalad-next/pull/455 (by @christian-monch) diff --git a/changelog.d/20231003_111547_michael.hanke_bf_462.md b/changelog.d/20231003_111547_michael.hanke_bf_462.md deleted file mode 100644 index b03c6af74..000000000 --- a/changelog.d/20231003_111547_michael.hanke_bf_462.md +++ /dev/null @@ -1,7 +0,0 @@ -### πŸ› Bug Fixes - -- `FileSystemItem.from_path()` now honors its `link_target` parameter, and - resolves a target for any symlink item conditional on this setting. - Previously, a symlink target was always resolved. - Fixes https://github.com/datalad/datalad-next/issues/462 via - https://github.com/datalad/datalad-next/pull/464 (by @mih) diff --git a/changelog.d/20231004_180525_mslw.md b/changelog.d/20231004_180525_mslw.md deleted file mode 100644 index 5d7db36f7..000000000 --- a/changelog.d/20231004_180525_mslw.md +++ /dev/null @@ -1,6 +0,0 @@ -### πŸ“ Documentation - -- Include `gitworktree` among the available file collection types - listed in `ls-file-collection`'s docstring. 
Fixes - https://github.com/datalad/datalad-next/issues/470 via - https://github.com/datalad/datalad-next/pull/471 (by @mslw) diff --git a/changelog.d/20231018_094329_michael.hanke_changelogs.md b/changelog.d/20231018_094329_michael.hanke_changelogs.md deleted file mode 100644 index 3d5b28d20..000000000 --- a/changelog.d/20231018_094329_michael.hanke_changelogs.md +++ /dev/null @@ -1,8 +0,0 @@ -### πŸ›‘ Tests - -- Simplified setup for subprocess test-coverage reporting. Standard - pytest-cov features are not employed, rather than the previous - approach that was adopted from datalad-core, which originated - in a time when testing was performed via nose. - Fixes https://github.com/datalad/datalad-next/issues/453 via - https://github.com/datalad/datalad-next/pull/457 (by @mih) diff --git a/changelog.d/20231018_094752_michael.hanke_changelogs.md b/changelog.d/20231018_094752_michael.hanke_changelogs.md deleted file mode 100644 index fca4709b1..000000000 --- a/changelog.d/20231018_094752_michael.hanke_changelogs.md +++ /dev/null @@ -1,7 +0,0 @@ -### πŸ“ Documentation - -- The renderer API documentation now includes an entrypoint for the - runner-related functionality and documentation at - https://docs.datalad.org/projects/next/en/latest/generated/datalad_next.runners.html - Fixes https://github.com/datalad/datalad-next/issues/466 via - https://github.com/datalad/datalad-next/pull/467 (by @mih) diff --git a/changelog.d/20231018_095054_michael.hanke_changelogs.md b/changelog.d/20231018_095054_michael.hanke_changelogs.md deleted file mode 100644 index ff51075cc..000000000 --- a/changelog.d/20231018_095054_michael.hanke_changelogs.md +++ /dev/null @@ -1,7 +0,0 @@ -### πŸ› Bug Fixes - -- Update the vendor installation of versioneer to v0.29. This - resolves an installation failure with Python 3.12 due to - the removal of an ancient class. 
- Fixes https://github.com/datalad/datalad-next/issues/475 via - https://github.com/datalad/datalad-next/pull/483 (by @mih) diff --git a/changelog.d/20231018_095359_michael.hanke_changelogs.md b/changelog.d/20231018_095359_michael.hanke_changelogs.md deleted file mode 100644 index 194688306..000000000 --- a/changelog.d/20231018_095359_michael.hanke_changelogs.md +++ /dev/null @@ -1,8 +0,0 @@ -### πŸ’« Enhancements and new features - -- Patch datalad-core's `run` command to honor configuration defaults - for substitutions. This enables placeholders like `{python}` that - point to `sys.executable` by default, and need not be explicitly - defined in system/user/dataset configuration. - Fixes https://github.com/datalad/datalad-next/issues/478 via - https://github.com/datalad/datalad-next/pull/485 (by @mih) diff --git a/changelog.d/20231018_095958_michael.hanke_changelogs.md b/changelog.d/20231018_095958_michael.hanke_changelogs.md deleted file mode 100644 index a204910d3..000000000 --- a/changelog.d/20231018_095958_michael.hanke_changelogs.md +++ /dev/null @@ -1,7 +0,0 @@ -### πŸ› Bug Fixes - -- Bump dependency on Python to 3.8. This is presently the oldest version - still supported upstream. However, some functionality already used - 3.8 features, so this is also a bug fix. 
- Fixes https://github.com/datalad/datalad-next/issues/481 via - https://github.com/datalad/datalad-next/pull/486 (by @mih) From 0f04769a36cc84ddcfe500120ff1d97a9da26e3d Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 18 Oct 2023 13:30:22 +0200 Subject: [PATCH 34/65] Apply @mih's patch from comment for improved error as suggested in https://github.com/datalad/datalad-next/issues/346#issuecomment-1557394098 --- datalad_next/constraints/basic.py | 3 +++ datalad_next/constraints/compound.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index e60cbae74..4a47a114e 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -275,6 +275,9 @@ def long_description(self): def short_description(self): return '{%s}' % ', '.join([repr(c) for c in self._allowed]) + def __str__(self): + return f"one of {self.short_description()}" + class EnsureKeyChoice(EnsureChoice): """Ensure value under a key in an input is in a set of possible values""" diff --git a/datalad_next/constraints/compound.py b/datalad_next/constraints/compound.py index 99fe8d66d..bb0d87fd0 100644 --- a/datalad_next/constraints/compound.py +++ b/datalad_next/constraints/compound.py @@ -77,10 +77,12 @@ def __call__(self, value): iter = self._iter_type( self._item_constraint(i) for i in value ) - except TypeError as e: + except (ConstraintError, TypeError) as e: self.raise_for( value, - "cannot coerce to target (item) type", + "{itertype} item is not {itype}", + itertype=self._iter_type.__name__, + itype=self._item_constraint, __caused_by__=e, ) if self._min_len is not None or self._max_len is not None: From b27725aa7191b7414854b321e03d1f846ac16e39 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Fri, 20 Oct 2023 11:19:49 +0200 Subject: [PATCH 35/65] Basic set of tests for EnsureHashAlgorithm --- datalad_next/constraints/tests/test_basic.py | 25 ++++++++++++++++++++ 1 file changed, 25 
insertions(+) diff --git a/datalad_next/constraints/tests/test_basic.py b/datalad_next/constraints/tests/test_basic.py index 2748a158e..98f43f764 100644 --- a/datalad_next/constraints/tests/test_basic.py +++ b/datalad_next/constraints/tests/test_basic.py @@ -11,6 +11,7 @@ EnsureNone, EnsureCallable, EnsureChoice, + EnsureHashAlgorithm, EnsureKeyChoice, EnsureRange, EnsurePath, @@ -317,3 +318,27 @@ def test_EnsurePath_fordataset(existing_dataset): # 2. dataset is given as a dataset object tc = c.for_dataset(DatasetParameter(ds, ds)) assert tc('relpath') == (ds.pathobj / 'relpath') + + +def test_EnsureHashAlgorithm(): + c = EnsureHashAlgorithm() + # simple cases that should pass + hashes = [ + 'sha3_256', 'shake_256', 'sha3_384', 'md5', 'shake_128', 'sha384', + 'sha3_224', 'blake2s', 'sha1', 'blake2b', 'sha224', 'sha512', 'sha256', + 'sha3_512' + ] + for hash in hashes: + c(hash) + # a few bogus ones: + bad_hashes = [ + 'md17', 'McGyver', 'sha2', 'bogus' + ] + for baddie in bad_hashes: + with pytest.raises(ConstraintError): + c(baddie) + + # check messaging + for i in ('md5', 'shake_256', 'sha3_512'): + assert i in c.short_description() + assert i in c.long_description() From b27f2920de231fd129bbf846688c9e3b01ff78de Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sat, 21 Oct 2023 10:19:06 +0200 Subject: [PATCH 36/65] Add simple test for EnsureChoice.__str__ --- datalad_next/constraints/tests/test_basic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datalad_next/constraints/tests/test_basic.py b/datalad_next/constraints/tests/test_basic.py index 98f43f764..21cb6656d 100644 --- a/datalad_next/constraints/tests/test_basic.py +++ b/datalad_next/constraints/tests/test_basic.py @@ -189,6 +189,7 @@ def test_choice(): assert i in descr # short is a "set" or repr()s assert c.short_description() == "{'choice1', 'choice2', None}" + assert str(c) == "one of {'choice1', 'choice2', None}" # this should always work assert c('choice1') == 'choice1' assert c(None) is None 
From 678886408b0c5b40bbd23de7395712006ba99e87 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sat, 21 Oct 2023 10:24:17 +0200 Subject: [PATCH 37/65] Add changelog --- changelog.d/20231021_102012_michael.hanke_ensurehash.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changelog.d/20231021_102012_michael.hanke_ensurehash.md diff --git a/changelog.d/20231021_102012_michael.hanke_ensurehash.md b/changelog.d/20231021_102012_michael.hanke_ensurehash.md new file mode 100644 index 000000000..dcf0d5fdf --- /dev/null +++ b/changelog.d/20231021_102012_michael.hanke_ensurehash.md @@ -0,0 +1,6 @@ +### πŸ’« Enhancements and new features + +- New `EnsureHashAlgorithm` constraint to automatically expose + and verify algorithm labels from `hashlib.algorithms_guaranteed` + Fixes https://github.com/datalad/datalad-next/issues/346 via + https://github.com/datalad/datalad-next/pull/492 (by @mslw @adswa) From 666977d0492f554a3b764d825557597601bd9520 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sat, 21 Oct 2023 15:08:37 +0200 Subject: [PATCH 38/65] Minor tuning, documentation --- datalad_next/constraints/basic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index 4a47a114e..80d0f7fec 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -12,7 +12,7 @@ __docformat__ = 'restructuredtext' -from hashlib import algorithms_guaranteed +from hashlib import algorithms_guaranteed as hash_algorithms_guaranteed from pathlib import Path import re @@ -504,5 +504,9 @@ def short_description(self): class EnsureHashAlgorithm(EnsureChoice): + """Ensure an input matches a name of a ``hashlib`` algorithm + + Specifically the item must be in the ``algorithms_guaranteed`` collection. 
+ """ def __init__(self): - super(EnsureHashAlgorithm, self).__init__(*algorithms_guaranteed) + super().__init__(*hash_algorithms_guaranteed) From ce6a9c5dd0e9b3726d3440aa9fa40874e2981050 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sun, 22 Oct 2023 22:12:06 +0200 Subject: [PATCH 39/65] Replace the www-authenticate dependency with internal function The implementation is built on request's parsing utilities rather than elusive regexes. I find this better to grasp. The associated test includes all www-authenticate test cases, plus a set of additional ones that focus on multi-challenge header specifications. Closes #493 --- .../20231023_064405_michael.hanke_www_auth.md | 8 ++ datalad_next/url_operations/http.py | 8 +- datalad_next/utils/requests_auth.py | 75 ++++++++++++++++++- .../tests/test_parse_www_authenticate.py | 45 +++++++++++ setup.cfg | 1 - 5 files changed, 130 insertions(+), 7 deletions(-) create mode 100644 changelog.d/20231023_064405_michael.hanke_www_auth.md create mode 100644 datalad_next/utils/tests/test_parse_www_authenticate.py diff --git a/changelog.d/20231023_064405_michael.hanke_www_auth.md b/changelog.d/20231023_064405_michael.hanke_www_auth.md new file mode 100644 index 000000000..f4752d524 --- /dev/null +++ b/changelog.d/20231023_064405_michael.hanke_www_auth.md @@ -0,0 +1,8 @@ +### 🏠 Internal + +- The `www-authenticate` dependencies is dropped. The functionality is + replaced by a `requests`-based implementation of an alternative parser. + This trims the dependency footprint and facilitates Debian-packaging. + The previous test cases are kept and further extended. 
+ Fixes https://github.com/datalad/datalad-next/issues/493 via + https://github.com/datalad/datalad-next/pull/495 (by @mih) diff --git a/datalad_next/url_operations/http.py b/datalad_next/url_operations/http.py index 854677c4e..5d660e093 100644 --- a/datalad_next/url_operations/http.py +++ b/datalad_next/url_operations/http.py @@ -9,11 +9,13 @@ from typing import Dict import requests from requests_toolbelt import user_agent -import www_authenticate import datalad -from datalad_next.utils.requests_auth import DataladAuth +from datalad_next.utils.requests_auth import ( + DataladAuth, + parse_www_authenticate, +) from . import ( UrlOperations, UrlOperationsRemoteError, @@ -233,7 +235,7 @@ def probe_url(self, url, timeout=10.0, headers=None): headers=headers, ) if 'www-authenticate' in req.headers: - props['auth'] = www_authenticate.parse( + props['auth'] = parse_www_authenticate( req.headers['www-authenticate']) props['is_redirect'] = True if req.history else False props['status_code'] = req.status_code diff --git a/datalad_next/utils/requests_auth.py b/datalad_next/utils/requests_auth.py index 62cb5a491..fb4f3ce9d 100644 --- a/datalad_next/utils/requests_auth.py +++ b/datalad_next/utils/requests_auth.py @@ -7,7 +7,6 @@ from typing import Dict from urllib.parse import urlparse import requests -import www_authenticate from datalad_next.config import ConfigManager from datalad_next.utils import CredentialManager @@ -16,7 +15,77 @@ lgr = logging.getLogger('datalad.ext.next.utils.requests_auth') -__all__ = ['DataladAuth', 'HTTPBearerTokenAuth'] +__all__ = ['DataladAuth', 'HTTPBearerTokenAuth', 'parse_www_authenticate'] + + +def parse_www_authenticate(hdr: str) -> dict: + """Parse HTTP www-authenticate header + + This helper uses ``requests`` utilities to parse the ``www-authenticate`` + header as represented in a ``requests.Response`` instance. The header may + contain any number of challenge specifications. 
+ + The implementation follows RFC7235, where a challenge parameters set is + specified as: either a comma-separated list of parameters, or a single + sequence of characters capable of holding base64-encoded information, + and parameters are name=value pairs, where the name token is matched + case-insensitively, and each parameter name MUST only occur once + per challenge. + + Returns + ------- + dict + Keys are casefolded challenge labels (e.g., 'basic', 'digest'). + Values are: ``None`` (no parameter), ``str`` (a token68), or + ``dict`` (name/value mapping of challenge parameters) + """ + plh = requests.utils.parse_list_header + pdh = requests.utils.parse_dict_header + challenges = {} + challenge = None + # challenges as well as their properties are in a single + # comma-separated list + for item in plh(hdr): + # parse the item into a key/value set + # the value will be `None` if this item was no mapping + k, v = pdh(item).popitem() + # split the key to check for a challenge spec start + key_split = k.split(' ', maxsplit=1) + if len(key_split) > 1 or v is None: + item_suffix = item[len(key_split[0]) + 1:] + challenge = [item[len(key_split[0]) + 1:]] if item_suffix else None + challenges[key_split[0].casefold()] = challenge + else: + # implementation logic assumes that the above conditional + # was triggered before we ever get here + assert challenge + challenge.append(item) + + return { + challenge: _convert_www_authenticate_items(items) + for challenge, items in challenges.items() + } + + +def _convert_www_authenticate_items(items: list) -> None | str | dict: + pdh = requests.utils.parse_dict_header + # according to RFC7235, items can be: + # either a comma-separated list of parameters + # or a single sequence of characters capable of holding base64-encoded + # information. + # parameters are name=value pairs, where the name token is matched + # case-insensitively, and each parameter name MUST only occur once + # per challenge. 
+ if items is None: + return None + elif len(items) == 1 and pdh(items[0].rstrip('=')).popitem()[1] is None: + # this items matches the token68 appearance (no name value + # pair after potential base64 padding its removed + return items[0] + else: + return { + k.casefold(): v for i in items for k, v in pdh(i).items() + } class DataladAuth(requests.auth.AuthBase): @@ -201,7 +270,7 @@ def handle_401(self, r, **kwargs): # www-authenticate with e.g. 403s return r # which auth schemes does the server support? - auth_schemes = www_authenticate.parse(r.headers['www-authenticate']) + auth_schemes = parse_www_authenticate(r.headers['www-authenticate']) ascheme, credname, cred = self._get_credential(r.url, auth_schemes) if cred is None or 'secret' not in cred: diff --git a/datalad_next/utils/tests/test_parse_www_authenticate.py b/datalad_next/utils/tests/test_parse_www_authenticate.py new file mode 100644 index 000000000..d69fcd67b --- /dev/null +++ b/datalad_next/utils/tests/test_parse_www_authenticate.py @@ -0,0 +1,45 @@ + +from ..requests_auth import parse_www_authenticate + + +challenges = ( + # just challenge type + ('Negotiate', + [('negotiate', None)]), + # challenge and just a token, tolerate any base64 padding + ('Negotiate abcdef', + [('negotiate', 'abcdef')]), + ('Negotiate abcdef=', + [('negotiate', 'abcdef=')]), + ('Negotiate abcdef==', + [('negotiate', 'abcdef==')]), + # standard bearer + ('Bearer realm=example.com', + [('bearer', {'realm': 'example.com'})]), + # standard digest + ('Digest realm="example.com", qop="auth,auth-int", nonce="abcdef", ' + 'opaque="ghijkl"', + [('digest', {'realm': 'example.com', 'qop': 'auth,auth-int', + 'nonce': 'abcdef', 'opaque': 'ghijkl'})]), + # multi challenge + ('Basic speCial="paf ram", realm="basIC", ' + 'Bearer, ' + 'Digest realm="http-auth@example.org", qop="auth, auth-int", ' + 'algorithm=MD5', + [('basic', {'special': 'paf ram', 'realm': 'basIC'}), + ('bearer', None), + ('digest', {'realm': "http-auth@example.org", 
'qop': "auth, auth-int", + 'algorithm': 'MD5'})]), + # same challenge, multiple times, last one wins + ('Basic realm="basIC", ' + 'Basic realm="complex"', + [('basic', {'realm': 'complex'})]), +) + + +def test_parse_www_authenticate(): + for hdr, targets in challenges: + res = parse_www_authenticate(hdr) + for ctype, props in targets: + assert ctype in res + assert res[ctype] == props diff --git a/setup.cfg b/setup.cfg index 8e31daaf7..3f6897aed 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,6 @@ python_requires = >= 3.8 install_requires = annexremote datalad >= 0.18.4 - www-authenticate humanize packages = find_namespace: include_package_data = True From 3fdf1caf997b5bfc888e86b215de845247add2ca Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 23 Oct 2023 06:53:42 +0200 Subject: [PATCH 40/65] Attempt to perform type annotation checks --- .github/workflows/mypy.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/mypy.yml diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml new file mode 100644 index 000000000..e1e2d36f1 --- /dev/null +++ b/.github/workflows/mypy.yml @@ -0,0 +1,26 @@ +name: Type annotation + +on: + push: + paths: + - '*.py' + +jobs: + mypy: + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + architecture: x64 + - name: Checkout + uses: actions/checkout@v3 + - name: Install mypy + run: pip install mypy + - name: Run mypy + uses: sasanquaneuf/mypy-github-action@releases/v1 + with: + checkName: 'mypy' # NOTE: this needs to be the same as the job name + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 953708df9fb8a731dc4edfcc81006575fce343a7 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 23 Oct 2023 09:31:21 +0200 Subject: [PATCH 41/65] Tune path matching for mypy workflow --- .github/workflows/mypy.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index e1e2d36f1..c7309082b 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -3,7 +3,8 @@ name: Type annotation on: push: paths: - - '*.py' + - 'datalad_next/**.py' + - '!**/tests/**.py' jobs: mypy: From c1714166684daae693091b11397a5b2be68182c5 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 23 Oct 2023 09:38:20 +0200 Subject: [PATCH 42/65] Try running mypy on PRs --- .github/workflows/mypy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index c7309082b..0764422d5 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -1,7 +1,7 @@ name: Type annotation on: - push: + pr: paths: - 'datalad_next/**.py' - '!**/tests/**.py' From fc964a9384a15d1ae06d80f8f7ac0e96801eb8d0 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 23 Oct 2023 09:49:11 +0200 Subject: [PATCH 43/65] Fix mypy workflow, wrong event label --- .github/workflows/mypy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 0764422d5..d91ae8295 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -1,7 +1,7 @@ name: Type annotation on: - pr: + pull_request: paths: - 'datalad_next/**.py' - '!**/tests/**.py' From 0b9c2db09f57c620bb42de9962555d7a0b57f10b Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 23 Oct 2023 10:10:44 +0200 Subject: [PATCH 44/65] Install dependencies for mypy run --- .github/workflows/mypy.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index d91ae8295..69d8a7ae1 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -17,6 +17,10 @@ jobs: architecture: x64 - name: Checkout uses: actions/checkout@v3 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r 
requirements-devel.txt - name: Install mypy run: pip install mypy - name: Run mypy From 5dfa5f695cdd06407028ebacf65dae7489e94910 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 23 Oct 2023 14:47:24 +0200 Subject: [PATCH 45/65] Let the test battery honor DATALAD_TESTS_NONETWORK This is primarily achieved via a custom pytest mark. However, there is a session-level fixture that itself depends on network access. Unlike with a function-level fixture, I cannot figure out how to make any dependent test be skipped. Instead, I need to use the mark as a skipping decorator. With these changes I can successfully run the test suite on a system with no network access (not just a proxy-based diversion): ``` 247 passed, 39 skipped, 19 warnings in 171.10s (0:02:51) ``` Ping #490 --- datalad_next/archive_operations/tests/test_tarfile.py | 5 +++++ datalad_next/commands/tests/test_ls_file_collection.py | 3 +++ datalad_next/iter_collections/tests/test_itertar.py | 3 +++ datalad_next/patches/tests/test_annex_progress_logging.py | 3 +++ datalad_next/tests/fixtures.py | 4 ++++ datalad_next/tests/marker.py | 7 +++++++ datalad_next/url_operations/tests/test_http.py | 4 ++++ 7 files changed, 29 insertions(+) create mode 100644 datalad_next/tests/marker.py diff --git a/datalad_next/archive_operations/tests/test_tarfile.py b/datalad_next/archive_operations/tests/test_tarfile.py index 4a493db49..52b4ea62d 100644 --- a/datalad_next/archive_operations/tests/test_tarfile.py +++ b/datalad_next/archive_operations/tests/test_tarfile.py @@ -10,6 +10,7 @@ import pytest from datalad_next.iter_collections.utils import FileSystemItemType +from datalad_next.tests.marker import skipif_no_network from ..tarfile import TarArchiveOperations @@ -37,6 +38,7 @@ def structured_sample_tar_xz( ) +@skipif_no_network def test_tararchive_basics(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz # this is intentionally a hard-coded POSIX relpath @@ -48,6 +50,7 @@ def 
test_tararchive_basics(structured_sample_tar_xz: TestArchive): assert member.read() == spec.content +@skipif_no_network def test_tararchive_contain(structured_sample_tar_xz: TestArchive): # this is intentionally a hard-coded POSIX relpath member_name = 'test-archive/onetwothree.txt' @@ -59,6 +62,7 @@ def test_tararchive_contain(structured_sample_tar_xz: TestArchive): assert 'bogus' not in archive_ops +@skipif_no_network def test_tararchive_iterator(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz with TarArchiveOperations(spec.path) as archive_ops: @@ -68,6 +72,7 @@ def test_tararchive_iterator(structured_sample_tar_xz: TestArchive): assert item.name in archive_ops +@skipif_no_network def test_open(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz file_pointer = set() diff --git a/datalad_next/commands/tests/test_ls_file_collection.py b/datalad_next/commands/tests/test_ls_file_collection.py index 2455af7f4..dee6247f2 100644 --- a/datalad_next/commands/tests/test_ls_file_collection.py +++ b/datalad_next/commands/tests/test_ls_file_collection.py @@ -7,6 +7,7 @@ from datalad.api import ls_file_collection from datalad_next.constraints.exceptions import CommandParametrizationError +from datalad_next.tests.marker import skipif_no_network from ..ls_file_collection import LsFileCollectionParamValidator @@ -30,6 +31,7 @@ def test_ls_file_collection_insufficient_args(): ls_file_collection('bogus', 'http://example.com') +@skipif_no_network def test_ls_file_collection_tarfile(sample_tar_xz): kwa = dict(result_renderer='disabled') # smoke test first @@ -84,6 +86,7 @@ def test_ls_file_collection_validator(): val.get_collection_iter(type='bogus', collection='any', hash=None) +@skipif_no_network def test_replace_add_archive_content(sample_tar_xz, existing_dataset): kwa = dict(result_renderer='disabled') diff --git a/datalad_next/iter_collections/tests/test_itertar.py b/datalad_next/iter_collections/tests/test_itertar.py index 
7f76b2985..23f393a48 100644 --- a/datalad_next/iter_collections/tests/test_itertar.py +++ b/datalad_next/iter_collections/tests/test_itertar.py @@ -3,6 +3,8 @@ from datalad.api import download +from datalad_next.tests.marker import skipif_no_network + from ..tarfile import ( TarfileItem, FileSystemItemType, @@ -42,6 +44,7 @@ def sample_tar_xz(tmp_path_factory): tfpath.unlink() +@skipif_no_network def test_iter_tar(sample_tar_xz): target_hash = {'SHA1': 'a8fdc205a9f19cc1c7507a60c4f01b13d11d7fd0', 'md5': 'ba1f2511fc30423bdbb183fe33f3dd0f'} diff --git a/datalad_next/patches/tests/test_annex_progress_logging.py b/datalad_next/patches/tests/test_annex_progress_logging.py index 82e211474..6f575f305 100644 --- a/datalad_next/patches/tests/test_annex_progress_logging.py +++ b/datalad_next/patches/tests/test_annex_progress_logging.py @@ -1,4 +1,7 @@ +from datalad_next.tests.marker import skipif_no_network + +@skipif_no_network def test_uncurl_progress_reporting_to_annex(existing_dataset, monkeypatch): """Set up a repo that is used to download a key, check that we see progress reports diff --git a/datalad_next/tests/fixtures.py b/datalad_next/tests/fixtures.py index 23fdfd753..254a36157 100644 --- a/datalad_next/tests/fixtures.py +++ b/datalad_next/tests/fixtures.py @@ -368,6 +368,10 @@ def httpbin(httpbin_service): raises ``SkipTest`` whenever any of these undesired conditions is detected. Otherwise it just relays ``httpbin_service``. 
""" + if os.environ.get('DATALAD_TESTS_NONETWORK'): + raise SkipTest( + 'Not running httpbin-based test: NONETWORK flag set' + ) if 'APPVEYOR' in os.environ and 'DEPLOY_HTTPBIN_IMAGE' not in os.environ: raise SkipTest( "Not running httpbin-based test on appveyor without " diff --git a/datalad_next/tests/marker.py b/datalad_next/tests/marker.py new file mode 100644 index 000000000..819970628 --- /dev/null +++ b/datalad_next/tests/marker.py @@ -0,0 +1,7 @@ +import os +import pytest + +skipif_no_network = pytest.mark.skipif( + 'DATALAD_TESTS_NONETWORK' in os.environ, + reason='DATALAD_TESTS_NONETWORK is set' +) diff --git a/datalad_next/url_operations/tests/test_http.py b/datalad_next/url_operations/tests/test_http.py index be63c6a3b..7e4a0b544 100644 --- a/datalad_next/url_operations/tests/test_http.py +++ b/datalad_next/url_operations/tests/test_http.py @@ -1,6 +1,8 @@ import gzip import pytest +from datalad_next.tests.marker import skipif_no_network + from ..any import AnyUrlOperations from ..http import ( HttpUrlOperations, @@ -59,6 +61,7 @@ def test_custom_http_headers_via_config(datalad_cfg): assert huo._headers['X-Funky'] == 'Stuff' +@skipif_no_network def test_transparent_decompression(tmp_path): # this file is offered with transparent compression/decompression # by the github webserver @@ -73,6 +76,7 @@ def test_transparent_decompression(tmp_path): '[build-system]\nrequires = ["setuptools >= 43.0.0", "wheel"]\n' +@skipif_no_network def test_compressed_file_stay_compressed(tmp_path): # this file is offered with transparent compression/decompression # by the github webserver, but is also actually gzip'ed From 462d77aa88a0b9da58a07c4f16f47c84dafc9cc5 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sun, 22 Oct 2023 22:12:06 +0200 Subject: [PATCH 46/65] Replace the www-authenticate dependency with internal function The implementation is built on request's parsing utilities rather than elusive regexes. I find this better to grasp. 
The associated test includes all www-authenticate test cases, plus a set of additional ones that focus on multi-challenge header specifications. Closes #493 --- .../20231023_064405_michael.hanke_www_auth.md | 8 ++ datalad_next/url_operations/http.py | 8 +- datalad_next/utils/requests_auth.py | 75 ++++++++++++++++++- .../tests/test_parse_www_authenticate.py | 45 +++++++++++ setup.cfg | 1 - 5 files changed, 130 insertions(+), 7 deletions(-) create mode 100644 changelog.d/20231023_064405_michael.hanke_www_auth.md create mode 100644 datalad_next/utils/tests/test_parse_www_authenticate.py diff --git a/changelog.d/20231023_064405_michael.hanke_www_auth.md b/changelog.d/20231023_064405_michael.hanke_www_auth.md new file mode 100644 index 000000000..f4752d524 --- /dev/null +++ b/changelog.d/20231023_064405_michael.hanke_www_auth.md @@ -0,0 +1,8 @@ +### 🏠 Internal + +- The `www-authenticate` dependencies is dropped. The functionality is + replaced by a `requests`-based implementation of an alternative parser. + This trims the dependency footprint and facilitates Debian-packaging. + The previous test cases are kept and further extended. + Fixes https://github.com/datalad/datalad-next/issues/493 via + https://github.com/datalad/datalad-next/pull/495 (by @mih) diff --git a/datalad_next/url_operations/http.py b/datalad_next/url_operations/http.py index 854677c4e..5d660e093 100644 --- a/datalad_next/url_operations/http.py +++ b/datalad_next/url_operations/http.py @@ -9,11 +9,13 @@ from typing import Dict import requests from requests_toolbelt import user_agent -import www_authenticate import datalad -from datalad_next.utils.requests_auth import DataladAuth +from datalad_next.utils.requests_auth import ( + DataladAuth, + parse_www_authenticate, +) from . 
import ( UrlOperations, UrlOperationsRemoteError, @@ -233,7 +235,7 @@ def probe_url(self, url, timeout=10.0, headers=None): headers=headers, ) if 'www-authenticate' in req.headers: - props['auth'] = www_authenticate.parse( + props['auth'] = parse_www_authenticate( req.headers['www-authenticate']) props['is_redirect'] = True if req.history else False props['status_code'] = req.status_code diff --git a/datalad_next/utils/requests_auth.py b/datalad_next/utils/requests_auth.py index 62cb5a491..fb4f3ce9d 100644 --- a/datalad_next/utils/requests_auth.py +++ b/datalad_next/utils/requests_auth.py @@ -7,7 +7,6 @@ from typing import Dict from urllib.parse import urlparse import requests -import www_authenticate from datalad_next.config import ConfigManager from datalad_next.utils import CredentialManager @@ -16,7 +15,77 @@ lgr = logging.getLogger('datalad.ext.next.utils.requests_auth') -__all__ = ['DataladAuth', 'HTTPBearerTokenAuth'] +__all__ = ['DataladAuth', 'HTTPBearerTokenAuth', 'parse_www_authenticate'] + + +def parse_www_authenticate(hdr: str) -> dict: + """Parse HTTP www-authenticate header + + This helper uses ``requests`` utilities to parse the ``www-authenticate`` + header as represented in a ``requests.Response`` instance. The header may + contain any number of challenge specifications. + + The implementation follows RFC7235, where a challenge parameters set is + specified as: either a comma-separated list of parameters, or a single + sequence of characters capable of holding base64-encoded information, + and parameters are name=value pairs, where the name token is matched + case-insensitively, and each parameter name MUST only occur once + per challenge. + + Returns + ------- + dict + Keys are casefolded challenge labels (e.g., 'basic', 'digest'). 
+ Values are: ``None`` (no parameter), ``str`` (a token68), or + ``dict`` (name/value mapping of challenge parameters) + """ + plh = requests.utils.parse_list_header + pdh = requests.utils.parse_dict_header + challenges = {} + challenge = None + # challenges as well as their properties are in a single + # comma-separated list + for item in plh(hdr): + # parse the item into a key/value set + # the value will be `None` if this item was no mapping + k, v = pdh(item).popitem() + # split the key to check for a challenge spec start + key_split = k.split(' ', maxsplit=1) + if len(key_split) > 1 or v is None: + item_suffix = item[len(key_split[0]) + 1:] + challenge = [item[len(key_split[0]) + 1:]] if item_suffix else None + challenges[key_split[0].casefold()] = challenge + else: + # implementation logic assumes that the above conditional + # was triggered before we ever get here + assert challenge + challenge.append(item) + + return { + challenge: _convert_www_authenticate_items(items) + for challenge, items in challenges.items() + } + + +def _convert_www_authenticate_items(items: list) -> None | str | dict: + pdh = requests.utils.parse_dict_header + # according to RFC7235, items can be: + # either a comma-separated list of parameters + # or a single sequence of characters capable of holding base64-encoded + # information. + # parameters are name=value pairs, where the name token is matched + # case-insensitively, and each parameter name MUST only occur once + # per challenge. + if items is None: + return None + elif len(items) == 1 and pdh(items[0].rstrip('=')).popitem()[1] is None: + # this items matches the token68 appearance (no name value + # pair after potential base64 padding its removed + return items[0] + else: + return { + k.casefold(): v for i in items for k, v in pdh(i).items() + } class DataladAuth(requests.auth.AuthBase): @@ -201,7 +270,7 @@ def handle_401(self, r, **kwargs): # www-authenticate with e.g. 
403s return r # which auth schemes does the server support? - auth_schemes = www_authenticate.parse(r.headers['www-authenticate']) + auth_schemes = parse_www_authenticate(r.headers['www-authenticate']) ascheme, credname, cred = self._get_credential(r.url, auth_schemes) if cred is None or 'secret' not in cred: diff --git a/datalad_next/utils/tests/test_parse_www_authenticate.py b/datalad_next/utils/tests/test_parse_www_authenticate.py new file mode 100644 index 000000000..d69fcd67b --- /dev/null +++ b/datalad_next/utils/tests/test_parse_www_authenticate.py @@ -0,0 +1,45 @@ + +from ..requests_auth import parse_www_authenticate + + +challenges = ( + # just challenge type + ('Negotiate', + [('negotiate', None)]), + # challenge and just a token, tolerate any base64 padding + ('Negotiate abcdef', + [('negotiate', 'abcdef')]), + ('Negotiate abcdef=', + [('negotiate', 'abcdef=')]), + ('Negotiate abcdef==', + [('negotiate', 'abcdef==')]), + # standard bearer + ('Bearer realm=example.com', + [('bearer', {'realm': 'example.com'})]), + # standard digest + ('Digest realm="example.com", qop="auth,auth-int", nonce="abcdef", ' + 'opaque="ghijkl"', + [('digest', {'realm': 'example.com', 'qop': 'auth,auth-int', + 'nonce': 'abcdef', 'opaque': 'ghijkl'})]), + # multi challenge + ('Basic speCial="paf ram", realm="basIC", ' + 'Bearer, ' + 'Digest realm="http-auth@example.org", qop="auth, auth-int", ' + 'algorithm=MD5', + [('basic', {'special': 'paf ram', 'realm': 'basIC'}), + ('bearer', None), + ('digest', {'realm': "http-auth@example.org", 'qop': "auth, auth-int", + 'algorithm': 'MD5'})]), + # same challenge, multiple times, last one wins + ('Basic realm="basIC", ' + 'Basic realm="complex"', + [('basic', {'realm': 'complex'})]), +) + + +def test_parse_www_authenticate(): + for hdr, targets in challenges: + res = parse_www_authenticate(hdr) + for ctype, props in targets: + assert ctype in res + assert res[ctype] == props diff --git a/setup.cfg b/setup.cfg index 8e31daaf7..3f6897aed 
100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,6 @@ python_requires = >= 3.8 install_requires = annexremote datalad >= 0.18.4 - www-authenticate humanize packages = find_namespace: include_package_data = True From 9dc8934d545ef9db735070bd5d25dd458be98327 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 23 Oct 2023 14:47:24 +0200 Subject: [PATCH 47/65] Let the test battery honor DATALAD_TESTS_NONETWORK This is primarily achieved via a custom pytest mark. However, there is a session-level fixture that itself depends on network access. Unlike with a function-level fixture, I cannot figure out how to make any dependent test be skipped. Instead, I need to use the mark as a skipping decorator. With these changes I can successfully run the test suite on a system with no network access (not just a proxy-based diversion): ``` 247 passed, 39 skipped, 19 warnings in 171.10s (0:02:51) ``` Ping #490 --- changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md | 5 +++++ datalad_next/archive_operations/tests/test_tarfile.py | 5 +++++ datalad_next/commands/tests/test_ls_file_collection.py | 3 +++ datalad_next/iter_collections/tests/test_itertar.py | 3 +++ datalad_next/patches/tests/test_annex_progress_logging.py | 3 +++ datalad_next/tests/fixtures.py | 4 ++++ datalad_next/tests/marker.py | 7 +++++++ datalad_next/url_operations/tests/test_http.py | 4 ++++ 8 files changed, 34 insertions(+) create mode 100644 changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md create mode 100644 datalad_next/tests/marker.py diff --git a/changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md b/changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md new file mode 100644 index 000000000..5c91509cc --- /dev/null +++ b/changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md @@ -0,0 +1,5 @@ +### πŸ›‘ Tests + +- The test battery now honors the `DATALAD_TESTS_NONETWORK` environment + variable and downgrades by skipping any tests that require external + network access. 
(by @mih) diff --git a/datalad_next/archive_operations/tests/test_tarfile.py b/datalad_next/archive_operations/tests/test_tarfile.py index 4a493db49..52b4ea62d 100644 --- a/datalad_next/archive_operations/tests/test_tarfile.py +++ b/datalad_next/archive_operations/tests/test_tarfile.py @@ -10,6 +10,7 @@ import pytest from datalad_next.iter_collections.utils import FileSystemItemType +from datalad_next.tests.marker import skipif_no_network from ..tarfile import TarArchiveOperations @@ -37,6 +38,7 @@ def structured_sample_tar_xz( ) +@skipif_no_network def test_tararchive_basics(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz # this is intentionally a hard-coded POSIX relpath @@ -48,6 +50,7 @@ def test_tararchive_basics(structured_sample_tar_xz: TestArchive): assert member.read() == spec.content +@skipif_no_network def test_tararchive_contain(structured_sample_tar_xz: TestArchive): # this is intentionally a hard-coded POSIX relpath member_name = 'test-archive/onetwothree.txt' @@ -59,6 +62,7 @@ def test_tararchive_contain(structured_sample_tar_xz: TestArchive): assert 'bogus' not in archive_ops +@skipif_no_network def test_tararchive_iterator(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz with TarArchiveOperations(spec.path) as archive_ops: @@ -68,6 +72,7 @@ def test_tararchive_iterator(structured_sample_tar_xz: TestArchive): assert item.name in archive_ops +@skipif_no_network def test_open(structured_sample_tar_xz: TestArchive): spec = structured_sample_tar_xz file_pointer = set() diff --git a/datalad_next/commands/tests/test_ls_file_collection.py b/datalad_next/commands/tests/test_ls_file_collection.py index 2455af7f4..dee6247f2 100644 --- a/datalad_next/commands/tests/test_ls_file_collection.py +++ b/datalad_next/commands/tests/test_ls_file_collection.py @@ -7,6 +7,7 @@ from datalad.api import ls_file_collection from datalad_next.constraints.exceptions import CommandParametrizationError +from datalad_next.tests.marker 
import skipif_no_network from ..ls_file_collection import LsFileCollectionParamValidator @@ -30,6 +31,7 @@ def test_ls_file_collection_insufficient_args(): ls_file_collection('bogus', 'http://example.com') +@skipif_no_network def test_ls_file_collection_tarfile(sample_tar_xz): kwa = dict(result_renderer='disabled') # smoke test first @@ -84,6 +86,7 @@ def test_ls_file_collection_validator(): val.get_collection_iter(type='bogus', collection='any', hash=None) +@skipif_no_network def test_replace_add_archive_content(sample_tar_xz, existing_dataset): kwa = dict(result_renderer='disabled') diff --git a/datalad_next/iter_collections/tests/test_itertar.py b/datalad_next/iter_collections/tests/test_itertar.py index 7f76b2985..23f393a48 100644 --- a/datalad_next/iter_collections/tests/test_itertar.py +++ b/datalad_next/iter_collections/tests/test_itertar.py @@ -3,6 +3,8 @@ from datalad.api import download +from datalad_next.tests.marker import skipif_no_network + from ..tarfile import ( TarfileItem, FileSystemItemType, @@ -42,6 +44,7 @@ def sample_tar_xz(tmp_path_factory): tfpath.unlink() +@skipif_no_network def test_iter_tar(sample_tar_xz): target_hash = {'SHA1': 'a8fdc205a9f19cc1c7507a60c4f01b13d11d7fd0', 'md5': 'ba1f2511fc30423bdbb183fe33f3dd0f'} diff --git a/datalad_next/patches/tests/test_annex_progress_logging.py b/datalad_next/patches/tests/test_annex_progress_logging.py index 82e211474..6f575f305 100644 --- a/datalad_next/patches/tests/test_annex_progress_logging.py +++ b/datalad_next/patches/tests/test_annex_progress_logging.py @@ -1,4 +1,7 @@ +from datalad_next.tests.marker import skipif_no_network + +@skipif_no_network def test_uncurl_progress_reporting_to_annex(existing_dataset, monkeypatch): """Set up a repo that is used to download a key, check that we see progress reports diff --git a/datalad_next/tests/fixtures.py b/datalad_next/tests/fixtures.py index 23fdfd753..254a36157 100644 --- a/datalad_next/tests/fixtures.py +++ b/datalad_next/tests/fixtures.py @@ 
-368,6 +368,10 @@ def httpbin(httpbin_service): raises ``SkipTest`` whenever any of these undesired conditions is detected. Otherwise it just relays ``httpbin_service``. """ + if os.environ.get('DATALAD_TESTS_NONETWORK'): + raise SkipTest( + 'Not running httpbin-based test: NONETWORK flag set' + ) if 'APPVEYOR' in os.environ and 'DEPLOY_HTTPBIN_IMAGE' not in os.environ: raise SkipTest( "Not running httpbin-based test on appveyor without " diff --git a/datalad_next/tests/marker.py b/datalad_next/tests/marker.py new file mode 100644 index 000000000..819970628 --- /dev/null +++ b/datalad_next/tests/marker.py @@ -0,0 +1,7 @@ +import os +import pytest + +skipif_no_network = pytest.mark.skipif( + 'DATALAD_TESTS_NONETWORK' in os.environ, + reason='DATALAD_TESTS_NONETWORK is set' +) diff --git a/datalad_next/url_operations/tests/test_http.py b/datalad_next/url_operations/tests/test_http.py index be63c6a3b..7e4a0b544 100644 --- a/datalad_next/url_operations/tests/test_http.py +++ b/datalad_next/url_operations/tests/test_http.py @@ -1,6 +1,8 @@ import gzip import pytest +from datalad_next.tests.marker import skipif_no_network + from ..any import AnyUrlOperations from ..http import ( HttpUrlOperations, @@ -59,6 +61,7 @@ def test_custom_http_headers_via_config(datalad_cfg): assert huo._headers['X-Funky'] == 'Stuff' +@skipif_no_network def test_transparent_decompression(tmp_path): # this file is offered with transparent compression/decompression # by the github webserver @@ -73,6 +76,7 @@ def test_transparent_decompression(tmp_path): '[build-system]\nrequires = ["setuptools >= 43.0.0", "wheel"]\n' +@skipif_no_network def test_compressed_file_stay_compressed(tmp_path): # this file is offered with transparent compression/decompression # by the github webserver, but is also actually gzip'ed From 70bc86085433da74f13538c9149a5d357930e33a Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 23 Oct 2023 15:18:55 +0200 Subject: [PATCH 48/65] Changelog for 1.0.2 --- CHANGELOG.md | 17 
+++++++++++++++++
 .../20231023_064405_michael.hanke_www_auth.md |  8 --------
 ...231023_150459_michael.hanke_tst_nonetwork.md |  5 -----
 3 files changed, 17 insertions(+), 13 deletions(-)
 delete mode 100644 changelog.d/20231023_064405_michael.hanke_www_auth.md
 delete mode 100644 changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5cdd3c6b..7fe490fcc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,21 @@
+# 1.0.2 (2023-10-23) -- Debianize!
+
+## 🏠 Internal
+
+- The `www-authenticate` dependency is dropped. The functionality is
+  replaced by a `requests`-based implementation of an alternative parser.
+  This trims the dependency footprint and facilitates Debian-packaging.
+  The previous test cases are kept and further extended.
+  Fixes https://github.com/datalad/datalad-next/issues/493 via
+  https://github.com/datalad/datalad-next/pull/495 (by @mih)
+
+## πŸ›‘ Tests
+
+- The test battery now honors the `DATALAD_TESTS_NONETWORK` environment
+  variable and downgrades by skipping any tests that require external
+  network access. (by @mih)
+
 # 1.0.1 (2023-10-18)

 ## πŸ› Bug Fixes
diff --git a/changelog.d/20231023_064405_michael.hanke_www_auth.md b/changelog.d/20231023_064405_michael.hanke_www_auth.md
deleted file mode 100644
index f4752d524..000000000
--- a/changelog.d/20231023_064405_michael.hanke_www_auth.md
+++ /dev/null
@@ -1,8 +0,0 @@
-### 🏠 Internal
-
-- The `www-authenticate` dependencies is dropped. The functionality is
-  replaced by a `requests`-based implementation of an alternative parser.
-  This trims the dependency footprint and facilitates Debian-packaging.
-  The previous test cases are kept and further extended.
- Fixes https://github.com/datalad/datalad-next/issues/493 via - https://github.com/datalad/datalad-next/pull/495 (by @mih) diff --git a/changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md b/changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md deleted file mode 100644 index 5c91509cc..000000000 --- a/changelog.d/20231023_150459_michael.hanke_tst_nonetwork.md +++ /dev/null @@ -1,5 +0,0 @@ -### πŸ›‘ Tests - -- The test battery now honors the `DATALAD_TESTS_NONETWORK` environment - variable and downgrades by skipping any tests that require external - network access. (by @mih) From 03614010e562ab87eff080979f5998139ba4bb03 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Wed, 25 Oct 2023 12:08:44 +0200 Subject: [PATCH 49/65] add missing contributor and missing orcid --- .zenodo.json | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 904bc1534..b05453614 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -42,7 +42,12 @@ "orcid": "0000-0003-2917-3450" }, { - "name": "Wodder II, John T." 
+ "name": "Wodder II, John T.", + "orcid": "0000-0001-7106-2661" + }, + { + "name": "Trainito, Caterina", + "orcid": "0000-0002-1713-8343" } ], "keywords": [ @@ -52,4 +57,4 @@ "access_right": "open", "license": "MIT", "upload_type": "software" -} +} \ No newline at end of file From e57c173a50f5d207a0665469791e93107d2074b3 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Wed, 25 Oct 2023 19:42:32 +0200 Subject: [PATCH 50/65] initialize all-contributors bot --- .all-contributorsrc | 4 ++++ README.md | 13 +++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 .all-contributorsrc diff --git a/.all-contributorsrc b/.all-contributorsrc new file mode 100644 index 000000000..4d3510a41 --- /dev/null +++ b/.all-contributorsrc @@ -0,0 +1,4 @@ +{ + "projectName": "datalad-next", + "projectOwner": "datalad" +} diff --git a/README.md b/README.md index c4446b41b..bb9012d90 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # DataLad NEXT extension +[![All Contributors](https://img.shields.io/github/all-contributors/datalad/datalad-next?color=ee8449&style=flat-square)](#contributors) [![Build status](https://ci.appveyor.com/api/projects/status/dxomp8wysjb7x2os/branch/main?svg=true)](https://ci.appveyor.com/project/mih/datalad-next/branch/main) [![codecov](https://codecov.io/gh/datalad/datalad-next/branch/main/graph/badge.svg?token=2P8rak7lSX)](https://codecov.io/gh/datalad/datalad-next) [![crippled-filesystems](https://github.com/datalad/datalad-next/workflows/crippled-filesystems/badge.svg)](https://github.com/datalad/datalad-next/actions?query=workflow%3Acrippled-filesystems) @@ -146,3 +147,15 @@ available at http://docs.datalad.org/projects/next/en/latest/#datalad-patches This DataLad extension was developed with funding from the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under grant SFB 1451 ([431549029](https://gepris.dfg.de/gepris/projekt/431549029), INF project). 
+ + +## Contributors + + + + + + + + + From 5bdce294beac515e52a09df8daff130d050aea6d Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Wed, 25 Oct 2023 19:53:16 +0200 Subject: [PATCH 51/65] add all-contributors contributors --- .all-contributorsrc | 164 +++++++++++++++++++++++++++++++++++++++++++- README.md | 18 +++++ 2 files changed, 179 insertions(+), 3 deletions(-) diff --git a/.all-contributorsrc b/.all-contributorsrc index 4d3510a41..b61be8539 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -1,4 +1,162 @@ { - "projectName": "datalad-next", - "projectOwner": "datalad" -} + "projectName": "datalad-next", + "projectOwner": "datalad", + "repoType": "github", + "commitConvention": "angular", + "contributors": [ + { + "login": "mih", + "name": "Michael Hanke", + "avatar_url": "https://avatars.githubusercontent.com/u/136479?v=4", + "profile": "http://psychoinformatics.de/", + "contributions": [ + "bug", + "code", + "content", + "design", + "doc", + "financial", + "fundingFinding", + "ideas", + "infra", + "maintenance", + "mentoring", + "platform", + "projectManagement", + "review", + "talk", + "test", + "tool", + "userTesting" + ] + }, + { + "login": "catetrai", + "name": "catetrai", + "avatar_url": "https://avatars.githubusercontent.com/u/18424941?v=4", + "profile": "https://github.com/catetrai", + "contributions": [ + "code", + "design", + "doc", + "ideas", + "test" + ] + }, + { + "login": "effigies", + "name": "Chris Markiewicz", + "avatar_url": "https://avatars.githubusercontent.com/u/83442?v=4", + "profile": "https://github.com/effigies", + "contributions": [ + "maintenance", + "code" + ] + }, + { + "login": "mslw", + "name": "MichaΕ‚ Szczepanik", + "avatar_url": "https://avatars.githubusercontent.com/u/11985212?v=4", + "profile": "https://github.com/mslw", + "contributions": [ + "bug", + "code", + "content", + "doc", + "example", + "ideas", + "infra", + "maintenance", + "review", + "talk", + "test", + "tutorial", + "userTesting" + ] + }, + { + 
"login": "jsheunis", + "name": "Stephan Heunis", + "avatar_url": "https://avatars.githubusercontent.com/u/10141237?v=4", + "profile": "https://jsheunis.github.io/", + "contributions": [ + "bug", + "code", + "doc", + "ideas", + "maintenance", + "talk", + "userTesting" + ] + }, + { + "login": "bpoldrack", + "name": "Benjamin Poldrack", + "avatar_url": "https://avatars.githubusercontent.com/u/10498301?v=4", + "profile": "https://github.com/bpoldrack", + "contributions": [ + "bug", + "code" + ] + }, + { + "login": "yarikoptic", + "name": "Yaroslav Halchenko", + "avatar_url": "https://avatars.githubusercontent.com/u/39889?v=4", + "profile": "https://github.com/yarikoptic", + "contributions": [ + "bug", + "code", + "infra", + "maintenance", + "tool" + ] + }, + { + "login": "christian-monch", + "name": "Christian MΓΆnch", + "avatar_url": "https://avatars.githubusercontent.com/u/17925232?v=4", + "profile": "https://github.com/christian-monch", + "contributions": [ + "code", + "design", + "doc", + "ideas", + "review", + "test", + "userTesting" + ] + }, + { + "login": "adswa", + "name": "Adina Wagner", + "avatar_url": "https://avatars.githubusercontent.com/u/29738718?v=4", + "profile": "https://github.com/adswa", + "contributions": [ + "a11y", + "bug", + "code", + "doc", + "example", + "maintenance", + "projectManagement", + "review", + "talk", + "test", + "tutorial", + "userTesting" + ] + }, + { + "login": "jwodder", + "name": "John T. Wodder II", + "avatar_url": "https://avatars.githubusercontent.com/u/98207?v=4", + "profile": "https://github.com/jwodder", + "contributions": [ + "code", + "infra", + "test" + ] + } + ] +} \ No newline at end of file diff --git a/README.md b/README.md index bb9012d90..6fb97b2e4 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,24 @@ Forschungsgemeinschaft (DFG, German Research Foundation) under grant SFB 1451 + + + + + + + + + + + + + + + + + +
Michael Hanke
Michael Hanke

πŸ› πŸ’» πŸ–‹ 🎨 πŸ“– πŸ’΅ πŸ” πŸ€” πŸš‡ 🚧 πŸ§‘β€πŸ« πŸ“¦ πŸ“† πŸ‘€ πŸ“’ ⚠️ πŸ”§ πŸ““
catetrai
catetrai

πŸ’» 🎨 πŸ“– πŸ€” ⚠️
Chris Markiewicz
Chris Markiewicz

🚧
MichaΕ‚ Szczepanik
MichaΕ‚ Szczepanik

πŸ› πŸ’» πŸ–‹ πŸ“– πŸ’‘ πŸ€” πŸš‡ 🚧 πŸ‘€ πŸ“’ ⚠️ βœ… πŸ““
Stephan Heunis
Stephan Heunis

πŸ› πŸ’» πŸ“– πŸ€” 🚧 πŸ“’ πŸ““
Benjamin Poldrack
Benjamin Poldrack

πŸ› πŸ’»
Yaroslav Halchenko
Yaroslav Halchenko

πŸ› πŸ’» πŸš‡ 🚧 πŸ”§
Christian MΓΆnch
Christian MΓΆnch

πŸ’» 🎨 πŸ“– πŸ€” πŸ‘€ ⚠️ πŸ““
Adina Wagner
Adina Wagner

️️️️♿️ πŸ› πŸ’» πŸ“– πŸ’‘ 🚧 πŸ“† πŸ‘€ πŸ“’ ⚠️ βœ… πŸ““
John T. Wodder II
John T. Wodder II

πŸ’» πŸš‡ ⚠️
From 82a29bf9e69fe485c8bcd0a7019c0347b921a3cb Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Wed, 25 Oct 2023 19:59:28 +0200 Subject: [PATCH 52/65] Import tributors update workflow from core --- .github/workflows/update-contributors.yml | 86 +++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 .github/workflows/update-contributors.yml diff --git a/.github/workflows/update-contributors.yml b/.github/workflows/update-contributors.yml new file mode 100644 index 000000000..261981c3f --- /dev/null +++ b/.github/workflows/update-contributors.yml @@ -0,0 +1,86 @@ +name: allcontributors-auto-detect + +on: + push: + branches: + - main + +jobs: + Update: + name: Generate + runs-on: ubuntu-latest + if: contains(github.repository, 'datalad/datalad-next') + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Tributors Update + uses: con/tributors@0.1.1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + + # Single text list (space separated) of parsers, leave unset to auto-detect + parsers: unset + + # Update lookup with GitHub metadata + update_lookup: github + + # Skip these users (example) + skip_users: + + # INFO, DEBUG, ERROR, WARNING, etc. 
+ log_level: DEBUG + + # If files already exist and an init is done, force overwrite + force: true + + # the minimum number of contributions required to add a user + threshold: 1 + + - name: Checkout New Branch + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BRANCH_AGAINST: "main" + run: | + printf "GitHub Actor: ${GITHUB_ACTOR}\n" + export BRANCH_FROM="contributors/update-$(date '+%Y-%m-%d')" + git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" + + BRANCH_EXISTS=$(git ls-remote --heads origin ${BRANCH_FROM}) + if [[ -z ${BRANCH_EXISTS} ]]; then + printf "Branch does not exist in remote.\n" + else + printf "Branch already exists in remote.\n" + exit 1 + fi + git branch + git checkout -b "${BRANCH_FROM}" || git checkout "${BRANCH_FROM}" + git branch + + git config --global user.name "github-actions" + git config --global user.email "github-actions@users.noreply.github.com" + git status + + if git diff-index --quiet HEAD --; then + export OPEN_PULL_REQUEST=0 + printf "No changes\n" + else + export OPEN_PULL_REQUEST=1 + printf "Changes\n" + git commit -a -m "Automated deployment to update contributors $(date '+%Y-%m-%d') + + [skip ci]" + git push origin "${BRANCH_FROM}" + fi + echo "OPEN_PULL_REQUEST=${OPEN_PULL_REQUEST}" >> $GITHUB_ENV + echo "PULL_REQUEST_FROM_BRANCH=${BRANCH_FROM}" >> $GITHUB_ENV + echo "PULL_REQUEST_TITLE=[tributors] ${BRANCH_FROM}" >> $GITHUB_ENV + echo "PULL_REQUEST_BODY='Tributors update automated pull request.\n\n[skip ci]'" >> $GITHUB_ENV + + - name: Open Pull Request + uses: vsoch/pull-request-action@1.0.24 + if: ${{ env.OPEN_PULL_REQUEST == '1' }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PULL_REQUEST_BRANCH: "main" \ No newline at end of file From e70fde62410ad2639e5266346914773fcc564cf5 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 25 Oct 2023 20:29:45 +0200 Subject: [PATCH 53/65] Patch update for more robust behavior with adjusted branches Companion of 
https://github.com/datalad/datalad/pull/7522 --- ...31025_202631_michael.hanke_bf_core_7522.md | 6 ++ datalad_next/patches/enabled.py | 1 + datalad_next/patches/update.py | 58 +++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 changelog.d/20231025_202631_michael.hanke_bf_core_7522.md create mode 100644 datalad_next/patches/update.py diff --git a/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md b/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md new file mode 100644 index 000000000..f225fce82 --- /dev/null +++ b/changelog.d/20231025_202631_michael.hanke_bf_core_7522.md @@ -0,0 +1,6 @@ +### πŸ› Bug Fixes + +- Add patch to fix `update`'s target detection for adjusted mode datasets + that can crash under some circumstances. + See https://github.com/datalad/datalad/issues/7507, fixed via + https://github.com/datalad/datalad-next/pull/509 (by @mih) diff --git a/datalad_next/patches/enabled.py b/datalad_next/patches/enabled.py index 763961c4b..c705c0f60 100644 --- a/datalad_next/patches/enabled.py +++ b/datalad_next/patches/enabled.py @@ -13,4 +13,5 @@ customremotes_main, create_sibling_gitlab, run, + update, ) diff --git a/datalad_next/patches/update.py b/datalad_next/patches/update.py new file mode 100644 index 000000000..d0f7be85b --- /dev/null +++ b/datalad_next/patches/update.py @@ -0,0 +1,58 @@ +"""Robustify ``update()`` target detection for adjusted mode datasets + +The true cause of the problem is not well understood. +https://github.com/datalad/datalad/issues/7507 documents that it is not +easy to capture the breakage in a test. +""" + +from . import apply_patch + + +# This function is taken from datalad-core@cdc0ceb30ae04265c5369186acf2ab2683a8ec96 +# datalad/distribution/update.py +# The change has been proposed in https://github.com/datalad/datalad/pull/7522 +def _choose_update_target(repo, branch, remote, cfg_remote): + """Select a target to update `repo` from. 
+
+    Note: This function is not concerned with _how_ the update is done (e.g.,
+    merge, reset, ...).
+
+    Parameters
+    ----------
+    repo : Repo instance
+    branch : str
+        The current branch.
+    remote : str
+        The remote which updates are coming from.
+    cfg_remote : str
+        The configured upstream remote.
+
+    Returns
+    -------
+    str (the target) or None if a choice wasn't made.
+    """
+    target = None
+    if cfg_remote and remote == cfg_remote:
+        # Use the configured cfg_remote branch as the target.
+        #
+        # In this scenario, it's tempting to use FETCH_HEAD as the target. For
+        # a merge, that would be the equivalent of 'git pull REMOTE'. But doing
+        # so would be problematic when the GitRepo.fetch() call was passed
+        # all_=True. Given we can't use FETCH_HEAD, it's tempting to use the
+        # branch.*.merge value, but that assumes a value for remote.*.fetch.
+        target = repo.call_git_oneline(
+            ["rev-parse", "--symbolic-full-name", "--abbrev-ref=strict",
+             # THIS IS THE PATCH: prefix @{upstream} with the branch name
+             # of the corresponding branch
+             f"{repo.get_corresponding_branch(branch) or ''}" "@{upstream}"],
+            read_only=True)
+    elif branch:
+        remote_branch = "{}/{}".format(remote, branch)
+        if repo.commit_exists(remote_branch):
+            target = remote_branch
+    return target
+
+
+apply_patch(
+    'datalad.distribution.update', None, '_choose_update_target',
+    _choose_update_target)

From 4f5d5f3ce4dbfd2c7734e087d1aaa4c62b9dd11b Mon Sep 17 00:00:00 2001
From: Michael Hanke
Date: Wed, 25 Oct 2023 20:38:02 +0200
Subject: [PATCH 54/65] Type annotation check simplified and only scanning
 changes

The previous setup tried everything and produced an overwhelming report
that nobody would pay attention to.

This new approach selectively reports only on content that is touched by
a specific PR.
Closes #501 --- .github/workflows/mypy.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 69d8a7ae1..89a3c1369 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -7,7 +7,7 @@ on: - '!**/tests/**.py' jobs: - mypy: + static-type-check: runs-on: ubuntu-latest steps: - name: Setup Python @@ -17,15 +17,15 @@ jobs: architecture: x64 - name: Checkout uses: actions/checkout@v3 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install -r requirements-devel.txt - name: Install mypy - run: pip install mypy - - name: Run mypy - uses: sasanquaneuf/mypy-github-action@releases/v1 + run: pip install mypy # you can pin your preferred version + - name: Get Python changed files + id: changed-py-files + uses: tj-actions/changed-files@v23 with: - checkName: 'mypy' # NOTE: this needs to be the same as the job name - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + files: | + *.py + **/*.py + - name: Type check changed files + if: steps.changed-py-files.outputs.any_changed == 'true' + run: mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --ignore-missing-imports From a35a6549f0bff11656761382607d5d0e0418dafa Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 25 Oct 2023 20:50:54 +0200 Subject: [PATCH 55/65] Remove redundant contributor list After https://github.com/datalad/datalad-next/pull/508 was merged. 
--- CONTRIBUTORS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 8f1765a7a..d7ac0d167 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -1,3 +1 @@ -The following people have contributed to this project: - -Michael Hanke +See README.md for a comprehensive list of contributors From ff9f2d1a6dc4c7fcadd5a1f4f2c310cc68827549 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 25 Oct 2023 18:51:09 +0000 Subject: [PATCH 56/65] Automated deployment to update contributors 2023-10-25 [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6fb97b2e4..dc020d7d2 100644 --- a/README.md +++ b/README.md @@ -159,7 +159,7 @@ Forschungsgemeinschaft (DFG, German Research Foundation) under grant SFB 1451 Michael Hanke
Michael Hanke

πŸ› πŸ’» πŸ–‹ 🎨 πŸ“– πŸ’΅ πŸ” πŸ€” πŸš‡ 🚧 πŸ§‘β€πŸ« πŸ“¦ πŸ“† πŸ‘€ πŸ“’ ⚠️ πŸ”§ πŸ““ catetrai
catetrai

πŸ’» 🎨 πŸ“– πŸ€” ⚠️ - Chris Markiewicz
Chris Markiewicz

🚧 + Chris Markiewicz
Chris Markiewicz

🚧 πŸ’» MichaΕ‚ Szczepanik
MichaΕ‚ Szczepanik

πŸ› πŸ’» πŸ–‹ πŸ“– πŸ’‘ πŸ€” πŸš‡ 🚧 πŸ‘€ πŸ“’ ⚠️ βœ… πŸ““ Stephan Heunis
Stephan Heunis

πŸ› πŸ’» πŸ“– πŸ€” 🚧 πŸ“’ πŸ““ Benjamin Poldrack
Benjamin Poldrack

πŸ› πŸ’» From 9ff4f3cdd39a6992bb9cf200d7dfcac88e0631dc Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 25 Oct 2023 18:53:06 +0000 Subject: [PATCH 57/65] Automated deployment to update contributors 2023-10-25 [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6fb97b2e4..dc020d7d2 100644 --- a/README.md +++ b/README.md @@ -159,7 +159,7 @@ Forschungsgemeinschaft (DFG, German Research Foundation) under grant SFB 1451 Michael Hanke
Michael Hanke

πŸ› πŸ’» πŸ–‹ 🎨 πŸ“– πŸ’΅ πŸ” πŸ€” πŸš‡ 🚧 πŸ§‘β€πŸ« πŸ“¦ πŸ“† πŸ‘€ πŸ“’ ⚠️ πŸ”§ πŸ““ catetrai
catetrai

πŸ’» 🎨 πŸ“– πŸ€” ⚠️ - Chris Markiewicz
Chris Markiewicz

🚧 + Chris Markiewicz
Chris Markiewicz

🚧 πŸ’» MichaΕ‚ Szczepanik
MichaΕ‚ Szczepanik

πŸ› πŸ’» πŸ–‹ πŸ“– πŸ’‘ πŸ€” πŸš‡ 🚧 πŸ‘€ πŸ“’ ⚠️ βœ… πŸ““ Stephan Heunis
Stephan Heunis

πŸ› πŸ’» πŸ“– πŸ€” 🚧 πŸ“’ πŸ““ Benjamin Poldrack
Benjamin Poldrack

πŸ› πŸ’» From 826ec159d58b1365adb3a7bf8441658588bb70e5 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Oct 2023 08:19:01 +0200 Subject: [PATCH 58/65] Simplify documentation structure The previous setup had two entrypoints for each API documentation chapter. it was easy for a user to miss information in each overview. This change remove the deep links to avoid that. This makes it possible to remove some document structure duplication too. --- docs/source/developer_guide/index.rst | 6 +-- docs/source/index.rst | 68 ++------------------------- docs/source/patches.rst | 3 ++ 3 files changed, 11 insertions(+), 66 deletions(-) diff --git a/docs/source/developer_guide/index.rst b/docs/source/developer_guide/index.rst index 1ae91c6a1..25c398788 100644 --- a/docs/source/developer_guide/index.rst +++ b/docs/source/developer_guide/index.rst @@ -1,7 +1,7 @@ .. _devguide: -The developer's guide to datalad-next -************************************* +Developer Guide +=============== This guide sheds light on new and reusable subsystems developed in ``datalad-next``. The target audience are developers that intend to build up on or use functionality provided by this extension. @@ -10,4 +10,4 @@ The target audience are developers that intend to build up on or use functionali :maxdepth: 2 constraints.rst - contributing.rst \ No newline at end of file + contributing.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 86db9be4c..fea428125 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,82 +29,24 @@ extension has to be enabled for auto-loading by executing:: Doing so will enable the extension to also alter the behavior the core DataLad package and its commands. -API -=== -High-level API commands ------------------------ +Provided functionality +====================== .. toctree:: - :maxdepth: 2 + :maxdepth: 1 api.rst - -Command line reference ----------------------- - -.. 
toctree:: - :maxdepth: 2 - cmd.rst - - -Python tooling --------------- - -``datalad-next`` comprises a number of more-or-less self-contained -mini-packages providing particular functionality. - -.. toctree:: - :maxdepth: 1 - Infrastructure classes and utilities - - -Git remote helpers ------------------- - -.. toctree:: - :maxdepth: 2 - git-remote-helpers.rst - - -Git-annex backends ------------------- - -.. toctree:: - :maxdepth: 2 - annex-backends.rst - - - -Git-annex special remotes -------------------------- - - -.. toctree:: - :maxdepth: 2 - annex-specialremotes.rst - - - -DataLad patches ---------------- - -Patches that are automatically applied to DataLad when loading the -``datalad-next`` extension package. - -.. toctree:: - :maxdepth: 2 - patches.rst -Developer Guide ---------------- +Contributor information +======================= .. toctree:: :maxdepth: 2 diff --git a/docs/source/patches.rst b/docs/source/patches.rst index a7553cf47..4a6974da2 100644 --- a/docs/source/patches.rst +++ b/docs/source/patches.rst @@ -1,6 +1,9 @@ DataLad patches *************** +Patches that are automatically applied to DataLad when loading the +``datalad-next`` extension package. + .. currentmodule:: datalad_next.patches .. autosummary:: :toctree: generated From 2e2d7948bc2ef288dff21f8a73567ac61e75a168 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Oct 2023 08:37:26 +0200 Subject: [PATCH 59/65] Complete list of patches in documentation Add minimal documentation to all patches that had none yet. 
--- datalad_next/patches/cli_configoverrides.py | 7 +++++++ datalad_next/patches/commanderror.py | 11 ++++++++--- datalad_next/patches/create_sibling_gitlab.py | 4 +++- docs/source/patches.rst | 8 ++++++-- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/datalad_next/patches/cli_configoverrides.py b/datalad_next/patches/cli_configoverrides.py index 59276b5d3..f76be4de1 100644 --- a/datalad_next/patches/cli_configoverrides.py +++ b/datalad_next/patches/cli_configoverrides.py @@ -1,3 +1,10 @@ +"""Post DataLad config overrides CLI/ENV as GIT_CONFIG items in process ENV + +This enables their propagation to any subprocess. This include the +specification of overrides via the ``datalad -c ...`` option of the +main CLI entrypoint. +""" + from datalad.config import _update_from_env as _update_from_datalad_env from datalad.cli.helpers import _parse_overrides_from_cmdline diff --git a/datalad_next/patches/commanderror.py b/datalad_next/patches/commanderror.py index daef7dc69..eb42182a0 100644 --- a/datalad_next/patches/commanderror.py +++ b/datalad_next/patches/commanderror.py @@ -1,3 +1,11 @@ +"""Improve ``CommandError`` rendering + +Without this patch that overwrites ``__repr__``, it would use +``RuntimeError``'s variant and ignore all additional structured information +except for ``.msg`` -- which is frequently empty and confuses with a +`CommandError('')` display. 
+""" + from datalad.runner.exception import CommandError @@ -5,7 +13,4 @@ def commanderror_repr(self) -> str: return self.to_str() -# without overwriting __repr__ it would use RuntimeError's variant -# with ignore all info but `.msg` which will be empty frequently -# and confuse people with `CommandError('')` CommandError.__repr__ = commanderror_repr diff --git a/datalad_next/patches/create_sibling_gitlab.py b/datalad_next/patches/create_sibling_gitlab.py index c4c8974fe..971953dc3 100644 --- a/datalad_next/patches/create_sibling_gitlab.py +++ b/datalad_next/patches/create_sibling_gitlab.py @@ -1,5 +1,7 @@ -""" +"""Streamline user experience +Discontinue advertizing the ``hierarchy`` layout, and better explain +limitations of the command. """ import datalad.distributed.create_sibling_gitlab as mod_gitlab diff --git a/docs/source/patches.rst b/docs/source/patches.rst index 4a6974da2..336545132 100644 --- a/docs/source/patches.rst +++ b/docs/source/patches.rst @@ -9,14 +9,18 @@ Patches that are automatically applied to DataLad when loading the :toctree: generated annexrepo + cli_configoverrides + commanderror common_cfg configuration create_sibling_ghlike + create_sibling_gitlab customremotes_main distribution_dataset interface_utils push_optimize push_to_export_remote - test_keyring - siblings run + siblings + test_keyring + update From b86109115f39ba8efc475063453395c53de0bcdb Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Oct 2023 08:41:31 +0200 Subject: [PATCH 60/65] Update datalad_next/patches/cli_configoverrides.py Co-authored-by: Adina Wagner --- datalad_next/patches/cli_configoverrides.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_next/patches/cli_configoverrides.py b/datalad_next/patches/cli_configoverrides.py index f76be4de1..b553854e1 100644 --- a/datalad_next/patches/cli_configoverrides.py +++ b/datalad_next/patches/cli_configoverrides.py @@ -1,6 +1,6 @@ """Post DataLad config overrides CLI/ENV as GIT_CONFIG items 
in process ENV -This enables their propagation to any subprocess. This include the +This enables their propagation to any subprocess. This includes the specification of overrides via the ``datalad -c ...`` option of the main CLI entrypoint. """ From b8af58beccaaecaf23bc2f800f346ef05b8a6adb Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Oct 2023 09:42:49 +0200 Subject: [PATCH 61/65] Tune type checker workflow Make the report narrowly focused on the changeset of a PR. See comments inside for rational. --- .github/workflows/mypy.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 89a3c1369..3a494c00d 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -18,7 +18,7 @@ jobs: - name: Checkout uses: actions/checkout@v3 - name: Install mypy - run: pip install mypy # you can pin your preferred version + run: python -m pip install mypy # you can pin your preferred version - name: Get Python changed files id: changed-py-files uses: tj-actions/changed-files@v23 @@ -28,4 +28,12 @@ jobs: **/*.py - name: Type check changed files if: steps.changed-py-files.outputs.any_changed == 'true' - run: mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --ignore-missing-imports + run: | + # get any type stubs that mypy thinks it needs + mypy --install-types --non-interactive + # run mypy on the modified files only, and do not even follow imports. 
+ # this results is a fairly superficial test, but given the overall + # state of annotations, we strive to become more correct incrementally + # with focused error reports, rather than barfing a huge complaint + # that is unrelated to the changeset someone has been working on + mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --follow-imports skip --ignore-missing-imports --pretty --show-error-context From b3ba25decb847a79ca95a02841a6b6a3e5c5d1ee Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Oct 2023 10:05:29 +0200 Subject: [PATCH 62/65] Fix few trivial type annotation bugs --- datalad_next/constraints/basic.py | 3 ++- datalad_next/constraints/exceptions.py | 2 +- datalad_next/credman/manager.py | 7 ++++--- datalad_next/url_operations/http.py | 2 +- datalad_next/utils/requests_auth.py | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index 80d0f7fec..af81bee45 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -15,6 +15,7 @@ from hashlib import algorithms_guaranteed as hash_algorithms_guaranteed from pathlib import Path import re +from typing import Callable from datalad_next.datasets import resolve_path @@ -368,7 +369,7 @@ def __init__(self, path_type: type = Path, is_format: str | None = None, lexists: bool | None = None, - is_mode: callable | None = None, + is_mode: Callable | None = None, ref: Path | None = None, ref_is: str = 'parent-or-same-as', dsarg: DatasetParameter | None = None): diff --git a/datalad_next/constraints/exceptions.py b/datalad_next/constraints/exceptions.py index 2955c5025..d75b08ee5 100644 --- a/datalad_next/constraints/exceptions.py +++ b/datalad_next/constraints/exceptions.py @@ -105,7 +105,7 @@ def caused_by(self) -> Tuple[Exception] | None: """ cb = self.context.get('__caused_by__', None) if cb is None: - return + return None elif isinstance(cb, Exception): return (cb,) else: diff 
--git a/datalad_next/credman/manager.py b/datalad_next/credman/manager.py index 5e85c6a12..dfe0f4fe2 100644 --- a/datalad_next/credman/manager.py +++ b/datalad_next/credman/manager.py @@ -15,6 +15,7 @@ __all__ = ['CredentialManager'] +from collections.abc import Set from datetime import datetime import logging import re @@ -823,7 +824,7 @@ def _get_credential_from_cfg(self, name: str) -> Dict: if k.startswith(var_prefix) } - def _get_known_credential_names(self) -> set: + def _get_known_credential_names(self) -> Set[str]: known_credentials = set( '.'.join(k.split('.')[2:-1]) for k in self._cfg.keys() if k.startswith('datalad.credential.') @@ -898,7 +899,7 @@ def _get_legacy_credential_from_keyring( type_hint = dict(_yield_legacy_credential_types()).get(name) if not type_hint or type_hint not in self._cred_types: - return + return None cred = {} lc = self._cred_types[type_hint] @@ -952,7 +953,7 @@ def _get_secret( return secret # no secret found anywhere - return + return None @property def _cfg(self): diff --git a/datalad_next/url_operations/http.py b/datalad_next/url_operations/http.py index 5d660e093..0ba79b188 100644 --- a/datalad_next/url_operations/http.py +++ b/datalad_next/url_operations/http.py @@ -266,7 +266,7 @@ def _stream_download_from_request( ) fp = None - props = {} + props: Dict[str, str] = {} try: # we can only write to file-likes opened in bytes mode fp = sys.stdout.buffer if to_path is None else open(to_path, 'wb') diff --git a/datalad_next/utils/requests_auth.py b/datalad_next/utils/requests_auth.py index fb4f3ce9d..089055995 100644 --- a/datalad_next/utils/requests_auth.py +++ b/datalad_next/utils/requests_auth.py @@ -133,7 +133,7 @@ def save_entered_credential(self, suggested_name: str | None = None, """ if self._entered_credential is None: # nothing to do - return + return None return self._credman.set( name=None, _lastused=True, From f35233860a1ff76cf4c99f190a91e651df6e9982 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Oct 2023 
10:09:04 +0200 Subject: [PATCH 63/65] Pass changed files to mypy type-stub install checker Otherwise it would error, not knowing what to look for --- .github/workflows/mypy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 3a494c00d..8b3d9be0c 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -30,10 +30,10 @@ jobs: if: steps.changed-py-files.outputs.any_changed == 'true' run: | # get any type stubs that mypy thinks it needs - mypy --install-types --non-interactive + mypy --install-types --non-interactive --follow-imports skip --ignore-missing-imports ${{ steps.changed-py-files.outputs.all_changed_files }} # run mypy on the modified files only, and do not even follow imports. # this results is a fairly superficial test, but given the overall # state of annotations, we strive to become more correct incrementally # with focused error reports, rather than barfing a huge complaint # that is unrelated to the changeset someone has been working on - mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --follow-imports skip --ignore-missing-imports --pretty --show-error-context + mypy --follow-imports skip --ignore-missing-imports --pretty --show-error-context ${{ steps.changed-py-files.outputs.all_changed_files }} From ca8202f1bf2c01dc5362183d5141dec98978f926 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Oct 2023 18:52:30 +0200 Subject: [PATCH 64/65] Support archive type detection for `.tgz` annex keys This seems to be at least as sensible as `.tar`. 
Closes #517 --- changelog.d/20231026_185357_michael.hanke_archivist_tgz.md | 6 ++++++ datalad_next/types/archivist.py | 2 ++ datalad_next/types/tests/test_archivist.py | 6 ++++++ 3 files changed, 14 insertions(+) create mode 100644 changelog.d/20231026_185357_michael.hanke_archivist_tgz.md diff --git a/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md b/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md new file mode 100644 index 000000000..f41dadca2 --- /dev/null +++ b/changelog.d/20231026_185357_michael.hanke_archivist_tgz.md @@ -0,0 +1,6 @@ +### πŸ’« Enhancements and new features + +- The `archivist` remote now supports archive type detection + from `*E`-type annex keys for `.tgz` archives too. + Fixes https://github.com/datalad/datalad-next/issues/517 via + https://github.com/datalad/datalad-next/pull/518 (by @mih) diff --git a/datalad_next/types/archivist.py b/datalad_next/types/archivist.py index 12e9b2b32..3c1ab4906 100644 --- a/datalad_next/types/archivist.py +++ b/datalad_next/types/archivist.py @@ -134,6 +134,8 @@ def from_str(cls, url: str): atype = ArchiveType.zip elif '.tar' in suf: atype = ArchiveType.tar + elif '.tgz' in suf: + atype = ArchiveType.tar return cls( akey=akey, diff --git a/datalad_next/types/tests/test_archivist.py b/datalad_next/types/tests/test_archivist.py index 8f781633d..b3d03ac06 100644 --- a/datalad_next/types/tests/test_archivist.py +++ b/datalad_next/types/tests/test_archivist.py @@ -23,6 +23,12 @@ def test_archivistlocator(): assert ArchivistLocator.from_str( 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tar#path=f.txt' ).atype == ArchiveType.tar + assert ArchivistLocator.from_str( + 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tgz#path=f.txt' + ).atype == ArchiveType.tar + assert ArchivistLocator.from_str( + 'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.tar.gz#path=f.txt' + ).atype == ArchiveType.tar assert ArchivistLocator.from_str( 
'dl+archive:MD5E-s1--e9f624eb778e6f945771c543b6e9c7b2.zip#path=f.txt' ).atype == ArchiveType.zip From 202fec4703fb0495d99258b8430669b1c566b4a6 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Oct 2023 19:06:49 +0200 Subject: [PATCH 65/65] Fix a series of type-annotation issues --- datalad_next/types/archivist.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/datalad_next/types/archivist.py b/datalad_next/types/archivist.py index 3c1ab4906..17c538dbe 100644 --- a/datalad_next/types/archivist.py +++ b/datalad_next/types/archivist.py @@ -74,7 +74,7 @@ class ArchivistLocator: """ akey: AnnexKey member: PurePosixPath - size: int + size: int | None = None # datalad-archives did not have the type info, we want to be # able to handle those too, make optional atype: ArchiveType | None = None @@ -91,21 +91,21 @@ def __str__(self) -> str: @classmethod def from_str(cls, url: str): """Return ``ArchivistLocator`` from ``str`` form""" - url_matched = _recognized_urls.match(url) - if not url_matched: + url_match = _recognized_urls.match(url) + if not url_match: raise ValueError('Unrecognized dl+archives locator syntax') - url_matched = url_matched.groupdict() + url_matched = url_match.groupdict() # convert to desired type akey = AnnexKey.from_str(url_matched['key']) # archive member properties - props_matched = _archive_member_props.match(url_matched['props']) - if not props_matched: + props_match = _archive_member_props.match(url_matched['props']) + if not props_match: # without at least a 'path' there is nothing we can do here raise ValueError( 'dl+archives locator contains invalid archive member ' f'specification: {url_matched["props"]!r}') - props_matched = props_matched.groupdict() + props_matched = props_match.groupdict() amember_path = PurePosixPath(props_matched['path']) if amember_path.is_absolute(): raise ValueError( @@ -116,6 +116,8 @@ def from_str(cls, url: str): # size is optional, regex ensure that it is an int size = 
props_matched.get('size') + if size is not None: + size = int(size) # archive type, could be None atype = props_matched.get('atype')