Skip to content

Commit

Permalink
Merge pull request datalad#733 from christian-monch/urloperations-delete
Browse files Browse the repository at this point in the history
add `delete`-method to `UrlOperations`
  • Loading branch information
mih authored Jul 11, 2024
2 parents 249c5cc + 80bc38c commit bbe6c04
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 62 deletions.
5 changes: 1 addition & 4 deletions datalad_next/url_operations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def upload(self,
``UrlOperationsInteractionError`` general issues in communicating
with the remote side; ``UrlOperationsAuthenticationError`` for
errors related to (failed) authentication at the remote;
``UrlOperationsAuthorizationError`` for (lack of) authorizating
``UrlOperationsAuthorizationError`` for (lack of) authorization
to access a particular resource of perform a particular operation;
``UrlOperationsResourceUnknown`` if the target of an operation
does not exist.
Expand Down Expand Up @@ -367,6 +367,3 @@ def _with_progress(self,
end_log_msg
)
)



31 changes: 23 additions & 8 deletions datalad_next/url_operations/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import logging
import random
import stat
import sys
import time
from io import IOBase
Expand Down Expand Up @@ -207,8 +208,13 @@ def delete(self,
timeout: float | None = None) -> Dict:
"""Delete the target of a file:// URL
The target can be a file or a directory. If it is a directory, it has
to be empty.
The target can be a file or a directory. `delete` will attempt to
delete write protected targets (by setting write permissions). If
the target is a directory, the complete directory and all its
content will be deleted. `delete` will not modify the permissions
of the parent of the target. That means, it will not delete a target
in a write protected directory, but it will empty target, if target is
a directory.
See :meth:`datalad_next.url_operations.UrlOperations.delete`
for parameter documentation and exception behavior.
Expand All @@ -220,20 +226,29 @@ def delete(self,
"""
path = self._file_url_to_path(url)
try:
path.unlink()
if path.is_dir():
self._delete_dir(path)
else:
path.chmod(stat.S_IWUSR)
path.unlink()
except FileNotFoundError as e:
raise UrlOperationsResourceUnknown(url) from e
except IsADirectoryError:
try:
path.rmdir()
except Exception as e:
raise UrlOperationsRemoteError(url, message=str(e)) from e
except Exception as e:
# wrap this into the datalad-standard, but keep the
# original exception linked
raise UrlOperationsRemoteError(url, message=str(e)) from e
return {}

def _delete_dir(self, path: Path):
path.chmod(stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
for sub_path in path.iterdir():
if sub_path.is_dir():
self._delete_dir(sub_path)
else:
sub_path.chmod(stat.S_IWUSR)
sub_path.unlink()
path.rmdir()

def _copyfp(self,
src_fp: IOBase | BinaryIO,
dst_fp: IOBase | BinaryIO,
Expand Down
71 changes: 44 additions & 27 deletions datalad_next/url_operations/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ def get_headers(self, headers: Dict | None = None) -> Dict:
hdrs.update(headers)
return hdrs

def _check_status(self, response, url):
try:
response.raise_for_status()
except requests.exceptions.RequestException as e:
# wrap this into the datalad-standard, but keep the
# original exception linked
if e.response.status_code == 404:
# special case reporting for a 404
raise UrlOperationsResourceUnknown(
url, status_code=e.response.status_code) from e
else:
raise UrlOperationsRemoteError(
url, message=str(e), status_code=e.response.status_code
) from e

def stat(self,
url: str,
*,
Expand All @@ -79,6 +94,7 @@ def stat(self,
For access targets found absent.
"""
auth = DataladAuth(self.cfg, credential=credential)
props: Dict[str, str | int]
with requests.head(
url,
headers=self.get_headers(),
Expand All @@ -89,19 +105,7 @@ def stat(self,
allow_redirects=True,
) as r:
# fail visible for any non-OK outcome
try:
r.raise_for_status()
except requests.exceptions.RequestException as e:
# wrap this into the datalad-standard, but keep the
# original exception linked
if e.response.status_code == 404:
# special case reporting for a 404
raise UrlOperationsResourceUnknown(
url, status_code=e.response.status_code) from e
else:
raise UrlOperationsRemoteError(
url, message=str(e), status_code=e.response.status_code
) from e
self._check_status(r, url)
props = {
# standardize on lower-case header keys.
# also prefix anything other than 'content-length' to make
Expand All @@ -123,6 +127,32 @@ def stat(self,
pass
return props

def delete(self,
url: str,
*,
credential: str | None = None,
timeout: float | None = None) -> Dict:
"""Delete the target of a http(s)://-URL
"""
auth = DataladAuth(self.cfg, credential=credential)
try:
with requests.delete(
url=url,
stream=True,
headers=self.get_headers(),
auth=auth,
timeout=timeout,
) as r:
# fail visible for any non-OK outcome
self._check_status(r, url)
except requests.exceptions.ReadTimeout as e:
raise TimeoutError(f"Timeout while deleting {url}") from e
auth.save_entered_credential(
context=f'download from {url}'
)
return {}

def download(self,
from_url: str,
to_path: Path | None,
Expand Down Expand Up @@ -151,20 +181,7 @@ def download(self,
auth=auth,
) as r:
# fail visible for any non-OK outcome
try:
r.raise_for_status()
except requests.exceptions.RequestException as e:
# wrap this into the datalad-standard, but keep the
# original exception linked
if e.response.status_code == 404:
# special case reporting for a 404
raise UrlOperationsResourceUnknown(
from_url, status_code=e.response.status_code) from e
else:
raise UrlOperationsRemoteError(
from_url, message=str(e), status_code=e.response.status_code
) from e

self._check_status(r, from_url)
download_props = self._stream_download_from_request(
r, to_path, hash=hash)
auth.save_entered_credential(
Expand Down
61 changes: 56 additions & 5 deletions datalad_next/url_operations/ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,15 @@ def __del__(self):
context.__exit__(None, None, None)

@staticmethod
def _check_return_code(return_code: int | None, url: str):
def _check_return_code(return_code: int | None, url: str, msg: str = ''):
if return_code == 244:
# this is the special code for a file-not-found
raise UrlOperationsResourceUnknown(url)
raise UrlOperationsResourceUnknown(url, message=msg)
elif return_code != 0:
raise UrlOperationsRemoteError(
url,
message=f'ssh command returned {return_code}'
+ f': {msg}' if msg else ''
)

def ssh_shell_for(self,
Expand Down Expand Up @@ -138,9 +139,51 @@ def stat(self,
cmd = self.format_cmd(stat_cmd, url)
ssh = self.ssh_shell_for(url)
result = ssh(cmd)
self._check_return_code(result.returncode, url)
self._check_return_code(result.returncode, url, result.stderr.decode())
return {'content-length': int(result.stdout)}

def delete(self,
url: str,
*,
credential: str | None = None,
timeout: float | None = None) -> Dict:
"""Delete the target of a shh://-URL
The target can be a file or a directory. `delete` will attempt to
delete write protected targets (by setting write permissions). If
the target is a directory, the complete directory and all its
content will be deleted. `delete` will not modify the permissions
of the parent of the target. That means, it will not delete a target
in a write protected directory, but it will empty target, if target is
a directory.
See :meth:`datalad_next.url_operations.UrlOperations.delete`
for parameter documentation and exception behavior.
Raises
------
UrlOperationsResourceUnknown
For deletion targets found absent.
"""

delete_cmd = """
ret() {{ return $1; }}
if [ -f {fpath} ]; then
chmod u+w {fpath}
rm -f {fpath}
elif [ -d {fpath} ]; then
chmod -R u+wx {fpath}
rm -rf {fpath}
else
ret 244
fi"""

cmd = self.format_cmd(delete_cmd, url)
ssh = self.ssh_shell_for(url)
result = ssh(cmd)
self._check_return_code(result.returncode, url, result.stderr.decode())
return {}

def download(self,
from_url: str,
to_path: Path | None,
Expand Down Expand Up @@ -206,7 +249,11 @@ def download(self,
if dst_fp and to_path is not None:
dst_fp.close()

self._check_return_code(result_generator.returncode, from_url)
self._check_return_code(
result_generator.returncode,
from_url,
''.join(result_generator.stderr_deque)
)

return {
**stat,
Expand Down Expand Up @@ -340,7 +387,11 @@ def _perform_upload(self,
# stdin of the shell was closed, it cannot be used anymore.
self.close_shell_for(to_url)

self._check_return_code(result_generator.returncode, to_url)
self._check_return_code(
result_generator.returncode,
to_url,
''.join(result_generator.stderr_deque)
)

return {
**hasher.get_hexdigest(),
Expand Down
43 changes: 26 additions & 17 deletions datalad_next/url_operations/tests/test_file.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import io
import stat

import pytest
import sys

from datalad_next.tests import skip_if_on_windows
from datalad_next.utils import on_linux

from ..file import (
FileUrlOperations,
Expand Down Expand Up @@ -70,36 +71,44 @@ def test_file_url_upload(tmp_path, monkeypatch):

# TODO test missing write permissions


def test_file_url_delete(tmp_path):
payload = 'payload'
test_path = tmp_path / 'subdir' / 'myfile'
test_path.parent.mkdir()
test_dir_path = tmp_path / 'subdir'
test_path = test_dir_path / 'myfile'

test_dir_path.mkdir()
test_url = test_path.as_uri()
test_dir_url = test_dir_path.as_uri()
ops = FileUrlOperations()

# missing file
with pytest.raises(UrlOperationsResourceUnknown):
ops.delete(test_url)

# place file
# place file and write protect
test_path.write_text(payload)
assert test_path.read_text() == payload
# try deleting a non-empty dir
with pytest.raises(UrlOperationsRemoteError):
ops.delete(test_path.parent.as_uri())

test_path.chmod(stat.S_IRUSR)
# file deletion works
ops.delete(test_url)
assert not test_path.exists()

# both windows and mac give incomprehensible AccessDenied
# errors on appveyor, although the directory is confirmed
# to be empty
if on_linux:
# empty dir deletion works too
# confirm it is indeed empty
assert not list(test_path.parent.iterdir())
ops.delete(test_path.parent.as_uri())
assert not test_path.parent.exists()
# place file again protect directory and file
test_path.write_text(payload)
assert test_path.read_text() == payload
test_dir_path.chmod(stat.S_IXUSR | stat.S_IRUSR)
test_path.chmod(stat.S_IRUSR)
# non-empty directory deletion works
ops.delete(test_dir_url)
assert not test_dir_path.exists()

# create empty, write-protected dir
assert not test_dir_path.exists()
test_dir_path.mkdir()
test_dir_path.chmod(stat.S_IXUSR | stat.S_IRUSR)
ops.delete(test_dir_url)
assert not test_dir_path.exists()


@skip_if_on_windows
Expand Down
13 changes: 13 additions & 0 deletions datalad_next/url_operations/tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,16 @@ def test_header_adding():
# ensure that `headers` did not change the stored headers
result_2 = url_ops.get_headers()
assert 'key_2' not in set(result_2)


def test_delete_method(httpbin):
url_base = httpbin['standard']
url_ops = HttpUrlOperations()
url_ops.delete(f'{url_base}/delete', timeout=1.0)


def test_delete_timeout(httpbin):
url_base = httpbin['standard']
url_ops = HttpUrlOperations()
with pytest.raises(TimeoutError):
url_ops.delete(f'{url_base}/delay/2', timeout=1.0)
Loading

0 comments on commit bbe6c04

Please sign in to comment.