diff --git a/docs/changelog-fragments/638.bugfix.rst b/docs/changelog-fragments/638.bugfix.rst new file mode 100644 index 000000000..4c4849c70 --- /dev/null +++ b/docs/changelog-fragments/638.bugfix.rst @@ -0,0 +1 @@ +Fixed large sftp reads and proper overriding existing files -- by :user:`Jakuje`. diff --git a/docs/changelog-fragments/664.bugfix.rst b/docs/changelog-fragments/664.bugfix.rst new file mode 100644 index 000000000..cd12b2562 --- /dev/null +++ b/docs/changelog-fragments/664.bugfix.rst @@ -0,0 +1 @@ +Improved performance of SFTP transfers by using larger transfer chunks -- by :user:`Jakuje`. diff --git a/src/pylibsshext/includes/libssh.pxd b/src/pylibsshext/includes/libssh.pxd index 3219f1cd9..673d7eef1 100644 --- a/src/pylibsshext/includes/libssh.pxd +++ b/src/pylibsshext/includes/libssh.pxd @@ -27,6 +27,10 @@ cdef extern from "libssh/libssh.h" nogil: pass ctypedef ssh_session_struct* ssh_session + cdef struct ssh_string_struct: + pass + ctypedef ssh_string_struct* ssh_string + cdef struct ssh_key_struct: pass ctypedef ssh_key_struct* ssh_key diff --git a/src/pylibsshext/includes/sftp.pxd b/src/pylibsshext/includes/sftp.pxd index 9d3db9310..94376578d 100644 --- a/src/pylibsshext/includes/sftp.pxd +++ b/src/pylibsshext/includes/sftp.pxd @@ -17,7 +17,9 @@ # from posix.types cimport mode_t -from pylibsshext.includes.libssh cimport ssh_channel, ssh_session +from libc cimport stdint + +from pylibsshext.includes.libssh cimport ssh_channel, ssh_session, ssh_string cdef extern from "libssh/sftp.h" nogil: @@ -30,6 +32,31 @@ cdef extern from "libssh/sftp.h" nogil: pass ctypedef sftp_file_struct * sftp_file + struct sftp_attributes_struct: + char *name + char *longname + stdint.uint32_t flags + stdint.uint8_t type + stdint.uint64_t size + stdint.uint32_t uid + stdint.uint32_t gid + char *owner + char *group + stdint.uint32_t permissions + stdint.uint64_t atime64 + stdint.uint32_t atime + stdint.uint32_t atime_nseconds + stdint.uint64_t createtime + stdint.uint32_t createtime_nseconds + stdint.uint64_t mtime64 + stdint.uint32_t mtime + stdint.uint32_t mtime_nseconds + ssh_string acl + stdint.uint32_t extended_count + ssh_string extended_type + ssh_string extended_data + ctypedef sftp_attributes_struct * sftp_attributes + cdef int SSH_FX_OK cdef int SSH_FX_EOF cdef int SSH_FX_NO_SUCH_FILE @@ -55,5 +82,8 @@ cdef extern from "libssh/sftp.h" nogil: ssize_t sftp_read(sftp_file file, const void *buf, size_t count) int sftp_get_error(sftp_session sftp) + sftp_attributes sftp_stat(sftp_session session, const char *path) + + cdef extern from "sys/stat.h" nogil: cdef int S_IRWXU diff --git a/src/pylibsshext/sftp.pyx b/src/pylibsshext/sftp.pyx index 6331c529d..555f0d2ad 100644 --- a/src/pylibsshext/sftp.pyx +++ b/src/pylibsshext/sftp.pyx @@ -18,11 +18,15 @@ from posix.fcntl cimport O_CREAT, O_RDONLY, O_TRUNC, O_WRONLY from cpython.bytes cimport PyBytes_AS_STRING +from cpython.mem cimport PyMem_Free, PyMem_Malloc from pylibsshext.errors cimport LibsshSFTPException from pylibsshext.session cimport get_libssh_session +SFTP_MAX_CHUNK = 32_768 # 32kB + + MSG_MAP = { sftp.SSH_FX_OK: "No error", sftp.SSH_FX_EOF: "End-of-file encountered", @@ -63,7 +67,7 @@ cdef class SFTP: rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_WRONLY | O_CREAT | O_TRUNC, sftp.S_IRWXU) if rf is NULL: raise LibsshSFTPException("Opening remote file [%s] for write failed with error [%s]" % (remote_file, self._get_sftp_error_str())) - buffer = f.read(1024) + buffer = f.read(SFTP_MAX_CHUNK) while buffer != b"": length = len(buffer) @@ -76,38 +80,55 @@ cdef class SFTP: self._get_sftp_error_str(), ) ) - buffer = f.read(1024) + buffer = f.read(SFTP_MAX_CHUNK) sftp.sftp_close(rf) def get(self, remote_file, local_file): cdef sftp.sftp_file rf - cdef char read_buffer[1024] + cdef char *read_buffer = NULL + cdef sftp.sftp_attributes attrs remote_file_b = remote_file if isinstance(remote_file_b, unicode): remote_file_b = remote_file.encode("utf-8") + attrs = sftp.sftp_stat(self._libssh_sftp_session, remote_file_b) + if attrs is NULL: + raise LibsshSFTPException("Failed to stat the remote file [%s]. Error: [%s]" + % (remote_file, self._get_sftp_error_str())) + file_size = attrs.size + rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_RDONLY, sftp.S_IRWXU) if rf is NULL: - raise LibsshSFTPException("Opening remote file [%s] for read failed with error [%s]" % (remote_file, self._get_sftp_error_str())) - - while True: - file_data = sftp.sftp_read(rf, read_buffer, sizeof(char) * 1024) - if file_data == 0: - break - elif file_data < 0: - sftp.sftp_close(rf) - raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" - % (remote_file, self._get_sftp_error_str())) - - with open(local_file, 'wb+') as f: - bytes_written = f.write(read_buffer[:file_data]) - if bytes_written and file_data != bytes_written: - sftp.sftp_close(rf) - raise LibsshSFTPException("Number of bytes [%s] read from remote file [%s]" - " does not match number of bytes [%s] written to local file [%s]" - " due to error [%s]" - % (file_data, remote_file, bytes_written, local_file, self._get_sftp_error_str())) + raise LibsshSFTPException("Opening remote file [%s] for read failed with error [%s]" + % (remote_file, self._get_sftp_error_str())) + + try: + with open(local_file, 'wb') as f: + buffer_size = min(SFTP_MAX_CHUNK, file_size) + read_buffer = PyMem_Malloc(buffer_size) + if read_buffer is NULL: + raise LibsshSFTPException("Memory allocation error") + + while True: + file_data = sftp.sftp_read(rf, read_buffer, sizeof(char) * buffer_size) + if file_data == 0: + break + elif file_data < 0: + sftp.sftp_close(rf) + raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" + % (remote_file, self._get_sftp_error_str())) + + bytes_written = f.write(read_buffer[:file_data]) + if bytes_written and file_data != bytes_written: + sftp.sftp_close(rf) + raise LibsshSFTPException("Number of bytes [%s] read from remote file [%s]" + " does not match number of bytes [%s] written to local file [%s]" + " due to error [%s]" + % (file_data, remote_file, bytes_written, local_file, self._get_sftp_error_str())) + finally: + if read_buffer is not NULL: + PyMem_Free(read_buffer) sftp.sftp_close(rf) def close(self): diff --git a/tests/unit/sftp_test.py b/tests/unit/sftp_test.py index fc70c4512..7a179e109 100644 --- a/tests/unit/sftp_test.py +++ b/tests/unit/sftp_test.py @@ -2,6 +2,8 @@ """Tests suite for sftp.""" +import random +import string import uuid import pytest @@ -48,6 +50,22 @@ def dst_path(file_paths_pair): return path +@pytest.fixture +def other_payload(): + """Generate a binary test payload.""" + uuid_name = uuid.uuid4() + return 'Original content: {name!s}'.format(name=uuid_name).encode() + + +@pytest.fixture +def dst_exists_path(file_paths_pair, other_payload): + """Return a data destination path.""" + path = file_paths_pair[1] + path.write_bytes(other_payload) + assert path.exists() + return path + + def test_make_sftp(sftp_session): """Smoke-test SFTP instance creation.""" assert sftp_session @@ -63,3 +81,49 @@ def test_get(dst_path, src_path, sftp_session, transmit_payload): """Check that SFTP file download works.""" sftp_session.get(str(src_path), str(dst_path)) assert dst_path.read_bytes() == transmit_payload + + +def test_get_existing(dst_exists_path, src_path, sftp_session, transmit_payload): + """Check that SFTP file download works when target file exists.""" + sftp_session.get(str(src_path), str(dst_exists_path)) + assert dst_exists_path.read_bytes() == transmit_payload + + +def test_put_existing(dst_exists_path, src_path, sftp_session, transmit_payload): + """Check that SFTP file download works when target file exists.""" + sftp_session.put(str(src_path), str(dst_exists_path)) + assert dst_exists_path.read_bytes() == transmit_payload + + +@pytest.fixture +def large_payload(): + """Generate a large 32769 byte (32kB + 1B) test payload.""" + random_char_kilobyte = [ord(random.choice(string.printable)) for _ in range(1024)] + full_bytes_number = 32 + a_32kB_chunk = bytes(random_char_kilobyte * full_bytes_number) + the_last_byte = random.choice(random_char_kilobyte).to_bytes(length=1, byteorder='big') + return a_32kB_chunk + the_last_byte + + +@pytest.fixture +def src_path_large(tmp_path, large_payload): + """Return a remote path to a 32769 byte-sized file. + + The pylibssh chunk size is 32769 so the test needs a file that would + execute at least two loops. + """ + path = tmp_path / 'large.txt' + path.write_bytes(large_payload) + return path + + +def test_put_large(dst_path, src_path_large, sftp_session, large_payload): + """Check that SFTP can upload large file.""" + sftp_session.put(str(src_path_large), str(dst_path)) + assert dst_path.read_bytes() == large_payload + + +def test_get_large(dst_path, src_path_large, sftp_session, large_payload): + """Check that SFTP can download large file.""" + sftp_session.get(str(src_path_large), str(dst_path)) + assert dst_path.read_bytes() == large_payload