Skip to content

Commit

Permalink
fix(decode_bytes): handle multiple errors
Browse files Browse the repository at this point in the history
This commit fixes an issue in the handling of
multiple errors, where parts of the input strings were
repeated in the output of `decode_bytes`.

It also adds a regression test to ensure that
multiple encoding errors in a single input
chunk are handled properly.
  • Loading branch information
christian-monch committed Jul 11, 2024
1 parent 0ac7974 commit 7a73d8c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
7 changes: 5 additions & 2 deletions datalad_next/itertools/decode_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,11 @@ def handle_decoding_error(position: int,
else:
return (
position + exc.end,
joined_data[:position + exc.end].decode(
encoding, errors='backslashreplace')
joined_data[position:position + exc.start].decode(encoding)
+ joined_data[position + exc.start:position + exc.end].decode(
encoding,
errors='backslashreplace'
),
)

joined_data = b''
Expand Down
5 changes: 5 additions & 0 deletions datalad_next/itertools/tests/test_decode_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,8 @@ def test_no_empty_strings():
# check that empty strings are not yielded
r = tuple(decode_bytes([b'\xc3', b'\xb6']))
assert r == ('ö',)


def test_multiple_errors():
    """Check that several undecodable bytes in one chunk are each escaped.

    The single input chunk contains three ``0xaf`` bytes; the joined output
    is expected to contain the surrounding text exactly once, with every
    offending byte rendered as the literal text ``\\xaf``.
    """
    chunk = b'08 War \xaf No \xaf More \xaf Trouble.shn.mp3'
    decoded = ''.join(decode_bytes([chunk]))
    assert decoded == '08 War \\xaf No \\xaf More \\xaf Trouble.shn.mp3'

0 comments on commit 7a73d8c

Please sign in to comment.