Skip to content

Commit

Permalink
improve test coverage of data_processors.py
Browse files Browse the repository at this point in the history
  • Loading branch information
christian-monch committed Oct 18, 2023
1 parent 446159e commit e986654
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions datalad_next/runners/tests/test_data_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import json
from itertools import chain

import pytest

from ..data_processors import (
ProcessorPipeline,
SplitLinesProcessor,
Expand Down Expand Up @@ -184,3 +186,29 @@ def test_processor_removal():

source = chain([chunk], stream) if chunk else stream
assert b''.join(source) == b'content'


def test_split_decoding():
    """Exercise ``decode_utf8_processor`` with a character split over chunks.

    The UTF-8 encoding of ``'ö'`` is cut after its first byte.  The
    processor is called once with only the leading byte and once with both
    pieces; each call's result is unpacked into a ``(decoded, remaining)``
    pair and compared against the expected lists.
    """
    raw = 'ö'.encode('utf-8')
    head = raw[:1]
    tail = raw[1:]

    # A lone leading byte is an incomplete encoding: nothing is decoded and
    # the byte is handed back as remaining input.
    decoded, remaining = decode_utf8_processor([head])
    assert (decoded, remaining) == ([], [head])

    # Verify that the complete encoding decodes correctly and nothing is
    # left over.
    decoded, remaining = decode_utf8_processor([head, tail])
    assert (decoded, remaining) == (['ö'], [])


def test_pipeline_finishing():
    """Check that finalizing a pipeline with undecodable leftovers raises.

    Only the first byte of the UTF-8 encoding of ``'ö'`` is fed into a
    ``ProcessorPipeline`` that consists of ``decode_utf8_processor``.
    ``process()`` is expected to return an empty list, and the subsequent
    ``finalize()`` is expected to raise ``UnicodeDecodeError``.
    """
    # Only the leading byte is needed here; the rest of the encoding is
    # deliberately never supplied to the pipeline.
    part_1 = 'ö'.encode('utf-8')[:1]

    pipeline = ProcessorPipeline([decode_utf8_processor])
    res = pipeline.process(part_1)
    assert res == []
    with pytest.raises(UnicodeDecodeError):
        pipeline.finalize()

0 comments on commit e986654

Please sign in to comment.