Skip to content

Commit

Permalink
More tests
Browse files Browse the repository at this point in the history
  • Loading branch information
caseyclements committed Sep 13, 2024
1 parent 9b323da commit 5877708
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 3 deletions.
30 changes: 30 additions & 0 deletions test/bson_corpus/binary.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,36 @@
"description": "$type query operator (conflicts with legacy $binary form with $type field)",
"canonical_bson": "180000000378001000000010247479706500020000000000",
"canonical_extjson": "{\"x\" : { \"$type\" : {\"$numberInt\": \"2\"}}}"
},
{
"description": "subtype 0x09 Vector FLOAT32",
"canonical_bson": "170000000578000A0000000927000000FE420000E04000",
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwAAAP5CAADgQA==\", \"subType\": \"09\"}}}"
},
{
"description": "subtype 0x09 Vector INT8",
"canonical_bson": "11000000057800040000000903007F0700",
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwB/Bw==\", \"subType\": \"09\"}}}"
},
{
"description": "subtype 0x09 Vector PACKED_BIT",
"canonical_bson": "11000000057800040000000910007F0700",
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAB/Bw==\", \"subType\": \"09\"}}}"
},
{
"description": "subtype 0x09 Vector (Zero-length) FLOAT32",
"canonical_bson": "0F0000000578000200000009270000",
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwA=\", \"subType\": \"09\"}}}"
},
{
"description": "subtype 0x09 Vector (Zero-length) INT8",
"canonical_bson": "0F0000000578000200000009030000",
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwA=\", \"subType\": \"09\"}}}"
},
{
"description": "subtype 0x09 Vector (Zero-length) PACKED_BIT",
"canonical_bson": "0F0000000578000200000009100000",
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAA=\", \"subType\": \"09\"}}}"
}
],
"decodeErrors": [
Expand Down
29 changes: 26 additions & 3 deletions test/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,14 +729,17 @@ def test_uuid_legacy(self):
self.assertEqual(id, transformed)

def test_vector(self):
"""Tests of subtype 9"""
# We start with valid cases, across the 3 dtypes implemented.
# Work with a simple vector that can be interpreted as int8, float32, or ubyte
list_vector = [127, 7]
# As INT8, vector has length 2
binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8)
vector = binary_vector.as_vector()
assert vector.data == list_vector
# test encoding roundtrip
assert {"vector": binary_vector} == decode(encode({"vector": binary_vector}))
# test json roundtrip # TODO - Is this the wrong place?
# test json roundtrip
assert binary_vector == json_util.loads(json_util.dumps(binary_vector))

# For vectors of bits, aka PACKED_BIT type, vector has length 8 * 2
Expand All @@ -758,13 +761,33 @@ def test_vector(self):
len(padded_vec.as_vector(BinaryVectorDtype.INT8).data) == 8 * len(list_vector) - padding
)

# It is worthwhile explicitly showing the values encoded to BSON
padded_doc = {"padded_vec": padded_vec}
assert (
encode(padded_doc)
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00"
)
# and dumped to json
assert (
json_util.dumps(padded_doc)
== '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
)

# FLOAT32 is also implemented
float_binary = Binary.from_vector(list_vector, BinaryVectorDtype.FLOAT32)
assert all(isinstance(d, float) for d in float_binary.as_vector().data)

# The C extension was segfaulting on unicode RegExs, so we have this test
# that doesn't really test anything but the lack of a segfault.
# Now some invalid cases
for x in [-1, 257]:
try:
Binary.from_vector([x], BinaryVectorDtype.PACKED_BIT)
except struct.error as e:
assert str(e) == "ubyte format requires 0 <= number <= 255"

def test_unicode_regex(self):
    """Round-trip a compiled regex containing a non-ASCII character.

    Regression guard: the C extension used to segfault on unicode
    regular expressions. Nothing is asserted about the decoded value;
    the test passes simply by completing without a crash.
    """
    document = {"regex": re.compile("revisi\xf3n")}
    encoded = encode(document)
    decode(encoded)

Expand Down

0 comments on commit 5877708

Please sign in to comment.