Skip to content

Commit

Permalink
Fix #239
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Jan 31, 2021
1 parent 6bf8509 commit f64a886
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,7 @@ public String nextFieldName() throws IOException
if (name != null) {
_inputPtr += lenMarker;
} else {
name = _decodeShortName(lenMarker);
name = _decodeContiguousName(lenMarker);
name = _addDecodedToSymbols(lenMarker, name);
}
}
Expand Down Expand Up @@ -2122,17 +2122,21 @@ protected void _finishToken() throws IOException
}
return;
}
if (len > (_inputEnd - _inputPtr)) {
// or if not, could we read?
if (len >= _inputBuffer.length) {
// If not enough space, need handling similar to chunked
_finishLongText(len);
// 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
// the longest individual unit is 4 bytes (surrogate pair) so we
// actually need len+3 bytes to avoid bounds checks
final int needed = len + 3;
final int available = _inputEnd - _inputPtr;

if ((available >= needed)
// if not, could we read? NOTE: we do not require it, just attempt to read
|| ((_inputBuffer.length >= needed)
&& _tryToLoadToHaveAtLeast(needed))) {
_finishShortText(len);
return;
}
_loadToHaveAtLeast(len);
}
// offline for better optimization
_finishShortText(len);
// If not enough space, need handling similar to chunked
_finishLongText(len);
}

/**
Expand Down Expand Up @@ -2184,7 +2188,7 @@ private final String _finishShortText(int len) throws IOException
if (outBuf.length < len) { // one minor complication
outBuf = _textBuffer.expandCurrentSegment(len);
}

int outPtr = 0;
int inPtr = _inputPtr;
_inputPtr += len;
Expand All @@ -2200,33 +2204,47 @@ private final String _finishShortText(int len) throws IOException
return _textBuffer.setCurrentAndReturn(outPtr);
}
}

final int[] codes = UTF8_UNIT_CODES;
do {
i = inputBuf[inPtr++] & 0xFF;
switch (codes[i]) {
case 0:
break;
case 1:
i = ((i & 0x1F) << 6) | (inputBuf[inPtr++] & 0x3F);
{
final int c2 = inputBuf[inPtr++];
if ((c2 & 0xC0) != 0x080) {
_reportInvalidOther(c2 & 0xFF, inPtr);
}
i = ((i & 0x1F) << 6) | (c2 & 0x3F);
}
break;
case 2:
i = ((i & 0x0F) << 12)
| ((inputBuf[inPtr++] & 0x3F) << 6)
| (inputBuf[inPtr++] & 0x3F);
{
final int c2 = inputBuf[inPtr++];
if ((c2 & 0xC0) != 0x080) {
_reportInvalidOther(c2 & 0xFF, inPtr);
}
final int c3 = inputBuf[inPtr++];
if ((c3 & 0xC0) != 0x080) {
_reportInvalidOther(c3 & 0xFF, inPtr);
}
i = ((i & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
}
break;
case 3:
// 30-Jan-2021, tatu: TODO - validate these too?
i = ((i & 0x07) << 18)
| ((inputBuf[inPtr++] & 0x3F) << 12)
| ((inputBuf[inPtr++] & 0x3F) << 6)
| (inputBuf[inPtr++] & 0x3F);
| ((inputBuf[inPtr++] & 0x3F) << 12)
| ((inputBuf[inPtr++] & 0x3F) << 6)
| (inputBuf[inPtr++] & 0x3F);
// note: this is the codepoint value; need to split, too
i -= 0x10000;
outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
i = 0xDC00 | (i & 0x3FF);
break;
default: // invalid
_reportError("Invalid byte "+Integer.toHexString(i)+" in Unicode text block");
_reportInvalidInitial(i);
}
outBuf[outPtr++] = (char) i;
} while (inPtr < end);
Expand Down Expand Up @@ -2594,7 +2612,7 @@ protected final JsonToken _decodePropertyName() throws IOException
if (name != null) {
_inputPtr += lenMarker;
} else {
name = _decodeShortName(lenMarker);
name = _decodeContiguousName(lenMarker);
name = _addDecodedToSymbols(lenMarker, name);
}
}
Expand All @@ -2610,7 +2628,7 @@ protected final JsonToken _decodePropertyName() throws IOException
return JsonToken.FIELD_NAME;
}

private final String _decodeShortName(int len) throws IOException
private final String _decodeContiguousName(int len) throws IOException
{
// note: caller ensures we have enough bytes available
int outPtr = 0;
Expand All @@ -2623,7 +2641,7 @@ private final String _decodeShortName(int len) throws IOException
final int[] codes = UTF8_UNIT_CODES;
final byte[] inBuf = _inputBuffer;

// First a tight loop for Ascii
// First a tight loop for ASCII
final int end = inPtr + len;
while (true) {
int i = inBuf[inPtr] & 0xFF;
Expand All @@ -2645,25 +2663,40 @@ private final String _decodeShortName(int len) throws IOException
// trickiest one, need surrogate handling
switch (code) {
case 1:
i = ((i & 0x1F) << 6) | (inBuf[inPtr++] & 0x3F);
{
final int c2 = inBuf[inPtr++];
if ((c2 & 0xC0) != 0x080) {
_reportInvalidOther(c2 & 0xFF, inPtr);
}
i = ((i & 0x1F) << 6) | (c2 & 0x3F);
}
break;
case 2:
i = ((i & 0x0F) << 12)
| ((inBuf[inPtr++] & 0x3F) << 6)
| (inBuf[inPtr++] & 0x3F);
{
final int c2 = inBuf[inPtr++];
if ((c2 & 0xC0) != 0x080) {
_reportInvalidOther(c2 & 0xFF, inPtr);
}
final int c3 = inBuf[inPtr++];
if ((c3 & 0xC0) != 0x080) {
_reportInvalidOther(c3 & 0xFF, inPtr);
}
i = ((i & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
}
break;
case 3:
// 30-Jan-2021, tatu: TODO - validate surrogate case too?
i = ((i & 0x07) << 18)
| ((inBuf[inPtr++] & 0x3F) << 12)
| ((inBuf[inPtr++] & 0x3F) << 6)
| (inBuf[inPtr++] & 0x3F);
| ((inBuf[inPtr++] & 0x3F) << 12)
| ((inBuf[inPtr++] & 0x3F) << 6)
| (inBuf[inPtr++] & 0x3F);
// note: this is the codepoint value; need to split, too
i -= 0x10000;
outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
i = 0xDC00 | (i & 0x3FF);
break;
default: // invalid
_reportError("Invalid byte "+Integer.toHexString(i)+" in Object name");
_reportError("Invalid UTF-8 byte 0x"+Integer.toHexString(i)+" in Object property name");
}
}
outBuf[outPtr++] = (char) i;
Expand All @@ -2688,7 +2721,7 @@ private final String _decodeLongerName(int len) throws IOException
_inputPtr += len;
return name;
}
name = _decodeShortName(len);
name = _decodeContiguousName(len);
return _addDecodedToSymbols(len, name);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,74 @@ public void testShortString236TruncatedString() throws Exception
}
}
}

/**
 * Checks that short CBOR text values containing a malformed UTF-8
 * multi-byte sequence fail with a {@code StreamReadException} when the
 * text is accessed, for both the 2-byte and the 3-byte sequence case.
 */
public void testShortString237InvalidTextValue() throws Exception
{
    // Text value declared as 2 bytes long: 0xCF opens a 2-byte UTF-8 sequence,
    // but the byte that follows (0x2d) does not have its high bit set.
    // Trailing zero bytes are fillers to stay clear of the buffer boundary.
    final byte[] twoByteDoc = {0x62, (byte) 0xCF, 0x2d,
            0, 0, 0, 0, 0, 0};
    _verifyInvalidTextValue237(twoByteDoc);

    // Same check for the 3-byte variant: 0xEF opens a 3-byte sequence,
    // 0x8e is an acceptable middle byte, 0x2d is not.
    final byte[] threeByteDoc = {0x63, (byte) 0xEF, (byte) 0x8e, 0x2d,
            0, 0, 0, 0, 0, 0};
    _verifyInvalidTextValue237(threeByteDoc);
}

/**
 * Parses {@code doc}, expects a {@code VALUE_STRING} token and then requires
 * that reading its text fails with the "invalid middle byte 0x2d" message.
 */
private void _verifyInvalidTextValue237(byte[] doc) throws Exception
{
    try (CBORParser parser = cborParser(doc)) {
        assertToken(JsonToken.VALUE_STRING, parser.nextToken());
        final String decoded;
        try {
            decoded = parser.getText();
        } catch (StreamReadException e) {
            verifyException(e, "Invalid UTF-8 middle byte 0x2d");
            return;
        }
        // Only reached when decoding unexpectedly succeeded
        fail("Should have failed, did not, String = '"+decoded+"'");
    }
}

/**
 * Checks that Object property names containing a malformed UTF-8
 * multi-byte sequence fail with a {@code StreamReadException}, for both
 * the 2-byte and the 3-byte sequence case.
 */
public void testShortString237InvalidName() throws Exception
{
    // Object with 2-byte invalid name
    byte[] input2 = { (byte) 0xBF, // Object, indefinite length
            0x62, (byte) 0xCF, 0x2e, // 2-byte name but invalid second byte
            0x21, // int value of -2 (CBOR negative int)
            (byte) 0xFF, // Object END marker
            0, 0, 0, 0 // padding
    };
    try (CBORParser p = cborParser(input2)) {
        assertToken(JsonToken.START_OBJECT, p.nextToken());
        try {
            p.nextToken();
            String str = p.getText();
            fail("Should have failed, did not, String = '"+str+"'");
        } catch (StreamReadException e) {
            verifyException(e, "Invalid UTF-8 middle byte 0x2e");
        }
    }

    // but let's also validate 3-byte variant as well
    // NOTE: length marker must be 0x63 (3 bytes) so that the invalid third
    // byte is part of the declared name, not a byte past its end
    byte[] input3 = { (byte) 0xBF, // Object, indefinite length
            0x63, (byte) 0xEF, (byte) 0x8e, 0x2f, // 3-byte name but invalid third byte
            0x22, // int value of -3 (CBOR negative int)
            (byte) 0xFF, // Object END marker
            0, 0, 0, 0 // padding
    };
    try (CBORParser p = cborParser(input3)) {
        assertToken(JsonToken.START_OBJECT, p.nextToken());
        try {
            p.nextToken();
            String str = p.getText();
            fail("Should have failed, did not, String = '"+str+"'");
        } catch (StreamReadException e) {
            verifyException(e, "Invalid UTF-8 middle byte 0x2f");
        }
    }
}
}
2 changes: 1 addition & 1 deletion release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Project: jackson-datatypes-binary Modules:

2.13.0 (not yet released)

No changes since 2.12
#239: Should validate UTF-8 multi-byte validity for short decode path too

2.12.2 (not yet released)

Expand Down

0 comments on commit f64a886

Please sign in to comment.