Skip to content
This repository has been archived by the owner on May 25, 2024. It is now read-only.

Commit

Permalink
fix: fix data being overwritten by incorrect type
Browse files Browse the repository at this point in the history
  • Loading branch information
Nambers committed Apr 25, 2024
1 parent 5e6b8ca commit b9b36ae
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/pycJSON_decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ static bool parse_string(PyObject **item, parse_buffer *const input_buffer) {
} else {
uint32_t unicode_value;
// what is actually len of this utf8 sequence
int skip = get_unicode_value(input_end, &unicode_value);
int skip = get_unicode_value_usc4(input_end, &unicode_value);
if (unicode_value == 0) {
PyErr_SetString(PyExc_ValueError, "Invalid utf8 string.");
return false;
Expand Down
69 changes: 64 additions & 5 deletions src/str.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ int get_utf8_type(uint32_t unciode_value) {
}
}

int get_unicode_value(const char *str, Py_UCS4 *re) {
int get_unicode_value_usc4(const char *str, Py_UCS4 *re) {
assert(re != NULL);
if (str[0] & 0b10000000 && !str[0] & 0b01000000) {
// Continue bytes should be skipped.
return 0;
Expand All @@ -83,16 +84,67 @@ int get_unicode_value(const char *str, Py_UCS4 *re) {
}
}

/**
 * Decode the UTF-8 sequence that starts at `str` into one UCS-2 value.
 *
 * One-, two- and three-byte sequences are accepted; their payload is at most
 * 16 bits wide, so it always fits the destination. Everything else is
 * rejected: a stray continuation byte (10xxxxxx) in lead position, a
 * four-byte lead (1111xxxx), or a multi-byte lead whose trailing bytes are
 * not continuation bytes.
 *
 * @param str Points at the first byte of the sequence. Up to two further
 *            bytes are read, but only after the preceding byte has been
 *            validated, so a NUL terminator stops the read.
 * @param re  Receives the decoded value, or (Py_UCS2) -1 on failure.
 *            Must not be NULL.
 * @return Number of bytes consumed (1, 2 or 3), or 0 if the sequence is
 *         rejected. Callers must test the return value, not `*re`.
 */
int get_unicode_value_usc2(const char *str, Py_UCS2 *re) {
    assert(re != NULL);
    // Plain char may be signed; inspect the bytes as unsigned so the masks
    // and shifts below never operate on negative values.
    const unsigned char *bytes = (const unsigned char *) str;
    const unsigned char lead = bytes[0];

    if ((lead & 0x80) == 0) {
        // 0xxxxxxx: plain ASCII.
        *re = lead;
        return 1;
    }
    if ((lead & 0xE0) == 0xC0) {
        // 110xxxxx 10xxxxxx
        if ((bytes[1] & 0xC0) == 0x80) {
            *re = (Py_UCS2) (((lead & 0x1F) << 6) | (bytes[1] & 0x3F));
            return 2;
        }
    } else if ((lead & 0xF0) == 0xE0) {
        // 1110xxxx 10xxxxxx 10xxxxxx; && short-circuits before bytes[2] is
        // read when bytes[1] is already invalid.
        if ((bytes[1] & 0xC0) == 0x80 && (bytes[2] & 0xC0) == 0x80) {
            *re = (Py_UCS2) (((lead & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) |
                             (bytes[2] & 0x3F));
            return 3;
        }
    }

    // Stray continuation byte, malformed trailing byte, or a four-byte
    // sequence that cannot be represented in 16 bits.
    *re = (Py_UCS2) -1;
    return 0;
}

/**
 * Decode the UTF-8 sequence that starts at `str` into one UCS-1 value.
 *
 * Only code points up to 0xFF are representable, so the accepted inputs are
 * a single ASCII byte or a two-byte sequence whose decoded value is at most
 * 0xFF. Everything else is rejected: a stray continuation byte (10xxxxxx) in
 * lead position, a three- or four-byte lead, a two-byte lead whose trailing
 * byte is not a continuation byte, or a two-byte sequence that decodes above
 * 0xFF (it would otherwise be silently truncated to 8 bits).
 *
 * @param str Points at the first byte of the sequence. At most one further
 *            byte is read, and only when the lead announces a two-byte
 *            sequence.
 * @param re  Receives the decoded value, or (Py_UCS1) -1 on failure.
 *            Must not be NULL.
 * @return Number of bytes consumed (1 or 2), or 0 if the sequence is
 *         rejected. Callers must test the return value, not `*re`: the
 *         failure marker has the same bit pattern as the valid value 0xFF.
 */
int get_unicode_value_usc1(const char *str, Py_UCS1 *re) {
    assert(re != NULL);
    // Plain char may be signed; inspect the bytes as unsigned so the masks
    // and shifts below never operate on negative values.
    const unsigned char *bytes = (const unsigned char *) str;
    const unsigned char lead = bytes[0];

    if ((lead & 0x80) == 0) {
        // 0xxxxxxx: plain ASCII.
        *re = lead;
        return 1;
    }
    if ((lead & 0xE0) == 0xC0 && (bytes[1] & 0xC0) == 0x80) {
        // 110xxxxx 10xxxxxx: some two-byte sequences fit under 0xFF.
        const unsigned int value = ((lead & 0x1Fu) << 6) | (bytes[1] & 0x3Fu);
        if (value <= 0xFFu) {
            *re = (Py_UCS1) value;
            return 2;
        }
    }

    // Stray continuation byte, malformed trailing byte, or a code point that
    // does not fit in 8 bits.
    *re = (Py_UCS1) -1;
    return 0;
}

bool str2unicode_1byte(PyObject **re, const char *str, const long alloc, const long num) {
typedef Py_UCS1 t;
*re = PyUnicode_New(alloc, 0xFF); // TODO must be at least alloc + 1 idk why
*re = PyUnicode_New(alloc, 0xFF);
if (*re == NULL) {
PyErr_Format(PyExc_MemoryError, "Failed to parse string: allocation failure");
return false;
}
t *data = (t *) ((PyCompactUnicodeObject *) *re + 1);
long real_len = 0;
for (int i = 0; i < num - 1; i++) {
// no overflow
assert(real_len < alloc);
if (str[i] == '\\') {
switch (str[++i]) {
PARSE_STRING_CHAR_MATCHER(str[i], data, char)
Expand All @@ -106,7 +158,7 @@ bool str2unicode_1byte(PyObject **re, const char *str, const long alloc, const l
return false;
}
} else {
const int skip = get_unicode_value(str + i, data++);
const int skip = get_unicode_value_usc1(str + i, data++);
if (skip == 0) {
PyErr_SetString(PyExc_ValueError, "Invalid utf8 string.");
return false;
Expand All @@ -115,6 +167,7 @@ bool str2unicode_1byte(PyObject **re, const char *str, const long alloc, const l
}
real_len++;
}
*data = 0;
// PyUnicode_Resize(re, real_len);
assert(real_len == alloc); // TODO remove real_len after testing
return true;
Expand All @@ -130,6 +183,8 @@ bool str2unicode_2byte(PyObject **re, const char *str, const long alloc, const l
t *data = (t *) ((PyCompactUnicodeObject *) *re + 1);
long real_len = 0;
for (int i = 0; i < num - 1; i++) {
// no overflow
assert(real_len < alloc);
if (str[i] == '\\') {
switch (str[++i]) {
PARSE_STRING_CHAR_MATCHER(str[i], data, t)
Expand All @@ -143,7 +198,7 @@ bool str2unicode_2byte(PyObject **re, const char *str, const long alloc, const l
return false;
}
} else {
const int skip = get_unicode_value(str + i, data++);
const int skip = get_unicode_value_usc2(str + i, data++);
if (skip == 0) {
PyErr_SetString(PyExc_ValueError, "Invalid utf8 string.");
return false;
Expand All @@ -152,6 +207,7 @@ bool str2unicode_2byte(PyObject **re, const char *str, const long alloc, const l
}
real_len++;
}
*data = 0;
// PyUnicode_Resize(re, real_len);
assert(real_len == alloc); // TODO remove real_len after testing
return true;
Expand All @@ -167,6 +223,8 @@ bool str2unicode_4byte(PyObject **re, const char *str, const long alloc, const l
t *data = (t *) ((PyCompactUnicodeObject *) *re + 1);
long real_len = 0;
for (int i = 0; i < num - 1; i++) {
// no overflow
assert(real_len < alloc);
if (str[i] == '\\') {
switch (str[++i]) {
PARSE_STRING_CHAR_MATCHER(str[i], data, t)
Expand All @@ -180,7 +238,7 @@ bool str2unicode_4byte(PyObject **re, const char *str, const long alloc, const l
return false;
}
} else {
const int skip = get_unicode_value(str + i, data++);
const int skip = get_unicode_value_usc4(str + i, data++);
if (skip == 0) {
PyErr_SetString(PyExc_ValueError, "Invalid utf8 string.");
return false;
Expand All @@ -189,6 +247,7 @@ bool str2unicode_4byte(PyObject **re, const char *str, const long alloc, const l
}
real_len++;
}
*data = 0;
// PyUnicode_Resize(re, real_len);
assert(real_len == alloc); // TODO remove real_len after testing
return true;
Expand Down
4 changes: 3 additions & 1 deletion src/str.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
#include <stdbool.h>

int get_utf8_type(uint32_t unciode_value);
int get_unicode_value(const char *str, Py_UCS4 *re);
int get_unicode_value_usc4(const char *str, Py_UCS4 *re);
int get_unicode_value_usc2(const char *str, Py_UCS2 *re);
int get_unicode_value_usc1(const char *str, Py_UCS1 *re);
bool str2unicode_1byte(PyObject **re, const char *str, long alloc, long num);
bool str2unicode_2byte(PyObject **re, const char *str, long alloc, long num);
bool str2unicode_4byte(PyObject **re, const char *str, long alloc, long num);
Expand Down

0 comments on commit b9b36ae

Please sign in to comment.