Skip to content

Commit

Permalink
support escape and unescape[xml]
Browse files Browse the repository at this point in the history
  • Loading branch information
bbbgan committed Apr 28, 2024
1 parent fdfdac2 commit 5f0d3a1
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 38 deletions.
88 changes: 52 additions & 36 deletions iguana/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,59 @@ inline constexpr auto has_qoute = [](uint64_t chunk) IGUANA__INLINE_LAMBDA {
0b0010001000100010001000100010001000100010001000100010001000100010);
};

/// Decodes one UTF-8 sequence via `decode_utf8(it, codepoint)` and appends the
/// resulting code point to `ss` in escaped form.
///
/// - JSON mode (`is_xml_serialization == false`): `\uXXXX`; code points above
///   U+FFFF are written as a UTF-16 surrogate pair `\uXXXX\uXXXX`.
/// - XML mode (`is_xml_serialization == true`): one hexadecimal character
///   reference `&#xH...;` holding the scalar value itself. Surrogate code
///   points are not legal XML characters, so a supplementary-plane character
///   must not be split into two references.
///
/// @param it  Cursor handed to `decode_utf8` by reference.
///            NOTE(review): callers appear to rely on `decode_utf8` advancing
///            it past the decoded sequence - confirm against its definition.
/// @param ss  Output sink; needs `push_back(char)` and, in XML mode,
///            `append(const char*)`.
/// @throws std::runtime_error "illegal unicode character" when `decode_utf8`
///         reports failure, "illegal codepoint" when the decoded value is a
///         surrogate (U+D800..U+DFFF) or lies above U+10FFFF.
template <bool is_xml_serialization = false, typename Stream, typename Ch>
IGUANA_INLINE void write_unicode_to_string(Ch& it, Stream& ss) {
  static const char hexDigits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
                                     '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
  unsigned codepoint = 0;
  if (!decode_utf8(it, codepoint))
    IGUANA_UNLIKELY { throw std::runtime_error("illegal unicode character"); }

  // Appends the low 16 bits of `v` as exactly four upper-case hex digits.
  const auto append_hex4 = [&ss](unsigned v) {
    ss.push_back(hexDigits[(v >> 12) & 15]);
    ss.push_back(hexDigits[(v >> 8) & 15]);
    ss.push_back(hexDigits[(v >> 4) & 15]);
    ss.push_back(hexDigits[v & 15]);
  };

  if constexpr (is_xml_serialization) {
    ss.append("&#x");
  }
  else {
    ss.push_back('\\');
    ss.push_back('u');
  }

  if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
    // Basic Multilingual Plane: four hex digits in both modes.
    append_hex4(codepoint);
  }
  else {
    // Anything left is either a surrogate / out-of-range value (rejected) or
    // a supplementary-plane code point in [U+10000, U+10FFFF].
    if (codepoint < 0x010000 || codepoint > 0x10FFFF)
      IGUANA_UNLIKELY { throw std::runtime_error("illegal codepoint"); }
    if constexpr (is_xml_serialization) {
      // Emit the scalar value directly (5 or 6 hex digits).
      if (codepoint > 0xFFFFF) {
        ss.push_back(hexDigits[(codepoint >> 20) & 15]);
      }
      ss.push_back(hexDigits[(codepoint >> 16) & 15]);
      append_hex4(codepoint);
    }
    else {
      // JSON has no escape wider than 16 bits: split into a surrogate pair.
      const unsigned s = codepoint - 0x010000;
      const unsigned lead = (s >> 10) + 0xD800;
      const unsigned trail = (s & 0x3FF) + 0xDC00;
      append_hex4(lead);
      ss.push_back('\\');
      ss.push_back('u');
      append_hex4(trail);
    }
  }

  if constexpr (is_xml_serialization) {
    ss.push_back(';');
  }
}

// https://github.com/Tencent/rapidjson/blob/master/include/rapidjson/writer.h
template <typename Ch, typename SizeType, typename Stream>
inline void write_string_with_escape(const Ch* it, SizeType length,
Stream& ss) {
IGUANA_INLINE void write_string_with_escape(const Ch* it, SizeType length,
Stream& ss) {
static const char hexDigits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
static const char escape[256] = {
Expand All @@ -241,40 +290,7 @@ inline void write_string_with_escape(const Ch* it, SizeType length,
std::advance(end, length);
while (it < end) {
if (static_cast<unsigned>(*it) >= 0x80)
IGUANA_UNLIKELY {
unsigned codepoint = 0;
if (!decode_utf8(it, codepoint))
IGUANA_UNLIKELY {
throw std::runtime_error("illegal unicode character");
}
ss.push_back('\\');
ss.push_back('u');
if (codepoint <= 0xD7FF ||
(codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
ss.push_back(hexDigits[(codepoint >> 12) & 15]);
ss.push_back(hexDigits[(codepoint >> 8) & 15]);
ss.push_back(hexDigits[(codepoint >> 4) & 15]);
ss.push_back(hexDigits[(codepoint)&15]);
}
else {
if (codepoint < 0x010000 || codepoint > 0x10FFFF)
IGUANA_UNLIKELY { throw std::runtime_error("illegal codepoint"); }
// Surrogate pair
unsigned s = codepoint - 0x010000;
unsigned lead = (s >> 10) + 0xD800;
unsigned trail = (s & 0x3FF) + 0xDC00;
ss.push_back(hexDigits[(lead >> 12) & 15]);
ss.push_back(hexDigits[(lead >> 8) & 15]);
ss.push_back(hexDigits[(lead >> 4) & 15]);
ss.push_back(hexDigits[(lead)&15]);
ss.push_back('\\');
ss.push_back('u');
ss.push_back(hexDigits[(trail >> 12) & 15]);
ss.push_back(hexDigits[(trail >> 8) & 15]);
ss.push_back(hexDigits[(trail >> 4) & 15]);
ss.push_back(hexDigits[(trail)&15]);
}
}
IGUANA_UNLIKELY { write_unicode_to_string(it, ss); }
else if (escape[static_cast<unsigned char>(*it)])
IGUANA_UNLIKELY {
ss.push_back('\\');
Expand Down
7 changes: 6 additions & 1 deletion iguana/xml_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@ template <typename U, typename It, std::enable_if_t<plain_v<U>, int> = 0>
IGUANA_INLINE void parse_value(U &&value, It &&begin, It &&end) {
using T = std::decay_t<U>;
if constexpr (string_container_v<T>) {
value = T(&*begin, static_cast<size_t>(std::distance(begin, end)));
if constexpr (string_view_v<T>) {
value = T(&*begin, static_cast<size_t>(std::distance(begin, end)));
}
else {
parse_escape_xml(value, begin, end);
}
}
else if constexpr (num_v<T>) {
auto size = std::distance(begin, end);
Expand Down
111 changes: 111 additions & 0 deletions iguana/xml_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,4 +186,115 @@ IGUANA_INLINE auto skip_pass(It &&it, It &&end) {
return res + 1;
}

/// Checks whether the characters starting at `it` are exactly the pack `C...`.
///
/// @tparam C   Expected characters, in order.
/// @param it   Start of the candidate text (taken by forwarding reference and
///             advanced locally while comparing).
/// @param end  One-past-the-end of the readable range.
/// @return `true` only when at least `sizeof...(C)` characters are available
///         in `[it, end)` and every one of them equals its counterpart in
///         `C...`; `false` otherwise, including when `it` is already past
///         `end`.
template <char... C, typename It>
IGUANA_INLINE bool is_match(It &&it, const It &end) {
  // Compare in the signed difference type: converting a negative distance to
  // size_t would wrap to a huge value and defeat the bounds check.
  using diff_t = decltype(std::distance(it, end));
  if (std::distance(it, end) < static_cast<diff_t>(sizeof...(C))) {
    return false;
  }
  // Left fold with short-circuit: stops at the first mismatching character.
  return (... && (*it++ == C));
}

// Unescapes the XML text in [it, end) and appends the result to `value`.
//
// Recognised references: &amp; &apos; &quot; &gt; &lt; plus numeric character
// references in decimal (&#NNN;) and hexadecimal (&#xHHH;) form, whose code
// point is appended through encode_utf8.
//
// Loose policy: a '&' that does not start one of the references above is
// copied through unchanged instead of being rejected.
//
// All look-ahead is bounded by `end`: a trailing '&' and an unterminated
// numeric reference never read past the range. Decimal references accept only
// the digits 0-9.
//
// NOTE(review): `value` is appended to, not cleared first - confirm that no
// caller passes a non-empty string expecting it to be replaced.
// NOTE(review): match<';'> is defined elsewhere; presumably it consumes the
// terminating ';' and reports an error when it is missing - confirm.
template <typename U, typename It, std::enable_if_t<string_v<U>, int> = 0>
IGUANA_INLINE void parse_escape_xml(U &value, It &&it, It &&end) {
  // Maps a byte to its value as a hex digit; 255 marks "not a digit".
  static const unsigned char lookup_digits[256] = {
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   255, 255,
      255, 255, 255, 255, 255, 10,  11,  12,  13,  14,  15,  255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 10,  11,  12,  13,  14,  15,  255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
      255};
  while (it < end) {
    if (*it == '&')
      IGUANA_UNLIKELY {
        // Only dispatch on the next character when there is one; a trailing
        // '&' falls through and is copied literally.
        if (std::distance(it, end) >= 2) {
          switch (*(it + 1)) {
            // &amp; &apos;
            case 'a':
              if (is_match<'m', 'p', ';'>(it + 2, end)) {
                value.push_back('&');
                it += 5;
                continue;
              }
              if (is_match<'p', 'o', 's', ';'>(it + 2, end)) {
                value.push_back('\'');
                it += 6;
                continue;
              }
              break;
            // &quot;
            case 'q':
              if (is_match<'u', 'o', 't', ';'>(it + 2, end)) {
                value.push_back('\"');
                it += 6;
                continue;
              }
              break;
            // &gt;
            case 'g':
              if (is_match<'t', ';'>(it + 2, end)) {
                value.push_back('>');
                it += 4;
                continue;
              }
              break;
            // &lt;
            case 'l':
              if (is_match<'t', ';'>(it + 2, end)) {
                value.push_back('<');
                it += 4;
                continue;
              }
              break;
            // &#NNN; or &#xHHH;
            case '#': {
              unsigned long codepoint = 0;
              unsigned base = 10;
              it += 2;  // skip "&#"; safe because two characters are available
              if (it < end && *it == 'x') {
                base = 16;
                ++it;
              }
              while (it < end) {
                const auto digit =
                    lookup_digits[static_cast<unsigned char>(*it)];
                // Rejects the 255 sentinel and, for decimal references, the
                // hex letters (table values 10..15).
                if (digit >= base)
                  break;
                codepoint = codepoint * base + digit;
                ++it;
              }
              encode_utf8(value, codepoint);
              match<';'>(it, end);
              continue;
            }
            default:
              break;
          }
        }
        // Not a recognised reference: keep the '&' as-is.
        value.push_back(*(it++));
      }
    else {
      value.push_back(*(it++));
    }
  }
}

} // namespace iguana
31 changes: 30 additions & 1 deletion iguana/xml_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,35 @@

namespace iguana {

// Appends `length` characters starting at `it` to `ss`, escaped for XML.
//
//  - characters >= 0x80 are handed to write_unicode_to_string<true>, which
//    emits a numeric character reference;
//  - ' " & > < become &apos; &quot; &amp; &gt; &lt;
//  - every other character is copied unchanged.
//
// `ss` must provide push_back(char) and append(const char*).
template <typename Ch, typename SizeType, typename Stream>
IGUANA_INLINE void render_string_with_escape_xml(const Ch *it, SizeType length,
                                                 Stream &ss) {
  auto end = it;
  std::advance(end, length);
  while (it < end) {
    if (static_cast<unsigned>(*it) >= 0x80)
      IGUANA_UNLIKELY {
        // `it` is passed by reference and the generic ++it below is skipped
        // on this path.
        // NOTE(review): this relies on the helper advancing `it` past the
        // sequence it consumed - confirm against decode_utf8.
        write_unicode_to_string<true>(it, ss);
        continue;
      }
    else if (*it == '\'')
      IGUANA_UNLIKELY { ss.append("&apos;"); }
    else if (*it == '"')
      IGUANA_UNLIKELY { ss.append("&quot;"); }
    else if (*it == '&')
      IGUANA_UNLIKELY { ss.append("&amp;"); }
    else if (*it == '>')
      IGUANA_UNLIKELY { ss.append("&gt;"); }
    else if (*it == '<')
      IGUANA_UNLIKELY { ss.append("&lt;"); }
    else {
      ss.push_back(*it);
    }
    ++it;
  }
}

template <bool pretty, size_t spaces, typename Stream, typename T,
std::enable_if_t<sequence_container_v<T>, int> = 0>
IGUANA_INLINE void render_xml_value(Stream &ss, const T &value,
Expand Down Expand Up @@ -42,7 +71,7 @@ IGUANA_INLINE void render_head(Stream &ss, std::string_view str) {
template <typename Stream, typename T, std::enable_if_t<plain_v<T>, int> = 0>
IGUANA_INLINE void render_value(Stream &ss, const T &value) {
if constexpr (string_container_v<T>) {
ss.append(value.data(), value.size());
render_string_with_escape_xml(value.data(), value.size(), ss);
}
else if constexpr (num_v<T>) {
char temp[65];
Expand Down
29 changes: 29 additions & 0 deletions test/test_xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,35 @@ struct Owner_t {
}
};
REFLECTION(Owner_t, ID, DisplayName);
// Round-trip check for XML escaping: parse a document whose attribute and
// element text use predefined entities and numeric character references,
// verify the unescaped values, serialize the object again, and verify that
// the re-parsed result is the same.
TEST_CASE("test escape") {
  using Owner_attr_t =
      iguana::xml_attr_t<Owner_t, std::map<std::string_view, std::string>>;

  std::string escaped_xml = R"(
<Owner_t description="&lt;&#x5c0f;&#24378;&gt;">
<ID>&apos;&amp;&quot;&lt;&gt;</ID>
<DisplayName>&#x5c0f;&#24378;</DisplayName>
</Owner_t>
)";

  // Asserts the fully unescaped attribute and field values.
  auto expect_unescaped = [](const Owner_attr_t &parsed) {
    auto owner = parsed.value();
    // Copy of the attribute map: operator[] needs a non-const map.
    auto attributes = parsed.attr();
    CHECK(attributes["description"] == "<小强>");
    CHECK(owner.ID == R"('&"<>)");
    CHECK(owner.DisplayName == "小强");
  };

  // Parse the hand-written, escaped document.
  Owner_attr_t from_literal;
  iguana::from_xml(from_literal, escaped_xml);
  expect_unescaped(from_literal);

  // Serialize, print, and parse the serializer's own output again.
  std::string serialized;
  iguana::to_xml(from_literal, serialized);
  std::cout << serialized << std::endl;
  Owner_attr_t from_serialized;
  iguana::from_xml(from_serialized, serialized);
  expect_unescaped(from_serialized);
}

struct Contents {
std::string Key;
Expand Down

0 comments on commit 5f0d3a1

Please sign in to comment.