diff --git a/src/json.hpp b/src/json.hpp index 5cf06249..8d545a93 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -34,6 +34,7 @@ SOFTWARE. #include // assert #include // isdigit #include // and, not, or +#include // lconv, localeconv #include // isfinite, labs, ldexp, signbit #include // nullptr_t, ptrdiff_t, size_t #include // int64_t, uint64_t @@ -6203,7 +6204,9 @@ class basic_json { public: serializer(std::ostream& s) - : o(s) + : o(s), loc(std::localeconv()), + thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), + decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0]) {} /*! @@ -6244,7 +6247,10 @@ class basic_json // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } // first n-1 elements auto i = val.m_value.object->cbegin(); @@ -6252,8 +6258,7 @@ class basic_json { o.write(indent_string.c_str(), new_indent); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\": ", 3); dump(i->second, true, indent_step, new_indent); o.write(",\n", 2); @@ -6263,8 +6268,7 @@ class basic_json assert(i != val.m_value.object->cend()); o.write(indent_string.c_str(), new_indent); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\": ", 3); dump(i->second, true, indent_step, new_indent); @@ -6281,8 +6285,7 @@ class basic_json for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) { o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\":", 2); dump(i->second, false, indent_step, current_indent); o.put(','); @@ -6291,8 +6294,7 @@ class basic_json // last element assert(i != val.m_value.object->cend()); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\":", 2); dump(i->second, false, indent_step, current_indent); @@ -6316,7 +6318,10 @@ class basic_json // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } // first n-1 elements for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) @@ -6359,8 +6364,7 @@ class basic_json case value_t::string: { o.put('\"'); - const auto s = escape_string(*val.m_value.string); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(*val.m_value.string); o.put('\"'); return; } @@ -6380,19 +6384,19 @@ class basic_json case value_t::number_integer: { - x_write(val.m_value.number_integer); + dump_integer(val.m_value.number_integer); return; } case value_t::number_unsigned: { - x_write(val.m_value.number_unsigned); + dump_integer(val.m_value.number_unsigned); return; } case value_t::number_float: { - x_write(val.m_value.number_float); + dump_float(val.m_value.number_float); return; } @@ -6438,14 +6442,40 @@ class basic_json return res + 1; } + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // from c (1 byte) to \uxxxx (6 bytes) + return res + 5; + } + default: { - if (c >= 0x00 and c <= 0x1f) - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - return res; } } @@ -6465,12 +6495,13 @@ class basic_json @complexity Linear in the length of string @a s. */ - static string_t escape_string(const string_t& s) + void dump_escaped(const string_t& s) const { const auto space = extra_space(s); if (space == 0) { - return s; + o.write(s.c_str(), static_cast(s.size())); + return; } // create a result string of necessary size @@ -6537,43 +6568,69 @@ class basic_json break; } + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // convert a number 0..15 to its hex representation + // (0..f) + static const char hexify[16] = + { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + // print character c as \uxxxx + for (const char m : + { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] + }) + { + result[++pos] = m; + } + + ++pos; + break; + } + default: { - if (c >= 0x00 and c <= 0x1f) - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - } - else - { - // all other characters are added as-is - result[pos++] = c; - } + // all other characters are added as-is + result[pos++] = c; break; } } } - return result; + assert(pos == s.size() + space); + o.write(result.c_str(), static_cast(result.size())); } template - void x_write(NumberType x) + void dump_integer(NumberType x) { // special case for "0" if (x == 0) @@ -6607,7 +6664,7 @@ class basic_json o.write(m_buf.data(), static_cast(i)); } - void x_write(number_float_t x) + void dump_float(number_float_t x) { // special case for 0.0 and -0.0 if (x == 0) @@ -6634,15 +6691,6 @@ class basic_json // check if buffer was large enough assert(static_cast(written_bytes) < m_buf.size()); - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - // erase thousands separator if (thousands_sep != '\0') { @@ -6687,6 +6735,12 @@ class basic_json /// a (hopefully) large enough character buffer std::array < char, 64 > m_buf{{}}; + + const std::lconv* loc = nullptr; + const char thousands_sep = '\0'; + const char decimal_point = '\0'; + + string_t indent_string = string_t(512, ' '); }; public: diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index c7784266..b14fb68d 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -34,6 +34,7 @@ SOFTWARE. #include // assert #include // isdigit #include // and, not, or +#include // lconv, localeconv #include // isfinite, labs, ldexp, signbit #include // nullptr_t, ptrdiff_t, size_t #include // int64_t, uint64_t @@ -6203,7 +6204,9 @@ class basic_json { public: serializer(std::ostream& s) - : o(s) + : o(s), loc(std::localeconv()), + thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), + decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0]) {} /*! @@ -6244,7 +6247,10 @@ class basic_json // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } // first n-1 elements auto i = val.m_value.object->cbegin(); @@ -6252,8 +6258,7 @@ class basic_json { o.write(indent_string.c_str(), new_indent); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\": ", 3); dump(i->second, true, indent_step, new_indent); o.write(",\n", 2); @@ -6263,8 +6268,7 @@ class basic_json assert(i != val.m_value.object->cend()); o.write(indent_string.c_str(), new_indent); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\": ", 3); dump(i->second, true, indent_step, new_indent); @@ -6281,8 +6285,7 @@ class basic_json for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) { o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\":", 2); dump(i->second, false, indent_step, current_indent); o.put(','); @@ -6291,8 +6294,7 @@ class basic_json // last element assert(i != val.m_value.object->cend()); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\":", 2); dump(i->second, false, indent_step, current_indent); @@ -6316,7 +6318,10 @@ class basic_json // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } // first n-1 elements for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) @@ -6359,8 +6364,7 @@ class basic_json case value_t::string: { o.put('\"'); - const auto s = escape_string(*val.m_value.string); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(*val.m_value.string); o.put('\"'); return; } @@ -6380,19 +6384,19 @@ class basic_json case value_t::number_integer: { - x_write(val.m_value.number_integer); + dump_integer(val.m_value.number_integer); return; } case value_t::number_unsigned: { - x_write(val.m_value.number_unsigned); + dump_integer(val.m_value.number_unsigned); return; } case value_t::number_float: { - x_write(val.m_value.number_float); + dump_float(val.m_value.number_float); return; } @@ -6438,14 +6442,40 @@ class basic_json return res + 1; } + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // from c (1 byte) to \uxxxx (6 bytes) + return res + 5; + } + default: { - if (c >= 0x00 and c <= 0x1f) - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - return res; } } @@ -6465,12 +6495,13 @@ class basic_json @complexity Linear in the length of string @a s. */ - static string_t escape_string(const string_t& s) + void dump_escaped(const string_t& s) const { const auto space = extra_space(s); if (space == 0) { - return s; + o.write(s.c_str(), static_cast(s.size())); + return; } // create a result string of necessary size @@ -6537,43 +6568,69 @@ class basic_json break; } + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // convert a number 0..15 to its hex representation + // (0..f) + static const char hexify[16] = + { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + // print character c as \uxxxx + for (const char m : + { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] + }) + { + result[++pos] = m; + } + + ++pos; + break; + } + default: { - if (c >= 0x00 and c <= 0x1f) - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - } - else - { - // all other characters are added as-is - result[pos++] = c; - } + // all other characters are added as-is + result[pos++] = c; break; } } } - return result; + assert(pos == s.size() + space); + o.write(result.c_str(), static_cast(result.size())); } template - void x_write(NumberType x) + void dump_integer(NumberType x) { // special case for "0" if (x == 0) @@ -6607,7 +6664,7 @@ class basic_json o.write(m_buf.data(), static_cast(i)); } - void x_write(number_float_t x) + void dump_float(number_float_t x) { // special case for 0.0 and -0.0 if (x == 0) @@ -6634,15 +6691,6 @@ class basic_json // check if buffer was large enough assert(static_cast(written_bytes) < m_buf.size()); - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - // erase thousands separator if (thousands_sep != '\0') { @@ -6687,6 +6735,12 @@ class basic_json /// a (hopefully) large enough character buffer std::array < char, 64 > m_buf{{}}; + + const std::lconv* loc = nullptr; + const char thousands_sep = '\0'; + const char decimal_point = '\0'; + + string_t indent_string = string_t(512, ' '); }; public: diff --git a/test/src/unit-convenience.cpp b/test/src/unit-convenience.cpp index 45637033..33556311 100644 --- a/test/src/unit-convenience.cpp +++ b/test/src/unit-convenience.cpp @@ -49,44 +49,53 @@ TEST_CASE("convenience functions") SECTION("string escape") { - CHECK(json::serializer::escape_string("\"") == "\\\""); - CHECK(json::serializer::escape_string("\\") == "\\\\"); - CHECK(json::serializer::escape_string("\b") == "\\b"); - CHECK(json::serializer::escape_string("\f") == "\\f"); - CHECK(json::serializer::escape_string("\n") == "\\n"); - CHECK(json::serializer::escape_string("\r") == "\\r"); - CHECK(json::serializer::escape_string("\t") == "\\t"); + const auto check_escaped = [](const char* original, + const char* escaped) + { + std::stringstream ss; + json::serializer s(ss); + s.dump_escaped(original); + CHECK(ss.str() == escaped); + }; - CHECK(json::serializer::escape_string("\x01") == "\\u0001"); - CHECK(json::serializer::escape_string("\x02") == "\\u0002"); - CHECK(json::serializer::escape_string("\x03") == "\\u0003"); - CHECK(json::serializer::escape_string("\x04") == "\\u0004"); - CHECK(json::serializer::escape_string("\x05") == "\\u0005"); - CHECK(json::serializer::escape_string("\x06") == "\\u0006"); - CHECK(json::serializer::escape_string("\x07") == "\\u0007"); - CHECK(json::serializer::escape_string("\x08") == "\\b"); - CHECK(json::serializer::escape_string("\x09") == "\\t"); - CHECK(json::serializer::escape_string("\x0a") == "\\n"); - CHECK(json::serializer::escape_string("\x0b") == "\\u000b"); - CHECK(json::serializer::escape_string("\x0c") == "\\f"); - CHECK(json::serializer::escape_string("\x0d") == "\\r"); - CHECK(json::serializer::escape_string("\x0e") == "\\u000e"); - CHECK(json::serializer::escape_string("\x0f") == "\\u000f"); - CHECK(json::serializer::escape_string("\x10") == "\\u0010"); - CHECK(json::serializer::escape_string("\x11") == "\\u0011"); - CHECK(json::serializer::escape_string("\x12") == "\\u0012"); - CHECK(json::serializer::escape_string("\x13") == "\\u0013"); - CHECK(json::serializer::escape_string("\x14") == "\\u0014"); - CHECK(json::serializer::escape_string("\x15") == "\\u0015"); - CHECK(json::serializer::escape_string("\x16") == "\\u0016"); - CHECK(json::serializer::escape_string("\x17") == "\\u0017"); - CHECK(json::serializer::escape_string("\x18") == "\\u0018"); - CHECK(json::serializer::escape_string("\x19") == "\\u0019"); - CHECK(json::serializer::escape_string("\x1a") == "\\u001a"); - CHECK(json::serializer::escape_string("\x1b") == "\\u001b"); - CHECK(json::serializer::escape_string("\x1c") == "\\u001c"); - CHECK(json::serializer::escape_string("\x1d") == "\\u001d"); - CHECK(json::serializer::escape_string("\x1e") == "\\u001e"); - CHECK(json::serializer::escape_string("\x1f") == "\\u001f"); + check_escaped("\"", "\\\""); + check_escaped("\\", "\\\\"); + check_escaped("\b", "\\b"); + check_escaped("\f", "\\f"); + check_escaped("\n", "\\n"); + check_escaped("\r", "\\r"); + check_escaped("\t", "\\t"); + + check_escaped("\x01", "\\u0001"); + check_escaped("\x02", "\\u0002"); + check_escaped("\x03", "\\u0003"); + check_escaped("\x04", "\\u0004"); + check_escaped("\x05", "\\u0005"); + check_escaped("\x06", "\\u0006"); + check_escaped("\x07", "\\u0007"); + check_escaped("\x08", "\\b"); + check_escaped("\x09", "\\t"); + check_escaped("\x0a", "\\n"); + check_escaped("\x0b", "\\u000b"); + check_escaped("\x0c", "\\f"); + check_escaped("\x0d", "\\r"); + check_escaped("\x0e", "\\u000e"); + check_escaped("\x0f", "\\u000f"); + check_escaped("\x10", "\\u0010"); + check_escaped("\x11", "\\u0011"); + check_escaped("\x12", "\\u0012"); + check_escaped("\x13", "\\u0013"); + check_escaped("\x14", "\\u0014"); + check_escaped("\x15", "\\u0015"); + check_escaped("\x16", "\\u0016"); + check_escaped("\x17", "\\u0017"); + check_escaped("\x18", "\\u0018"); + check_escaped("\x19", "\\u0019"); + check_escaped("\x1a", "\\u001a"); + check_escaped("\x1b", "\\u001b"); + check_escaped("\x1c", "\\u001c"); + check_escaped("\x1d", "\\u001d"); + check_escaped("\x1e", "\\u001e"); + check_escaped("\x1f", "\\u001f"); } }