diff --git a/README.md b/README.md index e2787236..3662ff43 100644 --- a/README.md +++ b/README.md @@ -387,7 +387,7 @@ I deeply appreciate the help of the following people. - [Aaron Burghardt](https://github.com/aburgh) implemented code to parse streams incrementally. Furthermore, he greatly improved the parser class by allowing the definition of a filter function to discard undesired elements while parsing. - [Daniel Kopeček](https://github.com/dkopecek) fixed a bug in the compilation with GCC 5.0. - [Florian Weber](https://github.com/Florianjw) fixed a bug in and improved the performance of the comparison operators. -- [Eric Cornelius](https://github.com/EricMCornelius) pointed out a bug in the handling with NaN and infinity values. +- [Eric Cornelius](https://github.com/EricMCornelius) pointed out a bug in the handling of NaN and infinity values. He also improved the performance of the string escaping. - [易思龙](https://github.com/likebeta) implemented a conversion from anonymous enums. - [kepkin](https://github.com/kepkin) patiently pushed forward the support for Microsoft Visual studio. - [gregmarr](https://github.com/gregmarr) simplified the implementation of reverse iterators. diff --git a/src/json.hpp b/src/json.hpp index 55f0e301..a064393a 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -4535,68 +4535,31 @@ class basic_json } /*! - @brief escape a string + @brief calculates the extra space to escape a JSON string - Escape a string by replacing certain special characters by a sequence of an - escape character (backslash) and another character and other control - characters by a sequence of "\u" followed by a four-digit hex - representation. - - @param[out] o the stream to write the escaped string to @param[in] s the string to escape + @return the number of additional characters required to escape string @a s + + @complexity Linear in the length of string @a s. 
/*!
@brief calculates the extra space to escape a JSON string

Counts how many characters the escaped form of @a s needs in addition to
the length of @a s itself: one for every character that is written as a
two-character escape (quotation mark, reverse solidus, backspace, formfeed,
newline, carriage return, horizontal tab) and five for every other
character in the range 0x00..0x1f, which is written as a six-character
"u"-escape.

@param[in] s  the string to escape
@return the number of additional characters required to escape string @a s;
        0 if @a s contains nothing that needs escaping

@complexity Linear in the length of string @a s.
*/
static std::size_t extra_space(const string_t& s) noexcept
{
    std::size_t result = 0;

    for (const auto& c : s)
    {
        switch (c)
        {
            case '"':
            case '\\':
            case '\b':
            case '\f':
            case '\n':
            case '\r':
            case '\t':
            {
                // from c (1 byte) to \x (2 bytes)
                result += 1;
                break;
            }

            default:
            {
                if (c >= 0x00 and c <= 0x1f)
                {
                    // from c (1 byte) to \uxxxx (6 bytes)
                    result += 5;
                }
                break;
            }
        }
    }

    return result;
}

/*!
@brief escape a string

Escape a string by replacing certain special characters by a sequence of an
escape character (backslash) and another character and other control
characters by a sequence of "\u" followed by a four-digit hex
representation.

The result is allocated once with its final size (see extra_space) and
pre-filled with backslashes, so each escape sequence only has to store the
characters that follow its leading backslash.

@param[in] s  the string to escape
@return the escaped string; an unmodified copy of @a s if nothing needs
        escaping

@note NOTE(review): the function is declared noexcept although it builds a
      new string; presumably a failed allocation would therefore terminate
      the program -- confirm this is intended.

@complexity Linear in the length of string @a s.
*/
static string_t escape_string(const string_t& s) noexcept
{
    const auto space = extra_space(s);
    if (space == 0)
    {
        // nothing to escape: hand back a plain copy
        return s;
    }

    // lower-case hexadecimal digits for the \uxxxx form
    static const char hex_digits[] = "0123456789abcdef";

    // create a result string of necessary size; every position starts out
    // as a backslash
    string_t result(s.size() + space, '\\');
    std::size_t pos = 0;

    for (const auto& c : s)
    {
        switch (c)
        {
            // quotation mark (0x22)
            case '"':
            {
                result[pos + 1] = '"';
                pos += 2;
                break;
            }

            // reverse solidus (0x5c)
            case '\\':
            {
                // nothing to change: both positions already hold a backslash
                pos += 2;
                break;
            }

            // backspace (0x08)
            case '\b':
            {
                result[pos + 1] = 'b';
                pos += 2;
                break;
            }

            // formfeed (0x0c)
            case '\f':
            {
                result[pos + 1] = 'f';
                pos += 2;
                break;
            }

            // newline (0x0a)
            case '\n':
            {
                result[pos + 1] = 'n';
                pos += 2;
                break;
            }

            // carriage return (0x0d)
            case '\r':
            {
                result[pos + 1] = 'r';
                pos += 2;
                break;
            }

            // horizontal tab (0x09)
            case '\t':
            {
                result[pos + 1] = 't';
                pos += 2;
                break;
            }

            default:
            {
                if (c >= 0x00 and c <= 0x1f)
                {
                    // Store c as \u00xx one character at a time. Formatting
                    // with sprintf would additionally write a terminating
                    // '\0' at result[pos + 6], wiping out the pre-filled
                    // backslash of an escape sequence that follows directly
                    // (or touching the slot past the last character).
                    result[pos + 1] = 'u';
                    result[pos + 2] = '0';
                    result[pos + 3] = '0';
                    result[pos + 4] = hex_digits[(c >> 4) & 0x0f];
                    result[pos + 5] = hex_digits[c & 0x0f];
                    pos += 6;
                }
                else
                {
                    // all other characters are added as-is
                    result[pos++] = c;
                }
                break;
            }
        }
    }

    return result;
}
/*!
@brief calculates the extra space to escape a JSON string

Counts how many characters the escaped form of @a s needs in addition to
the length of @a s itself: one for every character that is written as a
two-character escape (quotation mark, reverse solidus, backspace, formfeed,
newline, carriage return, horizontal tab) and five for every other
character in the range 0x00..0x1f, which is written as a six-character
"u"-escape.

@param[in] s  the string to escape
@return the number of additional characters required to escape string @a s;
        0 if @a s contains nothing that needs escaping

@complexity Linear in the length of string @a s.
*/
static std::size_t extra_space(const string_t& s) noexcept
{
    std::size_t result = 0;

    for (const auto& c : s)
    {
        switch (c)
        {
            case '"':
            case '\\':
            case '\b':
            case '\f':
            case '\n':
            case '\r':
            case '\t':
            {
                // from c (1 byte) to \x (2 bytes)
                result += 1;
                break;
            }

            default:
            {
                if (c >= 0x00 and c <= 0x1f)
                {
                    // from c (1 byte) to \uxxxx (6 bytes)
                    result += 5;
                }
                break;
            }
        }
    }

    return result;
}

/*!
@brief escape a string

Escape a string by replacing certain special characters by a sequence of an
escape character (backslash) and another character and other control
characters by a sequence of "\u" followed by a four-digit hex
representation.

The result is allocated once with its final size (see extra_space) and
pre-filled with backslashes, so each escape sequence only has to store the
characters that follow its leading backslash.

@param[in] s  the string to escape
@return the escaped string; an unmodified copy of @a s if nothing needs
        escaping

@note NOTE(review): the function is declared noexcept although it builds a
      new string; presumably a failed allocation would therefore terminate
      the program -- confirm this is intended.

@complexity Linear in the length of string @a s.
*/
static string_t escape_string(const string_t& s) noexcept
{
    const auto space = extra_space(s);
    if (space == 0)
    {
        // nothing to escape: hand back a plain copy
        return s;
    }

    // lower-case hexadecimal digits for the \uxxxx form
    static const char hex_digits[] = "0123456789abcdef";

    // create a result string of necessary size; every position starts out
    // as a backslash
    string_t result(s.size() + space, '\\');
    std::size_t pos = 0;

    for (const auto& c : s)
    {
        switch (c)
        {
            // quotation mark (0x22)
            case '"':
            {
                result[pos + 1] = '"';
                pos += 2;
                break;
            }

            // reverse solidus (0x5c)
            case '\\':
            {
                // nothing to change: both positions already hold a backslash
                pos += 2;
                break;
            }

            // backspace (0x08)
            case '\b':
            {
                result[pos + 1] = 'b';
                pos += 2;
                break;
            }

            // formfeed (0x0c)
            case '\f':
            {
                result[pos + 1] = 'f';
                pos += 2;
                break;
            }

            // newline (0x0a)
            case '\n':
            {
                result[pos + 1] = 'n';
                pos += 2;
                break;
            }

            // carriage return (0x0d)
            case '\r':
            {
                result[pos + 1] = 'r';
                pos += 2;
                break;
            }

            // horizontal tab (0x09)
            case '\t':
            {
                result[pos + 1] = 't';
                pos += 2;
                break;
            }

            default:
            {
                if (c >= 0x00 and c <= 0x1f)
                {
                    // Store c as \u00xx one character at a time. Formatting
                    // with sprintf would additionally write a terminating
                    // '\0' at result[pos + 6], wiping out the pre-filled
                    // backslash of an escape sequence that follows directly
                    // (or touching the slot past the last character).
                    result[pos + 1] = 'u';
                    result[pos + 2] = '0';
                    result[pos + 3] = '0';
                    result[pos + 4] = hex_digits[(c >> 4) & 0x0f];
                    result[pos + 5] = hex_digits[c & 0x0f];
                    pos += 6;
                }
                else
                {
                    // all other characters are added as-is
                    result[pos++] = c;
                }
                break;
            }
        }
    }

    return result;
}
" " : ""); i->second.dump(o, pretty_print, indent_step, new_indent); } @@ -4724,9 +4789,7 @@ class basic_json case (value_t::string): { - o << string_t("\""); - escape_string(o, *m_value.string); - o << "\""; + o << string_t("\"") << escape_string(*m_value.string) << "\""; return; } diff --git a/test/unit.cpp b/test/unit.cpp index f1863fb7..735fbd24 100644 --- a/test/unit.cpp +++ b/test/unit.cpp @@ -7927,52 +7927,45 @@ TEST_CASE("convenience functions") SECTION("string escape") { - auto escape_string = [](const std::string & s) - { - std::stringstream ss; - json::escape_string(ss, s); - return ss.str(); - }; + CHECK(json::escape_string("\"") == "\\\""); + CHECK(json::escape_string("\\") == "\\\\"); + CHECK(json::escape_string("\b") == "\\b"); + CHECK(json::escape_string("\f") == "\\f"); + CHECK(json::escape_string("\n") == "\\n"); + CHECK(json::escape_string("\r") == "\\r"); + CHECK(json::escape_string("\t") == "\\t"); - CHECK(escape_string("\"") == "\\\""); - CHECK(escape_string("\\") == "\\\\"); - CHECK(escape_string("\b") == "\\b"); - CHECK(escape_string("\f") == "\\f"); - CHECK(escape_string("\n") == "\\n"); - CHECK(escape_string("\r") == "\\r"); - CHECK(escape_string("\t") == "\\t"); - - CHECK(escape_string("\x01") == "\\u0001"); - CHECK(escape_string("\x02") == "\\u0002"); - CHECK(escape_string("\x03") == "\\u0003"); - CHECK(escape_string("\x04") == "\\u0004"); - CHECK(escape_string("\x05") == "\\u0005"); - CHECK(escape_string("\x06") == "\\u0006"); - CHECK(escape_string("\x07") == "\\u0007"); - CHECK(escape_string("\x08") == "\\b"); - CHECK(escape_string("\x09") == "\\t"); - CHECK(escape_string("\x0a") == "\\n"); - CHECK(escape_string("\x0b") == "\\u000b"); - CHECK(escape_string("\x0c") == "\\f"); - CHECK(escape_string("\x0d") == "\\r"); - CHECK(escape_string("\x0e") == "\\u000e"); - CHECK(escape_string("\x0f") == "\\u000f"); - CHECK(escape_string("\x10") == "\\u0010"); - CHECK(escape_string("\x11") == "\\u0011"); - CHECK(escape_string("\x12") == "\\u0012"); 
- CHECK(escape_string("\x13") == "\\u0013"); - CHECK(escape_string("\x14") == "\\u0014"); - CHECK(escape_string("\x15") == "\\u0015"); - CHECK(escape_string("\x16") == "\\u0016"); - CHECK(escape_string("\x17") == "\\u0017"); - CHECK(escape_string("\x18") == "\\u0018"); - CHECK(escape_string("\x19") == "\\u0019"); - CHECK(escape_string("\x1a") == "\\u001a"); - CHECK(escape_string("\x1b") == "\\u001b"); - CHECK(escape_string("\x1c") == "\\u001c"); - CHECK(escape_string("\x1d") == "\\u001d"); - CHECK(escape_string("\x1e") == "\\u001e"); - CHECK(escape_string("\x1f") == "\\u001f"); + CHECK(json::escape_string("\x01") == "\\u0001"); + CHECK(json::escape_string("\x02") == "\\u0002"); + CHECK(json::escape_string("\x03") == "\\u0003"); + CHECK(json::escape_string("\x04") == "\\u0004"); + CHECK(json::escape_string("\x05") == "\\u0005"); + CHECK(json::escape_string("\x06") == "\\u0006"); + CHECK(json::escape_string("\x07") == "\\u0007"); + CHECK(json::escape_string("\x08") == "\\b"); + CHECK(json::escape_string("\x09") == "\\t"); + CHECK(json::escape_string("\x0a") == "\\n"); + CHECK(json::escape_string("\x0b") == "\\u000b"); + CHECK(json::escape_string("\x0c") == "\\f"); + CHECK(json::escape_string("\x0d") == "\\r"); + CHECK(json::escape_string("\x0e") == "\\u000e"); + CHECK(json::escape_string("\x0f") == "\\u000f"); + CHECK(json::escape_string("\x10") == "\\u0010"); + CHECK(json::escape_string("\x11") == "\\u0011"); + CHECK(json::escape_string("\x12") == "\\u0012"); + CHECK(json::escape_string("\x13") == "\\u0013"); + CHECK(json::escape_string("\x14") == "\\u0014"); + CHECK(json::escape_string("\x15") == "\\u0015"); + CHECK(json::escape_string("\x16") == "\\u0016"); + CHECK(json::escape_string("\x17") == "\\u0017"); + CHECK(json::escape_string("\x18") == "\\u0018"); + CHECK(json::escape_string("\x19") == "\\u0019"); + CHECK(json::escape_string("\x1a") == "\\u001a"); + CHECK(json::escape_string("\x1b") == "\\u001b"); + CHECK(json::escape_string("\x1c") == "\\u001c"); + 
CHECK(json::escape_string("\x1d") == "\\u001d"); + CHECK(json::escape_string("\x1e") == "\\u001e"); + CHECK(json::escape_string("\x1f") == "\\u001f"); } }