From 54ef5f7b47276fc56637afbf083ee9e8a9b1ef44 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 27 Feb 2017 21:22:39 +0100 Subject: [PATCH 1/6] :hammer: moved serialization functions to serializer class The class is currently just a wrapper for an std::ostream and collects all functions related to serialization. The next step should be recycling of variables to avoid repetitive initialization for each recursive dump call. --- src/json.hpp | 1046 ++++++++++++++++---------------- src/json.hpp.re2c | 1050 +++++++++++++++++---------------- test/src/unit-convenience.cpp | 76 +-- 3 files changed, 1101 insertions(+), 1071 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 1094aade..a73b5048 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -2644,14 +2644,15 @@ class basic_json string_t dump(const int indent = -1) const { std::stringstream ss; + serializer s(ss); if (indent >= 0) { - dump(ss, true, static_cast(indent)); + s.dump(*this, true, static_cast(indent)); } else { - dump(ss, false, 0); + s.dump(*this, false, 0); } return ss.str(); @@ -6194,6 +6195,531 @@ class basic_json /// @name serialization /// @{ + private: + class serializer + { + public: + serializer(std::ostream& s) + : o(s) + {} + + /*! + @brief internal implementation of the serialization function + + This function is called by the public member function dump and organizes + the serialization internally. The indentation level is propagated as + additional parameter. In case of arrays and objects, the function is + called recursively. Note that + + - strings and object keys are escaped using `escape_string()` + - integer numbers are converted implicitly via `operator<<` + - floating-point numbers are converted to a string using `"%g"` format + + @param[in] val value to serialize + @param[in] pretty_print whether the output shall be pretty-printed + @param[in] indent_step the indent level + @param[in] current_indent the current indent level (only used internally) + */ + void dump(const basic_json& val, + const bool pretty_print, + const unsigned int indent_step, + const unsigned int current_indent = 0) const + { + switch (val.m_type) + { + case value_t::object: + { + if (val.m_value.object->empty()) + { + o.write("{}", 2); + return; + } + + if (pretty_print) + { + o.write("{\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + string_t indent_string = string_t(new_indent, ' '); + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o.write(indent_string.c_str(), new_indent); + o.put('\"'); + const auto s = escape_string(i->first); + o.write(s.c_str(), static_cast(s.size())); + o.write("\": ", 3); + dump(i->second, true, indent_step, new_indent); + o.write(",\n", 2); + } + + // last element + assert(i != val.m_value.object->cend()); + o.write(indent_string.c_str(), new_indent); + o.put('\"'); + const auto s = escape_string(i->first); + o.write(s.c_str(), static_cast(s.size())); + o.write("\": ", 3); + dump(i->second, true, indent_step, new_indent); + + o.put('\n'); + o.write(indent_string.c_str(), current_indent); + o.put('}'); + } + else + { + o.put('{'); + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o.put('\"'); + const auto s = escape_string(i->first); + o.write(s.c_str(), static_cast(s.size())); + o.write("\":", 2); + dump(i->second, false, indent_step, current_indent); + o.put(','); + } + + // last element + assert(i != val.m_value.object->cend()); + o.put('\"'); + const auto s = escape_string(i->first); + o.write(s.c_str(), static_cast(s.size())); + o.write("\":", 2); + dump(i->second, false, indent_step, current_indent); + + o.put('}'); + } + + return; + } + + case value_t::array: + { + if (val.m_value.array->empty()) + { + o.write("[]", 2); + return; + } + + if (pretty_print) + { + o.write("[\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + string_t indent_string = string_t(new_indent, ' '); + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) + { + o.write(indent_string.c_str(), new_indent); + dump(*i, true, indent_step, new_indent); + o.write(",\n", 2); + } + + // last element + assert(not val.m_value.array->empty()); + o.write(indent_string.c_str(), new_indent); + dump(val.m_value.array->back(), true, indent_step, new_indent); + + o.put('\n'); + o.write(indent_string.c_str(), current_indent); + o.put(']'); + } + else + { + o.put('['); + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) + { + dump(*i, false, indent_step, current_indent); + o.put(','); + } + + // last element + assert(not val.m_value.array->empty()); + dump(val.m_value.array->back(), false, indent_step, current_indent); + + o.put(']'); + } + + return; + } + + case value_t::string: + { + o.put('\"'); + const auto s = escape_string(*val.m_value.string); + o.write(s.c_str(), static_cast(s.size())); + o.put('\"'); + return; + } + + case value_t::boolean: + { + if (val.m_value.boolean) + { + o.write("true", 4); + } + else + { + o.write("false", 5); + } + return; + } + + case value_t::number_integer: + { + o << numtostr(val.m_value.number_integer).c_str(); + return; + } + + case value_t::number_unsigned: + { + o << numtostr(val.m_value.number_unsigned).c_str(); + return; + } + + case value_t::number_float: + { + o << numtostr(val.m_value.number_float).c_str(); + return; + } + + case value_t::discarded: + { + o.write("", 11); + return; + } + + case value_t::null: + { + o.write("null", 4); + return; + } + } + } + + private: + /*! + @brief calculates the extra space to escape a JSON string + + @param[in] s the string to escape + @return the number of characters required to escape string @a s + + @complexity Linear in the length of string @a s. + */ + static std::size_t extra_space(const string_t& s) noexcept + { + return std::accumulate(s.begin(), s.end(), size_t{}, + [](size_t res, typename string_t::value_type c) + { + switch (c) + { + case '"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + { + // from c (1 byte) to \x (2 bytes) + return res + 1; + } + + default: + { + if (c >= 0x00 and c <= 0x1f) + { + // from c (1 byte) to \uxxxx (6 bytes) + return res + 5; + } + + return res; + } + } + }); + } + + /*! + @brief escape a string + + Escape a string by replacing certain special characters by a sequence of + an escape character (backslash) and another character and other control + characters by a sequence of "\u" followed by a four-digit hex + representation. + + @param[in] s the string to escape + @return the escaped string + + @complexity Linear in the length of string @a s. + */ + static string_t escape_string(const string_t& s) + { + const auto space = extra_space(s); + if (space == 0) + { + return s; + } + + // create a result string of necessary size + string_t result(s.size() + space, '\\'); + std::size_t pos = 0; + + for (const auto& c : s) + { + switch (c) + { + // quotation mark (0x22) + case '"': + { + result[pos + 1] = '"'; + pos += 2; + break; + } + + // reverse solidus (0x5c) + case '\\': + { + // nothing to change + pos += 2; + break; + } + + // backspace (0x08) + case '\b': + { + result[pos + 1] = 'b'; + pos += 2; + break; + } + + // formfeed (0x0c) + case '\f': + { + result[pos + 1] = 'f'; + pos += 2; + break; + } + + // newline (0x0a) + case '\n': + { + result[pos + 1] = 'n'; + pos += 2; + break; + } + + // carriage return (0x0d) + case '\r': + { + result[pos + 1] = 'r'; + pos += 2; + break; + } + + // horizontal tab (0x09) + case '\t': + { + result[pos + 1] = 't'; + pos += 2; + break; + } + + default: + { + if (c >= 0x00 and c <= 0x1f) + { + // convert a number 0..15 to its hex representation + // (0..f) + static const char hexify[16] = + { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + // print character c as \uxxxx + for (const char m : + { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] + }) + { + result[++pos] = m; + } + + ++pos; + } + else + { + // all other characters are added as-is + result[pos++] = c; + } + break; + } + } + } + + return result; + } + + /*! + @brief locale-independent serialization for built-in arithmetic types + */ + struct numtostr + { + public: + template + numtostr(NumberType value) + { + x_write(value, std::is_integral()); + } + + const char* c_str() const + { + return m_buf.data(); + } + + private: + /// a (hopefully) large enough character buffer + std::array < char, 64 > m_buf{{}}; + + template + void x_write(NumberType x, /*is_integral=*/std::true_type) + { + // special case for "0" + if (x == 0) + { + m_buf[0] = '0'; + return; + } + + const bool is_negative = x < 0; + size_t i = 0; + + // spare 1 byte for '\0' + while (x != 0 and i < m_buf.size() - 1) + { + const auto digit = std::labs(static_cast(x % 10)); + m_buf[i++] = static_cast('0' + digit); + x /= 10; + } + + // make sure the number has been processed completely + assert(x == 0); + + if (is_negative) + { + // make sure there is capacity for the '-' + assert(i < m_buf.size() - 2); + m_buf[i++] = '-'; + } + + std::reverse(m_buf.begin(), m_buf.begin() + i); + } + + template + void x_write(NumberType x, /*is_integral=*/std::false_type) + { + // special case for 0.0 and -0.0 + if (x == 0) + { + size_t i = 0; + if (std::signbit(x)) + { + m_buf[i++] = '-'; + } + m_buf[i++] = '0'; + m_buf[i++] = '.'; + m_buf[i] = '0'; + return; + } + + // get number of digits for a text -> float -> text round-trip + static constexpr auto d = std::numeric_limits::digits10; + + // the actual conversion + const auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + + // negative value indicates an error + assert(written_bytes > 0); + // check if buffer was large enough + assert(static_cast(written_bytes) < m_buf.size()); + + // read information from locale + const auto loc = localeconv(); + assert(loc != nullptr); + const char thousands_sep = !loc->thousands_sep ? '\0' + : loc->thousands_sep[0]; + + const char decimal_point = !loc->decimal_point ? '\0' + : loc->decimal_point[0]; + + // erase thousands separator + if (thousands_sep != '\0') + { + const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); + std::fill(end, m_buf.end(), '\0'); + } + + // convert decimal point to '.' + if (decimal_point != '\0' and decimal_point != '.') + { + for (auto& c : m_buf) + { + if (c == decimal_point) + { + c = '.'; + break; + } + } + } + + // determine if need to append ".0" + size_t i = 0; + bool value_is_int_like = true; + for (i = 0; i < m_buf.size(); ++i) + { + // break when end of number is reached + if (m_buf[i] == '\0') + { + break; + } + + // check if we find non-int character + value_is_int_like = value_is_int_like and m_buf[i] != '.' and + m_buf[i] != 'e' and m_buf[i] != 'E'; + } + + if (value_is_int_like) + { + // there must be 2 bytes left for ".0" + assert((i + 2) < m_buf.size()); + // we write to the end of the number + assert(m_buf[i] == '\0'); + assert(m_buf[i - 1] != '\0'); + + // add ".0" + m_buf[i] = '.'; + m_buf[i + 1] = '0'; + + // the resulting string is properly terminated + assert(m_buf[i + 2] == '\0'); + } + } + }; + + private: + std::ostream& o; + }; + + public: /*! @brief serialize to stream @@ -6226,7 +6752,8 @@ class basic_json o.width(0); // do the actual serialization - j.dump(o, pretty_print, static_cast(indentation)); + serializer s(o); + s.dump(j, pretty_print, static_cast(indentation)); return o; } @@ -8082,519 +8609,6 @@ class basic_json } } - private: - /*! - @brief calculates the extra space to escape a JSON string - - @param[in] s the string to escape - @return the number of characters required to escape string @a s - - @complexity Linear in the length of string @a s. - */ - static std::size_t extra_space(const string_t& s) noexcept - { - return std::accumulate(s.begin(), s.end(), size_t{}, - [](size_t res, typename string_t::value_type c) - { - switch (c) - { - case '"': - case '\\': - case '\b': - case '\f': - case '\n': - case '\r': - case '\t': - { - // from c (1 byte) to \x (2 bytes) - return res + 1; - } - - default: - { - if (c >= 0x00 and c <= 0x1f) - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - - return res; - } - } - }); - } - - /*! - @brief escape a string - - Escape a string by replacing certain special characters by a sequence of - an escape character (backslash) and another character and other control - characters by a sequence of "\u" followed by a four-digit hex - representation. - - @param[in] s the string to escape - @return the escaped string - - @complexity Linear in the length of string @a s. - */ - static string_t escape_string(const string_t& s) - { - const auto space = extra_space(s); - if (space == 0) - { - return s; - } - - // create a result string of necessary size - string_t result(s.size() + space, '\\'); - std::size_t pos = 0; - - for (const auto& c : s) - { - switch (c) - { - // quotation mark (0x22) - case '"': - { - result[pos + 1] = '"'; - pos += 2; - break; - } - - // reverse solidus (0x5c) - case '\\': - { - // nothing to change - pos += 2; - break; - } - - // backspace (0x08) - case '\b': - { - result[pos + 1] = 'b'; - pos += 2; - break; - } - - // formfeed (0x0c) - case '\f': - { - result[pos + 1] = 'f'; - pos += 2; - break; - } - - // newline (0x0a) - case '\n': - { - result[pos + 1] = 'n'; - pos += 2; - break; - } - - // carriage return (0x0d) - case '\r': - { - result[pos + 1] = 'r'; - pos += 2; - break; - } - - // horizontal tab (0x09) - case '\t': - { - result[pos + 1] = 't'; - pos += 2; - break; - } - - default: - { - if (c >= 0x00 and c <= 0x1f) - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - } - else - { - // all other characters are added as-is - result[pos++] = c; - } - break; - } - } - } - - return result; - } - - - /*! - @brief locale-independent serialization for built-in arithmetic types - */ - struct numtostr - { - public: - template - numtostr(NumberType value) - { - x_write(value, std::is_integral()); - } - - const char* c_str() const - { - return m_buf.data(); - } - - private: - /// a (hopefully) large enough character buffer - std::array < char, 64 > m_buf{{}}; - - template - void x_write(NumberType x, /*is_integral=*/std::true_type) - { - // special case for "0" - if (x == 0) - { - m_buf[0] = '0'; - return; - } - - const bool is_negative = x < 0; - size_t i = 0; - - // spare 1 byte for '\0' - while (x != 0 and i < m_buf.size() - 1) - { - const auto digit = std::labs(static_cast(x % 10)); - m_buf[i++] = static_cast('0' + digit); - x /= 10; - } - - // make sure the number has been processed completely - assert(x == 0); - - if (is_negative) - { - // make sure there is capacity for the '-' - assert(i < m_buf.size() - 2); - m_buf[i++] = '-'; - } - - std::reverse(m_buf.begin(), m_buf.begin() + i); - } - - template - void x_write(NumberType x, /*is_integral=*/std::false_type) - { - // special case for 0.0 and -0.0 - if (x == 0) - { - size_t i = 0; - if (std::signbit(x)) - { - m_buf[i++] = '-'; - } - m_buf[i++] = '0'; - m_buf[i++] = '.'; - m_buf[i] = '0'; - return; - } - - // get number of digits for a text -> float -> text round-trip - static constexpr auto d = std::numeric_limits::digits10; - - // the actual conversion - const auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); - - // negative value indicates an error - assert(written_bytes > 0); - // check if buffer was large enough - assert(static_cast(written_bytes) < m_buf.size()); - - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - - // erase thousands separator - if (thousands_sep != '\0') - { - const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); - std::fill(end, m_buf.end(), '\0'); - } - - // convert decimal point to '.' - if (decimal_point != '\0' and decimal_point != '.') - { - for (auto& c : m_buf) - { - if (c == decimal_point) - { - c = '.'; - break; - } - } - } - - // determine if need to append ".0" - size_t i = 0; - bool value_is_int_like = true; - for (i = 0; i < m_buf.size(); ++i) - { - // break when end of number is reached - if (m_buf[i] == '\0') - { - break; - } - - // check if we find non-int character - value_is_int_like = value_is_int_like and m_buf[i] != '.' and - m_buf[i] != 'e' and m_buf[i] != 'E'; - } - - if (value_is_int_like) - { - // there must be 2 bytes left for ".0" - assert((i + 2) < m_buf.size()); - // we write to the end of the number - assert(m_buf[i] == '\0'); - assert(m_buf[i - 1] != '\0'); - - // add ".0" - m_buf[i] = '.'; - m_buf[i + 1] = '0'; - - // the resulting string is properly terminated - assert(m_buf[i + 2] == '\0'); - } - } - }; - - - /*! - @brief internal implementation of the serialization function - - This function is called by the public member function dump and organizes - the serialization internally. The indentation level is propagated as - additional parameter. In case of arrays and objects, the function is - called recursively. Note that - - - strings and object keys are escaped using `escape_string()` - - integer numbers are converted implicitly via `operator<<` - - floating-point numbers are converted to a string using `"%g"` format - - @param[out] o stream to write to - @param[in] pretty_print whether the output shall be pretty-printed - @param[in] indent_step the indent level - @param[in] current_indent the current indent level (only used internally) - */ - void dump(std::ostream& o, - const bool pretty_print, - const unsigned int indent_step, - const unsigned int current_indent = 0) const - { - switch (m_type) - { - case value_t::object: - { - if (m_value.object->empty()) - { - o.write("{}", 2); - return; - } - - if (pretty_print) - { - o.write("{\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); - - // first n-1 elements - auto i = m_value.object->cbegin(); - for (size_t cnt = 0; cnt < m_value.object->size() - 1; ++cnt, ++i) - { - o.write(indent_string.c_str(), new_indent); - o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); - o.write("\": ", 3); - i->second.dump(o, true, indent_step, new_indent); - o.write(",\n", 2); - } - - // last element - assert(i != m_value.object->cend()); - o.write(indent_string.c_str(), new_indent); - o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); - o.write("\": ", 3); - i->second.dump(o, true, indent_step, new_indent); - - o.put('\n'); - o.write(indent_string.c_str(), current_indent); - o.put('}'); - } - else - { - o.put('{'); - - // first n-1 elements - auto i = m_value.object->cbegin(); - for (size_t cnt = 0; cnt < m_value.object->size() - 1; ++cnt, ++i) - { - o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); - o.write("\":", 2); - i->second.dump(o, false, indent_step, current_indent); - o.put(','); - } - - // last element - assert(i != m_value.object->cend()); - o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); - o.write("\":", 2); - i->second.dump(o, false, indent_step, current_indent); - - o.put('}'); - } - - return; - } - - case value_t::array: - { - if (m_value.array->empty()) - { - o.write("[]", 2); - return; - } - - if (pretty_print) - { - o.write("[\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); - - // first n-1 elements - for (auto i = m_value.array->cbegin(); i != m_value.array->cend() - 1; ++i) - { - o.write(indent_string.c_str(), new_indent); - i->dump(o, true, indent_step, new_indent); - o.write(",\n", 2); - } - - // last element - assert(not m_value.array->empty()); - o.write(indent_string.c_str(), new_indent); - m_value.array->back().dump(o, true, indent_step, new_indent); - - o.put('\n'); - o.write(indent_string.c_str(), current_indent); - o.put(']'); - } - else - { - o.put('['); - - // first n-1 elements - for (auto i = m_value.array->cbegin(); i != m_value.array->cend() - 1; ++i) - { - i->dump(o, false, indent_step, current_indent); - o.put(','); - } - - // last element - assert(not m_value.array->empty()); - m_value.array->back().dump(o, false, indent_step, current_indent); - - o.put(']'); - } - - return; - } - - case value_t::string: - { - o.put('\"'); - const auto s = escape_string(*m_value.string); - o.write(s.c_str(), static_cast(s.size())); - o.put('\"'); - return; - } - - case value_t::boolean: - { - if (m_value.boolean) - { - o.write("true", 4); - } - else - { - o.write("false", 5); - } - return; - } - - case value_t::number_integer: - { - o << numtostr(m_value.number_integer).c_str(); - return; - } - - case value_t::number_unsigned: - { - o << numtostr(m_value.number_unsigned).c_str(); - return; - } - - case value_t::number_float: - { - o << numtostr(m_value.number_float).c_str(); - return; - } - - case value_t::discarded: - { - o.write("", 11); - return; - } - - case value_t::null: - { - o.write("null", 4); - return; - } - } - } private: ////////////////////// diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index a686a1a7..01f6bbfe 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -2644,14 +2644,15 @@ class basic_json string_t dump(const int indent = -1) const { std::stringstream ss; + serializer s(ss); if (indent >= 0) { - dump(ss, true, static_cast(indent)); + s.dump(*this, true, static_cast(indent)); } else { - dump(ss, false, 0); + s.dump(*this, false, 0); } return ss.str(); @@ -6194,6 +6195,534 @@ class basic_json /// @name serialization /// @{ + private: + /*! + @brief wrapper around the serialization functions + */ + class serializer + { + public: + serializer(std::ostream& s) + : o(s) + {} + + /*! + @brief internal implementation of the serialization function + + This function is called by the public member function dump and organizes + the serialization internally. The indentation level is propagated as + additional parameter. In case of arrays and objects, the function is + called recursively. Note that + + - strings and object keys are escaped using `escape_string()` + - integer numbers are converted implicitly via `operator<<` + - floating-point numbers are converted to a string using `"%g"` format + + @param[in] val value to serialize + @param[in] pretty_print whether the output shall be pretty-printed + @param[in] indent_step the indent level + @param[in] current_indent the current indent level (only used internally) + */ + void dump(const basic_json& val, + const bool pretty_print, + const unsigned int indent_step, + const unsigned int current_indent = 0) const + { + switch (val.m_type) + { + case value_t::object: + { + if (val.m_value.object->empty()) + { + o.write("{}", 2); + return; + } + + if (pretty_print) + { + o.write("{\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + string_t indent_string = string_t(new_indent, ' '); + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o.write(indent_string.c_str(), new_indent); + o.put('\"'); + const auto s = escape_string(i->first); + o.write(s.c_str(), static_cast(s.size())); + o.write("\": ", 3); + dump(i->second, true, indent_step, new_indent); + o.write(",\n", 2); + } + + // last element + assert(i != val.m_value.object->cend()); + o.write(indent_string.c_str(), new_indent); + o.put('\"'); + const auto s = escape_string(i->first); + o.write(s.c_str(), static_cast(s.size())); + o.write("\": ", 3); + dump(i->second, true, indent_step, new_indent); + + o.put('\n'); + o.write(indent_string.c_str(), current_indent); + o.put('}'); + } + else + { + o.put('{'); + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o.put('\"'); + const auto s = escape_string(i->first); + o.write(s.c_str(), static_cast(s.size())); + o.write("\":", 2); + dump(i->second, false, indent_step, current_indent); + o.put(','); + } + + // last element + assert(i != val.m_value.object->cend()); + o.put('\"'); + const auto s = escape_string(i->first); + o.write(s.c_str(), static_cast(s.size())); + o.write("\":", 2); + dump(i->second, false, indent_step, current_indent); + + o.put('}'); + } + + return; + } + + case value_t::array: + { + if (val.m_value.array->empty()) + { + o.write("[]", 2); + return; + } + + if (pretty_print) + { + o.write("[\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + string_t indent_string = string_t(new_indent, ' '); + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) + { + o.write(indent_string.c_str(), new_indent); + dump(*i, true, indent_step, new_indent); + o.write(",\n", 2); + } + + // last element + assert(not val.m_value.array->empty()); + o.write(indent_string.c_str(), new_indent); + dump(val.m_value.array->back(), true, indent_step, new_indent); + + o.put('\n'); + o.write(indent_string.c_str(), current_indent); + o.put(']'); + } + else + { + o.put('['); + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) + { + dump(*i, false, indent_step, current_indent); + o.put(','); + } + + // last element + assert(not val.m_value.array->empty()); + dump(val.m_value.array->back(), false, indent_step, current_indent); + + o.put(']'); + } + + return; + } + + case value_t::string: + { + o.put('\"'); + const auto s = escape_string(*val.m_value.string); + o.write(s.c_str(), static_cast(s.size())); + o.put('\"'); + return; + } + + case value_t::boolean: + { + if (val.m_value.boolean) + { + o.write("true", 4); + } + else + { + o.write("false", 5); + } + return; + } + + case value_t::number_integer: + { + o << numtostr(val.m_value.number_integer).c_str(); + return; + } + + case value_t::number_unsigned: + { + o << numtostr(val.m_value.number_unsigned).c_str(); + return; + } + + case value_t::number_float: + { + o << numtostr(val.m_value.number_float).c_str(); + return; + } + + case value_t::discarded: + { + o.write("", 11); + return; + } + + case value_t::null: + { + o.write("null", 4); + return; + } + } + } + + private: + /*! + @brief calculates the extra space to escape a JSON string + + @param[in] s the string to escape + @return the number of characters required to escape string @a s + + @complexity Linear in the length of string @a s. + */ + static std::size_t extra_space(const string_t& s) noexcept + { + return std::accumulate(s.begin(), s.end(), size_t{}, + [](size_t res, typename string_t::value_type c) + { + switch (c) + { + case '"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + { + // from c (1 byte) to \x (2 bytes) + return res + 1; + } + + default: + { + if (c >= 0x00 and c <= 0x1f) + { + // from c (1 byte) to \uxxxx (6 bytes) + return res + 5; + } + + return res; + } + } + }); + } + + /*! + @brief escape a string + + Escape a string by replacing certain special characters by a sequence of + an escape character (backslash) and another character and other control + characters by a sequence of "\u" followed by a four-digit hex + representation. + + @param[in] s the string to escape + @return the escaped string + + @complexity Linear in the length of string @a s. + */ + static string_t escape_string(const string_t& s) + { + const auto space = extra_space(s); + if (space == 0) + { + return s; + } + + // create a result string of necessary size + string_t result(s.size() + space, '\\'); + std::size_t pos = 0; + + for (const auto& c : s) + { + switch (c) + { + // quotation mark (0x22) + case '"': + { + result[pos + 1] = '"'; + pos += 2; + break; + } + + // reverse solidus (0x5c) + case '\\': + { + // nothing to change + pos += 2; + break; + } + + // backspace (0x08) + case '\b': + { + result[pos + 1] = 'b'; + pos += 2; + break; + } + + // formfeed (0x0c) + case '\f': + { + result[pos + 1] = 'f'; + pos += 2; + break; + } + + // newline (0x0a) + case '\n': + { + result[pos + 1] = 'n'; + pos += 2; + break; + } + + // carriage return (0x0d) + case '\r': + { + result[pos + 1] = 'r'; + pos += 2; + break; + } + + // horizontal tab (0x09) + case '\t': + { + result[pos + 1] = 't'; + pos += 2; + break; + } + + default: + { + if (c >= 0x00 and c <= 0x1f) + { + // convert a number 0..15 to its hex representation + // (0..f) + static const char hexify[16] = + { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + // print character c as \uxxxx + for (const char m : + { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] + }) + { + result[++pos] = m; + } + + ++pos; + } + else + { + // all other characters are added as-is + result[pos++] = c; + } + break; + } + } + } + + return result; + } + + /*! + @brief locale-independent serialization for built-in arithmetic types + */ + struct numtostr + { + public: + template + numtostr(NumberType value) + { + x_write(value, std::is_integral()); + } + + const char* c_str() const + { + return m_buf.data(); + } + + private: + /// a (hopefully) large enough character buffer + std::array < char, 64 > m_buf{{}}; + + template + void x_write(NumberType x, /*is_integral=*/std::true_type) + { + // special case for "0" + if (x == 0) + { + m_buf[0] = '0'; + return; + } + + const bool is_negative = x < 0; + size_t i = 0; + + // spare 1 byte for '\0' + while (x != 0 and i < m_buf.size() - 1) + { + const auto digit = std::labs(static_cast(x % 10)); + m_buf[i++] = static_cast('0' + digit); + x /= 10; + } + + // make sure the number has been processed completely + assert(x == 0); + + if (is_negative) + { + // make sure there is capacity for the '-' + assert(i < m_buf.size() - 2); + m_buf[i++] = '-'; + } + + std::reverse(m_buf.begin(), m_buf.begin() + i); + } + + template + void x_write(NumberType x, /*is_integral=*/std::false_type) + { + // special case for 0.0 and -0.0 + if (x == 0) + { + size_t i = 0; + if (std::signbit(x)) + { + m_buf[i++] = '-'; + } + m_buf[i++] = '0'; + m_buf[i++] = '.'; + m_buf[i] = '0'; + return; + } + + // get number of digits for a text -> float -> text round-trip + static constexpr auto d = std::numeric_limits::digits10; + + // the actual conversion + const auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + + // negative value indicates an error + assert(written_bytes > 0); + // check if buffer was large enough + assert(static_cast(written_bytes) < m_buf.size()); + + // read information from locale + const auto loc = localeconv(); + assert(loc != nullptr); + const char thousands_sep = !loc->thousands_sep ? '\0' + : loc->thousands_sep[0]; + + const char decimal_point = !loc->decimal_point ? '\0' + : loc->decimal_point[0]; + + // erase thousands separator + if (thousands_sep != '\0') + { + const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); + std::fill(end, m_buf.end(), '\0'); + } + + // convert decimal point to '.' + if (decimal_point != '\0' and decimal_point != '.') + { + for (auto& c : m_buf) + { + if (c == decimal_point) + { + c = '.'; + break; + } + } + } + + // determine if need to append ".0" + size_t i = 0; + bool value_is_int_like = true; + for (i = 0; i < m_buf.size(); ++i) + { + // break when end of number is reached + if (m_buf[i] == '\0') + { + break; + } + + // check if we find non-int character + value_is_int_like = value_is_int_like and m_buf[i] != '.' and + m_buf[i] != 'e' and m_buf[i] != 'E'; + } + + if (value_is_int_like) + { + // there must be 2 bytes left for ".0" + assert((i + 2) < m_buf.size()); + // we write to the end of the number + assert(m_buf[i] == '\0'); + assert(m_buf[i - 1] != '\0'); + + // add ".0" + m_buf[i] = '.'; + m_buf[i + 1] = '0'; + + // the resulting string is properly terminated + assert(m_buf[i + 2] == '\0'); + } + } + }; + + private: + std::ostream& o; + }; + + public: /*! @brief serialize to stream @@ -6226,8 +6755,8 @@ class basic_json o.width(0); // do the actual serialization - j.dump(o, pretty_print, static_cast(indentation)); - + serializer s(o); + s.dump(j, pretty_print, static_cast(indentation)); return o; } @@ -8082,519 +8611,6 @@ class basic_json } } - private: - /*! - @brief calculates the extra space to escape a JSON string - - @param[in] s the string to escape - @return the number of characters required to escape string @a s - - @complexity Linear in the length of string @a s. - */ - static std::size_t extra_space(const string_t& s) noexcept - { - return std::accumulate(s.begin(), s.end(), size_t{}, - [](size_t res, typename string_t::value_type c) - { - switch (c) - { - case '"': - case '\\': - case '\b': - case '\f': - case '\n': - case '\r': - case '\t': - { - // from c (1 byte) to \x (2 bytes) - return res + 1; - } - - default: - { - if (c >= 0x00 and c <= 0x1f) - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - - return res; - } - } - }); - } - - /*! - @brief escape a string - - Escape a string by replacing certain special characters by a sequence of - an escape character (backslash) and another character and other control - characters by a sequence of "\u" followed by a four-digit hex - representation. - - @param[in] s the string to escape - @return the escaped string - - @complexity Linear in the length of string @a s. - */ - static string_t escape_string(const string_t& s) - { - const auto space = extra_space(s); - if (space == 0) - { - return s; - } - - // create a result string of necessary size - string_t result(s.size() + space, '\\'); - std::size_t pos = 0; - - for (const auto& c : s) - { - switch (c) - { - // quotation mark (0x22) - case '"': - { - result[pos + 1] = '"'; - pos += 2; - break; - } - - // reverse solidus (0x5c) - case '\\': - { - // nothing to change - pos += 2; - break; - } - - // backspace (0x08) - case '\b': - { - result[pos + 1] = 'b'; - pos += 2; - break; - } - - // formfeed (0x0c) - case '\f': - { - result[pos + 1] = 'f'; - pos += 2; - break; - } - - // newline (0x0a) - case '\n': - { - result[pos + 1] = 'n'; - pos += 2; - break; - } - - // carriage return (0x0d) - case '\r': - { - result[pos + 1] = 'r'; - pos += 2; - break; - } - - // horizontal tab (0x09) - case '\t': - { - result[pos + 1] = 't'; - pos += 2; - break; - } - - default: - { - if (c >= 0x00 and c <= 0x1f) - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - } - else - { - // all other characters are added as-is - result[pos++] = c; - } - break; - } - } - } - - return result; - } - - - /*! - @brief locale-independent serialization for built-in arithmetic types - */ - struct numtostr - { - public: - template - numtostr(NumberType value) - { - x_write(value, std::is_integral()); - } - - const char* c_str() const - { - return m_buf.data(); - } - - private: - /// a (hopefully) large enough character buffer - std::array < char, 64 > m_buf{{}}; - - template - void x_write(NumberType x, /*is_integral=*/std::true_type) - { - // special case for "0" - if (x == 0) - { - m_buf[0] = '0'; - return; - } - - const bool is_negative = x < 0; - size_t i = 0; - - // spare 1 byte for '\0' - while (x != 0 and i < m_buf.size() - 1) - { - const auto digit = std::labs(static_cast(x % 10)); - m_buf[i++] = static_cast('0' + digit); - x /= 10; - } - - // make sure the number has been processed completely - assert(x == 0); - - if (is_negative) - { - // make sure there is capacity for the '-' - assert(i < m_buf.size() - 2); - m_buf[i++] = '-'; - } - - std::reverse(m_buf.begin(), m_buf.begin() + i); - } - - template - void x_write(NumberType x, /*is_integral=*/std::false_type) - { - // special case for 0.0 and -0.0 - if (x == 0) - { - size_t i = 0; - if (std::signbit(x)) - { - m_buf[i++] = '-'; - } - m_buf[i++] = '0'; - m_buf[i++] = '.'; - m_buf[i] = '0'; - return; - } - - // get number of digits for a text -> float -> text round-trip - static constexpr auto d = std::numeric_limits::digits10; - - // the actual conversion - const auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); - - // negative value indicates an error - assert(written_bytes > 0); - // check if buffer was large enough - assert(static_cast(written_bytes) < m_buf.size()); - - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - - // erase thousands separator - if (thousands_sep != '\0') - { - const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); - std::fill(end, m_buf.end(), '\0'); - } - - // convert decimal point to '.' - if (decimal_point != '\0' and decimal_point != '.') - { - for (auto& c : m_buf) - { - if (c == decimal_point) - { - c = '.'; - break; - } - } - } - - // determine if need to append ".0" - size_t i = 0; - bool value_is_int_like = true; - for (i = 0; i < m_buf.size(); ++i) - { - // break when end of number is reached - if (m_buf[i] == '\0') - { - break; - } - - // check if we find non-int character - value_is_int_like = value_is_int_like and m_buf[i] != '.' and - m_buf[i] != 'e' and m_buf[i] != 'E'; - } - - if (value_is_int_like) - { - // there must be 2 bytes left for ".0" - assert((i + 2) < m_buf.size()); - // we write to the end of the number - assert(m_buf[i] == '\0'); - assert(m_buf[i - 1] != '\0'); - - // add ".0" - m_buf[i] = '.'; - m_buf[i + 1] = '0'; - - // the resulting string is properly terminated - assert(m_buf[i + 2] == '\0'); - } - } - }; - - - /*! - @brief internal implementation of the serialization function - - This function is called by the public member function dump and organizes - the serialization internally. The indentation level is propagated as - additional parameter. In case of arrays and objects, the function is - called recursively. Note that - - - strings and object keys are escaped using `escape_string()` - - integer numbers are converted implicitly via `operator<<` - - floating-point numbers are converted to a string using `"%g"` format - - @param[out] o stream to write to - @param[in] pretty_print whether the output shall be pretty-printed - @param[in] indent_step the indent level - @param[in] current_indent the current indent level (only used internally) - */ - void dump(std::ostream& o, - const bool pretty_print, - const unsigned int indent_step, - const unsigned int current_indent = 0) const - { - switch (m_type) - { - case value_t::object: - { - if (m_value.object->empty()) - { - o.write("{}", 2); - return; - } - - if (pretty_print) - { - o.write("{\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); - - // first n-1 elements - auto i = m_value.object->cbegin(); - for (size_t cnt = 0; cnt < m_value.object->size() - 1; ++cnt, ++i) - { - o.write(indent_string.c_str(), new_indent); - o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); - o.write("\": ", 3); - i->second.dump(o, true, indent_step, new_indent); - o.write(",\n", 2); - } - - // last element - assert(i != m_value.object->cend()); - o.write(indent_string.c_str(), new_indent); - o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); - o.write("\": ", 3); - i->second.dump(o, true, indent_step, new_indent); - - o.put('\n'); - o.write(indent_string.c_str(), current_indent); - o.put('}'); - } - else - { - o.put('{'); - - // first n-1 elements - auto i = m_value.object->cbegin(); - for (size_t cnt = 0; cnt < m_value.object->size() - 1; ++cnt, ++i) - { - o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); - o.write("\":", 2); - i->second.dump(o, false, indent_step, current_indent); - o.put(','); - } - - // last element - assert(i != m_value.object->cend()); - o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); - o.write("\":", 2); - i->second.dump(o, false, indent_step, current_indent); - - o.put('}'); - } - - return; - } - - case value_t::array: - { - if (m_value.array->empty()) - { - o.write("[]", 2); - return; - } - - if (pretty_print) - { - o.write("[\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); - - // first n-1 elements - for (auto i = m_value.array->cbegin(); i != m_value.array->cend() - 1; ++i) - { - o.write(indent_string.c_str(), new_indent); - i->dump(o, true, indent_step, new_indent); - o.write(",\n", 2); - } - - // last element - assert(not m_value.array->empty()); - o.write(indent_string.c_str(), new_indent); - m_value.array->back().dump(o, true, indent_step, new_indent); - - o.put('\n'); - o.write(indent_string.c_str(), current_indent); - o.put(']'); - } - else - { - o.put('['); - - // first n-1 elements - for (auto i = m_value.array->cbegin(); i != m_value.array->cend() - 1; ++i) - { - i->dump(o, false, indent_step, current_indent); - o.put(','); - } - - // last element - assert(not m_value.array->empty()); - m_value.array->back().dump(o, false, indent_step, current_indent); - - o.put(']'); - } - - return; - } - - case value_t::string: - { - o.put('\"'); - const auto s = escape_string(*m_value.string); - o.write(s.c_str(), static_cast(s.size())); - o.put('\"'); - return; - } - - case value_t::boolean: - { - if (m_value.boolean) - { - o.write("true", 4); - } - else - { - o.write("false", 5); - } - return; - } - - case value_t::number_integer: - { - o << numtostr(m_value.number_integer).c_str(); - return; - } - - case value_t::number_unsigned: - { - o << numtostr(m_value.number_unsigned).c_str(); - return; - } - - case value_t::number_float: - { - o << numtostr(m_value.number_float).c_str(); - return; - } - - case value_t::discarded: - { - o.write("", 11); - return; - } - - case value_t::null: - { - o.write("null", 4); - return; - } - } - } private: ////////////////////// diff --git a/test/src/unit-convenience.cpp b/test/src/unit-convenience.cpp index 891dbc14..45637033 100644 --- a/test/src/unit-convenience.cpp +++ b/test/src/unit-convenience.cpp @@ -49,44 +49,44 @@ TEST_CASE("convenience functions") SECTION("string escape") { - CHECK(json::escape_string("\"") == "\\\""); - CHECK(json::escape_string("\\") == "\\\\"); - CHECK(json::escape_string("\b") == "\\b"); - CHECK(json::escape_string("\f") == "\\f"); - CHECK(json::escape_string("\n") == "\\n"); - CHECK(json::escape_string("\r") == "\\r"); - CHECK(json::escape_string("\t") == "\\t"); + CHECK(json::serializer::escape_string("\"") == "\\\""); + CHECK(json::serializer::escape_string("\\") == "\\\\"); + CHECK(json::serializer::escape_string("\b") == "\\b"); + CHECK(json::serializer::escape_string("\f") == "\\f"); + CHECK(json::serializer::escape_string("\n") == "\\n"); + CHECK(json::serializer::escape_string("\r") == "\\r"); + CHECK(json::serializer::escape_string("\t") == "\\t"); - CHECK(json::escape_string("\x01") == "\\u0001"); - CHECK(json::escape_string("\x02") == "\\u0002"); - CHECK(json::escape_string("\x03") == "\\u0003"); - CHECK(json::escape_string("\x04") == "\\u0004"); - CHECK(json::escape_string("\x05") == "\\u0005"); - CHECK(json::escape_string("\x06") == "\\u0006"); - CHECK(json::escape_string("\x07") == "\\u0007"); - CHECK(json::escape_string("\x08") == "\\b"); - CHECK(json::escape_string("\x09") == "\\t"); - CHECK(json::escape_string("\x0a") == "\\n"); - CHECK(json::escape_string("\x0b") == "\\u000b"); - CHECK(json::escape_string("\x0c") == "\\f"); - CHECK(json::escape_string("\x0d") == "\\r"); - CHECK(json::escape_string("\x0e") == "\\u000e"); - CHECK(json::escape_string("\x0f") == "\\u000f"); - CHECK(json::escape_string("\x10") == "\\u0010"); - CHECK(json::escape_string("\x11") == "\\u0011"); - CHECK(json::escape_string("\x12") == "\\u0012"); - CHECK(json::escape_string("\x13") == "\\u0013"); - CHECK(json::escape_string("\x14") == "\\u0014"); - CHECK(json::escape_string("\x15") == "\\u0015"); - CHECK(json::escape_string("\x16") == "\\u0016"); - CHECK(json::escape_string("\x17") == "\\u0017"); - CHECK(json::escape_string("\x18") == "\\u0018"); - CHECK(json::escape_string("\x19") == "\\u0019"); - CHECK(json::escape_string("\x1a") == "\\u001a"); - CHECK(json::escape_string("\x1b") == "\\u001b"); - CHECK(json::escape_string("\x1c") == "\\u001c"); - CHECK(json::escape_string("\x1d") == "\\u001d"); - CHECK(json::escape_string("\x1e") == "\\u001e"); - CHECK(json::escape_string("\x1f") == "\\u001f"); + CHECK(json::serializer::escape_string("\x01") == "\\u0001"); + CHECK(json::serializer::escape_string("\x02") == "\\u0002"); + CHECK(json::serializer::escape_string("\x03") == "\\u0003"); + CHECK(json::serializer::escape_string("\x04") == "\\u0004"); + CHECK(json::serializer::escape_string("\x05") == "\\u0005"); + CHECK(json::serializer::escape_string("\x06") == "\\u0006"); + CHECK(json::serializer::escape_string("\x07") == "\\u0007"); + CHECK(json::serializer::escape_string("\x08") == "\\b"); + CHECK(json::serializer::escape_string("\x09") == "\\t"); + CHECK(json::serializer::escape_string("\x0a") == "\\n"); + CHECK(json::serializer::escape_string("\x0b") == "\\u000b"); + CHECK(json::serializer::escape_string("\x0c") == "\\f"); + CHECK(json::serializer::escape_string("\x0d") == "\\r"); + CHECK(json::serializer::escape_string("\x0e") == "\\u000e"); + CHECK(json::serializer::escape_string("\x0f") == "\\u000f"); + CHECK(json::serializer::escape_string("\x10") == "\\u0010"); + CHECK(json::serializer::escape_string("\x11") == "\\u0011"); + CHECK(json::serializer::escape_string("\x12") == "\\u0012"); + CHECK(json::serializer::escape_string("\x13") == "\\u0013"); + CHECK(json::serializer::escape_string("\x14") == "\\u0014"); + CHECK(json::serializer::escape_string("\x15") == "\\u0015"); + CHECK(json::serializer::escape_string("\x16") == "\\u0016"); + CHECK(json::serializer::escape_string("\x17") == "\\u0017"); + CHECK(json::serializer::escape_string("\x18") == "\\u0018"); + CHECK(json::serializer::escape_string("\x19") == "\\u0019"); + CHECK(json::serializer::escape_string("\x1a") == "\\u001a"); + CHECK(json::serializer::escape_string("\x1b") == "\\u001b"); + CHECK(json::serializer::escape_string("\x1c") == "\\u001c"); + CHECK(json::serializer::escape_string("\x1d") == "\\u001d"); + CHECK(json::serializer::escape_string("\x1e") == "\\u001e"); + CHECK(json::serializer::escape_string("\x1f") == "\\u001f"); } } From af070744aecf944598c8655f5460779a63e66b40 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 27 Feb 2017 22:10:57 +0100 Subject: [PATCH 2/6] :hammer: integrating numtostr into serializer class By merging numtostr into serializer, we can write directly to the output stream. As a consequence, all stream calls are now unformatted. --- src/json.hpp | 220 ++++++++++++++++++++-------------------------- src/json.hpp.re2c | 216 +++++++++++++++++++-------------------------- 2 files changed, 185 insertions(+), 251 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index a73b5048..3e5a45cf 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6196,6 +6196,9 @@ class basic_json /// @{ private: + /*! + @brief wrapper around the serialization functions + */ class serializer { public: @@ -6223,7 +6226,7 @@ class basic_json void dump(const basic_json& val, const bool pretty_print, const unsigned int indent_step, - const unsigned int current_indent = 0) const + const unsigned int current_indent = 0) { switch (val.m_type) { @@ -6377,19 +6380,19 @@ class basic_json case value_t::number_integer: { - o << numtostr(val.m_value.number_integer).c_str(); + x_write(val.m_value.number_integer); return; } case value_t::number_unsigned: { - o << numtostr(val.m_value.number_unsigned).c_str(); + x_write(val.m_value.number_unsigned); return; } case value_t::number_float: { - o << numtostr(val.m_value.number_float).c_str(); + x_write(val.m_value.number_float); return; } @@ -6569,154 +6572,120 @@ class basic_json return result; } - /*! - @brief locale-independent serialization for built-in arithmetic types - */ - struct numtostr + template + void x_write(NumberType x) { - public: - template - numtostr(NumberType value) + // special case for "0" + if (x == 0) { - x_write(value, std::is_integral()); + o.put('0'); + return; } - const char* c_str() const + const bool is_negative = x < 0; + size_t i = 0; + + // spare 1 byte for '\0' + while (x != 0 and i < m_buf.size() - 1) { - return m_buf.data(); + const auto digit = std::labs(static_cast(x % 10)); + m_buf[i++] = static_cast('0' + digit); + x /= 10; } - private: - /// a (hopefully) large enough character buffer - std::array < char, 64 > m_buf{{}}; + // make sure the number has been processed completely + assert(x == 0); - template - void x_write(NumberType x, /*is_integral=*/std::true_type) + if (is_negative) { - // special case for "0" - if (x == 0) - { - m_buf[0] = '0'; - return; - } - - const bool is_negative = x < 0; - size_t i = 0; - - // spare 1 byte for '\0' - while (x != 0 and i < m_buf.size() - 1) - { - const auto digit = std::labs(static_cast(x % 10)); - m_buf[i++] = static_cast('0' + digit); - x /= 10; - } - - // make sure the number has been processed completely - assert(x == 0); - - if (is_negative) - { - // make sure there is capacity for the '-' - assert(i < m_buf.size() - 2); - m_buf[i++] = '-'; - } - - std::reverse(m_buf.begin(), m_buf.begin() + i); + // make sure there is capacity for the '-' + assert(i < m_buf.size() - 2); + m_buf[i++] = '-'; } - template - void x_write(NumberType x, /*is_integral=*/std::false_type) + std::reverse(m_buf.begin(), m_buf.begin() + i); + o.write(m_buf.data(), static_cast(i)); + } + + void x_write(number_float_t x) + { + // special case for 0.0 and -0.0 + if (x == 0) { - // special case for 0.0 and -0.0 - if (x == 0) + if (std::signbit(x)) { - size_t i = 0; - if (std::signbit(x)) - { - m_buf[i++] = '-'; - } - m_buf[i++] = '0'; - m_buf[i++] = '.'; - m_buf[i] = '0'; - return; + o.write("-0.0", 4); } - - // get number of digits for a text -> float -> text round-trip - static constexpr auto d = std::numeric_limits::digits10; - - // the actual conversion - const auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); - - // negative value indicates an error - assert(written_bytes > 0); - // check if buffer was large enough - assert(static_cast(written_bytes) < m_buf.size()); - - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - - // erase thousands separator - if (thousands_sep != '\0') + else { - const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); - std::fill(end, m_buf.end(), '\0'); + o.write("0.0", 3); } + return; + } - // convert decimal point to '.' - if (decimal_point != '\0' and decimal_point != '.') - { - for (auto& c : m_buf) - { - if (c == decimal_point) - { - c = '.'; - break; - } - } - } + // get number of digits for a text -> float -> text round-trip + static constexpr auto d = std::numeric_limits::digits10; - // determine if need to append ".0" - size_t i = 0; - bool value_is_int_like = true; - for (i = 0; i < m_buf.size(); ++i) + // the actual conversion + auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + + // negative value indicates an error + assert(written_bytes > 0); + // check if buffer was large enough + assert(static_cast(written_bytes) < m_buf.size()); + + // read information from locale + const auto loc = localeconv(); + assert(loc != nullptr); + const char thousands_sep = !loc->thousands_sep ? '\0' + : loc->thousands_sep[0]; + + const char decimal_point = !loc->decimal_point ? '\0' + : loc->decimal_point[0]; + + // erase thousands separator + if (thousands_sep != '\0') + { + const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); + std::fill(end, m_buf.end(), '\0'); + written_bytes -= (m_buf.end() - end); + } + + // convert decimal point to '.' + if (decimal_point != '\0' and decimal_point != '.') + { + for (auto& c : m_buf) { - // break when end of number is reached - if (m_buf[i] == '\0') + if (c == decimal_point) { + c = '.'; break; } - - // check if we find non-int character - value_is_int_like = value_is_int_like and m_buf[i] != '.' and - m_buf[i] != 'e' and m_buf[i] != 'E'; - } - - if (value_is_int_like) - { - // there must be 2 bytes left for ".0" - assert((i + 2) < m_buf.size()); - // we write to the end of the number - assert(m_buf[i] == '\0'); - assert(m_buf[i - 1] != '\0'); - - // add ".0" - m_buf[i] = '.'; - m_buf[i + 1] = '0'; - - // the resulting string is properly terminated - assert(m_buf[i + 2] == '\0'); } } - }; + + // determine if need to append ".0" + bool value_is_int_like = true; + for (size_t i = 0; i < static_cast(written_bytes); ++i) + { + // check if we find non-int character + value_is_int_like = value_is_int_like and m_buf[i] != '.' and + m_buf[i] != 'e'; + } + + o.write(m_buf.data(), static_cast(written_bytes)); + + if (value_is_int_like) + { + o.write(".0", 2); + } + } private: std::ostream& o; + + /// a (hopefully) large enough character buffer + std::array < char, 64 > m_buf{{}}; }; public: @@ -6754,7 +6723,6 @@ class basic_json // do the actual serialization serializer s(o); s.dump(j, pretty_print, static_cast(indentation)); - return o; } diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 01f6bbfe..e648b41b 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -6226,7 +6226,7 @@ class basic_json void dump(const basic_json& val, const bool pretty_print, const unsigned int indent_step, - const unsigned int current_indent = 0) const + const unsigned int current_indent = 0) { switch (val.m_type) { @@ -6380,19 +6380,19 @@ class basic_json case value_t::number_integer: { - o << numtostr(val.m_value.number_integer).c_str(); + x_write(val.m_value.number_integer); return; } case value_t::number_unsigned: { - o << numtostr(val.m_value.number_unsigned).c_str(); + x_write(val.m_value.number_unsigned); return; } case value_t::number_float: { - o << numtostr(val.m_value.number_float).c_str(); + x_write(val.m_value.number_float); return; } @@ -6572,154 +6572,120 @@ class basic_json return result; } - /*! - @brief locale-independent serialization for built-in arithmetic types - */ - struct numtostr + template + void x_write(NumberType x) { - public: - template - numtostr(NumberType value) + // special case for "0" + if (x == 0) { - x_write(value, std::is_integral()); + o.put('0'); + return; } - const char* c_str() const + const bool is_negative = x < 0; + size_t i = 0; + + // spare 1 byte for '\0' + while (x != 0 and i < m_buf.size() - 1) { - return m_buf.data(); + const auto digit = std::labs(static_cast(x % 10)); + m_buf[i++] = static_cast('0' + digit); + x /= 10; } - private: - /// a (hopefully) large enough character buffer - std::array < char, 64 > m_buf{{}}; + // make sure the number has been processed completely + assert(x == 0); - template - void x_write(NumberType x, /*is_integral=*/std::true_type) + if (is_negative) { - // special case for "0" - if (x == 0) - { - m_buf[0] = '0'; - return; - } - - const bool is_negative = x < 0; - size_t i = 0; - - // spare 1 byte for '\0' - while (x != 0 and i < m_buf.size() - 1) - { - const auto digit = std::labs(static_cast(x % 10)); - m_buf[i++] = static_cast('0' + digit); - x /= 10; - } - - // make sure the number has been processed completely - assert(x == 0); - - if (is_negative) - { - // make sure there is capacity for the '-' - assert(i < m_buf.size() - 2); - m_buf[i++] = '-'; - } - - std::reverse(m_buf.begin(), m_buf.begin() + i); + // make sure there is capacity for the '-' + assert(i < m_buf.size() - 2); + m_buf[i++] = '-'; } - template - void x_write(NumberType x, /*is_integral=*/std::false_type) + std::reverse(m_buf.begin(), m_buf.begin() + i); + o.write(m_buf.data(), static_cast(i)); + } + + void x_write(number_float_t x) + { + // special case for 0.0 and -0.0 + if (x == 0) { - // special case for 0.0 and -0.0 - if (x == 0) + if (std::signbit(x)) { - size_t i = 0; - if (std::signbit(x)) - { - m_buf[i++] = '-'; - } - m_buf[i++] = '0'; - m_buf[i++] = '.'; - m_buf[i] = '0'; - return; + o.write("-0.0", 4); } - - // get number of digits for a text -> float -> text round-trip - static constexpr auto d = std::numeric_limits::digits10; - - // the actual conversion - const auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); - - // negative value indicates an error - assert(written_bytes > 0); - // check if buffer was large enough - assert(static_cast(written_bytes) < m_buf.size()); - - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - - // erase thousands separator - if (thousands_sep != '\0') + else { - const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); - std::fill(end, m_buf.end(), '\0'); + o.write("0.0", 3); } + return; + } - // convert decimal point to '.' - if (decimal_point != '\0' and decimal_point != '.') - { - for (auto& c : m_buf) - { - if (c == decimal_point) - { - c = '.'; - break; - } - } - } + // get number of digits for a text -> float -> text round-trip + static constexpr auto d = std::numeric_limits::digits10; - // determine if need to append ".0" - size_t i = 0; - bool value_is_int_like = true; - for (i = 0; i < m_buf.size(); ++i) + // the actual conversion + auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + + // negative value indicates an error + assert(written_bytes > 0); + // check if buffer was large enough + assert(static_cast(written_bytes) < m_buf.size()); + + // read information from locale + const auto loc = localeconv(); + assert(loc != nullptr); + const char thousands_sep = !loc->thousands_sep ? '\0' + : loc->thousands_sep[0]; + + const char decimal_point = !loc->decimal_point ? '\0' + : loc->decimal_point[0]; + + // erase thousands separator + if (thousands_sep != '\0') + { + const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); + std::fill(end, m_buf.end(), '\0'); + written_bytes -= (m_buf.end() - end); + } + + // convert decimal point to '.' + if (decimal_point != '\0' and decimal_point != '.') + { + for (auto& c : m_buf) { - // break when end of number is reached - if (m_buf[i] == '\0') + if (c == decimal_point) { + c = '.'; break; } - - // check if we find non-int character - value_is_int_like = value_is_int_like and m_buf[i] != '.' and - m_buf[i] != 'e' and m_buf[i] != 'E'; - } - - if (value_is_int_like) - { - // there must be 2 bytes left for ".0" - assert((i + 2) < m_buf.size()); - // we write to the end of the number - assert(m_buf[i] == '\0'); - assert(m_buf[i - 1] != '\0'); - - // add ".0" - m_buf[i] = '.'; - m_buf[i + 1] = '0'; - - // the resulting string is properly terminated - assert(m_buf[i + 2] == '\0'); } } - }; + + // determine if need to append ".0" + bool value_is_int_like = true; + for (size_t i = 0; i < static_cast(written_bytes); ++i) + { + // check if we find non-int character + value_is_int_like = value_is_int_like and m_buf[i] != '.' and + m_buf[i] != 'e'; + } + + o.write(m_buf.data(), static_cast(written_bytes)); + + if (value_is_int_like) + { + o.write(".0", 2); + } + } private: std::ostream& o; + + /// a (hopefully) large enough character buffer + std::array < char, 64 > m_buf{{}}; }; public: From fc48b8ac2b595a8df5ad121b588d8820b3b6941d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Feb 2017 11:45:38 +0100 Subject: [PATCH 3/6] :bug: fixed a logical error Treated the size of the range as the number of thousand separators. This logical error yielded a negative value for written_bytes and eventually an infinite loop, as written_bytes was converted to an unsigned value. --- src/json.hpp | 3 ++- src/json.hpp.re2c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 3e5a45cf..5cf06249 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6648,7 +6648,8 @@ class basic_json { const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); std::fill(end, m_buf.end(), '\0'); - written_bytes -= (m_buf.end() - end); + assert((end - m_buf.begin()) <= written_bytes); + written_bytes = (end - m_buf.begin()); } // convert decimal point to '.' diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index e648b41b..c7784266 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -6648,7 +6648,8 @@ class basic_json { const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); std::fill(end, m_buf.end(), '\0'); - written_bytes -= (m_buf.end() - end); + assert((end - m_buf.begin()) <= written_bytes); + written_bytes = (end - m_buf.begin()); } // convert decimal point to '.' From 224f99070b8083e2c574014c1c7c13607795ec5a Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Feb 2017 16:28:22 +0100 Subject: [PATCH 4/6] :zap: micro-optimization of dump() A lot of small changes to avoid memory allocations: - The locale is only queried once rather than with every number serialization. - The indentation string is recycled between different calls. - The string escape function avoids a copy if no escaping is necessary. - The string escape and the space function use a complete switch case instead of cascaded ifs. Cachegrind measures some 15% performance improvement. --- src/json.hpp | 176 ++++++++++++++++++++++------------ src/json.hpp.re2c | 176 ++++++++++++++++++++++------------ test/src/unit-convenience.cpp | 85 ++++++++-------- 3 files changed, 277 insertions(+), 160 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 5cf06249..8d545a93 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -34,6 +34,7 @@ SOFTWARE. #include // assert #include // isdigit #include // and, not, or +#include // lconv, localeconv #include // isfinite, labs, ldexp, signbit #include // nullptr_t, ptrdiff_t, size_t #include // int64_t, uint64_t @@ -6203,7 +6204,9 @@ class basic_json { public: serializer(std::ostream& s) - : o(s) + : o(s), loc(std::localeconv()), + thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), + decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0]) {} /*! @@ -6244,7 +6247,10 @@ class basic_json // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } // first n-1 elements auto i = val.m_value.object->cbegin(); @@ -6252,8 +6258,7 @@ class basic_json { o.write(indent_string.c_str(), new_indent); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\": ", 3); dump(i->second, true, indent_step, new_indent); o.write(",\n", 2); @@ -6263,8 +6268,7 @@ class basic_json assert(i != val.m_value.object->cend()); o.write(indent_string.c_str(), new_indent); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\": ", 3); dump(i->second, true, indent_step, new_indent); @@ -6281,8 +6285,7 @@ class basic_json for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) { o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\":", 2); dump(i->second, false, indent_step, current_indent); o.put(','); @@ -6291,8 +6294,7 @@ class basic_json // last element assert(i != val.m_value.object->cend()); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\":", 2); dump(i->second, false, indent_step, current_indent); @@ -6316,7 +6318,10 @@ class basic_json // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } // first n-1 elements for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) @@ -6359,8 +6364,7 @@ class basic_json case value_t::string: { o.put('\"'); - const auto s = escape_string(*val.m_value.string); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(*val.m_value.string); o.put('\"'); return; } @@ -6380,19 +6384,19 @@ class basic_json case value_t::number_integer: { - x_write(val.m_value.number_integer); + dump_integer(val.m_value.number_integer); return; } case value_t::number_unsigned: { - x_write(val.m_value.number_unsigned); + dump_integer(val.m_value.number_unsigned); return; } case value_t::number_float: { - x_write(val.m_value.number_float); + dump_float(val.m_value.number_float); return; } @@ -6438,14 +6442,40 @@ class basic_json return res + 1; } + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // from c (1 byte) to \uxxxx (6 bytes) + return res + 5; + } + default: { - if (c >= 0x00 and c <= 0x1f) - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - return res; } } @@ -6465,12 +6495,13 @@ class basic_json @complexity Linear in the length of string @a s. */ - static string_t escape_string(const string_t& s) + void dump_escaped(const string_t& s) const { const auto space = extra_space(s); if (space == 0) { - return s; + o.write(s.c_str(), static_cast(s.size())); + return; } // create a result string of necessary size @@ -6537,43 +6568,69 @@ class basic_json break; } + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // convert a number 0..15 to its hex representation + // (0..f) + static const char hexify[16] = + { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + // print character c as \uxxxx + for (const char m : + { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] + }) + { + result[++pos] = m; + } + + ++pos; + break; + } + default: { - if (c >= 0x00 and c <= 0x1f) - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - } - else - { - // all other characters are added as-is - result[pos++] = c; - } + // all other characters are added as-is + result[pos++] = c; break; } } } - return result; + assert(pos == s.size() + space); + o.write(result.c_str(), static_cast(result.size())); } template - void x_write(NumberType x) + void dump_integer(NumberType x) { // special case for "0" if (x == 0) @@ -6607,7 +6664,7 @@ class basic_json o.write(m_buf.data(), static_cast(i)); } - void x_write(number_float_t x) + void dump_float(number_float_t x) { // special case for 0.0 and -0.0 if (x == 0) @@ -6634,15 +6691,6 @@ class basic_json // check if buffer was large enough assert(static_cast(written_bytes) < m_buf.size()); - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - // erase thousands separator if (thousands_sep != '\0') { @@ -6687,6 +6735,12 @@ class basic_json /// a (hopefully) large enough character buffer std::array < char, 64 > m_buf{{}}; + + const std::lconv* loc = nullptr; + const char thousands_sep = '\0'; + const char decimal_point = '\0'; + + string_t indent_string = string_t(512, ' '); }; public: diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index c7784266..b14fb68d 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -34,6 +34,7 @@ SOFTWARE. #include // assert #include // isdigit #include // and, not, or +#include // lconv, localeconv #include // isfinite, labs, ldexp, signbit #include // nullptr_t, ptrdiff_t, size_t #include // int64_t, uint64_t @@ -6203,7 +6204,9 @@ class basic_json { public: serializer(std::ostream& s) - : o(s) + : o(s), loc(std::localeconv()), + thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), + decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0]) {} /*! @@ -6244,7 +6247,10 @@ class basic_json // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } // first n-1 elements auto i = val.m_value.object->cbegin(); @@ -6252,8 +6258,7 @@ class basic_json { o.write(indent_string.c_str(), new_indent); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\": ", 3); dump(i->second, true, indent_step, new_indent); o.write(",\n", 2); @@ -6263,8 +6268,7 @@ class basic_json assert(i != val.m_value.object->cend()); o.write(indent_string.c_str(), new_indent); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\": ", 3); dump(i->second, true, indent_step, new_indent); @@ -6281,8 +6285,7 @@ class basic_json for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) { o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\":", 2); dump(i->second, false, indent_step, current_indent); o.put(','); @@ -6291,8 +6294,7 @@ class basic_json // last element assert(i != val.m_value.object->cend()); o.put('\"'); - const auto s = escape_string(i->first); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(i->first); o.write("\":", 2); dump(i->second, false, indent_step, current_indent); @@ -6316,7 +6318,10 @@ class basic_json // variable to hold indentation for recursive calls const auto new_indent = current_indent + indent_step; - string_t indent_string = string_t(new_indent, ' '); + if (indent_string.size() < new_indent) + { + indent_string.resize(new_indent, ' '); + } // first n-1 elements for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) @@ -6359,8 +6364,7 @@ class basic_json case value_t::string: { o.put('\"'); - const auto s = escape_string(*val.m_value.string); - o.write(s.c_str(), static_cast(s.size())); + dump_escaped(*val.m_value.string); o.put('\"'); return; } @@ -6380,19 +6384,19 @@ class basic_json case value_t::number_integer: { - x_write(val.m_value.number_integer); + dump_integer(val.m_value.number_integer); return; } case value_t::number_unsigned: { - x_write(val.m_value.number_unsigned); + dump_integer(val.m_value.number_unsigned); return; } case value_t::number_float: { - x_write(val.m_value.number_float); + dump_float(val.m_value.number_float); return; } @@ -6438,14 +6442,40 @@ class basic_json return res + 1; } + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // from c (1 byte) to \uxxxx (6 bytes) + return res + 5; + } + default: { - if (c >= 0x00 and c <= 0x1f) - { - // from c (1 byte) to \uxxxx (6 bytes) - return res + 5; - } - return res; } } @@ -6465,12 +6495,13 @@ class basic_json @complexity Linear in the length of string @a s. */ - static string_t escape_string(const string_t& s) + void dump_escaped(const string_t& s) const { const auto space = extra_space(s); if (space == 0) { - return s; + o.write(s.c_str(), static_cast(s.size())); + return; } // create a result string of necessary size @@ -6537,43 +6568,69 @@ class basic_json break; } + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + // convert a number 0..15 to its hex representation + // (0..f) + static const char hexify[16] = + { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + // print character c as \uxxxx + for (const char m : + { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] + }) + { + result[++pos] = m; + } + + ++pos; + break; + } + default: { - if (c >= 0x00 and c <= 0x1f) - { - // convert a number 0..15 to its hex representation - // (0..f) - static const char hexify[16] = - { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - // print character c as \uxxxx - for (const char m : - { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f] - }) - { - result[++pos] = m; - } - - ++pos; - } - else - { - // all other characters are added as-is - result[pos++] = c; - } + // all other characters are added as-is + result[pos++] = c; break; } } } - return result; + assert(pos == s.size() + space); + o.write(result.c_str(), static_cast(result.size())); } template - void x_write(NumberType x) + void dump_integer(NumberType x) { // special case for "0" if (x == 0) @@ -6607,7 +6664,7 @@ class basic_json o.write(m_buf.data(), static_cast(i)); } - void x_write(number_float_t x) + void dump_float(number_float_t x) { // special case for 0.0 and -0.0 if (x == 0) @@ -6634,15 +6691,6 @@ class basic_json // check if buffer was large enough assert(static_cast(written_bytes) < m_buf.size()); - // read information from locale - const auto loc = localeconv(); - assert(loc != nullptr); - const char thousands_sep = !loc->thousands_sep ? '\0' - : loc->thousands_sep[0]; - - const char decimal_point = !loc->decimal_point ? '\0' - : loc->decimal_point[0]; - // erase thousands separator if (thousands_sep != '\0') { @@ -6687,6 +6735,12 @@ class basic_json /// a (hopefully) large enough character buffer std::array < char, 64 > m_buf{{}}; + + const std::lconv* loc = nullptr; + const char thousands_sep = '\0'; + const char decimal_point = '\0'; + + string_t indent_string = string_t(512, ' '); }; public: diff --git a/test/src/unit-convenience.cpp b/test/src/unit-convenience.cpp index 45637033..33556311 100644 --- a/test/src/unit-convenience.cpp +++ b/test/src/unit-convenience.cpp @@ -49,44 +49,53 @@ TEST_CASE("convenience functions") SECTION("string escape") { - CHECK(json::serializer::escape_string("\"") == "\\\""); - CHECK(json::serializer::escape_string("\\") == "\\\\"); - CHECK(json::serializer::escape_string("\b") == "\\b"); - CHECK(json::serializer::escape_string("\f") == "\\f"); - CHECK(json::serializer::escape_string("\n") == "\\n"); - CHECK(json::serializer::escape_string("\r") == "\\r"); - CHECK(json::serializer::escape_string("\t") == "\\t"); + const auto check_escaped = [](const char* original, + const char* escaped) + { + std::stringstream ss; + json::serializer s(ss); + s.dump_escaped(original); + CHECK(ss.str() == escaped); + }; - CHECK(json::serializer::escape_string("\x01") == "\\u0001"); - CHECK(json::serializer::escape_string("\x02") == "\\u0002"); - CHECK(json::serializer::escape_string("\x03") == "\\u0003"); - CHECK(json::serializer::escape_string("\x04") == "\\u0004"); - CHECK(json::serializer::escape_string("\x05") == "\\u0005"); - CHECK(json::serializer::escape_string("\x06") == "\\u0006"); - CHECK(json::serializer::escape_string("\x07") == "\\u0007"); - CHECK(json::serializer::escape_string("\x08") == "\\b"); - CHECK(json::serializer::escape_string("\x09") == "\\t"); - CHECK(json::serializer::escape_string("\x0a") == "\\n"); - CHECK(json::serializer::escape_string("\x0b") == "\\u000b"); - CHECK(json::serializer::escape_string("\x0c") == "\\f"); - CHECK(json::serializer::escape_string("\x0d") == "\\r"); - CHECK(json::serializer::escape_string("\x0e") == "\\u000e"); - CHECK(json::serializer::escape_string("\x0f") == "\\u000f"); - CHECK(json::serializer::escape_string("\x10") == "\\u0010"); - CHECK(json::serializer::escape_string("\x11") == "\\u0011"); - CHECK(json::serializer::escape_string("\x12") == "\\u0012"); - CHECK(json::serializer::escape_string("\x13") == "\\u0013"); - CHECK(json::serializer::escape_string("\x14") == "\\u0014"); - CHECK(json::serializer::escape_string("\x15") == "\\u0015"); - CHECK(json::serializer::escape_string("\x16") == "\\u0016"); - CHECK(json::serializer::escape_string("\x17") == "\\u0017"); - CHECK(json::serializer::escape_string("\x18") == "\\u0018"); - CHECK(json::serializer::escape_string("\x19") == "\\u0019"); - CHECK(json::serializer::escape_string("\x1a") == "\\u001a"); - CHECK(json::serializer::escape_string("\x1b") == "\\u001b"); - CHECK(json::serializer::escape_string("\x1c") == "\\u001c"); - CHECK(json::serializer::escape_string("\x1d") == "\\u001d"); - CHECK(json::serializer::escape_string("\x1e") == "\\u001e"); - CHECK(json::serializer::escape_string("\x1f") == "\\u001f"); + check_escaped("\"", "\\\""); + check_escaped("\\", "\\\\"); + check_escaped("\b", "\\b"); + check_escaped("\f", "\\f"); + check_escaped("\n", "\\n"); + check_escaped("\r", "\\r"); + check_escaped("\t", "\\t"); + + check_escaped("\x01", "\\u0001"); + check_escaped("\x02", "\\u0002"); + check_escaped("\x03", "\\u0003"); + check_escaped("\x04", "\\u0004"); + check_escaped("\x05", "\\u0005"); + check_escaped("\x06", "\\u0006"); + check_escaped("\x07", "\\u0007"); + check_escaped("\x08", "\\b"); + check_escaped("\x09", "\\t"); + check_escaped("\x0a", "\\n"); + check_escaped("\x0b", "\\u000b"); + check_escaped("\x0c", "\\f"); + check_escaped("\x0d", "\\r"); + check_escaped("\x0e", "\\u000e"); + check_escaped("\x0f", "\\u000f"); + check_escaped("\x10", "\\u0010"); + check_escaped("\x11", "\\u0011"); + check_escaped("\x12", "\\u0012"); + check_escaped("\x13", "\\u0013"); + check_escaped("\x14", "\\u0014"); + check_escaped("\x15", "\\u0015"); + check_escaped("\x16", "\\u0016"); + check_escaped("\x17", "\\u0017"); + check_escaped("\x18", "\\u0018"); + check_escaped("\x19", "\\u0019"); + check_escaped("\x1a", "\\u001a"); + check_escaped("\x1b", "\\u001b"); + check_escaped("\x1c", "\\u001c"); + check_escaped("\x1d", "\\u001d"); + check_escaped("\x1e", "\\u001e"); + check_escaped("\x1f", "\\u001f"); } } From 059f21aadae5da6f1bebbdd12e3cc1421b39b46d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Feb 2017 17:24:03 +0100 Subject: [PATCH 5/6] :lipstick: fixed a warning snprintf returns an int, but we later assign it a difference_type which is usually a long. --- src/json.hpp | 2 +- src/json.hpp.re2c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 8d545a93..739f3540 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6684,7 +6684,7 @@ class basic_json static constexpr auto d = std::numeric_limits::digits10; // the actual conversion - auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + long written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); // negative value indicates an error assert(written_bytes > 0); diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index b14fb68d..a55792c5 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -6684,7 +6684,7 @@ class basic_json static constexpr auto d = std::numeric_limits::digits10; // the actual conversion - auto written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + long written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); // negative value indicates an error assert(written_bytes > 0); From d69242c6ba40bb809b18d43eb4712b15ccb7a912 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 28 Feb 2017 19:20:50 +0100 Subject: [PATCH 6/6] :lipstick: cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added comments for the serializer class. - Added test case for resizing of the indentation string. - Using std::none_of to check if “.0” needs to be added to floating-point number. --- src/json.hpp | 98 +++++++++++++++++++++++------------- src/json.hpp.re2c | 98 +++++++++++++++++++++++------------- test/src/unit-inspection.cpp | 12 +++++ 3 files changed, 138 insertions(+), 70 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 739f3540..507ec9f2 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6203,6 +6203,9 @@ class basic_json class serializer { public: + /*! + @param[in] s output stream to serialize to + */ serializer(std::ostream& s) : o(s), loc(std::localeconv()), thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), @@ -6212,10 +6215,10 @@ class basic_json /*! @brief internal implementation of the serialization function - This function is called by the public member function dump and organizes - the serialization internally. The indentation level is propagated as - additional parameter. In case of arrays and objects, the function is - called recursively. Note that + This function is called by the public member function dump and + organizes the serialization internally. The indentation level is + propagated as additional parameter. In case of arrays and objects, the + function is called recursively. - strings and object keys are escaped using `escape_string()` - integer numbers are converted implicitly via `operator<<` @@ -6483,15 +6486,14 @@ class basic_json } /*! - @brief escape a string + @brief dump escaped string - Escape a string by replacing certain special characters by a sequence of - an escape character (backslash) and another character and other control - characters by a sequence of "\u" followed by a four-digit hex - representation. + Escape a string by replacing certain special characters by a sequence + of an escape character (backslash) and another character and other + control characters by a sequence of "\u" followed by a four-digit hex + representation. The escaped string is written to output stream @a o. @param[in] s the string to escape - @return the escaped string @complexity Linear in the length of string @a s. */ @@ -6629,7 +6631,18 @@ class basic_json o.write(result.c_str(), static_cast(result.size())); } - template + /*! + @brief dump an integer + + Dump a given integer to output stream @a o. Works internally with + @a number_buffer. + + @param[in] x integer number (signed or unsigned) to dump + @tparam NumberType either @a number_integer_t or @a number_unsigned_t + */ + template::value or + std::is_same::value, int> = 0> void dump_integer(NumberType x) { // special case for "0" @@ -6643,10 +6656,10 @@ class basic_json size_t i = 0; // spare 1 byte for '\0' - while (x != 0 and i < m_buf.size() - 1) + while (x != 0 and i < number_buffer.size() - 1) { const auto digit = std::labs(static_cast(x % 10)); - m_buf[i++] = static_cast('0' + digit); + number_buffer[i++] = static_cast('0' + digit); x /= 10; } @@ -6656,14 +6669,22 @@ class basic_json if (is_negative) { // make sure there is capacity for the '-' - assert(i < m_buf.size() - 2); - m_buf[i++] = '-'; + assert(i < number_buffer.size() - 2); + number_buffer[i++] = '-'; } - std::reverse(m_buf.begin(), m_buf.begin() + i); - o.write(m_buf.data(), static_cast(i)); + std::reverse(number_buffer.begin(), number_buffer.begin() + i); + o.write(number_buffer.data(), static_cast(i)); } + /*! + @brief dump a floating-point number + + Dump a given floating-point number to output stream @a o. Works + internally with @a number_buffer. + + @param[in] x floating-point number to dump + */ void dump_float(number_float_t x) { // special case for 0.0 and -0.0 @@ -6684,26 +6705,29 @@ class basic_json static constexpr auto d = std::numeric_limits::digits10; // the actual conversion - long written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + long len = snprintf(number_buffer.data(), number_buffer.size(), + "%.*g", d, x); // negative value indicates an error - assert(written_bytes > 0); + assert(len > 0); // check if buffer was large enough - assert(static_cast(written_bytes) < m_buf.size()); + assert(static_cast(len) < number_buffer.size()); // erase thousands separator if (thousands_sep != '\0') { - const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); - std::fill(end, m_buf.end(), '\0'); - assert((end - m_buf.begin()) <= written_bytes); - written_bytes = (end - m_buf.begin()); + const auto end = std::remove(number_buffer.begin(), + number_buffer.begin() + len, + thousands_sep); + std::fill(end, number_buffer.end(), '\0'); + assert((end - number_buffer.begin()) <= len); + len = (end - number_buffer.begin()); } // convert decimal point to '.' if (decimal_point != '\0' and decimal_point != '.') { - for (auto& c : m_buf) + for (auto& c : number_buffer) { if (c == decimal_point) { @@ -6713,16 +6737,15 @@ class basic_json } } - // determine if need to append ".0" - bool value_is_int_like = true; - for (size_t i = 0; i < static_cast(written_bytes); ++i) - { - // check if we find non-int character - value_is_int_like = value_is_int_like and m_buf[i] != '.' and - m_buf[i] != 'e'; - } + o.write(number_buffer.data(), static_cast(len)); - o.write(m_buf.data(), static_cast(written_bytes)); + // determine if need to append ".0" + const bool value_is_int_like = std::none_of(number_buffer.begin(), + number_buffer.begin() + len + 1, + [](char c) + { + return c == '.' or c == 'e'; + }); if (value_is_int_like) { @@ -6731,15 +6754,20 @@ class basic_json } private: + /// the output of the serializer std::ostream& o; /// a (hopefully) large enough character buffer - std::array < char, 64 > m_buf{{}}; + std::array number_buffer{{}}; + /// the locale const std::lconv* loc = nullptr; + /// the locale's thousand separator character const char thousands_sep = '\0'; + /// the locale's decimal point character const char decimal_point = '\0'; + /// the indentation string string_t indent_string = string_t(512, ' '); }; diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index a55792c5..1dbbb3b5 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -6203,6 +6203,9 @@ class basic_json class serializer { public: + /*! + @param[in] s output stream to serialize to + */ serializer(std::ostream& s) : o(s), loc(std::localeconv()), thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]), @@ -6212,10 +6215,10 @@ class basic_json /*! @brief internal implementation of the serialization function - This function is called by the public member function dump and organizes - the serialization internally. The indentation level is propagated as - additional parameter. In case of arrays and objects, the function is - called recursively. Note that + This function is called by the public member function dump and + organizes the serialization internally. The indentation level is + propagated as additional parameter. In case of arrays and objects, the + function is called recursively. - strings and object keys are escaped using `escape_string()` - integer numbers are converted implicitly via `operator<<` @@ -6483,15 +6486,14 @@ class basic_json } /*! - @brief escape a string + @brief dump escaped string - Escape a string by replacing certain special characters by a sequence of - an escape character (backslash) and another character and other control - characters by a sequence of "\u" followed by a four-digit hex - representation. + Escape a string by replacing certain special characters by a sequence + of an escape character (backslash) and another character and other + control characters by a sequence of "\u" followed by a four-digit hex + representation. The escaped string is written to output stream @a o. @param[in] s the string to escape - @return the escaped string @complexity Linear in the length of string @a s. */ @@ -6629,7 +6631,18 @@ class basic_json o.write(result.c_str(), static_cast(result.size())); } - template + /*! + @brief dump an integer + + Dump a given integer to output stream @a o. Works internally with + @a number_buffer. + + @param[in] x integer number (signed or unsigned) to dump + @tparam NumberType either @a number_integer_t or @a number_unsigned_t + */ + template::value or + std::is_same::value, int> = 0> void dump_integer(NumberType x) { // special case for "0" @@ -6643,10 +6656,10 @@ class basic_json size_t i = 0; // spare 1 byte for '\0' - while (x != 0 and i < m_buf.size() - 1) + while (x != 0 and i < number_buffer.size() - 1) { const auto digit = std::labs(static_cast(x % 10)); - m_buf[i++] = static_cast('0' + digit); + number_buffer[i++] = static_cast('0' + digit); x /= 10; } @@ -6656,14 +6669,22 @@ class basic_json if (is_negative) { // make sure there is capacity for the '-' - assert(i < m_buf.size() - 2); - m_buf[i++] = '-'; + assert(i < number_buffer.size() - 2); + number_buffer[i++] = '-'; } - std::reverse(m_buf.begin(), m_buf.begin() + i); - o.write(m_buf.data(), static_cast(i)); + std::reverse(number_buffer.begin(), number_buffer.begin() + i); + o.write(number_buffer.data(), static_cast(i)); } + /*! + @brief dump a floating-point number + + Dump a given floating-point number to output stream @a o. Works + internally with @a number_buffer. + + @param[in] x floating-point number to dump + */ void dump_float(number_float_t x) { // special case for 0.0 and -0.0 @@ -6684,26 +6705,29 @@ class basic_json static constexpr auto d = std::numeric_limits::digits10; // the actual conversion - long written_bytes = snprintf(m_buf.data(), m_buf.size(), "%.*g", d, x); + long len = snprintf(number_buffer.data(), number_buffer.size(), + "%.*g", d, x); // negative value indicates an error - assert(written_bytes > 0); + assert(len > 0); // check if buffer was large enough - assert(static_cast(written_bytes) < m_buf.size()); + assert(static_cast(len) < number_buffer.size()); // erase thousands separator if (thousands_sep != '\0') { - const auto end = std::remove(m_buf.begin(), m_buf.begin() + written_bytes, thousands_sep); - std::fill(end, m_buf.end(), '\0'); - assert((end - m_buf.begin()) <= written_bytes); - written_bytes = (end - m_buf.begin()); + const auto end = std::remove(number_buffer.begin(), + number_buffer.begin() + len, + thousands_sep); + std::fill(end, number_buffer.end(), '\0'); + assert((end - number_buffer.begin()) <= len); + len = (end - number_buffer.begin()); } // convert decimal point to '.' if (decimal_point != '\0' and decimal_point != '.') { - for (auto& c : m_buf) + for (auto& c : number_buffer) { if (c == decimal_point) { @@ -6713,16 +6737,15 @@ class basic_json } } - // determine if need to append ".0" - bool value_is_int_like = true; - for (size_t i = 0; i < static_cast(written_bytes); ++i) - { - // check if we find non-int character - value_is_int_like = value_is_int_like and m_buf[i] != '.' and - m_buf[i] != 'e'; - } + o.write(number_buffer.data(), static_cast(len)); - o.write(m_buf.data(), static_cast(written_bytes)); + // determine if need to append ".0" + const bool value_is_int_like = std::none_of(number_buffer.begin(), + number_buffer.begin() + len + 1, + [](char c) + { + return c == '.' or c == 'e'; + }); if (value_is_int_like) { @@ -6731,15 +6754,20 @@ class basic_json } private: + /// the output of the serializer std::ostream& o; /// a (hopefully) large enough character buffer - std::array < char, 64 > m_buf{{}}; + std::array number_buffer{{}}; + /// the locale const std::lconv* loc = nullptr; + /// the locale's thousand separator character const char thousands_sep = '\0'; + /// the locale's decimal point character const char decimal_point = '\0'; + /// the indentation string string_t indent_string = string_t(512, ' '); }; diff --git a/test/src/unit-inspection.cpp b/test/src/unit-inspection.cpp index aead1258..4900e425 100644 --- a/test/src/unit-inspection.cpp +++ b/test/src/unit-inspection.cpp @@ -213,6 +213,18 @@ TEST_CASE("object inspection") "{\n \"array\": [\n 1,\n 2,\n 3,\n 4\n ],\n \"boolean\": false,\n \"null\": null,\n \"number\": 42,\n \"object\": {},\n \"string\": \"Hello world\"\n}"); } + SECTION("indent=x") + { + CHECK(j.dump().size() == 94); + CHECK(j.dump(1).size() == 127); + CHECK(j.dump(2).size() == 142); + CHECK(j.dump(512).size() == 7792); + + // important test, because it yields a resize of the indent_string + // inside the dump() function + CHECK(j.dump(1024).size() == 15472); + } + SECTION("dump and floating-point numbers") { auto s = json(42.23).dump();