diff --git a/src/json.hpp b/src/json.hpp index 6e49b4d9..6d7eb050 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -2646,14 +2646,6 @@ class basic_json string_t dump(const int indent = -1) const { std::stringstream ss; - // fix locale problems - ss.imbue(std::locale::classic()); - - // 6, 15 or 16 digits of precision allows round-trip IEEE 754 - // string->float->string, string->double->string or string->long - // double->string; to be safe, we read this value from - // std::numeric_limits::digits10 - ss.precision(std::numeric_limits::digits10); if (indent >= 0) { @@ -6214,10 +6206,6 @@ class basic_json `std::setw(4)` on @a o sets the indentation level to `4` and the serialization result is the same as calling `dump(4)`. - @note During serialization, the locale and the precision of the output - stream @a o are changed. The original values are restored when the - function returns. - @param[in,out] o stream to serialize to @param[in] j JSON value to serialize @@ -6239,22 +6227,9 @@ class basic_json // reset width to 0 for subsequent calls to this stream o.width(0); - // fix locale problems - const auto old_locale = o.imbue(std::locale::classic()); - // set precision - - // 6, 15 or 16 digits of precision allows round-trip IEEE 754 - // string->float->string, string->double->string or string->long - // double->string; to be safe, we read this value from - // std::numeric_limits::digits10 - const auto old_precision = o.precision(std::numeric_limits::digits10); - // do the actual serialization j.dump(o, pretty_print, static_cast(indentation)); - // reset locale and precision - o.imbue(old_locale); - o.precision(old_precision); return o; } @@ -8244,6 +8219,162 @@ class basic_json return result; } + + /*! + @brief locale-independent serialization for built-in arithmetic types + */ + struct numtostr + { + public: + template + numtostr(T value) + { + x_write(value, std::is_integral()); + } + + operator const char* () const + { + return m_buf.data(); + } + + const char* c_str() const + { + return m_buf.data(); + } + + private: + static constexpr size_t s_capacity = 30; + std::array < char, s_capacity + 2 > m_buf{{}}; // +2 for leading '-' + // and trailing '\0' + template + void x_write(T x, std::true_type) + { + static_assert(std::numeric_limits::digits10 <= s_capacity, ""); + + const bool is_neg = x < 0; + size_t i = 0; + + while (x and i < s_capacity) + { + const auto digit = std::labs(static_cast(x % 10)); + m_buf[i++] = static_cast('0' + digit); + x /= 10; + } + + assert(i < s_capacity); + + if (i == 0) + { + m_buf[i++] = '0'; + } + + if (is_neg) + { + m_buf[i++] = '-'; + } + + std::reverse(m_buf.begin(), m_buf.begin() + i); + } + + template + void x_write(T x, std::false_type) + { + if (x == 0) + { + std::strcpy(m_buf.data(), + std::signbit(x) ? "-0.0" : "0.0"); + return; + } + + static constexpr auto d = + std::numeric_limits::digits10; + static_assert(d == 6 or d == 15 or d == 16 or d == 17, ""); + + static constexpr auto fmt = d == 6 ? "%.7g" + : d == 15 ? "%.16g" + : d == 16 ? "%.17g" + : d == 17 ? "%.18g" + : "%.19g"; + // I'm not sure why we need to +1 the precision, + // but without it there's a unit-test that fails + // that asserts precision of the output + + snprintf(m_buf.data(), m_buf.size(), fmt, x); + +#if 0 + // C locales and C++ locales are similar but + // different. + // + // If working with C++ streams we'd've used + // these, but for C formatting functions we + // have to use C locales (setlocale / localeconv), + // rather than C++ locales (std::locale installed + // by std::locale::global()). + const std::locale loc; + + const char thousands_sep = + std::use_facet< std::numpunct>( + loc).thousands_sep(); + + const char decimal_point = + std::use_facet< std::numpunct>( + loc).decimal_point(); +#else + const auto loc = localeconv(); + assert(loc != nullptr); + const char thousands_sep = !loc->thousands_sep ? '\0' + : loc->thousands_sep[0]; + + const char decimal_point = !loc->decimal_point ? '\0' + : loc->decimal_point[0]; +#endif + + // erase thousands separator + if (thousands_sep) + { + auto end = std::remove(m_buf.begin(), + m_buf.end(), + thousands_sep); + + std::fill(end, m_buf.end(), '\0'); + } + + // convert decimal point to '.' + if (decimal_point and decimal_point != '.') + { + for (auto& c : m_buf) + { + if (c == decimal_point) + { + c = '.'; + break; + } + } + } + + // determine if need to apperd ".0" + auto data_end = m_buf.begin() + strlen(m_buf.data()); + + const bool value_is_int_like = + std::find_if(m_buf.begin(), data_end, + [](const char c) + { + return c == '.' + or c == 'e' + or c == 'E'; + }) + == data_end; + + assert(data_end + 2 < m_buf.end()); + if (value_is_int_like) + { + strcat(m_buf.data(), ".0"); + } + } + }; + + + /*! @brief internal implementation of the serialization function @@ -8363,27 +8494,19 @@ class basic_json case value_t::number_integer: { - o << m_value.number_integer; + o << numtostr(m_value.number_integer).c_str(); return; } case value_t::number_unsigned: { - o << m_value.number_unsigned; + o << numtostr(m_value.number_unsigned).c_str(); return; } case value_t::number_float: { - if (m_value.number_float == 0) - { - // special case for zero to get "0.0"/"-0.0" - o << (std::signbit(m_value.number_float) ? "-0.0" : "0.0"); - } - else - { - o << m_value.number_float; - } + o << numtostr(m_value.number_float).c_str(); return; } diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index dae2e958..dc1d8207 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -2646,14 +2646,6 @@ class basic_json string_t dump(const int indent = -1) const { std::stringstream ss; - // fix locale problems - ss.imbue(std::locale::classic()); - - // 6, 15 or 16 digits of precision allows round-trip IEEE 754 - // string->float->string, string->double->string or string->long - // double->string; to be safe, we read this value from - // std::numeric_limits::digits10 - ss.precision(std::numeric_limits::digits10); if (indent >= 0) { @@ -6214,10 +6206,6 @@ class basic_json `std::setw(4)` on @a o sets the indentation level to `4` and the serialization result is the same as calling `dump(4)`. - @note During serialization, the locale and the precision of the output - stream @a o are changed. The original values are restored when the - function returns. - @param[in,out] o stream to serialize to @param[in] j JSON value to serialize @@ -6239,22 +6227,9 @@ class basic_json // reset width to 0 for subsequent calls to this stream o.width(0); - // fix locale problems - const auto old_locale = o.imbue(std::locale::classic()); - // set precision - - // 6, 15 or 16 digits of precision allows round-trip IEEE 754 - // string->float->string, string->double->string or string->long - // double->string; to be safe, we read this value from - // std::numeric_limits::digits10 - const auto old_precision = o.precision(std::numeric_limits::digits10); - // do the actual serialization j.dump(o, pretty_print, static_cast(indentation)); - // reset locale and precision - o.imbue(old_locale); - o.precision(old_precision); return o; } @@ -8244,6 +8219,162 @@ class basic_json return result; } + + /*! + @brief locale-independent serialization for built-in arithmetic types + */ + struct numtostr + { + public: + template + numtostr(T value) + { + x_write(value, std::is_integral()); + } + + operator const char* () const + { + return m_buf.data(); + } + + const char* c_str() const + { + return m_buf.data(); + } + + private: + static constexpr size_t s_capacity = 30; + std::array < char, s_capacity + 2 > m_buf{{}}; // +2 for leading '-' + // and trailing '\0' + template + void x_write(T x, std::true_type) + { + static_assert(std::numeric_limits::digits10 <= s_capacity, ""); + + const bool is_neg = x < 0; + size_t i = 0; + + while (x and i < s_capacity) + { + const auto digit = std::labs(static_cast(x % 10)); + m_buf[i++] = static_cast('0' + digit); + x /= 10; + } + + assert(i < s_capacity); + + if (i == 0) + { + m_buf[i++] = '0'; + } + + if (is_neg) + { + m_buf[i++] = '-'; + } + + std::reverse(m_buf.begin(), m_buf.begin() + i); + } + + template + void x_write(T x, std::false_type) + { + if (x == 0) + { + std::strcpy(m_buf.data(), + std::signbit(x) ? "-0.0" : "0.0"); + return; + } + + static constexpr auto d = + std::numeric_limits::digits10; + static_assert(d == 6 or d == 15 or d == 16 or d == 17, ""); + + static constexpr auto fmt = d == 6 ? "%.7g" + : d == 15 ? "%.16g" + : d == 16 ? "%.17g" + : d == 17 ? "%.18g" + : "%.19g"; + // I'm not sure why we need to +1 the precision, + // but without it there's a unit-test that fails + // that asserts precision of the output + + snprintf(m_buf.data(), m_buf.size(), fmt, x); + +#if 0 + // C locales and C++ locales are similar but + // different. + // + // If working with C++ streams we'd've used + // these, but for C formatting functions we + // have to use C locales (setlocale / localeconv), + // rather than C++ locales (std::locale installed + // by std::locale::global()). + const std::locale loc; + + const char thousands_sep = + std::use_facet< std::numpunct>( + loc).thousands_sep(); + + const char decimal_point = + std::use_facet< std::numpunct>( + loc).decimal_point(); +#else + const auto loc = localeconv(); + assert(loc != nullptr); + const char thousands_sep = !loc->thousands_sep ? '\0' + : loc->thousands_sep[0]; + + const char decimal_point = !loc->decimal_point ? '\0' + : loc->decimal_point[0]; +#endif + + // erase thousands separator + if (thousands_sep) + { + auto end = std::remove(m_buf.begin(), + m_buf.end(), + thousands_sep); + + std::fill(end, m_buf.end(), '\0'); + } + + // convert decimal point to '.' + if (decimal_point and decimal_point != '.') + { + for (auto& c : m_buf) + { + if (c == decimal_point) + { + c = '.'; + break; + } + } + } + + // determine if need to apperd ".0" + auto data_end = m_buf.begin() + strlen(m_buf.data()); + + const bool value_is_int_like = + std::find_if(m_buf.begin(), data_end, + [](const char c) + { + return c == '.' + or c == 'e' + or c == 'E'; + }) + == data_end; + + assert(data_end + 2 < m_buf.end()); + if (value_is_int_like) + { + strcat(m_buf.data(), ".0"); + } + } + }; + + + /*! @brief internal implementation of the serialization function @@ -8363,27 +8494,19 @@ class basic_json case value_t::number_integer: { - o << m_value.number_integer; + o << numtostr(m_value.number_integer).c_str(); return; } case value_t::number_unsigned: { - o << m_value.number_unsigned; + o << numtostr(m_value.number_unsigned).c_str(); return; } case value_t::number_float: { - if (m_value.number_float == 0) - { - // special case for zero to get "0.0"/"-0.0" - o << (std::signbit(m_value.number_float) ? "-0.0" : "0.0"); - } - else - { - o << m_value.number_float; - } + o << numtostr(m_value.number_float).c_str(); return; } diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index 9feb07e0..f998fcb9 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -402,15 +402,35 @@ TEST_CASE("regression tests") //issue #230 //CHECK(j2b.dump() == "23.42"); - CHECK(j3a.dump() == "10000"); - CHECK(j3b.dump() == "10000"); - CHECK(j3c.dump() == "10000"); + CHECK(j3a.dump() == "10000.0"); + CHECK(j3b.dump() == "10000.0"); + CHECK(j3c.dump() == "10000.0"); //CHECK(j3b.dump() == "1E04"); // roundtrip error //CHECK(j3c.dump() == "1e04"); // roundtrip error std::locale::global(orig_locale); } + SECTION("issue #378 - locale-independent num-to-str") + { + setlocale(LC_NUMERIC, "de_DE.UTF-8"); + + // Verify that snprintf uses special decimal and grouping characters. + // Disabled, because can't trigger locale-specific behavior in AppVeyor +#ifndef _MSC_VER + { + std::array buf; + std::snprintf(buf.data(), buf.size(), "%.2f", 12345.67); + CHECK(strcmp(buf.data(), "12345,67") == 0); + } +#endif + + // verify that dumped correctly with '.' and no grouping + const json j1 = 12345.67; + CHECK(json(12345.67).dump() == "12345.67"); + setlocale(LC_NUMERIC, "C"); + } + SECTION("issue #379 - locale-independent str-to-num") { setlocale(LC_NUMERIC, "de_DE.UTF-8"); @@ -434,7 +454,6 @@ TEST_CASE("regression tests") CHECK(json::parse("1.000000000000000000000000000000000000000000000000000000000000000000000000").get() == 1.0); } - SECTION("issue #233 - Can't use basic_json::iterator as a base iterator for std::move_iterator") { json source = {"a", "b", "c"}; @@ -749,9 +768,9 @@ TEST_CASE("regression tests") CHECK_THROWS_AS(json::parse(vec), std::invalid_argument); } - //SECTION("issue #454 - doubles are printed as integers") - //{ - // json j = R"({"bool_value":true,"double_value":2.0,"int_value":10,"level1":{"list_value":[3,"hi",false],"tmp":5.0},"string_value":"hello"})"_json; - // CHECK(j["double_value"].is_number_integer()); - //} + SECTION("issue #454 - doubles are printed as integers") + { + json j = R"({"bool_value":true,"double_value":2.0,"int_value":10,"level1":{"list_value":[3,"hi",false],"tmp":5.0},"string_value":"hello"})"_json; + CHECK(j["double_value"].is_number_float()); + } }