🔀 merge #378 (for #362 and #454)

2017-02-16 09:24:45 +01:00 · 2017-02-16 09:24:45 +01:00 · 6408402ad2
commit 6408402ad2
parent 057b1e606b 949061079a
3 changed files with 346 additions and 81 deletions
--- a/src/json.hpp
+++ b/src/json.hpp
@ -2646,14 +2646,6 @@ class basic_json
    string_t dump(const int indent = -1) const
    {
        std::stringstream ss;
-        // fix locale problems
-        ss.imbue(std::locale::classic());
-
-        // 6, 15 or 16 digits of precision allows round-trip IEEE 754
-        // string->float->string, string->double->string or string->long
-        // double->string; to be safe, we read this value from
-        // std::numeric_limits<number_float_t>::digits10
-        ss.precision(std::numeric_limits<double>::digits10);

        if (indent >= 0)
        {
@ -6214,10 +6206,6 @@ class basic_json
    `std::setw(4)` on @a o sets the indentation level to `4` and the
    serialization result is the same as calling `dump(4)`.

-    @note During serialization, the locale and the precision of the output
-    stream @a o are changed. The original values are restored when the
-    function returns.
-
    @param[in,out] o  stream to serialize to
    @param[in] j  JSON value to serialize

@ -6239,22 +6227,9 @@ class basic_json
        // reset width to 0 for subsequent calls to this stream
        o.width(0);

-        // fix locale problems
-        const auto old_locale = o.imbue(std::locale::classic());
-        // set precision
-
-        // 6, 15 or 16 digits of precision allows round-trip IEEE 754
-        // string->float->string, string->double->string or string->long
-        // double->string; to be safe, we read this value from
-        // std::numeric_limits<number_float_t>::digits10
-        const auto old_precision = o.precision(std::numeric_limits<double>::digits10);
-
        // do the actual serialization
        j.dump(o, pretty_print, static_cast<unsigned int>(indentation));

-        // reset locale and precision
-        o.imbue(old_locale);
-        o.precision(old_precision);
        return o;
    }

@ -8244,6 +8219,162 @@ class basic_json
        return result;
    }

+
+    /*!
+    @brief locale-independent serialization for built-in arithmetic types
+    */
+    struct numtostr
+    {
+      public:
+        template<typename T>
+        numtostr(T value)
+        {
+            x_write(value, std::is_integral<T>());
+        }
+
+        operator const char* () const
+        {
+            return m_buf.data();
+        }
+
+        const char* c_str() const
+        {
+            return m_buf.data();
+        }
+
+      private:
+        static constexpr size_t s_capacity = 30;
+        std::array < char, s_capacity + 2 > m_buf{{}}; // +2 for leading '-'
+        // and trailing '\0'
+        template<typename T>
+        void x_write(T x, std::true_type)
+        {
+            static_assert(std::numeric_limits<T>::digits10 <= s_capacity, "");
+
+            const bool is_neg = x < 0;
+            size_t i = 0;
+
+            while (x and i < s_capacity)
+            {
+                const auto digit = std::labs(static_cast<long>(x % 10));
+                m_buf[i++] = static_cast<char>('0' + digit);
+                x /= 10;
+            }
+
+            assert(i < s_capacity);
+
+            if (i == 0)
+            {
+                m_buf[i++] = '0';
+            }
+
+            if (is_neg)
+            {
+                m_buf[i++] = '-';
+            }
+
+            std::reverse(m_buf.begin(), m_buf.begin() + i);
+        }
+
+        template<typename T>
+        void x_write(T x, std::false_type)
+        {
+            if (x == 0)
+            {
+                std::strcpy(m_buf.data(),
+                            std::signbit(x) ? "-0.0" : "0.0");
+                return;
+            }
+
+            static constexpr auto d =
+                std::numeric_limits<number_float_t>::digits10;
+            static_assert(d == 6 or d == 15 or d == 16 or d == 17, "");
+
+            static constexpr auto fmt = d == 6  ? "%.7g"
+                                        : d == 15 ? "%.16g"
+                                        : d == 16 ? "%.17g"
+                                        : d == 17 ? "%.18g"
+                                        :           "%.19g";
+            // I'm not sure why we need to +1 the precision,
+            // but without it a unit test that asserts the
+            // precision of the output fails
+
+            snprintf(m_buf.data(), m_buf.size(), fmt, x);
+
+#if 0
+            // C locales and C++ locales are similar but
+            // different.
+            //
+            // If working with C++ streams we'd've used
+            // these, but for C formatting functions we
+            // have to use C locales (setlocale / localeconv),
+            // rather than C++ locales (std::locale installed
+            // by std::locale::global()).
+            const std::locale loc;
+
+            const char thousands_sep =
+                std::use_facet< std::numpunct<char>>(
+                    loc).thousands_sep();
+
+            const char decimal_point =
+                std::use_facet< std::numpunct<char>>(
+                    loc).decimal_point();
+#else
+            const auto loc = localeconv();
+            assert(loc != nullptr);
+            const char thousands_sep = !loc->thousands_sep ? '\0'
+                                       : loc->thousands_sep[0];
+
+            const char decimal_point = !loc->decimal_point ? '\0'
+                                       : loc->decimal_point[0];
+#endif
+
+            // erase thousands separator
+            if (thousands_sep)
+            {
+                auto end = std::remove(m_buf.begin(),
+                                       m_buf.end(),
+                                       thousands_sep);
+
+                std::fill(end, m_buf.end(), '\0');
+            }
+
+            // convert decimal point to '.'
+            if (decimal_point and decimal_point != '.')
+            {
+                for (auto& c : m_buf)
+                {
+                    if (c == decimal_point)
+                    {
+                        c = '.';
+                        break;
+                    }
+                }
+            }
+
+            // determine whether we need to append ".0"
+            auto data_end = m_buf.begin() + strlen(m_buf.data());
+
+            const bool value_is_int_like =
+                std::find_if(m_buf.begin(), data_end,
+                             [](const char c)
+            {
+                return c == '.'
+                       or c == 'e'
+                       or c == 'E';
+            })
+            == data_end;
+
+            assert(data_end + 2 < m_buf.end());
+            if (value_is_int_like)
+            {
+                strcat(m_buf.data(), ".0");
+            }
+        }
+    };
+
+
+
    /*!
    @brief internal implementation of the serialization function

@ -8363,27 +8494,19 @@ class basic_json

            case value_t::number_integer:
            {
-                o << m_value.number_integer;
+                o << numtostr(m_value.number_integer).c_str();
                return;
            }

            case value_t::number_unsigned:
            {
-                o << m_value.number_unsigned;
+                o << numtostr(m_value.number_unsigned).c_str();
                return;
            }

            case value_t::number_float:
            {
-                if (m_value.number_float == 0)
-                {
-                    // special case for zero to get "0.0"/"-0.0"
-                    o << (std::signbit(m_value.number_float) ? "-0.0" : "0.0");
-                }
-                else
-                {
-                    o << m_value.number_float;
-                }
+                o << numtostr(m_value.number_float).c_str();
                return;
            }

--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@ -2646,14 +2646,6 @@ class basic_json
    string_t dump(const int indent = -1) const
    {
        std::stringstream ss;
-        // fix locale problems
-        ss.imbue(std::locale::classic());
-
-        // 6, 15 or 16 digits of precision allows round-trip IEEE 754
-        // string->float->string, string->double->string or string->long
-        // double->string; to be safe, we read this value from
-        // std::numeric_limits<number_float_t>::digits10
-        ss.precision(std::numeric_limits<double>::digits10);

        if (indent >= 0)
        {
@ -6214,10 +6206,6 @@ class basic_json
    `std::setw(4)` on @a o sets the indentation level to `4` and the
    serialization result is the same as calling `dump(4)`.

-    @note During serialization, the locale and the precision of the output
-    stream @a o are changed. The original values are restored when the
-    function returns.
-
    @param[in,out] o  stream to serialize to
    @param[in] j  JSON value to serialize

@ -6239,22 +6227,9 @@ class basic_json
        // reset width to 0 for subsequent calls to this stream
        o.width(0);

-        // fix locale problems
-        const auto old_locale = o.imbue(std::locale::classic());
-        // set precision
-
-        // 6, 15 or 16 digits of precision allows round-trip IEEE 754
-        // string->float->string, string->double->string or string->long
-        // double->string; to be safe, we read this value from
-        // std::numeric_limits<number_float_t>::digits10
-        const auto old_precision = o.precision(std::numeric_limits<double>::digits10);
-
        // do the actual serialization
        j.dump(o, pretty_print, static_cast<unsigned int>(indentation));

-        // reset locale and precision
-        o.imbue(old_locale);
-        o.precision(old_precision);
        return o;
    }

@ -8244,6 +8219,162 @@ class basic_json
        return result;
    }

+
+    /*!
+    @brief locale-independent serialization for built-in arithmetic types
+    */
+    struct numtostr
+    {
+      public:
+        template<typename T>
+        numtostr(T value)
+        {
+            x_write(value, std::is_integral<T>());
+        }
+
+        operator const char* () const
+        {
+            return m_buf.data();
+        }
+
+        const char* c_str() const
+        {
+            return m_buf.data();
+        }
+
+      private:
+        static constexpr size_t s_capacity = 30;
+        std::array < char, s_capacity + 2 > m_buf{{}}; // +2 for leading '-'
+        // and trailing '\0'
+        template<typename T>
+        void x_write(T x, std::true_type)
+        {
+            static_assert(std::numeric_limits<T>::digits10 <= s_capacity, "");
+
+            const bool is_neg = x < 0;
+            size_t i = 0;
+
+            while (x and i < s_capacity)
+            {
+                const auto digit = std::labs(static_cast<long>(x % 10));
+                m_buf[i++] = static_cast<char>('0' + digit);
+                x /= 10;
+            }
+
+            assert(i < s_capacity);
+
+            if (i == 0)
+            {
+                m_buf[i++] = '0';
+            }
+
+            if (is_neg)
+            {
+                m_buf[i++] = '-';
+            }
+
+            std::reverse(m_buf.begin(), m_buf.begin() + i);
+        }
+
+        template<typename T>
+        void x_write(T x, std::false_type)
+        {
+            if (x == 0)
+            {
+                std::strcpy(m_buf.data(),
+                            std::signbit(x) ? "-0.0" : "0.0");
+                return;
+            }
+
+            static constexpr auto d =
+                std::numeric_limits<number_float_t>::digits10;
+            static_assert(d == 6 or d == 15 or d == 16 or d == 17, "");
+
+            static constexpr auto fmt = d == 6  ? "%.7g"
+                                        : d == 15 ? "%.16g"
+                                        : d == 16 ? "%.17g"
+                                        : d == 17 ? "%.18g"
+                                        :           "%.19g";
+            // I'm not sure why we need to +1 the precision,
+            // but without it a unit test that asserts the
+            // precision of the output fails
+
+            snprintf(m_buf.data(), m_buf.size(), fmt, x);
+
+#if 0
+            // C locales and C++ locales are similar but
+            // different.
+            //
+            // If working with C++ streams we'd've used
+            // these, but for C formatting functions we
+            // have to use C locales (setlocale / localeconv),
+            // rather than C++ locales (std::locale installed
+            // by std::locale::global()).
+            const std::locale loc;
+
+            const char thousands_sep =
+                std::use_facet< std::numpunct<char>>(
+                    loc).thousands_sep();
+
+            const char decimal_point =
+                std::use_facet< std::numpunct<char>>(
+                    loc).decimal_point();
+#else
+            const auto loc = localeconv();
+            assert(loc != nullptr);
+            const char thousands_sep = !loc->thousands_sep ? '\0'
+                                       : loc->thousands_sep[0];
+
+            const char decimal_point = !loc->decimal_point ? '\0'
+                                       : loc->decimal_point[0];
+#endif
+
+            // erase thousands separator
+            if (thousands_sep)
+            {
+                auto end = std::remove(m_buf.begin(),
+                                       m_buf.end(),
+                                       thousands_sep);
+
+                std::fill(end, m_buf.end(), '\0');
+            }
+
+            // convert decimal point to '.'
+            if (decimal_point and decimal_point != '.')
+            {
+                for (auto& c : m_buf)
+                {
+                    if (c == decimal_point)
+                    {
+                        c = '.';
+                        break;
+                    }
+                }
+            }
+
+            // determine whether we need to append ".0"
+            auto data_end = m_buf.begin() + strlen(m_buf.data());
+
+            const bool value_is_int_like =
+                std::find_if(m_buf.begin(), data_end,
+                             [](const char c)
+            {
+                return c == '.'
+                       or c == 'e'
+                       or c == 'E';
+            })
+            == data_end;
+
+            assert(data_end + 2 < m_buf.end());
+            if (value_is_int_like)
+            {
+                strcat(m_buf.data(), ".0");
+            }
+        }
+    };
+
+
+
    /*!
    @brief internal implementation of the serialization function

@ -8363,27 +8494,19 @@ class basic_json

            case value_t::number_integer:
            {
-                o << m_value.number_integer;
+                o << numtostr(m_value.number_integer).c_str();
                return;
            }

            case value_t::number_unsigned:
            {
-                o << m_value.number_unsigned;
+                o << numtostr(m_value.number_unsigned).c_str();
                return;
            }

            case value_t::number_float:
            {
-                if (m_value.number_float == 0)
-                {
-                    // special case for zero to get "0.0"/"-0.0"
-                    o << (std::signbit(m_value.number_float) ? "-0.0" : "0.0");
-                }
-                else
-                {
-                    o << m_value.number_float;
-                }
+                o << numtostr(m_value.number_float).c_str();
                return;
            }

--- a/test/src/unit-regression.cpp
+++ b/test/src/unit-regression.cpp
@ -402,15 +402,35 @@ TEST_CASE("regression tests")
        //issue #230
        //CHECK(j2b.dump() == "23.42");

-        CHECK(j3a.dump() == "10000");
-        CHECK(j3b.dump() == "10000");
-        CHECK(j3c.dump() == "10000");
+        CHECK(j3a.dump() == "10000.0");
+        CHECK(j3b.dump() == "10000.0");
+        CHECK(j3c.dump() == "10000.0");
        //CHECK(j3b.dump() == "1E04"); // roundtrip error
        //CHECK(j3c.dump() == "1e04"); // roundtrip error

        std::locale::global(orig_locale);
    }

+    SECTION("issue #378 - locale-independent num-to-str")
+    {
+        setlocale(LC_NUMERIC, "de_DE.UTF-8");
+
+        // Verify that snprintf uses locale-specific decimal and grouping characters.
+        // Disabled for MSVC because locale-specific behavior can't be triggered on AppVeyor.
+#ifndef _MSC_VER
+        {
+            std::array<char, 64> buf;
+            std::snprintf(buf.data(), buf.size(), "%.2f", 12345.67);
+            CHECK(strcmp(buf.data(), "12345,67") == 0);
+        }
+#endif
+
+        // verify that the value is dumped with '.' as decimal point and no grouping
+        const json j1 = 12345.67;
+        CHECK(json(12345.67).dump() == "12345.67");
+        setlocale(LC_NUMERIC, "C");
+    }
+
    SECTION("issue #379 - locale-independent str-to-num")
    {
        setlocale(LC_NUMERIC, "de_DE.UTF-8");
@ -434,7 +454,6 @@ TEST_CASE("regression tests")
        CHECK(json::parse("1.000000000000000000000000000000000000000000000000000000000000000000000000").get<double>() == 1.0);
    }

-
    SECTION("issue #233 - Can't use basic_json::iterator as a base iterator for std::move_iterator")
    {
        json source = {"a", "b", "c"};
@ -749,9 +768,9 @@ TEST_CASE("regression tests")
        CHECK_THROWS_AS(json::parse(vec), std::invalid_argument);
    }

-    //SECTION("issue #454 - doubles are printed as integers")
-    //{
-    //    json j = R"({"bool_value":true,"double_value":2.0,"int_value":10,"level1":{"list_value":[3,"hi",false],"tmp":5.0},"string_value":"hello"})"_json;
-    //    CHECK(j["double_value"].is_number_integer());
-    //}
+    SECTION("issue #454 - doubles are printed as integers")
+    {
+        json j = R"({"bool_value":true,"double_value":2.0,"int_value":10,"level1":{"list_value":[3,"hi",false],"tmp":5.0},"string_value":"hello"})"_json;
+        CHECK(j["double_value"].is_number_float());
+    }
 }