Merge pull request #450 from nlohmann/TurpentineDistillery-feature/locale_independent_str_to_num

TurpentineDistillery feature/locale independent str to num
2017-02-14 07:18:35 +01:00 · 2017-02-14 07:18:35 +01:00 · 22b9a301d6
commit 22b9a301d6
parent c95ff863bf 265c5b5207
6 changed files with 634 additions and 364 deletions
--- a/2
+++ b/2
@ -94,7 +94,7 @@ cppcheck:
 # run clang sanitize (we are overriding the CXXFLAGS provided by travis in order to use gcc's libstdc++)
 clang_sanitize: clean
-	CXX=clang++ CXXFLAGS="-g -O2 -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" $(MAKE)
+	CXX=clang++ CXXFLAGS="-g -O2 -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" $(MAKE) check
 ##########################################################################
--- a/src/json.hpp
+++ b/src/json.hpp
@ -9444,7 +9444,9 @@ class basic_json
            literal_false,   ///< the `false` literal
            literal_null,    ///< the `null` literal
            value_string,    ///< a string -- use get_string() for actual value
-            value_number,    ///< a number -- use get_number() for actual value
+            value_unsigned,  ///< an unsigned integer -- use get_number() for actual value
            value_integer,   ///< a signed integer -- use get_number() for actual value
            value_float,     ///< a floating point number -- use get_number() for actual value
            begin_array,     ///< the character for array begin `[`
            begin_object,    ///< the character for object begin `{`
            end_array,       ///< the character for array end `]`
@ -9596,7 +9598,9 @@ class basic_json
                    return "null literal";
                case token_type::value_string:
                    return "string literal";
-                case token_type::value_number:
+                case lexer::token_type::value_unsigned:
                case lexer::token_type::value_integer:
                case lexer::token_type::value_float:
                    return "number literal";
                case token_type::begin_array:
                    return "'['";
@ -9869,11 +9873,11 @@ basic_json_parser_12:
                    }
                    if (yych <= '0')
                    {
-                        goto basic_json_parser_13;
+                        goto basic_json_parser_43;
                    }
                    if (yych <= '9')
                    {
-                        goto basic_json_parser_15;
+                        goto basic_json_parser_45;
                    }
                    goto basic_json_parser_5;
 basic_json_parser_13:
@ -9883,23 +9887,23 @@ basic_json_parser_13:
                    {
                        if (yych == '.')
                        {
-                            goto basic_json_parser_43;
+                            goto basic_json_parser_47;
                        }
                    }
                    else
                    {
                        if (yych <= 'E')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        if (yych == 'e')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                    }
 basic_json_parser_14:
                    {
-                        last_token_type = token_type::value_number;
+                        last_token_type = token_type::value_unsigned;
                        break;
                    }
 basic_json_parser_15:
@ -9918,7 +9922,7 @@ basic_json_parser_15:
                    {
                        if (yych == '.')
                        {
-                            goto basic_json_parser_43;
+                            goto basic_json_parser_47;
                        }
                        goto basic_json_parser_14;
                    }
@ -9926,11 +9930,11 @@ basic_json_parser_15:
                    {
                        if (yych <= 'E')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        if (yych == 'e')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        goto basic_json_parser_14;
                    }
@ -9957,7 +9961,7 @@ basic_json_parser_23:
                    yych = *(m_marker = ++m_cursor);
                    if (yych == 'a')
                    {
-                        goto basic_json_parser_45;
+                        goto basic_json_parser_49;
                    }
                    goto basic_json_parser_5;
 basic_json_parser_24:
@ -9965,7 +9969,7 @@ basic_json_parser_24:
                    yych = *(m_marker = ++m_cursor);
                    if (yych == 'u')
                    {
-                        goto basic_json_parser_46;
+                        goto basic_json_parser_50;
                    }
                    goto basic_json_parser_5;
 basic_json_parser_25:
@ -9973,7 +9977,7 @@ basic_json_parser_25:
                    yych = *(m_marker = ++m_cursor);
                    if (yych == 'r')
                    {
-                        goto basic_json_parser_47;
+                        goto basic_json_parser_51;
                    }
                    goto basic_json_parser_5;
 basic_json_parser_26:
@ -10055,13 +10059,27 @@ basic_json_parser_31:
                    }
 basic_json_parser_32:
                    m_cursor = m_marker;
-                    if (yyaccept == 0)
+                    if (yyaccept <= 1)
                    {
-                        goto basic_json_parser_5;
+                        if (yyaccept == 0)
                        {
                            goto basic_json_parser_5;
                        }
                        else
                        {
                            goto basic_json_parser_14;
                        }
                    }
                    else
                    {
-                        goto basic_json_parser_14;
+                        if (yyaccept == 2)
                        {
                            goto basic_json_parser_44;
                        }
                        else
                        {
                            goto basic_json_parser_55;
                        }
                    }
 basic_json_parser_33:
                    ++m_cursor;
@ -10142,7 +10160,7 @@ basic_json_parser_35:
                                }
                                if (yych <= 'u')
                                {
-                                    goto basic_json_parser_48;
+                                    goto basic_json_parser_52;
                                }
                                goto basic_json_parser_32;
                            }
@ -10261,6 +10279,71 @@ basic_json_parser_42:
                    }
                    goto basic_json_parser_32;
 basic_json_parser_43:
                    yyaccept = 2;
                    yych = *(m_marker = ++m_cursor);
                    if (yych <= 'D')
                    {
                        if (yych == '.')
                        {
                            goto basic_json_parser_47;
                        }
                    }
                    else
                    {
                        if (yych <= 'E')
                        {
                            goto basic_json_parser_48;
                        }
                        if (yych == 'e')
                        {
                            goto basic_json_parser_48;
                        }
                    }
 basic_json_parser_44:
                    {
                        last_token_type = token_type::value_integer;
                        break;
                    }
 basic_json_parser_45:
                    yyaccept = 2;
                    m_marker = ++m_cursor;
                    if ((m_limit - m_cursor) < 3)
                    {
                        fill_line_buffer(3);    // LCOV_EXCL_LINE
                    }
                    yych = *m_cursor;
                    if (yych <= '9')
                    {
                        if (yych == '.')
                        {
                            goto basic_json_parser_47;
                        }
                        if (yych <= '/')
                        {
                            goto basic_json_parser_44;
                        }
                        goto basic_json_parser_45;
                    }
                    else
                    {
                        if (yych <= 'E')
                        {
                            if (yych <= 'D')
                            {
                                goto basic_json_parser_44;
                            }
                            goto basic_json_parser_48;
                        }
                        else
                        {
                            if (yych == 'e')
                            {
                                goto basic_json_parser_48;
                            }
                            goto basic_json_parser_44;
                        }
                    }
 basic_json_parser_47:
                    yych = *++m_cursor;
                    if (yych <= '/')
                    {
@ -10268,16 +10351,16 @@ basic_json_parser_43:
                    }
                    if (yych <= '9')
                    {
-                        goto basic_json_parser_49;
+                        goto basic_json_parser_53;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_44:
+basic_json_parser_48:
                    yych = *++m_cursor;
                    if (yych <= ',')
                    {
                        if (yych == '+')
                        {
-                            goto basic_json_parser_51;
+                            goto basic_json_parser_56;
                        }
                        goto basic_json_parser_32;
                    }
@ -10285,7 +10368,7 @@ basic_json_parser_44:
                    {
                        if (yych <= '-')
                        {
-                            goto basic_json_parser_51;
+                            goto basic_json_parser_56;
                        }
                        if (yych <= '/')
                        {
@ -10293,32 +10376,32 @@ basic_json_parser_44:
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_52;
+                            goto basic_json_parser_57;
                        }
                        goto basic_json_parser_32;
                    }
-basic_json_parser_45:
+basic_json_parser_49:
                    yych = *++m_cursor;
                    if (yych == 'l')
                    {
-                        goto basic_json_parser_54;
+                        goto basic_json_parser_59;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_46:
+basic_json_parser_50:
                    yych = *++m_cursor;
                    if (yych == 'l')
                    {
-                        goto basic_json_parser_55;
+                        goto basic_json_parser_60;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_47:
+basic_json_parser_51:
                    yych = *++m_cursor;
                    if (yych == 'u')
                    {
-                        goto basic_json_parser_56;
+                        goto basic_json_parser_61;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_48:
+basic_json_parser_52:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10333,7 +10416,7 @@ basic_json_parser_48:
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_57;
+                            goto basic_json_parser_62;
                        }
                        goto basic_json_parser_32;
                    }
@ -10341,7 +10424,7 @@ basic_json_parser_48:
                    {
                        if (yych <= 'F')
                        {
-                            goto basic_json_parser_57;
+                            goto basic_json_parser_62;
                        }
                        if (yych <= '`')
                        {
@ -10349,12 +10432,12 @@ basic_json_parser_48:
                        }
                        if (yych <= 'f')
                        {
-                            goto basic_json_parser_57;
+                            goto basic_json_parser_62;
                        }
                        goto basic_json_parser_32;
                    }
-basic_json_parser_49:
+basic_json_parser_53:
-                    yyaccept = 1;
+                    yyaccept = 3;
                    m_marker = ++m_cursor;
                    if ((m_limit - m_cursor) < 3)
                    {
@ -10365,27 +10448,30 @@ basic_json_parser_49:
                    {
                        if (yych <= '/')
                        {
-                            goto basic_json_parser_14;
+                            goto basic_json_parser_55;
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_49;
+                            goto basic_json_parser_53;
                        }
                        goto basic_json_parser_14;
                    }
                    else
                    {
                        if (yych <= 'E')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        if (yych == 'e')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        goto basic_json_parser_14;
                    }
-basic_json_parser_51:
+basic_json_parser_55:
                    {
                        last_token_type = token_type::value_float;
                        break;
                    }
 basic_json_parser_56:
                    yych = *++m_cursor;
                    if (yych <= '/')
                    {
@ -10395,7 +10481,7 @@ basic_json_parser_51:
                    {
                        goto basic_json_parser_32;
                    }
-basic_json_parser_52:
+basic_json_parser_57:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10404,35 +10490,35 @@ basic_json_parser_52:
                    yych = *m_cursor;
                    if (yych <= '/')
                    {
-                        goto basic_json_parser_14;
+                        goto basic_json_parser_55;
                    }
                    if (yych <= '9')
                    {
-                        goto basic_json_parser_52;
+                        goto basic_json_parser_57;
                    }
-                    goto basic_json_parser_14;
+                    goto basic_json_parser_55;
-basic_json_parser_54:
+basic_json_parser_59:
                    yych = *++m_cursor;
                    if (yych == 's')
                    {
-                        goto basic_json_parser_58;
+                        goto basic_json_parser_63;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_55:
+basic_json_parser_60:
                    yych = *++m_cursor;
                    if (yych == 'l')
                    {
-                        goto basic_json_parser_59;
+                        goto basic_json_parser_64;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_56:
+basic_json_parser_61:
                    yych = *++m_cursor;
                    if (yych == 'e')
                    {
-                        goto basic_json_parser_61;
+                        goto basic_json_parser_66;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_57:
+basic_json_parser_62:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10447,7 +10533,7 @@ basic_json_parser_57:
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_68;
                        }
                        goto basic_json_parser_32;
                    }
@ -10455,7 +10541,7 @@ basic_json_parser_57:
                    {
                        if (yych <= 'F')
                        {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_68;
                        }
                        if (yych <= '`')
                        {
@ -10463,30 +10549,30 @@ basic_json_parser_57:
                        }
                        if (yych <= 'f')
                        {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_68;
                        }
                        goto basic_json_parser_32;
                    }
-basic_json_parser_58:
+basic_json_parser_63:
                    yych = *++m_cursor;
                    if (yych == 'e')
                    {
-                        goto basic_json_parser_64;
+                        goto basic_json_parser_69;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_59:
+basic_json_parser_64:
                    ++m_cursor;
                    {
                        last_token_type = token_type::literal_null;
                        break;
                    }
-basic_json_parser_61:
+basic_json_parser_66:
                    ++m_cursor;
                    {
                        last_token_type = token_type::literal_true;
                        break;
                    }
-basic_json_parser_63:
+basic_json_parser_68:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10501,7 +10587,7 @@ basic_json_parser_63:
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_66;
+                            goto basic_json_parser_71;
                        }
                        goto basic_json_parser_32;
                    }
@ -10509,7 +10595,7 @@ basic_json_parser_63:
                    {
                        if (yych <= 'F')
                        {
-                            goto basic_json_parser_66;
+                            goto basic_json_parser_71;
                        }
                        if (yych <= '`')
                        {
@ -10517,17 +10603,17 @@ basic_json_parser_63:
                        }
                        if (yych <= 'f')
                        {
-                            goto basic_json_parser_66;
+                            goto basic_json_parser_71;
                        }
                        goto basic_json_parser_32;
                    }
-basic_json_parser_64:
+basic_json_parser_69:
                    ++m_cursor;
                    {
                        last_token_type = token_type::literal_false;
                        break;
                    }
-basic_json_parser_66:
+basic_json_parser_71:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10838,59 +10924,155 @@ basic_json_parser_66:
            return result;
        }
        /*!
        @brief parse floating point number
        This function (and its overloads) serves to select the most appropriate
        standard floating point number parsing function based on the type
        supplied via the first parameter.  Set this to @a
        static_cast<number_float_t*>(nullptr).
        @param[in,out] endptr receives a pointer to the first character after
        the number
        @return the floating point number
        */
        // long double overload: the unnamed first parameter only selects this
        // overload. The text starting at m_start is handed to std::strtold and
        // endptr is forwarded to it unchanged.
        long double str_to_float_t(long double* /* type */, char** endptr) const
        {
            const auto token_begin = reinterpret_cast<typename string_t::const_pointer>(m_start);
            const long double parsed = std::strtold(token_begin, endptr);
            return parsed;
        }
        /*!
-        @brief parse floating point number
+        @brief parse string into a built-in arithmetic type as if the current
               locale is POSIX.
-        This function (and its overloads) serves to select the most appropriate
+        @note in floating-point case strtod may parse past the token's end -
-        standard floating point number parsing function based on the type
+              this is not an error
        supplied via the first parameter.  Set this to @a
        static_cast<number_float_t*>(nullptr).
-        @param[in,out] endptr  receives a pointer to the first character after
+        @note any leading blanks are not handled
        the number
        @return the floating point number
        */
-        double str_to_float_t(double* /* type */, char** endptr) const
+        struct strtonum
        {
-            return std::strtod(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
+          public:
-        }
+            strtonum(const char* start, const char* end)
                : m_start(start), m_end(end)
            {}
-        /*!
+            /*!
-        @brief parse floating point number
+            @return true iff parsed successfully as number of type T
-        This function (and its overloads) serves to select the most appropriate
+            @param[in,out] val shall contain parsed value, or undefined value
-        standard floating point number parsing function based on the type
+            if could not parse
-        supplied via the first parameter.  Set this to @a
+            */
-        static_cast<number_float_t*>(nullptr).
+            template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
            bool to(T& val) const
            {
                return parse(val, std::is_integral<T>());
            }
-        @param[in,out] endptr  receives a pointer to the first character after
+          private:
-        the number
+            const char* const m_start = nullptr;
            const char* const m_end = nullptr;
-        @return the floating point number
+            // floating-point conversion
-        */
+
-        float str_to_float_t(float* /* type */, char** endptr) const
+            // overloaded wrappers for strtod/strtof/strtold
-        {
+            // that will be called from parse<floating_point_t>
-            return std::strtof(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
+            static void strtof(float& f, const char* str, char** endptr)
-        }
+            {
                f = std::strtof(str, endptr);
            }
            // double overload of the strtof wrapper family: parses str with
            // std::strtod and stores the result in f; endptr is passed
            // through to std::strtod unchanged.
            static void strtof(double& f, const char* str, char** endptr)
            {
                const double parsed = std::strtod(str, endptr);
                f = parsed;
            }
            // long double overload of the strtof wrapper family: parses str
            // with std::strtold and stores the result in f; endptr is passed
            // through to std::strtold unchanged.
            static void strtof(long double& f, const char* str, char** endptr)
            {
                const long double parsed = std::strtold(str, endptr);
                f = parsed;
            }
            /*!
            @brief convert the token [m_start, m_end) to a floating-point value

            The strtod family uses the decimal point of the current C locale,
            whereas the token always uses '.'. When the two differ, the token
            is copied into a NUL-terminated scratch buffer (stack buffer for
            short tokens, std::string otherwise) and its '.' - if any - is
            replaced by the locale's character.

            The copy is made even when the token contains no '.': otherwise a
            token such as an overflowing integer followed in the input by the
            locale's decimal character (e.g. "18446744073709551616,5" with a
            ',' locale) would let strtod read past the token's end and the
            conversion would be reported as failed.

            @param[out] value  receives the result of the strtod-family call
            @return true iff the conversion consumed exactly the characters of
                    the token
            */
            template<typename T>
            bool parse(T& value, /*is_integral=*/std::false_type) const
            {
                // scratch storage; data will point to either the original
                // string, or buf, or tempstr
                std::string tempstr;
                std::array<char, 64> buf;
                const size_t len = static_cast<size_t>(m_end - m_start);

                // lexer will reject empty numbers
                assert(len > 0);

                // since dealing with strtod family of functions, we're
                // getting the decimal point char from the C locale facilities
                // instead of C++'s numpunct facet of the current std::locale
                const auto loc = localeconv();
                assert(loc != nullptr);
                const char decimal_point_char = (loc->decimal_point == nullptr) ? '.' : loc->decimal_point[0];

                const char* data = m_start;

                if (decimal_point_char != '.')
                {
                    const size_t ds_pos = static_cast<size_t>(std::find(m_start, m_end, '.') - m_start);
                    const bool has_separator = (ds_pos != len);

                    // always work on a bounded, NUL-terminated copy in this
                    // case, so that characters following the token cannot be
                    // consumed as a locale-specific decimal part
                    if ((len + 1) < buf.size())
                    {
                        std::copy(m_start, m_end, buf.data());
                        buf[len] = 0;
                        if (has_separator)
                        {
                            buf[ds_pos] = decimal_point_char;
                        }
                        data = buf.data();
                    }
                    else
                    {
                        tempstr.assign(m_start, m_end);
                        if (has_separator)
                        {
                            tempstr[ds_pos] = decimal_point_char;
                        }
                        data = tempstr.c_str();
                    }
                }

                char* endptr = nullptr;
                value = 0;
                // this calls appropriate overload depending on T
                strtof(value, data, &endptr);

                // parsing was successful iff strtof parsed exactly the number
                // of characters determined by the lexer (len)
                const bool ok = (endptr == (data + len));

                if (ok and (value == 0.0) and (*data == '-'))
                {
                    // some implementations forget to negate the zero
                    value = -0.0;
                }

                return ok;
            }
            // integral conversion
            // Signed overload (selected via std::true_type): converts the text
            // at m_start as a base-10 integer with std::strtoll; endptr is
            // forwarded to std::strtoll unchanged.
            signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const
            {
                const int decimal_base = 10;
                const signed long long parsed = std::strtoll(m_start, endptr, decimal_base);
                return parsed;
            }
            // Unsigned overload (selected via std::false_type): converts the
            // text at m_start as a base-10 integer with std::strtoull; endptr
            // is forwarded to std::strtoull unchanged.
            unsigned long long parse_integral(char** endptr, /*is_signed*/std::false_type) const
            {
                const int decimal_base = 10;
                const unsigned long long parsed = std::strtoull(m_start, endptr, decimal_base);
                return parsed;
            }
            // Integral overload: converts the token with the signedness-matched
            // parse_integral overload, narrows the result to T and reports
            // whether the conversion was exact and covered the whole token.
            // value is written in every case.
            template<typename T>
            bool parse(T& value, /*is_integral=*/std::true_type) const
            {
                char* stop = nullptr;
                errno = 0; // cleared so that an error set by strto[u]ll is detectable below
                const auto wide = parse_integral(&stop, std::is_signed<T>());

                // the overload chosen above must match the signedness of T
                static_assert(std::is_signed<T>() == std::is_signed<decltype(wide)>(), "");

                value = static_cast<T>(wide);

                // narrowing to T and widening back must reproduce the value
                const bool round_trips = (wide == static_cast<decltype(wide)>(value));
                // the narrowing must not have flipped the sign
                const bool same_sign = ((wide < 0) == (value < 0));
                // strto[u]ll did not report an error via errno
                const bool no_errno = (errno == 0);
                // token was not empty
                const bool non_empty = (m_start < m_end);
                // conversion stopped exactly at the token's end
                const bool consumed_all = (stop == m_end);

                return round_trips and same_sign and no_errno and non_empty and consumed_all;
            }
        };
        /*!
        @brief return number value for number tokens
@ -10899,125 +11081,84 @@ basic_json_parser_66:
        number type (either integer, unsigned integer or floating point),
        which is passed back to the caller via the result parameter.
-        This function parses the integer component up to the radix point or
+        integral numbers that don't fit into the range of the respective
-        exponent while collecting information about the 'floating point
+        type are parsed as number_float_t
        representation', which it stores in the result parameter. If there is
        no radix point or exponent, and the number can fit into a @ref
        number_integer_t or @ref number_unsigned_t then it sets the result
        parameter accordingly.
-        If the number is a floating point number the number is then parsed
+        floating-point values that do not satisfy the std::isfinite predicate
-        using @a std:strtod (or @a std:strtof or @a std::strtold).
+        are converted to value_t::null
-        @param[out] result  @ref basic_json object to receive the number, or
+        throws if the entire string [m_start .. m_cursor) cannot be
-        NAN if the conversion read past the current token. The latter case
+        interpreted as a number
-        needs to be treated by the caller function.
+
        @param[out] result  @ref basic_json object to receive the number.
        @param[in]  token   the type of the number token
        */
-        void get_number(basic_json& result) const
+        bool get_number(basic_json& result, const token_type token) const
        {
            assert(m_start != nullptr);
            assert(m_start < m_cursor);
            assert((token == token_type::value_unsigned) or
                   (token == token_type::value_integer) or
                   (token == token_type::value_float));
-            const lexer::lexer_char_t* curptr = m_start;
+            strtonum num_converter(reinterpret_cast<const char*>(m_start),
                                   reinterpret_cast<const char*>(m_cursor));
-            // accumulate the integer conversion result (unsigned for now)
+            switch (token)
            number_unsigned_t value = 0;
            // maximum absolute value of the relevant integer type
            number_unsigned_t max;
            // temporarily store the type to avoid unnecessary bitfield access
            value_t type;
            // look for sign
            if (*curptr == '-')
            {
-                type = value_t::number_integer;
+                case lexer::token_type::value_unsigned:
                max = static_cast<uint64_t>((std::numeric_limits<number_integer_t>::max)()) + 1;
                curptr++;
            }
            else
            {
                type = value_t::number_unsigned;
                max = static_cast<uint64_t>((std::numeric_limits<number_unsigned_t>::max)());
            }
            // count the significant figures
            for (; curptr < m_cursor; curptr++)
            {
                // quickly skip tests if a digit
                if (*curptr < '0' or* curptr > '9')
                {
-                    if (*curptr == '.')
+                    number_unsigned_t val;
                    if (num_converter.to(val))
                    {
-                        // don't count '.' but change to float
+                        // parsing successful
-                        type = value_t::number_float;
+                        result.m_type = value_t::number_unsigned;
-                        continue;
+                        result.m_value = val;
                        return true;
                    }
                    // assume exponent (if not then will fail parse): change to
                    // float, stop counting and record exponent details
                    type = value_t::number_float;
                    break;
                }
-                // skip if definitely not an integer
+                case lexer::token_type::value_integer:
                if (type != value_t::number_float)
                {
-                    auto digit = static_cast<number_unsigned_t>(*curptr - '0');
+                    number_integer_t val;
-
+                    if (num_converter.to(val))
                    // overflow if value * 10 + digit > max, move terms around
                    // to avoid overflow in intermediate values
                    if (value > (max - digit) / 10)
                    {
-                        // overflow
+                        // parsing successful
-                        type = value_t::number_float;
+                        result.m_type = value_t::number_integer;
-                    }
+                        result.m_value = val;
-                    else
+                        return true;
                    {
                        // no overflow
                        value = value * 10 + digit;
                    }
                    break;
                }
                default:
                {
                    break;
                }
            }
-            // save the value (if not a float)
+            // parse float (either explicitly or because a previous conversion
-            if (type == value_t::number_unsigned)
+            // failed)
            number_float_t val;
            if (num_converter.to(val))
            {
-                result.m_value.number_unsigned = value;
+                // parsing successful
-            }
+                result.m_type = value_t::number_float;
-            else if (type == value_t::number_integer)
+                result.m_value = val;
            {
                // invariant: if we parsed a '-', the absolute value is between
                // 0 (we allow -0) and max == -INT64_MIN
                assert(value >= 0);
                assert(value <= max);
                if (value == max)
                {
                    // we cannot simply negate value (== max == -INT64_MIN),
                    // see https://github.com/nlohmann/json/issues/389
                    result.m_value.number_integer = static_cast<number_integer_t>(INT64_MIN);
                }
                else
                {
                    // all other values can be negated safely
                    result.m_value.number_integer = -static_cast<number_integer_t>(value);
                }
            }
            else
            {
                // parse with strtod
                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), nullptr);
                // replace infinity and NAN by null
                if (not std::isfinite(result.m_value.number_float))
                {
-                    type = value_t::null;
+                    result.m_type  = value_t::null;
                    result.m_value = basic_json::json_value();
                }
                return true;
            }
-            // save the type
+            // couldn't parse number in any format
-            result.m_type = type;
+            return false;
        }
      private:
@ -11261,10 +11402,20 @@ basic_json_parser_66:
                    break;
                }
-                case lexer::token_type::value_number:
+                case lexer::token_type::value_unsigned:
                case lexer::token_type::value_integer:
                case lexer::token_type::value_float:
                {
-                    m_lexer.get_number(result);
+                    const bool ok = m_lexer.get_number(result, last_token);
                    get_token();
                    // if number conversion was unsuccessful, then it is
                    // because the number was directly followed by an
                    // unexpected character (e.g. "01" where "1" is unexpected)
                    if (not ok)
                    {
                        unexpect(last_token);
                    }
                    break;
                }
--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@ -9444,7 +9444,9 @@ class basic_json
            literal_false,   ///< the `false` literal
            literal_null,    ///< the `null` literal
            value_string,    ///< a string -- use get_string() for actual value
-            value_number,    ///< a number -- use get_number() for actual value
+            value_unsigned,  ///< an unsigned integer -- use get_number() for actual value
            value_integer,   ///< a signed integer -- use get_number() for actual value
            value_float,     ///< an floating point number -- use get_number() for actual value
            begin_array,     ///< the character for array begin `[`
            begin_object,    ///< the character for object begin `{`
            end_array,       ///< the character for array end `]`
@ -9596,7 +9598,9 @@ class basic_json
                    return "null literal";
                case token_type::value_string:
                    return "string literal";
-                case token_type::value_number:
+                case lexer::token_type::value_unsigned:
                case lexer::token_type::value_integer:
                case lexer::token_type::value_float:
                    return "number literal";
                case token_type::begin_array:
                    return "'['";
@ -9684,18 +9688,22 @@ class basic_json
                    "false" { last_token_type = token_type::literal_false; break; }
                    // number
-                    decimal_point = ".";
+                    decimal_point   = ".";
-                    digit         = [0-9];
+                    digit           = [0-9];
-                    digit_1_9     = [1-9];
+                    digit_1_9       = [1-9];
-                    e             = "e" | "E";
+                    e               = "e" | "E";
-                    minus         = "-";
+                    minus           = "-";
-                    plus          = "+";
+                    plus            = "+";
-                    zero          = "0";
+                    zero            = "0";
-                    exp           = e (minus | plus)? digit+;
+                    exp             = e (minus | plus)? digit+;
-                    frac          = decimal_point digit+;
+                    frac            = decimal_point digit+;
-                    int           = (zero | digit_1_9 digit*);
+                    int             = (zero | digit_1_9 digit*);
-                    number        = minus? int frac? exp?;
+                    number_unsigned = int;
-                    number        { last_token_type = token_type::value_number; break; }
+                    number_unsigned { last_token_type = token_type::value_unsigned; break; }
                    number_integer  = minus int;
                    number_integer  { last_token_type = token_type::value_integer; break; }
                    number_float    = minus? int frac? exp?;
                    number_float    { last_token_type = token_type::value_float; break; }
                    // string
                    quotation_mark  = "\"";
@ -9988,59 +9996,155 @@ class basic_json
            return result;
        }
        /*!
        @brief parse floating point number
        This function (and its overloads) serves to select the most appropriate
        standard floating point number parsing function based on the type
        supplied via the first parameter.  Set this to @a
        static_cast<number_float_t*>(nullptr).
        @param[in,out] endptr receives a pointer to the first character after
        the number
        @return the floating point number
        */
        long double str_to_float_t(long double* /* type */, char** endptr) const
        {
            // the unnamed tag pointer merely selects this overload; the token
            // start is viewed as a string_t character pointer and handed to
            // the long double flavour of the strtod family
            const auto token_begin = reinterpret_cast<typename string_t::const_pointer>(m_start);
            const long double parsed = std::strtold(token_begin, endptr);
            return parsed;
        }
        /*!
-        @brief parse floating point number
+        @brief parse string into a built-in arithmetic type as if the current
               locale is POSIX.
-        This function (and its overloads) serves to select the most appropriate
+        @note in floating-point case strtod may parse past the token's end -
-        standard floating point number parsing function based on the type
+              this is not an error
        supplied via the first parameter.  Set this to @a
        static_cast<number_float_t*>(nullptr).
-        @param[in,out] endptr  receives a pointer to the first character after
+        @note any leading blanks are not handled
        the number
        @return the floating point number
        */
-        double str_to_float_t(double* /* type */, char** endptr) const
+        struct strtonum
        {
-            return std::strtod(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
+          public:
-        }
+            strtonum(const char* start, const char* end)
                : m_start(start), m_end(end)
            {}
-        /*!
+            /*!
-        @brief parse floating point number
+            @return true iff parsed successfully as number of type T
-        This function (and its overloads) serves to select the most appropriate
+            @param[in,out] val shall contain parsed value, or undefined value
-        standard floating point number parsing function based on the type
+            if could not parse
-        supplied via the first parameter.  Set this to @a
+            */
-        static_cast<number_float_t*>(nullptr).
+            template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
            bool to(T& val) const
            {
                return parse(val, std::is_integral<T>());
            }
-        @param[in,out] endptr  receives a pointer to the first character after
+          private:
-        the number
+            const char* const m_start = nullptr;
            const char* const m_end = nullptr;
-        @return the floating point number
+            // floating-point conversion
-        */
+
-        float str_to_float_t(float* /* type */, char** endptr) const
+            // overloaded wrappers for strtod/strtof/strtold
-        {
+            // that will be called from parse<floating_point_t>
-            return std::strtof(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
+            static void strtof(float& f, const char* str, char** endptr)
-        }
+            {
                f = std::strtof(str, endptr);
            }
            static void strtof(double& f, const char* str, char** endptr)
            {
                // double precision is served by std::strtod
                const double converted = std::strtod(str, endptr);
                f = converted;
            }
            static void strtof(long double& f, const char* str, char** endptr)
            {
                // extended precision is served by std::strtold
                const long double converted = std::strtold(str, endptr);
                f = converted;
            }
            /*!
            @brief convert the token [m_start, m_end) to a floating-point value
            The strtod family honours the decimal point of the current C
            locale, whereas the token always uses '.'. When the two differ,
            the token is parsed from a private, null-terminated copy in which
            the '.' (if any) is replaced by the locale's decimal point.
            @param[out] value  receives the converted number; its content is
                               not meaningful if the function returns false
            @return true iff the conversion consumed exactly the characters
                    of the token
            */
            template<typename T>
            bool parse(T& value, /*is_integral=*/std::false_type) const
            {
                // data will point to either the original string, or buf, or
                // tempstr containing the locale-adjusted copy of the token
                std::string tempstr;
                std::array<char, 64> buf;
                const size_t len = static_cast<size_t>(m_end - m_start);
                // lexer will reject empty numbers
                assert(len > 0);
                // since dealing with strtod family of functions, we're
                // getting the decimal point char from the C locale facilities
                // instead of C++'s numpunct facet of the current std::locale
                const auto loc = localeconv();
                assert(loc != nullptr);
                const char decimal_point_char = (loc->decimal_point == nullptr) ? '.' : loc->decimal_point[0];
                const char* data = m_start;
                if (decimal_point_char != '.')
                {
                    const size_t ds_pos = static_cast<size_t>(std::find(m_start, m_end, '.') - m_start);
                    const bool has_dot = (ds_pos != len);
                    // a copy is needed if the token contains a '.' that has
                    // to be replaced, or if the character right behind the
                    // token is the locale's decimal point: strtod would then
                    // treat e.g. the ",5" of "[1e2,5]"-like input as a
                    // fractional part and run past the token. *m_end is only
                    // inspected when the token would otherwise be handed to
                    // strtod in place, which reads that character as well.
                    if (has_dot or (*m_end == decimal_point_char))
                    {
                        // copy the data into the local buffer or tempstr, if
                        // buffer is too small; replace decimal separator, and
                        // update data to point to the modified bytes
                        if ((len + 1) < buf.size())
                        {
                            std::copy(m_start, m_end, buf.data());
                            buf[len] = 0;
                            if (has_dot)
                            {
                                buf[ds_pos] = decimal_point_char;
                            }
                            data = buf.data();
                        }
                        else
                        {
                            tempstr.assign(m_start, m_end);
                            if (has_dot)
                            {
                                tempstr[ds_pos] = decimal_point_char;
                            }
                            data = tempstr.c_str();
                        }
                    }
                }
                char* endptr = nullptr;
                value = 0;
                // this calls appropriate overload depending on T
                strtof(value, data, &endptr);
                // parsing was successful iff strtof parsed exactly the number
                // of characters determined by the lexer (len)
                const bool ok = (endptr == (data + len));
                if (ok and (value == 0.0) and (*data == '-'))
                {
                    // some implementations forget to negate the zero
                    value = -0.0;
                }
                return ok;
            }
            // integral conversion
            signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const
            {
                // signed destination types are converted with strtoll, base 10
                const int decimal_base = 10;
                const signed long long parsed = std::strtoll(m_start, endptr, decimal_base);
                return parsed;
            }
            unsigned long long parse_integral(char** endptr, /*is_signed*/std::false_type) const
            {
                // unsigned destination types are converted with strtoull, base 10
                const int decimal_base = 10;
                const unsigned long long parsed = std::strtoull(m_start, endptr, decimal_base);
                return parsed;
            }
            template<typename T>
            bool parse(T& value, /*is_integral=*/std::true_type) const
            {
                // strto[u]ll reports overflow through errno (thread-local),
                // so it has to be cleared before the conversion
                errno = 0;
                char* stop = nullptr;
                const auto wide = parse_integral(&stop, std::is_signed<T>());
                // called right overload?
                static_assert(std::is_signed<T>() == std::is_signed<decltype(wide)>(), "");
                // narrow to the destination type first; the checks below
                // decide whether the stored value can be trusted
                value = static_cast<T>(wide);
                if (wide != static_cast<decltype(wide)>(value))
                {
                    // the parsed number does not fit into destination T
                    return false;
                }
                if ((wide < 0) != (value < 0))
                {
                    // narrowing flipped the sign
                    return false;
                }
                if (errno != 0)
                {
                    // strto[u]ll overflowed
                    return false;
                }
                if (not (m_start < m_end))
                {
                    // token was empty
                    return false;
                }
                // success only if the entire token was parsed exactly
                return stop == m_end;
            }
        };
        /*!
        @brief return number value for number tokens
@ -10049,125 +10153,84 @@ class basic_json
        number type (either integer, unsigned integer or floating point),
        which is passed back to the caller via the result parameter.
-        This function parses the integer component up to the radix point or
+        integral numbers that don't fit into the range of the respective
-        exponent while collecting information about the 'floating point
+        type are parsed as number_float_t
        representation', which it stores in the result parameter. If there is
        no radix point or exponent, and the number can fit into a @ref
        number_integer_t or @ref number_unsigned_t then it sets the result
        parameter accordingly.
-        If the number is a floating point number the number is then parsed
+        floating-point values that do not satisfy the std::isfinite predicate
-        using @a std:strtod (or @a std:strtof or @a std::strtold).
+        are converted to value_t::null
-        @param[out] result  @ref basic_json object to receive the number, or
+        returns false if the entire string [m_start .. m_cursor) cannot be
-        NAN if the conversion read past the current token. The latter case
+        interpreted as a number
-        needs to be treated by the caller function.
+
        @param[out] result  @ref basic_json object to receive the number.
        @param[in]  token   the type of the number token
        */
-        void get_number(basic_json& result) const
+        bool get_number(basic_json& result, const token_type token) const
        {
            assert(m_start != nullptr);
            assert(m_start < m_cursor);
            assert((token == token_type::value_unsigned) or
                   (token == token_type::value_integer) or
                   (token == token_type::value_float));
-            const lexer::lexer_char_t* curptr = m_start;
+            strtonum num_converter(reinterpret_cast<const char*>(m_start),
                                   reinterpret_cast<const char*>(m_cursor));
-            // accumulate the integer conversion result (unsigned for now)
+            switch (token)
            number_unsigned_t value = 0;
            // maximum absolute value of the relevant integer type
            number_unsigned_t max;
            // temporarily store the type to avoid unnecessary bitfield access
            value_t type;
            // look for sign
            if (*curptr == '-')
            {
-                type = value_t::number_integer;
+                case lexer::token_type::value_unsigned:
                max = static_cast<uint64_t>((std::numeric_limits<number_integer_t>::max)()) + 1;
                curptr++;
            }
            else
            {
                type = value_t::number_unsigned;
                max = static_cast<uint64_t>((std::numeric_limits<number_unsigned_t>::max)());
            }
            // count the significant figures
            for (; curptr < m_cursor; curptr++)
            {
                // quickly skip tests if a digit
                if (*curptr < '0' or* curptr > '9')
                {
-                    if (*curptr == '.')
+                    number_unsigned_t val;
                    if (num_converter.to(val))
                    {
-                        // don't count '.' but change to float
+                        // parsing successful
-                        type = value_t::number_float;
+                        result.m_type = value_t::number_unsigned;
-                        continue;
+                        result.m_value = val;
                        return true;
                    }
                    // assume exponent (if not then will fail parse): change to
                    // float, stop counting and record exponent details
                    type = value_t::number_float;
                    break;
                }
-                // skip if definitely not an integer
+                case lexer::token_type::value_integer:
                if (type != value_t::number_float)
                {
-                    auto digit = static_cast<number_unsigned_t>(*curptr - '0');
+                    number_integer_t val;
-
+                    if (num_converter.to(val))
                    // overflow if value * 10 + digit > max, move terms around
                    // to avoid overflow in intermediate values
                    if (value > (max - digit) / 10)
                    {
-                        // overflow
+                        // parsing successful
-                        type = value_t::number_float;
+                        result.m_type = value_t::number_integer;
-                    }
+                        result.m_value = val;
-                    else
+                        return true;
                    {
                        // no overflow
                        value = value * 10 + digit;
                    }
                    break;
                }
                default:
                {
                    break;
                }
            }
-            // save the value (if not a float)
+            // parse float (either explicitly or because a previous conversion
-            if (type == value_t::number_unsigned)
+            // failed)
            number_float_t val;
            if (num_converter.to(val))
            {
-                result.m_value.number_unsigned = value;
+                // parsing successful
-            }
+                result.m_type = value_t::number_float;
-            else if (type == value_t::number_integer)
+                result.m_value = val;
            {
                // invariant: if we parsed a '-', the absolute value is between
                // 0 (we allow -0) and max == -INT64_MIN
                assert(value >= 0);
                assert(value <= max);
                if (value == max)
                {
                    // we cannot simply negate value (== max == -INT64_MIN),
                    // see https://github.com/nlohmann/json/issues/389
                    result.m_value.number_integer = static_cast<number_integer_t>(INT64_MIN);
                }
                else
                {
                    // all other values can be negated safely
                    result.m_value.number_integer = -static_cast<number_integer_t>(value);
                }
            }
            else
            {
                // parse with strtod
                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), nullptr);
                // replace infinity and NAN by null
                if (not std::isfinite(result.m_value.number_float))
                {
-                    type = value_t::null;
+                    result.m_type  = value_t::null;
                    result.m_value = basic_json::json_value();
                }
                return true;
            }
-            // save the type
+            // couldn't parse number in any format
-            result.m_type = type;
+            return false;
        }
      private:
@ -10411,10 +10474,20 @@ class basic_json
                    break;
                }
-                case lexer::token_type::value_number:
+                case lexer::token_type::value_unsigned:
                case lexer::token_type::value_integer:
                case lexer::token_type::value_float:
                {
-                    m_lexer.get_number(result);
+                    const bool ok = m_lexer.get_number(result, last_token);
                    get_token();
                    // if number conversion was unsuccessful, then it is
                    // because the number was directly followed by an
                    // unexpected character (e.g. "01" where "1" is unexpected)
                    if (not ok)
                    {
                        unexpect(last_token);
                    }
                    break;
                }
--- a/test/src/unit-class_lexer.cpp
+++ b/test/src/unit-class_lexer.cpp
@ -65,25 +65,37 @@ TEST_CASE("lexer class")
        SECTION("numbers")
        {
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("0"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("2"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("3"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("4"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("5"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("6"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("7"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("8"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("9"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-0"),
                               2).scan() == json::lexer::token_type::value_integer));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-1"),
                               2).scan() == json::lexer::token_type::value_integer));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1.1"),
                               3).scan() == json::lexer::token_type::value_float));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-1.1"),
                               4).scan() == json::lexer::token_type::value_float));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1E10"),
                               4).scan() == json::lexer::token_type::value_float));
        }
        SECTION("whitespace")
@ -109,7 +121,9 @@ TEST_CASE("lexer class")
        CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_false) == "false literal"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_null) == "null literal"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_string) == "string literal"));
-        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_number) == "number literal"));
+        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_unsigned) == "number literal"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_integer) == "number literal"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_float) == "number literal"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_array) == "'['"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_object) == "'{'"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::end_array) == "']'"));
--- a/test/src/unit-class_parser.cpp
+++ b/test/src/unit-class_parser.cpp
@ -101,6 +101,7 @@ TEST_CASE("parser class")
                CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), "parse error - unexpected '\"'");
                // improve code coverage
                CHECK_THROWS_AS(json::parser("\uFF01").parse(), std::invalid_argument);
                CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), std::invalid_argument);
                // unescaped control characters
                CHECK_THROWS_AS(json::parser("\"\x00\"").parse(), std::invalid_argument);
                CHECK_THROWS_AS(json::parser("\"\x01\"").parse(), std::invalid_argument);
@ -269,6 +270,11 @@ TEST_CASE("parser class")
                }
            }
            SECTION("overflow")
            {
                // a floating-point literal with a huge exponent (presumably
                // beyond the range of number_float_t) is expected to parse to
                // a default-constructed json value rather than to throw
                CHECK(json::parser("1.18973e+4932").parse() == json());
            }
            SECTION("invalid numbers")
            {
                CHECK_THROWS_AS(json::parser("01").parse(), std::invalid_argument);
@ -293,7 +299,7 @@ TEST_CASE("parser class")
                CHECK_THROWS_AS(json::parser("+0").parse(), std::invalid_argument);
                CHECK_THROWS_WITH(json::parser("01").parse(),
-                                  "parse error - unexpected number literal; expected end of input");
+                                  "parse error - unexpected number literal");
                CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'");
                CHECK_THROWS_WITH(json::parser("1.").parse(),
                                  "parse error - unexpected '.'; expected end of input");
--- a/test/src/unit-regression.cpp
+++ b/test/src/unit-regression.cpp
@ -383,7 +383,7 @@ TEST_CASE("regression tests")
        };
        // change locale to mess with decimal points
-        std::locale::global(std::locale(std::locale(), new CommaDecimalSeparator));
+        auto orig_locale = std::locale::global(std::locale(std::locale(), new CommaDecimalSeparator));
        CHECK(j1a.dump() == "23.42");
        CHECK(j1b.dump() == "23.42");
@ -407,8 +407,34 @@ TEST_CASE("regression tests")
        CHECK(j3c.dump() == "10000");
        //CHECK(j3b.dump() == "1E04"); // roundtrip error
        //CHECK(j3c.dump() == "1e04"); // roundtrip error
        std::locale::global(orig_locale);
    }
    SECTION("issue #379 - locale-independent str-to-num")
    {
        // remember the active numeric locale so that it can be restored at
        // the end of this section; the name is copied because a later
        // setlocale call may invalidate the returned pointer
        const char* const active_locale = setlocale(LC_NUMERIC, nullptr);
        const std::string orig_locale = (active_locale != nullptr) ? active_locale : "C";
        // switch to a locale that uses ',' as decimal separator
        setlocale(LC_NUMERIC, "de_DE.UTF-8");
        // disabled, because locale-specific behavior is not
        // triggered in AppVeyor for some reason
 #ifndef _MSC_VER
        {
            // verify that strtod now uses commas as decimal-separator
            CHECK(std::strtod("3,14", nullptr) == 3.14);
            // verify that strtod does not understand dots as decimal separator
            CHECK(std::strtod("3.14", nullptr) == 3);
        }
 #endif
        // verify that parsed correctly despite using strtod internally
        CHECK(json::parse("3.14").get<double>() == 3.14);
        // check a different code path
        CHECK(json::parse("1.000000000000000000000000000000000000000000000000000000000000000000000000").get<double>() == 1.0);
        // do not leak the changed numeric locale into subsequent sections
        setlocale(LC_NUMERIC, orig_locale.c_str());
    }
    SECTION("issue #233 - Can't use basic_json::iterator as a base iterator for std::move_iterator")
    {
        json source = {"a", "b", "c"};