From 265c5b5207cec0327c9e4844a56bcecaaa1f9a8d Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 13 Feb 2017 18:51:23 +0100 Subject: [PATCH] :hammer: more work on the number parser --- src/json.hpp | 60 ++++++++++++++++++++++------------ src/json.hpp.re2c | 60 ++++++++++++++++++++++------------ test/src/unit-class_parser.cpp | 7 +++- 3 files changed, 84 insertions(+), 43 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index 873b8252..bf9386b3 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -10957,6 +10957,8 @@ basic_json_parser_71: const char* const m_start = nullptr; const char* const m_end = nullptr; + // floating-point conversion + // overloaded wrappers for strtod/strtof/strtold // that will be called from parse static void strtof(float& f, const char* str, char** endptr) @@ -10984,6 +10986,9 @@ basic_json_parser_71: std::array buf; const size_t len = static_cast(m_end - m_start); + // lexer will reject empty numbers + assert(len > 0); + // since dealing with strtod family of functions, we're // getting the decimal point char from the C locale facilities // instead of C++'s numpunct facet of the current std::locale @@ -11023,10 +11028,9 @@ basic_json_parser_71: // this calls appropriate overload depending on T strtof(value, data, &endptr); - // note that reading past the end is OK, the data may be, for - // example, "123.", where the parsed token only contains - // "123", but strtod will read the dot as well. 
- const bool ok = (endptr >= (data + len)) and (len > 0); + // parsing was successful iff strtof parsed exactly the number + // of characters determined by the lexer (len) + const bool ok = (endptr == (data + len)); if (ok and (value == 0.0) and (*data == '-')) { @@ -11037,6 +11041,8 @@ basic_json_parser_71: return ok; } + // integral conversion + signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const { return std::strtoll(m_start, endptr, 10); @@ -11087,7 +11093,7 @@ basic_json_parser_71: @param[out] result @ref basic_json object to receive the number. @param[in] token the type of the number token */ - void get_number(basic_json& result, const token_type token) const + bool get_number(basic_json& result, const token_type token) const { assert(m_start != nullptr); assert(m_start < m_cursor); @@ -11105,9 +11111,10 @@ basic_json_parser_71: number_unsigned_t val; if (num_converter.to(val)) { + // parsing successful result.m_type = value_t::number_unsigned; result.m_value = val; - return; + return true; } break; } @@ -11117,9 +11124,10 @@ basic_json_parser_71: number_integer_t val; if (num_converter.to(val)) { + // parsing successful result.m_type = value_t::number_integer; result.m_value = val; - return; + return true; } break; } @@ -11133,22 +11141,24 @@ basic_json_parser_71: // parse float (either explicitly or because a previous conversion // failed) number_float_t val; - if (not num_converter.to(val)) + if (num_converter.to(val)) { - // couldn't parse as float_t - result.m_type = value_t::discarded; - return; + // parsing successful + result.m_type = value_t::number_float; + result.m_value = val; + + // replace infinity and NAN by null + if (not std::isfinite(result.m_value.number_float)) + { + result.m_type = value_t::null; + result.m_value = basic_json::json_value(); + } + + return true; } - result.m_type = value_t::number_float; - result.m_value = val; - - // replace infinity and NAN by null - if (not 
std::isfinite(result.m_value.number_float)) - { - result.m_type = value_t::null; - result.m_value = basic_json::json_value(); - } + // couldn't parse number in any format + return false; } private: @@ -11396,8 +11406,16 @@ basic_json_parser_71: case lexer::token_type::value_integer: case lexer::token_type::value_float: { - m_lexer.get_number(result, last_token); + const bool ok = m_lexer.get_number(result, last_token); get_token(); + + // if number conversion was unsuccessful, then it is + // because the number was directly followed by an + // unexpected character (e.g. "01" where "1" is unexpected) + if (not ok) + { + unexpect(last_token); + } break; } diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index cf6ed432..e15f7b1f 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -10029,6 +10029,8 @@ class basic_json const char* const m_start = nullptr; const char* const m_end = nullptr; + // floating-point conversion + // overloaded wrappers for strtod/strtof/strtold // that will be called from parse static void strtof(float& f, const char* str, char** endptr) @@ -10056,6 +10058,9 @@ class basic_json std::array buf; const size_t len = static_cast(m_end - m_start); + // lexer will reject empty numbers + assert(len > 0); + // since dealing with strtod family of functions, we're // getting the decimal point char from the C locale facilities // instead of C++'s numpunct facet of the current std::locale @@ -10095,10 +10100,9 @@ class basic_json // this calls appropriate overload depending on T strtof(value, data, &endptr); - // note that reading past the end is OK, the data may be, for - // example, "123.", where the parsed token only contains - // "123", but strtod will read the dot as well. 
- const bool ok = (endptr >= (data + len)) and (len > 0); + // parsing was successful iff strtof parsed exactly the number + // of characters determined by the lexer (len) + const bool ok = (endptr == (data + len)); if (ok and (value == 0.0) and (*data == '-')) { @@ -10109,6 +10113,8 @@ class basic_json return ok; } + // integral conversion + signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const { return std::strtoll(m_start, endptr, 10); @@ -10159,7 +10165,7 @@ class basic_json @param[out] result @ref basic_json object to receive the number. @param[in] token the type of the number token */ - void get_number(basic_json& result, const token_type token) const + bool get_number(basic_json& result, const token_type token) const { assert(m_start != nullptr); assert(m_start < m_cursor); @@ -10177,9 +10183,10 @@ class basic_json number_unsigned_t val; if (num_converter.to(val)) { + // parsing successful result.m_type = value_t::number_unsigned; result.m_value = val; - return; + return true; } break; } @@ -10189,9 +10196,10 @@ class basic_json number_integer_t val; if (num_converter.to(val)) { + // parsing successful result.m_type = value_t::number_integer; result.m_value = val; - return; + return true; } break; } @@ -10205,22 +10213,24 @@ class basic_json // parse float (either explicitly or because a previous conversion // failed) number_float_t val; - if (not num_converter.to(val)) + if (num_converter.to(val)) { - // couldn't parse as float_t - result.m_type = value_t::discarded; - return; + // parsing successful + result.m_type = value_t::number_float; + result.m_value = val; + + // replace infinity and NAN by null + if (not std::isfinite(result.m_value.number_float)) + { + result.m_type = value_t::null; + result.m_value = basic_json::json_value(); + } + + return true; } - result.m_type = value_t::number_float; - result.m_value = val; - - // replace infinity and NAN by null - if (not std::isfinite(result.m_value.number_float)) - { - 
result.m_type = value_t::null; - result.m_value = basic_json::json_value(); - } + // couldn't parse number in any format + return false; } private: @@ -10468,8 +10478,16 @@ class basic_json case lexer::token_type::value_integer: case lexer::token_type::value_float: { - m_lexer.get_number(result, last_token); + const bool ok = m_lexer.get_number(result, last_token); get_token(); + + // if number conversion was unsuccessful, then it is + // because the number was directly followed by an + // unexpected character (e.g. "01" where "1" is unexpected) + if (not ok) + { + unexpect(last_token); + } break; } diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index caa672a1..2fb0da2b 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -270,6 +270,11 @@ TEST_CASE("parser class") } } + SECTION("overflow") + { + CHECK(json::parser("1.18973e+4932").parse() == json()); + } + SECTION("invalid numbers") { CHECK_THROWS_AS(json::parser("01").parse(), std::invalid_argument); @@ -294,7 +299,7 @@ TEST_CASE("parser class") CHECK_THROWS_AS(json::parser("+0").parse(), std::invalid_argument); CHECK_THROWS_WITH(json::parser("01").parse(), - "parse error - unexpected number literal; expected end of input"); + "parse error - unexpected number literal"); CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("1.").parse(), "parse error - unexpected '.'; expected end of input");