🔨 more work on the number parser

This commit is contained in:
Niels Lohmann 2017-02-13 18:51:23 +01:00
parent b84705d557
commit 265c5b5207
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
3 changed files with 84 additions and 43 deletions

View file

@ -10957,6 +10957,8 @@ basic_json_parser_71:
const char* const m_start = nullptr; const char* const m_start = nullptr;
const char* const m_end = nullptr; const char* const m_end = nullptr;
// floating-point conversion
// overloaded wrappers for strtod/strtof/strtold // overloaded wrappers for strtod/strtof/strtold
// that will be called from parse<floating_point_t> // that will be called from parse<floating_point_t>
static void strtof(float& f, const char* str, char** endptr) static void strtof(float& f, const char* str, char** endptr)
@ -10984,6 +10986,9 @@ basic_json_parser_71:
std::array<char, 64> buf; std::array<char, 64> buf;
const size_t len = static_cast<size_t>(m_end - m_start); const size_t len = static_cast<size_t>(m_end - m_start);
// lexer will reject empty numbers
assert(len > 0);
// since dealing with strtod family of functions, we're // since dealing with strtod family of functions, we're
// getting the decimal point char from the C locale facilities // getting the decimal point char from the C locale facilities
// instead of C++'s numpunct facet of the current std::locale // instead of C++'s numpunct facet of the current std::locale
@ -11023,10 +11028,9 @@ basic_json_parser_71:
// this calls appropriate overload depending on T // this calls appropriate overload depending on T
strtof(value, data, &endptr); strtof(value, data, &endptr);
// note that reading past the end is OK, the data may be, for // parsing was successful iff strtof parsed exactly the number
// example, "123.", where the parsed token only contains // of characters determined by the lexer (len)
// "123", but strtod will read the dot as well. const bool ok = (endptr == (data + len));
const bool ok = (endptr >= (data + len)) and (len > 0);
if (ok and (value == 0.0) and (*data == '-')) if (ok and (value == 0.0) and (*data == '-'))
{ {
@ -11037,6 +11041,8 @@ basic_json_parser_71:
return ok; return ok;
} }
// integral conversion
signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const
{ {
return std::strtoll(m_start, endptr, 10); return std::strtoll(m_start, endptr, 10);
@ -11087,7 +11093,7 @@ basic_json_parser_71:
@param[out] result @ref basic_json object to receive the number. @param[out] result @ref basic_json object to receive the number.
@param[in] token the type of the number token @param[in] token the type of the number token
*/ */
void get_number(basic_json& result, const token_type token) const bool get_number(basic_json& result, const token_type token) const
{ {
assert(m_start != nullptr); assert(m_start != nullptr);
assert(m_start < m_cursor); assert(m_start < m_cursor);
@ -11105,9 +11111,10 @@ basic_json_parser_71:
number_unsigned_t val; number_unsigned_t val;
if (num_converter.to(val)) if (num_converter.to(val))
{ {
// parsing successful
result.m_type = value_t::number_unsigned; result.m_type = value_t::number_unsigned;
result.m_value = val; result.m_value = val;
return; return true;
} }
break; break;
} }
@ -11117,9 +11124,10 @@ basic_json_parser_71:
number_integer_t val; number_integer_t val;
if (num_converter.to(val)) if (num_converter.to(val))
{ {
// parsing successful
result.m_type = value_t::number_integer; result.m_type = value_t::number_integer;
result.m_value = val; result.m_value = val;
return; return true;
} }
break; break;
} }
@ -11133,22 +11141,24 @@ basic_json_parser_71:
// parse float (either explicitly or because a previous conversion // parse float (either explicitly or because a previous conversion
// failed) // failed)
number_float_t val; number_float_t val;
if (not num_converter.to(val)) if (num_converter.to(val))
{ {
// couldn't parse as float_t // parsing successful
result.m_type = value_t::discarded; result.m_type = value_t::number_float;
return; result.m_value = val;
// replace infinity and NAN by null
if (not std::isfinite(result.m_value.number_float))
{
result.m_type = value_t::null;
result.m_value = basic_json::json_value();
}
return true;
} }
result.m_type = value_t::number_float; // couldn't parse number in any format
result.m_value = val; return false;
// replace infinity and NAN by null
if (not std::isfinite(result.m_value.number_float))
{
result.m_type = value_t::null;
result.m_value = basic_json::json_value();
}
} }
private: private:
@ -11396,8 +11406,16 @@ basic_json_parser_71:
case lexer::token_type::value_integer: case lexer::token_type::value_integer:
case lexer::token_type::value_float: case lexer::token_type::value_float:
{ {
m_lexer.get_number(result, last_token); const bool ok = m_lexer.get_number(result, last_token);
get_token(); get_token();
// if number conversion was unsuccessful, then it is
// because the number was directly followed by an
// unexpected character (e.g. "01" where "1" is unexpected)
if (not ok)
{
unexpect(last_token);
}
break; break;
} }

View file

@ -10029,6 +10029,8 @@ class basic_json
const char* const m_start = nullptr; const char* const m_start = nullptr;
const char* const m_end = nullptr; const char* const m_end = nullptr;
// floating-point conversion
// overloaded wrappers for strtod/strtof/strtold // overloaded wrappers for strtod/strtof/strtold
// that will be called from parse<floating_point_t> // that will be called from parse<floating_point_t>
static void strtof(float& f, const char* str, char** endptr) static void strtof(float& f, const char* str, char** endptr)
@ -10056,6 +10058,9 @@ class basic_json
std::array<char, 64> buf; std::array<char, 64> buf;
const size_t len = static_cast<size_t>(m_end - m_start); const size_t len = static_cast<size_t>(m_end - m_start);
// lexer will reject empty numbers
assert(len > 0);
// since dealing with strtod family of functions, we're // since dealing with strtod family of functions, we're
// getting the decimal point char from the C locale facilities // getting the decimal point char from the C locale facilities
// instead of C++'s numpunct facet of the current std::locale // instead of C++'s numpunct facet of the current std::locale
@ -10095,10 +10100,9 @@ class basic_json
// this calls appropriate overload depending on T // this calls appropriate overload depending on T
strtof(value, data, &endptr); strtof(value, data, &endptr);
// note that reading past the end is OK, the data may be, for // parsing was successful iff strtof parsed exactly the number
// example, "123.", where the parsed token only contains // of characters determined by the lexer (len)
// "123", but strtod will read the dot as well. const bool ok = (endptr == (data + len));
const bool ok = (endptr >= (data + len)) and (len > 0);
if (ok and (value == 0.0) and (*data == '-')) if (ok and (value == 0.0) and (*data == '-'))
{ {
@ -10109,6 +10113,8 @@ class basic_json
return ok; return ok;
} }
// integral conversion
signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const
{ {
return std::strtoll(m_start, endptr, 10); return std::strtoll(m_start, endptr, 10);
@ -10159,7 +10165,7 @@ class basic_json
@param[out] result @ref basic_json object to receive the number. @param[out] result @ref basic_json object to receive the number.
@param[in] token the type of the number token @param[in] token the type of the number token
*/ */
void get_number(basic_json& result, const token_type token) const bool get_number(basic_json& result, const token_type token) const
{ {
assert(m_start != nullptr); assert(m_start != nullptr);
assert(m_start < m_cursor); assert(m_start < m_cursor);
@ -10177,9 +10183,10 @@ class basic_json
number_unsigned_t val; number_unsigned_t val;
if (num_converter.to(val)) if (num_converter.to(val))
{ {
// parsing successful
result.m_type = value_t::number_unsigned; result.m_type = value_t::number_unsigned;
result.m_value = val; result.m_value = val;
return; return true;
} }
break; break;
} }
@ -10189,9 +10196,10 @@ class basic_json
number_integer_t val; number_integer_t val;
if (num_converter.to(val)) if (num_converter.to(val))
{ {
// parsing successful
result.m_type = value_t::number_integer; result.m_type = value_t::number_integer;
result.m_value = val; result.m_value = val;
return; return true;
} }
break; break;
} }
@ -10205,22 +10213,24 @@ class basic_json
// parse float (either explicitly or because a previous conversion // parse float (either explicitly or because a previous conversion
// failed) // failed)
number_float_t val; number_float_t val;
if (not num_converter.to(val)) if (num_converter.to(val))
{ {
// couldn't parse as float_t // parsing successful
result.m_type = value_t::discarded; result.m_type = value_t::number_float;
return; result.m_value = val;
// replace infinity and NAN by null
if (not std::isfinite(result.m_value.number_float))
{
result.m_type = value_t::null;
result.m_value = basic_json::json_value();
}
return true;
} }
result.m_type = value_t::number_float; // couldn't parse number in any format
result.m_value = val; return false;
// replace infinity and NAN by null
if (not std::isfinite(result.m_value.number_float))
{
result.m_type = value_t::null;
result.m_value = basic_json::json_value();
}
} }
private: private:
@ -10468,8 +10478,16 @@ class basic_json
case lexer::token_type::value_integer: case lexer::token_type::value_integer:
case lexer::token_type::value_float: case lexer::token_type::value_float:
{ {
m_lexer.get_number(result, last_token); const bool ok = m_lexer.get_number(result, last_token);
get_token(); get_token();
// if number conversion was unsuccessful, then it is
// because the number was directly followed by an
// unexpected character (e.g. "01" where "1" is unexpected)
if (not ok)
{
unexpect(last_token);
}
break; break;
} }

View file

@ -270,6 +270,11 @@ TEST_CASE("parser class")
} }
} }
SECTION("overflow")
{
CHECK(json::parser("1.18973e+4932").parse() == json());
}
SECTION("invalid numbers") SECTION("invalid numbers")
{ {
CHECK_THROWS_AS(json::parser("01").parse(), std::invalid_argument); CHECK_THROWS_AS(json::parser("01").parse(), std::invalid_argument);
@ -294,7 +299,7 @@ TEST_CASE("parser class")
CHECK_THROWS_AS(json::parser("+0").parse(), std::invalid_argument); CHECK_THROWS_AS(json::parser("+0").parse(), std::invalid_argument);
CHECK_THROWS_WITH(json::parser("01").parse(), CHECK_THROWS_WITH(json::parser("01").parse(),
"parse error - unexpected number literal; expected end of input"); "parse error - unexpected number literal");
CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'"); CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'");
CHECK_THROWS_WITH(json::parser("1.").parse(), CHECK_THROWS_WITH(json::parser("1.").parse(),
"parse error - unexpected '.'; expected end of input"); "parse error - unexpected '.'; expected end of input");