merged #201
This commit is contained in:
parent
04edafbddc
commit
9c233be567
9 changed files with 980 additions and 650 deletions
|
@ -402,7 +402,7 @@ I deeply appreciate the help of the following people.
|
|||
- [406345](https://github.com/406345) fixed two small warnings.
|
||||
- [Glen Fernandes](https://github.com/glenfe) noted a potential portability problem in the `has_mapped_type` function.
|
||||
- [Corbin Hughes](https://github.com/nibroc) fixed some typos in the contribution guidelines.
|
||||
- [twelsby](https://github.com/twelsby) fixed the array subscript operator, an issue that failed the MSVC build, and floating-point parsing/dumping. He further added support for unsigned integer numbers.
|
||||
- [twelsby](https://github.com/twelsby) fixed the array subscript operator, an issue that failed the MSVC build, and floating-point parsing/dumping. He further added support for unsigned integer numbers and implemented better roundtrip support for parsed numbers.
|
||||
- [Volker Diels-Grabsch](https://github.com/vog) fixed a link in the README file.
|
||||
- [msm-](https://github.com/msm-) added support for american fuzzy lop.
|
||||
- [Annihil](https://github.com/Annihil) fixed an example in the README file.
|
||||
|
|
1255
src/json.hpp
1255
src/json.hpp
File diff suppressed because it is too large
Load diff
|
@ -695,6 +695,74 @@ class basic_json
|
|||
|
||||
|
||||
private:
|
||||
|
||||
/*!
|
||||
@brief a type to hold JSON type information
|
||||
|
||||
This bitfield type holds information about JSON types. It is internally
|
||||
used to hold the basic JSON type enumeration, as well as additional
|
||||
information in the case of values that have been parsed from a string
|
||||
including whether or not it was created directly or parsed, and in the
|
||||
case of floating point numbers the number of significant figures in the
|
||||
original representation and if it was in exponential form, if a '+' was
|
||||
included in the exponent and the capitalization of the exponent marker.
|
||||
The sole purpose of this information is to permit accurate round trips.
|
||||
|
||||
@since version 2.0.0
|
||||
*/
|
||||
union type_data_t
|
||||
{
|
||||
struct
|
||||
{
|
||||
/// the type of the value (@ref value_t)
|
||||
uint16_t type : 4;
|
||||
/// whether the number was parsed from a string
|
||||
uint16_t parsed : 1;
|
||||
/// whether parsed number contained an exponent ('e'/'E')
|
||||
uint16_t has_exp : 1;
|
||||
/// whether parsed number contained a plus in the exponent
|
||||
uint16_t exp_plus : 1;
|
||||
/// whether parsed number's exponent was capitalized ('E')
|
||||
uint16_t exp_cap : 1;
|
||||
/// the number of figures for a parsed number
|
||||
uint16_t precision : 8;
|
||||
} bits;
|
||||
uint16_t data;
|
||||
|
||||
/// return the type as value_t
|
||||
operator value_t() const
|
||||
{
|
||||
return static_cast<value_t>(bits.type);
|
||||
}
|
||||
|
||||
/// test type for equality (ignore other fields)
|
||||
bool operator==(const value_t& rhs) const
|
||||
{
|
||||
return static_cast<value_t>(bits.type) == rhs;
|
||||
}
|
||||
|
||||
/// assignment
|
||||
type_data_t& operator=(value_t rhs)
|
||||
{
|
||||
bits.type = static_cast<uint16_t>(rhs);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// construct from value_t
|
||||
type_data_t(value_t t) noexcept
|
||||
{
|
||||
*reinterpret_cast<uint16_t*>(this) = 0;
|
||||
bits.type = static_cast<uint16_t>(t);
|
||||
}
|
||||
|
||||
/// default constructor
|
||||
type_data_t() noexcept
|
||||
{
|
||||
data = 0;
|
||||
bits.type = reinterpret_cast<uint16_t>(value_t::null);
|
||||
}
|
||||
};
|
||||
|
||||
/// helper for exception-safe object creation
|
||||
template<typename T, typename... Args>
|
||||
static T* create(Args&& ... args)
|
||||
|
@ -6046,23 +6114,78 @@ class basic_json
|
|||
|
||||
case value_t::number_float:
|
||||
{
|
||||
// If the number is an integer then output in fixed notation with
|
||||
// precision 1 to output "0.0", "1.0" etc as expected for some
|
||||
// round trip tests otherwise 15 digits of precision allows
|
||||
// round-trip IEEE 754 string->double->string; to be safe, we
|
||||
// read this value from
|
||||
// std::numeric_limits<number_float_t>::digits10
|
||||
if (std::fmod(m_value.number_float, 1) == 0)
|
||||
// buffer size: precision (2^8-1 = 255) + other ('-.e-xxx' = 7) + null (1)
|
||||
char buf[263];
|
||||
int len;
|
||||
|
||||
// check if number was parsed from a string
|
||||
if (m_type.bits.parsed)
|
||||
{
|
||||
o << std::fixed << std::setprecision(1);
|
||||
// check if parsed number had an exponent given
|
||||
if (m_type.bits.has_exp)
|
||||
{
|
||||
// handle capitalization of the exponent
|
||||
if (m_type.bits.exp_cap)
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "%.*E", m_type.bits.precision, m_value.number_float) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "%.*e", m_type.bits.precision, m_value.number_float) + 1;
|
||||
}
|
||||
|
||||
// remove '+' sign from the exponent if necessary
|
||||
if (not m_type.bits.exp_plus)
|
||||
{
|
||||
if (len > static_cast<int>(sizeof(buf)))
|
||||
{
|
||||
len = sizeof(buf);
|
||||
}
|
||||
for (int i = 0; i < len; i++)
|
||||
{
|
||||
if (buf[i] == '+')
|
||||
{
|
||||
for (; i + 1 < len; i++)
|
||||
{
|
||||
buf[i] = buf[i + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// no exponent - output as a decimal
|
||||
snprintf(buf, sizeof(buf), "%.*f",
|
||||
m_type.bits.precision, m_value.number_float);
|
||||
}
|
||||
}
|
||||
else if (m_value.number_float == 0)
|
||||
{
|
||||
// special case for zero to get "0.0"/"-0.0"
|
||||
if (std::signbit(m_value.number_float))
|
||||
{
|
||||
o << "-0.0";
|
||||
}
|
||||
else
|
||||
{
|
||||
o << "0.0";
|
||||
}
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// std::defaultfloat not supported in gcc version < 5
|
||||
o.unsetf(std::ios_base::floatfield);
|
||||
o << std::setprecision(std::numeric_limits<double>::digits10);
|
||||
// Otherwise 6, 15 or 16 digits of precision allows
|
||||
// round-trip IEEE 754 string->float->string,
|
||||
// string->double->string or string->long double->string;
|
||||
// to be safe, we read this value from
|
||||
// std::numeric_limits<number_float_t>::digits10
|
||||
snprintf(buf, sizeof(buf), "%.*g",
|
||||
std::numeric_limits<double>::digits10,
|
||||
m_value.number_float);
|
||||
}
|
||||
o << m_value.number_float;
|
||||
|
||||
o << buf;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -6086,7 +6209,7 @@ class basic_json
|
|||
//////////////////////
|
||||
|
||||
/// the type of the current element
|
||||
value_t m_type = value_t::null;
|
||||
type_data_t m_type = value_t::null;
|
||||
|
||||
/// the value of the current element
|
||||
json_value m_value = {};
|
||||
|
@ -7558,124 +7681,145 @@ class basic_json
|
|||
return std::strtof(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
|
||||
}
|
||||
|
||||
/*!
@brief convert a value with `static_cast` and report whether it round-trips

The value @a source is converted to the type of @a dest with a
`static_cast` and stored in @a dest. The stored value is then converted
back to the type of @a source and compared with the original value.

@param[in] source  the value to cast from

@param[out] dest  receives the converted value (it is always written)

@return true iff converting @a dest back yields a value equal to @a source
*/
template <typename T_A, typename T_B>
static bool attempt_cast(T_A source, T_B& dest)
{
    // forward conversion; the result is handed to the caller in any case
    const T_B converted = static_cast<T_B>(source);
    dest = converted;

    // backward conversion; a mismatch means the forward cast lost information
    const T_A restored = static_cast<T_A>(converted);
    return restored == source;
}
|
||||
|
||||
/*!
|
||||
@brief return number value for number tokens
|
||||
|
||||
This function translates the last token into the most appropriate
|
||||
number type (either integer, unsigned integer or floating point), which
|
||||
is passed back to the caller via the result parameter. The pointer @a
|
||||
m_start points to the beginning of the parsed number. We first examine
|
||||
the first character to determine the sign of the number and then pass
|
||||
this pointer to either @a std::strtoull (if positive) or @a
|
||||
std::strtoll (if negative), both of which set @a endptr to the first
|
||||
character past the converted number. If this pointer is not the same as
|
||||
@a m_cursor, then either more or less characters have been used during
|
||||
the comparison.
|
||||
number type (either integer, unsigned integer or floating point),
|
||||
which is passed back to the caller via the result parameter.
|
||||
|
||||
This can happen for inputs like "01" which will be treated like number
|
||||
0 followed by number 1. This will also occur for valid floating point
|
||||
inputs like "12e3" will be incorrectly read as 12. Numbers that are too
|
||||
large or too small for a signed/unsigned long long will cause a range
|
||||
error (@a errno set to ERANGE). The parsed number is cast to a @ref
|
||||
number_integer_t/@ref number_unsigned_t using the helper function @ref
|
||||
attempt_cast, which returns @a false if the cast could not be performed
|
||||
without error.
|
||||
This function parses the integer component up to the radix point or
|
||||
exponent while collecting information about the 'floating point
|
||||
representation', which it stores in the result parameter. If there is
|
||||
no radix point or exponent, and the number can fit into a
|
||||
@ref number_integer_t or @ref number_unsigned_t then it sets the
|
||||
result parameter accordingly.
|
||||
|
||||
In any of these cases (more/less characters read, range error or a cast
|
||||
error) the pointer is passed to @a std::strtod, which also sets @a
|
||||
endptr to the first character past the converted number. The resulting
|
||||
@ref number_float_t is then cast to a @ref number_integer_t/@ref
|
||||
number_unsigned_t using @ref attempt_cast and if no error occurs is
|
||||
stored in that form, otherwise it is stored as a @ref number_float_t.
|
||||
The 'floating point representation' includes the number of significant
|
||||
figures after the radix point, whether the number is in exponential
|
||||
or decimal form, the capitalization of the exponent marker, and if the
|
||||
optional '+' is present in the exponent. This information is necessary
|
||||
to perform accurate round trips of floating point numbers.
|
||||
|
||||
A final comparison is made of @a endptr and if still not the same as
|
||||
@ref m_cursor a bad input is assumed and @a result parameter is set to
|
||||
NAN.
|
||||
If the number is a floating point number the number is then parsed
|
||||
using @a std::strtod (or @a std::strtof or @a std::strtold).
|
||||
|
||||
@param[out] result @ref basic_json object to receive the number, or NAN
|
||||
if the conversion read past the current token. The latter case needs to
|
||||
be treated by the caller function.
|
||||
@param[out] result @ref basic_json object to receive the number, or
|
||||
NAN if the conversion read past the current token. The latter case
|
||||
needs to be treated by the caller function.
|
||||
*/
|
||||
void get_number(basic_json& result) const
|
||||
{
|
||||
typename string_t::value_type* endptr;
|
||||
assert(m_start != nullptr);
|
||||
errno = 0;
|
||||
|
||||
// attempt to parse it as an integer - first checking for a
|
||||
// negative number
|
||||
if (*reinterpret_cast<typename string_t::const_pointer>(m_start) != '-')
|
||||
const lexer::lexer_char_t* curptr = m_start;
|
||||
|
||||
// remember this number was parsed (for later serialization)
|
||||
result.m_type.bits.parsed = true;
|
||||
|
||||
// 'found_radix_point' will be set to 0xFF upon finding a radix
|
||||
// point and later used to mask in/out the precision depending
|
||||
// on whether a radix point is found i.e. 'precision &= found_radix_point'
|
||||
uint8_t found_radix_point = 0;
|
||||
uint8_t precision = 0;
|
||||
|
||||
// accumulate the integer conversion result (unsigned for now)
|
||||
number_unsigned_t value = 0;
|
||||
|
||||
// maximum absolute value of the relevant integer type
|
||||
number_unsigned_t max;
|
||||
|
||||
// temporarily store the type to avoid unnecessary bitfield access
|
||||
value_t type;
|
||||
|
||||
// look for sign
|
||||
if (*curptr == '-')
|
||||
{
|
||||
// positive, parse with strtoull and attempt cast to
|
||||
// number_unsigned_t
|
||||
if (attempt_cast(std::strtoull(reinterpret_cast<typename string_t::const_pointer>(m_start), &endptr,
|
||||
10), result.m_value.number_unsigned))
|
||||
{
|
||||
result.m_type = value_t::number_unsigned;
|
||||
}
|
||||
else
|
||||
{
|
||||
// cast failed due to overflow - store as float
|
||||
result.m_type = value_t::number_float;
|
||||
}
|
||||
type = value_t::number_integer;
|
||||
max = static_cast<uint64_t>(std::numeric_limits<number_integer_t>::max()) + 1;
|
||||
curptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Negative, parse with strtoll and attempt cast to
|
||||
// number_integer_t
|
||||
if (attempt_cast(std::strtoll(reinterpret_cast<typename string_t::const_pointer>(m_start), &endptr,
|
||||
10), result.m_value.number_integer))
|
||||
type = value_t::number_unsigned;
|
||||
max = static_cast<uint64_t>(std::numeric_limits<number_unsigned_t>::max());
|
||||
if (*curptr == '+')
|
||||
{
|
||||
result.m_type = value_t::number_integer;
|
||||
}
|
||||
else
|
||||
{
|
||||
// cast failed due to overflow - store as float
|
||||
result.m_type = value_t::number_float;
|
||||
curptr++;
|
||||
}
|
||||
}
|
||||
|
||||
// check the end of the number was reached and no range error
|
||||
// occurred
|
||||
if (reinterpret_cast<lexer_char_t*>(endptr) != m_cursor || errno == ERANGE)
|
||||
// count the significant figures
|
||||
for (; curptr < m_cursor; curptr++)
|
||||
{
|
||||
result.m_type = value_t::number_float;
|
||||
// quickly skip tests if a digit
|
||||
if (*curptr < '0' || *curptr > '9')
|
||||
{
|
||||
if (*curptr == '.')
|
||||
{
|
||||
// don't count '.' but change to float
|
||||
type = value_t::number_float;
|
||||
|
||||
// reset precision count
|
||||
precision = 0;
|
||||
found_radix_point = 0xFF;
|
||||
continue;
|
||||
}
|
||||
// assume exponent (if not then will fail parse): change to
|
||||
// float, stop counting and record exponent details
|
||||
type = value_t::number_float;
|
||||
result.m_type.bits.has_exp = true;
|
||||
|
||||
// exponent capitalization
|
||||
result.m_type.bits.exp_cap = (*curptr == 'E');
|
||||
|
||||
// exponent '+' sign
|
||||
result.m_type.bits.exp_plus = (*(++curptr) == '+');
|
||||
break;
|
||||
}
|
||||
|
||||
// skip if definitely not an integer
|
||||
if (type != value_t::number_float)
|
||||
{
|
||||
// multiply last value by ten and add the new digit
|
||||
auto temp = value * 10 + *curptr - 0x30;
|
||||
|
||||
// test for overflow
|
||||
if (temp < value || temp > max)
|
||||
{
|
||||
// overflow
|
||||
type = value_t::number_float;
|
||||
}
|
||||
else
|
||||
{
|
||||
// no overflow - save it
|
||||
value = temp;
|
||||
}
|
||||
}
|
||||
++precision;
|
||||
}
|
||||
|
||||
if (result.m_type == value_t::number_float)
|
||||
{
|
||||
// either the number won't fit in an integer (range error from
|
||||
// strtoull/strtoll or overflow on cast) or there was something
|
||||
// else after the number, which could be an exponent
|
||||
// If no radix point was found then precision would now be set to
|
||||
// the number of digits, which is wrong - clear it.
|
||||
result.m_type.bits.precision = precision & found_radix_point;
|
||||
|
||||
// save the value (if not a float)
|
||||
if (type == value_t::number_unsigned)
|
||||
{
|
||||
result.m_value.number_unsigned = value;
|
||||
}
|
||||
else if (type == value_t::number_integer)
|
||||
{
|
||||
result.m_value.number_integer = -static_cast<number_integer_t>(value);
|
||||
}
|
||||
else
|
||||
{
|
||||
// parse with strtod
|
||||
result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), &endptr);
|
||||
|
||||
// anything after the number is an error
|
||||
if (reinterpret_cast<lexer_char_t*>(endptr) != m_cursor)
|
||||
{
|
||||
throw std::invalid_argument(std::string("parse error - ") + get_token() + " is not a number");
|
||||
}
|
||||
result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), NULL);
|
||||
}
|
||||
|
||||
// save the type
|
||||
result.m_type = type;
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
1
test/json_roundtrip/roundtrip28.json
Normal file
1
test/json_roundtrip/roundtrip28.json
Normal file
|
@ -0,0 +1 @@
|
|||
[4.940656458412e-324]
|
1
test/json_roundtrip/roundtrip29.json
Normal file
1
test/json_roundtrip/roundtrip29.json
Normal file
|
@ -0,0 +1 @@
|
|||
[2.2250738585072e-308]
|
1
test/json_roundtrip/roundtrip30.json
Normal file
1
test/json_roundtrip/roundtrip30.json
Normal file
|
@ -0,0 +1 @@
|
|||
[1.2345E-30]
|
1
test/json_roundtrip/roundtrip31.json
Normal file
1
test/json_roundtrip/roundtrip31.json
Normal file
|
@ -0,0 +1 @@
|
|||
[1.2345E+30]
|
1
test/json_roundtrip/roundtrip32.json
Normal file
1
test/json_roundtrip/roundtrip32.json
Normal file
|
@ -0,0 +1 @@
|
|||
[1.2345e+30]
|
|
@ -9776,7 +9776,8 @@ TEST_CASE("parser class")
|
|||
CHECK_THROWS_AS(json::parser("-0e-:").parse(), std::invalid_argument);
|
||||
CHECK_THROWS_AS(json::parser("-0f").parse(), std::invalid_argument);
|
||||
|
||||
CHECK_THROWS_WITH(json::parser("01").parse(), "parse error - 0 is not a number");
|
||||
CHECK_THROWS_WITH(json::parser("01").parse(),
|
||||
"parse error - unexpected number literal; expected end of input");
|
||||
CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'");
|
||||
CHECK_THROWS_WITH(json::parser("1.").parse(),
|
||||
"parse error - unexpected '.'; expected end of input");
|
||||
|
@ -11823,10 +11824,15 @@ TEST_CASE("compliance tests from nativejson-benchmark")
|
|||
"test/json_roundtrip/roundtrip21.json",
|
||||
"test/json_roundtrip/roundtrip22.json",
|
||||
"test/json_roundtrip/roundtrip23.json",
|
||||
//"test/json_roundtrip/roundtrip24.json",
|
||||
//"test/json_roundtrip/roundtrip25.json",
|
||||
//"test/json_roundtrip/roundtrip26.json",
|
||||
//"test/json_roundtrip/roundtrip27.json"
|
||||
"test/json_roundtrip/roundtrip24.json",
|
||||
"test/json_roundtrip/roundtrip25.json",
|
||||
"test/json_roundtrip/roundtrip26.json",
|
||||
"test/json_roundtrip/roundtrip27.json",
|
||||
"test/json_roundtrip/roundtrip28.json",
|
||||
"test/json_roundtrip/roundtrip29.json",
|
||||
"test/json_roundtrip/roundtrip30.json",
|
||||
"test/json_roundtrip/roundtrip31.json",
|
||||
"test/json_roundtrip/roundtrip32.json"
|
||||
})
|
||||
{
|
||||
CAPTURE(filename);
|
||||
|
|
Loading…
Reference in a new issue