Merge pull request #2297 from nlohmann/issue2286

Add support for high-precision numbers in UBJSON encoding
This commit is contained in:
Niels Lohmann 2020-07-25 21:50:53 +02:00 committed by GitHub
commit 63e7c40e35
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 1891 additions and 1667 deletions

View file

@ -28,6 +28,7 @@ number_unsigned | 128..255 | uint8 | `U`
number_unsigned | 256..32767 | int16 | `I`
number_unsigned | 32768..2147483647 | int32 | `l`
number_unsigned | 2147483648..9223372036854775807 | int64 | `L`
number_unsigned | 2147483649..18446744073709551615 | high-precision | `H`
number_float | *any value* | float64 | `D`
string | *with shortest length indicator* | string | `S`
array | *see notes on optimized format* | array | `[`
@ -44,7 +45,6 @@ object | *see notes on optimized format* | map | `{`
The following values can **not** be converted to a UBJSON value:
- strings with more than 9223372036854775807 bytes (theoretical)
- unsigned integer numbers above 9223372036854775807
!!! info "Unused UBJSON markers"

View file

@ -279,6 +279,16 @@ The parsing of the corresponding BSON record type is not implemented (yet).
[json.exception.parse_error.114] parse error at byte 5: Unsupported BSON record type 0xFF
```
### json.exception.parse_error.115
A UBJSON high-precision number could not be parsed.
!!! failure "Example message"
```
[json.exception.parse_error.115] parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A
```
## Iterator errors
This exception is thrown if iterators passed to a library function do not match
@ -765,6 +775,10 @@ UBJSON and BSON only support integer numbers up to 9223372036854775807.
number overflow serializing '9223372036854775808'
```
!!! note
Since version 3.9.0, integer numbers beyond int64 are serialized as high-precision UBJSON numbers, and this exception does not further occur.
### json.exception.out_of_range.408
The size (following `#`) of an UBJSON array or object exceeds the maximal capacity.

View file

@ -97,6 +97,7 @@ json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vect
json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read.
json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read.
json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet).
json.exception.parse_error.115 | parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A | A UBJSON high-precision number could not be parsed.
@note For an input with n bytes, 1 is the index of the first character and n+1
is the index of the terminating null byte or the end of file. This also
@ -285,7 +286,7 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not
json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved.
json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value.
json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF.
json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. |
json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. (until version 3.8.0) |
json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. |
json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string |

View file

@ -15,6 +15,7 @@
#include <nlohmann/detail/exceptions.hpp>
#include <nlohmann/detail/input/input_adapters.hpp>
#include <nlohmann/detail/input/json_sax.hpp>
#include <nlohmann/detail/input/lexer.hpp>
#include <nlohmann/detail/macro_scope.hpp>
#include <nlohmann/detail/meta/is_sax.hpp>
#include <nlohmann/detail/value_t.hpp>
@ -2004,6 +2005,11 @@ class binary_reader
return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
}
case 'H':
{
return get_ubjson_high_precision_number();
}
case 'C': // char
{
get();
@ -2180,6 +2186,55 @@ class binary_reader
// Note, no reader for UBJSON binary types is implemented because they do
// not exist
bool get_ubjson_high_precision_number()
{
// get size of following number string
std::size_t size{};
auto res = get_ubjson_size_value(size);
if (JSON_HEDLEY_UNLIKELY(!res))
{
return res;
}
// get number string
std::vector<char> number_vector;
for (std::size_t i = 0; i < size; ++i)
{
get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number")))
{
return false;
}
number_vector.push_back(static_cast<char>(current));
}
// parse number string
auto number_ia = detail::input_adapter(std::forward<decltype(number_vector)>(number_vector));
auto number_lexer = detail::lexer<BasicJsonType, decltype(number_ia)>(std::move(number_ia), false);
const auto result_number = number_lexer.scan();
const auto number_string = number_lexer.get_token_string();
const auto result_remainder = number_lexer.scan();
using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
{
return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number")));
}
switch (result_number)
{
case token_type::value_integer:
return sax->number_integer(number_lexer.get_number_integer());
case token_type::value_unsigned:
return sax->number_unsigned(number_lexer.get_number_unsigned());
case token_type::value_float:
return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
default:
return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number")));
}
}
///////////////////////
// Utility functions //
///////////////////////

View file

@ -1319,7 +1319,17 @@ class binary_writer
}
else
{
JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(n) + " cannot be represented by UBJSON as it does not fit int64"));
if (add_prefix)
{
oa->write_character(to_char_type('H')); // high-precision number
}
const auto number = BasicJsonType(n).dump();
write_number_with_ubjson_prefix(number.size(), true);
for (std::size_t i = 0; i < number.size(); ++i)
{
oa->write_character(to_char_type(static_cast<std::uint8_t>(number[i])));
}
}
}
@ -1373,19 +1383,23 @@ class binary_writer
// LCOV_EXCL_START
else
{
JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(n) + " cannot be represented by UBJSON as it does not fit int64"));
if (add_prefix)
{
oa->write_character(to_char_type('H')); // high-precision number
}
const auto number = BasicJsonType(n).dump();
write_number_with_ubjson_prefix(number.size(), true);
for (std::size_t i = 0; i < number.size(); ++i)
{
oa->write_character(to_char_type(static_cast<std::uint8_t>(number[i])));
}
}
// LCOV_EXCL_STOP
}
/*!
@brief determine the type prefix of container values
@note This function does not need to be 100% accurate when it comes to
integer limits. In case a number exceeds the limits of int64_t,
this will be detected by a later call to function
write_number_with_ubjson_prefix. Therefore, we return 'L' for any
value that does not fit the previous limits.
*/
CharType ubjson_prefix(const BasicJsonType& j) const noexcept
{
@ -1415,8 +1429,12 @@ class binary_writer
{
return 'l';
}
// no check and assume int64_t (see note above)
return 'L';
if ((std::numeric_limits<std::int64_t>::min)() <= j.m_value.number_integer && j.m_value.number_integer <= (std::numeric_limits<std::int64_t>::max)())
{
return 'L';
}
// anything else is treated as high-precision number
return 'H'; // LCOV_EXCL_LINE
}
case value_t::number_unsigned:
@ -1437,8 +1455,12 @@ class binary_writer
{
return 'l';
}
// no check and assume int64_t (see note above)
return 'L';
if (j.m_value.number_unsigned <= static_cast<std::uint64_t>((std::numeric_limits<std::int64_t>::max)()))
{
return 'L';
}
// anything else is treated as high-precision number
return 'H'; // LCOV_EXCL_LINE
}
case value_t::number_float:

View file

@ -7201,6 +7201,7 @@ class basic_json
number_unsigned | 256..32767 | int16 | `I`
number_unsigned | 32768..2147483647 | int32 | `l`
number_unsigned | 2147483648..9223372036854775807 | int64 | `L`
number_unsigned | 2147483649..18446744073709551615 | high-precision | `H`
number_float | *any value* | float64 | `D`
string | *with shortest length indicator* | string | `S`
array | *see notes on optimized format* | array | `[`
@ -7211,7 +7212,6 @@ class basic_json
@note The following values can **not** be converted to a UBJSON value:
- strings with more than 9223372036854775807 bytes (theoretical)
- unsigned integer numbers above 9223372036854775807
@note The following markers are not used in the conversion:
- `Z`: no-op values are not created.
@ -7687,6 +7687,7 @@ class basic_json
int16 | number_integer | `I`
int32 | number_integer | `l`
int64 | number_integer | `L`
high-precision number | number_integer, number_unsigned, or number_float - depends on number string | 'H'
string | string | `S`
char | string | `C`
array | array (optimized values are supported) | `[`

File diff suppressed because it is too large Load diff

View file

@ -32,6 +32,7 @@ SOFTWARE.
#include <nlohmann/json.hpp>
using nlohmann::json;
#include <iostream>
#include <fstream>
#include <set>
#include <test_data.hpp>
@ -768,6 +769,65 @@ TEST_CASE("UBJSON")
CHECK(json::from_ubjson(result, true, false) == j);
}
}
SECTION("high-precision number")
{
SECTION("unsigned integer number")
{
std::vector<uint8_t> vec = {'H', 'i', 0x14, '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'};
const auto j = json::from_ubjson(vec);
CHECK(j.is_number_unsigned());
CHECK(j.dump() == "12345678901234567890");
}
SECTION("signed integer number")
{
std::vector<uint8_t> vec = {'H', 'i', 0x13, '-', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8'};
const auto j = json::from_ubjson(vec);
CHECK(j.is_number_integer());
CHECK(j.dump() == "-123456789012345678");
}
SECTION("floating-point number")
{
std::vector<uint8_t> vec = {'H', 'i', 0x16, '3', '.', '1', '4', '1', '5', '9', '2', '6', '5', '3', '5', '8', '9', '7', '9', '3', '2', '3', '8', '4', '6'};
const auto j = json::from_ubjson(vec);
CHECK(j.is_number_float());
CHECK(j.dump() == "3.141592653589793");
}
SECTION("errors")
{
// error while parsing length
std::vector<uint8_t> vec0 = {'H', 'i'};
CHECK(json::from_ubjson(vec0, true, false).is_discarded());
// error while parsing string
std::vector<uint8_t> vec1 = {'H', 'i', '1'};
CHECK(json::from_ubjson(vec1, true, false).is_discarded());
std::vector<uint8_t> vec2 = {'H', 'i', 2, '1', 'A', '3'};
CHECK_THROWS_WITH_AS(json::from_ubjson(vec2), "[json.exception.parse_error.115] parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A", json::parse_error);
std::vector<uint8_t> vec3 = {'H', 'i', 2, '1', '.'};
CHECK_THROWS_WITH_AS(json::from_ubjson(vec3), "[json.exception.parse_error.115] parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1.", json::parse_error);
std::vector<uint8_t> vec4 = {'H', 2, '1', '0'};
CHECK_THROWS_WITH_AS(json::from_ubjson(vec4), "[json.exception.parse_error.113] parse error at byte 2: syntax error while parsing UBJSON size: expected length type specification (U, i, I, l, L) after '#'; last byte: 0x02", json::parse_error);
}
SECTION("serialization")
{
// number that does not fit int64
json j = 11111111111111111111ULL;
CHECK(j.is_number_unsigned());
// number will be serialized to high-precision number
const auto vec = json::to_ubjson(j);
std::vector<uint8_t> expected = {'H', 'i', 0x14, '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1'};
CHECK(vec == expected);
// roundtrip
CHECK(json::from_ubjson(vec) == j);
}
}
}
SECTION("string")
@ -1540,15 +1600,6 @@ TEST_CASE("UBJSON")
}
}
SECTION("number out of range")
{
// larger than max int64
json j = 9223372036854775808llu;
json _;
CHECK_THROWS_AS(_ = json::to_ubjson(j), json::out_of_range&);
CHECK_THROWS_WITH(_ = json::to_ubjson(j), "[json.exception.out_of_range.407] integer number 9223372036854775808 cannot be represented by UBJSON as it does not fit int64");
}
SECTION("excessive size")
{
SECTION("array")
@ -2370,7 +2421,7 @@ TEST_CASE("all UBJSON first bytes")
// these bytes will fail immediately with exception parse_error.112
std::set<uint8_t> supported =
{
'T', 'F', 'Z', 'U', 'i', 'I', 'l', 'L', 'd', 'D', 'C', 'S', '[', '{', 'N'
'T', 'F', 'Z', 'U', 'i', 'I', 'l', 'L', 'd', 'D', 'C', 'S', '[', '{', 'N', 'H'
};
for (auto i = 0; i < 256; ++i)