From 7360e09830a10b26c182db0e0eccb964847b9361 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 20 Jul 2020 13:12:20 +0200 Subject: [PATCH] :construction: support for UBJSON high-precision numbers #2286 --- include/nlohmann/detail/exceptions.hpp | 1 + .../nlohmann/detail/input/binary_reader.hpp | 29 +++++++++++---- include/nlohmann/json.hpp | 1 + single_include/nlohmann/json.hpp | 31 ++++++++++++---- test/src/unit-ubjson.cpp | 37 +++++++++++++++++-- 5 files changed, 81 insertions(+), 18 deletions(-) diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index ed836188..9ead6855 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -97,6 +97,7 @@ json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vect json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). +json.exception.parse_error.115 | parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A | A UBJSON high-precision number could not be parsed. @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. 
This also diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 8e6493ee..693f078c 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -2003,30 +2003,45 @@ class binary_reader case 'H': { + // get size of following number string std::size_t size{}; auto res = get_ubjson_size_value(size); + if (JSON_HEDLEY_UNLIKELY(!res)) + { + return res; + } + // get number string std::string s; - for (int i = 0; i < size; ++i) + for (std::size_t i = 0; i < size; ++i) { get(); s.push_back(current); } + // parse number string auto ia = detail::input_adapter(std::forward(s)); auto l = detail::lexer(std::move(ia), false); - auto result = l.scan(); + const auto result_number = l.scan(); + const auto result_remainder = l.scan(); - switch (result) + using token_type = typename detail::lexer_base::token_type; + + if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input)) { - case detail::lexer_base::token_type::value_integer: + return sax->parse_error(chars_read, s, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + s, "high-precision number"))); + } + + switch (result_number) + { + case token_type::value_integer: return sax->number_integer(l.get_number_integer()); - case detail::lexer_base::token_type::value_unsigned: + case token_type::value_unsigned: return sax->number_unsigned(l.get_number_unsigned()); - case detail::lexer_base::token_type::value_float: + case token_type::value_float: return sax->number_float(l.get_number_float(), std::move(s)); default: - return sax->parse_error(chars_read, s, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "invalid number", "number"))); + return sax->parse_error(chars_read, s, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + s, "high-precision number"))); } } diff --git 
a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 986cf71f..dd8842a6 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -7686,6 +7686,7 @@ class basic_json int16 | number_integer | `I` int32 | number_integer | `l` int64 | number_integer | `L` + high-precision number | number_integer, number_unsigned, or number_float - depends on number string | `H` string | string | `S` char | string | `C` array | array (optimized values are supported) | `[` diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c9fa6f97..4ecaac4e 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -2280,6 +2280,7 @@ json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vect json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). +json.exception.parse_error.115 | parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A | A UBJSON high-precision number could not be parsed. @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. 
This also @@ -9502,30 +9503,45 @@ class binary_reader case 'H': { + // get size of following number string std::size_t size{}; auto res = get_ubjson_size_value(size); + if (JSON_HEDLEY_UNLIKELY(!res)) + { + return res; + } + // get number string std::string s; - for (int i = 0; i < size; ++i) + for (std::size_t i = 0; i < size; ++i) { get(); s.push_back(current); } + // parse number string auto ia = detail::input_adapter(std::forward(s)); auto l = detail::lexer(std::move(ia), false); - auto result = l.scan(); + const auto result_number = l.scan(); + const auto result_remainder = l.scan(); - switch (result) + using token_type = typename detail::lexer_base::token_type; + + if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input)) { - case detail::lexer_base::token_type::value_integer: + return sax->parse_error(chars_read, s, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + s, "high-precision number"))); + } + + switch (result_number) + { + case token_type::value_integer: return sax->number_integer(l.get_number_integer()); - case detail::lexer_base::token_type::value_unsigned: + case token_type::value_unsigned: return sax->number_unsigned(l.get_number_unsigned()); - case detail::lexer_base::token_type::value_float: + case token_type::value_float: return sax->number_float(l.get_number_float(), std::move(s)); default: - return sax->parse_error(chars_read, s, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "invalid number", "number"))); + return sax->parse_error(chars_read, s, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + s, "high-precision number"))); } } @@ -23881,6 +23897,7 @@ class basic_json int16 | number_integer | `I` int32 | number_integer | `l` int64 | number_integer | `L` + high-precision number | number_integer, number_unsigned, or number_float - depends on number string | `H` string | string | `S` char 
| string | `C` array | array (optimized values are supported) | `[` diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp index 61b39139..ea15ba1c 100644 --- a/test/src/unit-ubjson.cpp +++ b/test/src/unit-ubjson.cpp @@ -770,10 +770,39 @@ TEST_CASE("UBJSON") SECTION("high-precision number") { - std::vector vec = {'H', 'i', 0x16, '3', '.', '1', '4', '1', '5', '9', '2', '6', '5', '3', '5', '8', '9', '7', '9', '3', '2', '3', '8', '4', '6'}; - const auto j = json::from_ubjson(vec); - CHECK(j.is_number_float()); - CHECK(j.dump() == "3.141592653589793"); + SECTION("unsigned integer number") + { + std::vector vec = {'H', 'i', 0x14, '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'}; + const auto j = json::from_ubjson(vec); + CHECK(j.is_number_unsigned()); + CHECK(j.dump() == "12345678901234567890"); + } + + SECTION("signed integer number") + { + std::vector vec = {'H', 'i', 0x13, '-', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8'}; + const auto j = json::from_ubjson(vec); + CHECK(j.is_number_integer()); + CHECK(j.dump() == "-123456789012345678"); + } + + SECTION("floating-point number") + { + std::vector vec = {'H', 'i', 0x16, '3', '.', '1', '4', '1', '5', '9', '2', '6', '5', '3', '5', '8', '9', '7', '9', '3', '2', '3', '8', '4', '6'}; + const auto j = json::from_ubjson(vec); + CHECK(j.is_number_float()); + CHECK(j.dump() == "3.141592653589793"); + } + + SECTION("errors") + { + std::vector vec1 = {'H', 'i', 2, '1', 'A', '3'}; + CHECK_THROWS_WITH_AS(json::from_ubjson(vec1), "[json.exception.parse_error.115] parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A", json::parse_error); + std::vector vec2 = {'H', 'i', 2, '1', '.'}; + CHECK_THROWS_WITH_AS(json::from_ubjson(vec2), "[json.exception.parse_error.115] parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1.", 
json::parse_error); + std::vector vec3 = {'H', 2, '1', '0'}; + CHECK_THROWS_WITH_AS(json::from_ubjson(vec3), "[json.exception.parse_error.113] parse error at byte 2: syntax error while parsing UBJSON size: expected length type specification (U, i, I, l, L) after '#'; last byte: 0x02", json::parse_error); + } } }