From 7360e09830a10b26c182db0e0eccb964847b9361 Mon Sep 17 00:00:00 2001
From: Niels Lohmann <mail@nlohmann.me>
Date: Mon, 20 Jul 2020 13:12:20 +0200
Subject: [PATCH] :construction: support for UBJSON high-precision numbers
 #2286

---
 include/nlohmann/detail/exceptions.hpp        |  1 +
 .../nlohmann/detail/input/binary_reader.hpp   | 29 +++++++++++----
 include/nlohmann/json.hpp                     |  1 +
 single_include/nlohmann/json.hpp              | 31 ++++++++++++----
 test/src/unit-ubjson.cpp                      | 37 +++++++++++++++++--
 5 files changed, 81 insertions(+), 18 deletions(-)

diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp
index ed836188..9ead6855 100644
--- a/include/nlohmann/detail/exceptions.hpp
+++ b/include/nlohmann/detail/exceptions.hpp
@@ -97,6 +97,7 @@ json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vect
 json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read.
 json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read.
 json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet).
+json.exception.parse_error.115 | parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A | A UBJSON high-precision number could not be parsed.
 
 @note For an input with n bytes, 1 is the index of the first character and n+1
       is the index of the terminating null byte or the end of file. This also
diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp
index 8e6493ee..693f078c 100644
--- a/include/nlohmann/detail/input/binary_reader.hpp
+++ b/include/nlohmann/detail/input/binary_reader.hpp
@@ -2003,30 +2003,45 @@ class binary_reader
 
             case 'H':
             {
+                // get size of following number string
                 std::size_t size{};
                 auto res = get_ubjson_size_value(size);
+                if (JSON_HEDLEY_UNLIKELY(!res))
+                {
+                    return res;
+                }
 
+                // get number string
                 std::string s;
-                for (int i = 0; i < size; ++i)
+                for (std::size_t i = 0; i < size; ++i)
                 {
                     get();
                     s.push_back(current);
                 }
 
+                // parse number string
                 auto ia = detail::input_adapter(std::forward<std::string>(s));
                 auto l = detail::lexer<BasicJsonType, decltype(ia)>(std::move(ia), false);
-                auto result = l.scan();
+                const auto result_number = l.scan();
+                const auto result_remainder = l.scan();
 
-                switch (result)
+                using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
+
+                if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
                 {
-                    case detail::lexer_base<BasicJsonType>::token_type::value_integer:
+                    return sax->parse_error(chars_read, s, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + s, "high-precision number")));
+                }
+
+                switch (result_number)
+                {
+                    case token_type::value_integer:
                         return sax->number_integer(l.get_number_integer());
-                    case detail::lexer_base<BasicJsonType>::token_type::value_unsigned:
+                    case token_type::value_unsigned:
                         return sax->number_unsigned(l.get_number_unsigned());
-                    case detail::lexer_base<BasicJsonType>::token_type::value_float:
+                    case token_type::value_float:
                         return sax->number_float(l.get_number_float(), std::move(s));
                     default:
-                        return sax->parse_error(chars_read, s, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "invalid number", "number")));
+                        return sax->parse_error(chars_read, s, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + s, "high-precision number")));
                 }
             }
 
diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp
index 986cf71f..dd8842a6 100644
--- a/include/nlohmann/json.hpp
+++ b/include/nlohmann/json.hpp
@@ -7686,6 +7686,7 @@ class basic_json
     int16       | number_integer                          | `I`
     int32       | number_integer                          | `l`
     int64       | number_integer                          | `L`
+    high-precision number | number_integer, number_unsigned, or number_float - depends on number string | `H`
     string      | string                                  | `S`
     char        | string                                  | `C`
     array       | array (optimized values are supported)  | `[`
diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp
index c9fa6f97..4ecaac4e 100644
--- a/single_include/nlohmann/json.hpp
+++ b/single_include/nlohmann/json.hpp
@@ -2280,6 +2280,7 @@ json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vect
 json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read.
 json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read.
 json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet).
+json.exception.parse_error.115 | parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A | A UBJSON high-precision number could not be parsed.
 
 @note For an input with n bytes, 1 is the index of the first character and n+1
       is the index of the terminating null byte or the end of file. This also
@@ -9502,30 +9503,45 @@ class binary_reader
 
             case 'H':
             {
+                // get size of following number string
                 std::size_t size{};
                 auto res = get_ubjson_size_value(size);
+                if (JSON_HEDLEY_UNLIKELY(!res))
+                {
+                    return res;
+                }
 
+                // get number string
                 std::string s;
-                for (int i = 0; i < size; ++i)
+                for (std::size_t i = 0; i < size; ++i)
                 {
                     get();
                     s.push_back(current);
                 }
 
+                // parse number string
                 auto ia = detail::input_adapter(std::forward<std::string>(s));
                 auto l = detail::lexer<BasicJsonType, decltype(ia)>(std::move(ia), false);
-                auto result = l.scan();
+                const auto result_number = l.scan();
+                const auto result_remainder = l.scan();
 
-                switch (result)
+                using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
+
+                if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
                 {
-                    case detail::lexer_base<BasicJsonType>::token_type::value_integer:
+                    return sax->parse_error(chars_read, s, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + s, "high-precision number")));
+                }
+
+                switch (result_number)
+                {
+                    case token_type::value_integer:
                         return sax->number_integer(l.get_number_integer());
-                    case detail::lexer_base<BasicJsonType>::token_type::value_unsigned:
+                    case token_type::value_unsigned:
                         return sax->number_unsigned(l.get_number_unsigned());
-                    case detail::lexer_base<BasicJsonType>::token_type::value_float:
+                    case token_type::value_float:
                         return sax->number_float(l.get_number_float(), std::move(s));
                     default:
-                        return sax->parse_error(chars_read, s, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "invalid number", "number")));
+                        return sax->parse_error(chars_read, s, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + s, "high-precision number")));
                 }
             }
 
@@ -23881,6 +23897,7 @@ class basic_json
     int16       | number_integer                          | `I`
     int32       | number_integer                          | `l`
     int64       | number_integer                          | `L`
+    high-precision number | number_integer, number_unsigned, or number_float - depends on number string | `H`
     string      | string                                  | `S`
     char        | string                                  | `C`
     array       | array (optimized values are supported)  | `[`
diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp
index 61b39139..ea15ba1c 100644
--- a/test/src/unit-ubjson.cpp
+++ b/test/src/unit-ubjson.cpp
@@ -770,10 +770,39 @@ TEST_CASE("UBJSON")
 
             SECTION("high-precision number")
             {
-                std::vector<uint8_t> vec = {'H', 'i', 0x16, '3', '.', '1', '4', '1', '5', '9', '2', '6', '5', '3', '5', '8', '9', '7', '9', '3', '2', '3', '8', '4', '6'};
-                const auto j = json::from_ubjson(vec);
-                CHECK(j.is_number_float());
-                CHECK(j.dump() == "3.141592653589793");
+                SECTION("unsigned integer number")
+                {
+                    std::vector<uint8_t> vec = {'H', 'i', 0x14, '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'};
+                    const auto j = json::from_ubjson(vec);
+                    CHECK(j.is_number_unsigned());
+                    CHECK(j.dump() == "12345678901234567890");
+                }
+
+                SECTION("signed integer number")
+                {
+                    std::vector<uint8_t> vec = {'H', 'i', 0x13, '-', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8'};
+                    const auto j = json::from_ubjson(vec);
+                    CHECK(j.is_number_integer());
+                    CHECK(j.dump() == "-123456789012345678");
+                }
+
+                SECTION("floating-point number")
+                {
+                    std::vector<uint8_t> vec = {'H', 'i', 0x16, '3', '.', '1', '4', '1', '5', '9',  '2', '6', '5', '3', '5', '8', '9',  '7', '9', '3', '2', '3', '8', '4',  '6'};
+                    const auto j = json::from_ubjson(vec);
+                    CHECK(j.is_number_float());
+                    CHECK(j.dump() == "3.141592653589793");
+                }
+
+                SECTION("errors")
+                {
+                    std::vector<uint8_t> vec1 = {'H', 'i', 2, '1', 'A', '3'};
+                    CHECK_THROWS_WITH_AS(json::from_ubjson(vec1), "[json.exception.parse_error.115] parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A", json::parse_error);
+                    std::vector<uint8_t> vec2 = {'H', 'i', 2, '1', '.'};
+                    CHECK_THROWS_WITH_AS(json::from_ubjson(vec2), "[json.exception.parse_error.115] parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1.", json::parse_error);
+                    std::vector<uint8_t> vec3 = {'H', 2, '1', '0'};
+                    CHECK_THROWS_WITH_AS(json::from_ubjson(vec3), "[json.exception.parse_error.113] parse error at byte 2: syntax error while parsing UBJSON size: expected length type specification (U, i, I, l, L) after '#'; last byte: 0x02", json::parse_error);
+                }
             }
         }