From 7eabb6ba36887ec9de0beda949eaca670a2b703c Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 22 Jan 2018 23:23:17 +0100 Subject: [PATCH] :memo: updated documentation for UBJSON functions --- develop/detail/parsing/binary_writer.hpp | 2 +- develop/json.hpp | 146 +++++++++++++++++++++- doc/examples/from_ubjson.cpp | 19 +++ doc/examples/from_ubjson.link | 1 + doc/examples/from_ubjson.output | 4 + doc/examples/to_ubjson.cpp | 62 ++++++++++ doc/examples/to_ubjson.link | 1 + doc/examples/to_ubjson.output | 4 + src/json.hpp | 148 ++++++++++++++++++++++- test/src/unit-to_chars.cpp | 8 +- 10 files changed, 385 insertions(+), 10 deletions(-) create mode 100644 doc/examples/from_ubjson.cpp create mode 100644 doc/examples/from_ubjson.link create mode 100644 doc/examples/from_ubjson.output create mode 100644 doc/examples/to_ubjson.cpp create mode 100644 doc/examples/to_ubjson.link create mode 100644 doc/examples/to_ubjson.output diff --git a/develop/detail/parsing/binary_writer.hpp b/develop/detail/parsing/binary_writer.hpp index 7bf894f6..87aaffa8 100644 --- a/develop/detail/parsing/binary_writer.hpp +++ b/develop/detail/parsing/binary_writer.hpp @@ -721,7 +721,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('i')); // uint8 + oa->write_character(static_cast('i')); // int8 } write_number(static_cast(n)); } diff --git a/develop/json.hpp b/develop/json.hpp index 9e6f239f..abce61aa 100644 --- a/develop/json.hpp +++ b/develop/json.hpp @@ -6515,9 +6515,11 @@ class basic_json vector in CBOR format.,to_cbor} @sa http://cbor.io - @sa @ref from_cbor(const std::vector&, const size_t) for the + @sa @ref from_cbor(detail::input_adapter, const bool strict) for the analogous deserialization @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + related UBJSON format @since version 2.0.9 */ @@ -6613,6 +6615,8 @@ class basic_json @sa @ref from_msgpack(const 
std::vector&, const size_t) for the analogous deserialization @sa @ref to_cbor(const basic_json& for the related CBOR format + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + related UBJSON format @since version 2.0.9 */ @@ -6633,8 +6637,89 @@ class basic_json binary_writer(o).write_msgpack(j); } + /*! + @brief create a UBJSON serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the UBJSON + (Universal Binary JSON) serialization format. UBJSON aims to be more compact + than JSON itself, yet more efficient to parse. + + The library uses the following mapping from JSON values types to + UBJSON types according to the UBJSON specification: + + JSON value type | value/range | UBJSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | `Z` + boolean | `true` | true | `T` + boolean | `false` | false | `F` + number_integer | -9223372036854775808..-2147483649 | int64 | `L` + number_integer | -2147483648..-32769 | int32 | `l` + number_integer | -32768..-129 | int16 | `I` + number_integer | -128..127 | int8 | `i` + number_integer | 128..255 | uint8 | `U` + number_integer | 256..32767 | int16 | `I` + number_integer | 32768..2147483647 | int32 | `l` + number_integer | 2147483648..9223372036854775807 | int64 | `L` + number_unsigned | 0..127 | int8 | `i` + number_unsigned | 128..255 | uint8 | `U` + number_unsigned | 256..32767 | int16 | `I` + number_unsigned | 32768..2147483647 | int32 | `l` + number_unsigned | 2147483648..9223372036854775807 | int64 | `L` + number_float | *any value* | float64 | `D` + string | *with shortest length indicator* | string | `S` + array | *see notes on optimized format* | array | `[` + object | *see notes on optimized format* | map | `{` + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a UBJSON value. 
+ + @note The following values can **not** be converted to a UBJSON value: + - strings with more than 9223372036854775807 bytes (theoretical) + - unsigned integer numbers above 9223372036854775807 + + @note The following markers are not used in the conversion: + - `N`: no-op values are not created. + - `C`: single-byte strings are serialized with `S` markers. + + @note Any UBJSON output created by @ref to_ubjson can be successfully parsed + by @ref from_ubjson. + + @note If NaN or Infinity are stored inside a JSON number, they are + serialized properly. This behavior differs from the @ref dump() + function which serializes NaN or Infinity to `null`. + + @note The optimized formats for containers are supported: Parameter + @a use_size adds size information to the beginning of a container and + removes the closing marker. Parameter @a use_type further checks + whether all elements of a container have the same type and adds the + type marker to the beginning of the container. The @a use_type + parameter must only be used together with @a use_size = true. Note + that @a use_size = true alone may result in larger representations - + the benefit of this parameter is that the receiving side is + immediately informed of the number of elements of the container. + + @param[in] j JSON value to serialize + @param[in] use_size whether to add size annotations to container types + @param[in] use_type whether to add type annotations to container types + (must be combined with @a use_size = true) + @return UBJSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j.
+ + @liveexample{The example shows the serialization of a JSON value to a byte + vector in UBJSON format.,to_ubjson} + + @sa http://ubjson.org + @sa @ref from_ubjson(detail::input_adapter, const bool strict) for the + analogous deserialization + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + + @since version 3.1.0 + */ static std::vector to_ubjson(const basic_json& j, - const bool use_size = false, const bool use_type = false) + const bool use_size = false, + const bool use_type = false) { std::vector result; to_ubjson(j, result, use_size, use_type); @@ -6739,6 +6824,8 @@ class basic_json @sa @ref to_cbor(const basic_json&) for the analogous serialization @sa @ref from_msgpack(detail::input_adapter, const bool) for the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter, const bool) for the related + UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -6826,6 +6913,8 @@ class basic_json @sa @ref to_msgpack(const basic_json&) for the analogous serialization @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR format + @sa @ref from_ubjson(detail::input_adapter, const bool) for the related + UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -6847,6 +6936,59 @@ class basic_json return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_msgpack(strict); } + /*! + @brief create a JSON value from an input in UBJSON format + + Deserializes a given input @a i to a JSON value using the UBJSON (Universal + Binary JSON) serialization format.
+ + The library maps UBJSON types to JSON value types as follows: + + UBJSON type | JSON value type | marker + ----------- | --------------------------------------- | ------ + no-op | *no value, next value is read* | `N` + null | `null` | `Z` + false | `false` | `F` + true | `true` | `T` + float32 | number_float | `d` + float64 | number_float | `D` + uint8 | number_unsigned | `U` + int8 | number_integer | `i` + int16 | number_integer | `I` + int32 | number_integer | `l` + int64 | number_integer | `L` + string | string | `S` + char | string | `C` + array | array (optimized values are supported) | `[` + object | object (optimized values are supported) | `{` + + @note The mapping is **complete** in the sense that any UBJSON value can + be converted to a JSON value. + + @param[in] i an input in UBJSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + + @throw parse_error.110 if the given input ends prematurely or the end of + file was not reached when @a strict was set to true + @throw parse_error.112 if a parse error occurs + @throw parse_error.113 if a string could not be parsed successfully + + @complexity Linear in the size of the input @a i. 
+ + @liveexample{The example shows the deserialization of a byte vector in + UBJSON format to a JSON value.,from_ubjson} + + @sa http://ubjson.org + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR + format + @sa @ref from_msgpack(detail::input_adapter, const bool) for the related + MessagePack format + + @since version 3.1.0 + */ static basic_json from_ubjson(detail::input_adapter i, const bool strict = true) { diff --git a/doc/examples/from_ubjson.cpp b/doc/examples/from_ubjson.cpp new file mode 100644 index 00000000..2adca3c6 --- /dev/null +++ b/doc/examples/from_ubjson.cpp @@ -0,0 +1,19 @@ +#include +#include "json.hpp" + +using json = nlohmann::json; + +int main() +{ + // create byte vector + std::vector v = {0x7B, 0x69, 0x07, 0x63, 0x6F, 0x6D, 0x70, 0x61, + 0x63, 0x74, 0x54, 0x69, 0x06, 0x73, 0x63, 0x68, + 0x65, 0x6D, 0x61, 0x69, 0x00, 0x7D + }; + + // deserialize it with UBJSON + json j = json::from_ubjson(v); + + // print the deserialized JSON value + std::cout << std::setw(2) << j << std::endl; +} diff --git a/doc/examples/from_ubjson.link b/doc/examples/from_ubjson.link new file mode 100644 index 00000000..5ff36826 --- /dev/null +++ b/doc/examples/from_ubjson.link @@ -0,0 +1 @@ +online \ No newline at end of file diff --git a/doc/examples/from_ubjson.output b/doc/examples/from_ubjson.output new file mode 100644 index 00000000..259f63bd --- /dev/null +++ b/doc/examples/from_ubjson.output @@ -0,0 +1,4 @@ +{ + "compact": true, + "schema": 0 +} diff --git a/doc/examples/to_ubjson.cpp b/doc/examples/to_ubjson.cpp new file mode 100644 index 00000000..a39bc081 --- /dev/null +++ b/doc/examples/to_ubjson.cpp @@ -0,0 +1,62 @@ +#include +#include "json.hpp" + +using json = nlohmann::json; + +// function to print UBJSON's diagnostic format +void print_byte(uint8_t byte) +{ + if (32 < byte and byte < 128) + { + std::cout << (char)byte; + } + else 
+ { + std::cout << (int)byte; + } +} + +int main() +{ + // create a JSON value + json j = R"({"compact": true, "schema": false})"_json; + + // serialize it to UBJSON + std::vector v = json::to_ubjson(j); + + // print the vector content + for (auto& byte : v) + { + print_byte(byte); + } + std::cout << std::endl; + + // create an array of numbers + json array = {1, 2, 3, 4, 5, 6, 7, 8}; + + // serialize it to UBJSON using default representation + std::vector v_array = json::to_ubjson(array); + // serialize it to UBJSON using size optimization + std::vector v_array_size = json::to_ubjson(array, true); + // serialize it to UBJSON using type optimization + std::vector v_array_size_and_type = json::to_ubjson(array, true, true); + + // print the vector contents + for (auto& byte : v_array) + { + print_byte(byte); + } + std::cout << std::endl; + + for (auto& byte : v_array_size) + { + print_byte(byte); + } + std::cout << std::endl; + + for (auto& byte : v_array_size_and_type) + { + print_byte(byte); + } + std::cout << std::endl; +} diff --git a/doc/examples/to_ubjson.link b/doc/examples/to_ubjson.link new file mode 100644 index 00000000..919b8b9a --- /dev/null +++ b/doc/examples/to_ubjson.link @@ -0,0 +1 @@ +online \ No newline at end of file diff --git a/doc/examples/to_ubjson.output b/doc/examples/to_ubjson.output new file mode 100644 index 00000000..087980cb --- /dev/null +++ b/doc/examples/to_ubjson.output @@ -0,0 +1,4 @@ +{i7compactTi6schemaF} +[i1i2i3i4i5i6i7i8] +[#i8i1i2i3i4i5i6i7i8 +[$i#i812345678 diff --git a/src/json.hpp b/src/json.hpp index 03101ca8..b1f42acd 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6860,7 +6860,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('i')); // uint8 + oa->write_character(static_cast('i')); // int8 } write_number(static_cast(n)); } @@ -15343,9 +15343,11 @@ class basic_json vector in CBOR format.,to_cbor} @sa http://cbor.io - @sa @ref from_cbor(const std::vector&, const size_t) for the + @sa @ref 
from_cbor(detail::input_adapter, const bool strict) for the analogous deserialization @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + related UBJSON format @since version 2.0.9 */ @@ -15441,6 +15443,8 @@ class basic_json @sa @ref from_msgpack(const std::vector&, const size_t) for the analogous deserialization @sa @ref to_cbor(const basic_json& for the related CBOR format + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + related UBJSON format @since version 2.0.9 */ @@ -15461,8 +15465,89 @@ class basic_json binary_writer(o).write_msgpack(j); } + /*! + @brief create a UBJSON serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the UBJSON + (Universal Binary JSON) serialization format. UBJSON aims to be more compact + than JSON itself, yet more efficient to parse. + + The library uses the following mapping from JSON values types to + UBJSON types according to the UBJSON specification: + + JSON value type | value/range | UBJSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | `Z` + boolean | `true` | true | `T` + boolean | `false` | false | `F` + number_integer | -9223372036854775808..-2147483649 | int64 | `L` + number_integer | -2147483648..-32769 | int32 | `l` + number_integer | -32768..-129 | int16 | `I` + number_integer | -128..127 | int8 | `i` + number_integer | 128..255 | uint8 | `U` + number_integer | 256..32767 | int16 | `I` + number_integer | 32768..2147483647 | int32 | `l` + number_integer | 2147483648..9223372036854775807 | int64 | `L` + number_unsigned | 0..127 | int8 | `i` + number_unsigned | 128..255 | uint8 | `U` + number_unsigned | 256..32767 | int16 | `I` + number_unsigned | 32768..2147483647 | int32 | `l` + number_unsigned | 2147483648..9223372036854775807 | int64 | `L` + number_float | *any value* | float64 | `D` + 
string | *with shortest length indicator* | string | `S` + array | *see notes on optimized format* | array | `[` + object | *see notes on optimized format* | map | `{` + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a UBJSON value. + + @note The following values can **not** be converted to a UBJSON value: + - strings with more than 9223372036854775807 bytes (theoretical) + - unsigned integer numbers above 9223372036854775807 + + @note The following markers are not used in the conversion: + - `N`: no-op values are not created. + - `C`: single-byte strings are serialized with `S` markers. + + @note Any UBJSON output created by @ref to_ubjson can be successfully parsed + by @ref from_ubjson. + + @note If NaN or Infinity are stored inside a JSON number, they are + serialized properly. This behavior differs from the @ref dump() + function which serializes NaN or Infinity to `null`. + + @note The optimized formats for containers are supported: Parameter + @a use_size adds size information to the beginning of a container and + removes the closing marker. Parameter @a use_type further checks + whether all elements of a container have the same type and adds the + type marker to the beginning of the container. The @a use_type + parameter must only be used together with @a use_size = true. Note + that @a use_size = true alone may result in larger representations - + the benefit of this parameter is that the receiving side is + immediately informed of the number of elements of the container. + + @param[in] j JSON value to serialize + @param[in] use_size whether to add size annotations to container types + @param[in] use_type whether to add type annotations to container types + (must be combined with @a use_size = true) + @return UBJSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j.
+ + @liveexample{The example shows the serialization of a JSON value to a byte + vector in UBJSON format.,to_ubjson} + + @sa http://ubjson.org + @sa @ref from_ubjson(detail::input_adapter, const bool strict) for the + analogous deserialization + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + + @since version 3.1.0 + */ static std::vector to_ubjson(const basic_json& j, - const bool use_size = false, const bool use_type = false) + const bool use_size = false, + const bool use_type = false) { std::vector result; to_ubjson(j, result, use_size, use_type); @@ -15567,6 +15652,8 @@ class basic_json @sa @ref to_cbor(const basic_json&) for the analogous serialization @sa @ref from_msgpack(detail::input_adapter, const bool) for the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter, const bool) for the related + UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -15654,6 +15741,8 @@ class basic_json @sa @ref to_msgpack(const basic_json&) for the analogous serialization @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR format + @sa @ref from_ubjson(detail::input_adapter, const bool) for the related + UBJSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -15675,6 +15764,59 @@ class basic_json return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_msgpack(strict); } + /*! + @brief create a JSON value from an input in UBJSON format + + Deserializes a given input @a i to a JSON value using the UBJSON (Universal + Binary JSON) serialization format.
+ + The library maps UBJSON types to JSON value types as follows: + + UBJSON type | JSON value type | marker + ----------- | --------------------------------------- | ------ + no-op | *no value, next value is read* | `N` + null | `null` | `Z` + false | `false` | `F` + true | `true` | `T` + float32 | number_float | `d` + float64 | number_float | `D` + uint8 | number_unsigned | `U` + int8 | number_integer | `i` + int16 | number_integer | `I` + int32 | number_integer | `l` + int64 | number_integer | `L` + string | string | `S` + char | string | `C` + array | array (optimized values are supported) | `[` + object | object (optimized values are supported) | `{` + + @note The mapping is **complete** in the sense that any UBJSON value can + be converted to a JSON value. + + @param[in] i an input in UBJSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + + @throw parse_error.110 if the given input ends prematurely or the end of + file was not reached when @a strict was set to true + @throw parse_error.112 if a parse error occurs + @throw parse_error.113 if a string could not be parsed successfully + + @complexity Linear in the size of the input @a i. 
+ + @liveexample{The example shows the deserialization of a byte vector in + UBJSON format to a JSON value.,from_ubjson} + + @sa http://ubjson.org + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter, const bool) for the related CBOR + format + @sa @ref from_msgpack(detail::input_adapter, const bool) for the related + MessagePack format + + @since version 3.1.0 + */ static basic_json from_ubjson(detail::input_adapter i, const bool strict = true) { diff --git a/test/src/unit-to_chars.cpp b/test/src/unit-to_chars.cpp index 55dee340..973e4e19 100644 --- a/test/src/unit-to_chars.cpp +++ b/test/src/unit-to_chars.cpp @@ -80,8 +80,8 @@ static float make_float(uint64_t f, int e) } uint64_t biased_exponent = (e == kDenormalExponent && (f & kHiddenBit) == 0) - ? 0 - : static_cast(e + kExponentBias); + ? 0 + : static_cast(e + kExponentBias); uint64_t bits = (f & kSignificandMask) | (biased_exponent << kPhysicalSignificandSize); return reinterpret_bits(static_cast(bits)); @@ -132,8 +132,8 @@ static double make_double(uint64_t f, int e) } uint64_t biased_exponent = (e == kDenormalExponent && (f & kHiddenBit) == 0) - ? 0 - : static_cast(e + kExponentBias); + ? 0 + : static_cast(e + kExponentBias); uint64_t bits = (f & kSignificandMask) | (biased_exponent << kPhysicalSignificandSize); return reinterpret_bits(bits);