diff --git a/Makefile b/Makefile index 135db65a..b6784151 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,7 @@ all: @echo "cppcheck - analyze code with cppcheck" @echo "doctest - compile example files and check their output" @echo "fuzz_testing - prepare fuzz testing of the JSON parser" + @echo "fuzz_testing_bson - prepare fuzz testing of the BSON parser" @echo "fuzz_testing_cbor - prepare fuzz testing of the CBOR parser" @echo "fuzz_testing_msgpack - prepare fuzz testing of the MessagePack parser" @echo "fuzz_testing_ubjson - prepare fuzz testing of the UBJSON parser" @@ -220,6 +221,14 @@ fuzz_testing: find test/data/json_tests -size -5k -name *json | xargs -I{} cp "{}" fuzz-testing/testcases @echo "Execute: afl-fuzz -i fuzz-testing/testcases -o fuzz-testing/out fuzz-testing/fuzzer" +fuzz_testing_bson: + rm -fr fuzz-testing + mkdir -p fuzz-testing fuzz-testing/testcases fuzz-testing/out + $(MAKE) parse_bson_fuzzer -C test CXX=afl-clang++ + mv test/parse_bson_fuzzer fuzz-testing/fuzzer + find test/data -size -5k -name *.bson | xargs -I{} cp "{}" fuzz-testing/testcases + @echo "Execute: afl-fuzz -i fuzz-testing/testcases -o fuzz-testing/out fuzz-testing/fuzzer" + fuzz_testing_cbor: rm -fr fuzz-testing mkdir -p fuzz-testing fuzz-testing/testcases fuzz-testing/out diff --git a/README.md b/README.md index a64dab8d..8220bd3c 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ - [Implicit conversions](#implicit-conversions) - [Conversions to/from arbitrary types](#arbitrary-types-conversions) - [Specializing enum conversion](#specializing-enum-conversion) - - [Binary formats (CBOR, MessagePack, and UBJSON)](#binary-formats-cbor-messagepack-and-ubjson) + - [Binary formats (BSON, CBOR, MessagePack, and UBJSON)](#binary-formats-bson-cbor-messagepack-and-ubjson) - [Supported compilers](#supported-compilers) - [License](#license) - [Contact](#contact) @@ -925,14 +925,22 @@ Other Important points: - When using `get()`, undefined JSON values will default to the first pair specified in your map. Select this default pair carefully. - If an enum or JSON value is specified more than once in your map, the first matching occurrence from the top of the map will be returned when converting to or from JSON. -### Binary formats (CBOR, MessagePack, and UBJSON) +### Binary formats (BSON, CBOR, MessagePack, and UBJSON -Though JSON is a ubiquitous data format, it is not a very compact format suitable for data exchange, for instance over a network. Hence, the library supports [CBOR](http://cbor.io) (Concise Binary Object Representation), [MessagePack](http://msgpack.org), and [UBJSON](http://ubjson.org) (Universal Binary JSON Specification) to efficiently encode JSON values to byte vectors and to decode such vectors. +Though JSON is a ubiquitous data format, it is not a very compact format suitable for data exchange, for instance over a network. Hence, the library supportsĀ [BSON](http://bsonspec.org) (Binary JSON), [CBOR](http://cbor.io) (Concise Binary Object Representation), [MessagePack](http://msgpack.org), and [UBJSON](http://ubjson.org) (Universal Binary JSON Specification) to efficiently encode JSON values to byte vectors and to decode such vectors. ```cpp // create a JSON value json j = R"({"compact": true, "schema": 0})"_json; +// serialize to BSON +std::vector v_bson = json::to_bson(j); + +// 0x1B, 0x00, 0x00, 0x00, 0x08, 0x63, 0x6F, 0x6D, 0x70, 0x61, 0x63, 0x74, 0x00, 0x01, 0x10, 0x73, 0x63, 0x68, 0x65, 0x6D, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + +// roundtrip +json j_from_bson = json::from_bson(v_bson); + // serialize to CBOR std::vector v_cbor = json::to_cbor(j); @@ -1190,6 +1198,8 @@ I deeply appreciate the help of the following people. - [knilch](https://github.com/knilch0r) made sure the test suite does not stall when run in the wrong directory. - [Antonio Borondo](https://github.com/antonioborondo) fixed an MSVC 2017 warning. - [Dan Gendreau](https://github.com/dgendreau) implemented the `NLOHMANN_JSON_SERIALIZE_ENUM` macro to quickly define a enum/JSON mapping. +- [efp](https://github.com/efp) added line and column information to parse errors. +- [julian-becker](https://github.com/julian-becker) added BSON support. Thanks a lot for helping out! Please [let me know](mailto:mail@nlohmann.me) if I forgot someone. diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index dd046804..1ac2606b 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -93,6 +93,7 @@ json.exception.parse_error.109 | parse error: array index 'one' is not a number json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. This also @@ -236,6 +237,7 @@ json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. | +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | @liveexample{The following code shows how a `type_error` exception can be caught.,type_error} @@ -278,8 +280,9 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. -json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integers numbers up to 9223372036854775807. | +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 57889644..7d278736 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -80,6 +80,10 @@ class binary_reader result = parse_ubjson_internal(); break; + case input_format_t::bson: + result = parse_bson_internal(); + break; + // LCOV_EXCL_START default: assert(false); @@ -121,6 +125,216 @@ class binary_reader } private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } + + /*! + @brief Parses a C-style string from the BSON input. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. + */ + bool get_bson_cstr(string_t& result) + { + auto out = std::back_inserter(result); + while (true) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) + { + return false; + } + if (current == 0x00) + { + return true; + } + *out++ = static_cast(current); + } + + return true; + } + + /*! + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @tparam NumberType The type of the length @a len + @pre len > 0 + @return `true` if the string was successfully parsed + */ + template + bool get_bson_string(const NumberType len, string_t& result) + { + return get_string(input_format_t::bson, len - static_cast(1), result) and get() != std::char_traits::eof(); + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) + { + switch (element_type) + { + case 0x01: // double + { + double number; + return get_number(input_format_t::bson, number) and sax->number_float(static_cast(number), ""); + } + + case 0x02: // string + { + std::int32_t len; + string_t value; + return get_number(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); + } + + case 0x03: // object + { + return parse_bson_internal(); + } + + case 0x04: // array + { + return parse_bson_array(); + } + + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(value); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(value); + } + + default: // anything else not supported (yet) + { + char cr[3]; + snprintf(cr, sizeof(cr), "%.2hhX", static_cast(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(const bool is_array) + { + string_t key; + while (int element_type = get()) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) + { + return false; + } + + const std::size_t element_type_parse_position = chars_read; + if (JSON_UNLIKELY(not get_bson_cstr(key))) + { + return false; + } + + if (not is_array) + { + sax->key(key); + } + + if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) + { + return false; + } + + // get_bson_cstr only appends + key.clear(); + } + + return true; + } + + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ + bool parse_bson_array() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) + { + return false; + } + + return sax->end_array(); + } + + ////////// + // CBOR // + ////////// + /*! @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read @@ -459,6 +673,191 @@ class binary_reader } } + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, current & 0x1F, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); + } + } + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) + { + if (not JSON_UNLIKELY(sax->start_object(len))) + { + return false; + } + + string_t key; + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + /*! @return whether a valid MessagePack value was passed to the SAX parser */ @@ -821,300 +1220,6 @@ class binary_reader } } - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /*! - @return character read from the input after ignoring all 'N' entries - */ - int get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template - bool get_number(const input_format_t format, NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of characters to read - @param[out] result string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template - bool get_string(const input_format_t format, const NumberType len, string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success, &format]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) - { - success = false; - } - return static_cast(current); - }); - return success; - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(input_format_t::cbor, current & 0x1F, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - /*! @brief reads a MessagePack string @@ -1249,6 +1354,22 @@ class binary_reader return sax->end_object(); } + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief reads a UBJSON string @@ -1663,6 +1784,113 @@ class binary_reader return sax->end_object(); } + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianess, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast(current); + }); + return success; + } + /*! @param[in] format the current format (for diagnostics) @param[in] context further context information (for diagnostics) @@ -1688,7 +1916,6 @@ class binary_reader return std::string{cr}; } - private: /*! @param[in] format the current format @param[in] detail a detailed error message @@ -1715,6 +1942,10 @@ class binary_reader error_msg += "UBJSON"; break; + case input_format_t::bson: + error_msg += "BSON"; + break; + // LCOV_EXCL_START default: assert(false); @@ -1724,6 +1955,7 @@ class binary_reader return error_msg + " " + context + ": " + detail; } + private: /// input adapter input_adapter_t ia = nullptr; diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 706c5c5b..8b7852b8 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -18,7 +18,7 @@ namespace nlohmann namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // diff --git a/include/nlohmann/detail/meta/type_traits.hpp b/include/nlohmann/detail/meta/type_traits.hpp index efe878f6..4c4c4d3d 100644 --- a/include/nlohmann/detail/meta/type_traits.hpp +++ b/include/nlohmann/detail/meta/type_traits.hpp @@ -193,13 +193,13 @@ struct is_constructible_object_type_impl < static constexpr bool value = std::is_constructible::value and - std::is_same::value or - (has_from_json::value or - has_non_default_from_json < - BasicJsonType, - typename ConstructibleObjectType::mapped_type >::value); + (has_from_json::value or + has_non_default_from_json < + BasicJsonType, + typename ConstructibleObjectType::mapped_type >::value)); }; template diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index d4b5e98f..7c0e6939 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -35,7 +35,33 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + case value_t::discarded: + { + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + } + } + } + + /*! + @param[in] j JSON value to serialize */ void write_cbor(const BasicJsonType& j) { @@ -279,7 +305,7 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize */ void write_msgpack(const BasicJsonType& j) { @@ -679,33 +705,362 @@ class binary_writer } private: - /* - @brief write a number to output input + ////////// + // BSON // + ////////// - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. + /*! + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). */ - template - void write_number(const NumberType n) + static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (is_little_endian) + const auto it = name.find(static_cast(0)); + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); } - oa->write_characters(vec.data(), sizeof(NumberType)); + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; } + /*! + @brief Writes the given @a element_type and @a name to the output adapter + */ + void write_bson_entry_header(const typename BasicJsonType::string_t& name, + std::uint8_t element_type) + { + oa->write_character(static_cast(element_type)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + } + + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const typename BasicJsonType::string_t& name, + const bool value) + { + write_bson_entry_header(name, 0x08); + oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); + } + + /*! + @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const typename BasicJsonType::string_t& name, + const double value) + { + write_bson_entry_header(name, 0x01); + write_number(value); + } + + /*! + @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const typename BasicJsonType::string_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::string_t& value) + { + write_bson_entry_header(name, 0x02); + + write_number(static_cast(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast(value.c_str()), + value.size() + 1); + } + + /*! + @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const typename BasicJsonType::string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const typename BasicJsonType::string_t& name, + const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + } + + /*! + @return The size of the BSON-encoded unsigned integer in @a j + */ + static std::size_t calc_bson_unsigned_size(const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const typename BasicJsonType::string_t& name, + const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + else + { + JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(value))); + } + + } + + /*! + @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size(std::to_string(array_index++), el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number(static_cast(calc_bson_array_size(value))); + + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + write_bson_element(std::to_string(array_index++), el); + } + + oa->write_character(static_cast(0x00)); + } + + /*! + @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. + */ + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + case value_t::discarded: + return 0ul; + + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + }; + } + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. + @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry + */ + void write_bson_element(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::discarded: + return; + + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + + case value_t::string: + return write_bson_string(name, *j.m_value.string); + + case value_t::null: + return write_bson_null(name); + + // LCOV_EXCL_START + default: + assert(false); + return; + // LCOV_EXCL_STOP + }; + } + + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = 0; + + for (const auto& el : value) + { + document_size += calc_bson_element_size(el.first, el.second); + } + + return sizeof(std::int32_t) + document_size + 1ul; + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson_object(const typename BasicJsonType::object_t& value) + { + write_number(static_cast(calc_bson_object_size(value))); + + for (const auto& el : value) + { + write_bson_element(el.first, el.second); + } + + oa->write_character(static_cast(0x00)); + } + + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) + { + return static_cast(0xFA); // Single-Precision Float + } + + static constexpr CharType get_cbor_float_prefix(double /*unused*/) + { + return static_cast(0xFB); // Double-Precision Float + } + + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return static_cast(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return static_cast(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -906,26 +1261,6 @@ class binary_writer } } - static constexpr CharType get_cbor_float_prefix(float /*unused*/) - { - return static_cast(0xFA); // Single-Precision Float - } - - static constexpr CharType get_cbor_float_prefix(double /*unused*/) - { - return static_cast(0xFB); // Double-Precision Float - } - - static constexpr CharType get_msgpack_float_prefix(float /*unused*/) - { - return static_cast(0xCA); // float 32 - } - - static constexpr CharType get_msgpack_float_prefix(double /*unused*/) - { - return static_cast(0xCB); // float 64 - } - static constexpr CharType get_ubjson_float_prefix(float /*unused*/) { return 'd'; // float 32 @@ -936,6 +1271,39 @@ class binary_writer return 'D'; // float 64 } + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. + */ + template + void write_number(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + private: /// whether we can assume little endianess const bool is_little_endian = binary_reader::little_endianess(); diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 1d107ce0..41f248a2 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -460,9 +460,10 @@ class serializer // continue processing the string state = UTF8_ACCEPT; - continue; + break; } } + break; } default: // decode found yet incomplete multi-byte code point diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index a5f7d47d..0a46d0c2 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6627,6 +6627,87 @@ class basic_json binary_writer(o).write_ubjson(j, use_size, use_type); } + + /*! + @brief Serializes the given JSON object `j` to BSON and returns a vector + containing the corresponding BSON-representation. + + BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are + stored as a single entity (a so-called document). + + The library uses the following mapping from JSON values types to BSON types: + + JSON value type | value/range | BSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | 0x0A + boolean | `true`, `false` | boolean | 0x08 + number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 + number_integer | -2147483648..2147483647 | int32 | 0x10 + number_integer | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 0..2147483647 | int32 | 0x10 + number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 9223372036854775808..18446744073709551615| -- | -- + number_float | *any value* | double | 0x01 + string | *any value* | string | 0x02 + array | *any value* | document | 0x04 + object | *any value* | document | 0x03 + + @warning The mapping is **incomplete**, since only JSON-objects (and things + contained therein) can be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON, + and the keys may not contain U+0000, since they are serialized a + zero-terminated c-strings. + + @throw out_of_range.407 if `j.is_number_unsigned() && j.get() > 9223372036854775807` + @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) + @throw type_error.317 if `!j.is_object()` + + @pre The input `j` is required to be an object: `j.is_object() == true`. + + @note Any BSON output created via @ref to_bson can be successfully parsed + by @ref from_bson. + + @param[in] j JSON value to serialize + @return BSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @sa http://bsonspec.org/spec.html + @sa @ref from_bson(detail::input_adapter, const bool strict) for the + analogous deserialization + @sa @ref to_ubjson(const basic_json&) for the related UBJSON format + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + */ + static std::vector to_bson(const basic_json& j) + { + std::vector result; + to_bson(j, result); + return result; + } + + /*! + @brief Serializes the given JSON object `j` to BSON and forwards the + corresponding BSON-representation to the given output_adapter `o`. + @param j The JSON object to convert to BSON. + @param o The output adapter that receives the binary BSON representation. + @pre The input `j` shall be an object: `j.is_object() == true` + @sa @ref to_bson(const basic_json&) + */ + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! + @copydoc to_bson(const basic_json&, detail::output_adapter) + */ + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! @brief create a JSON value from an input in CBOR format @@ -6821,6 +6902,8 @@ class basic_json related CBOR format @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the related UBJSON format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -6906,6 +6989,8 @@ class basic_json related CBOR format @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for the related MessagePack format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 */ @@ -6934,6 +7019,91 @@ class basic_json return res ? result : basic_json(value_t::discarded); } + + + + /*! + @brief Create a JSON value from an input in BSON format + + Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) + serialization format. + + The library maps BSON record types to JSON value types as follows: + + BSON type | BSON marker byte | JSON value type + --------------- | ---------------- | --------------------------- + double | 0x01 | number_float + string | 0x02 | string + document | 0x03 | object + array | 0x04 | array + binary | 0x05 | still unsupported + undefined | 0x06 | still unsupported + ObjectId | 0x07 | still unsupported + boolean | 0x08 | boolean + UTC Date-Time | 0x09 | still unsupported + null | 0x0A | null + Regular Expr. | 0x0B | still unsupported + DB Pointer | 0x0C | still unsupported + JavaScript Code | 0x0D | still unsupported + Symbol | 0x0E | still unsupported + JavaScript Code | 0x0F | still unsupported + int32 | 0x10 | number_integer + Timestamp | 0x11 | still unsupported + 128-bit decimal float | 0x13 | still unsupported + Max Key | 0x7F | still unsupported + Min Key | 0xFF | still unsupported + + + @warning The mapping is **incomplete**. The unsupported mappings + are indicated in the table above. + + @param[in] i an input in BSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.114 if an unsupported BSON record type is encountered + + @sa http://bsonspec.org/spec.html + @sa @ref to_bson(const basic_json&, const bool, const bool) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + related UBJSON format + */ + static basic_json from_bson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_bson(detail::input_adapter&&, const bool, const bool) + */ + template::value, int> = 0> + static basic_json from_bson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + + /// @} ////////////////////////// diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 1268ca1d..953b4517 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -584,13 +584,13 @@ struct is_constructible_object_type_impl < static constexpr bool value = std::is_constructible::value and - std::is_same::value or - (has_from_json::value or - has_non_default_from_json < - BasicJsonType, - typename ConstructibleObjectType::mapped_type >::value); + (has_from_json::value or + has_non_default_from_json < + BasicJsonType, + typename ConstructibleObjectType::mapped_type >::value)); }; template @@ -866,6 +866,7 @@ json.exception.parse_error.109 | parse error: array index 'one' is not a number json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. This also @@ -1009,6 +1010,7 @@ json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. | +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | @liveexample{The following code shows how a `type_error` exception can be caught.,type_error} @@ -1051,8 +1053,9 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. -json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integers numbers up to 9223372036854775807. | +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} @@ -2069,7 +2072,7 @@ namespace nlohmann namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // @@ -6409,6 +6412,10 @@ class binary_reader result = parse_ubjson_internal(); break; + case input_format_t::bson: + result = parse_bson_internal(); + break; + // LCOV_EXCL_START default: assert(false); @@ -6450,6 +6457,216 @@ class binary_reader } private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } + + /*! + @brief Parses a C-style string from the BSON input. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. + */ + bool get_bson_cstr(string_t& result) + { + auto out = std::back_inserter(result); + while (true) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) + { + return false; + } + if (current == 0x00) + { + return true; + } + *out++ = static_cast(current); + } + + return true; + } + + /*! + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @tparam NumberType The type of the length @a len + @pre len > 0 + @return `true` if the string was successfully parsed + */ + template + bool get_bson_string(const NumberType len, string_t& result) + { + return get_string(input_format_t::bson, len - static_cast(1), result) and get() != std::char_traits::eof(); + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) + { + switch (element_type) + { + case 0x01: // double + { + double number; + return get_number(input_format_t::bson, number) and sax->number_float(static_cast(number), ""); + } + + case 0x02: // string + { + std::int32_t len; + string_t value; + return get_number(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); + } + + case 0x03: // object + { + return parse_bson_internal(); + } + + case 0x04: // array + { + return parse_bson_array(); + } + + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(value); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(value); + } + + default: // anything else not supported (yet) + { + char cr[3]; + snprintf(cr, sizeof(cr), "%.2hhX", static_cast(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(const bool is_array) + { + string_t key; + while (int element_type = get()) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) + { + return false; + } + + const std::size_t element_type_parse_position = chars_read; + if (JSON_UNLIKELY(not get_bson_cstr(key))) + { + return false; + } + + if (not is_array) + { + sax->key(key); + } + + if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) + { + return false; + } + + // get_bson_cstr only appends + key.clear(); + } + + return true; + } + + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ + bool parse_bson_array() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) + { + return false; + } + + return sax->end_array(); + } + + ////////// + // CBOR // + ////////// + /*! @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read @@ -6788,6 +7005,191 @@ class binary_reader } } + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, current & 0x1F, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); + } + } + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) + { + if (not JSON_UNLIKELY(sax->start_object(len))) + { + return false; + } + + string_t key; + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + /*! @return whether a valid MessagePack value was passed to the SAX parser */ @@ -7150,300 +7552,6 @@ class binary_reader } } - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /*! - @return character read from the input after ignoring all 'N' entries - */ - int get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template - bool get_number(const input_format_t format, NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of characters to read - @param[out] result string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template - bool get_string(const input_format_t format, const NumberType len, string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success, &format]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) - { - success = false; - } - return static_cast(current); - }); - return success; - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(input_format_t::cbor, current & 0x1F, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - /*! @brief reads a MessagePack string @@ -7578,6 +7686,22 @@ class binary_reader return sax->end_object(); } + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief reads a UBJSON string @@ -7992,6 +8116,113 @@ class binary_reader return sax->end_object(); } + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianess, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast(current); + }); + return success; + } + /*! @param[in] format the current format (for diagnostics) @param[in] context further context information (for diagnostics) @@ -8017,7 +8248,6 @@ class binary_reader return std::string{cr}; } - private: /*! @param[in] format the current format @param[in] detail a detailed error message @@ -8044,6 +8274,10 @@ class binary_reader error_msg += "UBJSON"; break; + case input_format_t::bson: + error_msg += "BSON"; + break; + // LCOV_EXCL_START default: assert(false); @@ -8053,6 +8287,7 @@ class binary_reader return error_msg + " " + context + ": " + detail; } + private: /// input adapter input_adapter_t ia = nullptr; @@ -8111,7 +8346,33 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + case value_t::discarded: + { + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + } + } + } + + /*! + @param[in] j JSON value to serialize */ void write_cbor(const BasicJsonType& j) { @@ -8355,7 +8616,7 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize */ void write_msgpack(const BasicJsonType& j) { @@ -8755,33 +9016,362 @@ class binary_writer } private: - /* - @brief write a number to output input + ////////// + // BSON // + ////////// - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. + /*! + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). */ - template - void write_number(const NumberType n) + static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (is_little_endian) + const auto it = name.find(static_cast(0)); + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); } - oa->write_characters(vec.data(), sizeof(NumberType)); + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; } + /*! + @brief Writes the given @a element_type and @a name to the output adapter + */ + void write_bson_entry_header(const typename BasicJsonType::string_t& name, + std::uint8_t element_type) + { + oa->write_character(static_cast(element_type)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + } + + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const typename BasicJsonType::string_t& name, + const bool value) + { + write_bson_entry_header(name, 0x08); + oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); + } + + /*! + @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const typename BasicJsonType::string_t& name, + const double value) + { + write_bson_entry_header(name, 0x01); + write_number(value); + } + + /*! + @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const typename BasicJsonType::string_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::string_t& value) + { + write_bson_entry_header(name, 0x02); + + write_number(static_cast(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast(value.c_str()), + value.size() + 1); + } + + /*! + @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const typename BasicJsonType::string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const typename BasicJsonType::string_t& name, + const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + } + + /*! + @return The size of the BSON-encoded unsigned integer in @a j + */ + static std::size_t calc_bson_unsigned_size(const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const typename BasicJsonType::string_t& name, + const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + else + { + JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(value))); + } + + } + + /*! + @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size(std::to_string(array_index++), el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number(static_cast(calc_bson_array_size(value))); + + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + write_bson_element(std::to_string(array_index++), el); + } + + oa->write_character(static_cast(0x00)); + } + + /*! + @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. + */ + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + case value_t::discarded: + return 0ul; + + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + }; + } + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. + @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry + */ + void write_bson_element(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::discarded: + return; + + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + + case value_t::string: + return write_bson_string(name, *j.m_value.string); + + case value_t::null: + return write_bson_null(name); + + // LCOV_EXCL_START + default: + assert(false); + return; + // LCOV_EXCL_STOP + }; + } + + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = 0; + + for (const auto& el : value) + { + document_size += calc_bson_element_size(el.first, el.second); + } + + return sizeof(std::int32_t) + document_size + 1ul; + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson_object(const typename BasicJsonType::object_t& value) + { + write_number(static_cast(calc_bson_object_size(value))); + + for (const auto& el : value) + { + write_bson_element(el.first, el.second); + } + + oa->write_character(static_cast(0x00)); + } + + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) + { + return static_cast(0xFA); // Single-Precision Float + } + + static constexpr CharType get_cbor_float_prefix(double /*unused*/) + { + return static_cast(0xFB); // Double-Precision Float + } + + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return static_cast(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return static_cast(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -8982,26 +9572,6 @@ class binary_writer } } - static constexpr CharType get_cbor_float_prefix(float /*unused*/) - { - return static_cast(0xFA); // Single-Precision Float - } - - static constexpr CharType get_cbor_float_prefix(double /*unused*/) - { - return static_cast(0xFB); // Double-Precision Float - } - - static constexpr CharType get_msgpack_float_prefix(float /*unused*/) - { - return static_cast(0xCA); // float 32 - } - - static constexpr CharType get_msgpack_float_prefix(double /*unused*/) - { - return static_cast(0xCB); // float 64 - } - static constexpr CharType get_ubjson_float_prefix(float /*unused*/) { return 'd'; // float 32 @@ -9012,6 +9582,39 @@ class binary_writer return 'D'; // float 64 } + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. + */ + template + void write_number(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + private: /// whether we can assume little endianess const bool is_little_endian = binary_reader::little_endianess(); @@ -10585,9 +11188,10 @@ class serializer // continue processing the string state = UTF8_ACCEPT; - continue; + break; } } + break; } default: // decode found yet incomplete multi-byte code point @@ -18247,6 +18851,87 @@ class basic_json binary_writer(o).write_ubjson(j, use_size, use_type); } + + /*! + @brief Serializes the given JSON object `j` to BSON and returns a vector + containing the corresponding BSON-representation. + + BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are + stored as a single entity (a so-called document). + + The library uses the following mapping from JSON values types to BSON types: + + JSON value type | value/range | BSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | 0x0A + boolean | `true`, `false` | boolean | 0x08 + number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 + number_integer | -2147483648..2147483647 | int32 | 0x10 + number_integer | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 0..2147483647 | int32 | 0x10 + number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 9223372036854775808..18446744073709551615| -- | -- + number_float | *any value* | double | 0x01 + string | *any value* | string | 0x02 + array | *any value* | document | 0x04 + object | *any value* | document | 0x03 + + @warning The mapping is **incomplete**, since only JSON-objects (and things + contained therein) can be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON, + and the keys may not contain U+0000, since they are serialized a + zero-terminated c-strings. + + @throw out_of_range.407 if `j.is_number_unsigned() && j.get() > 9223372036854775807` + @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) + @throw type_error.317 if `!j.is_object()` + + @pre The input `j` is required to be an object: `j.is_object() == true`. + + @note Any BSON output created via @ref to_bson can be successfully parsed + by @ref from_bson. + + @param[in] j JSON value to serialize + @return BSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @sa http://bsonspec.org/spec.html + @sa @ref from_bson(detail::input_adapter, const bool strict) for the + analogous deserialization + @sa @ref to_ubjson(const basic_json&) for the related UBJSON format + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + */ + static std::vector to_bson(const basic_json& j) + { + std::vector result; + to_bson(j, result); + return result; + } + + /*! + @brief Serializes the given JSON object `j` to BSON and forwards the + corresponding BSON-representation to the given output_adapter `o`. + @param j The JSON object to convert to BSON. + @param o The output adapter that receives the binary BSON representation. + @pre The input `j` shall be an object: `j.is_object() == true` + @sa @ref to_bson(const basic_json&) + */ + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! + @copydoc to_bson(const basic_json&, detail::output_adapter) + */ + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! @brief create a JSON value from an input in CBOR format @@ -18441,6 +19126,8 @@ class basic_json related CBOR format @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the related UBJSON format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -18526,6 +19213,8 @@ class basic_json related CBOR format @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for the related MessagePack format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 */ @@ -18554,6 +19243,91 @@ class basic_json return res ? result : basic_json(value_t::discarded); } + + + + /*! + @brief Create a JSON value from an input in BSON format + + Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) + serialization format. + + The library maps BSON record types to JSON value types as follows: + + BSON type | BSON marker byte | JSON value type + --------------- | ---------------- | --------------------------- + double | 0x01 | number_float + string | 0x02 | string + document | 0x03 | object + array | 0x04 | array + binary | 0x05 | still unsupported + undefined | 0x06 | still unsupported + ObjectId | 0x07 | still unsupported + boolean | 0x08 | boolean + UTC Date-Time | 0x09 | still unsupported + null | 0x0A | null + Regular Expr. | 0x0B | still unsupported + DB Pointer | 0x0C | still unsupported + JavaScript Code | 0x0D | still unsupported + Symbol | 0x0E | still unsupported + JavaScript Code | 0x0F | still unsupported + int32 | 0x10 | number_integer + Timestamp | 0x11 | still unsupported + 128-bit decimal float | 0x13 | still unsupported + Max Key | 0x7F | still unsupported + Min Key | 0xFF | still unsupported + + + @warning The mapping is **incomplete**. The unsupported mappings + are indicated in the table above. + + @param[in] i an input in BSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.114 if an unsupported BSON record type is encountered + + @sa http://bsonspec.org/spec.html + @sa @ref to_bson(const basic_json&, const bool, const bool) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + related UBJSON format + */ + static basic_json from_bson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_bson(detail::input_adapter&&, const bool, const bool) + */ + template::value, int> = 0> + static basic_json from_bson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + + /// @} ////////////////////////// diff --git a/test/Makefile b/test/Makefile index afbb1ba5..4f00cbc7 100644 --- a/test/Makefile +++ b/test/Makefile @@ -10,6 +10,7 @@ SOURCES = src/unit.cpp \ src/unit-algorithms.cpp \ src/unit-allocator.cpp \ src/unit-alt-string.cpp \ + src/unit-bson.cpp \ src/unit-capacity.cpp \ src/unit-cbor.cpp \ src/unit-class_const_iterator.cpp \ @@ -90,12 +91,15 @@ check: $(OBJECTS) $(TESTCASES) ############################################################################## FUZZER_ENGINE = src/fuzzer-driver_afl.cpp -FUZZERS = parse_afl_fuzzer parse_cbor_fuzzer parse_msgpack_fuzzer parse_ubjson_fuzzer +FUZZERS = parse_afl_fuzzer parse_bson_fuzzer parse_cbor_fuzzer parse_msgpack_fuzzer parse_ubjson_fuzzer fuzzers: $(FUZZERS) parse_afl_fuzzer: $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_json.cpp -o $@ +parse_bson_fuzzer: + $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_bson.cpp -o $@ + parse_cbor_fuzzer: $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_cbor.cpp -o $@ diff --git a/test/data/json.org/1.json.bson b/test/data/json.org/1.json.bson new file mode 100644 index 00000000..a2466af8 Binary files /dev/null and b/test/data/json.org/1.json.bson differ diff --git a/test/data/json.org/2.json.bson b/test/data/json.org/2.json.bson new file mode 100644 index 00000000..743822c5 Binary files /dev/null and b/test/data/json.org/2.json.bson differ diff --git a/test/data/json.org/3.json.bson b/test/data/json.org/3.json.bson new file mode 100644 index 00000000..2c43e351 Binary files /dev/null and b/test/data/json.org/3.json.bson differ diff --git a/test/data/json.org/4.json.bson b/test/data/json.org/4.json.bson new file mode 100644 index 00000000..aceba2d8 Binary files /dev/null and b/test/data/json.org/4.json.bson differ diff --git a/test/data/json.org/5.json.bson b/test/data/json.org/5.json.bson new file mode 100644 index 00000000..b72643da Binary files /dev/null and b/test/data/json.org/5.json.bson differ diff --git a/test/data/json_tests/pass3.json.bson b/test/data/json_tests/pass3.json.bson new file mode 100644 index 00000000..a9c07cf3 Binary files /dev/null and b/test/data/json_tests/pass3.json.bson differ diff --git a/test/src/fuzzer-parse_bson.cpp b/test/src/fuzzer-parse_bson.cpp new file mode 100644 index 00000000..86b9f176 --- /dev/null +++ b/test/src/fuzzer-parse_bson.cpp @@ -0,0 +1,73 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (fuzz test support) +| | |__ | | | | | | version 3.3.0 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +This file implements a parser test suitable for fuzz testing. Given a byte +array data, it performs the following steps: + +- j1 = from_bson(data) +- vec = to_bson(j1) +- j2 = from_bson(vec) +- assert(j1 == j2) + +The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer +drivers. + +Licensed under the MIT License . +*/ + +#include +#include +#include + +using json = nlohmann::json; + +// see http://llvm.org/docs/LibFuzzer.html +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) +{ + try + { + // step 1: parse input + std::vector vec1(data, data + size); + json j1 = json::from_bson(vec1); + + if (j1.is_discarded()) + { + return 0; + } + + try + { + // step 2: round trip + std::vector vec2 = json::to_bson(j1); + + // parse serialization + json j2 = json::from_bson(vec2); + + // serializations must match + assert(json::to_bson(j2) == vec2); + } + catch (const json::parse_error&) + { + // parsing a BSON serialization must not fail + assert(false); + } + } + catch (const json::parse_error&) + { + // parse errors are ok, because input may be random bytes + } + catch (const json::type_error&) + { + // type errors can occur during parsing, too + } + catch (const json::out_of_range&) + { + // out of range errors can occur during parsing, too + } + + // return 0 - non-zero return values are reserved for future use + return 0; +} diff --git a/test/src/fuzzer-parse_json.cpp b/test/src/fuzzer-parse_json.cpp index ed586385..7aa3dc21 100644 --- a/test/src/fuzzer-parse_json.cpp +++ b/test/src/fuzzer-parse_json.cpp @@ -60,10 +60,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // parse errors are ok, because input may be random bytes } catch (const json::out_of_range&) - { - // parse errors are ok, because input may be random bytes - } - catch (const json::out_of_range&) { // out of range errors may happen if provided sizes are excessive } diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp new file mode 100644 index 00000000..aeb7b898 --- /dev/null +++ b/test/src/unit-bson.cpp @@ -0,0 +1,1241 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.2.0 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2018 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "catch.hpp" + +#include +using nlohmann::json; +#include + +TEST_CASE("BSON") +{ + SECTION("individual values not supported") + { + SECTION("discarded") + { + // discarded values are not serialized + json j = json::value_t::discarded; + const auto result = json::to_bson(j); + CHECK(result.empty()); + } + + SECTION("null") + { + json j = nullptr; + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is null"); + } + + SECTION("boolean") + { + SECTION("true") + { + json j = true; + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean"); + } + + SECTION("false") + { + json j = false; + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean"); + } + } + + SECTION("number") + { + json j = 42; + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); + } + + SECTION("float") + { + json j = 4.2; + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); + } + + SECTION("string") + { + json j = "not supported"; + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string"); + } + + SECTION("array") + { + json j = std::vector {1, 2, 3, 4, 5, 6, 7}; + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array"); + } + } + + SECTION("keys containing code-point U+0000 cannot be serialized to BSON") + { + json j = + { + { std::string("en\0try", 6), true } + }; + CHECK_THROWS_AS(json::to_bson(j), json::out_of_range&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000 (at byte 2)"); + } + + SECTION("objects") + { + SECTION("empty object") + { + json j = json::object(); + std::vector expected = + { + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with bool") + { + json j = + { + { "entry", true } + }; + + std::vector expected = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, // value = true + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with bool") + { + json j = + { + { "entry", false } + }; + + std::vector expected = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00, // value = false + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with double") + { + json j = + { + { "entry", 4.2 } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x01, /// entry: double + 'e', 'n', 't', 'r', 'y', '\x00', + 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with string") + { + json j = + { + { "entry", "bsonstr" } + }; + + std::vector expected = + { + 0x18, 0x00, 0x00, 0x00, // size (little endian) + 0x02, /// entry: string (UTF-8) + 'e', 'n', 't', 'r', 'y', '\x00', + 0x08, 0x00, 0x00, 0x00, 'b', 's', 'o', 'n', 's', 't', 'r', '\x00', + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with null member") + { + json j = + { + { "entry", nullptr } + }; + + std::vector expected = + { + 0x0C, 0x00, 0x00, 0x00, // size (little endian) + 0x0A, /// entry: null + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with integer (32-bit) member") + { + json j = + { + { "entry", std::int32_t{0x12345678} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with integer (64-bit) member") + { + json j = + { + { "entry", std::int64_t{0x1234567804030201} } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x12, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, 0x02, 0x03, 0x04, 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with negative integer (32-bit) member") + { + json j = + { + { "entry", std::int32_t{-1} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0xFF, 0xFF, 0xFF, 0xFF, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with negative integer (64-bit) member") + { + json j = + { + { "entry", std::int64_t{-1} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0xFF, 0xFF, 0xFF, 0xFF, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with unsigned integer (64-bit) member") + { + // directly encoding uint64 is not supported in bson (only for timestamp values) + json j = + { + { "entry", std::uint64_t{0x1234567804030201} } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x12, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, 0x02, 0x03, 0x04, 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with small unsigned integer member") + { + json j = + { + { "entry", std::uint64_t{0x42} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x42, 0x00, 0x00, 0x00, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("discarded values are not serialized") + { + json j = json::value_t::discarded; + const auto result = json::to_bson(j); + CHECK(result.empty()); + } + + SECTION("discarded members are not serialized") + { + json j = + { + { "entry", json::value_t::discarded } + }; + + std::vector expected = + { + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + } + + + SECTION("non-empty object with object member") + { + json j = + { + { "entry", json::object() } + }; + + std::vector expected = + { + 0x11, 0x00, 0x00, 0x00, // size (little endian) + 0x03, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with array member") + { + json j = + { + { "entry", json::array() } + }; + + std::vector expected = + { + 0x11, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with non-empty array member") + { + json j = + { + { "entry", json::array({1, 2, 3, 4, 5, 6, 7, 8}) } + }; + + std::vector expected = + { + 0x49, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x3D, 0x00, 0x00, 0x00, // size (little endian) + 0x10, '0', 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, '1', 0x00, 0x02, 0x00, 0x00, 0x00, + 0x10, '2', 0x00, 0x03, 0x00, 0x00, 0x00, + 0x10, '3', 0x00, 0x04, 0x00, 0x00, 0x00, + 0x10, '4', 0x00, 0x05, 0x00, 0x00, 0x00, + 0x10, '5', 0x00, 0x06, 0x00, 0x00, 0x00, + 0x10, '6', 0x00, 0x07, 0x00, 0x00, 0x00, + 0x10, '7', 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("Some more complex document") + { + // directly encoding uint64 is not supported in bson (only for timestamp values) + json j = + { + {"double", 42.5}, + {"entry", 4.2}, + {"number", 12345}, + {"object", {{ "string", "value" }}} + }; + + std::vector expected = + { + /*size */ 0x4f, 0x00, 0x00, 0x00, + /*entry*/ 0x01, 'd', 'o', 'u', 'b', 'l', 'e', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x45, 0x40, + /*entry*/ 0x01, 'e', 'n', 't', 'r', 'y', 0x00, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + /*entry*/ 0x10, 'n', 'u', 'm', 'b', 'e', 'r', 0x00, 0x39, 0x30, 0x00, 0x00, + /*entry*/ 0x03, 'o', 'b', 'j', 'e', 'c', 't', 0x00, + /*entry: obj-size */ 0x17, 0x00, 0x00, 0x00, + /*entry: obj-entry*/0x02, 's', 't', 'r', 'i', 'n', 'g', 0x00, 0x06, 0x00, 0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0, + /*entry: obj-term.*/0x00, + /*obj-term*/ 0x00 + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + } + + SECTION("Examples from http://bsonspec.org/faq.html") + { + SECTION("Example 1") + { + std::vector input = {0x16, 0x00, 0x00, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x06, 0x00, 0x00, 0x00, 'w', 'o', 'r', 'l', 'd', 0x00, 0x00}; + json parsed = json::from_bson(input); + json expected = {{"hello", "world"}}; + CHECK(parsed == expected); + auto dumped = json::to_bson(parsed); + CHECK(dumped == input); + CHECK(json::from_bson(dumped) == expected); + } + + SECTION("Example 2") + { + std::vector input = {0x31, 0x00, 0x00, 0x00, 0x04, 'B', 'S', 'O', 'N', 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x30, 0x00, 0x08, 0x00, 0x00, 0x00, 'a', 'w', 'e', 's', 'o', 'm', 'e', 0x00, 0x01, 0x31, 0x00, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x14, 0x40, 0x10, 0x32, 0x00, 0xc2, 0x07, 0x00, 0x00, 0x00, 0x00}; + json parsed = json::from_bson(input); + json expected = {{"BSON", {"awesome", 5.05, 1986}}}; + CHECK(parsed == expected); + auto dumped = json::to_bson(parsed); + CHECK(dumped == input); + CHECK(json::from_bson(dumped) == expected); + } + } +} + +TEST_CASE("BSON input/output_adapters") +{ + json json_representation = + { + {"double", 42.5}, + {"entry", 4.2}, + {"number", 12345}, + {"object", {{ "string", "value" }}} + }; + + std::vector bson_representation = + { + /*size */ 0x4f, 0x00, 0x00, 0x00, + /*entry*/ 0x01, 'd', 'o', 'u', 'b', 'l', 'e', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x45, 0x40, + /*entry*/ 0x01, 'e', 'n', 't', 'r', 'y', 0x00, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + /*entry*/ 0x10, 'n', 'u', 'm', 'b', 'e', 'r', 0x00, 0x39, 0x30, 0x00, 0x00, + /*entry*/ 0x03, 'o', 'b', 'j', 'e', 'c', 't', 0x00, + /*entry: obj-size */ 0x17, 0x00, 0x00, 0x00, + /*entry: obj-entry*/0x02, 's', 't', 'r', 'i', 'n', 'g', 0x00, 0x06, 0x00, 0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0, + /*entry: obj-term.*/0x00, + /*obj-term*/ 0x00 + }; + + json j2; + CHECK_NOTHROW(j2 = json::from_bson(bson_representation)); + + // compare parsed JSON values + CHECK(json_representation == j2); + + SECTION("roundtrips") + { + SECTION("std::ostringstream") + { + std::ostringstream ss; + json::to_bson(json_representation, ss); + std::istringstream iss(ss.str()); + json j3 = json::from_bson(iss); + CHECK(json_representation == j3); + } + + SECTION("std::string") + { + std::string s; + json::to_bson(json_representation, s); + json j3 = json::from_bson(s); + CHECK(json_representation == j3); + } + + SECTION("std::vector") + { + std::vector v; + json::to_bson(json_representation, v); + json j3 = json::from_bson(v); + CHECK(json_representation == j3); + } + } +} + +class SaxCountdown +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() + { + return events_left-- > 0; + } + + bool boolean(bool) + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t) + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t) + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t, const std::string&) + { + return events_left-- > 0; + } + + bool string(std::string&) + { + return events_left-- > 0; + } + + bool start_object(std::size_t) + { + return events_left-- > 0; + } + + bool key(std::string&) + { + return events_left-- > 0; + } + + bool end_object() + { + return events_left-- > 0; + } + + bool start_array(std::size_t) + { + return events_left-- > 0; + } + + bool end_array() + { + return events_left-- > 0; + } + + bool parse_error(std::size_t, const std::string&, const json::exception&) + { + return false; + } + + private: + int events_left = 0; +}; + +TEST_CASE("Incomplete BSON Input") +{ + SECTION("Incomplete BSON Input 1") + { + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't' // unexpected EOF + }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input"); + + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } + + SECTION("Incomplete BSON Input 2") + { + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean, unexpected EOF + }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } + + SECTION("Incomplete BSON Input 3") + { + std::vector incomplete_bson = + { + 0x41, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x35, 0x00, 0x00, 0x00, // size (little endian) + 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x02, 0x00, 0x00, 0x00 + // missing input data... + }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(1); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } + + SECTION("Incomplete BSON Input 4") + { + std::vector incomplete_bson = + { + 0x0D, 0x00, // size (incomplete), unexpected EOF + }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } +} + +TEST_CASE("Unsupported BSON input") +{ + std::vector bson = + { + 0x0C, 0x00, 0x00, 0x00, // size (little endian) + 0xFF, // entry type: Min key (not supported yet) + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00 // end marker + }; + + CHECK_THROWS_AS(json::from_bson(bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(bson), + "[json.exception.parse_error.114] parse error at byte 5: Unsupported BSON record type 0xFF"); + CHECK(json::from_bson(bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(bson, &scp, json::input_format_t::bson)); +} + +TEST_CASE("BSON numerical data") +{ + SECTION("number") + { + SECTION("signed") + { + SECTION("std::int64_t: INT64_MIN .. INT32_MIN-1") + { + std::vector numbers + { + INT64_MIN, + -1000000000000000000LL, + -100000000000000000LL, + -10000000000000000LL, + -1000000000000000LL, + -100000000000000LL, + -10000000000000LL, + -1000000000000LL, + -100000000000LL, + -10000000000LL, + static_cast(INT32_MIN) - 1, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + + SECTION("signed std::int32_t: INT32_MIN .. INT32_MAX") + { + std::vector numbers + { + INT32_MIN, + -2147483647L, + -1000000000L, + -100000000L, + -10000000L, + -1000000L, + -100000L, + -10000L, + -1000L, + -100L, + -10L, + -1L, + 0L, + 1L, + 10L, + 100L, + 1000L, + 10000L, + 100000L, + 1000000L, + 10000000L, + 100000000L, + 1000000000L, + 2147483646L, + INT32_MAX + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint32_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x10u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x10u, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + SECTION("signed std::int64_t: INT32_MAX+1 .. INT64_MAX") + { + std::vector numbers + { + INT64_MAX, + 1000000000000000000LL, + 100000000000000000LL, + 10000000000000000LL, + 1000000000000000LL, + 100000000000000LL, + 10000000000000LL, + 1000000000000LL, + 100000000000LL, + 10000000000LL, + static_cast(INT32_MAX) + 1, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + } + + SECTION("unsigned") + { + SECTION("unsigned std::uint64_t: 0 .. INT32_MAX") + { + std::vector numbers + { + 0ULL, + 1ULL, + 10ULL, + 100ULL, + 1000ULL, + 10000ULL, + 100000ULL, + 1000000ULL, + 10000000ULL, + 100000000ULL, + 1000000000ULL, + 2147483646ULL, + static_cast(INT32_MAX) + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + auto iu = i; + std::vector expected_bson = + { + 0x10u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x10u, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j.at("entry").is_number_unsigned()); + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + SECTION("unsigned std::uint64_t: INT32_MAX+1 .. INT64_MAX") + { + std::vector numbers + { + static_cast(INT32_MAX) + 1, + 4000000000ULL, + static_cast(UINT32_MAX), + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL, + static_cast(INT64_MAX), + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + auto iu = i; + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j.at("entry").is_number_unsigned()); + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + } + } + + SECTION("unsigned std::uint64_t: INT64_MAX+1 .. UINT64_MAX") + { + std::vector numbers + { + static_cast(INT64_MAX) + 1ULL, + 10000000000000000000ULL, + 18000000000000000000ULL, + UINT64_MAX - 1ULL, + UINT64_MAX, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + auto iu = i; + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + CHECK_THROWS_AS(json::to_bson(j), json::out_of_range&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.407] number overflow serializing " + std::to_string(i)); + } + } + + } + } +} + +TEST_CASE("BSON roundtrips", "[hide]") +{ + SECTION("reference files") + { + for (std::string filename : + { + "test/data/json.org/1.json", + "test/data/json.org/2.json", + "test/data/json.org/3.json", + "test/data/json.org/4.json", + "test/data/json.org/5.json" + }) + { + CAPTURE(filename); + + SECTION(filename + ": std::vector") + { + // parse JSON file + std::ifstream f_json(filename); + json j1 = json::parse(f_json); + + // parse BSON file + std::ifstream f_bson(filename + ".bson", std::ios::binary); + std::vector packed( + (std::istreambuf_iterator(f_bson)), + std::istreambuf_iterator()); + json j2; + CHECK_NOTHROW(j2 = json::from_bson(packed)); + + // compare parsed JSON values + CHECK(j1 == j2); + } + + SECTION(filename + ": std::ifstream") + { + // parse JSON file + std::ifstream f_json(filename); + json j1 = json::parse(f_json); + + // parse BSON file + std::ifstream f_bson(filename + ".bson", std::ios::binary); + json j2; + CHECK_NOTHROW(j2 = json::from_bson(f_bson)); + + // compare parsed JSON values + CHECK(j1 == j2); + } + + SECTION(filename + ": uint8_t* and size") + { + // parse JSON file + std::ifstream f_json(filename); + json j1 = json::parse(f_json); + + // parse BSON file + std::ifstream f_bson(filename + ".bson", std::ios::binary); + std::vector packed( + (std::istreambuf_iterator(f_bson)), + std::istreambuf_iterator()); + json j2; + CHECK_NOTHROW(j2 = json::from_bson({packed.data(), packed.size()})); + + // compare parsed JSON values + CHECK(j1 == j2); + } + + SECTION(filename + ": output to output adapters") + { + // parse JSON file + std::ifstream f_json(filename); + json j1 = json::parse(f_json); + + // parse BSON file + std::ifstream f_bson(filename + ".bson", std::ios::binary); + std::vector packed( + (std::istreambuf_iterator(f_bson)), + std::istreambuf_iterator()); + + SECTION(filename + ": output adapters: std::vector") + { + std::vector vec; + json::to_bson(j1, vec); + + if (vec != packed) + { + // the exact serializations may differ due to the order of + // object keys; in these cases, just compare whether both + // serializations create the same JSON value + CHECK(json::from_bson(vec) == json::from_bson(packed)); + } + } + } + } + } +} diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index 2c21576d..c7736b71 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -139,10 +139,10 @@ bool operator==(Data const& lhs, Data const& rhs) return lhs.a == rhs.a && lhs.b == rhs.b; } -bool operator!=(Data const& lhs, Data const& rhs) -{ - return !(lhs == rhs); -} +//bool operator!=(Data const& lhs, Data const& rhs) +//{ +// return !(lhs == rhs); +//} } /////////////////////////////////////////////////////////////////////