From 1968e5c7939ad63262de6649a7ee8c6f50aecaff Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 24 Oct 2018 23:39:30 +0200 Subject: [PATCH] :art: clean up binary formats --- .../nlohmann/detail/input/binary_reader.hpp | 790 ++++++------ .../nlohmann/detail/output/binary_writer.hpp | 312 +++-- single_include/nlohmann/json.hpp | 1102 +++++++++-------- test/src/unit-bson.cpp | 184 +-- test/src/unit-regression.cpp | 8 +- 5 files changed, 1274 insertions(+), 1122 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index c213d855..615ab73c 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -125,14 +125,38 @@ class binary_reader } private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } /*! @brief Parses a C-style string from the BSON input. - @param [out] result A reference to the string variable where the read string - is to be stored. - @return `true` if the \x00-byte indicating the end of the - string was encountered before the EOF. - `false` indicates an unexpected EOF. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. */ bool get_bson_cstr(string_t& result) { @@ -155,107 +179,107 @@ class binary_reader } /*! - @brief Parses a zero-terminated string of length @a len from the BSON input. - @param [in] len The length (including the zero-byte at the end) of the string to be read. - @param [out] result A reference to the string variable where the read string - is to be stored. + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in, out] result A reference to the string variable where the read + string is to be stored. @tparam NumberType The type of the length @a len @pre len > 0 @return `true` if the string was successfully parsed */ - template + template bool get_bson_string(const NumberType len, string_t& result) { - return get_string(input_format_t::bson, len - static_cast(1), result) - && get() != std::char_traits::eof(); - } - - /*! - @return A hexadecimal string representation of the given @a byte - @param byte The byte to convert to a string - */ - static std::string byte_hexstring(unsigned char byte) - { - char cr[3]; - snprintf(cr, sizeof(cr), "%02hhX", byte); - return std::string{cr}; + return get_string(input_format_t::bson, len - static_cast(1), result) and get() != std::char_traits::eof(); } /*! @brief Read a BSON document element of the given @a element_type. - @param element_type The BSON element type, c.f. http://bsonspec.org/spec.html - @param element_type_parse_position The position in the input stream, where the `element_type` was read. - @warning Not all BSON element types are supported yet. An unsupported @a element_type will - give rise to a parse_error.114: Unsupported BSON record type 0x... + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... @return whether a valid BSON-object/array was passed to the SAX parser */ - bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position) + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) { switch (element_type) { case 0x01: // double { double number; - return get_number(input_format_t::bson, number) - && sax->number_float(static_cast(number), ""); + return get_number(input_format_t::bson, number) and sax->number_float(static_cast(number), ""); } + case 0x02: // string { std::int32_t len; string_t value; - return get_number(input_format_t::bson, len) - && get_bson_string(len, value) - && sax->string(value); - } - case 0x08: // boolean - { - return sax->boolean(static_cast(get())); - } - case 0x10: // int32 - { - std::int32_t value; - return get_number(input_format_t::bson, value) - && sax->number_integer(static_cast(value)); - } - case 0x12: // int64 - { - std::int64_t value; - return get_number(input_format_t::bson, value) - && sax->number_integer(static_cast(value)); - } - case 0x0A: // null - { - return sax->null(); + return get_number(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); } + case 0x03: // object { return parse_bson_internal(); } + case 0x04: // array { return parse_bson_array(); } + + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + } + default: // anything else not supported (yet) { - auto element_type_str = byte_hexstring(element_type); - return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str)); + char cr[3]; + snprintf(cr, sizeof(cr), "%.2hhX", static_cast(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); } } } /*! - @brief Read a BSON element list (as specified in the BSON-spec) from the input - and passes it to the SAX-parser. - The same binary layout is used for objects and arrays, hence it must - be indicated with the argument @a is_array which one is expected - (true --> array, false --> object). - @param is_array Determines if the element list being read is to be treated as - an object (@a is_array == false), or as an array (@a is_array == true). + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). @return whether a valid BSON-object/array was passed to the SAX parser */ - bool parse_bson_element_list(bool is_array) + bool parse_bson_element_list(const bool is_array) { - while (auto element_type = get()) + string_t key; + while (int element_type = get()) { if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) { @@ -263,13 +287,12 @@ class binary_reader } const std::size_t element_type_parse_position = chars_read; - string_t key; if (JSON_UNLIKELY(not get_bson_cstr(key))) { return false; } - if (!is_array) + if (not is_array) { sax->key(key); } @@ -278,7 +301,11 @@ class binary_reader { return false; } + + // get_bson_cstr only appends + key.clear(); } + return true; } @@ -291,7 +318,7 @@ class binary_reader std::int32_t documentSize; get_number(input_format_t::bson, documentSize); - if (JSON_UNLIKELY(not sax->start_array(-1))) + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) { return false; } @@ -304,27 +331,9 @@ class binary_reader return sax->end_array(); } - /*! - @brief Reads in a BSON-object and pass it to the SAX-parser. - @return whether a valid BSON-value was passed to the SAX parser - */ - bool parse_bson_internal() - { - std::int32_t documentSize; - get_number(input_format_t::bson, documentSize); - - if (JSON_UNLIKELY(not sax->start_object(-1))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) - { - return false; - } - - return sax->end_object(); - } + ////////// + // CBOR // + ////////// /*! @param[in] get_char whether a new character should be retrieved from the @@ -664,6 +673,191 @@ class binary_reader } } + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, current & 0x1F, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); + } + } + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) + { + if (not JSON_UNLIKELY(sax->start_object(len))) + { + return false; + } + + string_t key; + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + /*! @return whether a valid MessagePack value was passed to the SAX parser */ @@ -1026,301 +1220,6 @@ class binary_reader } } - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /*! - @return character read from the input after ignoring all 'N' entries - */ - int get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template - bool get_number(const input_format_t format, NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian && !InputIsLittleEndian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of characters to read - @param[out] result string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template - bool get_string(const input_format_t format, const NumberType len, string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success, &format]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) - { - success = false; - } - return static_cast(current); - }); - return success; - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(input_format_t::cbor, current & 0x1F, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - /*! @brief reads a MessagePack string @@ -1455,6 +1354,22 @@ class binary_reader return sax->end_object(); } + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief reads a UBJSON string @@ -1869,6 +1784,113 @@ class binary_reader return sax->end_object(); } + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianess, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast(current); + }); + return success; + } + /*! @param[in] format the current format (for diagnostics) @param[in] context further context information (for diagnostics) @@ -1894,7 +1916,6 @@ class binary_reader return std::string{cr}; } - private: /*! @param[in] format the current format @param[in] detail a detailed error message @@ -1934,6 +1955,7 @@ class binary_reader return error_msg + " " + context + ": " + detail; } + private: /// input adapter input_adapter_t ia = nullptr; diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 7f9f0be3..64a02aac 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -35,7 +35,34 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + case value_t::discarded: + { + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + break; + } + } + } + + /*! + @param[in] j JSON value to serialize */ void write_cbor(const BasicJsonType& j) { @@ -279,7 +306,7 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize */ void write_msgpack(const BasicJsonType& j) { @@ -678,13 +705,19 @@ class binary_writer } } + private: + ////////// + // BSON // + ////////// + /*! - @return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator). + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { const auto it = name.find(static_cast(0)); - if (it != BasicJsonType::string_t::npos) + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) { JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); @@ -696,7 +729,8 @@ class binary_writer /*! @brief Writes the given @a element_type and @a name to the output adapter */ - void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type) + void write_bson_entry_header(const typename BasicJsonType::string_t& name, + std::uint8_t element_type) { oa->write_character(static_cast(element_type)); // boolean oa->write_characters( @@ -707,7 +741,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and boolean value @a value */ - void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value) + void write_bson_boolean(const typename BasicJsonType::string_t& name, + const bool value) { write_bson_entry_header(name, 0x08); oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); @@ -716,7 +751,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and double value @a value */ - void write_bson_double(const typename BasicJsonType::string_t& name, const double value) + void write_bson_double(const typename BasicJsonType::string_t& name, + const double value) { write_bson_entry_header(name, 0x01); write_number(value); @@ -733,7 +769,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and string value @a value */ - void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value) + void write_bson_string(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::string_t& value) { write_bson_entry_header(name, 0x02); @@ -769,7 +806,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and integer @a value */ - void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value) + void write_bson_integer(const typename BasicJsonType::string_t& name, + const std::int64_t value) { if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { @@ -783,7 +821,6 @@ class binary_writer } } - /*! @return The size of the BSON-encoded unsigned integer in @a j */ @@ -802,7 +839,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and unsigned @a value */ - void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value) + void write_bson_unsigned(const typename BasicJsonType::string_t& name, + const std::uint64_t value) { if (value <= static_cast((std::numeric_limits::max)())) { @@ -824,13 +862,13 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and object @a value */ - void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value) + void write_bson_object_entry(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::object_t& value) { write_bson_entry_header(name, 0x03); // object write_bson_object(value); } - /*! @return The size of the BSON-encoded array @a value */ @@ -849,10 +887,11 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and array @a value */ - void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value) + void write_bson_array(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::array_t& value) { write_bson_entry_header(name, 0x04); // array - write_number(calc_bson_array_size(value)); + write_number(static_cast(calc_bson_array_size(value))); for (const auto& el : value) { @@ -862,75 +901,95 @@ class binary_writer oa->write_character(static_cast(0x00)); } - /*! @brief Calculates the size necessary to serialize the JSON value @a j with its @a name @return The calculated size for the BSON document entry for @a j with the given @a name. */ - static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) { const auto header_size = calc_bson_entry_header_size(name); switch (j.type()) { + case value_t::discarded: + return 0ul; + + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + // LCOV_EXCL_START default: assert(false); return 0ul; - // LCOV_EXCL_STOP - case value_t::discarded: - return 0ul; - case value_t::object: - return header_size + calc_bson_object_size(*j.m_value.object); - case value_t::array: - return header_size + calc_bson_array_size(*j.m_value.array); - case value_t::boolean: - return header_size + 1ul; - case value_t::number_float: - return header_size + 8ul; - case value_t::number_integer: - return header_size + calc_bson_integer_size(j.m_value.number_integer); - case value_t::number_unsigned: - return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); - case value_t::string: - return header_size + calc_bson_string_size(*j.m_value.string); - case value_t::null: - return header_size + 0ul; + // LCOV_EXCL_STOP }; } - /*! - @brief Serializes the JSON value @a j to BSON and associates it with the key @a name. - @param name The name to associate with the JSON entity @a j within the current BSON document - @return The size of the bson entry + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. + @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry */ - void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + void write_bson_element(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) { switch (j.type()) { + case value_t::discarded: + return; + + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + + case value_t::string: + return write_bson_string(name, *j.m_value.string); + + case value_t::null: + return write_bson_null(name); + // LCOV_EXCL_START default: assert(false); return; - // LCOV_EXCL_STOP - case value_t::discarded: - return; - case value_t::object: - return write_bson_object_entry(name, *j.m_value.object); - case value_t::array: - return write_bson_array(name, *j.m_value.array); - case value_t::boolean: - return write_bson_boolean(name, j.m_value.boolean); - case value_t::number_float: - return write_bson_double(name, j.m_value.number_float); - case value_t::number_integer: - return write_bson_integer(name, j.m_value.number_integer); - case value_t::number_unsigned: - return write_bson_unsigned(name, j.m_value.number_unsigned); - case value_t::string: - return write_bson_string(name, *j.m_value.string); - case value_t::null: - return write_bson_null(name); + // LCOV_EXCL_STOP }; } @@ -958,7 +1017,7 @@ class binary_writer */ void write_bson_object(const typename BasicJsonType::object_t& value) { - write_number(calc_bson_object_size(value)); + write_number(static_cast(calc_bson_object_size(value))); for (const auto& el : value) { @@ -968,64 +1027,38 @@ class binary_writer oa->write_character(static_cast(0x00)); } - /*! - @param[in] j JSON value to serialize - @pre j.type() == value_t::object - */ - void write_bson(const BasicJsonType& j) + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) { - switch (j.type()) - { - case value_t::object: - { - write_bson_object(*j.m_value.object); - break; - } - - case value_t::discarded: - { - break; - } - - default: - { - JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); - break; - } - } + return static_cast(0xFA); // Single-Precision Float } - - private: - /* - @brief write a number to output input - - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - @tparam OutputIsLittleEndian Set to true if output data is - required to be little endian - - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. - */ - template - void write_number(const NumberType n) + static constexpr CharType get_cbor_float_prefix(double /*unused*/) { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (is_little_endian and not OutputIsLittleEndian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - - oa->write_characters(vec.data(), sizeof(NumberType)); + return static_cast(0xFB); // Double-Precision Float } + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return static_cast(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return static_cast(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -1226,26 +1259,6 @@ class binary_writer } } - static constexpr CharType get_cbor_float_prefix(float /*unused*/) - { - return static_cast(0xFA); // Single-Precision Float - } - - static constexpr CharType get_cbor_float_prefix(double /*unused*/) - { - return static_cast(0xFB); // Double-Precision Float - } - - static constexpr CharType get_msgpack_float_prefix(float /*unused*/) - { - return static_cast(0xCA); // float 32 - } - - static constexpr CharType get_msgpack_float_prefix(double /*unused*/) - { - return static_cast(0xCB); // float 64 - } - static constexpr CharType get_ubjson_float_prefix(float /*unused*/) { return 'd'; // float 32 @@ -1256,6 +1269,39 @@ class binary_writer return 'D'; // float 64 } + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. + */ + template + void write_number(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + private: /// whether we can assume little endianess const bool is_little_endian = binary_reader::little_endianess(); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 0664cc75..cdd4e680 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6426,14 +6426,38 @@ class binary_reader } private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } /*! @brief Parses a C-style string from the BSON input. - @param [out] result A reference to the string variable where the read string - is to be stored. - @return `true` if the \x00-byte indicating the end of the - string was encountered before the EOF. - `false` indicates an unexpected EOF. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. */ bool get_bson_cstr(string_t& result) { @@ -6456,107 +6480,107 @@ class binary_reader } /*! - @brief Parses a zero-terminated string of length @a len from the BSON input. - @param [in] len The length (including the zero-byte at the end) of the string to be read. - @param [out] result A reference to the string variable where the read string - is to be stored. + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in, out] result A reference to the string variable where the read + string is to be stored. @tparam NumberType The type of the length @a len @pre len > 0 @return `true` if the string was successfully parsed */ - template + template bool get_bson_string(const NumberType len, string_t& result) { - return get_string(input_format_t::bson, len - static_cast(1), result) - && get() != std::char_traits::eof(); - } - - /*! - @return A hexadecimal string representation of the given @a byte - @param byte The byte to convert to a string - */ - static std::string byte_hexstring(unsigned char byte) - { - char cr[3]; - snprintf(cr, sizeof(cr), "%02hhX", byte); - return std::string{cr}; + return get_string(input_format_t::bson, len - static_cast(1), result) and get() != std::char_traits::eof(); } /*! @brief Read a BSON document element of the given @a element_type. - @param element_type The BSON element type, c.f. http://bsonspec.org/spec.html - @param element_type_parse_position The position in the input stream, where the `element_type` was read. - @warning Not all BSON element types are supported yet. An unsupported @a element_type will - give rise to a parse_error.114: Unsupported BSON record type 0x... + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... @return whether a valid BSON-object/array was passed to the SAX parser */ - bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position) + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) { switch (element_type) { case 0x01: // double { double number; - return get_number(input_format_t::bson, number) - && sax->number_float(static_cast(number), ""); + return get_number(input_format_t::bson, number) and sax->number_float(static_cast(number), ""); } + case 0x02: // string { std::int32_t len; string_t value; - return get_number(input_format_t::bson, len) - && get_bson_string(len, value) - && sax->string(value); - } - case 0x08: // boolean - { - return sax->boolean(static_cast(get())); - } - case 0x10: // int32 - { - std::int32_t value; - return get_number(input_format_t::bson, value) - && sax->number_integer(static_cast(value)); - } - case 0x12: // int64 - { - std::int64_t value; - return get_number(input_format_t::bson, value) - && sax->number_integer(static_cast(value)); - } - case 0x0A: // null - { - return sax->null(); + return get_number(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); } + case 0x03: // object { return parse_bson_internal(); } + case 0x04: // array { return parse_bson_array(); } + + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + } + default: // anything else not supported (yet) { - auto element_type_str = byte_hexstring(element_type); - return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str)); + char cr[3]; + snprintf(cr, sizeof(cr), "%.2hhX", static_cast(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); } } } /*! - @brief Read a BSON element list (as specified in the BSON-spec) from the input - and passes it to the SAX-parser. - The same binary layout is used for objects and arrays, hence it must - be indicated with the argument @a is_array which one is expected - (true --> array, false --> object). - @param is_array Determines if the element list being read is to be treated as - an object (@a is_array == false), or as an array (@a is_array == true). + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). @return whether a valid BSON-object/array was passed to the SAX parser */ - bool parse_bson_element_list(bool is_array) + bool parse_bson_element_list(const bool is_array) { - while (auto element_type = get()) + string_t key; + while (int element_type = get()) { if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) { @@ -6564,13 +6588,12 @@ class binary_reader } const std::size_t element_type_parse_position = chars_read; - string_t key; if (JSON_UNLIKELY(not get_bson_cstr(key))) { return false; } - if (!is_array) + if (not is_array) { sax->key(key); } @@ -6579,7 +6602,11 @@ class binary_reader { return false; } + + // get_bson_cstr only appends + key.clear(); } + return true; } @@ -6592,7 +6619,7 @@ class binary_reader std::int32_t documentSize; get_number(input_format_t::bson, documentSize); - if (JSON_UNLIKELY(not sax->start_array(-1))) + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) { return false; } @@ -6605,27 +6632,9 @@ class binary_reader return sax->end_array(); } - /*! - @brief Reads in a BSON-object and pass it to the SAX-parser. - @return whether a valid BSON-value was passed to the SAX parser - */ - bool parse_bson_internal() - { - std::int32_t documentSize; - get_number(input_format_t::bson, documentSize); - - if (JSON_UNLIKELY(not sax->start_object(-1))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) - { - return false; - } - - return sax->end_object(); - } + ////////// + // CBOR // + ////////// /*! @param[in] get_char whether a new character should be retrieved from the @@ -6965,6 +6974,191 @@ class binary_reader } } + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, current & 0x1F, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); + } + } + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) + { + if (not JSON_UNLIKELY(sax->start_object(len))) + { + return false; + } + + string_t key; + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + /*! @return whether a valid MessagePack value was passed to the SAX parser */ @@ -7327,301 +7521,6 @@ class binary_reader } } - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /*! - @return character read from the input after ignoring all 'N' entries - */ - int get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template - bool get_number(const input_format_t format, NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian && !InputIsLittleEndian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of characters to read - @param[out] result string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template - bool get_string(const input_format_t format, const NumberType len, string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success, &format]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) - { - success = false; - } - return static_cast(current); - }); - return success; - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(input_format_t::cbor, current & 0x1F, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - /*! @brief reads a MessagePack string @@ -7756,6 +7655,22 @@ class binary_reader return sax->end_object(); } + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief reads a UBJSON string @@ -8170,6 +8085,113 @@ class binary_reader return sax->end_object(); } + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianess, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast(current); + }); + return success; + } + /*! @param[in] format the current format (for diagnostics) @param[in] context further context information (for diagnostics) @@ -8195,7 +8217,6 @@ class binary_reader return std::string{cr}; } - private: /*! @param[in] format the current format @param[in] detail a detailed error message @@ -8235,6 +8256,7 @@ class binary_reader return error_msg + " " + context + ": " + detail; } + private: /// input adapter input_adapter_t ia = nullptr; @@ -8293,7 +8315,34 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + case value_t::discarded: + { + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + break; + } + } + } + + /*! + @param[in] j JSON value to serialize */ void write_cbor(const BasicJsonType& j) { @@ -8537,7 +8586,7 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize */ void write_msgpack(const BasicJsonType& j) { @@ -8936,13 +8985,19 @@ class binary_writer } } + private: + ////////// + // BSON // + ////////// + /*! - @return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator). + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { const auto it = name.find(static_cast(0)); - if (it != BasicJsonType::string_t::npos) + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) { JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); @@ -8954,7 +9009,8 @@ class binary_writer /*! @brief Writes the given @a element_type and @a name to the output adapter */ - void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type) + void write_bson_entry_header(const typename BasicJsonType::string_t& name, + std::uint8_t element_type) { oa->write_character(static_cast(element_type)); // boolean oa->write_characters( @@ -8965,7 +9021,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and boolean value @a value */ - void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value) + void write_bson_boolean(const typename BasicJsonType::string_t& name, + const bool value) { write_bson_entry_header(name, 0x08); oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); @@ -8974,7 +9031,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and double value @a value */ - void write_bson_double(const typename BasicJsonType::string_t& name, const double value) + void write_bson_double(const typename BasicJsonType::string_t& name, + const double value) { write_bson_entry_header(name, 0x01); write_number(value); @@ -8991,7 +9049,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and string value @a value */ - void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value) + void write_bson_string(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::string_t& value) { write_bson_entry_header(name, 0x02); @@ -9027,7 +9086,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and integer @a value */ - void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value) + void write_bson_integer(const typename BasicJsonType::string_t& name, + const std::int64_t value) { if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { @@ -9041,7 +9101,6 @@ class binary_writer } } - /*! @return The size of the BSON-encoded unsigned integer in @a j */ @@ -9060,7 +9119,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and unsigned @a value */ - void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value) + void write_bson_unsigned(const typename BasicJsonType::string_t& name, + const std::uint64_t value) { if (value <= static_cast((std::numeric_limits::max)())) { @@ -9082,13 +9142,13 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and object @a value */ - void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value) + void write_bson_object_entry(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::object_t& value) { write_bson_entry_header(name, 0x03); // object write_bson_object(value); } - /*! @return The size of the BSON-encoded array @a value */ @@ -9107,10 +9167,11 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and array @a value */ - void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value) + void write_bson_array(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::array_t& value) { write_bson_entry_header(name, 0x04); // array - write_number(calc_bson_array_size(value)); + write_number(static_cast(calc_bson_array_size(value))); for (const auto& el : value) { @@ -9120,75 +9181,95 @@ class binary_writer oa->write_character(static_cast(0x00)); } - /*! @brief Calculates the size necessary to serialize the JSON value @a j with its @a name @return The calculated size for the BSON document entry for @a j with the given @a name. */ - static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) { const auto header_size = calc_bson_entry_header_size(name); switch (j.type()) { + case value_t::discarded: + return 0ul; + + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + // LCOV_EXCL_START default: assert(false); return 0ul; - // LCOV_EXCL_STOP - case value_t::discarded: - return 0ul; - case value_t::object: - return header_size + calc_bson_object_size(*j.m_value.object); - case value_t::array: - return header_size + calc_bson_array_size(*j.m_value.array); - case value_t::boolean: - return header_size + 1ul; - case value_t::number_float: - return header_size + 8ul; - case value_t::number_integer: - return header_size + calc_bson_integer_size(j.m_value.number_integer); - case value_t::number_unsigned: - return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); - case value_t::string: - return header_size + calc_bson_string_size(*j.m_value.string); - case value_t::null: - return header_size + 0ul; + // LCOV_EXCL_STOP }; } - /*! - @brief Serializes the JSON value @a j to BSON and associates it with the key @a name. - @param name The name to associate with the JSON entity @a j within the current BSON document - @return The size of the bson entry + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. + @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry */ - void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + void write_bson_element(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) { switch (j.type()) { + case value_t::discarded: + return; + + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + + case value_t::string: + return write_bson_string(name, *j.m_value.string); + + case value_t::null: + return write_bson_null(name); + // LCOV_EXCL_START default: assert(false); return; - // LCOV_EXCL_STOP - case value_t::discarded: - return; - case value_t::object: - return write_bson_object_entry(name, *j.m_value.object); - case value_t::array: - return write_bson_array(name, *j.m_value.array); - case value_t::boolean: - return write_bson_boolean(name, j.m_value.boolean); - case value_t::number_float: - return write_bson_double(name, j.m_value.number_float); - case value_t::number_integer: - return write_bson_integer(name, j.m_value.number_integer); - case value_t::number_unsigned: - return write_bson_unsigned(name, j.m_value.number_unsigned); - case value_t::string: - return write_bson_string(name, *j.m_value.string); - case value_t::null: - return write_bson_null(name); + // LCOV_EXCL_STOP }; } @@ -9216,7 +9297,7 @@ class binary_writer */ void write_bson_object(const typename BasicJsonType::object_t& value) { - write_number(calc_bson_object_size(value)); + write_number(static_cast(calc_bson_object_size(value))); for (const auto& el : value) { @@ -9226,64 +9307,38 @@ class binary_writer oa->write_character(static_cast(0x00)); } - /*! - @param[in] j JSON value to serialize - @pre j.type() == value_t::object - */ - void write_bson(const BasicJsonType& j) + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) { - switch (j.type()) - { - case value_t::object: - { - write_bson_object(*j.m_value.object); - break; - } - - case value_t::discarded: - { - break; - } - - default: - { - JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); - break; - } - } + return static_cast(0xFA); // Single-Precision Float } - - private: - /* - @brief write a number to output input - - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - @tparam OutputIsLittleEndian Set to true if output data is - required to be little endian - - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. - */ - template - void write_number(const NumberType n) + static constexpr CharType get_cbor_float_prefix(double /*unused*/) { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (is_little_endian and not OutputIsLittleEndian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - - oa->write_characters(vec.data(), sizeof(NumberType)); + return static_cast(0xFB); // Double-Precision Float } + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return static_cast(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return static_cast(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -9484,26 +9539,6 @@ class binary_writer } } - static constexpr CharType get_cbor_float_prefix(float /*unused*/) - { - return static_cast(0xFA); // Single-Precision Float - } - - static constexpr CharType get_cbor_float_prefix(double /*unused*/) - { - return static_cast(0xFB); // Double-Precision Float - } - - static constexpr CharType get_msgpack_float_prefix(float /*unused*/) - { - return static_cast(0xCA); // float 32 - } - - static constexpr CharType get_msgpack_float_prefix(double /*unused*/) - { - return static_cast(0xCB); // float 64 - } - static constexpr CharType get_ubjson_float_prefix(float /*unused*/) { return 'd'; // float 32 @@ -9514,6 +9549,39 @@ class binary_writer return 'D'; // float 64 } + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. + */ + template + void write_number(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + private: /// whether we can assume little endianess const bool is_little_endian = binary_reader::little_endianess(); diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index ef94a807..711e8a64 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -48,7 +48,7 @@ TEST_CASE("BSON") SECTION("null") { json j = nullptr; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is null"); } @@ -57,14 +57,14 @@ TEST_CASE("BSON") SECTION("true") { json j = true; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean"); } SECTION("false") { json j = false; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean"); } } @@ -72,29 +72,29 @@ TEST_CASE("BSON") SECTION("number") { json j = 42; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); } SECTION("float") { json j = 4.2; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); } SECTION("string") { json j = "not supported"; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string"); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string"); } SECTION("array") { json j = std::vector {1, 2, 3, 4, 5, 6, 7}; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array"); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array"); } } @@ -104,7 +104,7 @@ TEST_CASE("BSON") { { std::string("en\0try", 6), true } }; - REQUIRE_THROWS_AS(json::to_bson(j), json::out_of_range&); + CHECK_THROWS_AS(json::to_bson(j), json::out_of_range&); CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000 (at byte 2)"); } @@ -541,6 +541,30 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } } + + SECTION("Examples from http://bsonspec.org/faq.html") + { + SECTION("Example 1") + { + std::vector input = {0x16, 0x00, 0x00, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x06, 0x00, 0x00, 0x00, 'w', 'o', 'r', 'l', 'd', 0x00, 0x00}; + json parsed = json::from_bson(input); + json expected = {{"hello", "world"}}; + CHECK(parsed == expected); + auto dumped = json::to_bson(parsed); + CHECK(dumped == input); + } + + SECTION("Example 2") + { + std::vector input = {0x31, 0x00, 0x00, 0x00, 0x04, 'B', 'S', 'O', 'N', 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x30, 0x00, 0x08, 0x00, 0x00, 0x00, 'a', 'w', 'e', 's', 'o', 'm', 'e', 0x00, 0x01, 0x31, 0x00, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x14, 0x40, 0x10, 0x32, 0x00, 0xc2, 0x07, 0x00, 0x00, 0x00, 0x00}; + json parsed = json::from_bson(input); + json expected = {{"BSON", {"awesome", 5.05, 1986}}}; + CHECK(parsed == expected); + auto dumped = json::to_bson(parsed); + //CHECK(dumped == input); // see https://github.com/nlohmann/json/pull/1254#issuecomment-432831216 + CHECK(json::from_bson(dumped) == expected); + } + } } TEST_CASE("BSON input/output_adapters") @@ -601,10 +625,6 @@ TEST_CASE("BSON input/output_adapters") } } - - - - class SaxCountdown { public: @@ -675,86 +695,84 @@ class SaxCountdown int events_left = 0; }; - -TEST_CASE("Incomplete BSON INPUT") +TEST_CASE("Incomplete BSON Input") { - std::vector incomplete_bson = + SECTION("Incomplete BSON Input 1") { - 0x0D, 0x00, 0x00, 0x00, // size (little endian) - 0x08, // entry: boolean - 'e', 'n', 't' // unexpected EOF - }; + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't' // unexpected EOF + }; - CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); - CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input"); + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input"); - CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); - SaxCountdown scp(0); - CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); -} + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } -TEST_CASE("Incomplete BSON INPUT 2") -{ - std::vector incomplete_bson = + SECTION("Incomplete BSON Input 2") { - 0x0D, 0x00, 0x00, 0x00, // size (little endian) - 0x08, // entry: boolean, unexpected EOF - }; + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean, unexpected EOF + }; - CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); - CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input"); - CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); - SaxCountdown scp(0); - CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); -} + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } - -TEST_CASE("Incomplete BSON INPUT 3") -{ - std::vector incomplete_bson = + SECTION("Incomplete BSON Input 3") { - 0x41, 0x00, 0x00, 0x00, // size (little endian) - 0x04, /// entry: embedded document - 'e', 'n', 't', 'r', 'y', '\x00', + std::vector incomplete_bson = + { + 0x41, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', - 0x35, 0x00, 0x00, 0x00, // size (little endian) - 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x02, 0x00, 0x00, 0x00 - // missing input data... - }; + 0x35, 0x00, 0x00, 0x00, // size (little endian) + 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x02, 0x00, 0x00, 0x00 + // missing input data... + }; - CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); - CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input"); - CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); - SaxCountdown scp(1); - CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); -} + SaxCountdown scp(1); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } - - -TEST_CASE("Incomplete BSON INPUT 4") -{ - std::vector incomplete_bson = + SECTION("Incomplete BSON Input 4") { - 0x0D, 0x00, // size (incomplete), unexpected EOF - }; + std::vector incomplete_bson = + { + 0x0D, 0x00, // size (incomplete), unexpected EOF + }; - CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); - CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input"); - CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); - SaxCountdown scp(0); - CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } } - TEST_CASE("Unsupported BSON input") { std::vector bson = @@ -774,8 +792,6 @@ TEST_CASE("Unsupported BSON input") CHECK(not json::sax_parse(bson, &scp, json::input_format_t::bson)); } - - TEST_CASE("BSON numerical data") { SECTION("number") @@ -1205,12 +1221,12 @@ TEST_CASE("BSON roundtrips", "[hide]") (std::istreambuf_iterator(f_bson)), std::istreambuf_iterator()); - SECTION(filename + ": output adapters: std::vector") - { - std::vector vec; - json::to_bson(j1, vec); - CHECK(vec == packed); - } + SECTION(filename + ": output adapters: std::vector") + { + std::vector vec; + json::to_bson(j1, vec); + CHECK(vec == packed); + } } } } diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index 2c21576d..c7736b71 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -139,10 +139,10 @@ bool operator==(Data const& lhs, Data const& rhs) return lhs.a == rhs.a && lhs.b == rhs.b; } -bool operator!=(Data const& lhs, Data const& rhs) -{ - return !(lhs == rhs); -} +//bool operator!=(Data const& lhs, Data const& rhs) +//{ +// return !(lhs == rhs); +//} } /////////////////////////////////////////////////////////////////////