From c772c01a48b36740fdaa7d3b4e62bdf72ec320f9 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 14 Jan 2018 10:27:30 +0100 Subject: [PATCH] :recycle: refactored code to split headers --- develop/detail/parsing/binary_reader.hpp | 270 ++++++++++++++++ develop/detail/parsing/binary_writer.hpp | 386 +++++++++++++++++++++-- develop/json.hpp | 33 ++ src/json.hpp | 2 +- 4 files changed, 671 insertions(+), 20 deletions(-) diff --git a/develop/detail/parsing/binary_reader.hpp b/develop/detail/parsing/binary_reader.hpp index bacf3d46..1ba961c9 100644 --- a/develop/detail/parsing/binary_reader.hpp +++ b/develop/detail/parsing/binary_reader.hpp @@ -90,6 +90,27 @@ class binary_reader return res; } + /*! + @brief create a JSON value from UBJSON input + + @param[in] strict whether to expect the input to be consumed completed + @return JSON value created from UBJSON input + + @throw parse_error.110 if input ended unexpectedly or the end of file was + not reached when @a strict was set to true + @throw parse_error.112 if unsupported byte was read + */ + BasicJsonType parse_ubjson(const bool strict) + { + const auto res = parse_ubjson_internal(); + if (strict) + { + get_ignore_noop(); + check_eof(true); + } + return res; + } + /*! @brief determine system byte order @@ -751,6 +772,16 @@ class binary_reader } } + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + */ + BasicJsonType parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief get next character from the input @@ -766,6 +797,20 @@ class binary_reader return (current = ia->get_character()); } + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + /* @brief read a number from the input @@ -1051,6 +1096,231 @@ class binary_reader return result; } + /*! + @brief reads a UBJSON string + + This function is either called after reading the 'S' byte explicitly + indicating a string, or in case of an object key where the 'S' byte can be + left out. + + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpected byte is read + */ + std::string get_ubjson_string(const bool get_char = true) + { + if (get_char) + { + get(); // TODO: may we ignore N here? + } + + check_eof(); + + switch (current) + { + case 'U': + return get_string(get_number()); + case 'i': + return get_string(get_number()); + case 'I': + return get_string(get_number()); + case 'l': + return get_string(get_number()); + case 'L': + return get_string(get_number()); + default: + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, + "expected a UBJSON string; last byte: 0x" + ss.str())); + } + } + + /*! + @brief determine the type and size for a container + + In the optimized UBJSON format, a type and a size can be provided to allow + for a more compact representation. + + @return pair of the size and the type + */ + std::pair get_ubjson_size_type() + { + std::size_t sz = std::string::npos; + int tc = 0; + + get_ignore_noop(); + + if (current == '$') + { + tc = get(); // must not ignore 'N', because 'N' maybe the type + check_eof(); + + get_ignore_noop(); + if (current != '#') + { + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, + "expected '#' after UBJSON type information; last byte: 0x" + ss.str())); + } + sz = parse_ubjson_internal(); + } + else if (current == '#') + { + sz = parse_ubjson_internal(); + } + + return std::make_pair(sz, tc); + } + + BasicJsonType get_ubjson_value(const int prefix) + { + switch (prefix) + { + case std::char_traits::eof(): // EOF + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + + case 'T': // true + return true; + case 'F': // false + return false; + + case 'Z': // null + return nullptr; + + case 'U': + return get_number(); + case 'i': + return get_number(); + case 'I': + return get_number(); + case 'l': + return get_number(); + case 'L': + return get_number(); + case 'd': + return get_number(); + case 'D': + return get_number(); + + case 'C': // char + { + get(); + check_eof(); + if (JSON_UNLIKELY(not(0 <= current and current <= 127))) + { + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, + "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); + } + return std::string(1, static_cast(current)); + } + + case 'S': // string + return get_ubjson_string(); + + case '[': // array + return get_ubjson_array(); + + case '{': // object + return get_ubjson_object(); + + default: // anything else + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, + "error reading UBJSON; last byte: 0x" + ss.str())); + } + } + + BasicJsonType get_ubjson_array() + { + BasicJsonType result = value_t::array; + const auto size_and_type = get_ubjson_size_type(); + + if (size_and_type.first != std::string::npos) + { + if (size_and_type.second != 0) + { + if (size_and_type.second != 'N') + std::generate_n(std::back_inserter(*result.m_value.array), + size_and_type.first, [this, size_and_type]() + { + return get_ubjson_value(size_and_type.second); + }); + } + else + { + std::generate_n(std::back_inserter(*result.m_value.array), + size_and_type.first, [this]() + { + return parse_ubjson_internal(); + }); + } + } + else + { + while (current != ']') + { + result.push_back(parse_ubjson_internal(false)); + get_ignore_noop(); + } + } + + return result; + } + + BasicJsonType get_ubjson_object() + { + BasicJsonType result = value_t::object; + const auto size_and_type = get_ubjson_size_type(); + + if (size_and_type.first != std::string::npos) + { + if (size_and_type.second != 0) + { + if (size_and_type.second != 'N') + std::generate_n(std::inserter(*result.m_value.object, + result.m_value.object->end()), + size_and_type.first, [this, size_and_type]() + { + auto key = get_ubjson_string(); + auto val = get_ubjson_value(size_and_type.second); + return std::make_pair(std::move(key), std::move(val)); + }); + } + else + { + std::generate_n(std::inserter(*result.m_value.object, + result.m_value.object->end()), + size_and_type.first, [this]() + { + auto key = get_ubjson_string(); + auto val = parse_ubjson_internal(); + return std::make_pair(std::move(key), std::move(val)); + }); + } + } + else + { + while (current != '}') + { + auto key = get_ubjson_string(false); + result[std::move(key)] = parse_ubjson_internal(); + get_ignore_noop(); + } + } + + return result; + } + /*! @brief check if input ended @throw parse_error.110 if input ended diff --git a/develop/detail/parsing/binary_writer.hpp b/develop/detail/parsing/binary_writer.hpp index 68034fc7..a7722d2b 100644 --- a/develop/detail/parsing/binary_writer.hpp +++ b/develop/detail/parsing/binary_writer.hpp @@ -164,23 +164,23 @@ class binary_writer { write_number(static_cast(0x60 + N)); } - else if (N <= 0xFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x78)); write_number(static_cast(N)); } - else if (N <= 0xFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x79)); write_number(static_cast(N)); } - else if (N <= 0xFFFFFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x7A)); write_number(static_cast(N)); } // LCOV_EXCL_START - else if (N <= 0xFFFFFFFFFFFFFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x7B)); write_number(static_cast(N)); @@ -202,23 +202,23 @@ class binary_writer { write_number(static_cast(0x80 + N)); } - else if (N <= 0xFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x98)); write_number(static_cast(N)); } - else if (N <= 0xFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x99)); write_number(static_cast(N)); } - else if (N <= 0xFFFFFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x9A)); write_number(static_cast(N)); } // LCOV_EXCL_START - else if (N <= 0xFFFFFFFFFFFFFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x9B)); write_number(static_cast(N)); @@ -241,23 +241,23 @@ class binary_writer { write_number(static_cast(0xA0 + N)); } - else if (N <= 0xFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0xB8)); write_number(static_cast(N)); } - else if (N <= 0xFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0xB9)); write_number(static_cast(N)); } - else if (N <= 0xFFFFFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0xBA)); write_number(static_cast(N)); } // LCOV_EXCL_START - else if (N <= 0xFFFFFFFFFFFFFFFF) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0xBB)); write_number(static_cast(N)); @@ -425,19 +425,19 @@ class binary_writer // fixstr write_number(static_cast(0xA0 | N)); } - else if (N <= 255) + else if (N <= (std::numeric_limits::max)()) { // str 8 oa->write_character(static_cast(0xD9)); write_number(static_cast(N)); } - else if (N <= 65535) + else if (N <= (std::numeric_limits::max)()) { // str 16 oa->write_character(static_cast(0xDA)); write_number(static_cast(N)); } - else if (N <= 4294967295) + else if (N <= (std::numeric_limits::max)()) { // str 32 oa->write_character(static_cast(0xDB)); @@ -460,13 +460,13 @@ class binary_writer // fixarray write_number(static_cast(0x90 | N)); } - else if (N <= 0xFFFF) + else if (N <= (std::numeric_limits::max)()) { // array 16 oa->write_character(static_cast(0xDC)); write_number(static_cast(N)); } - else if (N <= 0xFFFFFFFF) + else if (N <= (std::numeric_limits::max)()) { // array 32 oa->write_character(static_cast(0xDD)); @@ -490,13 +490,13 @@ class binary_writer // fixmap write_number(static_cast(0x80 | (N & 0xF))); } - else if (N <= 65535) + else if (N <= (std::numeric_limits::max)()) { // map 16 oa->write_character(static_cast(0xDE)); write_number(static_cast(N)); } - else if (N <= 4294967295) + else if (N <= (std::numeric_limits::max)()) { // map 32 oa->write_character(static_cast(0xDF)); @@ -517,6 +517,165 @@ class binary_writer } } + /*! + @param[in] j JSON value to serialize + @param[in] use_count whether to use '#' prefixes (optimized format) + @param[in] use_type whether to use '$' prefixes (optimized format) + @param[in] add_prefix whether prefixes need to be used for this value + */ + void write_ubjson(const BasicJsonType& j, const bool use_count, + const bool use_type, const bool add_prefix = true) + { + switch (j.type()) + { + case value_t::null: + { + if (add_prefix) + { + oa->write_character(static_cast('Z')); + } + break; + } + + case value_t::boolean: + { + if (add_prefix) + oa->write_character(j.m_value.boolean + ? static_cast('T') + : static_cast('F')); + break; + } + + case value_t::number_integer: + { + write_number_with_ubjson_prefix(j.m_value.number_integer, add_prefix); + break; + } + + case value_t::number_unsigned: + { + write_number_with_ubjson_prefix(j.m_value.number_unsigned, add_prefix); + break; + } + + case value_t::number_float: + { + write_number_with_ubjson_prefix(j.m_value.number_float, add_prefix); + break; + } + + case value_t::string: + { + if (add_prefix) + { + oa->write_character(static_cast('S')); + } + write_number_with_ubjson_prefix(j.m_value.string->size(), true); + oa->write_characters( + reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + if (add_prefix) + { + oa->write_character(static_cast('[')); + } + + bool prefix_required = true; + if (use_type and not j.m_value.array->empty()) + { + assert(use_count); + const char first_prefix = ubjson_prefix(j.front()); + const bool same_prefix = std::all_of(j.begin() + 1, j.end(), + [this, first_prefix](const BasicJsonType & v) + { + return ubjson_prefix(v) == first_prefix; + }); + + if (same_prefix) + { + prefix_required = false; + oa->write_character(static_cast('$')); + oa->write_character(static_cast(first_prefix)); + } + } + + if (use_count) + { + oa->write_character(static_cast('#')); + write_number_with_ubjson_prefix(j.m_value.array->size(), true); + } + + for (const auto& el : *j.m_value.array) + { + write_ubjson(el, use_count, use_type, prefix_required); + } + + if (not use_count) + { + oa->write_character(static_cast(']')); + } + + break; + } + + case value_t::object: + { + if (add_prefix) + { + oa->write_character(static_cast('{')); + } + + bool prefix_required = true; + if (use_type and not j.m_value.object->empty()) + { + assert(use_count); + const char first_prefix = ubjson_prefix(j.front()); + const bool same_prefix = std::all_of(j.begin(), j.end(), + [this, first_prefix](const BasicJsonType & v) + { + return ubjson_prefix(v) == first_prefix; + }); + + if (same_prefix) + { + prefix_required = false; + oa->write_character(static_cast('$')); + oa->write_character(static_cast(first_prefix)); + } + } + + if (use_count) + { + oa->write_character(static_cast('#')); + write_number_with_ubjson_prefix(j.m_value.object->size(), true); + } + + for (const auto& el : *j.m_value.object) + { + write_number_with_ubjson_prefix(el.first.size(), true); + oa->write_characters( + reinterpret_cast(el.first.c_str()), + el.first.size()); + write_ubjson(el.second, use_count, use_type, prefix_required); + } + + if (not use_count) + { + oa->write_character(static_cast('}')); + } + + break; + } + + default: + break; + } + } + private: /* @brief write a number to output input @@ -544,6 +703,195 @@ class binary_writer oa->write_characters(vec.data(), sizeof(NumberType)); } + template + void write_number_with_ubjson_prefix(const NumberType n, + const bool add_prefix) + { + if (std::is_floating_point::value) + { + if (add_prefix) + { + oa->write_character(static_cast('D')); // float64 + } + write_number(n); + } + else if (std::is_unsigned::value) + { + if (n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('i')); // uint8 + } + write_number(static_cast(n)); + } + else if (n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('U')); // uint8 + } + write_number(static_cast(n)); + } + else if (n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('I')); // int16 + } + write_number(static_cast(n)); + } + else if (n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('l')); // int32 + } + write_number(static_cast(n)); + } + else if (n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('L')); // int64 + } + write_number(static_cast(n)); + } + else + { + // TODO: replace by exception + assert(false); + } + } + else + { + if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('i')); // int8 + } + write_number(static_cast(n)); + } + else if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('U')); // uint8 + } + write_number(static_cast(n)); + } + else if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('I')); // int16 + } + write_number(static_cast(n)); + } + else if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('l')); // int32 + } + write_number(static_cast(n)); + } + else if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast('L')); // int64 + } + write_number(static_cast(n)); + } + else + { + // TODO: replace by exception + assert(false); + } + } + } + + char ubjson_prefix(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::null: + return 'Z'; + + case value_t::boolean: + return j.m_value.boolean ? 'T' : 'F'; + + case value_t::number_integer: + { + if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + return 'i'; + } + else if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + return 'U'; + } + else if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + return 'I'; + } + else if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + return 'l'; + } + else if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) + { + return 'L'; + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + return 'i'; + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + return 'U'; + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + return 'I'; + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + return 'l'; + } + else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + { + return 'L'; + } + break; + } + + case value_t::number_float: + return 'D'; + + case value_t::string: + return 'S'; + + case value_t::array: + return '['; + + case value_t::object: + return '{'; + + default: + break; + } + + return '\0'; + } + private: /// whether we can assume little endianess const bool is_little_endian = binary_reader::little_endianess(); diff --git a/develop/json.hpp b/develop/json.hpp index f4c139e4..6982a3f4 100644 --- a/develop/json.hpp +++ b/develop/json.hpp @@ -6630,6 +6630,26 @@ class basic_json binary_writer(o).write_msgpack(j); } + static std::vector to_ubjson(const basic_json& j, + const bool use_size = false, const bool use_type = false) + { + std::vector result; + to_ubjson(j, result, use_size, use_type); + return result; + } + + static void to_ubjson(const basic_json& j, detail::output_adapter o, + const bool use_size = false, const bool use_type = false) + { + binary_writer(o).write_ubjson(j, use_size, use_type); + } + + static void to_ubjson(const basic_json& j, detail::output_adapter o, + const bool use_size = false, const bool use_type = false) + { + binary_writer(o).write_ubjson(j, use_size, use_type); + } + /*! @brief create a JSON value from an input in CBOR format @@ -6824,6 +6844,19 @@ class basic_json return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_msgpack(strict); } + static basic_json from_ubjson(detail::input_adapter i, + const bool strict = true) + { + return binary_reader(i).parse_ubjson(strict); + } + + template::value, int> = 0> + static basic_json from_ubjson(A1 && a1, A2 && a2, const bool strict = true) + { + return binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).parse_ubjson(strict); + } + /// @} ////////////////////////// diff --git a/src/json.hpp b/src/json.hpp index 09253359..09da4d16 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6409,7 +6409,7 @@ class binary_writer write_number(static_cast(N)); } // LCOV_EXCL_START - else if (N <= (std::numeric_limits::max)()) + else if (N <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0xBB)); write_number(static_cast(N));