From c772c01a48b36740fdaa7d3b4e62bdf72ec320f9 Mon Sep 17 00:00:00 2001 From: Niels Lohmann <mail@nlohmann.me> Date: Sun, 14 Jan 2018 10:27:30 +0100 Subject: [PATCH] :recycle: refactored code to split headers --- develop/detail/parsing/binary_reader.hpp | 270 ++++++++++++++++ develop/detail/parsing/binary_writer.hpp | 386 +++++++++++++++++++++-- develop/json.hpp | 33 ++ src/json.hpp | 2 +- 4 files changed, 671 insertions(+), 20 deletions(-) diff --git a/develop/detail/parsing/binary_reader.hpp b/develop/detail/parsing/binary_reader.hpp index bacf3d46..1ba961c9 100644 --- a/develop/detail/parsing/binary_reader.hpp +++ b/develop/detail/parsing/binary_reader.hpp @@ -90,6 +90,27 @@ class binary_reader return res; } + /*! + @brief create a JSON value from UBJSON input + + @param[in] strict whether to expect the input to be consumed completely + @return JSON value created from UBJSON input + + @throw parse_error.110 if input ended unexpectedly or the end of file was + not reached when @a strict was set to true + @throw parse_error.112 if unsupported byte was read + */ + BasicJsonType parse_ubjson(const bool strict) + { + const auto res = parse_ubjson_internal(); + if (strict) + { + get_ignore_noop(); + check_eof(true); + } + return res; + } + /*! @brief determine system byte order @@ -751,6 +772,16 @@ class binary_reader } } + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + */ + BasicJsonType parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief get next character from the input @@ -766,6 +797,20 @@ class binary_reader return (current = ia->get_character()); } + /*! 
+ @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + /* @brief read a number from the input @@ -1051,6 +1096,231 @@ class binary_reader return result; } + /*! + @brief reads a UBJSON string + + This function is either called after reading the 'S' byte explicitly + indicating a string, or in case of an object key where the 'S' byte can be + left out. + + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return string + + @throw parse_error.110 if input ended + @throw parse_error.113 if an unexpected byte is read + */ + std::string get_ubjson_string(const bool get_char = true) + { + if (get_char) + { + get(); // TODO: may we ignore N here? + } + + check_eof(); + + switch (current) + { + case 'U': + return get_string(get_number<uint8_t>()); + case 'i': + return get_string(get_number<int8_t>()); + case 'I': + return get_string(get_number<int16_t>()); + case 'l': + return get_string(get_number<int32_t>()); + case 'L': + return get_string(get_number<int64_t>()); + default: + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, + "expected a UBJSON string; last byte: 0x" + ss.str())); + } + } + + /*! + @brief determine the type and size for a container + + In the optimized UBJSON format, a type and a size can be provided to allow + for a more compact representation. 
+ + @return pair of the size and the type + */ + std::pair<std::size_t, int> get_ubjson_size_type() + { + std::size_t sz = std::string::npos; + int tc = 0; + + get_ignore_noop(); + + if (current == '$') + { + tc = get(); // must not ignore 'N', because 'N' may be the type + check_eof(); + + get_ignore_noop(); + if (current != '#') + { + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, + "expected '#' after UBJSON type information; last byte: 0x" + ss.str())); + } + sz = parse_ubjson_internal(); + } + else if (current == '#') + { + sz = parse_ubjson_internal(); + } + + return std::make_pair(sz, tc); + } + + BasicJsonType get_ubjson_value(const int prefix) + { + switch (prefix) + { + case std::char_traits<char>::eof(): // EOF + JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); + + case 'T': // true + return true; + case 'F': // false + return false; + + case 'Z': // null + return nullptr; + + case 'U': + return get_number<uint8_t>(); + case 'i': + return get_number<int8_t>(); + case 'I': + return get_number<int16_t>(); + case 'l': + return get_number<int32_t>(); + case 'L': + return get_number<int64_t>(); + case 'd': + return get_number<float>(); + case 'D': + return get_number<double>(); + + case 'C': // char + { + get(); + check_eof(); + if (JSON_UNLIKELY(not(0 <= current and current <= 127))) + { + std::stringstream ss; + ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(113, chars_read, + "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + ss.str())); + } + return std::string(1, static_cast<char>(current)); + } + + case 'S': // string + return get_ubjson_string(); + + case '[': // array + return get_ubjson_array(); + + case '{': // object + return get_ubjson_object(); + + default: // anything else + std::stringstream ss; + ss << std::setw(2) << std::uppercase 
<< std::setfill('0') << std::hex << current; + JSON_THROW(parse_error::create(112, chars_read, + "error reading UBJSON; last byte: 0x" + ss.str())); + } + } + + BasicJsonType get_ubjson_array() + { + BasicJsonType result = value_t::array; + const auto size_and_type = get_ubjson_size_type(); + + if (size_and_type.first != std::string::npos) + { + if (size_and_type.second != 0) + { + if (size_and_type.second != 'N') + std::generate_n(std::back_inserter(*result.m_value.array), + size_and_type.first, [this, size_and_type]() + { + return get_ubjson_value(size_and_type.second); + }); + } + else + { + std::generate_n(std::back_inserter(*result.m_value.array), + size_and_type.first, [this]() + { + return parse_ubjson_internal(); + }); + } + } + else + { + while (current != ']') + { + result.push_back(parse_ubjson_internal(false)); + get_ignore_noop(); + } + } + + return result; + } + + BasicJsonType get_ubjson_object() + { + BasicJsonType result = value_t::object; + const auto size_and_type = get_ubjson_size_type(); + + if (size_and_type.first != std::string::npos) + { + if (size_and_type.second != 0) + { + if (size_and_type.second != 'N') + std::generate_n(std::inserter(*result.m_value.object, + result.m_value.object->end()), + size_and_type.first, [this, size_and_type]() + { + auto key = get_ubjson_string(); + auto val = get_ubjson_value(size_and_type.second); + return std::make_pair(std::move(key), std::move(val)); + }); + } + else + { + std::generate_n(std::inserter(*result.m_value.object, + result.m_value.object->end()), + size_and_type.first, [this]() + { + auto key = get_ubjson_string(); + auto val = parse_ubjson_internal(); + return std::make_pair(std::move(key), std::move(val)); + }); + } + } + else + { + while (current != '}') + { + auto key = get_ubjson_string(false); + result[std::move(key)] = parse_ubjson_internal(); + get_ignore_noop(); + } + } + + return result; + } + /*! 
@brief check if input ended @throw parse_error.110 if input ended diff --git a/develop/detail/parsing/binary_writer.hpp b/develop/detail/parsing/binary_writer.hpp index 68034fc7..a7722d2b 100644 --- a/develop/detail/parsing/binary_writer.hpp +++ b/develop/detail/parsing/binary_writer.hpp @@ -164,23 +164,23 @@ class binary_writer { write_number(static_cast<uint8_t>(0x60 + N)); } - else if (N <= 0xFF) + else if (N <= (std::numeric_limits<uint8_t>::max)()) { oa->write_character(static_cast<CharType>(0x78)); write_number(static_cast<uint8_t>(N)); } - else if (N <= 0xFFFF) + else if (N <= (std::numeric_limits<uint16_t>::max)()) { oa->write_character(static_cast<CharType>(0x79)); write_number(static_cast<uint16_t>(N)); } - else if (N <= 0xFFFFFFFF) + else if (N <= (std::numeric_limits<uint32_t>::max)()) { oa->write_character(static_cast<CharType>(0x7A)); write_number(static_cast<uint32_t>(N)); } // LCOV_EXCL_START - else if (N <= 0xFFFFFFFFFFFFFFFF) + else if (N <= (std::numeric_limits<uint64_t>::max)()) { oa->write_character(static_cast<CharType>(0x7B)); write_number(static_cast<uint64_t>(N)); @@ -202,23 +202,23 @@ class binary_writer { write_number(static_cast<uint8_t>(0x80 + N)); } - else if (N <= 0xFF) + else if (N <= (std::numeric_limits<uint8_t>::max)()) { oa->write_character(static_cast<CharType>(0x98)); write_number(static_cast<uint8_t>(N)); } - else if (N <= 0xFFFF) + else if (N <= (std::numeric_limits<uint16_t>::max)()) { oa->write_character(static_cast<CharType>(0x99)); write_number(static_cast<uint16_t>(N)); } - else if (N <= 0xFFFFFFFF) + else if (N <= (std::numeric_limits<uint32_t>::max)()) { oa->write_character(static_cast<CharType>(0x9A)); write_number(static_cast<uint32_t>(N)); } // LCOV_EXCL_START - else if (N <= 0xFFFFFFFFFFFFFFFF) + else if (N <= (std::numeric_limits<uint64_t>::max)()) { oa->write_character(static_cast<CharType>(0x9B)); write_number(static_cast<uint64_t>(N)); @@ -241,23 +241,23 @@ class binary_writer { 
write_number(static_cast<uint8_t>(0xA0 + N)); } - else if (N <= 0xFF) + else if (N <= (std::numeric_limits<uint8_t>::max)()) { oa->write_character(static_cast<CharType>(0xB8)); write_number(static_cast<uint8_t>(N)); } - else if (N <= 0xFFFF) + else if (N <= (std::numeric_limits<uint16_t>::max)()) { oa->write_character(static_cast<CharType>(0xB9)); write_number(static_cast<uint16_t>(N)); } - else if (N <= 0xFFFFFFFF) + else if (N <= (std::numeric_limits<uint32_t>::max)()) { oa->write_character(static_cast<CharType>(0xBA)); write_number(static_cast<uint32_t>(N)); } // LCOV_EXCL_START - else if (N <= 0xFFFFFFFFFFFFFFFF) + else if (N <= (std::numeric_limits<uint64_t>::max)()) { oa->write_character(static_cast<CharType>(0xBB)); write_number(static_cast<uint64_t>(N)); @@ -425,19 +425,19 @@ class binary_writer // fixstr write_number(static_cast<uint8_t>(0xA0 | N)); } - else if (N <= 255) + else if (N <= (std::numeric_limits<uint8_t>::max)()) { // str 8 oa->write_character(static_cast<CharType>(0xD9)); write_number(static_cast<uint8_t>(N)); } - else if (N <= 65535) + else if (N <= (std::numeric_limits<uint16_t>::max)()) { // str 16 oa->write_character(static_cast<CharType>(0xDA)); write_number(static_cast<uint16_t>(N)); } - else if (N <= 4294967295) + else if (N <= (std::numeric_limits<uint32_t>::max)()) { // str 32 oa->write_character(static_cast<CharType>(0xDB)); @@ -460,13 +460,13 @@ class binary_writer // fixarray write_number(static_cast<uint8_t>(0x90 | N)); } - else if (N <= 0xFFFF) + else if (N <= (std::numeric_limits<uint16_t>::max)()) { // array 16 oa->write_character(static_cast<CharType>(0xDC)); write_number(static_cast<uint16_t>(N)); } - else if (N <= 0xFFFFFFFF) + else if (N <= (std::numeric_limits<uint32_t>::max)()) { // array 32 oa->write_character(static_cast<CharType>(0xDD)); @@ -490,13 +490,13 @@ class binary_writer // fixmap write_number(static_cast<uint8_t>(0x80 | (N & 0xF))); } - else if (N <= 65535) + else if (N <= 
(std::numeric_limits<uint16_t>::max)()) { // map 16 oa->write_character(static_cast<CharType>(0xDE)); write_number(static_cast<uint16_t>(N)); } - else if (N <= 4294967295) + else if (N <= (std::numeric_limits<uint32_t>::max)()) { // map 32 oa->write_character(static_cast<CharType>(0xDF)); @@ -517,6 +517,165 @@ class binary_writer } } + /*! + @param[in] j JSON value to serialize + @param[in] use_count whether to use '#' prefixes (optimized format) + @param[in] use_type whether to use '$' prefixes (optimized format) + @param[in] add_prefix whether prefixes need to be used for this value + */ + void write_ubjson(const BasicJsonType& j, const bool use_count, + const bool use_type, const bool add_prefix = true) + { + switch (j.type()) + { + case value_t::null: + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('Z')); + } + break; + } + + case value_t::boolean: + { + if (add_prefix) + oa->write_character(j.m_value.boolean + ? static_cast<CharType>('T') + : static_cast<CharType>('F')); + break; + } + + case value_t::number_integer: + { + write_number_with_ubjson_prefix(j.m_value.number_integer, add_prefix); + break; + } + + case value_t::number_unsigned: + { + write_number_with_ubjson_prefix(j.m_value.number_unsigned, add_prefix); + break; + } + + case value_t::number_float: + { + write_number_with_ubjson_prefix(j.m_value.number_float, add_prefix); + break; + } + + case value_t::string: + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('S')); + } + write_number_with_ubjson_prefix(j.m_value.string->size(), true); + oa->write_characters( + reinterpret_cast<const CharType*>(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('[')); + } + + bool prefix_required = true; + if (use_type and not j.m_value.array->empty()) + { + assert(use_count); + const char first_prefix = ubjson_prefix(j.front()); + const bool same_prefix = 
std::all_of(j.begin() + 1, j.end(), + [this, first_prefix](const BasicJsonType & v) + { + return ubjson_prefix(v) == first_prefix; + }); + + if (same_prefix) + { + prefix_required = false; + oa->write_character(static_cast<CharType>('$')); + oa->write_character(static_cast<CharType>(first_prefix)); + } + } + + if (use_count) + { + oa->write_character(static_cast<CharType>('#')); + write_number_with_ubjson_prefix(j.m_value.array->size(), true); + } + + for (const auto& el : *j.m_value.array) + { + write_ubjson(el, use_count, use_type, prefix_required); + } + + if (not use_count) + { + oa->write_character(static_cast<CharType>(']')); + } + + break; + } + + case value_t::object: + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('{')); + } + + bool prefix_required = true; + if (use_type and not j.m_value.object->empty()) + { + assert(use_count); + const char first_prefix = ubjson_prefix(j.front()); + const bool same_prefix = std::all_of(j.begin(), j.end(), + [this, first_prefix](const BasicJsonType & v) + { + return ubjson_prefix(v) == first_prefix; + }); + + if (same_prefix) + { + prefix_required = false; + oa->write_character(static_cast<CharType>('$')); + oa->write_character(static_cast<CharType>(first_prefix)); + } + } + + if (use_count) + { + oa->write_character(static_cast<CharType>('#')); + write_number_with_ubjson_prefix(j.m_value.object->size(), true); + } + + for (const auto& el : *j.m_value.object) + { + write_number_with_ubjson_prefix(el.first.size(), true); + oa->write_characters( + reinterpret_cast<const CharType*>(el.first.c_str()), + el.first.size()); + write_ubjson(el.second, use_count, use_type, prefix_required); + } + + if (not use_count) + { + oa->write_character(static_cast<CharType>('}')); + } + + break; + } + + default: + break; + } + } + private: /* @brief write a number to output input @@ -544,6 +703,195 @@ class binary_writer oa->write_characters(vec.data(), sizeof(NumberType)); } + template<typename NumberType> + void 
write_number_with_ubjson_prefix(const NumberType n, + const bool add_prefix) + { + if (std::is_floating_point<NumberType>::value) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('D')); // float64 + } + write_number(n); + } + else if (std::is_unsigned<NumberType>::value) + { + if (n <= (std::numeric_limits<int8_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('i')); // int8 + } + write_number(static_cast<uint8_t>(n)); + } + else if (n <= (std::numeric_limits<uint8_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('U')); // uint8 + } + write_number(static_cast<uint8_t>(n)); + } + else if (n <= (std::numeric_limits<int16_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('I')); // int16 + } + write_number(static_cast<int16_t>(n)); + } + else if (n <= (std::numeric_limits<int32_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('l')); // int32 + } + write_number(static_cast<int32_t>(n)); + } + else if (n <= (std::numeric_limits<int64_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('L')); // int64 + } + write_number(static_cast<int64_t>(n)); + } + else + { + // TODO: replace by exception + assert(false); + } + } + else + { + if ((std::numeric_limits<int8_t>::min)() <= n and n <= (std::numeric_limits<int8_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('i')); // int8 + } + write_number(static_cast<int8_t>(n)); + } + else if ((std::numeric_limits<uint8_t>::min)() <= n and n <= (std::numeric_limits<uint8_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('U')); // uint8 + } + write_number(static_cast<uint8_t>(n)); + } + else if ((std::numeric_limits<int16_t>::min)() <= n and n <= (std::numeric_limits<int16_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('I')); // int16 + } + 
write_number(static_cast<int16_t>(n)); + } + else if ((std::numeric_limits<int32_t>::min)() <= n and n <= (std::numeric_limits<int32_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('l')); // int32 + } + write_number(static_cast<int32_t>(n)); + } + else if ((std::numeric_limits<int64_t>::min)() <= n and n <= (std::numeric_limits<int64_t>::max)()) + { + if (add_prefix) + { + oa->write_character(static_cast<CharType>('L')); // int64 + } + write_number(static_cast<int64_t>(n)); + } + else + { + // TODO: replace by exception + assert(false); + } + } + } + + char ubjson_prefix(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::null: + return 'Z'; + + case value_t::boolean: + return j.m_value.boolean ? 'T' : 'F'; + + case value_t::number_integer: + { + if ((std::numeric_limits<int8_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int8_t>::max)()) + { + return 'i'; + } + else if ((std::numeric_limits<uint8_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<uint8_t>::max)()) + { + return 'U'; + } + else if ((std::numeric_limits<int16_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int16_t>::max)()) + { + return 'I'; + } + else if ((std::numeric_limits<int32_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int32_t>::max)()) + { + return 'l'; + } + else if ((std::numeric_limits<int64_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int64_t>::max)()) + { + return 'L'; + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned <= (std::numeric_limits<int8_t>::max)()) + { + return 'i'; + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) + { + return 'U'; + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<int16_t>::max)()) + { + return 'I'; + } + 
else if (j.m_value.number_unsigned <= (std::numeric_limits<int32_t>::max)()) + { + return 'l'; + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<int64_t>::max)()) + { + return 'L'; + } + break; + } + + case value_t::number_float: + return 'D'; + + case value_t::string: + return 'S'; + + case value_t::array: + return '['; + + case value_t::object: + return '{'; + + default: + break; + } + + return '\0'; + } + private: /// whether we can assume little endianess const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess(); diff --git a/develop/json.hpp b/develop/json.hpp index f4c139e4..6982a3f4 100644 --- a/develop/json.hpp +++ b/develop/json.hpp @@ -6630,6 +6630,26 @@ class basic_json binary_writer<char>(o).write_msgpack(j); } + static std::vector<uint8_t> to_ubjson(const basic_json& j, + const bool use_size = false, const bool use_type = false) + { + std::vector<uint8_t> result; + to_ubjson(j, result, use_size, use_type); + return result; + } + + static void to_ubjson(const basic_json& j, detail::output_adapter<uint8_t> o, + const bool use_size = false, const bool use_type = false) + { + binary_writer<uint8_t>(o).write_ubjson(j, use_size, use_type); + } + + static void to_ubjson(const basic_json& j, detail::output_adapter<char> o, + const bool use_size = false, const bool use_type = false) + { + binary_writer<char>(o).write_ubjson(j, use_size, use_type); + } + /*! 
@brief create a JSON value from an input in CBOR format @@ -6824,6 +6844,19 @@ class basic_json return binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).parse_msgpack(strict); } + static basic_json from_ubjson(detail::input_adapter i, + const bool strict = true) + { + return binary_reader(i).parse_ubjson(strict); + } + + template<typename A1, typename A2, + detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> + static basic_json from_ubjson(A1 && a1, A2 && a2, const bool strict = true) + { + return binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).parse_ubjson(strict); + } + /// @} ////////////////////////// diff --git a/src/json.hpp b/src/json.hpp index 09253359..09da4d16 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -6409,7 +6409,7 @@ class binary_writer write_number(static_cast<uint32_t>(N)); } // LCOV_EXCL_START - else if (N <= (std::numeric_limits<uint32_t>::max)()) + else if (N <= (std::numeric_limits<uint64_t>::max)()) { oa->write_character(static_cast<CharType>(0xBB)); write_number(static_cast<uint64_t>(N));