Merge branch 'feature/ubjson' into develop

This commit is contained in:
Niels Lohmann 2018-01-21 14:04:29 +01:00
commit 9e5d901f55
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
7 changed files with 3458 additions and 38 deletions

View file

@ -262,6 +262,7 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not
json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved.
json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value.
json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored without changing it to NaN or INF.
json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integer numbers up to 9223372036854775807. |
@liveexample{The following code shows how an `out_of_range` exception can be
caught.,out_of_range}

View file

@ -90,6 +90,27 @@ class binary_reader
return res;
}
/*!
@brief parse a complete UBJSON input into a JSON value

@param[in] strict  whether the input must be consumed completely; if true,
                   trailing 'N' (no-op) bytes are skipped and the input is
                   then required to be at its end

@return JSON value created from UBJSON input

@throw parse_error.110 if input ended unexpectedly or the end of file was
                       not reached when @a strict was set to true
@throw parse_error.112 if unsupported byte was read
*/
BasicJsonType parse_ubjson(const bool strict)
{
    const auto parsed = parse_ubjson_internal();
    if (not strict)
    {
        return parsed;
    }

    // strict mode: nothing but no-op markers may follow the value
    get_ignore_noop();
    check_eof(true);
    return parsed;
}
/*!
@brief determine system byte order
@ -751,6 +772,16 @@ class binary_reader
}
}
/*!
@brief parse the next UBJSON value

@param[in] get_char  whether a new character should be retrieved from the
                     input (true, default; 'N' bytes are skipped) or whether
                     the last read character should be considered instead

@return JSON value selected by the type marker
*/
BasicJsonType parse_ubjson_internal(const bool get_char = true)
{
    const int prefix = get_char ? get_ignore_noop() : current;
    return get_ubjson_value(prefix);
}
/*!
@brief get next character from the input
@ -766,6 +797,20 @@ class binary_reader
return (current = ia->get_character());
}
/*!
@brief read characters until one is not the no-op marker 'N'

At least one character is always read from the input.

@return character read from the input after ignoring all 'N' entries
*/
int get_ignore_noop()
{
    get();
    while (current == 'N')
    {
        get();
    }
    return current;
}
/*
@brief read a number from the input
@ -1051,6 +1096,230 @@ class binary_reader
return result;
}
/*!
@brief reads a UBJSON string

Called either after the 'S' marker was read, or for an object key where the
'S' marker can be left out. The string length is introduced by one of the
integer markers 'U', 'i', 'I', 'l', or 'L'.

@param[in] get_char  whether a new character should be retrieved from the
                     input (true, default) or whether the last read
                     character should be considered instead

@return string

@throw parse_error.110 if input ended
@throw parse_error.113 if an unexpected byte is read
*/
std::string get_ubjson_string(const bool get_char = true)
{
    if (get_char)
    {
        get();  // TODO: may we ignore N here?
    }
    check_eof();

    switch (current)
    {
        case 'U':
            return get_string(get_number<uint8_t>());

        case 'i':
            return get_string(get_number<int8_t>());

        case 'I':
            return get_string(get_number<int16_t>());

        case 'l':
            return get_string(get_number<int32_t>());

        case 'L':
            return get_string(get_number<int64_t>());

        default:
            break;
    }

    // no length marker found: report the last byte read in hexadecimal
    std::stringstream hex_byte;
    hex_byte << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current;
    JSON_THROW(parse_error::create(113, chars_read,
                                   "expected a UBJSON string; last byte: 0x" + hex_byte.str()));
}
/*!
@brief determine the type and size for a container

In the optimized UBJSON format, a type ('$' followed by a type marker) and a
size ('#' followed by an integer value) can be provided to allow for a more
compact representation. A type must always be followed by a size.

@return pair of the size and the type; the size is std::string::npos if no
        size was given, and the type is 0 if no type was given

@throw parse_error.110 if input ended after '$' or after the type marker
@throw parse_error.112 if the type marker is not followed by '#'
*/
std::pair<std::size_t, int> get_ubjson_size_type()
{
    std::size_t sz = std::string::npos;
    int tc = 0;

    get_ignore_noop();

    if (current == '$')
    {
        tc = get();  // must not ignore 'N', because 'N' maybe the type
        check_eof();

        get_ignore_noop();
        // truncated input must be reported as end of input (110) rather than
        // as an unexpected byte with a meaningless EOF value in the message
        check_eof();
        if (current != '#')
        {
            std::stringstream ss;
            ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current;
            JSON_THROW(parse_error::create(112, chars_read,
                                           "expected '#' after UBJSON type information; last byte: 0x" + ss.str()));
        }

        // the size is encoded like any other UBJSON value
        sz = parse_ubjson_internal();
    }
    else if (current == '#')
    {
        sz = parse_ubjson_internal();
    }

    return std::make_pair(sz, tc);
}
/*!
@brief read the value that belongs to a UBJSON type marker

@param[in] prefix  type marker of the value to read (or EOF)

@return JSON value

@throw parse_error.110 if @a prefix is EOF or the input ends after 'C'
@throw parse_error.112 if @a prefix is not a handled marker; the message
                       reports the last byte read from the input
@throw parse_error.113 if the byte after 'C' is larger than 0x7F
*/
BasicJsonType get_ubjson_value(const int prefix)
{
    switch (prefix)
    {
        case std::char_traits<char>::eof():  // EOF
            JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input"));

        // literals
        case 'T':
            return true;
        case 'F':
            return false;
        case 'Z':
            return nullptr;

        // numbers
        case 'U':
            return get_number<uint8_t>();
        case 'i':
            return get_number<int8_t>();
        case 'I':
            return get_number<int16_t>();
        case 'l':
            return get_number<int32_t>();
        case 'L':
            return get_number<int64_t>();
        case 'd':
            return get_number<float>();
        case 'D':
            return get_number<double>();

        // a single character, returned as a string of length one
        case 'C':
        {
            get();
            check_eof();
            if (JSON_UNLIKELY(current > 127))
            {
                std::stringstream hex_byte;
                hex_byte << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current;
                JSON_THROW(parse_error::create(113, chars_read,
                                               "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + hex_byte.str()));
            }
            const char single = static_cast<char>(current);
            return std::string(1, single);
        }

        // strings and containers
        case 'S':
            return get_ubjson_string();
        case '[':
            return get_ubjson_array();
        case '{':
            return get_ubjson_object();

        default:
            break;
    }

    // anything else
    std::stringstream hex_byte;
    hex_byte << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current;
    JSON_THROW(parse_error::create(112, chars_read,
                                   "error reading UBJSON; last byte: 0x" + hex_byte.str()));
}
/*!
@brief read a UBJSON array after its '[' marker

Handles three layouts: no size given (elements are read until ']'), size
only (each element carries its own type marker), and size plus type (the
elements carry no marker). With size and type 'N', no elements are read.

@return JSON array
*/
BasicJsonType get_ubjson_array()
{
    BasicJsonType result = value_t::array;
    const auto size_and_type = get_ubjson_size_type();
    const std::size_t count = size_and_type.first;
    const int type = size_and_type.second;

    if (count == std::string::npos)
    {
        // no size given: the first byte of the first element (or ']') has
        // already been read by get_ubjson_size_type
        while (current != ']')
        {
            result.push_back(parse_ubjson_internal(false));
            get_ignore_noop();
        }
        return result;
    }

    if (type == 0)
    {
        // size only: every element has its own type marker
        std::generate_n(std::back_inserter(*result.m_value.array),
                        count, [this]()
        {
            return parse_ubjson_internal();
        });
        return result;
    }

    if (type != 'N')
    {
        // size and type: elements are stored without a type marker
        std::generate_n(std::back_inserter(*result.m_value.array),
                        count, [this, type]()
        {
            return get_ubjson_value(type);
        });
    }

    return result;
}
/*!
@brief read a UBJSON object after its '{' marker

Keys are length-prefixed strings without the 'S' marker. Handles three
layouts: no size given (members are read until '}'), size only (each value
carries its own type marker), and size plus type (values carry no marker).

@return JSON object
*/
BasicJsonType get_ubjson_object()
{
    BasicJsonType result = value_t::object;
    const auto size_and_type = get_ubjson_size_type();
    const std::size_t count = size_and_type.first;
    const int type = size_and_type.second;

    if (count == std::string::npos)
    {
        // no size given: the first byte of the first key (or '}') has
        // already been read by get_ubjson_size_type
        while (current != '}')
        {
            auto key = get_ubjson_string(false);
            result[std::move(key)] = parse_ubjson_internal();
            get_ignore_noop();
        }
        return result;
    }

    if (type != 0)
    {
        // size and type: values are stored without a type marker
        std::generate_n(std::inserter(*result.m_value.object,
                                      result.m_value.object->end()),
                        count, [this, type]()
        {
            auto key = get_ubjson_string();
            auto val = get_ubjson_value(type);
            return std::make_pair(std::move(key), std::move(val));
        });
    }
    else
    {
        // size only: every value has its own type marker
        std::generate_n(std::inserter(*result.m_value.object,
                                      result.m_value.object->end()),
                        count, [this]()
        {
            auto key = get_ubjson_string();
            auto val = parse_ubjson_internal();
            return std::make_pair(std::move(key), std::move(val));
        });
    }

    return result;
}
/*!
@brief check if input ended
@throw parse_error.110 if input ended

View file

@ -164,23 +164,23 @@ class binary_writer
{
write_number(static_cast<uint8_t>(0x60 + N));
}
else if (N <= 0xFF)
else if (N <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0x78));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0x79));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0x7A));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
else if (N <= (std::numeric_limits<uint64_t>::max)())
{
oa->write_character(static_cast<CharType>(0x7B));
write_number(static_cast<uint64_t>(N));
@ -202,23 +202,23 @@ class binary_writer
{
write_number(static_cast<uint8_t>(0x80 + N));
}
else if (N <= 0xFF)
else if (N <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0x98));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0x99));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0x9A));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
else if (N <= (std::numeric_limits<uint64_t>::max)())
{
oa->write_character(static_cast<CharType>(0x9B));
write_number(static_cast<uint64_t>(N));
@ -241,23 +241,23 @@ class binary_writer
{
write_number(static_cast<uint8_t>(0xA0 + N));
}
else if (N <= 0xFF)
else if (N <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0xB8));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0xB9));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0xBA));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
else if (N <= (std::numeric_limits<uint64_t>::max)())
{
oa->write_character(static_cast<CharType>(0xBB));
write_number(static_cast<uint64_t>(N));
@ -425,19 +425,19 @@ class binary_writer
// fixstr
write_number(static_cast<uint8_t>(0xA0 | N));
}
else if (N <= 255)
else if (N <= (std::numeric_limits<uint8_t>::max)())
{
// str 8
oa->write_character(static_cast<CharType>(0xD9));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 65535)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
// str 16
oa->write_character(static_cast<CharType>(0xDA));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 4294967295)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
// str 32
oa->write_character(static_cast<CharType>(0xDB));
@ -460,13 +460,13 @@ class binary_writer
// fixarray
write_number(static_cast<uint8_t>(0x90 | N));
}
else if (N <= 0xFFFF)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
// array 16
oa->write_character(static_cast<CharType>(0xDC));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
// array 32
oa->write_character(static_cast<CharType>(0xDD));
@ -490,13 +490,13 @@ class binary_writer
// fixmap
write_number(static_cast<uint8_t>(0x80 | (N & 0xF)));
}
else if (N <= 65535)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
// map 16
oa->write_character(static_cast<CharType>(0xDE));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 4294967295)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
// map 32
oa->write_character(static_cast<CharType>(0xDF));
@ -517,6 +517,165 @@ class binary_writer
}
}
/*!
@brief serialize a JSON value to UBJSON and write it to the output adapter

Containers are written recursively. With @a use_count, containers are
written with a '#' size and without a closing marker; with @a use_type,
containers whose elements all share one type marker additionally get a '$'
type and their elements are written without individual markers.

@param[in] j JSON value to serialize
@param[in] use_count whether to use '#' prefixes (optimized format)
@param[in] use_type whether to use '$' prefixes (optimized format); for
                    non-empty containers this asserts that @a use_count is
                    set as well
@param[in] add_prefix whether prefixes need to be used for this value
*/
void write_ubjson(const BasicJsonType& j, const bool use_count,
const bool use_type, const bool add_prefix = true)
{
switch (j.type())
{
case value_t::null:
{
// null has no payload: without a prefix, nothing is written
if (add_prefix)
{
oa->write_character(static_cast<CharType>('Z'));
}
break;
}
case value_t::boolean:
{
// the marker itself ('T' or 'F') is the value; no payload follows
if (add_prefix)
oa->write_character(j.m_value.boolean
? static_cast<CharType>('T')
: static_cast<CharType>('F'));
break;
}
case value_t::number_integer:
{
write_number_with_ubjson_prefix(j.m_value.number_integer, add_prefix);
break;
}
case value_t::number_unsigned:
{
write_number_with_ubjson_prefix(j.m_value.number_unsigned, add_prefix);
break;
}
case value_t::number_float:
{
write_number_with_ubjson_prefix(j.m_value.number_float, add_prefix);
break;
}
case value_t::string:
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('S'));
}
// the length is always written with its own integer type marker,
// followed by the raw characters
write_number_with_ubjson_prefix(j.m_value.string->size(), true);
oa->write_characters(
reinterpret_cast<const CharType*>(j.m_value.string->c_str()),
j.m_value.string->size());
break;
}
case value_t::array:
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('['));
}
// whether the elements need their own type markers
bool prefix_required = true;
if (use_type and not j.m_value.array->empty())
{
assert(use_count);
// compare the marker of the first element with all others
const char first_prefix = ubjson_prefix(j.front());
const bool same_prefix = std::all_of(j.begin() + 1, j.end(),
[this, first_prefix](const BasicJsonType & v)
{
return ubjson_prefix(v) == first_prefix;
});
if (same_prefix)
{
// write the shared type once and omit it for the elements
prefix_required = false;
oa->write_character(static_cast<CharType>('$'));
oa->write_character(static_cast<CharType>(first_prefix));
}
}
if (use_count)
{
oa->write_character(static_cast<CharType>('#'));
write_number_with_ubjson_prefix(j.m_value.array->size(), true);
}
for (const auto& el : *j.m_value.array)
{
write_ubjson(el, use_count, use_type, prefix_required);
}
// a closing marker is only written if no size was written
if (not use_count)
{
oa->write_character(static_cast<CharType>(']'));
}
break;
}
case value_t::object:
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('{'));
}
// whether the values need their own type markers
bool prefix_required = true;
if (use_type and not j.m_value.object->empty())
{
assert(use_count);
// compare the marker of the first value with all values
const char first_prefix = ubjson_prefix(j.front());
const bool same_prefix = std::all_of(j.begin(), j.end(),
[this, first_prefix](const BasicJsonType & v)
{
return ubjson_prefix(v) == first_prefix;
});
if (same_prefix)
{
// write the shared type once and omit it for the values
prefix_required = false;
oa->write_character(static_cast<CharType>('$'));
oa->write_character(static_cast<CharType>(first_prefix));
}
}
if (use_count)
{
oa->write_character(static_cast<CharType>('#'));
write_number_with_ubjson_prefix(j.m_value.object->size(), true);
}
for (const auto& el : *j.m_value.object)
{
// keys are written as length plus characters, without an 'S' marker
write_number_with_ubjson_prefix(el.first.size(), true);
oa->write_characters(
reinterpret_cast<const CharType*>(el.first.c_str()),
el.first.size());
write_ubjson(el.second, use_count, use_type, prefix_required);
}
// a closing marker is only written if no size was written
if (not use_count)
{
oa->write_character(static_cast<CharType>('}'));
}
break;
}
// any other value type writes nothing
default:
break;
}
}
private:
/*
@brief write a number to the output
@ -544,6 +703,200 @@ class binary_writer
oa->write_characters(vec.data(), sizeof(NumberType));
}
template<typename NumberType>
void write_number_with_ubjson_prefix(const NumberType n,
const bool add_prefix)
{
if (std::is_floating_point<NumberType>::value)
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('D')); // float64
}
write_number(n);
}
else if (std::is_unsigned<NumberType>::value)
{
if (n <= (std::numeric_limits<int8_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('i')); // uint8
}
write_number(static_cast<uint8_t>(n));
}
else if (n <= (std::numeric_limits<uint8_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('U')); // uint8
}
write_number(static_cast<uint8_t>(n));
}
else if (n <= (std::numeric_limits<int16_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('I')); // int16
}
write_number(static_cast<int16_t>(n));
}
else if (n <= (std::numeric_limits<int32_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('l')); // int32
}
write_number(static_cast<int32_t>(n));
}
else if (n <= (std::numeric_limits<int64_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('L')); // int64
}
write_number(static_cast<int64_t>(n));
}
else
{
JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(n)));
}
}
else
{
if ((std::numeric_limits<int8_t>::min)() <= n and n <= (std::numeric_limits<int8_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('i')); // int8
}
write_number(static_cast<int8_t>(n));
}
else if ((std::numeric_limits<uint8_t>::min)() <= n and n <= (std::numeric_limits<uint8_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('U')); // uint8
}
write_number(static_cast<uint8_t>(n));
}
else if ((std::numeric_limits<int16_t>::min)() <= n and n <= (std::numeric_limits<int16_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('I')); // int16
}
write_number(static_cast<int16_t>(n));
}
else if ((std::numeric_limits<int32_t>::min)() <= n and n <= (std::numeric_limits<int32_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('l')); // int32
}
write_number(static_cast<int32_t>(n));
}
else if ((std::numeric_limits<int64_t>::min)() <= n and n <= (std::numeric_limits<int64_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('L')); // int64
}
write_number(static_cast<int64_t>(n));
}
// LCOV_EXCL_START
else
{
JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(n)));
}
// LCOV_EXCL_STOP
}
}
/*!
@brief determine the type prefix of container values

@param[in] j  JSON value

@return UBJSON type marker that would be used for @a j; 'N' for any value
        type not listed in the switch (discarded values)

@note This function does not need to be 100% accurate when it comes to
      integer limits. In case a number exceeds the limits of int64_t,
      this will be detected by a later call to function
      write_number_with_ubjson_prefix. Therefore, we return 'L' for any
      value that does not fit the previous limits.
*/
char ubjson_prefix(const BasicJsonType& j) const noexcept
{
    switch (j.type())
    {
        case value_t::null:
            return 'Z';

        case value_t::boolean:
            return j.m_value.boolean ? 'T' : 'F';

        case value_t::number_integer:
        {
            const auto& value = j.m_value.number_integer;
            if ((std::numeric_limits<int8_t>::min)() <= value and value <= (std::numeric_limits<int8_t>::max)())
            {
                return 'i';
            }
            if ((std::numeric_limits<uint8_t>::min)() <= value and value <= (std::numeric_limits<uint8_t>::max)())
            {
                return 'U';
            }
            if ((std::numeric_limits<int16_t>::min)() <= value and value <= (std::numeric_limits<int16_t>::max)())
            {
                return 'I';
            }
            if ((std::numeric_limits<int32_t>::min)() <= value and value <= (std::numeric_limits<int32_t>::max)())
            {
                return 'l';
            }
            // no check and assume int64_t (see note above)
            return 'L';
        }

        case value_t::number_unsigned:
        {
            const auto& value = j.m_value.number_unsigned;
            if (value <= (std::numeric_limits<int8_t>::max)())
            {
                return 'i';
            }
            if (value <= (std::numeric_limits<uint8_t>::max)())
            {
                return 'U';
            }
            if (value <= (std::numeric_limits<int16_t>::max)())
            {
                return 'I';
            }
            if (value <= (std::numeric_limits<int32_t>::max)())
            {
                return 'l';
            }
            // no check and assume int64_t (see note above)
            return 'L';
        }

        case value_t::number_float:
            return 'D';

        case value_t::string:
            return 'S';

        case value_t::array:
            return '[';

        case value_t::object:
            return '{';

        default:  // discarded values
            return 'N';
    }
}
private:
/// whether we can assume little endianess
const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();

View file

@ -6633,6 +6633,26 @@ class basic_json
binary_writer<char>(o).write_msgpack(j);
}
/*!
@brief serialize a JSON value to a UBJSON byte vector

@param[in] j         JSON value to serialize
@param[in] use_size  whether containers are written with a '#' size
@param[in] use_type  whether containers are written with a '$' type

@return bytes of the UBJSON serialization
*/
static std::vector<uint8_t> to_ubjson(const basic_json& j,
                                      const bool use_size = false, const bool use_type = false)
{
    std::vector<uint8_t> bytes;
    to_ubjson(j, bytes, use_size, use_type);
    return bytes;
}
static void to_ubjson(const basic_json& j, detail::output_adapter<uint8_t> o,
const bool use_size = false, const bool use_type = false)
{
binary_writer<uint8_t>(o).write_ubjson(j, use_size, use_type);
}
static void to_ubjson(const basic_json& j, detail::output_adapter<char> o,
const bool use_size = false, const bool use_type = false)
{
binary_writer<char>(o).write_ubjson(j, use_size, use_type);
}
/*!
@brief create a JSON value from an input in CBOR format
@ -6827,6 +6847,19 @@ class basic_json
return binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).parse_msgpack(strict);
}
/*!
@brief create a JSON value from an input in UBJSON format

@param[in] i       input adapter to read from
@param[in] strict  whether the input must be consumed completely
                   (true by default)

@return JSON value parsed from the input
*/
static basic_json from_ubjson(detail::input_adapter i,
                              const bool strict = true)
{
    binary_reader reader(i);
    return reader.parse_ubjson(strict);
}
/*!
@brief create a JSON value from an input in UBJSON format

This overload takes part in overload resolution only if an input adapter
can be constructed from the two arguments.

@param[in] a1      first argument forwarded to the input adapter
@param[in] a2      second argument forwarded to the input adapter
@param[in] strict  whether the input must be consumed completely
                   (true by default)

@return JSON value parsed from the input
*/
template<typename A1, typename A2,
         detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0>
static basic_json from_ubjson(A1 && a1, A2 && a2, const bool strict = true)
{
    binary_reader reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2)));
    return reader.parse_ubjson(strict);
}
/// @}
//////////////////////////

View file

@ -712,6 +712,7 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not
json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved.
json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value.
json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored without changing it to NaN or INF.
json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integer numbers up to 9223372036854775807. |
@liveexample{The following code shows how an `out_of_range` exception can be
caught.,out_of_range}
@ -4865,6 +4866,27 @@ class binary_reader
return res;
}
/*!
@brief parse a complete UBJSON input into a JSON value

@param[in] strict  whether the input must be consumed completely; if true,
                   trailing 'N' (no-op) bytes are skipped and the input is
                   then required to be at its end

@return JSON value created from UBJSON input

@throw parse_error.110 if input ended unexpectedly or the end of file was
                       not reached when @a strict was set to true
@throw parse_error.112 if unsupported byte was read
*/
BasicJsonType parse_ubjson(const bool strict)
{
    const auto parsed = parse_ubjson_internal();
    if (not strict)
    {
        return parsed;
    }

    // strict mode: nothing but no-op markers may follow the value
    get_ignore_noop();
    check_eof(true);
    return parsed;
}
/*!
@brief determine system byte order
@ -5526,6 +5548,16 @@ class binary_reader
}
}
/*!
@brief parse the next UBJSON value

@param[in] get_char  whether a new character should be retrieved from the
                     input (true, default; 'N' bytes are skipped) or whether
                     the last read character should be considered instead

@return JSON value selected by the type marker
*/
BasicJsonType parse_ubjson_internal(const bool get_char = true)
{
    const int prefix = get_char ? get_ignore_noop() : current;
    return get_ubjson_value(prefix);
}
/*!
@brief get next character from the input
@ -5541,6 +5573,20 @@ class binary_reader
return (current = ia->get_character());
}
/*!
@brief read characters until one is not the no-op marker 'N'

At least one character is always read from the input.

@return character read from the input after ignoring all 'N' entries
*/
int get_ignore_noop()
{
    get();
    while (current == 'N')
    {
        get();
    }
    return current;
}
/*
@brief read a number from the input
@ -5826,6 +5872,230 @@ class binary_reader
return result;
}
/*!
@brief reads a UBJSON string

Called either after the 'S' marker was read, or for an object key where the
'S' marker can be left out. The string length is introduced by one of the
integer markers 'U', 'i', 'I', 'l', or 'L'.

@param[in] get_char  whether a new character should be retrieved from the
                     input (true, default) or whether the last read
                     character should be considered instead

@return string

@throw parse_error.110 if input ended
@throw parse_error.113 if an unexpected byte is read
*/
std::string get_ubjson_string(const bool get_char = true)
{
    if (get_char)
    {
        get();  // TODO: may we ignore N here?
    }
    check_eof();

    switch (current)
    {
        case 'U':
            return get_string(get_number<uint8_t>());

        case 'i':
            return get_string(get_number<int8_t>());

        case 'I':
            return get_string(get_number<int16_t>());

        case 'l':
            return get_string(get_number<int32_t>());

        case 'L':
            return get_string(get_number<int64_t>());

        default:
            break;
    }

    // no length marker found: report the last byte read in hexadecimal
    std::stringstream hex_byte;
    hex_byte << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current;
    JSON_THROW(parse_error::create(113, chars_read,
                                   "expected a UBJSON string; last byte: 0x" + hex_byte.str()));
}
/*!
@brief determine the type and size for a container

In the optimized UBJSON format, a type ('$' followed by a type marker) and a
size ('#' followed by an integer value) can be provided to allow for a more
compact representation. A type must always be followed by a size.

@return pair of the size and the type; the size is std::string::npos if no
        size was given, and the type is 0 if no type was given

@throw parse_error.110 if input ended after '$' or after the type marker
@throw parse_error.112 if the type marker is not followed by '#'
*/
std::pair<std::size_t, int> get_ubjson_size_type()
{
    std::size_t sz = std::string::npos;
    int tc = 0;

    get_ignore_noop();

    if (current == '$')
    {
        tc = get();  // must not ignore 'N', because 'N' maybe the type
        check_eof();

        get_ignore_noop();
        // truncated input must be reported as end of input (110) rather than
        // as an unexpected byte with a meaningless EOF value in the message
        check_eof();
        if (current != '#')
        {
            std::stringstream ss;
            ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current;
            JSON_THROW(parse_error::create(112, chars_read,
                                           "expected '#' after UBJSON type information; last byte: 0x" + ss.str()));
        }

        // the size is encoded like any other UBJSON value
        sz = parse_ubjson_internal();
    }
    else if (current == '#')
    {
        sz = parse_ubjson_internal();
    }

    return std::make_pair(sz, tc);
}
/*!
@brief read the value that belongs to a UBJSON type marker

@param[in] prefix  type marker of the value to read (or EOF)

@return JSON value

@throw parse_error.110 if @a prefix is EOF or the input ends after 'C'
@throw parse_error.112 if @a prefix is not a handled marker; the message
                       reports the last byte read from the input
@throw parse_error.113 if the byte after 'C' is larger than 0x7F
*/
BasicJsonType get_ubjson_value(const int prefix)
{
    switch (prefix)
    {
        case std::char_traits<char>::eof():  // EOF
            JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input"));

        // literals
        case 'T':
            return true;
        case 'F':
            return false;
        case 'Z':
            return nullptr;

        // numbers
        case 'U':
            return get_number<uint8_t>();
        case 'i':
            return get_number<int8_t>();
        case 'I':
            return get_number<int16_t>();
        case 'l':
            return get_number<int32_t>();
        case 'L':
            return get_number<int64_t>();
        case 'd':
            return get_number<float>();
        case 'D':
            return get_number<double>();

        // a single character, returned as a string of length one
        case 'C':
        {
            get();
            check_eof();
            if (JSON_UNLIKELY(current > 127))
            {
                std::stringstream hex_byte;
                hex_byte << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current;
                JSON_THROW(parse_error::create(113, chars_read,
                                               "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + hex_byte.str()));
            }
            const char single = static_cast<char>(current);
            return std::string(1, single);
        }

        // strings and containers
        case 'S':
            return get_ubjson_string();
        case '[':
            return get_ubjson_array();
        case '{':
            return get_ubjson_object();

        default:
            break;
    }

    // anything else
    std::stringstream hex_byte;
    hex_byte << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << current;
    JSON_THROW(parse_error::create(112, chars_read,
                                   "error reading UBJSON; last byte: 0x" + hex_byte.str()));
}
/*!
@brief read a UBJSON array after its '[' marker

Handles three layouts: no size given (elements are read until ']'), size
only (each element carries its own type marker), and size plus type (the
elements carry no marker). With size and type 'N', no elements are read.

@return JSON array
*/
BasicJsonType get_ubjson_array()
{
    BasicJsonType result = value_t::array;
    const auto size_and_type = get_ubjson_size_type();
    const std::size_t count = size_and_type.first;
    const int type = size_and_type.second;

    if (count == std::string::npos)
    {
        // no size given: the first byte of the first element (or ']') has
        // already been read by get_ubjson_size_type
        while (current != ']')
        {
            result.push_back(parse_ubjson_internal(false));
            get_ignore_noop();
        }
        return result;
    }

    if (type == 0)
    {
        // size only: every element has its own type marker
        std::generate_n(std::back_inserter(*result.m_value.array),
                        count, [this]()
        {
            return parse_ubjson_internal();
        });
        return result;
    }

    if (type != 'N')
    {
        // size and type: elements are stored without a type marker
        std::generate_n(std::back_inserter(*result.m_value.array),
                        count, [this, type]()
        {
            return get_ubjson_value(type);
        });
    }

    return result;
}
/*!
@brief read a UBJSON object after its '{' marker

Keys are length-prefixed strings without the 'S' marker. Handles three
layouts: no size given (members are read until '}'), size only (each value
carries its own type marker), and size plus type (values carry no marker).

@return JSON object
*/
BasicJsonType get_ubjson_object()
{
    BasicJsonType result = value_t::object;
    const auto size_and_type = get_ubjson_size_type();
    const std::size_t count = size_and_type.first;
    const int type = size_and_type.second;

    if (count == std::string::npos)
    {
        // no size given: the first byte of the first key (or '}') has
        // already been read by get_ubjson_size_type
        while (current != '}')
        {
            auto key = get_ubjson_string(false);
            result[std::move(key)] = parse_ubjson_internal();
            get_ignore_noop();
        }
        return result;
    }

    if (type != 0)
    {
        // size and type: values are stored without a type marker
        std::generate_n(std::inserter(*result.m_value.object,
                                      result.m_value.object->end()),
                        count, [this, type]()
        {
            auto key = get_ubjson_string();
            auto val = get_ubjson_value(type);
            return std::make_pair(std::move(key), std::move(val));
        });
    }
    else
    {
        // size only: every value has its own type marker
        std::generate_n(std::inserter(*result.m_value.object,
                                      result.m_value.object->end()),
                        count, [this]()
        {
            auto key = get_ubjson_string();
            auto val = parse_ubjson_internal();
            return std::make_pair(std::move(key), std::move(val));
        });
    }

    return result;
}
/*!
@brief check if input ended
@throw parse_error.110 if input ended
@ -6033,23 +6303,23 @@ class binary_writer
{
write_number(static_cast<uint8_t>(0x60 + N));
}
else if (N <= 0xFF)
else if (N <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0x78));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0x79));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0x7A));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
else if (N <= (std::numeric_limits<uint64_t>::max)())
{
oa->write_character(static_cast<CharType>(0x7B));
write_number(static_cast<uint64_t>(N));
@ -6071,23 +6341,23 @@ class binary_writer
{
write_number(static_cast<uint8_t>(0x80 + N));
}
else if (N <= 0xFF)
else if (N <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0x98));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0x99));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0x9A));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
else if (N <= (std::numeric_limits<uint64_t>::max)())
{
oa->write_character(static_cast<CharType>(0x9B));
write_number(static_cast<uint64_t>(N));
@ -6110,23 +6380,23 @@ class binary_writer
{
write_number(static_cast<uint8_t>(0xA0 + N));
}
else if (N <= 0xFF)
else if (N <= (std::numeric_limits<uint8_t>::max)())
{
oa->write_character(static_cast<CharType>(0xB8));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 0xFFFF)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
oa->write_character(static_cast<CharType>(0xB9));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
oa->write_character(static_cast<CharType>(0xBA));
write_number(static_cast<uint32_t>(N));
}
// LCOV_EXCL_START
else if (N <= 0xFFFFFFFFFFFFFFFF)
else if (N <= (std::numeric_limits<uint64_t>::max)())
{
oa->write_character(static_cast<CharType>(0xBB));
write_number(static_cast<uint64_t>(N));
@ -6294,19 +6564,19 @@ class binary_writer
// fixstr
write_number(static_cast<uint8_t>(0xA0 | N));
}
else if (N <= 255)
else if (N <= (std::numeric_limits<uint8_t>::max)())
{
// str 8
oa->write_character(static_cast<CharType>(0xD9));
write_number(static_cast<uint8_t>(N));
}
else if (N <= 65535)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
// str 16
oa->write_character(static_cast<CharType>(0xDA));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 4294967295)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
// str 32
oa->write_character(static_cast<CharType>(0xDB));
@ -6329,13 +6599,13 @@ class binary_writer
// fixarray
write_number(static_cast<uint8_t>(0x90 | N));
}
else if (N <= 0xFFFF)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
// array 16
oa->write_character(static_cast<CharType>(0xDC));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 0xFFFFFFFF)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
// array 32
oa->write_character(static_cast<CharType>(0xDD));
@ -6359,13 +6629,13 @@ class binary_writer
// fixmap
write_number(static_cast<uint8_t>(0x80 | (N & 0xF)));
}
else if (N <= 65535)
else if (N <= (std::numeric_limits<uint16_t>::max)())
{
// map 16
oa->write_character(static_cast<CharType>(0xDE));
write_number(static_cast<uint16_t>(N));
}
else if (N <= 4294967295)
else if (N <= (std::numeric_limits<uint32_t>::max)())
{
// map 32
oa->write_character(static_cast<CharType>(0xDF));
@ -6386,6 +6656,165 @@ class binary_writer
}
}
/*!
@param[in] j JSON value to serialize
@param[in] use_count whether to use '#' prefixes (optimized format)
@param[in] use_type whether to use '$' prefixes (optimized format)
@param[in] add_prefix whether prefixes need to be used for this value
*/
void write_ubjson(const BasicJsonType& j, const bool use_count,
const bool use_type, const bool add_prefix = true)
{
switch (j.type())
{
case value_t::null:
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('Z'));
}
break;
}
case value_t::boolean:
{
if (add_prefix)
oa->write_character(j.m_value.boolean
? static_cast<CharType>('T')
: static_cast<CharType>('F'));
break;
}
case value_t::number_integer:
{
write_number_with_ubjson_prefix(j.m_value.number_integer, add_prefix);
break;
}
case value_t::number_unsigned:
{
write_number_with_ubjson_prefix(j.m_value.number_unsigned, add_prefix);
break;
}
case value_t::number_float:
{
write_number_with_ubjson_prefix(j.m_value.number_float, add_prefix);
break;
}
case value_t::string:
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('S'));
}
write_number_with_ubjson_prefix(j.m_value.string->size(), true);
oa->write_characters(
reinterpret_cast<const CharType*>(j.m_value.string->c_str()),
j.m_value.string->size());
break;
}
case value_t::array:
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('['));
}
bool prefix_required = true;
if (use_type and not j.m_value.array->empty())
{
assert(use_count);
const char first_prefix = ubjson_prefix(j.front());
const bool same_prefix = std::all_of(j.begin() + 1, j.end(),
[this, first_prefix](const BasicJsonType & v)
{
return ubjson_prefix(v) == first_prefix;
});
if (same_prefix)
{
prefix_required = false;
oa->write_character(static_cast<CharType>('$'));
oa->write_character(static_cast<CharType>(first_prefix));
}
}
if (use_count)
{
oa->write_character(static_cast<CharType>('#'));
write_number_with_ubjson_prefix(j.m_value.array->size(), true);
}
for (const auto& el : *j.m_value.array)
{
write_ubjson(el, use_count, use_type, prefix_required);
}
if (not use_count)
{
oa->write_character(static_cast<CharType>(']'));
}
break;
}
case value_t::object:
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('{'));
}
bool prefix_required = true;
if (use_type and not j.m_value.object->empty())
{
assert(use_count);
const char first_prefix = ubjson_prefix(j.front());
const bool same_prefix = std::all_of(j.begin(), j.end(),
[this, first_prefix](const BasicJsonType & v)
{
return ubjson_prefix(v) == first_prefix;
});
if (same_prefix)
{
prefix_required = false;
oa->write_character(static_cast<CharType>('$'));
oa->write_character(static_cast<CharType>(first_prefix));
}
}
if (use_count)
{
oa->write_character(static_cast<CharType>('#'));
write_number_with_ubjson_prefix(j.m_value.object->size(), true);
}
for (const auto& el : *j.m_value.object)
{
write_number_with_ubjson_prefix(el.first.size(), true);
oa->write_characters(
reinterpret_cast<const CharType*>(el.first.c_str()),
el.first.size());
write_ubjson(el.second, use_count, use_type, prefix_required);
}
if (not use_count)
{
oa->write_character(static_cast<CharType>('}'));
}
break;
}
default:
break;
}
}
private:
/*
@brief write a number to the output
@ -6413,6 +6842,200 @@ class binary_writer
oa->write_characters(vec.data(), sizeof(NumberType));
}
template<typename NumberType>
void write_number_with_ubjson_prefix(const NumberType n,
const bool add_prefix)
{
if (std::is_floating_point<NumberType>::value)
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('D')); // float64
}
write_number(n);
}
else if (std::is_unsigned<NumberType>::value)
{
if (n <= (std::numeric_limits<int8_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('i')); // uint8
}
write_number(static_cast<uint8_t>(n));
}
else if (n <= (std::numeric_limits<uint8_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('U')); // uint8
}
write_number(static_cast<uint8_t>(n));
}
else if (n <= (std::numeric_limits<int16_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('I')); // int16
}
write_number(static_cast<int16_t>(n));
}
else if (n <= (std::numeric_limits<int32_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('l')); // int32
}
write_number(static_cast<int32_t>(n));
}
else if (n <= (std::numeric_limits<int64_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('L')); // int64
}
write_number(static_cast<int64_t>(n));
}
else
{
JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(n)));
}
}
else
{
if ((std::numeric_limits<int8_t>::min)() <= n and n <= (std::numeric_limits<int8_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('i')); // int8
}
write_number(static_cast<int8_t>(n));
}
else if ((std::numeric_limits<uint8_t>::min)() <= n and n <= (std::numeric_limits<uint8_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('U')); // uint8
}
write_number(static_cast<uint8_t>(n));
}
else if ((std::numeric_limits<int16_t>::min)() <= n and n <= (std::numeric_limits<int16_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('I')); // int16
}
write_number(static_cast<int16_t>(n));
}
else if ((std::numeric_limits<int32_t>::min)() <= n and n <= (std::numeric_limits<int32_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('l')); // int32
}
write_number(static_cast<int32_t>(n));
}
else if ((std::numeric_limits<int64_t>::min)() <= n and n <= (std::numeric_limits<int64_t>::max)())
{
if (add_prefix)
{
oa->write_character(static_cast<CharType>('L')); // int64
}
write_number(static_cast<int64_t>(n));
}
// LCOV_EXCL_START
else
{
JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(n)));
}
// LCOV_EXCL_STOP
}
}
/*!
@brief determine the UBJSON type marker of a JSON value

The marker is used to decide whether all elements of a container share the
same type.

@param[in] j  JSON value to inspect
@return 'Z' for null, 'T' or 'F' for booleans, one of 'i', 'U', 'I', 'l',
        'L' for integers (smallest fitting type first), 'D' for
        floating-point numbers, 'S' for strings, '[' for arrays, '{' for
        objects, and 'N' for any other value

@note Integers that fit none of the smaller types are reported as 'L'
      without checking the int64_t limits. A number that exceeds them is
      detected later by write_number_with_ubjson_prefix, so this function
      does not need to be exact in that case.
*/
char ubjson_prefix(const BasicJsonType& j) const noexcept
{
    switch (j.type())
    {
        case value_t::null:
            return 'Z';

        case value_t::boolean:
        {
            if (j.m_value.boolean)
            {
                return 'T';
            }
            return 'F';
        }

        case value_t::number_integer:
        {
            const auto& value = j.m_value.number_integer;

            if ((std::numeric_limits<int8_t>::min)() <= value and value <= (std::numeric_limits<int8_t>::max)())
            {
                return 'i';
            }
            if ((std::numeric_limits<uint8_t>::min)() <= value and value <= (std::numeric_limits<uint8_t>::max)())
            {
                return 'U';
            }
            if ((std::numeric_limits<int16_t>::min)() <= value and value <= (std::numeric_limits<int16_t>::max)())
            {
                return 'I';
            }
            if ((std::numeric_limits<int32_t>::min)() <= value and value <= (std::numeric_limits<int32_t>::max)())
            {
                return 'l';
            }

            // no further check: assume int64_t (see note above)
            return 'L';
        }

        case value_t::number_unsigned:
        {
            const auto& value = j.m_value.number_unsigned;

            if (value <= (std::numeric_limits<int8_t>::max)())
            {
                return 'i';
            }
            if (value <= (std::numeric_limits<uint8_t>::max)())
            {
                return 'U';
            }
            if (value <= (std::numeric_limits<int16_t>::max)())
            {
                return 'I';
            }
            if (value <= (std::numeric_limits<int32_t>::max)())
            {
                return 'l';
            }

            // no further check: assume int64_t (see note above)
            return 'L';
        }

        case value_t::number_float:
            return 'D';

        case value_t::string:
            return 'S';

        case value_t::array:
            return '[';

        case value_t::object:
            return '{';

        default:  // discarded values
            return 'N';
    }
}
private:
/// whether we can assume little endianness
const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();
@ -13721,6 +14344,26 @@ class basic_json
binary_writer<char>(o).write_msgpack(j);
}
/*!
@brief serialize a JSON value to a UBJSON byte vector

Creates an empty byte vector and lets the to_ubjson overload that takes an
output target fill it.

@param[in] j  JSON value to serialize
@param[in] use_size  forwarded to the writing overload (default: false)
@param[in] use_type  forwarded to the writing overload (default: false)
@return byte vector holding the serialization of @a j
*/
static std::vector<uint8_t> to_ubjson(const basic_json& j,
                                      const bool use_size = false,
                                      const bool use_type = false)
{
    std::vector<uint8_t> bytes;
    to_ubjson(j, bytes, use_size, use_type);
    return bytes;
}
static void to_ubjson(const basic_json& j, detail::output_adapter<uint8_t> o,
const bool use_size = false, const bool use_type = false)
{
binary_writer<uint8_t>(o).write_ubjson(j, use_size, use_type);
}
static void to_ubjson(const basic_json& j, detail::output_adapter<char> o,
const bool use_size = false, const bool use_type = false)
{
binary_writer<char>(o).write_ubjson(j, use_size, use_type);
}
/*!
@brief create a JSON value from an input in CBOR format
@ -13915,6 +14558,19 @@ class basic_json
return binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).parse_msgpack(strict);
}
/*!
@brief create a JSON value from an input in UBJSON format

@param[in] i  input adapter to read the UBJSON data from
@param[in] strict  forwarded to binary_reader::parse_ubjson (default: true)
@return the value returned by binary_reader::parse_ubjson
*/
static basic_json from_ubjson(detail::input_adapter i,
                              const bool strict = true)
{
    binary_reader reader(i);
    return reader.parse_ubjson(strict);
}
/*!
@brief create a JSON value from an input in UBJSON format

This overload takes part in overload resolution only if an input adapter
can be constructed from @a a1 and @a a2.

@param[in] a1  first argument forwarded to the input adapter constructor
@param[in] a2  second argument forwarded to the input adapter constructor
@param[in] strict  forwarded to binary_reader::parse_ubjson (default: true)
@return the value returned by binary_reader::parse_ubjson
*/
template<typename A1, typename A2,
         detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0>
static basic_json from_ubjson(A1 && a1, A2 && a2, const bool strict = true)
{
    detail::input_adapter adapter(std::forward<A1>(a1), std::forward<A2>(a2));
    binary_reader reader(adapter);
    return reader.parse_ubjson(strict);
}
/// @}
//////////////////////////

View file

@ -40,6 +40,7 @@ SOURCES = src/unit.cpp \
src/unit-regression.cpp \
src/unit-serialization.cpp \
src/unit-testsuites.cpp \
src/unit-ubjson.cpp \
src/unit-unicode.cpp
OBJECTS = $(SOURCES:.cpp=.o)

2107
test/src/unit-ubjson.cpp Normal file

File diff suppressed because it is too large Load diff