Merge pull request #1662 from OmnipotentEntity/develop

Add binary type support to all binary file formats, as well as an internally represented binary type
This commit is contained in:
Niels Lohmann 2020-04-16 11:14:32 +02:00 committed by GitHub
commit f2b43a36b2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 3008 additions and 106 deletions

View file

@ -26,6 +26,7 @@ template<typename BasicJsonType, typename CharType>
class binary_writer
{
using string_t = typename BasicJsonType::string_t;
using internal_binary_t = typename BasicJsonType::internal_binary_t;
public:
/*!
@ -258,6 +259,45 @@ class binary_writer
break;
}
case value_t::binary:
{
    const auto& blob = *j.m_value.binary;
    const auto length = blob.size();

    // header: lengths up to 0x17 are folded into the control byte itself
    // (0x40 + length); longer payloads get a control byte 0x58..0x5B
    // followed by the length as a 1-, 2-, 4- or 8-byte number
    if (length <= 0x17)
    {
        write_number(static_cast<std::uint8_t>(0x40 + length));
    }
    else if (length <= (std::numeric_limits<std::uint8_t>::max)())
    {
        oa->write_character(to_char_type(0x58));
        write_number(static_cast<std::uint8_t>(length));
    }
    else if (length <= (std::numeric_limits<std::uint16_t>::max)())
    {
        oa->write_character(to_char_type(0x59));
        write_number(static_cast<std::uint16_t>(length));
    }
    else if (length <= (std::numeric_limits<std::uint32_t>::max)())
    {
        oa->write_character(to_char_type(0x5A));
        write_number(static_cast<std::uint32_t>(length));
    }
    // LCOV_EXCL_START
    else if (length <= (std::numeric_limits<std::uint64_t>::max)())
    {
        oa->write_character(to_char_type(0x5B));
        write_number(static_cast<std::uint64_t>(length));
    }
    // LCOV_EXCL_STOP

    // payload: the stored bytes are handed to the output adapter unchanged
    oa->write_characters(reinterpret_cast<const CharType*>(blob.data()), length);
    break;
}
case value_t::object:
{
// step 1: write control byte and the object size
@ -506,6 +546,101 @@ class binary_writer
break;
}
case value_t::binary:
{
    // a binary value that carries a subtype is written with the ext/fixext
    // markers; one without a subtype is written with the bin markers
    const auto& blob = *j.m_value.binary;
    const bool as_ext = blob.has_subtype;
    const auto length = blob.size();

    // control byte, followed by the payload length unless the chosen
    // marker already implies it (fixext)
    if (length <= (std::numeric_limits<std::uint8_t>::max)())
    {
        std::uint8_t marker = 0xC4; // bin 8
        bool length_is_implied = false;

        if (as_ext)
        {
            marker = 0xC7; // ext 8, unless one of the fixext sizes matches
            switch (length)
            {
                case 1:
                    marker = 0xD4; // fixext 1
                    length_is_implied = true;
                    break;
                case 2:
                    marker = 0xD5; // fixext 2
                    length_is_implied = true;
                    break;
                case 4:
                    marker = 0xD6; // fixext 4
                    length_is_implied = true;
                    break;
                case 8:
                    marker = 0xD7; // fixext 8
                    length_is_implied = true;
                    break;
                case 16:
                    marker = 0xD8; // fixext 16
                    length_is_implied = true;
                    break;
                default:
                    break;
            }
        }

        oa->write_character(to_char_type(marker));
        if (not length_is_implied)
        {
            write_number(static_cast<std::uint8_t>(length));
        }
    }
    else if (length <= (std::numeric_limits<std::uint16_t>::max)())
    {
        // ext 16 : bin 16
        const std::uint8_t marker = as_ext ? std::uint8_t{0xC8} : std::uint8_t{0xC5};
        oa->write_character(to_char_type(marker));
        write_number(static_cast<std::uint16_t>(length));
    }
    else if (length <= (std::numeric_limits<std::uint32_t>::max)())
    {
        // ext 32 : bin 32
        const std::uint8_t marker = as_ext ? std::uint8_t{0xC9} : std::uint8_t{0xC6};
        oa->write_character(to_char_type(marker));
        write_number(static_cast<std::uint32_t>(length));
    }

    // ext markers are followed by the subtype before the payload
    if (as_ext)
    {
        write_number(blob.subtype);
    }

    // payload: the stored bytes, unchanged
    oa->write_characters(reinterpret_cast<const CharType*>(blob.data()), length);
    break;
}
case value_t::object:
{
// step 1: write control byte and the object size
@ -649,6 +784,49 @@ class binary_writer
break;
}
case value_t::binary:
{
    // opening marker, only when the caller asked for a prefix
    if (add_prefix)
    {
        oa->write_character(to_char_type('['));
    }

    // type marker "$U": written only for a non-empty value; it requires
    // that a count follows, hence the assertion
    if (use_type and not j.m_value.binary->empty())
    {
        assert(use_count);
        oa->write_character(to_char_type('$'));
        // route every byte through to_char_type, like the other writes here
        oa->write_character(to_char_type('U'));
    }

    // count marker '#' followed by the number of bytes
    if (use_count)
    {
        oa->write_character(to_char_type('#'));
        write_number_with_ubjson_prefix(j.m_value.binary->size(), true);
    }

    if (use_type)
    {
        // with a type marker the payload is written as one raw run of bytes
        oa->write_characters(
            reinterpret_cast<const CharType*>(j.m_value.binary->data()),
            j.m_value.binary->size());
    }
    else
    {
        // without a type marker each byte carries its own 'U' marker
        for (std::size_t i = 0; i < j.m_value.binary->size(); ++i)
        {
            oa->write_character(to_char_type('U'));
            // explicit conversion to CharType instead of an implicit one
            // NOTE(review): assumes the element type is accepted by
            // to_char_type (std::uint8_t) - confirm
            oa->write_character(to_char_type(j.m_value.binary->data()[i]));
        }
    }

    // closing marker, only needed when no count was written
    if (not use_count)
    {
        oa->write_character(to_char_type(']'));
    }

    break;
}
case value_t::object:
{
if (add_prefix)
@ -871,6 +1049,14 @@ class binary_writer
return sizeof(std::int32_t) + embedded_document_size + 1ul;
}
/*!
@brief Calculates the size of a BSON-encoded binary array
@param[in] value  the binary container whose encoded size is wanted
@return `sizeof(std::int32_t)` plus the number of bytes held by @a value
        plus one further byte
*/
static std::size_t calc_bson_binary_size(const typename BasicJsonType::internal_binary_t& value)
{
    const std::size_t length_field_size = sizeof(std::int32_t);
    const std::size_t single_byte_field_size = 1ul;
    const std::size_t payload_size = value.size();

    return length_field_size + payload_size + single_byte_field_size;
}
/*!
@brief Writes a BSON element with key @a name and array @a value
*/
@ -890,6 +1076,27 @@ class binary_writer
oa->write_character(to_char_type(0x00));
}
/*!
@brief Writes a BSON element with key @a name and binary value @a value
@param[in] name   key of the element; passed on to the entry header
@param[in] value  binary container providing the payload and, if set, the subtype
*/
void write_bson_binary(const string_t& name,
                       const internal_binary_t& value)
{
    // entry header with element type 0x05
    write_bson_entry_header(name, 0x05);

    // payload length as a 32-bit integer
    const auto payload_length = static_cast<std::int32_t>(value.size());
    write_number<std::int32_t, true>(payload_length);

    // subtype byte: the stored subtype if one is set, otherwise 0x00
    // (Generic Binary Subtype)
    const std::uint8_t subtype = value.has_subtype
                                 ? static_cast<std::uint8_t>(value.subtype)
                                 : std::uint8_t{0x00};
    write_number(subtype);

    // payload bytes, unchanged
    oa->write_characters(reinterpret_cast<const CharType*>(value.data()), value.size());
}
/*!
@brief Calculates the size necessary to serialize the JSON value @a j with its @a name
@return The calculated size for the BSON document entry for @a j with the given @a name.
@ -906,6 +1113,9 @@ class binary_writer
case value_t::array:
return header_size + calc_bson_array_size(*j.m_value.array);
case value_t::binary:
return header_size + calc_bson_binary_size(*j.m_value.binary);
case value_t::boolean:
return header_size + 1ul;
@ -950,6 +1160,9 @@ class binary_writer
case value_t::array:
return write_bson_array(name, *j.m_value.array);
case value_t::binary:
return write_bson_binary(name, *j.m_value.binary);
case value_t::boolean:
return write_bson_boolean(name, j.m_value.boolean);
@ -1230,7 +1443,8 @@ class binary_writer
case value_t::string:
return 'S';
case value_t::array:
case value_t::array: // fallthrough
case value_t::binary:
return '[';
case value_t::object:

View file

@ -45,6 +45,7 @@ class serializer
using number_float_t = typename BasicJsonType::number_float_t;
using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using binary_t = typename BasicJsonType::binary_t;
static constexpr std::uint8_t UTF8_ACCEPT = 0;
static constexpr std::uint8_t UTF8_REJECT = 1;
@ -83,16 +84,19 @@ class serializer
- strings and object keys are escaped using `escape_string()`
- integer numbers are converted implicitly via `operator<<`
- floating-point numbers are converted to a string using `"%g"` format
- if specified to, binary values are output using the syntax `b[]`, otherwise an exception is thrown
@param[in] val value to serialize
@param[in] pretty_print whether the output shall be pretty-printed
@param[in] indent_step the indent level
@param[in] current_indent the current indent level (only used internally)
@param[in] val value to serialize
@param[in] pretty_print whether the output shall be pretty-printed
@param[in] indent_step the indent level
@param[in] current_indent the current indent level (only used internally)
@param[in] serialize_binary whether the output shall include non-standard binary output
*/
void dump(const BasicJsonType& val, const bool pretty_print,
const bool ensure_ascii,
const unsigned int indent_step,
const unsigned int current_indent = 0)
const unsigned int current_indent = 0,
const bool serialize_binary = false)
{
switch (val.m_type)
{
@ -236,6 +240,55 @@ class serializer
return;
}
case value_t::binary:
{
    // binary values are only emitted (in the non-standard `b[...]` syntax)
    // when explicitly requested; otherwise serialization is rejected
    if (not serialize_binary)
    {
        JSON_THROW(type_error::create(317, "cannot serialize binary data to text JSON"));
    }

    if (val.m_value.binary->empty())
    {
        o->write_characters("b[]", 3);
        return;
    }

    o->write_characters("b[", 2);

    // every element but the last is followed by a separator
    const auto first = val.m_value.binary->cbegin();
    const auto last = val.m_value.binary->cend() - 1;
    for (auto i = first; i != last; ++i)
    {
        dump_integer(*i);
        o->write_character(',');

        if (pretty_print)
        {
            // Break the line after every 16th element. The previous test
            // `index % 16 == 0` fired on index 0, which put a line break
            // right after the first element. The count also keeps the
            // iterator difference type instead of narrowing it to int.
            const auto elements_written = (i - first) + 1;
            o->write_character((elements_written % 16 == 0) ? '\n' : ' ');
        }
    }

    // last element: no trailing separator
    dump_integer(val.m_value.binary->back());
    o->write_character(']');
    return;
}
case value_t::boolean:
{
if (val.m_value.boolean)
@ -592,7 +645,8 @@ class serializer
*/
template<typename NumberType, detail::enable_if_t<
std::is_same<NumberType, number_unsigned_t>::value or
std::is_same<NumberType, number_integer_t>::value,
std::is_same<NumberType, number_integer_t>::value or
std::is_same<NumberType, typename binary_t::value_type>::value,
int> = 0>
void dump_integer(NumberType x)
{
@ -630,7 +684,7 @@ class serializer
if (is_negative)
{
*buffer_ptr = '-';
abs_value = remove_sign(x);
abs_value = remove_sign(static_cast<number_integer_t>(x));
// account one more byte for the minus sign
n_chars = 1 + count_digits(abs_value);