Merge branch 'develop' into feature/convert_char

This commit is contained in:
Niels Lohmann 2018-10-27 16:48:48 +02:00 committed by GitHub
commit 5a6bdf5934
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 4034 additions and 722 deletions

View file

@ -35,7 +35,33 @@ class binary_writer
}
/*!
@brief[in] j JSON value to serialize
@param[in] j JSON value to serialize
@pre j.type() == value_t::object
*/
void write_bson(const BasicJsonType& j)
{
    // only an object is accepted at the top level; a discarded value
    // silently produces no output, everything else is rejected
    const auto top_level_type = j.type();

    if (top_level_type == value_t::object)
    {
        write_bson_object(*j.m_value.object);
        return;
    }

    if (top_level_type == value_t::discarded)
    {
        return;
    }

    JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name())));
}
/*!
@param[in] j JSON value to serialize
*/
void write_cbor(const BasicJsonType& j)
{
@ -279,7 +305,7 @@ class binary_writer
}
/*!
@brief[in] j JSON value to serialize
@param[in] j JSON value to serialize
*/
void write_msgpack(const BasicJsonType& j)
{
@ -679,33 +705,362 @@ class binary_writer
}
private:
/*
@brief write a number to output input
//////////
// BSON //
//////////
@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@note This function needs to respect the system's endianess, because bytes
in CBOR, MessagePack, and UBJSON are stored in network order (big
endian) and therefore need reordering on little endian systems.
/*!
@return The size of a BSON document entry header, including the id marker
and the entry name size (and its null-terminator).
*/
template<typename NumberType>
void write_number(const NumberType n)
static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name)
{
// step 1: write number to array of length NumberType
std::array<CharType, sizeof(NumberType)> vec;
std::memcpy(vec.data(), &n, sizeof(NumberType));
// step 2: write array to output (with possible reordering)
if (is_little_endian)
const auto it = name.find(static_cast<typename BasicJsonType::string_t::value_type>(0));
if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos))
{
// reverse byte order prior to conversion if necessary
std::reverse(vec.begin(), vec.end());
JSON_THROW(out_of_range::create(409,
"BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")"));
}
oa->write_characters(vec.data(), sizeof(NumberType));
return /*id*/ 1ul + name.size() + /*zero-terminator*/1u;
}
/*!
@brief Writes the header of a BSON element to the output adapter

The header consists of the one-byte @a element_type marker followed by the
key @a name as a null-terminated string.

@param[in] name          key of the element
@param[in] element_type  type marker byte of the element
*/
void write_bson_entry_header(const typename BasicJsonType::string_t& name,
                             std::uint8_t element_type)
{
    // first the type marker of the element that follows
    const auto type_marker = to_char_type(element_type);
    oa->write_character(type_marker);

    // then the key; size() + 1 also emits the terminating null byte
    const auto* key_bytes = reinterpret_cast<const CharType*>(name.c_str());
    oa->write_characters(key_bytes, name.size() + 1u);
}
/*!
@brief Writes a BSON boolean element (type marker 0x08)

@param[in] name   key of the element
@param[in] value  boolean to store; written as a single byte 0x01 or 0x00
*/
void write_bson_boolean(const typename BasicJsonType::string_t& name,
                        const bool value)
{
    write_bson_entry_header(name, 0x08);

    if (value)
    {
        oa->write_character(to_char_type(0x01));
    }
    else
    {
        oa->write_character(to_char_type(0x00));
    }
}
/*!
@brief Writes a BSON double element (type marker 0x01)

@param[in] name   key of the element
@param[in] value  floating-point number, written via write_number with
                  little endian output requested
*/
void write_bson_double(const typename BasicJsonType::string_t& name,
                       const double value)
{
    const std::uint8_t double_marker = 0x01;

    write_bson_entry_header(name, double_marker);
    write_number<double, true>(value);
}
/*!
@param[in] value  string to measure
@return The size of the BSON-encoded string in @a value: a 32-bit length
        prefix, the string bytes, and one terminating null byte.
*/
static std::size_t calc_bson_string_size(const typename BasicJsonType::string_t& value)
{
    const std::size_t length_prefix = sizeof(std::int32_t);
    const std::size_t terminator = 1ul;

    return length_prefix + value.size() + terminator;
}
/*!
@brief Writes a BSON string element (type marker 0x02)

@param[in] name   key of the element
@param[in] value  string to store
*/
void write_bson_string(const typename BasicJsonType::string_t& name,
                       const typename BasicJsonType::string_t& value)
{
    write_bson_entry_header(name, 0x02);

    // both the length prefix and the payload include the terminating null
    const std::size_t size_with_terminator = value.size() + 1ul;
    write_number<std::int32_t, true>(static_cast<std::int32_t>(size_with_terminator));

    const auto* payload = reinterpret_cast<const CharType*>(value.c_str());
    oa->write_characters(payload, size_with_terminator);
}
/*!
@brief Writes a BSON null element (type marker 0x0A)

A null element consists of its header only; no payload follows.

@param[in] name  key of the element
*/
void write_bson_null(const typename BasicJsonType::string_t& name)
{
    const std::uint8_t null_marker = 0x0A;
    write_bson_entry_header(name, null_marker);
}
/*!
@param[in] value  signed integer to measure
@return The size of the BSON-encoded integer @a value: the size of a 32-bit
        integer if @a value fits into one, the size of a 64-bit integer
        otherwise.
*/
static std::size_t calc_bson_integer_size(const std::int64_t value)
{
    const bool fits_int32 = value >= (std::numeric_limits<std::int32_t>::min)()
                            and value <= (std::numeric_limits<std::int32_t>::max)();

    return fits_int32 ? sizeof(std::int32_t) : sizeof(std::int64_t);
}
/*!
@brief Writes a BSON integer element with key @a name

Values that fit into 32 bits are written as int32 (type marker 0x10), all
other values as int64 (type marker 0x12).

@param[in] name   key of the element
@param[in] value  signed integer to store
*/
void write_bson_integer(const typename BasicJsonType::string_t& name,
                        const std::int64_t value)
{
    const bool fits_int32 = value >= (std::numeric_limits<std::int32_t>::min)()
                            and value <= (std::numeric_limits<std::int32_t>::max)();

    if (not fits_int32)
    {
        write_bson_entry_header(name, 0x12); // int64
        write_number<std::int64_t, true>(value);
        return;
    }

    write_bson_entry_header(name, 0x10); // int32
    write_number<std::int32_t, true>(static_cast<std::int32_t>(value));
}
/*!
@param[in] value  unsigned integer to measure
@return The size of the BSON-encoded unsigned integer @a value: the size of
        a 32-bit integer if @a value does not exceed the int32 maximum, the
        size of a 64-bit integer otherwise.
*/
static std::size_t calc_bson_unsigned_size(const std::uint64_t value)
{
    const auto int32_limit = static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)());

    return (value > int32_limit) ? sizeof(std::int64_t) : sizeof(std::int32_t);
}
/*!
@brief Writes a BSON integer element for the unsigned @a value

Values up to the int32 maximum are written as int32 (type marker 0x10),
values up to the int64 maximum as int64 (type marker 0x12). Larger values
are rejected with out_of_range error 407.

@param[in] name   key of the element
@param[in] value  unsigned integer to store
*/
void write_bson_unsigned(const typename BasicJsonType::string_t& name,
                         const std::uint64_t value)
{
    const auto int32_limit = static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)());
    const auto int64_limit = static_cast<std::uint64_t>((std::numeric_limits<std::int64_t>::max)());

    if (value <= int32_limit)
    {
        write_bson_entry_header(name, 0x10); // int32
        write_number<std::int32_t, true>(static_cast<std::int32_t>(value));
        return;
    }

    if (value <= int64_limit)
    {
        write_bson_entry_header(name, 0x12); // int64
        write_number<std::int64_t, true>(static_cast<std::int64_t>(value));
        return;
    }

    JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(value)));
}
/*!
@brief Writes a BSON element holding an embedded object (type marker 0x03)

@param[in] name   key of the element
@param[in] value  object to store; serialized through write_bson_object
*/
void write_bson_object_entry(const typename BasicJsonType::string_t& name,
                             const typename BasicJsonType::object_t& value)
{
    const std::uint8_t object_marker = 0x03;

    write_bson_entry_header(name, object_marker);
    write_bson_object(value);
}
/*!
@param[in] value  array to measure
@return The size of the BSON-encoded array @a value: a 32-bit size field,
        one element per array entry (keyed by its decimal index), and one
        terminating null byte.
*/
static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value)
{
    // fixed overhead: size field up front, terminator at the end
    std::size_t total_size = sizeof(std::int32_t) + 1ul;

    std::size_t position = 0ul;
    for (const auto& item : value)
    {
        total_size += calc_bson_element_size(std::to_string(position), item);
        ++position;
    }

    return total_size;
}
/*!
@brief Writes a BSON array element (type marker 0x04)

The array is written as a size field, followed by one element per entry
whose key is the decimal index of the entry, followed by a null byte.

@param[in] name   key of the element
@param[in] value  array to store
*/
void write_bson_array(const typename BasicJsonType::string_t& name,
                      const typename BasicJsonType::array_t& value)
{
    write_bson_entry_header(name, 0x04); // array

    const auto encoded_size = calc_bson_array_size(value);
    write_number<std::int32_t, true>(static_cast<std::int32_t>(encoded_size));

    std::size_t position = 0ul;
    for (const auto& item : value)
    {
        write_bson_element(std::to_string(position), item);
        ++position;
    }

    // terminator of the array
    oa->write_character(to_char_type(0x00));
}
/*!
@brief Calculates the size necessary to serialize the JSON value @a j with
       its @a name

@param[in] name  key of the element
@param[in] j     JSON value of the element

@return The calculated size for the BSON document entry for @a j with the
        given @a name: the entry header plus the payload of the value.
        Discarded values yield 0.
*/
static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name,
        const BasicJsonType& j)
{
    // the header size is determined first, for every value type
    const auto header_size = calc_bson_entry_header_size(name);

    std::size_t payload_size = 0ul;
    switch (j.type())
    {
        case value_t::discarded:
            // contributes neither header nor payload
            return 0ul;

        case value_t::object:
            payload_size = calc_bson_object_size(*j.m_value.object);
            break;

        case value_t::array:
            payload_size = calc_bson_array_size(*j.m_value.array);
            break;

        case value_t::boolean:
            payload_size = 1ul;
            break;

        case value_t::number_float:
            payload_size = 8ul;
            break;

        case value_t::number_integer:
            payload_size = calc_bson_integer_size(j.m_value.number_integer);
            break;

        case value_t::number_unsigned:
            payload_size = calc_bson_unsigned_size(j.m_value.number_unsigned);
            break;

        case value_t::string:
            payload_size = calc_bson_string_size(*j.m_value.string);
            break;

        case value_t::null:
            // header only
            break;

        // LCOV_EXCL_START
        default:
            assert(false);
            return 0ul;
            // LCOV_EXCL_STOP
    }

    return header_size + payload_size;
}
/*!
@brief Serializes the JSON value @a j to BSON and associates it with the
       key @a name.

Dispatches on the type of @a j to the matching write_bson_* function.
Discarded values are skipped and produce no output.

@param[in] name  The name to associate with the JSON entity @a j within
                 the current BSON document
@param[in] j     The JSON value to serialize
*/
void write_bson_element(const typename BasicJsonType::string_t& name,
                        const BasicJsonType& j)
{
    switch (j.type())
    {
        case value_t::discarded:
            break;

        case value_t::object:
            write_bson_object_entry(name, *j.m_value.object);
            break;

        case value_t::array:
            write_bson_array(name, *j.m_value.array);
            break;

        case value_t::boolean:
            write_bson_boolean(name, j.m_value.boolean);
            break;

        case value_t::number_float:
            write_bson_double(name, j.m_value.number_float);
            break;

        case value_t::number_integer:
            write_bson_integer(name, j.m_value.number_integer);
            break;

        case value_t::number_unsigned:
            write_bson_unsigned(name, j.m_value.number_unsigned);
            break;

        case value_t::string:
            write_bson_string(name, *j.m_value.string);
            break;

        case value_t::null:
            write_bson_null(name);
            break;

        // LCOV_EXCL_START
        default:
            assert(false);
            break;
            // LCOV_EXCL_STOP
    }
}
/*!
@brief Calculates the size of the BSON serialization of the given
       JSON object @a value.

@param[in] value  object to measure
@return the size of a 32-bit size field, plus the size of every member
        element, plus one terminating null byte
*/
static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value)
{
    // fixed overhead: size field up front, terminator at the end
    std::size_t total_size = sizeof(std::int32_t) + 1ul;

    for (const auto& member : value)
    {
        total_size += calc_bson_element_size(member.first, member.second);
    }

    return total_size;
}
/*!
@brief Writes the object @a value as a BSON document

The document is written as a size field, followed by one element per
member, followed by a null byte.

@param[in] value  object to serialize
*/
void write_bson_object(const typename BasicJsonType::object_t& value)
{
    const auto encoded_size = calc_bson_object_size(value);
    write_number<std::int32_t, true>(static_cast<std::int32_t>(encoded_size));

    for (const auto& member : value)
    {
        write_bson_element(member.first, member.second);
    }

    // terminator of the document
    oa->write_character(to_char_type(0x00));
}
//////////
// CBOR //
//////////
/// @return the CBOR prefix byte for a float value (0xFA); the argument is
///         used for overload selection only
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
{
return to_char_type(0xFA); // Single-Precision Float
}
/// @return the CBOR prefix byte for a double value (0xFB); the argument is
///         used for overload selection only
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
{
return to_char_type(0xFB); // Double-Precision Float
}
/////////////
// MsgPack //
/////////////
/// @return the MessagePack prefix byte for a float value (0xCA); the
///         argument is used for overload selection only
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
{
return to_char_type(0xCA); // float 32
}
/// @return the MessagePack prefix byte for a double value (0xCB); the
///         argument is used for overload selection only
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
{
return to_char_type(0xCB); // float 64
}
////////////
// UBJSON //
////////////
// UBJSON: write number (floating point)
template<typename NumberType, typename std::enable_if<
std::is_floating_point<NumberType>::value, int>::type = 0>
@ -906,26 +1261,6 @@ class binary_writer
}
}
/// @return the CBOR prefix byte for a float value (0xFA); the argument is
///         used for overload selection only
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
{
return to_char_type(0xFA); // Single-Precision Float
}
/// @return the CBOR prefix byte for a double value (0xFB); the argument is
///         used for overload selection only
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
{
return to_char_type(0xFB); // Double-Precision Float
}
/// @return the MessagePack prefix byte for a float value (0xCA); the
///         argument is used for overload selection only
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
{
return to_char_type(0xCA); // float 32
}
/// @return the MessagePack prefix byte for a double value (0xCB); the
///         argument is used for overload selection only
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
{
return to_char_type(0xCB); // float 64
}
static constexpr CharType get_ubjson_float_prefix(float /*unused*/)
{
return 'd'; // float 32
@ -936,6 +1271,38 @@ class binary_writer
return 'D'; // float 64
}
///////////////////////
// Utility functions //
///////////////////////
/*!
@brief write a number to the output adapter

@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@tparam OutputIsLittleEndian Set to true if output data is
                             required to be little endian

@note This function needs to respect the system's endianness: bytes in
      CBOR, MessagePack, and UBJSON are stored in network order (big
      endian), whereas callers passing OutputIsLittleEndian = true (the
      BSON writers) need little endian output. The bytes are therefore
      reversed whenever the byte order of the system differs from the
      requested output byte order - in either direction.
*/
template<typename NumberType, bool OutputIsLittleEndian = false>
void write_number(const NumberType n)
{
    // step 1: write number to array of length NumberType
    std::array<CharType, sizeof(NumberType)> vec;
    std::memcpy(vec.data(), &n, sizeof(NumberType));

    // step 2: write array to output (with possible reordering); comparing
    // for inequality also covers big endian systems that must produce
    // little endian output
    if (is_little_endian != OutputIsLittleEndian)
    {
        // reverse byte order prior to conversion if necessary
        std::reverse(vec.begin(), vec.end());
    }

    oa->write_characters(vec.data(), sizeof(NumberType));
}
// The following to_char_type functions implement the conversion
// between uint8_t and CharType. In case CharType is not unsigned,
// such a conversion is required to allow values greater than 128.

View file

@ -28,6 +28,14 @@ namespace detail
// serialization //
///////////////////
/// how to treat decoding errors
///
/// Selects what the serializer does when a string to be dumped contains
/// invalid or incomplete UTF-8. The serializer constructor uses `strict`
/// unless told otherwise.
enum class error_handler_t
{
strict, ///< throw a type_error exception in case of invalid UTF-8
replace, ///< replace invalid UTF-8 sequences with U+FFFD
ignore ///< ignore invalid UTF-8 sequences
};
template<typename BasicJsonType>
class serializer
{
@ -42,12 +50,17 @@ class serializer
/*!
@param[in] s output stream to serialize to
@param[in] ichar indentation character to use
@param[in] error_handler_ how to react on decoding errors
*/
serializer(output_adapter_t<char> s, const char ichar)
: o(std::move(s)), loc(std::localeconv()),
thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)),
decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)),
indent_char(ichar), indent_string(512, indent_char)
serializer(output_adapter_t<char> s, const char ichar,
           error_handler_t error_handler_ = error_handler_t::strict)
    : o(std::move(s)),
      // the locale information is queried once, at construction
      loc(std::localeconv()),
      // a missing separator (null pointer) is represented as '\0'
      thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)),
      decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)),
      indent_char(ichar),
      // indentation string starts as 512 copies of the indentation character
      indent_string(512, indent_char),
      error_handler(error_handler_)
{}
// delete because of pointer members
@ -287,6 +300,10 @@ class serializer
uint8_t state = UTF8_ACCEPT;
std::size_t bytes = 0; // number of bytes written to string_buffer
// number of bytes written at the point of the last valid byte
std::size_t bytes_after_last_accept = 0;
std::size_t undumped_chars = 0;
for (std::size_t i = 0; i < s.size(); ++i)
{
const auto byte = static_cast<uint8_t>(s[i]);
@ -384,14 +401,69 @@ class serializer
o->write_characters(string_buffer.data(), bytes);
bytes = 0;
}
// remember the byte position of this accept
bytes_after_last_accept = bytes;
undumped_chars = 0;
break;
}
case UTF8_REJECT: // decode found invalid UTF-8 byte
{
std::string sn(3, '\0');
snprintf(&sn[0], sn.size(), "%.2X", byte);
JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
switch (error_handler)
{
case error_handler_t::strict:
{
std::string sn(3, '\0');
snprintf(&sn[0], sn.size(), "%.2X", byte);
JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
}
case error_handler_t::ignore:
case error_handler_t::replace:
{
// in case we saw this character the first time, we
// would like to read it again, because the byte
// may be OK for itself, but just not OK for the
// previous sequence
if (undumped_chars > 0)
{
--i;
}
// reset length buffer to the last accepted index;
// thus removing/ignoring the invalid characters
bytes = bytes_after_last_accept;
if (error_handler == error_handler_t::replace)
{
// add a replacement character
if (ensure_ascii)
{
string_buffer[bytes++] = '\\';
string_buffer[bytes++] = 'u';
string_buffer[bytes++] = 'f';
string_buffer[bytes++] = 'f';
string_buffer[bytes++] = 'f';
string_buffer[bytes++] = 'd';
}
else
{
string_buffer[bytes++] = '\xEF';
string_buffer[bytes++] = '\xBF';
string_buffer[bytes++] = '\xBD';
}
bytes_after_last_accept = bytes;
}
undumped_chars = 0;
// continue processing the string
state = UTF8_ACCEPT;
break;
}
}
break;
}
default: // decode found yet incomplete multi-byte code point
@ -401,11 +473,13 @@ class serializer
// code point will not be escaped - copy byte to buffer
string_buffer[bytes++] = s[i];
}
++undumped_chars;
break;
}
}
}
// we finished processing the string
if (JSON_LIKELY(state == UTF8_ACCEPT))
{
// write buffer
@ -417,9 +491,38 @@ class serializer
else
{
// we finish reading, but do not accept: string was incomplete
std::string sn(3, '\0');
snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back()));
JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
switch (error_handler)
{
case error_handler_t::strict:
{
std::string sn(3, '\0');
snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back()));
JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
}
case error_handler_t::ignore:
{
// write all accepted bytes
o->write_characters(string_buffer.data(), bytes_after_last_accept);
break;
}
case error_handler_t::replace:
{
// write all accepted bytes
o->write_characters(string_buffer.data(), bytes_after_last_accept);
// add a replacement character
if (ensure_ascii)
{
o->write_characters("\\ufffd", 6);
}
else
{
o->write_characters("\xEF\xBF\xBD", 3);
}
break;
}
}
}
}
@ -629,6 +732,9 @@ class serializer
const char indent_char;
/// the indentation string
string_t indent_string;
/// error_handler how to react on decoding errors
const error_handler_t error_handler;
};
} // namespace detail
} // namespace nlohmann