💥 change serialization of binary values

This commit is contained in:
Niels Lohmann 2020-05-18 13:53:20 +02:00
parent 9eb19bcc27
commit b7ff40029a
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
6 changed files with 273 additions and 116 deletions

View file

@ -84,19 +84,22 @@ class serializer
- strings and object keys are escaped using `escape_string()`
- integer numbers are converted implicitly via `operator<<`
- floating-point numbers are converted to a string using `"%g"` format
- if specified to, binary values are output using the syntax `b[]`, otherwise an exception is thrown
- binary values are serialized as objects containing the subtype and the
byte array
@param[in] val value to serialize
@param[in] pretty_print whether the output shall be pretty-printed
@param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters
in the output are escaped with `\uXXXX` sequences, and the result consists
of ASCII characters only.
@param[in] indent_step the indent level
@param[in] current_indent the current indent level (only used internally)
@param[in] serialize_binary whether the output shall include non-standard binary output
*/
void dump(const BasicJsonType& val, const bool pretty_print,
void dump(const BasicJsonType& val,
const bool pretty_print,
const bool ensure_ascii,
const unsigned int indent_step,
const unsigned int current_indent = 0,
const bool serialize_binary = false)
const unsigned int current_indent = 0)
{
switch (val.m_type)
{
@ -127,7 +130,7 @@ class serializer
o->write_character('\"');
dump_escaped(i->first, ensure_ascii);
o->write_characters("\": ", 3);
dump(i->second, true, ensure_ascii, indent_step, new_indent, serialize_binary);
dump(i->second, true, ensure_ascii, indent_step, new_indent);
o->write_characters(",\n", 2);
}
@ -138,7 +141,7 @@ class serializer
o->write_character('\"');
dump_escaped(i->first, ensure_ascii);
o->write_characters("\": ", 3);
dump(i->second, true, ensure_ascii, indent_step, new_indent, serialize_binary);
dump(i->second, true, ensure_ascii, indent_step, new_indent);
o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
@ -155,7 +158,7 @@ class serializer
o->write_character('\"');
dump_escaped(i->first, ensure_ascii);
o->write_characters("\":", 2);
dump(i->second, false, ensure_ascii, indent_step, current_indent, serialize_binary);
dump(i->second, false, ensure_ascii, indent_step, current_indent);
o->write_character(',');
}
@ -165,7 +168,7 @@ class serializer
o->write_character('\"');
dump_escaped(i->first, ensure_ascii);
o->write_characters("\":", 2);
dump(i->second, false, ensure_ascii, indent_step, current_indent, serialize_binary);
dump(i->second, false, ensure_ascii, indent_step, current_indent);
o->write_character('}');
}
@ -197,14 +200,14 @@ class serializer
i != val.m_value.array->cend() - 1; ++i)
{
o->write_characters(indent_string.c_str(), new_indent);
dump(*i, true, ensure_ascii, indent_step, new_indent, serialize_binary);
dump(*i, true, ensure_ascii, indent_step, new_indent);
o->write_characters(",\n", 2);
}
// last element
assert(not val.m_value.array->empty());
o->write_characters(indent_string.c_str(), new_indent);
dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent, serialize_binary);
dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent);
o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
@ -218,13 +221,13 @@ class serializer
for (auto i = val.m_value.array->cbegin();
i != val.m_value.array->cend() - 1; ++i)
{
dump(*i, false, ensure_ascii, indent_step, current_indent, serialize_binary);
dump(*i, false, ensure_ascii, indent_step, current_indent);
o->write_character(',');
}
// last element
assert(not val.m_value.array->empty());
dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent, serialize_binary);
dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent);
o->write_character(']');
}
@ -242,27 +245,73 @@ class serializer
case value_t::binary:
{
if (not serialize_binary)
if (pretty_print)
{
JSON_THROW(type_error::create(317, "cannot serialize binary data to text JSON"));
}
o->write_characters("{\n", 2);
if (val.m_value.binary->empty())
{
o->write_characters("b[]", 3);
// variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step;
if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent))
{
indent_string.resize(indent_string.size() * 2, ' ');
}
o->write_characters(indent_string.c_str(), new_indent);
o->write_characters("\"bytes\": [", 10);
if (not val.m_value.binary->empty())
{
for (auto i = val.m_value.binary->cbegin();
i != val.m_value.binary->cend() - 1; ++i)
{
dump_integer(*i);
o->write_characters(", ", 2);
}
dump_integer(val.m_value.binary->back());
}
o->write_characters("],\n", 3);
o->write_characters(indent_string.c_str(), new_indent);
o->write_characters("\"subtype\": ", 11);
if (val.m_value.binary->has_subtype())
{
dump_integer(val.m_value.binary->subtype());
}
else
{
o->write_characters("null", 4);
}
o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
o->write_character('}');
}
else
{
o->write_characters("b[", 2);
for (auto i = val.m_value.binary->cbegin();
i != val.m_value.binary->cend() - 1; ++i)
o->write_characters("{\"bytes\":[", 10);
if (not val.m_value.binary->empty())
{
dump_integer(*i);
o->write_character(',');
for (auto i = val.m_value.binary->cbegin();
i != val.m_value.binary->cend() - 1; ++i)
{
dump_integer(*i);
o->write_character(',');
}
dump_integer(val.m_value.binary->back());
}
dump_integer(val.m_value.binary->back());
o->write_character(']');
o->write_characters("],\"subtype\":", 12);
if (val.m_value.binary->has_subtype())
{
dump_integer(val.m_value.binary->subtype());
o->write_character('}');
}
else
{
o->write_characters("null}", 5);
}
}
return;
}

View file

@ -2234,16 +2234,15 @@ class basic_json
possible values: `strict` (throws and exception in case a decoding error
occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD),
and `ignore` (ignore invalid UTF-8 sequences during serialization).
@param[in] serialize_binary Whether or not to allow serialization of binary
types to JSON. Because binary types are non-standard, this will produce
non-conformant JSON, and is disabled by default. This flag is primarily
useful for debugging. Will output the binary value as a list of 8-bit
numbers prefixed by "b" (e.g. "bindata" = b[3, 0, 42, 255]).
@return string containing the serialization of the JSON value
@throw type_error.316 if a string stored inside the JSON value is not
UTF-8 encoded
UTF-8 encoded and @a error_handler is set to strict
@note Binary values are serialized as object containing two keys:
- "bytes": an array of bytes as integers
- "subtype": the subtype as integer or "null" if the binary has no subtype
@complexity Linear.
@ -2258,24 +2257,24 @@ class basic_json
@since version 1.0.0; indentation character @a indent_char, option
@a ensure_ascii and exceptions added in version 3.0.0; error
handlers added in version 3.4.0.
handlers added in version 3.4.0; serialization of binary values added
in version 3.8.0.
*/
string_t dump(const int indent = -1,
const char indent_char = ' ',
const bool ensure_ascii = false,
const error_handler_t error_handler = error_handler_t::strict,
const bool serialize_binary = false) const
const error_handler_t error_handler = error_handler_t::strict) const
{
string_t result;
serializer s(detail::output_adapter<char, string_t>(result), indent_char, error_handler);
if (indent >= 0)
{
s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent), 0, serialize_binary);
s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent));
}
else
{
s.dump(*this, false, ensure_ascii, 0, 0, serialize_binary);
s.dump(*this, false, ensure_ascii, 0);
}
return result;

View file

@ -14828,19 +14828,22 @@ class serializer
- strings and object keys are escaped using `escape_string()`
- integer numbers are converted implicitly via `operator<<`
- floating-point numbers are converted to a string using `"%g"` format
- if specified to, binary values are output using the syntax `b[]`, otherwise an exception is thrown
- binary values are serialized as objects containing the subtype and the
byte array
@param[in] val value to serialize
@param[in] pretty_print whether the output shall be pretty-printed
@param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters
in the output are escaped with `\uXXXX` sequences, and the result consists
of ASCII characters only.
@param[in] indent_step the indent level
@param[in] current_indent the current indent level (only used internally)
@param[in] serialize_binary whether the output shall include non-standard binary output
*/
void dump(const BasicJsonType& val, const bool pretty_print,
void dump(const BasicJsonType& val,
const bool pretty_print,
const bool ensure_ascii,
const unsigned int indent_step,
const unsigned int current_indent = 0,
const bool serialize_binary = false)
const unsigned int current_indent = 0)
{
switch (val.m_type)
{
@ -14871,7 +14874,7 @@ class serializer
o->write_character('\"');
dump_escaped(i->first, ensure_ascii);
o->write_characters("\": ", 3);
dump(i->second, true, ensure_ascii, indent_step, new_indent, serialize_binary);
dump(i->second, true, ensure_ascii, indent_step, new_indent);
o->write_characters(",\n", 2);
}
@ -14882,7 +14885,7 @@ class serializer
o->write_character('\"');
dump_escaped(i->first, ensure_ascii);
o->write_characters("\": ", 3);
dump(i->second, true, ensure_ascii, indent_step, new_indent, serialize_binary);
dump(i->second, true, ensure_ascii, indent_step, new_indent);
o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
@ -14899,7 +14902,7 @@ class serializer
o->write_character('\"');
dump_escaped(i->first, ensure_ascii);
o->write_characters("\":", 2);
dump(i->second, false, ensure_ascii, indent_step, current_indent, serialize_binary);
dump(i->second, false, ensure_ascii, indent_step, current_indent);
o->write_character(',');
}
@ -14909,7 +14912,7 @@ class serializer
o->write_character('\"');
dump_escaped(i->first, ensure_ascii);
o->write_characters("\":", 2);
dump(i->second, false, ensure_ascii, indent_step, current_indent, serialize_binary);
dump(i->second, false, ensure_ascii, indent_step, current_indent);
o->write_character('}');
}
@ -14941,14 +14944,14 @@ class serializer
i != val.m_value.array->cend() - 1; ++i)
{
o->write_characters(indent_string.c_str(), new_indent);
dump(*i, true, ensure_ascii, indent_step, new_indent, serialize_binary);
dump(*i, true, ensure_ascii, indent_step, new_indent);
o->write_characters(",\n", 2);
}
// last element
assert(not val.m_value.array->empty());
o->write_characters(indent_string.c_str(), new_indent);
dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent, serialize_binary);
dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent);
o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
@ -14962,13 +14965,13 @@ class serializer
for (auto i = val.m_value.array->cbegin();
i != val.m_value.array->cend() - 1; ++i)
{
dump(*i, false, ensure_ascii, indent_step, current_indent, serialize_binary);
dump(*i, false, ensure_ascii, indent_step, current_indent);
o->write_character(',');
}
// last element
assert(not val.m_value.array->empty());
dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent, serialize_binary);
dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent);
o->write_character(']');
}
@ -14986,27 +14989,73 @@ class serializer
case value_t::binary:
{
if (not serialize_binary)
if (pretty_print)
{
JSON_THROW(type_error::create(317, "cannot serialize binary data to text JSON"));
}
o->write_characters("{\n", 2);
if (val.m_value.binary->empty())
{
o->write_characters("b[]", 3);
// variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step;
if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent))
{
indent_string.resize(indent_string.size() * 2, ' ');
}
o->write_characters(indent_string.c_str(), new_indent);
o->write_characters("\"bytes\": [", 10);
if (not val.m_value.binary->empty())
{
for (auto i = val.m_value.binary->cbegin();
i != val.m_value.binary->cend() - 1; ++i)
{
dump_integer(*i);
o->write_characters(", ", 2);
}
dump_integer(val.m_value.binary->back());
}
o->write_characters("],\n", 3);
o->write_characters(indent_string.c_str(), new_indent);
o->write_characters("\"subtype\": ", 11);
if (val.m_value.binary->has_subtype())
{
dump_integer(val.m_value.binary->subtype());
}
else
{
o->write_characters("null", 4);
}
o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
o->write_character('}');
}
else
{
o->write_characters("b[", 2);
for (auto i = val.m_value.binary->cbegin();
i != val.m_value.binary->cend() - 1; ++i)
o->write_characters("{\"bytes\":[", 10);
if (not val.m_value.binary->empty())
{
dump_integer(*i);
o->write_character(',');
for (auto i = val.m_value.binary->cbegin();
i != val.m_value.binary->cend() - 1; ++i)
{
dump_integer(*i);
o->write_character(',');
}
dump_integer(val.m_value.binary->back());
}
dump_integer(val.m_value.binary->back());
o->write_character(']');
o->write_characters("],\"subtype\":", 12);
if (val.m_value.binary->has_subtype())
{
dump_integer(val.m_value.binary->subtype());
o->write_character('}');
}
else
{
o->write_characters("null}", 5);
}
}
return;
}
@ -17963,16 +18012,15 @@ class basic_json
possible values: `strict` (throws and exception in case a decoding error
occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD),
and `ignore` (ignore invalid UTF-8 sequences during serialization).
@param[in] serialize_binary Whether or not to allow serialization of binary
types to JSON. Because binary types are non-standard, this will produce
non-conformant JSON, and is disabled by default. This flag is primarily
useful for debugging. Will output the binary value as a list of 8-bit
numbers prefixed by "b" (e.g. "bindata" = b[3, 0, 42, 255]).
@return string containing the serialization of the JSON value
@throw type_error.316 if a string stored inside the JSON value is not
UTF-8 encoded
UTF-8 encoded and @a error_handler is set to strict
@note Binary values are serialized as object containing two keys:
- "bytes": an array of bytes as integers
- "subtype": the subtype as integer or "null" if the binary has no subtype
@complexity Linear.
@ -17987,24 +18035,24 @@ class basic_json
@since version 1.0.0; indentation character @a indent_char, option
@a ensure_ascii and exceptions added in version 3.0.0; error
handlers added in version 3.4.0.
handlers added in version 3.4.0; serialization of binary values added
in version 3.8.0.
*/
string_t dump(const int indent = -1,
const char indent_char = ' ',
const bool ensure_ascii = false,
const error_handler_t error_handler = error_handler_t::strict,
const bool serialize_binary = false) const
const error_handler_t error_handler = error_handler_t::strict) const
{
string_t result;
serializer s(detail::output_adapter<char, string_t>(result), indent_char, error_handler);
if (indent >= 0)
{
s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent), 0, serialize_binary);
s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent));
}
else
{
s.dump(*this, false, ensure_ascii, 0, 0, serialize_binary);
s.dump(*this, false, ensure_ascii, 0);
}
return result;

View file

@ -1582,7 +1582,7 @@ TEST_CASE("CBOR")
auto j = json::from_cbor(input);
CHECK(j.is_binary());
auto k = json::binary_array({0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x99});
CAPTURE(j.dump(0, ' ', false, json::error_handler_t::strict, true))
CAPTURE(j.dump(0, ' ', false, json::error_handler_t::strict))
CHECK(j == k);
}

View file

@ -1919,8 +1919,7 @@ TEST_CASE("regression tests")
j.dump(4, // Indent
' ', // Indent char
false, // Ensure ascii
json::error_handler_t::strict, // Error
true // Allow binary data
json::error_handler_t::strict // Error
)
);
}

View file

@ -209,52 +209,114 @@ TEST_CASE_TEMPLATE("serialization for extreme integer values", T, int32_t, uint3
TEST_CASE("dump with binary values")
{
SECTION("serialize_binary = false")
{
auto binary = json::binary_array({1, 2, 3, 4});
auto binary_empty = json::binary_array({});
json object = {{"key", binary}};
json array = {"value", 1, binary};
auto binary = json::binary_array({1, 2, 3, 4});
auto binary_empty = json::binary_array({});
auto binary_with_subtype = json::binary_array({1, 2, 3, 4}, 128);
auto binary_empty_with_subtype = json::binary_array({}, 128);
CHECK_THROWS_AS(binary.dump(), json::type_error);
CHECK_THROWS_AS(binary_empty.dump(), json::type_error);
CHECK_THROWS_AS(object.dump(), json::type_error);
CHECK_THROWS_AS(array.dump(), json::type_error);
CHECK_THROWS_WITH(binary.dump(), "[json.exception.type_error.317] cannot serialize binary data to text JSON");
CHECK_THROWS_WITH(binary_empty.dump(), "[json.exception.type_error.317] cannot serialize binary data to text JSON");
CHECK_THROWS_WITH(object.dump(), "[json.exception.type_error.317] cannot serialize binary data to text JSON");
CHECK_THROWS_WITH(array.dump(), "[json.exception.type_error.317] cannot serialize binary data to text JSON");
json object = {{"key", binary}};
json object_empty = {{"key", binary_empty}};
json object_with_subtype = {{"key", binary_with_subtype}};
json object_empty_with_subtype = {{"key", binary_empty_with_subtype}};
json array = {"value", 1, binary};
json array_empty = {"value", 1, binary_empty};
json array_with_subtype = {"value", 1, binary_with_subtype};
json array_empty_with_subtype = {"value", 1, binary_empty_with_subtype};
SECTION("normal")
{
CHECK(binary.dump() == "{\"bytes\":[1,2,3,4],\"subtype\":null}");
CHECK(binary_empty.dump() == "{\"bytes\":[],\"subtype\":null}");
CHECK(binary_with_subtype.dump() == "{\"bytes\":[1,2,3,4],\"subtype\":128}");
CHECK(binary_empty_with_subtype.dump() == "{\"bytes\":[],\"subtype\":128}");
CHECK(object.dump() == "{\"key\":{\"bytes\":[1,2,3,4],\"subtype\":null}}");
CHECK(object_empty.dump() == "{\"key\":{\"bytes\":[],\"subtype\":null}}");
CHECK(object_with_subtype.dump() == "{\"key\":{\"bytes\":[1,2,3,4],\"subtype\":128}}");
CHECK(object_empty_with_subtype.dump() == "{\"key\":{\"bytes\":[],\"subtype\":128}}");
CHECK(array.dump() == "[\"value\",1,{\"bytes\":[1,2,3,4],\"subtype\":null}]");
CHECK(array_empty.dump() == "[\"value\",1,{\"bytes\":[],\"subtype\":null}]");
CHECK(array_with_subtype.dump() == "[\"value\",1,{\"bytes\":[1,2,3,4],\"subtype\":128}]");
CHECK(array_empty_with_subtype.dump() == "[\"value\",1,{\"bytes\":[],\"subtype\":128}]");
}
SECTION("serialize_binary = true")
SECTION("pretty-printed")
{
auto binary = json::binary_array({1, 2, 3, 4});
auto binary_empty = json::binary_array({});
json object = {{"key", binary}};
json array = {"value", 1, binary};
CHECK(binary.dump(-1, ' ', false, json::error_handler_t::strict, true) == "b[1,2,3,4]");
CHECK(binary_empty.dump(-1, ' ', false, json::error_handler_t::strict, true) == "b[]");
CHECK(object.dump(-1, ' ', false, json::error_handler_t::strict, true) == "{\"key\":b[1,2,3,4]}");
CHECK(array.dump(-1, ' ', false, json::error_handler_t::strict, true) == "[\"value\",1,b[1,2,3,4]]");
}
SECTION("serialize_binary = true, pretty-printed")
{
auto binary = json::binary_array({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20});
auto binary_empty = json::binary_array({});
json object = {{"key", binary}};
json array = {"value", 1, binary};
CHECK(binary.dump(4, ' ', false, json::error_handler_t::strict, true) == "b[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]");
CHECK(binary_empty.dump(4, ' ', false, json::error_handler_t::strict, true) == "b[]");
CHECK(object.dump(4, ' ', false, json::error_handler_t::strict, true) == "{\n"
" \"key\": b[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]\n"
CHECK(binary.dump(4) == "{\n"
" \"bytes\": [1, 2, 3, 4],\n"
" \"subtype\": null\n"
"}");
CHECK(array.dump(4, ' ', false, json::error_handler_t::strict, true) == "[\n"
CHECK(binary_empty.dump(4) == "{\n"
" \"bytes\": [],\n"
" \"subtype\": null\n"
"}");
CHECK(binary_with_subtype.dump(4) == "{\n"
" \"bytes\": [1, 2, 3, 4],\n"
" \"subtype\": 128\n"
"}");
CHECK(binary_empty_with_subtype.dump(4) == "{\n"
" \"bytes\": [],\n"
" \"subtype\": 128\n"
"}");
CHECK(object.dump(4) == "{\n"
" \"key\": {\n"
" \"bytes\": [1, 2, 3, 4],\n"
" \"subtype\": null\n"
" }\n"
"}");
CHECK(object_empty.dump(4) == "{\n"
" \"key\": {\n"
" \"bytes\": [],\n"
" \"subtype\": null\n"
" }\n"
"}");
CHECK(object_with_subtype.dump(4) == "{\n"
" \"key\": {\n"
" \"bytes\": [1, 2, 3, 4],\n"
" \"subtype\": 128\n"
" }\n"
"}");
CHECK(object_empty_with_subtype.dump(4) == "{\n"
" \"key\": {\n"
" \"bytes\": [],\n"
" \"subtype\": 128\n"
" }\n"
"}");
CHECK(array.dump(4) == "[\n"
" \"value\",\n"
" 1,\n"
" b[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]\n"
" {\n"
" \"bytes\": [1, 2, 3, 4],\n"
" \"subtype\": null\n"
" }\n"
"]");
CHECK(array_empty.dump(4) == "[\n"
" \"value\",\n"
" 1,\n"
" {\n"
" \"bytes\": [],\n"
" \"subtype\": null\n"
" }\n"
"]");
CHECK(array_with_subtype.dump(4) == "[\n"
" \"value\",\n"
" 1,\n"
" {\n"
" \"bytes\": [1, 2, 3, 4],\n"
" \"subtype\": 128\n"
" }\n"
"]");
CHECK(array_empty_with_subtype.dump(4) == "[\n"
" \"value\",\n"
" 1,\n"
" {\n"
" \"bytes\": [],\n"
" \"subtype\": 128\n"
" }\n"
"]");
}
}