add ensure_ascii parameter to dump. #330
This commit is contained in:
parent
7dee868a42
commit
71597be294
3 changed files with 77 additions and 42 deletions
105
src/json.hpp
105
src/json.hpp
|
@ -6423,6 +6423,7 @@ class serializer
|
||||||
@param[in] current_indent the current indent level (only used internally)
|
@param[in] current_indent the current indent level (only used internally)
|
||||||
*/
|
*/
|
||||||
void dump(const BasicJsonType& val, const bool pretty_print,
|
void dump(const BasicJsonType& val, const bool pretty_print,
|
||||||
|
const bool ensure_ascii,
|
||||||
const unsigned int indent_step,
|
const unsigned int indent_step,
|
||||||
const unsigned int current_indent = 0)
|
const unsigned int current_indent = 0)
|
||||||
{
|
{
|
||||||
|
@ -6453,9 +6454,9 @@ class serializer
|
||||||
{
|
{
|
||||||
o->write_characters(indent_string.c_str(), new_indent);
|
o->write_characters(indent_string.c_str(), new_indent);
|
||||||
o->write_character('\"');
|
o->write_character('\"');
|
||||||
dump_escaped(i->first);
|
dump_escaped(i->first, ensure_ascii);
|
||||||
o->write_characters("\": ", 3);
|
o->write_characters("\": ", 3);
|
||||||
dump(i->second, true, indent_step, new_indent);
|
dump(i->second, true, ensure_ascii, indent_step, new_indent);
|
||||||
o->write_characters(",\n", 2);
|
o->write_characters(",\n", 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6463,9 +6464,9 @@ class serializer
|
||||||
assert(i != val.m_value.object->cend());
|
assert(i != val.m_value.object->cend());
|
||||||
o->write_characters(indent_string.c_str(), new_indent);
|
o->write_characters(indent_string.c_str(), new_indent);
|
||||||
o->write_character('\"');
|
o->write_character('\"');
|
||||||
dump_escaped(i->first);
|
dump_escaped(i->first, ensure_ascii);
|
||||||
o->write_characters("\": ", 3);
|
o->write_characters("\": ", 3);
|
||||||
dump(i->second, true, indent_step, new_indent);
|
dump(i->second, true, ensure_ascii, indent_step, new_indent);
|
||||||
|
|
||||||
o->write_character('\n');
|
o->write_character('\n');
|
||||||
o->write_characters(indent_string.c_str(), current_indent);
|
o->write_characters(indent_string.c_str(), current_indent);
|
||||||
|
@ -6480,18 +6481,18 @@ class serializer
|
||||||
for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
|
for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
|
||||||
{
|
{
|
||||||
o->write_character('\"');
|
o->write_character('\"');
|
||||||
dump_escaped(i->first);
|
dump_escaped(i->first, ensure_ascii);
|
||||||
o->write_characters("\":", 2);
|
o->write_characters("\":", 2);
|
||||||
dump(i->second, false, indent_step, current_indent);
|
dump(i->second, false, ensure_ascii, indent_step, current_indent);
|
||||||
o->write_character(',');
|
o->write_character(',');
|
||||||
}
|
}
|
||||||
|
|
||||||
// last element
|
// last element
|
||||||
assert(i != val.m_value.object->cend());
|
assert(i != val.m_value.object->cend());
|
||||||
o->write_character('\"');
|
o->write_character('\"');
|
||||||
dump_escaped(i->first);
|
dump_escaped(i->first, ensure_ascii);
|
||||||
o->write_characters("\":", 2);
|
o->write_characters("\":", 2);
|
||||||
dump(i->second, false, indent_step, current_indent);
|
dump(i->second, false, ensure_ascii, indent_step, current_indent);
|
||||||
|
|
||||||
o->write_character('}');
|
o->write_character('}');
|
||||||
}
|
}
|
||||||
|
@ -6523,14 +6524,14 @@ class serializer
|
||||||
i != val.m_value.array->cend() - 1; ++i)
|
i != val.m_value.array->cend() - 1; ++i)
|
||||||
{
|
{
|
||||||
o->write_characters(indent_string.c_str(), new_indent);
|
o->write_characters(indent_string.c_str(), new_indent);
|
||||||
dump(*i, true, indent_step, new_indent);
|
dump(*i, true, ensure_ascii, indent_step, new_indent);
|
||||||
o->write_characters(",\n", 2);
|
o->write_characters(",\n", 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// last element
|
// last element
|
||||||
assert(not val.m_value.array->empty());
|
assert(not val.m_value.array->empty());
|
||||||
o->write_characters(indent_string.c_str(), new_indent);
|
o->write_characters(indent_string.c_str(), new_indent);
|
||||||
dump(val.m_value.array->back(), true, indent_step, new_indent);
|
dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent);
|
||||||
|
|
||||||
o->write_character('\n');
|
o->write_character('\n');
|
||||||
o->write_characters(indent_string.c_str(), current_indent);
|
o->write_characters(indent_string.c_str(), current_indent);
|
||||||
|
@ -6544,13 +6545,13 @@ class serializer
|
||||||
for (auto i = val.m_value.array->cbegin();
|
for (auto i = val.m_value.array->cbegin();
|
||||||
i != val.m_value.array->cend() - 1; ++i)
|
i != val.m_value.array->cend() - 1; ++i)
|
||||||
{
|
{
|
||||||
dump(*i, false, indent_step, current_indent);
|
dump(*i, false, ensure_ascii, indent_step, current_indent);
|
||||||
o->write_character(',');
|
o->write_character(',');
|
||||||
}
|
}
|
||||||
|
|
||||||
// last element
|
// last element
|
||||||
assert(not val.m_value.array->empty());
|
assert(not val.m_value.array->empty());
|
||||||
dump(val.m_value.array->back(), false, indent_step, current_indent);
|
dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent);
|
||||||
|
|
||||||
o->write_character(']');
|
o->write_character(']');
|
||||||
}
|
}
|
||||||
|
@ -6561,7 +6562,7 @@ class serializer
|
||||||
case value_t::string:
|
case value_t::string:
|
||||||
{
|
{
|
||||||
o->write_character('\"');
|
o->write_character('\"');
|
||||||
dump_escaped(*val.m_value.string);
|
dump_escaped(*val.m_value.string, ensure_ascii);
|
||||||
o->write_character('\"');
|
o->write_character('\"');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -6616,14 +6617,15 @@ class serializer
|
||||||
@brief calculates the extra space to escape a JSON string
|
@brief calculates the extra space to escape a JSON string
|
||||||
|
|
||||||
@param[in] s the string to escape
|
@param[in] s the string to escape
|
||||||
|
@param[in] ensure_ascii whether to escape non-ASCII characters with \uXXXX sequences
|
||||||
@return the number of characters required to escape string @a s
|
@return the number of characters required to escape string @a s
|
||||||
|
|
||||||
@complexity Linear in the length of string @a s.
|
@complexity Linear in the length of string @a s.
|
||||||
*/
|
*/
|
||||||
static std::size_t extra_space(const string_t& s) noexcept
|
static std::size_t extra_space(const string_t& s, const bool ensure_ascii) noexcept
|
||||||
{
|
{
|
||||||
return std::accumulate(s.begin(), s.end(), size_t{},
|
return std::accumulate(s.begin(), s.end(), size_t{},
|
||||||
[](size_t res, typename string_t::value_type c)
|
[ensure_ascii](size_t res, typename string_t::value_type c)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
|
@ -6673,6 +6675,11 @@ class serializer
|
||||||
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
|
if (c & 0x80 and ensure_ascii)
|
||||||
|
{
|
||||||
|
// from c (1 byte) to \uxxxx (6 bytes)
|
||||||
|
return res + 5;
|
||||||
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6688,12 +6695,13 @@ class serializer
|
||||||
representation. The escaped string is written to output stream @a o.
|
representation. The escaped string is written to output stream @a o.
|
||||||
|
|
||||||
@param[in] s the string to escape
|
@param[in] s the string to escape
|
||||||
|
@param[in] ensure_ascii whether to escape non-ASCII characters with \uXXXX sequences
|
||||||
|
|
||||||
@complexity Linear in the length of string @a s.
|
@complexity Linear in the length of string @a s.
|
||||||
*/
|
*/
|
||||||
void dump_escaped(const string_t& s) const
|
void dump_escaped(const string_t& s, const bool ensure_ascii) const
|
||||||
{
|
{
|
||||||
const auto space = extra_space(s);
|
const auto space = extra_space(s, ensure_ascii);
|
||||||
if (space == 0)
|
if (space == 0)
|
||||||
{
|
{
|
||||||
o->write_characters(s.c_str(), s.size());
|
o->write_characters(s.c_str(), s.size());
|
||||||
|
@ -6704,6 +6712,27 @@ class serializer
|
||||||
string_t result(s.size() + space, '\\');
|
string_t result(s.size() + space, '\\');
|
||||||
std::size_t pos = 0;
|
std::size_t pos = 0;
|
||||||
|
|
||||||
|
auto escape_character = [&result, &pos](const typename string_t::value_type c)
|
||||||
|
{
|
||||||
|
// convert a number 0..15 to its hex representation
|
||||||
|
// (0..f)
|
||||||
|
static const char hexify[16] =
|
||||||
|
{
|
||||||
|
'0', '1', '2', '3', '4', '5', '6', '7',
|
||||||
|
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
|
||||||
|
};
|
||||||
|
|
||||||
|
// print character c as \uxxxx
|
||||||
|
for (const char m :
|
||||||
|
{ 'u', '0', '0', hexify[(c >> 4) & 0x0f], hexify[c & 0x0f]
|
||||||
|
})
|
||||||
|
{
|
||||||
|
result[++pos] = m;
|
||||||
|
}
|
||||||
|
|
||||||
|
++pos;
|
||||||
|
};
|
||||||
|
|
||||||
for (const auto& c : s)
|
for (const auto& c : s)
|
||||||
{
|
{
|
||||||
switch (c)
|
switch (c)
|
||||||
|
@ -6792,28 +6821,21 @@ class serializer
|
||||||
case 0x1e:
|
case 0x1e:
|
||||||
case 0x1f:
|
case 0x1f:
|
||||||
{
|
{
|
||||||
// convert a number 0..15 to its hex representation
|
escape_character(c);
|
||||||
// (0..f)
|
|
||||||
static const char hexify[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
|
|
||||||
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
|
|
||||||
};
|
|
||||||
|
|
||||||
// print character c as \uxxxx
|
|
||||||
for (const char m :
|
|
||||||
{'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
|
|
||||||
})
|
|
||||||
{
|
|
||||||
result[++pos] = m;
|
|
||||||
}
|
|
||||||
|
|
||||||
++pos;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
// all other characters are added as-is
|
if (c & 0x80 and ensure_ascii)
|
||||||
result[pos++] = c;
|
{
|
||||||
|
escape_character(c);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// all other characters are added as-is
|
||||||
|
result[pos++] = c;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9017,7 +9039,7 @@ class basic_json
|
||||||
|
|
||||||
Serialization function for JSON values. The function tries to mimic
|
Serialization function for JSON values. The function tries to mimic
|
||||||
Python's `json.dumps()` function, and currently supports its @a indent
|
Python's `json.dumps()` function, and currently supports its @a indent
|
||||||
parameter.
|
and @a ensure_ascii parameters.
|
||||||
|
|
||||||
@param[in] indent If indent is nonnegative, then array elements and object
|
@param[in] indent If indent is nonnegative, then array elements and object
|
||||||
members will be pretty-printed with that indent level. An indent level of
|
members will be pretty-printed with that indent level. An indent level of
|
||||||
|
@ -9025,30 +9047,33 @@ class basic_json
|
||||||
representation.
|
representation.
|
||||||
@param[in] indent_char The character to use for indentation if @a indent is
|
@param[in] indent_char The character to use for indentation if @a indent is
|
||||||
greater than `0`. The default is ` ` (space).
|
greater than `0`. The default is ` ` (space).
|
||||||
|
@param[in] ensure_ascii If ensure_ascii is true (the default), all non-ASCII
|
||||||
|
characters in the output are escaped with \uXXXX sequences, and the result
|
||||||
|
consists of ASCII characters only.
|
||||||
|
|
||||||
@return string containing the serialization of the JSON value
|
@return string containing the serialization of the JSON value
|
||||||
|
|
||||||
@complexity Linear.
|
@complexity Linear.
|
||||||
|
|
||||||
@liveexample{The following example shows the effect of different @a indent
|
@liveexample{The following example shows the effect of different @a indent
|
||||||
parameters to the result of the serialization.,dump}
|
parameters to the result of the serialization.dump}
|
||||||
|
|
||||||
@see https://docs.python.org/2/library/json.html#json.dump
|
@see https://docs.python.org/2/library/json.html#json.dump
|
||||||
|
|
||||||
@since version 1.0.0; indentation character added in version 3.0.0
|
@since version 1.0.0; indentation character added in version 3.0.0
|
||||||
*/
|
*/
|
||||||
string_t dump(const int indent = -1, const char indent_char = ' ') const
|
string_t dump(const int indent = -1, const char indent_char = ' ', const bool ensure_ascii = false) const
|
||||||
{
|
{
|
||||||
string_t result;
|
string_t result;
|
||||||
serializer s(detail::output_adapter_factory<char>::create(result), indent_char);
|
serializer s(detail::output_adapter_factory<char>::create(result), indent_char);
|
||||||
|
|
||||||
if (indent >= 0)
|
if (indent >= 0)
|
||||||
{
|
{
|
||||||
s.dump(*this, true, static_cast<unsigned int>(indent));
|
s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
s.dump(*this, false, 0);
|
s.dump(*this, false, ensure_ascii, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@ -12715,7 +12740,7 @@ class basic_json
|
||||||
|
|
||||||
// do the actual serialization
|
// do the actual serialization
|
||||||
serializer s(detail::output_adapter_factory<char>::create(o), o.fill());
|
serializer s(detail::output_adapter_factory<char>::create(o), o.fill());
|
||||||
s.dump(j, pretty_print, static_cast<unsigned int>(indentation));
|
s.dump(j, pretty_print, false, static_cast<unsigned int>(indentation));
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,11 +50,12 @@ TEST_CASE("convenience functions")
|
||||||
SECTION("string escape")
|
SECTION("string escape")
|
||||||
{
|
{
|
||||||
const auto check_escaped = [](const char* original,
|
const auto check_escaped = [](const char* original,
|
||||||
const char* escaped)
|
const char* escaped,
|
||||||
|
const bool ensure_ascii = false)
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
json::serializer s(nlohmann::detail::output_adapter_factory<char>::create(ss), ' ');
|
json::serializer s(nlohmann::detail::output_adapter_factory<char>::create(ss), ' ');
|
||||||
s.dump_escaped(original);
|
s.dump_escaped(original, ensure_ascii);
|
||||||
CHECK(ss.str() == escaped);
|
CHECK(ss.str() == escaped);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -97,5 +98,7 @@ TEST_CASE("convenience functions")
|
||||||
check_escaped("\x1d", "\\u001d");
|
check_escaped("\x1d", "\\u001d");
|
||||||
check_escaped("\x1e", "\\u001e");
|
check_escaped("\x1e", "\\u001e");
|
||||||
check_escaped("\x1f", "\\u001f");
|
check_escaped("\x1f", "\\u001f");
|
||||||
|
check_escaped("\xA9", "\xA9");
|
||||||
|
check_escaped("\xA9", "\\u00a9", true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -250,6 +250,13 @@ TEST_CASE("object inspection")
|
||||||
CHECK(json("❤️").dump() == "\"❤️\"");
|
CHECK(json("❤️").dump() == "\"❤️\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SECTION("dump with ensure_ascii and non-ASCII characters")
|
||||||
|
{
|
||||||
|
CHECK(json("ä").dump(-1, ' ', true) == R"("\u00c3\u00a4")");
|
||||||
|
CHECK(json("Ö").dump(-1, ' ', true) == R"("\u00c3\u0096")");
|
||||||
|
CHECK(json("❤️").dump(-1, ' ', true) == R"("\u00e2\u009d\u00a4\u00ef\u00b8\u008f")");
|
||||||
|
}
|
||||||
|
|
||||||
SECTION("serialization of discarded element")
|
SECTION("serialization of discarded element")
|
||||||
{
|
{
|
||||||
json j_discarded(json::value_t::discarded);
|
json j_discarded(json::value_t::discarded);
|
||||||
|
|
Loading…
Reference in a new issue