micro-optimization of dump()

A lot of small changes to avoid memory allocations:

- The locale is queried only once, in the serializer's constructor, rather
than on every floating-point number serialization.
- The indentation string is now a serializer member that is reused across dump() calls and only grown when a deeper indentation is needed.
- The string escape function (now dump_escaped) writes the input directly to the output stream, avoiding a copy, if no escaping is necessary.
- The string escape function and extra_space() now use a switch with an
explicit case for every control character instead of a range check in the default branch.

Cachegrind measures a performance improvement of roughly 15%.
This commit is contained in:
Niels Lohmann 2017-02-28 16:28:22 +01:00
parent fc48b8ac2b
commit 224f99070b
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
3 changed files with 277 additions and 160 deletions

View file

@ -34,6 +34,7 @@ SOFTWARE.
#include <cassert> // assert #include <cassert> // assert
#include <cctype> // isdigit #include <cctype> // isdigit
#include <ciso646> // and, not, or #include <ciso646> // and, not, or
#include <clocale> // lconv, localeconv
#include <cmath> // isfinite, labs, ldexp, signbit #include <cmath> // isfinite, labs, ldexp, signbit
#include <cstddef> // nullptr_t, ptrdiff_t, size_t #include <cstddef> // nullptr_t, ptrdiff_t, size_t
#include <cstdint> // int64_t, uint64_t #include <cstdint> // int64_t, uint64_t
@ -6203,7 +6204,9 @@ class basic_json
{ {
public: public:
serializer(std::ostream& s) serializer(std::ostream& s)
: o(s) : o(s), loc(std::localeconv()),
thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]),
decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0])
{} {}
/*! /*!
@ -6244,7 +6247,10 @@ class basic_json
// variable to hold indentation for recursive calls // variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step; const auto new_indent = current_indent + indent_step;
string_t indent_string = string_t(new_indent, ' '); if (indent_string.size() < new_indent)
{
indent_string.resize(new_indent, ' ');
}
// first n-1 elements // first n-1 elements
auto i = val.m_value.object->cbegin(); auto i = val.m_value.object->cbegin();
@ -6252,8 +6258,7 @@ class basic_json
{ {
o.write(indent_string.c_str(), new_indent); o.write(indent_string.c_str(), new_indent);
o.put('\"'); o.put('\"');
const auto s = escape_string(i->first); dump_escaped(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.write("\": ", 3); o.write("\": ", 3);
dump(i->second, true, indent_step, new_indent); dump(i->second, true, indent_step, new_indent);
o.write(",\n", 2); o.write(",\n", 2);
@ -6263,8 +6268,7 @@ class basic_json
assert(i != val.m_value.object->cend()); assert(i != val.m_value.object->cend());
o.write(indent_string.c_str(), new_indent); o.write(indent_string.c_str(), new_indent);
o.put('\"'); o.put('\"');
const auto s = escape_string(i->first); dump_escaped(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.write("\": ", 3); o.write("\": ", 3);
dump(i->second, true, indent_step, new_indent); dump(i->second, true, indent_step, new_indent);
@ -6281,8 +6285,7 @@ class basic_json
for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
{ {
o.put('\"'); o.put('\"');
const auto s = escape_string(i->first); dump_escaped(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.write("\":", 2); o.write("\":", 2);
dump(i->second, false, indent_step, current_indent); dump(i->second, false, indent_step, current_indent);
o.put(','); o.put(',');
@ -6291,8 +6294,7 @@ class basic_json
// last element // last element
assert(i != val.m_value.object->cend()); assert(i != val.m_value.object->cend());
o.put('\"'); o.put('\"');
const auto s = escape_string(i->first); dump_escaped(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.write("\":", 2); o.write("\":", 2);
dump(i->second, false, indent_step, current_indent); dump(i->second, false, indent_step, current_indent);
@ -6316,7 +6318,10 @@ class basic_json
// variable to hold indentation for recursive calls // variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step; const auto new_indent = current_indent + indent_step;
string_t indent_string = string_t(new_indent, ' '); if (indent_string.size() < new_indent)
{
indent_string.resize(new_indent, ' ');
}
// first n-1 elements // first n-1 elements
for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i)
@ -6359,8 +6364,7 @@ class basic_json
case value_t::string: case value_t::string:
{ {
o.put('\"'); o.put('\"');
const auto s = escape_string(*val.m_value.string); dump_escaped(*val.m_value.string);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.put('\"'); o.put('\"');
return; return;
} }
@ -6380,19 +6384,19 @@ class basic_json
case value_t::number_integer: case value_t::number_integer:
{ {
x_write(val.m_value.number_integer); dump_integer(val.m_value.number_integer);
return; return;
} }
case value_t::number_unsigned: case value_t::number_unsigned:
{ {
x_write(val.m_value.number_unsigned); dump_integer(val.m_value.number_unsigned);
return; return;
} }
case value_t::number_float: case value_t::number_float:
{ {
x_write(val.m_value.number_float); dump_float(val.m_value.number_float);
return; return;
} }
@ -6438,14 +6442,40 @@ class basic_json
return res + 1; return res + 1;
} }
default: case 0x00:
{ case 0x01:
if (c >= 0x00 and c <= 0x1f) case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
{ {
// from c (1 byte) to \uxxxx (6 bytes) // from c (1 byte) to \uxxxx (6 bytes)
return res + 5; return res + 5;
} }
default:
{
return res; return res;
} }
} }
@ -6465,12 +6495,13 @@ class basic_json
@complexity Linear in the length of string @a s. @complexity Linear in the length of string @a s.
*/ */
static string_t escape_string(const string_t& s) void dump_escaped(const string_t& s) const
{ {
const auto space = extra_space(s); const auto space = extra_space(s);
if (space == 0) if (space == 0)
{ {
return s; o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
return;
} }
// create a result string of necessary size // create a result string of necessary size
@ -6537,9 +6568,33 @@ class basic_json
break; break;
} }
default: case 0x00:
{ case 0x01:
if (c >= 0x00 and c <= 0x1f) case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
{ {
// convert a number 0..15 to its hex representation // convert a number 0..15 to its hex representation
// (0..f) // (0..f)
@ -6558,22 +6613,24 @@ class basic_json
} }
++pos; ++pos;
break;
} }
else
default:
{ {
// all other characters are added as-is // all other characters are added as-is
result[pos++] = c; result[pos++] = c;
}
break; break;
} }
} }
} }
return result; assert(pos == s.size() + space);
o.write(result.c_str(), static_cast<std::streamsize>(result.size()));
} }
template<typename NumberType> template<typename NumberType>
void x_write(NumberType x) void dump_integer(NumberType x)
{ {
// special case for "0" // special case for "0"
if (x == 0) if (x == 0)
@ -6607,7 +6664,7 @@ class basic_json
o.write(m_buf.data(), static_cast<std::streamsize>(i)); o.write(m_buf.data(), static_cast<std::streamsize>(i));
} }
void x_write(number_float_t x) void dump_float(number_float_t x)
{ {
// special case for 0.0 and -0.0 // special case for 0.0 and -0.0
if (x == 0) if (x == 0)
@ -6634,15 +6691,6 @@ class basic_json
// check if buffer was large enough // check if buffer was large enough
assert(static_cast<size_t>(written_bytes) < m_buf.size()); assert(static_cast<size_t>(written_bytes) < m_buf.size());
// read information from locale
const auto loc = localeconv();
assert(loc != nullptr);
const char thousands_sep = !loc->thousands_sep ? '\0'
: loc->thousands_sep[0];
const char decimal_point = !loc->decimal_point ? '\0'
: loc->decimal_point[0];
// erase thousands separator // erase thousands separator
if (thousands_sep != '\0') if (thousands_sep != '\0')
{ {
@ -6687,6 +6735,12 @@ class basic_json
/// a (hopefully) large enough character buffer /// a (hopefully) large enough character buffer
std::array < char, 64 > m_buf{{}}; std::array < char, 64 > m_buf{{}};
const std::lconv* loc = nullptr;
const char thousands_sep = '\0';
const char decimal_point = '\0';
string_t indent_string = string_t(512, ' ');
}; };
public: public:

View file

@ -34,6 +34,7 @@ SOFTWARE.
#include <cassert> // assert #include <cassert> // assert
#include <cctype> // isdigit #include <cctype> // isdigit
#include <ciso646> // and, not, or #include <ciso646> // and, not, or
#include <clocale> // lconv, localeconv
#include <cmath> // isfinite, labs, ldexp, signbit #include <cmath> // isfinite, labs, ldexp, signbit
#include <cstddef> // nullptr_t, ptrdiff_t, size_t #include <cstddef> // nullptr_t, ptrdiff_t, size_t
#include <cstdint> // int64_t, uint64_t #include <cstdint> // int64_t, uint64_t
@ -6203,7 +6204,9 @@ class basic_json
{ {
public: public:
serializer(std::ostream& s) serializer(std::ostream& s)
: o(s) : o(s), loc(std::localeconv()),
thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]),
decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0])
{} {}
/*! /*!
@ -6244,7 +6247,10 @@ class basic_json
// variable to hold indentation for recursive calls // variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step; const auto new_indent = current_indent + indent_step;
string_t indent_string = string_t(new_indent, ' '); if (indent_string.size() < new_indent)
{
indent_string.resize(new_indent, ' ');
}
// first n-1 elements // first n-1 elements
auto i = val.m_value.object->cbegin(); auto i = val.m_value.object->cbegin();
@ -6252,8 +6258,7 @@ class basic_json
{ {
o.write(indent_string.c_str(), new_indent); o.write(indent_string.c_str(), new_indent);
o.put('\"'); o.put('\"');
const auto s = escape_string(i->first); dump_escaped(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.write("\": ", 3); o.write("\": ", 3);
dump(i->second, true, indent_step, new_indent); dump(i->second, true, indent_step, new_indent);
o.write(",\n", 2); o.write(",\n", 2);
@ -6263,8 +6268,7 @@ class basic_json
assert(i != val.m_value.object->cend()); assert(i != val.m_value.object->cend());
o.write(indent_string.c_str(), new_indent); o.write(indent_string.c_str(), new_indent);
o.put('\"'); o.put('\"');
const auto s = escape_string(i->first); dump_escaped(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.write("\": ", 3); o.write("\": ", 3);
dump(i->second, true, indent_step, new_indent); dump(i->second, true, indent_step, new_indent);
@ -6281,8 +6285,7 @@ class basic_json
for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
{ {
o.put('\"'); o.put('\"');
const auto s = escape_string(i->first); dump_escaped(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.write("\":", 2); o.write("\":", 2);
dump(i->second, false, indent_step, current_indent); dump(i->second, false, indent_step, current_indent);
o.put(','); o.put(',');
@ -6291,8 +6294,7 @@ class basic_json
// last element // last element
assert(i != val.m_value.object->cend()); assert(i != val.m_value.object->cend());
o.put('\"'); o.put('\"');
const auto s = escape_string(i->first); dump_escaped(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.write("\":", 2); o.write("\":", 2);
dump(i->second, false, indent_step, current_indent); dump(i->second, false, indent_step, current_indent);
@ -6316,7 +6318,10 @@ class basic_json
// variable to hold indentation for recursive calls // variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step; const auto new_indent = current_indent + indent_step;
string_t indent_string = string_t(new_indent, ' '); if (indent_string.size() < new_indent)
{
indent_string.resize(new_indent, ' ');
}
// first n-1 elements // first n-1 elements
for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i) for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i)
@ -6359,8 +6364,7 @@ class basic_json
case value_t::string: case value_t::string:
{ {
o.put('\"'); o.put('\"');
const auto s = escape_string(*val.m_value.string); dump_escaped(*val.m_value.string);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
o.put('\"'); o.put('\"');
return; return;
} }
@ -6380,19 +6384,19 @@ class basic_json
case value_t::number_integer: case value_t::number_integer:
{ {
x_write(val.m_value.number_integer); dump_integer(val.m_value.number_integer);
return; return;
} }
case value_t::number_unsigned: case value_t::number_unsigned:
{ {
x_write(val.m_value.number_unsigned); dump_integer(val.m_value.number_unsigned);
return; return;
} }
case value_t::number_float: case value_t::number_float:
{ {
x_write(val.m_value.number_float); dump_float(val.m_value.number_float);
return; return;
} }
@ -6438,14 +6442,40 @@ class basic_json
return res + 1; return res + 1;
} }
default: case 0x00:
{ case 0x01:
if (c >= 0x00 and c <= 0x1f) case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
{ {
// from c (1 byte) to \uxxxx (6 bytes) // from c (1 byte) to \uxxxx (6 bytes)
return res + 5; return res + 5;
} }
default:
{
return res; return res;
} }
} }
@ -6465,12 +6495,13 @@ class basic_json
@complexity Linear in the length of string @a s. @complexity Linear in the length of string @a s.
*/ */
static string_t escape_string(const string_t& s) void dump_escaped(const string_t& s) const
{ {
const auto space = extra_space(s); const auto space = extra_space(s);
if (space == 0) if (space == 0)
{ {
return s; o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
return;
} }
// create a result string of necessary size // create a result string of necessary size
@ -6537,9 +6568,33 @@ class basic_json
break; break;
} }
default: case 0x00:
{ case 0x01:
if (c >= 0x00 and c <= 0x1f) case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
{ {
// convert a number 0..15 to its hex representation // convert a number 0..15 to its hex representation
// (0..f) // (0..f)
@ -6558,22 +6613,24 @@ class basic_json
} }
++pos; ++pos;
break;
} }
else
default:
{ {
// all other characters are added as-is // all other characters are added as-is
result[pos++] = c; result[pos++] = c;
}
break; break;
} }
} }
} }
return result; assert(pos == s.size() + space);
o.write(result.c_str(), static_cast<std::streamsize>(result.size()));
} }
template<typename NumberType> template<typename NumberType>
void x_write(NumberType x) void dump_integer(NumberType x)
{ {
// special case for "0" // special case for "0"
if (x == 0) if (x == 0)
@ -6607,7 +6664,7 @@ class basic_json
o.write(m_buf.data(), static_cast<std::streamsize>(i)); o.write(m_buf.data(), static_cast<std::streamsize>(i));
} }
void x_write(number_float_t x) void dump_float(number_float_t x)
{ {
// special case for 0.0 and -0.0 // special case for 0.0 and -0.0
if (x == 0) if (x == 0)
@ -6634,15 +6691,6 @@ class basic_json
// check if buffer was large enough // check if buffer was large enough
assert(static_cast<size_t>(written_bytes) < m_buf.size()); assert(static_cast<size_t>(written_bytes) < m_buf.size());
// read information from locale
const auto loc = localeconv();
assert(loc != nullptr);
const char thousands_sep = !loc->thousands_sep ? '\0'
: loc->thousands_sep[0];
const char decimal_point = !loc->decimal_point ? '\0'
: loc->decimal_point[0];
// erase thousands separator // erase thousands separator
if (thousands_sep != '\0') if (thousands_sep != '\0')
{ {
@ -6687,6 +6735,12 @@ class basic_json
/// a (hopefully) large enough character buffer /// a (hopefully) large enough character buffer
std::array < char, 64 > m_buf{{}}; std::array < char, 64 > m_buf{{}};
const std::lconv* loc = nullptr;
const char thousands_sep = '\0';
const char decimal_point = '\0';
string_t indent_string = string_t(512, ' ');
}; };
public: public:

View file

@ -49,44 +49,53 @@ TEST_CASE("convenience functions")
SECTION("string escape") SECTION("string escape")
{ {
CHECK(json::serializer::escape_string("\"") == "\\\""); const auto check_escaped = [](const char* original,
CHECK(json::serializer::escape_string("\\") == "\\\\"); const char* escaped)
CHECK(json::serializer::escape_string("\b") == "\\b"); {
CHECK(json::serializer::escape_string("\f") == "\\f"); std::stringstream ss;
CHECK(json::serializer::escape_string("\n") == "\\n"); json::serializer s(ss);
CHECK(json::serializer::escape_string("\r") == "\\r"); s.dump_escaped(original);
CHECK(json::serializer::escape_string("\t") == "\\t"); CHECK(ss.str() == escaped);
};
CHECK(json::serializer::escape_string("\x01") == "\\u0001"); check_escaped("\"", "\\\"");
CHECK(json::serializer::escape_string("\x02") == "\\u0002"); check_escaped("\\", "\\\\");
CHECK(json::serializer::escape_string("\x03") == "\\u0003"); check_escaped("\b", "\\b");
CHECK(json::serializer::escape_string("\x04") == "\\u0004"); check_escaped("\f", "\\f");
CHECK(json::serializer::escape_string("\x05") == "\\u0005"); check_escaped("\n", "\\n");
CHECK(json::serializer::escape_string("\x06") == "\\u0006"); check_escaped("\r", "\\r");
CHECK(json::serializer::escape_string("\x07") == "\\u0007"); check_escaped("\t", "\\t");
CHECK(json::serializer::escape_string("\x08") == "\\b");
CHECK(json::serializer::escape_string("\x09") == "\\t"); check_escaped("\x01", "\\u0001");
CHECK(json::serializer::escape_string("\x0a") == "\\n"); check_escaped("\x02", "\\u0002");
CHECK(json::serializer::escape_string("\x0b") == "\\u000b"); check_escaped("\x03", "\\u0003");
CHECK(json::serializer::escape_string("\x0c") == "\\f"); check_escaped("\x04", "\\u0004");
CHECK(json::serializer::escape_string("\x0d") == "\\r"); check_escaped("\x05", "\\u0005");
CHECK(json::serializer::escape_string("\x0e") == "\\u000e"); check_escaped("\x06", "\\u0006");
CHECK(json::serializer::escape_string("\x0f") == "\\u000f"); check_escaped("\x07", "\\u0007");
CHECK(json::serializer::escape_string("\x10") == "\\u0010"); check_escaped("\x08", "\\b");
CHECK(json::serializer::escape_string("\x11") == "\\u0011"); check_escaped("\x09", "\\t");
CHECK(json::serializer::escape_string("\x12") == "\\u0012"); check_escaped("\x0a", "\\n");
CHECK(json::serializer::escape_string("\x13") == "\\u0013"); check_escaped("\x0b", "\\u000b");
CHECK(json::serializer::escape_string("\x14") == "\\u0014"); check_escaped("\x0c", "\\f");
CHECK(json::serializer::escape_string("\x15") == "\\u0015"); check_escaped("\x0d", "\\r");
CHECK(json::serializer::escape_string("\x16") == "\\u0016"); check_escaped("\x0e", "\\u000e");
CHECK(json::serializer::escape_string("\x17") == "\\u0017"); check_escaped("\x0f", "\\u000f");
CHECK(json::serializer::escape_string("\x18") == "\\u0018"); check_escaped("\x10", "\\u0010");
CHECK(json::serializer::escape_string("\x19") == "\\u0019"); check_escaped("\x11", "\\u0011");
CHECK(json::serializer::escape_string("\x1a") == "\\u001a"); check_escaped("\x12", "\\u0012");
CHECK(json::serializer::escape_string("\x1b") == "\\u001b"); check_escaped("\x13", "\\u0013");
CHECK(json::serializer::escape_string("\x1c") == "\\u001c"); check_escaped("\x14", "\\u0014");
CHECK(json::serializer::escape_string("\x1d") == "\\u001d"); check_escaped("\x15", "\\u0015");
CHECK(json::serializer::escape_string("\x1e") == "\\u001e"); check_escaped("\x16", "\\u0016");
CHECK(json::serializer::escape_string("\x1f") == "\\u001f"); check_escaped("\x17", "\\u0017");
check_escaped("\x18", "\\u0018");
check_escaped("\x19", "\\u0019");
check_escaped("\x1a", "\\u001a");
check_escaped("\x1b", "\\u001b");
check_escaped("\x1c", "\\u001c");
check_escaped("\x1d", "\\u001d");
check_escaped("\x1e", "\\u001e");
check_escaped("\x1f", "\\u001f");
} }
} }