micro-optimization of dump()

A lot of small changes to avoid memory allocations:

- The locale is only queried once rather than with every number
serialization.
- The indentation string is recycled between different calls.
- The string escape function avoids a copy if no escaping is necessary.
- The string escape function and the extra_space function each use an
exhaustive switch statement instead of cascaded ifs.

Cachegrind measures some 15% performance improvement.
This commit is contained in:
Niels Lohmann 2017-02-28 16:28:22 +01:00
parent fc48b8ac2b
commit 224f99070b
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
3 changed files with 277 additions and 160 deletions

View file

@ -34,6 +34,7 @@ SOFTWARE.
#include <cassert> // assert
#include <cctype> // isdigit
#include <ciso646> // and, not, or
#include <clocale> // lconv, localeconv
#include <cmath> // isfinite, labs, ldexp, signbit
#include <cstddef> // nullptr_t, ptrdiff_t, size_t
#include <cstdint> // int64_t, uint64_t
@ -6203,7 +6204,9 @@ class basic_json
{
public:
serializer(std::ostream& s)
: o(s)
: o(s), loc(std::localeconv()),
thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]),
decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0])
{}
/*!
@ -6244,7 +6247,10 @@ class basic_json
// variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step;
string_t indent_string = string_t(new_indent, ' ');
if (indent_string.size() < new_indent)
{
indent_string.resize(new_indent, ' ');
}
// first n-1 elements
auto i = val.m_value.object->cbegin();
@ -6252,8 +6258,7 @@ class basic_json
{
o.write(indent_string.c_str(), new_indent);
o.put('\"');
const auto s = escape_string(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(i->first);
o.write("\": ", 3);
dump(i->second, true, indent_step, new_indent);
o.write(",\n", 2);
@ -6263,8 +6268,7 @@ class basic_json
assert(i != val.m_value.object->cend());
o.write(indent_string.c_str(), new_indent);
o.put('\"');
const auto s = escape_string(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(i->first);
o.write("\": ", 3);
dump(i->second, true, indent_step, new_indent);
@ -6281,8 +6285,7 @@ class basic_json
for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
{
o.put('\"');
const auto s = escape_string(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(i->first);
o.write("\":", 2);
dump(i->second, false, indent_step, current_indent);
o.put(',');
@ -6291,8 +6294,7 @@ class basic_json
// last element
assert(i != val.m_value.object->cend());
o.put('\"');
const auto s = escape_string(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(i->first);
o.write("\":", 2);
dump(i->second, false, indent_step, current_indent);
@ -6316,7 +6318,10 @@ class basic_json
// variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step;
string_t indent_string = string_t(new_indent, ' ');
if (indent_string.size() < new_indent)
{
indent_string.resize(new_indent, ' ');
}
// first n-1 elements
for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i)
@ -6359,8 +6364,7 @@ class basic_json
case value_t::string:
{
o.put('\"');
const auto s = escape_string(*val.m_value.string);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(*val.m_value.string);
o.put('\"');
return;
}
@ -6380,19 +6384,19 @@ class basic_json
case value_t::number_integer:
{
x_write(val.m_value.number_integer);
dump_integer(val.m_value.number_integer);
return;
}
case value_t::number_unsigned:
{
x_write(val.m_value.number_unsigned);
dump_integer(val.m_value.number_unsigned);
return;
}
case value_t::number_float:
{
x_write(val.m_value.number_float);
dump_float(val.m_value.number_float);
return;
}
@ -6438,14 +6442,40 @@ class basic_json
return res + 1;
}
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
{
// from c (1 byte) to \uxxxx (6 bytes)
return res + 5;
}
default:
{
if (c >= 0x00 and c <= 0x1f)
{
// from c (1 byte) to \uxxxx (6 bytes)
return res + 5;
}
return res;
}
}
@ -6465,12 +6495,13 @@ class basic_json
@complexity Linear in the length of string @a s.
*/
static string_t escape_string(const string_t& s)
void dump_escaped(const string_t& s) const
{
const auto space = extra_space(s);
if (space == 0)
{
return s;
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
return;
}
// create a result string of necessary size
@ -6537,43 +6568,69 @@ class basic_json
break;
}
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
{
// convert a number 0..15 to its hex representation
// (0..f)
static const char hexify[16] =
{
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
// print character c as \uxxxx
for (const char m :
{ 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
})
{
result[++pos] = m;
}
++pos;
break;
}
default:
{
if (c >= 0x00 and c <= 0x1f)
{
// convert a number 0..15 to its hex representation
// (0..f)
static const char hexify[16] =
{
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
// print character c as \uxxxx
for (const char m :
{ 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
})
{
result[++pos] = m;
}
++pos;
}
else
{
// all other characters are added as-is
result[pos++] = c;
}
// all other characters are added as-is
result[pos++] = c;
break;
}
}
}
return result;
assert(pos == s.size() + space);
o.write(result.c_str(), static_cast<std::streamsize>(result.size()));
}
template<typename NumberType>
void x_write(NumberType x)
void dump_integer(NumberType x)
{
// special case for "0"
if (x == 0)
@ -6607,7 +6664,7 @@ class basic_json
o.write(m_buf.data(), static_cast<std::streamsize>(i));
}
void x_write(number_float_t x)
void dump_float(number_float_t x)
{
// special case for 0.0 and -0.0
if (x == 0)
@ -6634,15 +6691,6 @@ class basic_json
// check if buffer was large enough
assert(static_cast<size_t>(written_bytes) < m_buf.size());
// read information from locale
const auto loc = localeconv();
assert(loc != nullptr);
const char thousands_sep = !loc->thousands_sep ? '\0'
: loc->thousands_sep[0];
const char decimal_point = !loc->decimal_point ? '\0'
: loc->decimal_point[0];
// erase thousands separator
if (thousands_sep != '\0')
{
@ -6687,6 +6735,12 @@ class basic_json
/// a (hopefully) large enough character buffer
std::array < char, 64 > m_buf{{}};
const std::lconv* loc = nullptr;
const char thousands_sep = '\0';
const char decimal_point = '\0';
string_t indent_string = string_t(512, ' ');
};
public:

View file

@ -34,6 +34,7 @@ SOFTWARE.
#include <cassert> // assert
#include <cctype> // isdigit
#include <ciso646> // and, not, or
#include <clocale> // lconv, localeconv
#include <cmath> // isfinite, labs, ldexp, signbit
#include <cstddef> // nullptr_t, ptrdiff_t, size_t
#include <cstdint> // int64_t, uint64_t
@ -6203,7 +6204,9 @@ class basic_json
{
public:
serializer(std::ostream& s)
: o(s)
: o(s), loc(std::localeconv()),
thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]),
decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0])
{}
/*!
@ -6244,7 +6247,10 @@ class basic_json
// variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step;
string_t indent_string = string_t(new_indent, ' ');
if (indent_string.size() < new_indent)
{
indent_string.resize(new_indent, ' ');
}
// first n-1 elements
auto i = val.m_value.object->cbegin();
@ -6252,8 +6258,7 @@ class basic_json
{
o.write(indent_string.c_str(), new_indent);
o.put('\"');
const auto s = escape_string(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(i->first);
o.write("\": ", 3);
dump(i->second, true, indent_step, new_indent);
o.write(",\n", 2);
@ -6263,8 +6268,7 @@ class basic_json
assert(i != val.m_value.object->cend());
o.write(indent_string.c_str(), new_indent);
o.put('\"');
const auto s = escape_string(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(i->first);
o.write("\": ", 3);
dump(i->second, true, indent_step, new_indent);
@ -6281,8 +6285,7 @@ class basic_json
for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
{
o.put('\"');
const auto s = escape_string(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(i->first);
o.write("\":", 2);
dump(i->second, false, indent_step, current_indent);
o.put(',');
@ -6291,8 +6294,7 @@ class basic_json
// last element
assert(i != val.m_value.object->cend());
o.put('\"');
const auto s = escape_string(i->first);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(i->first);
o.write("\":", 2);
dump(i->second, false, indent_step, current_indent);
@ -6316,7 +6318,10 @@ class basic_json
// variable to hold indentation for recursive calls
const auto new_indent = current_indent + indent_step;
string_t indent_string = string_t(new_indent, ' ');
if (indent_string.size() < new_indent)
{
indent_string.resize(new_indent, ' ');
}
// first n-1 elements
for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i)
@ -6359,8 +6364,7 @@ class basic_json
case value_t::string:
{
o.put('\"');
const auto s = escape_string(*val.m_value.string);
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
dump_escaped(*val.m_value.string);
o.put('\"');
return;
}
@ -6380,19 +6384,19 @@ class basic_json
case value_t::number_integer:
{
x_write(val.m_value.number_integer);
dump_integer(val.m_value.number_integer);
return;
}
case value_t::number_unsigned:
{
x_write(val.m_value.number_unsigned);
dump_integer(val.m_value.number_unsigned);
return;
}
case value_t::number_float:
{
x_write(val.m_value.number_float);
dump_float(val.m_value.number_float);
return;
}
@ -6438,14 +6442,40 @@ class basic_json
return res + 1;
}
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
{
// from c (1 byte) to \uxxxx (6 bytes)
return res + 5;
}
default:
{
if (c >= 0x00 and c <= 0x1f)
{
// from c (1 byte) to \uxxxx (6 bytes)
return res + 5;
}
return res;
}
}
@ -6465,12 +6495,13 @@ class basic_json
@complexity Linear in the length of string @a s.
*/
static string_t escape_string(const string_t& s)
void dump_escaped(const string_t& s) const
{
const auto space = extra_space(s);
if (space == 0)
{
return s;
o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
return;
}
// create a result string of necessary size
@ -6537,43 +6568,69 @@ class basic_json
break;
}
case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x0b:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
{
// convert a number 0..15 to its hex representation
// (0..f)
static const char hexify[16] =
{
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
// print character c as \uxxxx
for (const char m :
{ 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
})
{
result[++pos] = m;
}
++pos;
break;
}
default:
{
if (c >= 0x00 and c <= 0x1f)
{
// convert a number 0..15 to its hex representation
// (0..f)
static const char hexify[16] =
{
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
// print character c as \uxxxx
for (const char m :
{ 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
})
{
result[++pos] = m;
}
++pos;
}
else
{
// all other characters are added as-is
result[pos++] = c;
}
// all other characters are added as-is
result[pos++] = c;
break;
}
}
}
return result;
assert(pos == s.size() + space);
o.write(result.c_str(), static_cast<std::streamsize>(result.size()));
}
template<typename NumberType>
void x_write(NumberType x)
void dump_integer(NumberType x)
{
// special case for "0"
if (x == 0)
@ -6607,7 +6664,7 @@ class basic_json
o.write(m_buf.data(), static_cast<std::streamsize>(i));
}
void x_write(number_float_t x)
void dump_float(number_float_t x)
{
// special case for 0.0 and -0.0
if (x == 0)
@ -6634,15 +6691,6 @@ class basic_json
// check if buffer was large enough
assert(static_cast<size_t>(written_bytes) < m_buf.size());
// read information from locale
const auto loc = localeconv();
assert(loc != nullptr);
const char thousands_sep = !loc->thousands_sep ? '\0'
: loc->thousands_sep[0];
const char decimal_point = !loc->decimal_point ? '\0'
: loc->decimal_point[0];
// erase thousands separator
if (thousands_sep != '\0')
{
@ -6687,6 +6735,12 @@ class basic_json
/// a (hopefully) large enough character buffer
std::array < char, 64 > m_buf{{}};
const std::lconv* loc = nullptr;
const char thousands_sep = '\0';
const char decimal_point = '\0';
string_t indent_string = string_t(512, ' ');
};
public:

View file

@ -49,44 +49,53 @@ TEST_CASE("convenience functions")
SECTION("string escape")
{
CHECK(json::serializer::escape_string("\"") == "\\\"");
CHECK(json::serializer::escape_string("\\") == "\\\\");
CHECK(json::serializer::escape_string("\b") == "\\b");
CHECK(json::serializer::escape_string("\f") == "\\f");
CHECK(json::serializer::escape_string("\n") == "\\n");
CHECK(json::serializer::escape_string("\r") == "\\r");
CHECK(json::serializer::escape_string("\t") == "\\t");
const auto check_escaped = [](const char* original,
const char* escaped)
{
std::stringstream ss;
json::serializer s(ss);
s.dump_escaped(original);
CHECK(ss.str() == escaped);
};
CHECK(json::serializer::escape_string("\x01") == "\\u0001");
CHECK(json::serializer::escape_string("\x02") == "\\u0002");
CHECK(json::serializer::escape_string("\x03") == "\\u0003");
CHECK(json::serializer::escape_string("\x04") == "\\u0004");
CHECK(json::serializer::escape_string("\x05") == "\\u0005");
CHECK(json::serializer::escape_string("\x06") == "\\u0006");
CHECK(json::serializer::escape_string("\x07") == "\\u0007");
CHECK(json::serializer::escape_string("\x08") == "\\b");
CHECK(json::serializer::escape_string("\x09") == "\\t");
CHECK(json::serializer::escape_string("\x0a") == "\\n");
CHECK(json::serializer::escape_string("\x0b") == "\\u000b");
CHECK(json::serializer::escape_string("\x0c") == "\\f");
CHECK(json::serializer::escape_string("\x0d") == "\\r");
CHECK(json::serializer::escape_string("\x0e") == "\\u000e");
CHECK(json::serializer::escape_string("\x0f") == "\\u000f");
CHECK(json::serializer::escape_string("\x10") == "\\u0010");
CHECK(json::serializer::escape_string("\x11") == "\\u0011");
CHECK(json::serializer::escape_string("\x12") == "\\u0012");
CHECK(json::serializer::escape_string("\x13") == "\\u0013");
CHECK(json::serializer::escape_string("\x14") == "\\u0014");
CHECK(json::serializer::escape_string("\x15") == "\\u0015");
CHECK(json::serializer::escape_string("\x16") == "\\u0016");
CHECK(json::serializer::escape_string("\x17") == "\\u0017");
CHECK(json::serializer::escape_string("\x18") == "\\u0018");
CHECK(json::serializer::escape_string("\x19") == "\\u0019");
CHECK(json::serializer::escape_string("\x1a") == "\\u001a");
CHECK(json::serializer::escape_string("\x1b") == "\\u001b");
CHECK(json::serializer::escape_string("\x1c") == "\\u001c");
CHECK(json::serializer::escape_string("\x1d") == "\\u001d");
CHECK(json::serializer::escape_string("\x1e") == "\\u001e");
CHECK(json::serializer::escape_string("\x1f") == "\\u001f");
check_escaped("\"", "\\\"");
check_escaped("\\", "\\\\");
check_escaped("\b", "\\b");
check_escaped("\f", "\\f");
check_escaped("\n", "\\n");
check_escaped("\r", "\\r");
check_escaped("\t", "\\t");
check_escaped("\x01", "\\u0001");
check_escaped("\x02", "\\u0002");
check_escaped("\x03", "\\u0003");
check_escaped("\x04", "\\u0004");
check_escaped("\x05", "\\u0005");
check_escaped("\x06", "\\u0006");
check_escaped("\x07", "\\u0007");
check_escaped("\x08", "\\b");
check_escaped("\x09", "\\t");
check_escaped("\x0a", "\\n");
check_escaped("\x0b", "\\u000b");
check_escaped("\x0c", "\\f");
check_escaped("\x0d", "\\r");
check_escaped("\x0e", "\\u000e");
check_escaped("\x0f", "\\u000f");
check_escaped("\x10", "\\u0010");
check_escaped("\x11", "\\u0011");
check_escaped("\x12", "\\u0012");
check_escaped("\x13", "\\u0013");
check_escaped("\x14", "\\u0014");
check_escaped("\x15", "\\u0015");
check_escaped("\x16", "\\u0016");
check_escaped("\x17", "\\u0017");
check_escaped("\x18", "\\u0018");
check_escaped("\x19", "\\u0019");
check_escaped("\x1a", "\\u001a");
check_escaped("\x1b", "\\u001b");
check_escaped("\x1c", "\\u001c");
check_escaped("\x1d", "\\u001d");
check_escaped("\x1e", "\\u001e");
check_escaped("\x1f", "\\u001f");
}
}