Merge pull request #1411 from nickaein/develop
Improve dump_integer performance
This commit is contained in:
commit
daeb48b01a
7 changed files with 1000273 additions and 46 deletions
|
@ -19,6 +19,7 @@ file(COPY ${CMAKE_SOURCE_DIR}/data DESTINATION .)
|
|||
file(COPY ${CMAKE_SOURCE_DIR}/../test/data/regression/floats.json
|
||||
${CMAKE_SOURCE_DIR}/../test/data/regression/unsigned_ints.json
|
||||
${CMAKE_SOURCE_DIR}/../test/data/regression/signed_ints.json
|
||||
${CMAKE_SOURCE_DIR}/../test/data/regression/small_signed_ints.json
|
||||
DESTINATION data/numbers)
|
||||
|
||||
# benchmark binary
|
||||
|
|
|
@ -28,14 +28,14 @@ static void ParseFile(benchmark::State& state, const char* filename)
|
|||
std::ifstream file(filename, std::ios::binary | std::ios::ate);
|
||||
state.SetBytesProcessed(state.iterations() * file.tellg());
|
||||
}
|
||||
BENCHMARK_CAPTURE(ParseFile, jeopardy, "data/jeopardy/jeopardy.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, canada, "data/nativejson-benchmark/canada.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, twitter, "data/nativejson-benchmark/twitter.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, floats, "data/numbers/floats.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json");
|
||||
|
||||
BENCHMARK_CAPTURE(ParseFile, jeopardy, "data/jeopardy/jeopardy.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, canada, "data/nativejson-benchmark/canada.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, twitter, "data/nativejson-benchmark/twitter.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, floats, "data/numbers/floats.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json");
|
||||
BENCHMARK_CAPTURE(ParseFile, small_signed_ints, "data/numbers/small_signed_ints.json");
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// parse JSON from string
|
||||
|
@ -61,13 +61,14 @@ static void ParseString(benchmark::State& state, const char* filename)
|
|||
|
||||
state.SetBytesProcessed(state.iterations() * str.size());
|
||||
}
|
||||
BENCHMARK_CAPTURE(ParseString, jeopardy, "data/jeopardy/jeopardy.json");
|
||||
BENCHMARK_CAPTURE(ParseString, canada, "data/nativejson-benchmark/canada.json");
|
||||
BENCHMARK_CAPTURE(ParseString, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
|
||||
BENCHMARK_CAPTURE(ParseString, twitter, "data/nativejson-benchmark/twitter.json");
|
||||
BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json");
|
||||
BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json");
|
||||
BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json");
|
||||
BENCHMARK_CAPTURE(ParseString, jeopardy, "data/jeopardy/jeopardy.json");
|
||||
BENCHMARK_CAPTURE(ParseString, canada, "data/nativejson-benchmark/canada.json");
|
||||
BENCHMARK_CAPTURE(ParseString, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
|
||||
BENCHMARK_CAPTURE(ParseString, twitter, "data/nativejson-benchmark/twitter.json");
|
||||
BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json");
|
||||
BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json");
|
||||
BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json");
|
||||
BENCHMARK_CAPTURE(ParseString, small_signed_ints, "data/numbers/small_signed_ints.json");
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -101,6 +102,8 @@ BENCHMARK_CAPTURE(Dump, signed_ints / -, "data/numbers/signed_ints.json",
|
|||
BENCHMARK_CAPTURE(Dump, signed_ints / 4, "data/numbers/signed_ints.json", 4);
|
||||
BENCHMARK_CAPTURE(Dump, unsigned_ints / -, "data/numbers/unsigned_ints.json", -1);
|
||||
BENCHMARK_CAPTURE(Dump, unsigned_ints / 4, "data/numbers/unsigned_ints.json", 4);
|
||||
BENCHMARK_CAPTURE(Dump, small_signed_ints / -, "data/numbers/small_signed_ints.json", -1);
|
||||
BENCHMARK_CAPTURE(Dump, small_signed_ints / 4, "data/numbers/small_signed_ints.json", 4);
|
||||
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
|
|
@ -527,6 +527,40 @@ class serializer
|
|||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief count digits
|
||||
|
||||
Count the number of decimal (base 10) digits for an input unsigned integer.
|
||||
|
||||
@param[in] x unsigned integer number to count its digits
|
||||
@return number of decimal digits
|
||||
*/
|
||||
inline unsigned int count_digits(number_unsigned_t x) noexcept
|
||||
{
|
||||
unsigned int n_digits = 1;
|
||||
for (;;)
|
||||
{
|
||||
if (x < 10)
|
||||
{
|
||||
return n_digits;
|
||||
}
|
||||
if (x < 100)
|
||||
{
|
||||
return n_digits + 1;
|
||||
}
|
||||
if (x < 1000)
|
||||
{
|
||||
return n_digits + 2;
|
||||
}
|
||||
if (x < 10000)
|
||||
{
|
||||
return n_digits + 3;
|
||||
}
|
||||
x = x / 10000u;
|
||||
n_digits += 4;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief dump an integer
|
||||
|
||||
|
@ -542,6 +576,22 @@ class serializer
|
|||
int> = 0>
|
||||
void dump_integer(NumberType x)
|
||||
{
|
||||
static constexpr std::array<std::array<char, 2>, 100> digits_to_99
|
||||
{
|
||||
{
|
||||
{'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
|
||||
{'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
|
||||
{'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
|
||||
{'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
|
||||
{'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
|
||||
{'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
|
||||
{'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
|
||||
{'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
|
||||
{'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
|
||||
{'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'},
|
||||
}
|
||||
};
|
||||
|
||||
// special case for "0"
|
||||
if (x == 0)
|
||||
{
|
||||
|
@ -549,28 +599,58 @@ class serializer
|
|||
return;
|
||||
}
|
||||
|
||||
const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not (x >= 0); // see issue #755
|
||||
std::size_t i = 0;
|
||||
// use a pointer to fill the buffer
|
||||
auto buffer_ptr = begin(number_buffer);
|
||||
|
||||
while (x != 0)
|
||||
{
|
||||
// spare 1 byte for '\0'
|
||||
assert(i < number_buffer.size() - 1);
|
||||
const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not(x >= 0); // see issue #755
|
||||
number_unsigned_t abs_value;
|
||||
|
||||
const auto digit = std::labs(static_cast<long>(x % 10));
|
||||
number_buffer[i++] = static_cast<char>('0' + digit);
|
||||
x /= 10;
|
||||
}
|
||||
unsigned int n_chars;
|
||||
|
||||
if (is_negative)
|
||||
{
|
||||
// make sure there is capacity for the '-'
|
||||
assert(i < number_buffer.size() - 2);
|
||||
number_buffer[i++] = '-';
|
||||
*buffer_ptr = '-';
|
||||
abs_value = static_cast<number_unsigned_t>(0 - x);
|
||||
|
||||
// account one more byte for the minus sign
|
||||
n_chars = 1 + count_digits(abs_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
abs_value = static_cast<number_unsigned_t>(x);
|
||||
n_chars = count_digits(abs_value);
|
||||
}
|
||||
|
||||
std::reverse(number_buffer.begin(), number_buffer.begin() + i);
|
||||
o->write_characters(number_buffer.data(), i);
|
||||
// spare 1 byte for '\0'
|
||||
assert(n_chars < number_buffer.size() - 1);
|
||||
|
||||
// jump to the end and generate the string backwards
|
||||
// so we later avoid reversing the result
|
||||
buffer_ptr += n_chars;
|
||||
|
||||
// Fast int2ascii implementation inspired by "Fastware" talk by Andrei Alexandrescu
|
||||
// See: https://www.youtube.com/watch?v=o4-CwDo2zpg
|
||||
const auto buffer_end = buffer_ptr;
|
||||
while (abs_value >= 100)
|
||||
{
|
||||
const auto digits_index = static_cast<unsigned>((abs_value % 100));
|
||||
abs_value /= 100;
|
||||
*(--buffer_ptr) = digits_to_99[digits_index][1];
|
||||
*(--buffer_ptr) = digits_to_99[digits_index][0];
|
||||
}
|
||||
|
||||
if (abs_value >= 10)
|
||||
{
|
||||
const auto digits_index = static_cast<unsigned>(abs_value);
|
||||
*(--buffer_ptr) = digits_to_99[digits_index][1];
|
||||
*(--buffer_ptr) = digits_to_99[digits_index][0];
|
||||
}
|
||||
else
|
||||
{
|
||||
*(--buffer_ptr) = static_cast<char>('0' + abs_value);
|
||||
}
|
||||
|
||||
o->write_characters(number_buffer.data(), n_chars);
|
||||
}
|
||||
|
||||
/*!
|
||||
|
|
|
@ -11410,6 +11410,40 @@ class serializer
|
|||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief count digits
|
||||
|
||||
Count the number of decimal (base 10) digits for an input unsigned integer.
|
||||
|
||||
@param[in] x unsigned integer number to count its digits
|
||||
@return number of decimal digits
|
||||
*/
|
||||
inline unsigned int count_digits(number_unsigned_t x) noexcept
|
||||
{
|
||||
unsigned int n_digits = 1;
|
||||
for (;;)
|
||||
{
|
||||
if (x < 10)
|
||||
{
|
||||
return n_digits;
|
||||
}
|
||||
if (x < 100)
|
||||
{
|
||||
return n_digits + 1;
|
||||
}
|
||||
if (x < 1000)
|
||||
{
|
||||
return n_digits + 2;
|
||||
}
|
||||
if (x < 10000)
|
||||
{
|
||||
return n_digits + 3;
|
||||
}
|
||||
x = x / 10000u;
|
||||
n_digits += 4;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief dump an integer
|
||||
|
||||
|
@ -11425,6 +11459,22 @@ class serializer
|
|||
int> = 0>
|
||||
void dump_integer(NumberType x)
|
||||
{
|
||||
static constexpr std::array<std::array<char, 2>, 100> digits_to_99
|
||||
{
|
||||
{
|
||||
{'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
|
||||
{'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
|
||||
{'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
|
||||
{'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
|
||||
{'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
|
||||
{'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
|
||||
{'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
|
||||
{'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
|
||||
{'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
|
||||
{'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'},
|
||||
}
|
||||
};
|
||||
|
||||
// special case for "0"
|
||||
if (x == 0)
|
||||
{
|
||||
|
@ -11432,28 +11482,58 @@ class serializer
|
|||
return;
|
||||
}
|
||||
|
||||
const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not (x >= 0); // see issue #755
|
||||
std::size_t i = 0;
|
||||
// use a pointer to fill the buffer
|
||||
auto buffer_ptr = begin(number_buffer);
|
||||
|
||||
while (x != 0)
|
||||
{
|
||||
// spare 1 byte for '\0'
|
||||
assert(i < number_buffer.size() - 1);
|
||||
const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not(x >= 0); // see issue #755
|
||||
number_unsigned_t abs_value;
|
||||
|
||||
const auto digit = std::labs(static_cast<long>(x % 10));
|
||||
number_buffer[i++] = static_cast<char>('0' + digit);
|
||||
x /= 10;
|
||||
}
|
||||
unsigned int n_chars;
|
||||
|
||||
if (is_negative)
|
||||
{
|
||||
// make sure there is capacity for the '-'
|
||||
assert(i < number_buffer.size() - 2);
|
||||
number_buffer[i++] = '-';
|
||||
*buffer_ptr = '-';
|
||||
abs_value = static_cast<number_unsigned_t>(0 - x);
|
||||
|
||||
// account one more byte for the minus sign
|
||||
n_chars = 1 + count_digits(abs_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
abs_value = static_cast<number_unsigned_t>(x);
|
||||
n_chars = count_digits(abs_value);
|
||||
}
|
||||
|
||||
std::reverse(number_buffer.begin(), number_buffer.begin() + i);
|
||||
o->write_characters(number_buffer.data(), i);
|
||||
// spare 1 byte for '\0'
|
||||
assert(n_chars < number_buffer.size() - 1);
|
||||
|
||||
// jump to the end and generate the string backwards
|
||||
// so we later avoid reversing the result
|
||||
buffer_ptr += n_chars;
|
||||
|
||||
// Fast int2ascii implementation inspired by "Fastware" talk by Andrei Alexandrescu
|
||||
// See: https://www.youtube.com/watch?v=o4-CwDo2zpg
|
||||
const auto buffer_end = buffer_ptr;
|
||||
while (abs_value >= 100)
|
||||
{
|
||||
const auto digits_index = static_cast<unsigned>((abs_value % 100));
|
||||
abs_value /= 100;
|
||||
*(--buffer_ptr) = digits_to_99[digits_index][1];
|
||||
*(--buffer_ptr) = digits_to_99[digits_index][0];
|
||||
}
|
||||
|
||||
if (abs_value >= 10)
|
||||
{
|
||||
const auto digits_index = static_cast<unsigned>(abs_value);
|
||||
*(--buffer_ptr) = digits_to_99[digits_index][1];
|
||||
*(--buffer_ptr) = digits_to_99[digits_index][0];
|
||||
}
|
||||
else
|
||||
{
|
||||
*(--buffer_ptr) = static_cast<char>('0' + abs_value);
|
||||
}
|
||||
|
||||
o->write_characters(number_buffer.data(), n_chars);
|
||||
}
|
||||
|
||||
/*!
|
||||
|
|
1000002
test/data/regression/small_signed_ints.json
Executable file
1000002
test/data/regression/small_signed_ints.json
Executable file
File diff suppressed because it is too large
Load diff
|
@ -669,7 +669,8 @@ TEST_CASE("regression tests")
|
|||
{
|
||||
"test/data/regression/floats.json",
|
||||
"test/data/regression/signed_ints.json",
|
||||
"test/data/regression/unsigned_ints.json"
|
||||
"test/data/regression/unsigned_ints.json",
|
||||
"test/data/regression/small_signed_ints.json"
|
||||
})
|
||||
{
|
||||
CAPTURE(filename)
|
||||
|
|
|
@ -471,4 +471,64 @@ TEST_CASE("formatting")
|
|||
check_double( 1.2345e+21, "1.2344999999999999e+21" ); // 1.2345e+21 1.2344999999999999e+21 1.2345e21
|
||||
check_double( 1.2345e+22, "1.2345e+22" ); // 1.2345e+22 1.2345e+22 1.2345e22
|
||||
}
|
||||
|
||||
SECTION("integer")
{
    // a 64-bit integer paired with the exact text dump() must produce for it
    struct integer_sample
    {
        std::int64_t number;
        const char* expected;
    };

    const integer_sample samples[] =
    {
        // edge cases
        {INT64_MIN, "-9223372036854775808"},
        {INT64_MAX, "9223372036854775807"},

        // few random big integers
        {-3456789012345678901LL, "-3456789012345678901"},
        {3456789012345678901LL, "3456789012345678901"},
        {-5678901234567890123LL, "-5678901234567890123"},
        {5678901234567890123LL, "5678901234567890123"},

        // integers with various digit counts
        {-1000000000000000000LL, "-1000000000000000000"},
        {-100000000000000000LL, "-100000000000000000"},
        {-10000000000000000LL, "-10000000000000000"},
        {-1000000000000000LL, "-1000000000000000"},
        {-100000000000000LL, "-100000000000000"},
        {-10000000000000LL, "-10000000000000"},
        {-1000000000000LL, "-1000000000000"},
        {-100000000000LL, "-100000000000"},
        {-10000000000LL, "-10000000000"},
        {-1000000000LL, "-1000000000"},
        {-100000000LL, "-100000000"},
        {-10000000LL, "-10000000"},
        {-1000000LL, "-1000000"},
        {-100000LL, "-100000"},
        {-10000LL, "-10000"},
        {-1000LL, "-1000"},
        {-100LL, "-100"},
        {-10LL, "-10"},
        {-1LL, "-1"},
        {0, "0"},
        {1LL, "1"},
        {10LL, "10"},
        {100LL, "100"},
        {1000LL, "1000"},
        {10000LL, "10000"},
        {100000LL, "100000"},
        {1000000LL, "1000000"},
        {10000000LL, "10000000"},
        {100000000LL, "100000000"},
        {1000000000LL, "1000000000"},
        {10000000000LL, "10000000000"},
        {100000000000LL, "100000000000"},
        {1000000000000LL, "1000000000000"},
        {10000000000000LL, "10000000000000"},
        {100000000000000LL, "100000000000000"},
        {1000000000000000LL, "1000000000000000"},
        {10000000000000000LL, "10000000000000000"},
        {100000000000000000LL, "100000000000000000"},
        {1000000000000000000LL, "1000000000000000000"},
    };

    // serialize every sample and compare against its expected text
    for (const auto& sample : samples)
    {
        nlohmann::json j = sample.number;
        CHECK(j.dump() == sample.expected);
    }
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue