Simplify the json/src/benchmarks.cpp to allow more optimal code gen.

o For some unknown reason, the complexity of the benchmark platform
  prevented some C++ compilers from generating optimal code, properly
  reflective of the real performance in actual deployment.
o Added the json_benchmarks_simple target, which performs the same
  suite of tests as json_benchmarks.
o Simplified the benchmark platform, and emit an "Average" TPS
  (Transactions Per Second) value reflective of aggregate parse/output
  performance.
This commit is contained in:
Perry Kundert 2017-10-07 15:50:19 -07:00
parent 23440eb86e
commit 0b803d0a5f
4 changed files with 190 additions and 7 deletions

1
.gitignore vendored
View file

@ -1,5 +1,6 @@
json_unit json_unit
json_benchmarks json_benchmarks
json_benchmarks_simple
fuzz-testing fuzz-testing
*.dSYM *.dSYM

View file

@ -1,11 +1,21 @@
all: json_benchmarks
./json_benchmarks
json_benchmarks: src/benchmarks.cpp ../src/json.hpp number_jsons #
# Build/run json.hpp benchmarks, eg. CXX=g++-7 make
#
# The existing json_benchmarks did not allow optimization under some compilers
#
all: json_benchmarks json_benchmarks_simple number_jsons
bash -c 'time ./json_benchmarks'
bash -c 'time ./json_benchmarks_simple'
json_benchmarks: src/benchmarks.cpp ../src/json.hpp
$(CXX) -std=c++11 -pthread $(CXXFLAGS) -DNDEBUG -O3 -flto -I thirdparty/benchpress -I thirdparty/cxxopts -I../src src/benchmarks.cpp $(LDFLAGS) -o $@ $(CXX) -std=c++11 -pthread $(CXXFLAGS) -DNDEBUG -O3 -flto -I thirdparty/benchpress -I thirdparty/cxxopts -I../src src/benchmarks.cpp $(LDFLAGS) -o $@
json_benchmarks_simple: src/benchmarks_simple.cpp ../src/json.hpp
$(CXX) -std=c++11 $(CXXFLAGS) -DNDEBUG -O3 -flto -I../src $(<) $(LDFLAGS) -o $@
number_jsons: number_jsons:
(test -e files/numbers/floats.json -a -e files/numbers/signed_ints.json -a -e files/numbers/unsigned_ints.json) || (cd files/numbers ; python generate.py) (test -e files/numbers/floats.json -a -e files/numbers/signed_ints.json -a -e files/numbers/unsigned_ints.json) || (cd files/numbers ; python generate.py)
clean: clean:
rm -f json_benchmarks files/numbers/*.json rm -f json_benchmarks json_benchmarks_simple files/numbers/*.json

View file

@ -34,6 +34,19 @@ static void bench(benchpress::context& ctx,
{ {
// using string streams for benchmarking to factor-out cold-cache disk // using string streams for benchmarking to factor-out cold-cache disk
// access. // access.
#if defined( FROMFILE )
std::ifstream istr;
{
istr.open( in_path, std::ifstream::in );
// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#else
std::stringstream istr; std::stringstream istr;
{ {
// read file into string stream // read file into string stream
@ -43,11 +56,12 @@ static void bench(benchpress::context& ctx,
// read the stream once // read the stream once
json j; json j;
j << istr; istr >> j;
// clear flags and rewind // clear flags and rewind
istr.clear(); istr.clear();
istr.seekg(0); istr.seekg(0);
} }
#endif
switch (mode) switch (mode)
{ {
@ -62,7 +76,7 @@ static void bench(benchpress::context& ctx,
istr.clear(); istr.clear();
istr.seekg(0); istr.seekg(0);
json j; json j;
j << istr; istr >> j;
} }
break; break;
@ -74,7 +88,7 @@ static void bench(benchpress::context& ctx,
{ {
// create JSON value from input // create JSON value from input
json j; json j;
j << istr; istr >> j;
std::stringstream ostr; std::stringstream ostr;
ctx.reset_timer(); ctx.reset_timer();

View file

@ -0,0 +1,158 @@
//
// benchmarks_simple.cpp -- a less complex version of benchmarks.cpp, that better reflects actual performance
//
// For some reason, the complexity of benchmarks.cpp doesn't allow
// the compiler to optimize code using json.hpp effectively. The
// exact same tests, with the use of benchpress and cxxopts produces
// much faster code, at least under g++.
//
#include <fstream>
#include <iostream>
#include <chrono>
#include <list>
#include <tuple>
#include <json.hpp>
using json = nlohmann::json;
enum class EMode { input, output, indent };
static double bench(const EMode mode, size_t iters, const std::string& in_path )
{
// using string streams for benchmarking to factor-out cold-cache disk
// access. Define FROMFILE to use file I/O instead.
#if defined( FROMFILE )
std::ifstream istr;
{
istr.open( in_path, std::ifstream::in );
// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#else
std::stringstream istr;
{
// read file into string stream
std::ifstream input_file(in_path);
istr << input_file.rdbuf();
input_file.close();
// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#endif
double tps = 0;
switch (mode)
{
// benchmarking input
case EMode::input:
{
auto start = std::chrono::system_clock::now();
for (size_t i = 0; i < iters; ++i)
{
// clear flags and rewind
istr.clear();
istr.seekg(0);
json j;
istr >> j;
}
auto ended = std::chrono::system_clock::now();
tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
break;
}
// benchmarking output
case EMode::output:
case EMode::indent:
{
// create JSON value from input
json j;
istr >> j;
std::stringstream ostr;
auto start = std::chrono::system_clock::now();
for (size_t i = 0; i < iters; ++i)
{
if (mode == EMode::indent)
{
ostr << j;
}
else
{
ostr << std::setw(4) << j;
}
// reset data
ostr.str(std::string());
}
auto ended = std::chrono::system_clock::now();
tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
break;
}
}
return tps;
}
template <typename T>
struct average {
T _sum { 0 };
size_t _count { 0 };
T operator+=( const T &val_ ) { _sum += val_; +_count++; return val_; }
operator T() { return _sum / _count; }
};
// Execute each test approximately enough times to get near 1
// transaction per second, and compute the average; a single aggregate
// number that gives a performance metric representing both parsing
// and output.
int main( int, char ** )
{
std::list<std::tuple<std::string, EMode, size_t, std::string>> tests {
{ "parse jeopardy.json", EMode::input, 2, "files/jeopardy/jeopardy.json" },
{ "parse canada.json", EMode::input, 30, "files/nativejson-benchmark/canada.json" },
{ "parse citm_catalog.json", EMode::input, 120, "files/nativejson-benchmark/citm_catalog.json" },
{ "parse twitter.json", EMode::input, 225, "files/nativejson-benchmark/twitter.json" },
{ "parse floats.json", EMode::input, 5, "files/numbers/floats.json" },
{ "parse signed_ints.json", EMode::input, 6, "files/numbers/signed_ints.json" },
{ "parse unsigned_ints.json", EMode::input, 6, "files/numbers/unsigned_ints.json" },
{ "dump jeopardy.json", EMode::output, 5, "files/jeopardy/jeopardy.json" },
{ "dump jeopardy.json w/ind.", EMode::indent, 5, "files/jeopardy/jeopardy.json" },
{ "dump floats.json", EMode::output, 2, "files/numbers/floats.json" },
{ "dump signed_ints.json", EMode::output, 20, "files/numbers/signed_ints.json" },
};
average<double> avg;
for ( auto t : tests ) {
std::string name, path;
EMode mode;
size_t iters;
std::tie(name, mode, iters, path) = t;
auto tps = bench( mode, iters, path );
avg += tps;
std::cout
<< std::left
<< std::setw( 30 ) << name
<< std::right
<< " x " << std::setw( 3 ) << iters
<< std::left
<< " == " << std::setw( 10 ) << tps
<< std::right
<< " TPS, " << std::setw( 8 ) << std::round( tps * 1e6 / iters )
<< " ms/op"
<< std::endl;
}
std::cout << std::setw( 40 ) << "" << std::string( 10, '-' ) << std::endl;
std::cout << std::setw( 40 ) << "" << std::setw( 10 ) << std::left << avg << " TPS Average" << std::endl;
return 0;
}