From 0b803d0a5ff48687558c85e7ceb5c2cf70ea29af Mon Sep 17 00:00:00 2001 From: Perry Kundert Date: Sat, 7 Oct 2017 15:50:19 -0700 Subject: [PATCH] Simplify the json/src/benchmarks.cpp to allow more optimal code gen. o For some unknown reason, the complexity of the benchmark platform prevented some C++ compilers from generating optimal code, properly reflective of the real performance in actual deployment. o Added the json_benchmarks_simple target, which performs the same suite of tests as json_benchmarks. o Simplified the benchmark platform, and emit an "Average" TPS (Transactions Per Second) value reflective of aggregate parse/output performance. --- .gitignore | 1 + benchmarks/Makefile | 18 ++- benchmarks/src/benchmarks.cpp | 20 +++- benchmarks/src/benchmarks_simple.cpp | 158 +++++++++++++++++++++++++++ 4 files changed, 190 insertions(+), 7 deletions(-) create mode 100644 benchmarks/src/benchmarks_simple.cpp diff --git a/.gitignore b/.gitignore index 5b2bc0fa..8157f1a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ json_unit json_benchmarks +json_benchmarks_simple fuzz-testing *.dSYM diff --git a/benchmarks/Makefile b/benchmarks/Makefile index 0e4068c4..ef2de8a3 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -1,11 +1,21 @@ -all: json_benchmarks - ./json_benchmarks -json_benchmarks: src/benchmarks.cpp ../src/json.hpp number_jsons +# +# Build/run json.hpp benchmarks, eg. 
CXX=g++-7 make +# +# The existing json_benchmarks did not allow optimization under some compilers +# +all: json_benchmarks json_benchmarks_simple number_jsons + bash -c 'time ./json_benchmarks' + bash -c 'time ./json_benchmarks_simple' + +json_benchmarks: src/benchmarks.cpp ../src/json.hpp $(CXX) -std=c++11 -pthread $(CXXFLAGS) -DNDEBUG -O3 -flto -I thirdparty/benchpress -I thirdparty/cxxopts -I../src src/benchmarks.cpp $(LDFLAGS) -o $@ +json_benchmarks_simple: src/benchmarks_simple.cpp ../src/json.hpp + $(CXX) -std=c++11 $(CXXFLAGS) -DNDEBUG -O3 -flto -I../src $(<) $(LDFLAGS) -o $@ + number_jsons: (test -e files/numbers/floats.json -a -e files/numbers/signed_ints.json -a -e files/numbers/unsigned_ints.json) || (cd files/numbers ; python generate.py) clean: - rm -f json_benchmarks files/numbers/*.json + rm -f json_benchmarks json_benchmarks_simple files/numbers/*.json diff --git a/benchmarks/src/benchmarks.cpp b/benchmarks/src/benchmarks.cpp index 55a4e478..a76c3783 100644 --- a/benchmarks/src/benchmarks.cpp +++ b/benchmarks/src/benchmarks.cpp @@ -34,6 +34,19 @@ static void bench(benchpress::context& ctx, { // using string streams for benchmarking to factor-out cold-cache disk // access. 
+#if defined( FROMFILE ) + std::ifstream istr; + { + istr.open( in_path, std::ifstream::in ); + + // read the stream once + json j; + istr >> j; + // clear flags and rewind + istr.clear(); + istr.seekg(0); + } +#else std::stringstream istr; { // read file into string stream @@ -43,11 +56,12 @@ static void bench(benchpress::context& ctx, // read the stream once json j; - j << istr; + istr >> j; // clear flags and rewind istr.clear(); istr.seekg(0); } +#endif switch (mode) { @@ -62,7 +76,7 @@ static void bench(benchpress::context& ctx, istr.clear(); istr.seekg(0); json j; - j << istr; + istr >> j; } break; @@ -74,7 +88,7 @@ static void bench(benchpress::context& ctx, { // create JSON value from input json j; - j << istr; + istr >> j; std::stringstream ostr; ctx.reset_timer(); diff --git a/benchmarks/src/benchmarks_simple.cpp b/benchmarks/src/benchmarks_simple.cpp new file mode 100644 index 00000000..4fad680a --- /dev/null +++ b/benchmarks/src/benchmarks_simple.cpp @@ -0,0 +1,158 @@ +// +// benchmarks_simple.cpp -- a less complex version of benchmarks.cpp, that better reflects actual performance +// +// For some reason, the complexity of benchmarks.cpp doesn't allow +// the compiler to optimize code using json.hpp effectively. The +// exact same tests, without the use of benchpress and cxxopts, produce +// much faster code, at least under g++. +// +#include +#include +#include +#include +#include + +#include + +using json = nlohmann::json; + +enum class EMode { input, output, indent }; + +static double bench(const EMode mode, size_t iters, const std::string& in_path ) +{ + // using string streams for benchmarking to factor-out cold-cache disk + // access. Define FROMFILE to use file I/O instead. 
+    //
+    // mode    -- EMode::input times parsing; EMode::output and EMode::indent
+    //            time dumping without / with 4-space indentation
+    // iters   -- number of timed repetitions
+    // in_path -- JSON file to load
+    // returns -- 1 / (seconds elapsed over all iters repetitions)
+#if defined( FROMFILE )
+    std::ifstream istr( in_path, std::ifstream::in );
+#else
+    std::stringstream istr;
+    {
+        // read file into string stream
+        std::ifstream input_file(in_path);
+        istr << input_file.rdbuf();
+    }
+#endif
+    {
+        // read the stream once (untimed)
+        json j;
+        istr >> j;
+    }
+    // clear flags and rewind
+    istr.clear();
+    istr.seekg(0);
+
+    double tps = 0;
+    switch (mode)
+    {
+        // benchmarking input
+        case EMode::input:
+        {
+            const auto start = std::chrono::steady_clock::now();
+            for (size_t i = 0; i < iters; ++i)
+            {
+                // clear flags and rewind
+                istr.clear();
+                istr.seekg(0);
+                json j;
+                istr >> j;
+            }
+            const auto ended = std::chrono::steady_clock::now();
+            tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
+            break;
+        }
+
+        // benchmarking output
+        case EMode::output:
+        case EMode::indent:
+        {
+            // create JSON value from input
+            json j;
+            istr >> j;
+            std::stringstream ostr;
+
+            const auto start = std::chrono::steady_clock::now();
+            for (size_t i = 0; i < iters; ++i)
+            {
+                // only EMode::indent pretty-prints: a stream width of 4
+                // requests 4-space indentation from json's operator<<
+                if (mode == EMode::indent)
+                {
+                    ostr << std::setw(4) << j;
+                }
+                else
+                {
+                    ostr << j;
+                }
+
+                // reset data
+                ostr.str(std::string());
+            }
+            const auto ended = std::chrono::steady_clock::now();
+            tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
+            break;
+        }
+    }
+    return tps;
+}
+
+// Running mean: += adds one sample, conversion to T yields the mean of
+// the samples added so far (T{} while there are none).
+template <typename T>
+struct average {
+    T _sum { 0 };
+    size_t _count { 0 };
+    T operator+=( const T &val_ ) { _sum += val_; ++_count; return val_; }
+    operator T() const { return _count ? _sum / _count : T{}; }
+};
+
+// Execute each test approximately enough times to get near 1
+// transaction per second, and compute the average; a single aggregate
+// number that gives a performance metric representing both parsing
+// and output.
+
+// Runs every benchmark in `tests`, printing one row per test -- name,
+// iteration count, TPS and milliseconds per iteration -- followed by the
+// average TPS over all tests.
+int main( int, char ** )
+{
+    std::list<std::tuple<std::string, EMode, size_t, std::string>> tests {
+        { "parse jeopardy.json", EMode::input, 2, "files/jeopardy/jeopardy.json" },
+        { "parse canada.json", EMode::input, 30, "files/nativejson-benchmark/canada.json" },
+        { "parse citm_catalog.json", EMode::input, 120, "files/nativejson-benchmark/citm_catalog.json" },
+        { "parse twitter.json", EMode::input, 225, "files/nativejson-benchmark/twitter.json" },
+        { "parse floats.json", EMode::input, 5, "files/numbers/floats.json" },
+        { "parse signed_ints.json", EMode::input, 6, "files/numbers/signed_ints.json" },
+        { "parse unsigned_ints.json", EMode::input, 6, "files/numbers/unsigned_ints.json" },
+        { "dump jeopardy.json", EMode::output, 5, "files/jeopardy/jeopardy.json" },
+        { "dump jeopardy.json w/ind.", EMode::indent, 5, "files/jeopardy/jeopardy.json" },
+        { "dump floats.json", EMode::output, 2, "files/numbers/floats.json" },
+        { "dump signed_ints.json", EMode::output, 20, "files/numbers/signed_ints.json" },
+    };
+
+    average<double> avg;
+    for ( const auto &t : tests ) {
+        std::string name, path;
+        EMode mode;
+        size_t iters;
+        std::tie(name, mode, iters, path) = t;
+        auto tps = bench( mode, iters, path );
+        avg += tps;
+        // tps covers the whole batch of iters, so one iteration takes
+        // 1 / (tps * iters) seconds, ie. 1e3 / (tps * iters) milliseconds
+        std::cout
+            << std::left  << std::setw( 30 ) << name
+            << std::right << " x "    << std::setw( 3 )  << iters
+            << std::left  << " == "   << std::setw( 10 ) << tps
+            << std::right << " TPS, " << std::setw( 8 )  << std::round( 1e3 / ( tps * iters ) )
+            << " ms/op" << std::endl;
+    }
+    std::cout << std::setw( 40 ) << "" << std::string( 10, '-' ) << std::endl;
+    std::cout << std::setw( 40 ) << "" << std::setw( 10 ) << std::left << avg << " TPS Average" << std::endl;
+    return 0;
+}