using Google Benchmark #921

This commit is contained in:
Niels Lohmann 2018-01-18 21:57:21 +01:00
parent 6402077ac2
commit a8f711a2f1
No known key found for this signature in database
GPG key ID: 7F3CEA63AE251B69
18 changed files with 125 additions and 2086 deletions

View file

@ -935,7 +935,6 @@ The library itself contains of a single header file licensed under the MIT licen
- [**American fuzzy lop**](http://lcamtuf.coredump.cx/afl/) for fuzz testing
- [**AppVeyor**](https://www.appveyor.com) for [continuous integration](https://ci.appveyor.com/project/nlohmann/json) on Windows
- [**Artistic Style**](http://astyle.sourceforge.net) for automatic source code indentation
- [**benchpress**](https://github.com/sbs-ableton/benchpress) to benchmark the code
- [**Catch**](https://github.com/philsquared/Catch) for the unit tests
- [**Clang**](http://clang.llvm.org) for compilation with code sanitizers
- [**Cmake**](https://cmake.org) for build automation
@ -943,10 +942,10 @@ The library itself contains of a single header file licensed under the MIT licen
- [**Coveralls**](https://coveralls.io) to measure [code coverage](https://coveralls.io/github/nlohmann/json)
- [**Coverity Scan**](https://scan.coverity.com) for [static analysis](https://scan.coverity.com/projects/nlohmann-json)
- [**cppcheck**](http://cppcheck.sourceforge.net) for static analysis
- [**cxxopts**](https://github.com/jarro2783/cxxopts) to let benchpress parse command-line parameters
- [**Doxygen**](http://www.stack.nl/~dimitri/doxygen/) to generate [documentation](https://nlohmann.github.io/json/)
- [**git-update-ghpages**](https://github.com/rstacruz/git-update-ghpages) to upload the documentation to gh-pages
- [**GitHub Changelog Generator**](https://github.com/skywinder/github-changelog-generator) to generate the [ChangeLog](https://github.com/nlohmann/json/blob/develop/ChangeLog.md)
- [**Google Benchmark**](https://github.com/google/benchmark) to implement the benchmarks
- [**libFuzzer**](http://llvm.org/docs/LibFuzzer.html) to implement fuzz testing for OSS-Fuzz
- [**OSS-Fuzz**](https://github.com/google/oss-fuzz) for continuous fuzz testing of the library
- [**Probot**](https://probot.github.io) for automating maintainer tasks such as closing stale issues, requesting missing information, or detecting toxic comments.

26
benchmarks/CMakeLists.txt Normal file
View file

@ -0,0 +1,26 @@
# Build script for the stand-alone benchmark project: it builds the bundled
# Google Benchmark sources from thirdparty/benchmark, copies the JSON input
# files next to the build tree and produces the json_benchmarks executable.
cmake_minimum_required(VERSION 3.0)
project(JSON_Benchmarks LANGUAGES CXX)
# set compiler flags
# (only applied when the compiler ID matches GNU or Clang; other compilers
# keep whatever flags they were configured with)
if((CMAKE_CXX_COMPILER_ID MATCHES GNU) OR (CMAKE_CXX_COMPILER_ID MATCHES Clang))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -flto -DNDEBUG -O3")
endif()
# configure Google Benchmarks
# (the cache entry is forced before the subdirectory is processed so that the
# nested project sees it; presumably this switches off Google Benchmark's own
# test targets -- TODO confirm against the bundled version)
set(BENCHMARK_ENABLE_TESTING OFF CACHE INTERNAL "" FORCE)
add_subdirectory(thirdparty/benchmark)
# header directories
# NOTE(review): CMAKE_SOURCE_DIR is the top-level source directory, so these
# paths assume this file is the top-level CMakeLists.txt -- confirm
include_directories(thirdparty)
include_directories(${CMAKE_SOURCE_DIR}/src)
# copy test files to build folder
# (file(COPY) runs at configure time; the three number files come from the
# regression test data and are placed under data/numbers)
file(COPY ${CMAKE_SOURCE_DIR}/data DESTINATION .)
file(COPY ${CMAKE_SOURCE_DIR}/../test/data/regression/floats.json
${CMAKE_SOURCE_DIR}/../test/data/regression/unsigned_ints.json
${CMAKE_SOURCE_DIR}/../test/data/regression/signed_ints.json
DESTINATION data/numbers)
# benchmark binary
# NOTE(review): no find_package(Threads) call is visible in this file, so
# CMAKE_THREAD_LIBS_INIT may be empty in this scope -- confirm the thread
# library is pulled in through the benchmark target
add_executable(json_benchmarks src/benchmarks.cpp)
target_link_libraries(json_benchmarks benchmark ${CMAKE_THREAD_LIBS_INIT})

View file

@ -1,21 +0,0 @@
#
# Build/run json.hpp benchmarks, eg. CXX=g++-7 make
#
# The existing json_benchmarks did not allow optimization under some compilers
#
# Default target: build both benchmark binaries, make sure the number fixtures
# exist, then run each binary under the shell's `time`.
all: json_benchmarks json_benchmarks_simple number_jsons
bash -c 'time ./json_benchmarks'
bash -c 'time ./json_benchmarks_simple'
# benchpress-based benchmark; needs the benchpress and cxxopts headers from
# thirdparty/ and is compiled with -pthread
json_benchmarks: src/benchmarks.cpp ../src/json.hpp
$(CXX) -std=c++11 -pthread $(CXXFLAGS) -DNDEBUG -O3 -flto -I thirdparty/benchpress -I thirdparty/cxxopts -I../src src/benchmarks.cpp $(LDFLAGS) -o $@
# dependency-free benchmark; $(<) expands to the first prerequisite
json_benchmarks_simple: src/benchmarks_simple.cpp ../src/json.hpp
$(CXX) -std=c++11 $(CXXFLAGS) -DNDEBUG -O3 -flto -I../src $(<) $(LDFLAGS) -o $@
# Generate the number fixtures with generate.py unless all three files
# already exist.
number_jsons:
(test -e files/numbers/floats.json -a -e files/numbers/signed_ints.json -a -e files/numbers/unsigned_ints.json) || (cd files/numbers ; python generate.py)
# Remove the built binaries and the generated number fixtures.
clean:
rm -f json_benchmarks json_benchmarks_simple files/numbers/*.json

View file

@ -1,3 +0,0 @@
# Benchmarks
Run `make` to compile and run a small set of benchmarks.

View file

@ -1,25 +0,0 @@
#!/usr/bin/env python
"""Generate the number-only JSON fixtures used by the benchmarks.

Running this script writes three files into the current directory, each
holding a JSON array of pseudo-random numbers: ``floats.json``,
``unsigned_ints.json`` and ``signed_ints.json``. The generator is seeded
with a fixed value, so repeated runs produce the same numbers.
"""
import json
import random
import sys


def write_json(path, values):
    """Serialize ``values`` to ``path`` as JSON with a two-space indent."""
    # the context manager closes (and thereby flushes) the file as soon as
    # the dump is finished instead of leaving that to garbage collection
    with open(path, "w") as handle:
        json.dump(values, handle, indent=2)


def main(count=1000000, seed=0):
    """Write the three fixture files into the current working directory.

    ``count`` is the number of values per file and ``seed`` initializes the
    random number generator. The files are generated in the order floats,
    unsigned integers, signed integers, all from one generator sequence.
    """
    random.seed(seed)

    # floats
    write_json("floats.json",
               [random.uniform(-100000000.0, 100000000.0)
                for _ in range(count)])

    # unsigned integers (upper bound is 2**64 - 1)
    write_json("unsigned_ints.json",
               [random.randint(0, 18446744073709551615)
                for _ in range(count)])

    # signed integers (bounds are -2**63 and 2**63 - 1)
    write_json("signed_ints.json",
               [random.randint(-9223372036854775808, 9223372036854775807)
                for _ in range(count)])


if __name__ == "__main__":
    main()

View file

@ -1,132 +1,106 @@
#define BENCHPRESS_CONFIG_MAIN
#include "benchmark/benchmark.h"
#include "json.hpp"
#include <fstream>
#include <sstream>
#include <benchpress.hpp>
#include <json.hpp>
#include <pthread.h>
#include <thread>
using json = nlohmann::json;
/// Helper whose constructor pins the calling thread to a single CPU.
///
/// A single global instance is defined right after the struct, so the
/// constructor runs during static initialization. The pinning code is
/// compiled out entirely when __llvm__ is defined.
struct StartUp
{
    StartUp()
    {
#ifndef __llvm__
        // hardware_concurrency() may return 0 when the number of CPUs cannot
        // be determined; subtracting 1 from that would wrap around to a huge
        // index, so fall back to CPU 0 in that case
        const unsigned int cpu_count = std::thread::hardware_concurrency();
        const unsigned int target_cpu = (cpu_count > 0) ? cpu_count - 1 : 0;

        // pin thread to a single CPU (the one with the highest index)
        cpu_set_t cpuset;
        CPU_ZERO(&cpuset);
        CPU_SET(target_cpu, &cpuset);

        // best effort: the return value is deliberately ignored
        (void) pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
#endif
    }
};

StartUp startup;
//////////////////////////////////////////////////////////////////////////////
// parse JSON from file
//////////////////////////////////////////////////////////////////////////////
enum class EMode { input, output_no_indent, output_with_indent };
static void ParseFile(benchmark::State& state, const char* filename)
{
while (state.KeepRunning())
{
state.PauseTiming();
auto* f = new std::ifstream(filename);
auto* j = new json();
state.ResumeTiming();
static void bench(benchpress::context& ctx,
const std::string& in_path,
const EMode mode)
{
// using string streams for benchmarking to factor-out cold-cache disk
// access.
#if defined( FROMFILE )
std::ifstream istr;
{
istr.open( in_path, std::ifstream::in );
*j = json::parse(*f);
// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#else
std::stringstream istr;
{
// read file into string stream
std::ifstream input_file(in_path);
istr << input_file.rdbuf();
input_file.close();
// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#endif
switch (mode)
{
// benchmarking input
case EMode::input:
{
ctx.reset_timer();
for (size_t i = 0; i < ctx.num_iterations(); ++i)
{
// clear flags and rewind
istr.clear();
istr.seekg(0);
json j;
istr >> j;
state.PauseTiming();
delete f;
delete j;
state.ResumeTiming();
}
break;
std::ifstream file(filename, std::ios::binary | std::ios::ate);
state.SetBytesProcessed(state.iterations() * file.tellg());
}
BENCHMARK_CAPTURE(ParseFile, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseFile, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseFile, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseFile, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseFile, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json");
// benchmarking output
case EMode::output_no_indent:
case EMode::output_with_indent:
//////////////////////////////////////////////////////////////////////////////
// parse JSON from string
//////////////////////////////////////////////////////////////////////////////
static void ParseString(benchmark::State& state, const char* filename)
{
// create JSON value from input
json j;
istr >> j;
std::stringstream ostr;
std::ifstream f(filename);
std::string str((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
ctx.reset_timer();
for (size_t i = 0; i < ctx.num_iterations(); ++i)
while (state.KeepRunning())
{
if (mode == EMode::output_no_indent)
state.PauseTiming();
auto* j = new json();
state.ResumeTiming();
*j = json::parse(str);
state.PauseTiming();
delete j;
state.ResumeTiming();
}
state.SetBytesProcessed(state.iterations() * str.size());
}
BENCHMARK_CAPTURE(ParseString, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseString, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseString, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseString, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json");
//////////////////////////////////////////////////////////////////////////////
// serialize JSON
//////////////////////////////////////////////////////////////////////////////
static void Dump(benchmark::State& state, const char* filename, int indent)
{
ostr << j;
}
else
std::ifstream f(filename);
std::string str((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
json j = json::parse(str);
while (state.KeepRunning())
{
ostr << std::setw(4) << j;
j.dump(indent);
}
// reset data
ostr.str(std::string());
state.SetBytesProcessed(state.iterations() * j.dump(indent).size());
}
BENCHMARK_CAPTURE(Dump, jeopardy / -, "data/jeopardy/jeopardy.json", -1);
BENCHMARK_CAPTURE(Dump, jeopardy / 4, "data/jeopardy/jeopardy.json", 4);
BENCHMARK_CAPTURE(Dump, canada / -, "data/nativejson-benchmark/canada.json", -1);
BENCHMARK_CAPTURE(Dump, canada / 4, "data/nativejson-benchmark/canada.json", 4);
BENCHMARK_CAPTURE(Dump, citm_catalog / -, "data/nativejson-benchmark/citm_catalog.json", -1);
BENCHMARK_CAPTURE(Dump, citm_catalog / 4, "data/nativejson-benchmark/citm_catalog.json", 4);
BENCHMARK_CAPTURE(Dump, twitter / -, "data/nativejson-benchmark/twitter.json", -1);
BENCHMARK_CAPTURE(Dump, twitter / 4, "data/nativejson-benchmark/twitter.json", 4);
BENCHMARK_CAPTURE(Dump, floats / -, "data/numbers/floats.json", -1);
BENCHMARK_CAPTURE(Dump, floats / 4, "data/numbers/floats.json", 4);
BENCHMARK_CAPTURE(Dump, signed_ints / -, "data/numbers/signed_ints.json", -1);
BENCHMARK_CAPTURE(Dump, signed_ints / 4, "data/numbers/signed_ints.json", 4);
BENCHMARK_CAPTURE(Dump, unsigned_ints / -, "data/numbers/unsigned_ints.json", -1);
BENCHMARK_CAPTURE(Dump, unsigned_ints / 4, "data/numbers/unsigned_ints.json", 4);
break;
}
}
}
#define BENCHMARK_I(mode, title, in_path) \
BENCHMARK((title), [](benchpress::context* ctx) \
{ \
bench(*ctx, (in_path), (mode)); \
})
BENCHMARK_I(EMode::input, "parse jeopardy.json", "files/jeopardy/jeopardy.json");
BENCHMARK_I(EMode::input, "parse canada.json", "files/nativejson-benchmark/canada.json");
BENCHMARK_I(EMode::input, "parse citm_catalog.json", "files/nativejson-benchmark/citm_catalog.json");
BENCHMARK_I(EMode::input, "parse twitter.json", "files/nativejson-benchmark/twitter.json");
BENCHMARK_I(EMode::input, "parse numbers/floats.json", "files/numbers/floats.json");
BENCHMARK_I(EMode::input, "parse numbers/signed_ints.json", "files/numbers/signed_ints.json");
BENCHMARK_I(EMode::input, "parse numbers/unsigned_ints.json", "files/numbers/unsigned_ints.json");
BENCHMARK_I(EMode::output_no_indent, "dump jeopardy.json", "files/jeopardy/jeopardy.json");
BENCHMARK_I(EMode::output_with_indent, "dump jeopardy.json with indent", "files/jeopardy/jeopardy.json");
BENCHMARK_I(EMode::output_no_indent, "dump numbers/floats.json", "files/numbers/floats.json");
BENCHMARK_I(EMode::output_no_indent, "dump numbers/signed_ints.json", "files/numbers/signed_ints.json");
BENCHMARK_MAIN();

View file

@ -1,158 +0,0 @@
//
// benchmarks_simple.cpp -- a less complex version of benchmarks.cpp, that better reflects actual performance
//
// For some reason, the complexity of benchmarks.cpp doesn't allow
// the compiler to optimize code using json.hpp effectively. The
// exact same tests, with the use of benchpress and cxxopts produces
// much faster code, at least under g++.
//
#include <fstream>
#include <iostream>
#include <chrono>
#include <list>
#include <tuple>
#include <json.hpp>
using json = nlohmann::json;
// Selects what bench() measures: `input` times parsing, while `output` and
// `indent` both time serialization (bench() chooses the stream formatting
// depending on which of the two is passed).
enum class EMode { input, output, indent };
/// Times either parsing or serialization of the JSON document at in_path.
///
/// @param mode     EMode::input times parsing; EMode::output times a plain
///                 dump; EMode::indent times a dump with an indent width of 4
/// @param iters    number of repetitions inside the timed loop
/// @param in_path  path of the JSON file to load
/// @return the reciprocal of the elapsed time (in seconds) of the whole timed
///         loop, i.e. how many batches of iters repetitions fit in one second
static double bench(const EMode mode, size_t iters, const std::string& in_path )
{
    // using string streams for benchmarking to factor-out cold-cache disk
    // access. Define FROMFILE to use file I/O instead.
#if defined( FROMFILE )
    std::ifstream istr;
    {
        istr.open( in_path, std::ifstream::in );

        // read the stream once
        json j;
        istr >> j;

        // clear flags and rewind
        istr.clear();
        istr.seekg(0);
    }
#else
    std::stringstream istr;
    {
        // read file into string stream
        std::ifstream input_file(in_path);
        istr << input_file.rdbuf();
        input_file.close();

        // read the stream once
        json j;
        istr >> j;

        // clear flags and rewind
        istr.clear();
        istr.seekg(0);
    }
#endif

    // a monotonic clock is used for the interval measurement so that
    // adjustments of the system time cannot distort the result
    using timer = std::chrono::steady_clock;

    double tps = 0;
    switch (mode)
    {
        // benchmarking input
        case EMode::input:
        {
            const auto start = timer::now();
            for (size_t i = 0; i < iters; ++i)
            {
                // clear flags and rewind
                istr.clear();
                istr.seekg(0);
                json j;
                istr >> j;
            }
            const auto ended = timer::now();
            tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
            break;
        }

        // benchmarking output
        case EMode::output:
        case EMode::indent:
        {
            // create JSON value from input
            json j;
            istr >> j;
            std::stringstream ostr;

            const auto start = timer::now();
            for (size_t i = 0; i < iters; ++i)
            {
                // only EMode::indent requests the indented form; the plain
                // EMode::output mode writes the value without a field width
                if (mode == EMode::indent)
                {
                    ostr << std::setw(4) << j;
                }
                else
                {
                    ostr << j;
                }

                // reset data
                ostr.str(std::string());
            }
            const auto ended = timer::now();
            tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
            break;
        }
    }
    return tps;
}
/// Running arithmetic mean over values of type T.
///
/// Samples are added with operator+=; converting the object to T yields the
/// mean of all samples added so far.
template <typename T>
struct average {
    T _sum { 0 };        ///< total of all samples added so far
    size_t _count { 0 }; ///< number of samples added so far

    /// Adds val_ as a new sample and returns val_ itself (not the mean).
    T operator+=( const T &val_ )
    {
        _sum += val_;
        ++_count;
        return val_;
    }

    /// Returns the mean of the samples, or zero when no sample was added
    /// (avoids dividing by a zero count).
    operator T() const
    {
        if ( _count == 0 )
        {
            return T { 0 };
        }
        return _sum / _count;
    }
};
// Execute each test approximately enough times to get near 1
// transaction per second, and compute the average; a single aggregate
// number that gives a performance metric representing both parsing
// and output.
//
// One line is printed per test (name, iteration count, TPS and the time per
// operation in milliseconds), followed by the average TPS over all tests.
int main( int, char ** )
{
    // name, mode, number of iterations, input file
    std::list<std::tuple<std::string, EMode, size_t, std::string>> tests {
        { "parse jeopardy.json", EMode::input, 2, "files/jeopardy/jeopardy.json" },
        { "parse canada.json", EMode::input, 30, "files/nativejson-benchmark/canada.json" },
        { "parse citm_catalog.json", EMode::input, 120, "files/nativejson-benchmark/citm_catalog.json" },
        { "parse twitter.json", EMode::input, 225, "files/nativejson-benchmark/twitter.json" },
        { "parse floats.json", EMode::input, 5, "files/numbers/floats.json" },
        { "parse signed_ints.json", EMode::input, 6, "files/numbers/signed_ints.json" },
        { "parse unsigned_ints.json", EMode::input, 6, "files/numbers/unsigned_ints.json" },
        { "dump jeopardy.json", EMode::output, 5, "files/jeopardy/jeopardy.json" },
        { "dump jeopardy.json w/ind.", EMode::indent, 5, "files/jeopardy/jeopardy.json" },
        { "dump floats.json", EMode::output, 2, "files/numbers/floats.json" },
        { "dump signed_ints.json", EMode::output, 20, "files/numbers/signed_ints.json" },
    };

    average<double> avg;
    // iterate by reference: each entry holds two strings that would
    // otherwise be copied once per iteration
    for ( const auto &t : tests ) {
        std::string name, path;
        EMode mode;
        size_t iters;
        std::tie(name, mode, iters, path) = t;

        const double tps = bench( mode, iters, path );
        avg += tps;

        // tps is the reciprocal of the duration (in seconds) of the whole
        // batch, so a single operation took 1000 / (tps * iters) milliseconds
        const double ms_per_op = 1e3 / ( tps * iters );

        std::cout
                << std::left
                << std::setw( 30 ) << name
                << std::right
                << " x " << std::setw( 3 ) << iters
                << std::left
                << " == " << std::setw( 10 ) << tps
                << std::right
                << " TPS, " << std::setw( 8 ) << std::round( ms_per_op )
                << " ms/op"
                << std::endl;
    }
    std::cout << std::setw( 40 ) << "" << std::string( 10, '-' ) << std::endl;
    std::cout << std::setw( 40 ) << "" << std::setw( 10 ) << std::left << avg << " TPS Average" << std::endl;
    return 0;
}

View file

@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2014 Christopher Gilbert
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -1,401 +0,0 @@
/*
* Copyright (C) 2015 Christopher Gilbert.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef BENCHPRESS_HPP
#define BENCHPRESS_HPP
#include <algorithm> // max, min
#include <atomic> // atomic_intmax_t
#include <chrono> // high_resolution_timer, duration
#include <functional> // function
#include <iomanip> // setw
#include <iostream> // cout
#include <regex> // regex, regex_match
#include <sstream> // stringstream
#include <string> // string
#include <thread> // thread
#include <vector> // vector
namespace benchpress {
/*
 * Settings for a benchmark run.
 *
 * Three values are stored: a regular expression used to select benchmarks
 * (default ".*"), the benchmark time (default 1) and a CPU/thread count
 * (default std::thread::hardware_concurrency()). Every setter returns the
 * object itself, so calls can be chained:
 *
 *     options opts;
 *     opts
 *         .bench(".*")
 *         .benchtime(1)
 *         .cpu(4);
 */
class options {
public:
    options() = default;

    // --- chainable setters ---
    options& bench(const std::string& bench) {
        d_bench = bench;
        return *this;
    }

    options& benchtime(size_t benchtime) {
        d_benchtime = benchtime;
        return *this;
    }

    options& cpu(size_t cpu) {
        d_cpu = cpu;
        return *this;
    }

    // --- accessors ---
    std::string get_bench() const { return d_bench; }
    size_t get_benchtime() const { return d_benchtime; }
    size_t get_cpu() const { return d_cpu; }

private:
    std::string d_bench = ".*";
    size_t d_benchtime = 1;
    size_t d_cpu = std::thread::hardware_concurrency();
};
class context;
/*
 * A named benchmark: pairs a display name with the callable that implements
 * the benchmark. Both are stored as copies and handed out as copies.
 *
 *     benchmark_info bi("example", [](benchpress::context* b) {
 *         // benchmark function
 *     });
 */
class benchmark_info {
public:
    benchmark_info(std::string name, std::function<void(context*)> func)
        : d_name(name)
        , d_func(func)
    {}

    // name under which the benchmark was created
    std::string get_name() const {
        return d_name;
    }

    // callable that receives the benchmark context
    std::function<void(context*)> get_func() const {
        return d_func;
    }

private:
    std::string d_name;
    std::function<void(context*)> d_func;
};
/*
 * Global list of benchmarks. The single instance is created on the first
 * call to get_ptr() and stored in the static pointer d_this.
 *
 *     registration::get_ptr()->register_benchmark(info);
 */
class registration {
public:
    // Returns the shared instance, allocating it on first use.
    static registration* get_ptr() {
        if (d_this != nullptr) {
            return d_this;
        }
        d_this = new registration();
        return d_this;
    }

    // Appends a copy of info to the list of known benchmarks.
    void register_benchmark(benchmark_info& info) {
        d_benchmarks.push_back(info);
    }

    // Returns a copy of the list of benchmarks registered so far.
    std::vector<benchmark_info> get_benchmarks() {
        return d_benchmarks;
    }

private:
    static registration* d_this;
    std::vector<benchmark_info> d_benchmarks;
};
/*
* The auto_register class is a helper used to register benchmarks.
*/
class auto_register {
public:
auto_register(const std::string& name, std::function<void(context*)> func) {
benchmark_info info(name, func);
registration::get_ptr()->register_benchmark(info);
}
};
// Token pasting helpers. CONCAT2 adds one level of macro expansion so that
// arguments such as __LINE__ are expanded before CONCAT pastes them together.
#define CONCAT(x, y) x ## y
#define CONCAT2(x, y) CONCAT(x, y)
// The BENCHMARK macro is a helper for creating benchmark functions and automatically registering them with the
// registration class.
// It expands to the definition of a benchpress::auto_register object named register_<line number>, constructed
// from (x) and (f); the expansion already ends with a semicolon. Because the name is derived from __LINE__, two
// uses on the same source line would define the same name.
#define BENCHMARK(x, f) benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));
// This macro will prevent the compiler from removing a redundant code path which has no side-effects.
// It expands to a block containing an empty inline-assembly statement.
// NOTE(review): whether an empty, non-volatile asm statement reliably has this effect is compiler-dependent -- confirm.
#define DISABLE_REDUNDANT_CODE_OPT() { asm(""); }
/*
 * The result class is responsible for producing a printable string representation of a benchmark run.
 *
 * It stores the number of iterations, the measured duration and a byte count, and derives the time per
 * operation and the throughput from them.
 */
class result {
    size_t d_num_iterations;
    std::chrono::nanoseconds d_duration;
    size_t d_num_bytes;

public:
    result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
        : d_num_iterations(num_iterations)
        , d_duration(duration)
        , d_num_bytes(num_bytes)
    {}

    // Duration divided by the number of iterations, in whole nanoseconds; 0 when there were no iterations.
    size_t get_ns_per_op() const {
        if (d_num_iterations == 0) {
            return 0;
        }
        return d_duration.count() / d_num_iterations;
    }

    // Throughput in units of 1e6 bytes per second, computed as bytes * iterations / duration.
    // Returns 0 when the iteration count, the duration or the byte count is not positive.
    double get_mb_per_s() const {
        if (d_num_iterations == 0 || d_duration.count() <= 0 || d_num_bytes == 0) {
            return 0;
        }
        // convert to fractional seconds: casting to whole seconds would truncate, which turns every
        // duration below one second into a division by zero
        const double seconds = std::chrono::duration<double>(d_duration).count();
        return (double(d_num_bytes) * double(d_num_iterations) / double(1e6)) / seconds;
    }

    // Formats the iteration count and "<n> ns/op" in right-aligned columns of width 12; the throughput
    // column ("<x> MB/s") is appended only when the throughput is greater than zero.
    std::string to_string() const {
        std::stringstream tmp;
        tmp << std::setw(12) << std::right << d_num_iterations;
        const size_t npo = get_ns_per_op();
        tmp << std::setw(12) << std::right << npo << std::setw(0) << " ns/op";
        const double mbs = get_mb_per_s();
        if (mbs > 0.0) {
            tmp << std::setw(12) << std::right << mbs << std::setw(0) << " MB/s";
        }
        return tmp.str();
    }
};
/*
 * Shared iteration counter for parallel benchmark code. The remaining
 * iteration count is held in an atomic integer, and next() hands out one
 * iteration per call.
 */
class parallel_context {
public:
    parallel_context(size_t num_iterations)
        : d_num_iterations(num_iterations)
    {}

    // Atomically decrements the counter and reports whether it was still
    // positive before the decrement.
    bool next() {
        const auto before = d_num_iterations--;
        return before > 0;
    }

private:
    std::atomic_intmax_t d_num_iterations;
};
/*
* The context class is responsible for providing an interface for capturing benchmark metrics to benchmark functions.
*
* A context is built from a benchmark_info and an options object. It owns a start/stop timer that accumulates
* elapsed time, the iteration count for the current run, a thread count and a byte count. run() invokes the
* benchmark function repeatedly with growing iteration counts and returns the final measurement as a result.
*/
class context {
// true while the timer is running
bool d_timer_on;
// time point at which the timer was last started (or reset while running)
std::chrono::high_resolution_clock::time_point d_start;
// time accumulated by stop_timer() since the last reset
std::chrono::nanoseconds d_duration;
// benchmark time taken from options::get_benchtime(), in seconds
std::chrono::seconds d_benchtime;
// iteration count of the current run (set by run_n)
size_t d_num_iterations;
// number of threads started by run_parallel
size_t d_num_threads;
// byte count set via set_bytes; passed on to the result
size_t d_num_bytes;
// copy of the benchmark this context executes
benchmark_info d_benchmark;
public:
// Initializes the context for info; the benchmark time and thread count come from opts, the timer starts
// out stopped with zero accumulated time, and the iteration count starts at 1.
context(const benchmark_info& info, const options& opts)
: d_timer_on(false)
, d_start()
, d_duration()
, d_benchtime(std::chrono::seconds(opts.get_benchtime()))
, d_num_iterations(1)
, d_num_threads(opts.get_cpu())
, d_num_bytes(0)
, d_benchmark(info)
{}
// Number of iterations the benchmark function is expected to perform in the current run.
size_t num_iterations() const { return d_num_iterations; }
void set_num_threads(size_t n) { d_num_threads = n; }
size_t num_threads() const { return d_num_threads; }
// Starts the timer; does nothing when it is already running.
void start_timer() {
if (!d_timer_on) {
d_start = std::chrono::high_resolution_clock::now();
d_timer_on = true;
}
}
// Stops the timer and adds the time since the last start to the accumulated duration; does nothing when the
// timer is not running.
void stop_timer() {
if (d_timer_on) {
d_duration += std::chrono::high_resolution_clock::now() - d_start;
d_timer_on = false;
}
}
// Discards the accumulated duration. A running timer keeps running and is restarted from the current time.
void reset_timer() {
if (d_timer_on) {
d_start = std::chrono::high_resolution_clock::now();
}
d_duration = std::chrono::nanoseconds::zero();
}
// Stores the byte count that is later passed to the result.
void set_bytes(int64_t bytes) { d_num_bytes = bytes; }
// Accumulated duration divided by the iteration count, in whole nanoseconds (integer division).
size_t get_ns_per_op() {
// d_num_iterations is unsigned, so this only guards against a zero count
if (d_num_iterations <= 0) {
return 0;
}
return d_duration.count() / d_num_iterations;
}
// Runs the benchmark function once with the iteration count set to n, timing the whole call.
void run_n(size_t n) {
d_num_iterations = n;
reset_timer();
start_timer();
d_benchmark.get_func()(this);
stop_timer();
}
// Starts d_num_threads threads that each call f with a pointer to one shared parallel_context (created from
// the current iteration count) and waits for all of them to finish.
void run_parallel(std::function<void(parallel_context*)> f) {
parallel_context pc(d_num_iterations);
std::vector<std::thread> threads;
for (size_t i = 0; i < d_num_threads; ++i) {
// pc and f are captured by reference; both outlive the threads because they are joined below
threads.push_back(std::thread([&pc,&f]() -> void {
f(&pc);
}));
}
for(auto& thread : threads){
thread.join();
}
}
// Runs the benchmark with 1 iteration and then with growing iteration counts until the measured duration
// reaches the benchmark time or the iteration count reaches 1e9. Returns the last iteration count together
// with the last measured duration and the byte count.
result run() {
size_t n = 1;
run_n(n);
while (d_duration < d_benchtime && n < 1e9) {
size_t last = n;
// derive the next count from the last measurement; a measured 0 ns/op jumps straight to 1e9
if (get_ns_per_op() == 0) {
n = 1e9;
} else {
n = d_duration.count() / get_ns_per_op();
}
// grow by half, but by at most a factor of 100 and by at least one iteration compared to the last run
n = std::max(std::min(n+n/2, 100*last), last+1);
n = round_up(n);
run_n(n);
}
return result(n, d_duration, d_num_bytes);
}
private:
// Returns 10^k, where k is the number of times n can be divided by 10 (integer division) while it is still
// greater than 10. The power is accumulated in an int before being converted to T.
template<typename T>
T round_down_10(T n) {
int tens = 0;
while (n > 10) {
n /= 10;
tens++;
}
int result = 1;
for (int i = 0; i < tens; ++i) {
result *= 10;
}
return result;
}
// Rounds n up to 2, 5 or 10 times the power of ten computed by round_down_10(n): the first of these
// candidates that is strictly greater than n is returned, with 10 * base as the fallback.
template<typename T>
T round_up(T n) {
T base = round_down_10(n);
if (n < (2 * base)) {
return 2 * base;
}
if (n < (5 * base)) {
return 5 * base;
}
return 10 * base;
}
};
/*
 * The run_benchmarks function will run the registered benchmarks.
 *
 * Every registered benchmark whose complete name matches the regular expression from opts.get_bench() is
 * executed through a context, and one line with its name and formatted result is written to std::cout.
 *
 * The function is declared inline because it is defined in a header: without it, including the header from
 * more than one translation unit would produce multiple definitions.
 */
inline void run_benchmarks(const options& opts) {
    const std::regex match_r(opts.get_bench());
    const auto benchmarks = registration::get_ptr()->get_benchmarks();
    for (const auto& info : benchmarks) {
        // regex_match requires the whole name to match, not just a part of it
        if (std::regex_match(info.get_name(), match_r)) {
            context c(info, opts);
            const auto r = c.run();
            std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl;
        }
    }
}
} // namespace benchpress
/*
* If BENCHPRESS_CONFIG_MAIN is defined when the file is included then a main function will be emitted which provides a
* command-line parser and then executes run_benchmarks.
*/
#ifdef BENCHPRESS_CONFIG_MAIN
#include "cxxopts.hpp"
benchpress::registration* benchpress::registration::d_this;
// Entry point emitted when BENCHPRESS_CONFIG_MAIN is defined: parses the
// command line with cxxopts, runs the registered benchmarks and finally
// prints the program name followed by the total run time in seconds.
int main(int argc, char** argv) {
    using bp_clock = std::chrono::high_resolution_clock;
    const bp_clock::time_point bp_start = bp_clock::now();

    benchpress::options bench_opts;
    try {
        cxxopts::Options cmd_opts(argv[0], " - command line options");
        cmd_opts.add_options()
            ("bench", "run benchmarks matching the regular expression",
             cxxopts::value<std::string>()->default_value(".*"))
            ("benchtime", "run enough iterations of each benchmark to take t seconds",
             cxxopts::value<size_t>()->default_value("1"))
            ("cpu", "specify the number of threads to use for parallel benchmarks",
             cxxopts::value<size_t>()->default_value(std::to_string(std::thread::hardware_concurrency())))
            ("help", "print help");
        cmd_opts.parse(argc, argv);

        // --help prints the usage text and ends the program successfully
        if (cmd_opts.count("help") != 0) {
            std::cout << cmd_opts.help({""}) << std::endl;
            exit(0);
        }

        // copy every option that cxxopts reports as present into bench_opts
        if (cmd_opts.count("bench") != 0) {
            bench_opts.bench(cmd_opts["bench"].as<std::string>());
        }
        if (cmd_opts.count("benchtime") != 0) {
            bench_opts.benchtime(cmd_opts["benchtime"].as<size_t>());
        }
        if (cmd_opts.count("cpu") != 0) {
            bench_opts.cpu(cmd_opts["cpu"].as<size_t>());
        }
    } catch (const cxxopts::OptionException& e) {
        // option errors are reported on standard output and end the program with status 1
        std::cout << "error parsing options: " << e.what() << std::endl;
        exit(1);
    }

    benchpress::run_benchmarks(bench_opts);

    // total run time, measured in whole milliseconds and printed in seconds
    const auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                                bp_clock::now() - bp_start).count();
    const float duration = elapsed_ms / 1000.f;
    std::cout << argv[0] << " " << duration << "s" << std::endl;
    return 0;
}
#endif
#endif // BENCHPRESS_HPP

View file

@ -1,19 +0,0 @@
Copyright (c) 2014 Jarryd Beck
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

File diff suppressed because it is too large Load diff