⬆️ upgraded Catch and Google Benchmark

- Catch 1.12.0 -> 1.12.2 - Google Benchmark 1.3.0 -> 1.4.1
2019-01-13 11:29:39 +01:00 · 2019-01-13 11:29:39 +01:00 · 06731b14d7
commit 06731b14d7
parent daeb48b01a
76 changed files with 2828 additions and 341 deletions
--- a/benchmarks/thirdparty/benchmark/AUTHORS
+++ b/benchmarks/thirdparty/benchmark/AUTHORS
@ -13,6 +13,7 @@ Arne Beer <arne@twobeer.de>
 Carto
 Christopher Seymour <chris.j.seymour@hotmail.com>
 David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
+Deniz Evrenci <denizevrenci@gmail.com>
 Dirac Research 
 Dominik Czarnota <dominik.b.czarnota@gmail.com>
 Eric Fiselier <eric@efcs.ca>
@ -31,6 +32,7 @@ Kishan Kumar <kumar.kishan@outlook.com>
 Lei Xu <eddyxu@gmail.com>
 Matt Clarkson <mattyclarkson@gmail.com>
 Maxim Vafin <maxvafin@gmail.com>
+MongoDB Inc.
 Nick Hutchinson <nshutchinson@gmail.com>
 Oleksandr Sochka <sasha.sochka@gmail.com>
 Paul Redmond <paul.redmond@gmail.com>
@ -38,6 +40,7 @@ Radoslav Yovchev <radoslav.tm@gmail.com>
 Roman Lebedev <lebedev.ri@gmail.com>
 Shuo Chen <chenshuo@chenshuo.com>
 Steinar H. Gunderson <sgunderson@bigfoot.com>
+Stripe, Inc.
 Yixuan Qiu <yixuanq@gmail.com>
 Yusuke Suzuki <utatane.tea@gmail.com>
 Zbigniew Skowron <zbychs@gmail.com>
--- a/benchmarks/thirdparty/benchmark/BUILD.bazel
+++ b/benchmarks/thirdparty/benchmark/BUILD.bazel
@ -0,0 +1,42 @@
+licenses(["notice"])
+
+config_setting(  # selected below to pick Windows-specific linker options
+    name = "windows",
+    values = {
+        "cpu": "x64_windows",
+    },
+    visibility = [":__subpackages__"],
+)
+
+cc_library(  # the benchmark library itself, without a main()
+    name = "benchmark",
+    srcs = glob(
+        [
+            "src/*.cc",
+            "src/*.h",
+        ],
+        exclude = ["src/benchmark_main.cc"],  # compiled separately by :benchmark_main
+    ),
+    hdrs = ["include/benchmark/benchmark.h"],
+    linkopts = select({
+        ":windows": ["-DEFAULTLIB:shlwapi.lib"],  # applies when the :windows config_setting matches
+        "//conditions:default": ["-pthread"],
+    }),
+    strip_include_prefix = "include",
+    visibility = ["//visibility:public"],
+)
+
+cc_library(  # wraps src/benchmark_main.cc and depends on :benchmark
+    name = "benchmark_main",
+    srcs = ["src/benchmark_main.cc"],
+    hdrs = ["include/benchmark/benchmark.h"],
+    strip_include_prefix = "include",
+    visibility = ["//visibility:public"],
+    deps = [":benchmark"],
+)
+
+cc_library(  # exposes the headers under src/ to the //test package only
+    name = "benchmark_internal_headers",
+    hdrs = glob(["src/*.h"]),
+    visibility = ["//test:__pkg__"],
+)
--- a/benchmarks/thirdparty/benchmark/CMakeLists.txt
+++ b/benchmarks/thirdparty/benchmark/CMakeLists.txt
@ -27,10 +27,48 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi
 # in cases where it is not possible to build or find a valid version of gtest.
 option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON)

+set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF)
+# Decide whether the assembly verification tests should default to ON.
+#
+# Takes no arguments. Returns early, leaving ENABLE_ASSEMBLY_TESTS_DEFAULT as
+# the caller set it, for coverage builds, MSVC, non-x86_64 processors, 32-bit
+# builds, or when LLVM FileCheck cannot be found. Otherwise sets
+# ENABLE_ASSEMBLY_TESTS_DEFAULT to ON in the caller's scope. The FileCheck
+# location is stored in the LLVM_FILECHECK_EXE cache entry.
+function(should_enable_assembly_tests)
+  if(CMAKE_BUILD_TYPE)
+    # Quoted so a build type containing ';' or spaces stays a single argument.
+    string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWER)
+    # Pass the variable by name so if() dereferences it exactly once. An
+    # unquoted ${...} is expanded first and the result may then be looked up
+    # again as a variable name.
+    if (CMAKE_BUILD_TYPE_LOWER MATCHES "coverage")
+      # FIXME: The --coverage flag needs to be removed when building assembly
+      # tests for this to work.
+      return()
+    endif()
+  endif()
+  if (MSVC)
+    return()
+  elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+    return()
+  elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+    # FIXME: Make these work on 32 bit builds
+    return()
+  elseif(BENCHMARK_BUILD_32_BITS)
+    # FIXME: Make these work on 32 bit builds
+    return()
+  endif()
+  find_program(LLVM_FILECHECK_EXE FileCheck)
+  if (LLVM_FILECHECK_EXE)
+    # The value is the path of an executable file, so the cache entry type is
+    # FILEPATH (PATH denotes a directory).
+    set(LLVM_FILECHECK_EXE "${LLVM_FILECHECK_EXE}" CACHE FILEPATH "llvm filecheck" FORCE)
+    message(STATUS "LLVM FileCheck Found: ${LLVM_FILECHECK_EXE}")
+  else()
+    message(STATUS "Failed to find LLVM FileCheck")
+    return()
+  endif()
+  set(ENABLE_ASSEMBLY_TESTS_DEFAULT ON PARENT_SCOPE)
+endfunction()
+should_enable_assembly_tests()
+
+# This option disables the building and running of the assembly verification tests
+option(BENCHMARK_ENABLE_ASSEMBLY_TESTS "Enable building and running the assembly tests"
+    ${ENABLE_ASSEMBLY_TESTS_DEFAULT})
+
 # Make sure we can import our CMake functions
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

+
 # Read the git tags to determine the project version
 include(GetGitVersion)
 get_git_version(GIT_VERSION)
@ -140,7 +178,7 @@ else()
      if (GCC_RANLIB)
        set(CMAKE_RANLIB ${GCC_RANLIB})
      endif()
-    elseif("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
+    elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
      include(llvm-toolchain)
    endif()
  endif()
@ -165,7 +203,7 @@ else()
 endif()

 if (BENCHMARK_USE_LIBCXX)
-  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+  if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
    add_cxx_compiler_flag(-stdlib=libc++)
  elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
          "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
@ -178,7 +216,7 @@ if (BENCHMARK_USE_LIBCXX)
    # linker flags appear before all linker inputs and -lc++ must appear after.
    list(APPEND BENCHMARK_CXX_LIBRARIES c++)
  else()
-    message(FATAL "-DBENCHMARK_USE_LIBCXX:BOOL=ON is not supported for compiler")
+    message(FATAL_ERROR "-DBENCHMARK_USE_LIBCXX:BOOL=ON is not supported for compiler")
  endif()
 endif(BENCHMARK_USE_LIBCXX)

--- a/benchmarks/thirdparty/benchmark/CONTRIBUTING.md
+++ b/benchmarks/thirdparty/benchmark/CONTRIBUTING.md
@ -0,0 +1,58 @@
+# How to contribute #
+
+We'd love to accept your patches and contributions to this project.  There are
+just a few small guidelines you need to follow.
+
+
+## Contributor License Agreement ##
+
+Contributions to any Google project must be accompanied by a Contributor
+License Agreement.  This is not a copyright **assignment**, it simply gives
+Google permission to use and redistribute your contributions as part of the
+project.
+
+  * If you are an individual writing original source code and you're sure you
+    own the intellectual property, then you'll need to sign an [individual
+    CLA][].
+
+  * If you work for a company that wants to allow you to contribute your work,
+    then you'll need to sign a [corporate CLA][].
+
+You generally only need to submit a CLA once, so if you've already submitted
+one (even if it was for a different project), you probably don't need to do it
+again.
+
+[individual CLA]: https://developers.google.com/open-source/cla/individual
+[corporate CLA]: https://developers.google.com/open-source/cla/corporate
+
+Once your CLA is submitted (or if you already submitted one for
+another Google project), make a commit adding yourself to the
+[AUTHORS][] and [CONTRIBUTORS][] files. This commit can be part
+of your first [pull request][].
+
+[AUTHORS]: AUTHORS
+[CONTRIBUTORS]: CONTRIBUTORS
+
+
+## Submitting a patch ##
+
+  1. It's generally best to start by opening a new issue describing the bug or
+     feature you're intending to fix.  Even if you think it's relatively minor,
+     it's helpful to know what people are working on.  Mention in the initial
+     issue that you are planning to work on that bug or feature so that it can
+     be assigned to you.
+
+  1. Follow the normal process of [forking][] the project, and setup a new
+     branch to work in.  It's important that each group of changes be done in
+     separate branches in order to ensure that a pull request only includes the
+     commits related to that bug or feature.
+
+  1. Do your best to have [well-formed commit messages][] for each change.
+     This provides consistency throughout the project, and ensures that commit
+     messages are able to be formatted properly by various git tools.
+
+  1. Finally, push the commits to your fork and submit a [pull request][].
+
+[forking]: https://help.github.com/articles/fork-a-repo
+[well-formed commit messages]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html
+[pull request]: https://help.github.com/articles/creating-a-pull-request
--- a/benchmarks/thirdparty/benchmark/CONTRIBUTORS
+++ b/benchmarks/thirdparty/benchmark/CONTRIBUTORS
@ -28,6 +28,7 @@ Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
 Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
 Christopher Seymour <chris.j.seymour@hotmail.com>
 David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
+Deniz Evrenci <denizevrenci@gmail.com>
 Dominic Hamon <dma@stripysock.com> <dominic@google.com>
 Dominik Czarnota <dominik.b.czarnota@gmail.com>
 Eric Fiselier <eric@efcs.ca>
@ -38,6 +39,7 @@ Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
 Jern-Kuan Leong <jernkuan@gmail.com>
 JianXiong Zhou <zhoujianxiong2@gmail.com>
 Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
+John Millikin <jmillikin@stripe.com>
 Jussi Knuuttila <jussi.knuuttila@gmail.com>
 Kai Wolf <kai.wolf@gmail.com>
 Kishan Kumar <kumar.kishan@outlook.com>
@ -53,6 +55,7 @@ Pierre Phaneuf <pphaneuf@google.com>
 Radoslav Yovchev <radoslav.tm@gmail.com>
 Raul Marin <rmrodriguez@cartodb.com>
 Ray Glover <ray.glover@uk.ibm.com>
+Robert Guo <robert.guo@mongodb.com>
 Roman Lebedev <lebedev.ri@gmail.com>
 Shuo Chen <chenshuo@chenshuo.com>
 Tobias Ulvgård <tobias.ulvgard@dirac.se>
--- a/benchmarks/thirdparty/benchmark/LICENSE
+++ b/benchmarks/thirdparty/benchmark/LICENSE
--- a/benchmarks/thirdparty/benchmark/README.md
+++ b/benchmarks/thirdparty/benchmark/README.md
@ -14,6 +14,8 @@ IRC channel: https://freenode.net #googlebenchmark

 [Additional Tooling Documentation](docs/tools.md)

+[Assembly Testing Documentation](docs/AssemblyTests.md)
+

 ## Building

@ -21,7 +23,7 @@ The basic steps for configuring and building the library look like this:

 ```bash
 $ git clone https://github.com/google/benchmark.git
-# Benchmark requires GTest as a dependency. Add the source tree as a subdirectory.
+# Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory.
 $ git clone https://github.com/google/googletest.git benchmark/googletest
 $ mkdir build && cd build
 $ cmake -G <generator> [options] ../benchmark
@ -29,15 +31,13 @@ $ cmake -G <generator> [options] ../benchmark
 $ make
 ```

-Note that Google Benchmark requires GTest to build and run the tests. This
-dependency can be provided three ways:
+Note that Google Benchmark requires Google Test to build and run the tests. This
+dependency can be provided two ways:

-* Checkout the GTest sources into `benchmark/googletest`.
+* Checkout the Google Test sources into `benchmark/googletest` as above.
 * Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during
  configuration, the library will automatically download and build any required
  dependencies.
-* Otherwise, if nothing is done, CMake will use `find_package(GTest REQUIRED)`
-  to resolve the required GTest dependency.

 If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF`
 to `CMAKE_ARGS`.
@ -59,6 +59,7 @@ Now, let's clone the repository and build it
 ```
 git clone https://github.com/google/benchmark.git
 cd benchmark
+git clone https://github.com/google/googletest.git
 mkdir build
 cd build
 cmake .. -DCMAKE_BUILD_TYPE=RELEASE
@ -71,7 +72,7 @@ We need to install the library globally now
 sudo make install
 ```

-Now you have google/benchmark installed in your machine 
+Now you have google/benchmark installed in your machine
 Note: Don't forget to link to pthread library while building

 ## Stable and Experimental Library Versions
@ -86,6 +87,11 @@ to use, test, and provide feedback on the new features are encouraged to try
 this branch. However, this branch provides no stability guarantees and reserves
 the right to change and break the API at any time.

+## Prerequisite knowledge
+
+Before attempting to understand this framework one should ideally have some familiarity with the structure and format of the Google Test framework, upon which it is based. Documentation for Google Test, including a "Getting Started" (primer) guide, is available here:
+https://github.com/google/googletest/blob/master/googletest/docs/Documentation.md
+

 ## Example usage
 ### Basic usage
@ -112,7 +118,10 @@ BENCHMARK(BM_StringCopy);
 BENCHMARK_MAIN();
 ```

-Don't forget to inform your linker to add benchmark library e.g. through `-lbenchmark` compilation flag.
+Don't forget to inform your linker to add benchmark library e.g. through 
+`-lbenchmark` compilation flag. Alternatively, you may leave out the 
+`BENCHMARK_MAIN();` at the end of the source file and link against 
+`-lbenchmark_main` to get the same default behavior.

 The benchmark library will report the timing for the code within the `for(...)` loop.

@ -821,7 +830,7 @@ BM_SetInsert/1024/10                       33157      33648      21431  1.13369M
 The JSON format outputs human readable json split into two top level attributes.
 The `context` attribute contains information about the run in general, including
 information about the CPU and the date.
-The `benchmarks` attribute contains a list of ever benchmark run. Example json
+The `benchmarks` attribute contains a list of every benchmark run. Example json
 output looks like:
 ```json
 {
@ -893,8 +902,11 @@ If you are using gcc, you might need to set `GCC_AR` and `GCC_RANLIB` cmake cach
 If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, `LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables.

 ## Linking against the library
-When using gcc, it is necessary to link against pthread to avoid runtime exceptions.
-This is due to how gcc implements std::thread.
+
+When the library is built using GCC it is necessary to link with `-pthread`,
+due to how GCC implements `std::thread`.
+
+For GCC 4.x failing to link to pthreads will lead to runtime exceptions, not linker errors.
 See [issue #67](https://github.com/google/benchmark/issues/67) for more details.

 ## Compiler Support
@ -928,8 +940,11 @@ sudo cpupower frequency-set --governor powersave

 # Known Issues

-### Windows
+### Windows with CMake

 * Users must manually link `shlwapi.lib`. Failure to do so may result
 in unresolved symbols.

+### Solaris
+
+* Users must explicitly link with kstat library (-lkstat compilation flag).
--- a/benchmarks/thirdparty/benchmark/WORKSPACE
+++ b/benchmarks/thirdparty/benchmark/WORKSPACE
@ -0,0 +1,7 @@
+workspace(name = "com_github_google_benchmark")
+
+http_archive(  # NOTE(review): no sha256 is given for this download; consider adding one
+     name = "com_google_googletest",
+     urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"],  # archive of one specific googletest commit
+     strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e",
+)
--- a/benchmarks/thirdparty/benchmark/appveyor.yml
+++ b/benchmarks/thirdparty/benchmark/appveyor.yml
@ -0,0 +1,56 @@
+version: '{build}'
+
+image: Visual Studio 2017
+
+configuration:
+  - Debug
+  - Release
+
+environment:
+  matrix:
+    - compiler: msvc-15-seh
+      generator: "Visual Studio 15 2017"
+
+    - compiler: msvc-15-seh
+      generator: "Visual Studio 15 2017 Win64"
+
+    - compiler: msvc-14-seh
+      generator: "Visual Studio 14 2015"
+
+    - compiler: msvc-14-seh
+      generator: "Visual Studio 14 2015 Win64"
+
+    - compiler: msvc-12-seh
+      generator: "Visual Studio 12 2013"
+
+    - compiler: msvc-12-seh
+      generator: "Visual Studio 12 2013 Win64"
+
+    - compiler: gcc-5.3.0-posix
+      generator: "MinGW Makefiles"
+      cxx_path: 'C:\mingw-w64\i686-5.3.0-posix-dwarf-rt_v4-rev0\mingw32\bin'
+      APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
+
+matrix:
+  fast_finish: true
+
+install:
+  # git bash conflicts with MinGW makefiles
+  - if "%generator%"=="MinGW Makefiles" (set "PATH=%PATH:C:\Program Files\Git\usr\bin;=%")
+  - if not "%cxx_path%"=="" (set "PATH=%PATH%;%cxx_path%")
+
+build_script:
+  - md _build -Force
+  - cd _build
+  - echo %configuration%
+  - cmake -G "%generator%" "-DCMAKE_BUILD_TYPE=%configuration%" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON ..
+  - cmake --build . --config %configuration%
+
+test_script:
+  - ctest -c %configuration% --timeout 300 --output-on-failure
+
+artifacts:
+  - path: '_build/CMakeFiles/*.log'
+    name: logs
+  - path: '_build/Testing/**/*.xml'
+    name: test_results
--- a/benchmarks/thirdparty/benchmark/cmake/AddCXXCompilerFlag.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/AddCXXCompilerFlag.cmake
@ -62,3 +62,13 @@ function(add_required_cxx_compiler_flag FLAG)
    message(FATAL_ERROR "Required flag '${FLAG}' is not supported by the compiler")
  endif()
 endfunction()
+
+# Probe whether the C++ compiler accepts the warning flag FLAG.
+#
+# FLAG - the warning flag to test.
+# The outcome is reported by check_cxx_compiler_flag() through the variable
+# named by MANGLED_FLAG, which mangle_compiler_flag() fills in from FLAG.
+function(check_cxx_warning_flag FLAG)
+  mangle_compiler_flag("${FLAG}" MANGLED_FLAG)
+  # Remember the current flags so they can be put back after the probe.
+  set(SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+  # With -Werror in effect, an unknown warning flag is reported as an error
+  # rather than as a diagnostic the check could miss.
+  set(CMAKE_REQUIRED_FLAGS "${SAVED_REQUIRED_FLAGS} -Werror ${FLAG}")
+  check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
+  set(CMAKE_REQUIRED_FLAGS "${SAVED_REQUIRED_FLAGS}")
+endfunction()
--- a/benchmarks/thirdparty/benchmark/cmake/CXXFeatureCheck.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/CXXFeatureCheck.cmake
@ -27,25 +27,27 @@ function(cxx_feature_check FILE)
    return()
  endif()

-  message("-- Performing Test ${FEATURE}")
-  if(CMAKE_CROSSCOMPILING)
-    try_compile(COMPILE_${FEATURE}
-            ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
-            CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
-            LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
-    if(COMPILE_${FEATURE})
-      message(WARNING
-            "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
-      set(RUN_${FEATURE} 0)
-    else()
-      set(RUN_${FEATURE} 1)
-    endif()
-  else()
+  if (NOT DEFINED COMPILE_${FEATURE})
    message("-- Performing Test ${FEATURE}")
-    try_run(RUN_${FEATURE} COMPILE_${FEATURE}
-            ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
-            CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
-            LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+    if(CMAKE_CROSSCOMPILING)
+      try_compile(COMPILE_${FEATURE}
+              ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
+              CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
+              LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+      if(COMPILE_${FEATURE})
+        message(WARNING
+              "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
+        set(RUN_${FEATURE} 0)
+      else()
+        set(RUN_${FEATURE} 1)
+      endif()
+    else()
+      message("-- Performing Test ${FEATURE}")
+      try_run(RUN_${FEATURE} COMPILE_${FEATURE}
+              ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
+              CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
+              LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+    endif()
  endif()

  if(RUN_${FEATURE} EQUAL 0)
--- a/benchmarks/thirdparty/benchmark/cmake/Config.cmake.in
+++ b/benchmarks/thirdparty/benchmark/cmake/Config.cmake.in
--- a/benchmarks/thirdparty/benchmark/cmake/GetGitVersion.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/GetGitVersion.cmake
@ -21,6 +21,7 @@ set(__get_git_version INCLUDED)
 function(get_git_version var)
  if(GIT_EXECUTABLE)
      execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
+          WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
          RESULT_VARIABLE status
          OUTPUT_VARIABLE GIT_VERSION
          ERROR_QUIET)
@ -33,9 +34,11 @@ function(get_git_version var)

      # Work out if the repository is dirty
      execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh
+          WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
          OUTPUT_QUIET
          ERROR_QUIET)
      execute_process(COMMAND ${GIT_EXECUTABLE} diff-index --name-only HEAD --
+          WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
          OUTPUT_VARIABLE GIT_DIFF_INDEX
          ERROR_QUIET)
      string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY)
--- a/benchmarks/thirdparty/benchmark/cmake/HandleGTest.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/HandleGTest.cmake
@ -1,13 +1,11 @@

-macro(split_list listname)
-  string(REPLACE ";" " " ${listname} "${${listname}}")
-endmacro()
+include(split_list)

 macro(build_external_gtest)
  include(ExternalProject)
  set(GTEST_FLAGS "")
  if (BENCHMARK_USE_LIBCXX)
-    if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+    if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
      list(APPEND GTEST_FLAGS -stdlib=libc++)
    else()
      message(WARNING "Unsupported compiler (${CMAKE_CXX_COMPILER}) when using libc++")
@ -23,9 +21,22 @@ macro(build_external_gtest)
  if ("${GTEST_BUILD_TYPE}" STREQUAL "COVERAGE")
    set(GTEST_BUILD_TYPE "DEBUG")
  endif()
+  # FIXME: Since 10/Feb/2017 the googletest trunk has had a bug where
+  # -Werror=unused-function fires during the build on OS X. This is a temporary
+  # workaround to keep our travis bots from failing. It should be removed
+  # once gtest is fixed.
+  if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
+    list(APPEND GTEST_FLAGS "-Wno-unused-function")
+  endif()
  split_list(GTEST_FLAGS)
+  set(EXCLUDE_FROM_ALL_OPT "")
+  set(EXCLUDE_FROM_ALL_VALUE "")
+  if (${CMAKE_VERSION} VERSION_GREATER "3.0.99")
+      set(EXCLUDE_FROM_ALL_OPT "EXCLUDE_FROM_ALL")
+      set(EXCLUDE_FROM_ALL_VALUE "ON")
+  endif()
  ExternalProject_Add(googletest
-      EXCLUDE_FROM_ALL ON
+      ${EXCLUDE_FROM_ALL_OPT} ${EXCLUDE_FROM_ALL_VALUE}
      GIT_REPOSITORY https://github.com/google/googletest.git
      GIT_TAG master
      PREFIX "${CMAKE_BINARY_DIR}/googletest"
@ -35,45 +46,68 @@ macro(build_external_gtest)
        -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER}
        -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}
        -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
+        -DCMAKE_INSTALL_LIBDIR:PATH=<INSTALL_DIR>/lib
        -DCMAKE_CXX_FLAGS:STRING=${GTEST_FLAGS}
        -Dgtest_force_shared_crt:BOOL=ON
      )

  ExternalProject_Get_Property(googletest install_dir)
-
-  add_library(gtest UNKNOWN IMPORTED)
-  add_library(gtest_main UNKNOWN IMPORTED)
+  set(GTEST_INCLUDE_DIRS ${install_dir}/include)
+  file(MAKE_DIRECTORY ${GTEST_INCLUDE_DIRS})

  set(LIB_SUFFIX "${CMAKE_STATIC_LIBRARY_SUFFIX}")
  set(LIB_PREFIX "${CMAKE_STATIC_LIBRARY_PREFIX}")
-
  if("${GTEST_BUILD_TYPE}" STREQUAL "DEBUG")
    set(LIB_SUFFIX "d${CMAKE_STATIC_LIBRARY_SUFFIX}")
  endif()
-  file(MAKE_DIRECTORY ${install_dir}/include)
-  set_target_properties(gtest PROPERTIES
-    IMPORTED_LOCATION ${install_dir}/lib/${LIB_PREFIX}gtest${LIB_SUFFIX}
-    INTERFACE_INCLUDE_DIRECTORIES ${install_dir}/include
-  )
-  set_target_properties(gtest_main PROPERTIES
-    IMPORTED_LOCATION ${install_dir}/lib/${LIB_PREFIX}gtest_main${LIB_SUFFIX}
-    INTERFACE_INCLUDE_DIRECTORIES ${install_dir}/include
-  )
-  add_dependencies(gtest googletest)
-  add_dependencies(gtest_main googletest)
-  set(GTEST_BOTH_LIBRARIES gtest gtest_main)
-  #set(GTEST_INCLUDE_DIRS ${install_dir}/include)
+
+  # Use gmock_main instead of gtest_main because it initializes gtest as well.
+  # Note: The libraries are listed in reverse order of their dependencies.
+  foreach(LIB gtest gmock gmock_main)
+    add_library(${LIB} UNKNOWN IMPORTED)
+    set_target_properties(${LIB} PROPERTIES
+      IMPORTED_LOCATION ${install_dir}/lib/${LIB_PREFIX}${LIB}${LIB_SUFFIX}
+      INTERFACE_INCLUDE_DIRECTORIES ${GTEST_INCLUDE_DIRS}
+      INTERFACE_LINK_LIBRARIES "${GTEST_BOTH_LIBRARIES}"
+    )
+    add_dependencies(${LIB} googletest)
+    list(APPEND GTEST_BOTH_LIBRARIES ${LIB})
+  endforeach()
 endmacro(build_external_gtest)

 if (BENCHMARK_ENABLE_GTEST_TESTS)
  if (IS_DIRECTORY ${CMAKE_SOURCE_DIR}/googletest)
+    set(GTEST_ROOT "${CMAKE_SOURCE_DIR}/googletest")
    set(INSTALL_GTEST OFF CACHE INTERNAL "")
    set(INSTALL_GMOCK OFF CACHE INTERNAL "")
    add_subdirectory(${CMAKE_SOURCE_DIR}/googletest)
-    set(GTEST_BOTH_LIBRARIES gtest gtest_main)
+    set(GTEST_BOTH_LIBRARIES gtest gmock gmock_main)
+    foreach(HEADER test mock)
+      # CMake 2.8 and older don't respect INTERFACE_INCLUDE_DIRECTORIES, so we
+      # have to add the paths ourselves.
+      set(HFILE g${HEADER}/g${HEADER}.h)
+      set(HPATH ${GTEST_ROOT}/google${HEADER}/include)
+      find_path(HEADER_PATH_${HEADER} ${HFILE}
+          NO_DEFAULT_PATHS
+          HINTS ${HPATH}
+      )
+      if (NOT HEADER_PATH_${HEADER})
+        message(FATAL_ERROR "Failed to find header ${HFILE} in ${HPATH}")
+      endif()
+      list(APPEND GTEST_INCLUDE_DIRS ${HEADER_PATH_${HEADER}})
+    endforeach()
  elseif(BENCHMARK_DOWNLOAD_DEPENDENCIES)
    build_external_gtest()
  else()
    find_package(GTest REQUIRED)
+    find_path(GMOCK_INCLUDE_DIRS gmock/gmock.h
+        HINTS ${GTEST_INCLUDE_DIRS})
+    if (NOT GMOCK_INCLUDE_DIRS)
+      message(FATAL_ERROR "Failed to find header gmock/gmock.h with hint ${GTEST_INCLUDE_DIRS}")
+    endif()
+    set(GTEST_INCLUDE_DIRS ${GTEST_INCLUDE_DIRS} ${GMOCK_INCLUDE_DIRS})
+    # FIXME: We don't currently require the gmock library to build the tests,
+    # and it's likely we won't find it, so we don't try. As long as we've
+    # found the gmock/gmock.h header and gtest_main that should be good enough.
  endif()
 endif()
--- a/benchmarks/thirdparty/benchmark/cmake/Modules/FindLLVMAr.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/Modules/FindLLVMAr.cmake
--- a/benchmarks/thirdparty/benchmark/cmake/Modules/FindLLVMNm.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/Modules/FindLLVMNm.cmake
--- a/benchmarks/thirdparty/benchmark/cmake/Modules/FindLLVMRanLib.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/Modules/FindLLVMRanLib.cmake
--- a/benchmarks/thirdparty/benchmark/cmake/benchmark.pc.in
+++ b/benchmarks/thirdparty/benchmark/cmake/benchmark.pc.in
--- a/benchmarks/thirdparty/benchmark/cmake/gnu_posix_regex.cpp
+++ b/benchmarks/thirdparty/benchmark/cmake/gnu_posix_regex.cpp
--- a/benchmarks/thirdparty/benchmark/cmake/llvm-toolchain.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/llvm-toolchain.cmake
--- a/benchmarks/thirdparty/benchmark/cmake/posix_regex.cpp
+++ b/benchmarks/thirdparty/benchmark/cmake/posix_regex.cpp
--- a/benchmarks/thirdparty/benchmark/cmake/split_list.cmake
+++ b/benchmarks/thirdparty/benchmark/cmake/split_list.cmake
@ -0,0 +1,3 @@
+macro(split_list listname)  # listname: NAME of the list variable to rewrite in place
+  string(REPLACE ";" " " ${listname} "${${listname}}")  # replace every ';' separator with a space
+endmacro()
--- a/benchmarks/thirdparty/benchmark/cmake/std_regex.cpp
+++ b/benchmarks/thirdparty/benchmark/cmake/std_regex.cpp
--- a/benchmarks/thirdparty/benchmark/cmake/steady_clock.cpp
+++ b/benchmarks/thirdparty/benchmark/cmake/steady_clock.cpp
--- a/benchmarks/thirdparty/benchmark/cmake/thread_safety_attributes.cpp
+++ b/benchmarks/thirdparty/benchmark/cmake/thread_safety_attributes.cpp
--- a/benchmarks/thirdparty/benchmark/docs/AssemblyTests.md
+++ b/benchmarks/thirdparty/benchmark/docs/AssemblyTests.md
@ -0,0 +1,147 @@
+# Assembly Tests
+
+The Benchmark library provides a number of functions whose primary
+purpose is to affect assembly generation, including `DoNotOptimize`
+and `ClobberMemory`. In addition there are other functions,
+such as `KeepRunning`, for which generating good assembly is paramount.
+
+For these functions it's important to have tests that verify the
+correctness and quality of the implementation. This requires testing
+the code generated by the compiler.
+
+This document describes how the Benchmark library tests compiler output,
+as well as how to properly write new tests.
+
+
+## Anatomy of a Test
+
+Writing a test has two steps:
+
+* Write the code you want to generate assembly for.
+* Add `// CHECK` lines to match against the verified assembly.
+
+Example:
+```c++
+
+// CHECK-LABEL: test_add:
+extern "C" int test_add() {
+    extern int ExternInt;
+    return ExternInt + 1;
+
+    // CHECK: movl ExternInt(%rip), %eax
+    // CHECK: addl %eax
+    // CHECK: ret
+}
+
+```
+
+#### LLVM Filecheck
+
+[LLVM's Filecheck](https://llvm.org/docs/CommandGuide/FileCheck.html)
+is used to test the generated assembly against the `// CHECK` lines
+specified in the tests source file. Please see the documentation
+linked above for information on how to write `CHECK` directives.
+
+#### Tips and Tricks:
+
+* Tests should match the minimal amount of output required to establish
+correctness. `CHECK` directives don't have to match on the exact next line
+after the previous match, so tests should omit checks for unimportant
+bits of assembly. ([`CHECK-NEXT`](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-next-directive)
+can be used to ensure a match occurs exactly after the previous match).
+
+* The tests are compiled with `-O3 -g0`. So we're only testing the
+optimized output.
+
+* The assembly output is further cleaned up using `tools/strip_asm.py`.
+This removes comments, assembler directives, and unused labels before
+the test is run.
+
+* The generated and stripped assembly file for a test is output under
+`<build-directory>/test/<test-name>.s`
+
+* Filecheck supports using [`CHECK` prefixes](https://llvm.org/docs/CommandGuide/FileCheck.html#cmdoption-check-prefixes)
+to specify lines that should only match in certain situations.
+The Benchmark tests use `CHECK-CLANG` and `CHECK-GNU` for lines that
+are only expected to match Clang or GCC's output respectively. Normal
+`CHECK` lines match against all compilers. (Note: `CHECK-NOT` and
+`CHECK-LABEL` are NOT prefixes. They are versions of non-prefixed
+`CHECK` lines)
+
+* Use `extern "C"` to disable name mangling for specific functions. This
+makes them easier to name in the `CHECK` lines.
+
+
+## Problems Writing Portable Tests
+
+Tests which check the code generated by a compiler are
+inherently non-portable. Different compilers and even different compiler
+versions may generate entirely different code. The Benchmark tests
+must tolerate this.
+
+LLVM Filecheck provides a number of mechanisms to help write
+"more portable" tests; including [matching using regular expressions](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-pattern-matching-syntax),
+allowing the creation of [named variables](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-variables)
+for later matching, and [checking non-sequential matches](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-dag-directive).
+
+#### Capturing Variables
+
+For example, say GCC stores a variable in a register but Clang stores
+it in memory. To write a test that tolerates both cases we "capture"
+the destination of the store, and then use the captured expression
+to write the remainder of the test.
+
+```c++
+// CHECK-LABEL: test_div_no_op_into_shr:
+extern "C" int test_div_no_op_into_shr(int value) {
+    int divisor = 2;
+    benchmark::DoNotOptimize(divisor); // hide the value from the optimizer
+    return value / divisor;
+
+    // CHECK: movl $2, [[DEST:.*]]
+    // CHECK: idivl [[DEST]]
+    // CHECK: ret
+}
+```
+
+#### Using Regular Expressions to Match Differing Output
+
+Often tests require testing assembly lines which may subtly differ
+between compilers or compiler versions. A common example of this
+is matching stack frame addresses. In this case regular expressions
+can be used to match the differing bits of output. For example:
+
+```c++
+int ExternInt;
+struct Point { int x, y, z; };
+
+// CHECK-LABEL: test_store_point:
+extern "C" void test_store_point() {
+    Point p{ExternInt, ExternInt, ExternInt};
+    benchmark::DoNotOptimize(p);
+
+    // CHECK: movl ExternInt(%rip), %eax
+    // CHECK: movl %eax, -{{[0-9]+}}(%rsp)
+    // CHECK: movl %eax, -{{[0-9]+}}(%rsp)
+    // CHECK: movl %eax, -{{[0-9]+}}(%rsp)
+    // CHECK: ret
+}
+```
+
+## Current Requirements and Limitations
+
+The tests require Filecheck to be installed along the `PATH` of the
+build machine. Otherwise the tests will be disabled.
+
+Additionally, as mentioned in the previous section, codegen tests are
+inherently non-portable. Currently the tests are limited to:
+
+* x86_64 targets.
+* Compiled with GCC or Clang
+
+Further work could be done, at least on a limited basis, to extend the
+tests to other architectures and compilers (using `CHECK` prefixes).
+
+Furthermore, the tests fail for builds which specify additional flags
+that modify code generation, including `--coverage` or `-fsanitize=`.
+
--- a/benchmarks/thirdparty/benchmark/docs/tools.md
+++ b/benchmarks/thirdparty/benchmark/docs/tools.md
@ -0,0 +1,242 @@
+# Benchmark Tools
+
+## compare_bench.py
+
+The `compare_bench.py` utility can be used to compare the results of benchmarks.
+The program is invoked like:
+
+``` bash
+$ compare_bench.py <old-benchmark> <new-benchmark> [benchmark options]...
+```
+
+Where `<old-benchmark>` and `<new-benchmark>` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
+
+`[benchmark options]` will be passed to the benchmark invocations. They can be anything that the binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes.
+
+The sample output using the JSON test files under `Inputs/` gives:
+
+``` bash
+$ ./compare_bench.py ./gbench/Inputs/test1_run1.json ./gbench/Inputs/test1_run2.json
+Comparing ./gbench/Inputs/test1_run1.json to ./gbench/Inputs/test1_run2.json
+Benchmark                        Time             CPU      Time Old      Time New       CPU Old       CPU New
+-------------------------------------------------------------------------------------------------------------
+BM_SameTimes                  +0.0000         +0.0000            10            10            10            10
+BM_2xFaster                   -0.5000         -0.5000            50            25            50            25
+BM_2xSlower                   +1.0000         +1.0000            50           100            50           100
+BM_1PercentFaster             -0.0100         -0.0100           100            99           100            99
+BM_1PercentSlower             +0.0100         +0.0100           100           101           100           101
+BM_10PercentFaster            -0.1000         -0.1000           100            90           100            90
+BM_10PercentSlower            +0.1000         +0.1000           100           110           100           110
+BM_100xSlower                +99.0000        +99.0000           100         10000           100         10000
+BM_100xFaster                 -0.9900         -0.9900         10000           100         10000           100
+BM_10PercentCPUToTime         +0.1000         -0.1000           100           110           100            90
+BM_ThirdFaster                -0.3333         -0.3334           100            67           100            67
+BM_BadTimeUnit                -0.9000         +0.2000             0             0             0             1
+```
+
+As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+
+When a benchmark executable is run, the raw output from the benchmark is printed in real time to stdout. The sample output using `benchmark/basic_test` for both arguments looks like:
+
+```
+./compare_bench.py  test/basic_test test/basic_test  --benchmark_filter=BM_empty.*
+RUNNING: test/basic_test --benchmark_filter=BM_empty.* --benchmark_out=/tmp/tmpN7LF3a
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 23:28:36
+---------------------------------------------------------------------
+Benchmark                              Time           CPU Iterations
+---------------------------------------------------------------------
+BM_empty                               4 ns          4 ns  170178757
+BM_empty/threads:8                     1 ns          7 ns  103868920
+BM_empty_stop_start                    0 ns          0 ns 1000000000
+BM_empty_stop_start/threads:8          0 ns          0 ns 1403031720
+RUNNING: /test/basic_test --benchmark_filter=BM_empty.* --benchmark_out=/tmp/tmplvrIp8
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 23:28:38
+---------------------------------------------------------------------
+Benchmark                              Time           CPU Iterations
+---------------------------------------------------------------------
+BM_empty                               4 ns          4 ns  169534855
+BM_empty/threads:8                     1 ns          7 ns  104188776
+BM_empty_stop_start                    0 ns          0 ns 1000000000
+BM_empty_stop_start/threads:8          0 ns          0 ns 1404159424
+Comparing ../build/test/basic_test to ../build/test/basic_test
+Benchmark                                Time             CPU      Time Old      Time New       CPU Old       CPU New
+---------------------------------------------------------------------------------------------------------------------
+BM_empty                              -0.0048         -0.0049             4             4             4             4
+BM_empty/threads:8                    -0.0123         -0.0054             1             1             7             7
+BM_empty_stop_start                   -0.0000         -0.0000             0             0             0             0
+BM_empty_stop_start/threads:8         -0.0029         +0.0001             0             0             0             0
+
+```
+
+As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+Obviously this example doesn't give any useful output, but it's intended to show the output format when 'compare_bench.py' needs to run benchmarks.
+
+## compare.py
+
+The `compare.py` utility can be used to compare the results of benchmarks.
+There are three modes of operation:
+
+1. Just compare two benchmarks, as `compare_bench.py` did.
+The program is invoked like:
+
+``` bash
+$ compare.py benchmarks <benchmark_baseline> <benchmark_contender> [benchmark options]...
+```
+Where `<benchmark_baseline>` and `<benchmark_contender>` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
+
+`[benchmark options]` will be passed to the benchmark invocations. They can be anything that the binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes.
+
+Example output:
+```
+$ ./compare.py benchmarks ./a.out ./a.out
+RUNNING: ./a.out --benchmark_out=/tmp/tmprBT5nW
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:16:44
+------------------------------------------------------
+Benchmark               Time           CPU Iterations
+------------------------------------------------------
+BM_memcpy/8            36 ns         36 ns   19101577   211.669MB/s
+BM_memcpy/64           76 ns         76 ns    9412571   800.199MB/s
+BM_memcpy/512          84 ns         84 ns    8249070   5.64771GB/s
+BM_memcpy/1024        116 ns        116 ns    6181763   8.19505GB/s
+BM_memcpy/8192        643 ns        643 ns    1062855   11.8636GB/s
+BM_copy/8             222 ns        222 ns    3137987   34.3772MB/s
+BM_copy/64           1608 ns       1608 ns     432758   37.9501MB/s
+BM_copy/512         12589 ns      12589 ns      54806   38.7867MB/s
+BM_copy/1024        25169 ns      25169 ns      27713   38.8003MB/s
+BM_copy/8192       201165 ns     201112 ns       3486   38.8466MB/s
+RUNNING: ./a.out --benchmark_out=/tmp/tmpt1wwG_
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:16:53
+------------------------------------------------------
+Benchmark               Time           CPU Iterations
+------------------------------------------------------
+BM_memcpy/8            36 ns         36 ns   19397903   211.255MB/s
+BM_memcpy/64           73 ns         73 ns    9691174   839.635MB/s
+BM_memcpy/512          85 ns         85 ns    8312329   5.60101GB/s
+BM_memcpy/1024        118 ns        118 ns    6438774   8.11608GB/s
+BM_memcpy/8192        656 ns        656 ns    1068644   11.6277GB/s
+BM_copy/8             223 ns        223 ns    3146977   34.2338MB/s
+BM_copy/64           1611 ns       1611 ns     435340   37.8751MB/s
+BM_copy/512         12622 ns      12622 ns      54818   38.6844MB/s
+BM_copy/1024        25257 ns      25239 ns      27779   38.6927MB/s
+BM_copy/8192       205013 ns     205010 ns       3479    38.108MB/s
+Comparing ./a.out to ./a.out
+Benchmark                 Time             CPU      Time Old      Time New       CPU Old       CPU New
+------------------------------------------------------------------------------------------------------
+BM_memcpy/8            +0.0020         +0.0020            36            36            36            36
+BM_memcpy/64           -0.0468         -0.0470            76            73            76            73
+BM_memcpy/512          +0.0081         +0.0083            84            85            84            85
+BM_memcpy/1024         +0.0098         +0.0097           116           118           116           118
+BM_memcpy/8192         +0.0200         +0.0203           643           656           643           656
+BM_copy/8              +0.0046         +0.0042           222           223           222           223
+BM_copy/64             +0.0020         +0.0020          1608          1611          1608          1611
+BM_copy/512            +0.0027         +0.0026         12589         12622         12589         12622
+BM_copy/1024           +0.0035         +0.0028         25169         25257         25169         25239
+BM_copy/8192           +0.0191         +0.0194        201165        205013        201112        205010
+```
+
+For every benchmark from the first run, it looks for the benchmark with exactly the same name in the second run, and then compares the results. If the names differ, the benchmark is omitted from the diff.
+As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+
+2. Compare two different filters of one benchmark
+The program is invoked like:
+
+``` bash
+$ compare.py filters <benchmark> <filter_baseline> <filter_contender> [benchmark options]...
+```
+Where `<benchmark>` either specifies a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
+
+Where `<filter_baseline>` and `<filter_contender>` are the same regex filters that you would pass to the `[--benchmark_filter=<regex>]` parameter of the benchmark binary.
+
+`[benchmark options]` will be passed to the benchmark invocations. They can be anything that the binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes.
+
+Example output:
+```
+$ ./compare.py filters ./a.out BM_memcpy BM_copy
+RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmpBWKk0k
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:37:28
+------------------------------------------------------
+Benchmark               Time           CPU Iterations
+------------------------------------------------------
+BM_memcpy/8            36 ns         36 ns   17891491   211.215MB/s
+BM_memcpy/64           74 ns         74 ns    9400999   825.646MB/s
+BM_memcpy/512          87 ns         87 ns    8027453   5.46126GB/s
+BM_memcpy/1024        111 ns        111 ns    6116853    8.5648GB/s
+BM_memcpy/8192        657 ns        656 ns    1064679   11.6247GB/s
+RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpAvWcOM
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:37:33
+----------------------------------------------------
+Benchmark             Time           CPU Iterations
+----------------------------------------------------
+BM_copy/8           227 ns        227 ns    3038700   33.6264MB/s
+BM_copy/64         1640 ns       1640 ns     426893   37.2154MB/s
+BM_copy/512       12804 ns      12801 ns      55417   38.1444MB/s
+BM_copy/1024      25409 ns      25407 ns      27516   38.4365MB/s
+BM_copy/8192     202986 ns     202990 ns       3454   38.4871MB/s
+Comparing BM_memcpy to BM_copy (from ./a.out)
+Benchmark                               Time             CPU      Time Old      Time New       CPU Old       CPU New
+--------------------------------------------------------------------------------------------------------------------
+[BM_memcpy vs. BM_copy]/8            +5.2829         +5.2812            36           227            36           227
+[BM_memcpy vs. BM_copy]/64          +21.1719        +21.1856            74          1640            74          1640
+[BM_memcpy vs. BM_copy]/512        +145.6487       +145.6097            87         12804            87         12801
+[BM_memcpy vs. BM_copy]/1024       +227.1860       +227.1776           111         25409           111         25407
+[BM_memcpy vs. BM_copy]/8192       +308.1664       +308.2898           657        202986           656        202990
+```
+
+As you can see, it applies the filters to the benchmarks, both when running the benchmark, and before doing the diff. And to make the diff work, the matches are replaced with some common string. Thus, you can compare two different benchmark families within one benchmark binary.
+As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+
+3. Compare filter one from benchmark one to filter two from benchmark two:
+The program is invoked like:
+
+``` bash
+$ compare.py benchmarksfiltered <benchmark_baseline> <filter_baseline> <benchmark_contender> <filter_contender> [benchmark options]...
+```
+
+Where `<benchmark_baseline>` and `<benchmark_contender>` either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
+
+Where `<filter_baseline>` and `<filter_contender>` are the same regex filters that you would pass to the `[--benchmark_filter=<regex>]` parameter of the benchmark binary.
+
+`[benchmark options]` will be passed to the benchmark invocations. They can be anything that the binary accepts, be it either normal `--benchmark_*` parameters, or some custom parameters your binary takes.
+
+Example output:
+```
+$ ./compare.py benchmarksfiltered ./a.out BM_memcpy ./a.out BM_copy
+RUNNING: ./a.out --benchmark_filter=BM_memcpy --benchmark_out=/tmp/tmp_FvbYg
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:38:27
+------------------------------------------------------
+Benchmark               Time           CPU Iterations
+------------------------------------------------------
+BM_memcpy/8            37 ns         37 ns   18953482   204.118MB/s
+BM_memcpy/64           74 ns         74 ns    9206578   828.245MB/s
+BM_memcpy/512          91 ns         91 ns    8086195   5.25476GB/s
+BM_memcpy/1024        120 ns        120 ns    5804513   7.95662GB/s
+BM_memcpy/8192        664 ns        664 ns    1028363   11.4948GB/s
+RUNNING: ./a.out --benchmark_filter=BM_copy --benchmark_out=/tmp/tmpDfL5iE
+Run on (8 X 4000 MHz CPU s)
+2017-11-07 21:38:32
+----------------------------------------------------
+Benchmark             Time           CPU Iterations
+----------------------------------------------------
+BM_copy/8           230 ns        230 ns    2985909   33.1161MB/s
+BM_copy/64         1654 ns       1653 ns     419408   36.9137MB/s
+BM_copy/512       13122 ns      13120 ns      53403   37.2156MB/s
+BM_copy/1024      26679 ns      26666 ns      26575   36.6218MB/s
+BM_copy/8192     215068 ns     215053 ns       3221   36.3283MB/s
+Comparing BM_memcpy (from ./a.out) to BM_copy (from ./a.out)
+Benchmark                               Time             CPU      Time Old      Time New       CPU Old       CPU New
+--------------------------------------------------------------------------------------------------------------------
+[BM_memcpy vs. BM_copy]/8            +5.1649         +5.1637            37           230            37           230
+[BM_memcpy vs. BM_copy]/64          +21.4352        +21.4374            74          1654            74          1653
+[BM_memcpy vs. BM_copy]/512        +143.6022       +143.5865            91         13122            91         13120
+[BM_memcpy vs. BM_copy]/1024       +221.5903       +221.4790           120         26679           120         26666
+[BM_memcpy vs. BM_copy]/8192       +322.9059       +323.0096           664        215068           664        215053
+```
+This is a mix of the previous two modes: two (potentially different) benchmark binaries are run, and a different filter is applied to each one.
+As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
--- a/benchmarks/thirdparty/benchmark/include/benchmark/benchmark.h
+++ b/benchmarks/thirdparty/benchmark/include/benchmark/benchmark.h
@ -172,6 +172,7 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);

 #include <stdint.h>

+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <iosfwd>
@ -291,7 +292,7 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();


 #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
-    defined(EMSCRIPTN)
+    defined(__EMSCRIPTEN__)
 # define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
 #endif

@ -302,15 +303,20 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
 // See: https://youtu.be/nXaxk27zwlk?t=2441
 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
 template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
-  // Clang doesn't like the 'X' constraint on `value` and certain GCC versions
-  // don't like the 'g' constraint. Attempt to placate them both.
+inline BENCHMARK_ALWAYS_INLINE
+void DoNotOptimize(Tp const& value) {
+    asm volatile("" : : "r,m"(value) : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
 #if defined(__clang__)
-  asm volatile("" : : "g"(value) : "memory");
+  asm volatile("" : "+r,m"(value) : : "memory");
 #else
-  asm volatile("" : : "i,r,m"(value) : "memory");
+  asm volatile("" : "+m,r"(value) : : "memory");
 #endif
 }
+
 // Force the compiler to flush pending writes to global memory. Acts as an
 // effective read/write barrier
 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
@ -379,7 +385,7 @@ enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };

 // BigOFunc is passed to a benchmark in order to specify the asymptotic
 // computational complexity for the benchmark.
-typedef double(BigOFunc)(int);
+typedef double(BigOFunc)(int64_t);

 // StatisticsFunc is passed to a benchmark in order to compute some descriptive
 // statistics over all the measurements of some type
@ -429,16 +435,19 @@ class State {
  // Returns true if the benchmark should continue through another iteration.
  // NOTE: A benchmark may not return from the test until KeepRunning() has
  // returned false.
-  bool KeepRunning() {
-    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
-      StartKeepRunning();
-    }
-    bool const res = (--total_iterations_ != 0);
-    if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
-      FinishKeepRunning();
-    }
-    return res;
-  }
+  bool KeepRunning();
+
+  // Returns true iff the benchmark should run n more iterations.
+  // REQUIRES: 'n' > 0.
+  // NOTE: A benchmark must not return from the test until KeepRunningBatch()
+  // has returned false.
+  // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
+  //
+  // Intended usage:
+  //   while (state.KeepRunningBatch(1000)) {
+  //     // process 1000 elements
+  //   }
+  bool KeepRunningBatch(size_t n);

  // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
  //           by the current thread.
@ -505,10 +514,10 @@ class State {
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  BENCHMARK_ALWAYS_INLINE
-  void SetBytesProcessed(size_t bytes) { bytes_processed_ = bytes; }
+  void SetBytesProcessed(int64_t bytes) { bytes_processed_ = bytes; }

  BENCHMARK_ALWAYS_INLINE
-  size_t bytes_processed() const { return bytes_processed_; }
+  int64_t bytes_processed() const { return bytes_processed_; }

  // If this routine is called with complexity_n > 0 and complexity report is
  // requested for the
@ -516,10 +525,10 @@ class State {
  // and complexity_n will
  // represent the length of N.
  BENCHMARK_ALWAYS_INLINE
-  void SetComplexityN(int complexity_n) { complexity_n_ = complexity_n; }
+  void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }

  BENCHMARK_ALWAYS_INLINE
-  int complexity_length_n() { return complexity_n_; }
+  int64_t complexity_length_n() { return complexity_n_; }

  // If this routine is called with items > 0, then an items/s
  // label is printed on the benchmark report line for the currently
@ -528,10 +537,10 @@ class State {
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  BENCHMARK_ALWAYS_INLINE
-  void SetItemsProcessed(size_t items) { items_processed_ = items; }
+  void SetItemsProcessed(int64_t items) { items_processed_ = items; }

  BENCHMARK_ALWAYS_INLINE
-  size_t items_processed() const { return items_processed_; }
+  int64_t items_processed() const { return items_processed_; }

  // If this routine is called, the specified label is printed at the
  // end of the benchmark report line for the currently executing
@ -539,7 +548,7 @@ class State {
  //  static void BM_Compress(benchmark::State& state) {
  //    ...
  //    double compress = input_size / output_size;
-  //    state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
+  //    state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
  //  }
  // Produces output that looks like:
  //  BM_Compress   50         50   14115038  compress:27.3%
@ -553,34 +562,52 @@ class State {

  // Range arguments for this run. CHECKs if the argument has been set.
  BENCHMARK_ALWAYS_INLINE
-  int range(std::size_t pos = 0) const {
+  int64_t range(std::size_t pos = 0) const {
    assert(range_.size() > pos);
    return range_[pos];
  }

  BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
-  int range_x() const { return range(0); }
+  int64_t range_x() const { return range(0); }

  BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
-  int range_y() const { return range(1); }
+  int64_t range_y() const { return range(1); }

  BENCHMARK_ALWAYS_INLINE
-  size_t iterations() const { return (max_iterations - total_iterations_) + 1; }
+  size_t iterations() const {
+    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
+      return 0;
+    }
+    return max_iterations - total_iterations_ + batch_leftover_;
+  }

- private:
-  bool started_;
-  bool finished_;
+private: // items we expect on the first cache line (ie 64 bytes of the struct)
+
+  // When total_iterations_ is 0, KeepRunning() and friends will return false.
+  // May be larger than max_iterations.
  size_t total_iterations_;

-  std::vector<int> range_;
+  // When using KeepRunningBatch(), batch_leftover_ holds the number of
+  // iterations beyond max_iters that were run. Used to track
+  // completed_iterations_ accurately.
+  size_t batch_leftover_;

-  size_t bytes_processed_;
-  size_t items_processed_;
-
-  int complexity_n_;
+public:
+  const size_t max_iterations;

+private:
+  bool started_;
+  bool finished_;
  bool error_occurred_;

+private: // items we don't need on the first cache line
+  std::vector<int64_t> range_;
+
+  int64_t bytes_processed_;
+  int64_t items_processed_;
+
+  int64_t complexity_n_;
+
 public:
  // Container for user-defined counters.
  UserCounters counters;
@ -588,27 +615,69 @@ class State {
  const int thread_index;
  // Number of threads concurrently executing the benchmark.
  const int threads;
-  const size_t max_iterations;
+

  // TODO(EricWF) make me private
-  State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
+  State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
        int n_threads, internal::ThreadTimer* timer,
        internal::ThreadManager* manager);

 private:
  void StartKeepRunning();
+  // Implementation of KeepRunning() and KeepRunningBatch().
+  // is_batch must be true unless n is 1.
+  bool KeepRunningInternal(size_t n, bool is_batch);
  void FinishKeepRunning();
  internal::ThreadTimer* timer_;
  internal::ThreadManager* manager_;
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
 };

+inline BENCHMARK_ALWAYS_INLINE
+bool State::KeepRunning() {
+  return KeepRunningInternal(1, /*is_batch=*/ false);
+}
+
+inline BENCHMARK_ALWAYS_INLINE
+bool State::KeepRunningBatch(size_t n) {
+  return KeepRunningInternal(n, /*is_batch=*/ true);
+}
+
+inline BENCHMARK_ALWAYS_INLINE
+bool State::KeepRunningInternal(size_t n, bool is_batch) {
+  // total_iterations_ is set to 0 by the constructor, and always set to a
+  // nonzero value by StartKeepRunning().
+  assert(n > 0);
+  // n must be 1 unless is_batch is true.
+  assert(is_batch || n == 1);
+  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
+    total_iterations_ -= n;
+    return true;
+  }
+  if (!started_) {
+    StartKeepRunning();
+    if (!error_occurred_ && total_iterations_ >= n) {
+      total_iterations_-= n;
+      return true;
+    }
+  }
+  // For non-batch runs, total_iterations_ must be 0 by now.
+  if (is_batch && total_iterations_ != 0) {
+    batch_leftover_  = n - total_iterations_;
+    total_iterations_ = 0;
+    return true;
+  }
+  FinishKeepRunning();
+  return false;
+}
+
 struct State::StateIterator {
  struct BENCHMARK_UNUSED Value {};
  typedef std::forward_iterator_tag iterator_category;
  typedef Value value_type;
  typedef Value reference;
  typedef Value pointer;
+  typedef std::ptrdiff_t difference_type;

 private:
  friend class State;
@ -670,7 +739,7 @@ class Benchmark {
  // Run this benchmark once with "x" as the extra argument passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept an arg1.
-  Benchmark* Arg(int x);
+  Benchmark* Arg(int64_t x);

  // Run this benchmark with the given time unit for the generated output report
  Benchmark* Unit(TimeUnit unit);
@ -678,23 +747,23 @@ class Benchmark {
  // Run this benchmark once for a number of values picked from the
  // range [start..limit].  (start and limit are always picked.)
  // REQUIRES: The function passed to the constructor must accept an arg1.
-  Benchmark* Range(int start, int limit);
+  Benchmark* Range(int64_t start, int64_t limit);

  // Run this benchmark once for all values in the range [start..limit] with
  // specific step
  // REQUIRES: The function passed to the constructor must accept an arg1.
-  Benchmark* DenseRange(int start, int limit, int step = 1);
+  Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);

  // Run this benchmark once with "args" as the extra arguments passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
-  Benchmark* Args(const std::vector<int>& args);
+  Benchmark* Args(const std::vector<int64_t>& args);

  // Equivalent to Args({x, y})
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  //   New code should use 'Args'.
-  Benchmark* ArgPair(int x, int y) {
-    std::vector<int> args;
+  Benchmark* ArgPair(int64_t x, int64_t y) {
+    std::vector<int64_t> args;
    args.push_back(x);
    args.push_back(y);
    return Args(args);
@ -703,7 +772,7 @@ class Benchmark {
  // Run this benchmark once for a number of values picked from the
  // ranges [start..limit].  (starts and limits are always picked.)
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
-  Benchmark* Ranges(const std::vector<std::pair<int, int> >& ranges);
+  Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);

  // Equivalent to ArgNames({name})
  Benchmark* ArgName(const std::string& name);
@ -715,8 +784,8 @@ class Benchmark {
  // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  //   New code should use 'Ranges'.
-  Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2) {
-    std::vector<std::pair<int, int> > ranges;
+  Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
+    std::vector<std::pair<int64_t, int64_t> > ranges;
    ranges.push_back(std::make_pair(lo1, hi1));
    ranges.push_back(std::make_pair(lo2, hi2));
    return Ranges(ranges);
@ -823,15 +892,13 @@ class Benchmark {

  int ArgsCnt() const;

-  static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
-
 private:
  friend class BenchmarkFamilies;

  std::string name_;
  ReportMode report_mode_;
  std::vector<std::string> arg_names_;   // Args for all benchmark runs
-  std::vector<std::vector<int> > args_;  // Args for all benchmark runs
+  std::vector<std::vector<int64_t> > args_;  // Args for all benchmark runs
  TimeUnit time_unit_;
  int range_multiplier_;
  double min_time_;
@ -1186,7 +1253,7 @@ class BenchmarkReporter {
    CPUInfo const& cpu_info;
    // The number of chars in the longest benchmark name.
    size_t name_field_width;
-
+    static const char *executable_name;
    Context();
  };

@ -1239,7 +1306,7 @@ class BenchmarkReporter {
    // Keep track of arguments to compute asymptotic complexity
    BigO complexity;
    BigOFunc* complexity_lambda;
-    int complexity_n;
+    int64_t complexity_n;

    // what statistics to compute from the measurements
    const std::vector<Statistics>* statistics;
--- a/benchmarks/thirdparty/benchmark/mingw.py
+++ b/benchmarks/thirdparty/benchmark/mingw.py
@ -0,0 +1,320 @@
+#! /usr/bin/env python
+# encoding: utf-8
+
+import argparse
+import errno
+import logging
+import os
+import platform
+import re
+import sys
+import subprocess
+import tempfile
+
+try:
+    import winreg
+except ImportError:
+    import _winreg as winreg
+try:
+    import urllib.request as request
+except ImportError:
+    import urllib as request
+try:
+    import urllib.parse as parse
+except ImportError:
+    import urlparse as parse
+
+class EmptyLogger(object):
+    '''
+    A stand-in logger that silently discards everything it is given
+
+    It mirrors a handful of logging.Logger method names so that it can be used
+    as the default wherever a logger argument is optional.
+    '''
+    def _discard(self, *k, **kw):
+        # Accept any arguments and do nothing
+        pass
+
+    # Every supported logger method shares the same no-op implementation
+    debug = _discard
+    info = _discard
+    warn = _discard
+    error = _discard
+    critical = _discard
+    setLevel = _discard
+
+urls = (
+    'http://downloads.sourceforge.net/project/mingw-w64/Toolchains%20'
+        'targetting%20Win32/Personal%20Builds/mingw-builds/installer/'
+        'repository.txt',
+    'http://downloads.sourceforge.net/project/mingwbuilds/host-windows/'
+        'repository.txt'
+)
+'''
+A list of mingw-build repositories
+'''
+
+def repository(urls = urls, log = EmptyLogger()):
+    '''
+    Downloads the mingw-builds repository files and parses them
+
+    Every non-blank line of a repository file is read as '|' separated fields:
+    version, arch, threading, exceptions, revision (the first three characters
+    of that field are skipped) and url.
+
+    Returns nested dictionaries indexed as
+    versions[version][arch][threading][exceptions][revision] whose leaves are
+    download urls. `version` is a tuple of ints, `revision` is an int and the
+    arch names 'x32'/'x64' are stored as 'i686'/'x86_64'. When an entry is
+    listed more than once the first url seen is kept.
+    '''
+    log.info('getting mingw-builds repository')
+    versions = {}
+    re_sourceforge = re.compile(r'http://sourceforge.net/projects/([^/]+)/files')
+    re_sub = r'http://downloads.sourceforge.net/project/\1'
+    for url in urls:
+        log.debug(' - requesting: %s', url)
+        socket = request.urlopen(url)
+        # try/finally rather than 'with': the Python 2 urllib response object
+        # is not a context manager
+        try:
+            repo = socket.read()
+        finally:
+            socket.close()
+        if not isinstance(repo, str):
+            repo = repo.decode()
+        for entry in repo.split('\n'):
+            # Skip blank lines (such as the one after a trailing newline)
+            # instead of blindly dropping the last element, so the final entry
+            # survives when the file does not end with a newline
+            if not entry.strip():
+                continue
+            value = entry.split('|')
+            version = tuple([int(n) for n in value[0].strip().split('.')])
+            version = versions.setdefault(version, {})
+            arch = value[1].strip()
+            if arch == 'x32':
+                arch = 'i686'
+            elif arch == 'x64':
+                arch = 'x86_64'
+            arch = version.setdefault(arch, {})
+            threading = arch.setdefault(value[2].strip(), {})
+            exceptions = threading.setdefault(value[3].strip(), {})
+            # Rewrite sourceforge "files" page links into direct download urls
+            exceptions.setdefault(int(value[4].strip()[3:]),
+                re_sourceforge.sub(re_sub, value[5].strip()))
+    return versions
+
+def find_in_path(file, path=None):
+    '''
+    Looks for an executable in a search path
+
+    `path` is either a list of folders or one string of folders joined with
+    os.pathsep; it defaults to the PATH environment variable. On Windows
+    '.exe' is appended to `file`. Returns the existing candidates in search
+    order, which is an empty list when nothing was found.
+    '''
+    executable = file + '.exe' if platform.system() == 'Windows' else file
+    search = os.environ.get('PATH', '') if path is None else path
+    if type(search) is str:
+        search = search.split(os.pathsep)
+    candidates = [os.path.join(folder, executable) for folder in search]
+    return [found for found in candidates if os.path.exists(found)]
+
+def find_7zip(log = EmptyLogger()):
+    '''
+    Attempts to find 7zip for unpacking the mingw-build archives
+
+    The PATH is searched for 7z first. If it is not there, the install folder
+    is read from the 'Path' value of the 7-Zip key under HKEY_LOCAL_MACHINE in
+    the registry; any error raised by that lookup propagates to the caller.
+    Returns the path of the 7z executable.
+    '''
+    log.info('finding 7zip')
+    path = find_in_path('7z')
+    if not path:
+        key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\7-Zip')
+        # Release the registry handle even if the value cannot be read
+        try:
+            path, _ = winreg.QueryValueEx(key, 'Path')
+        finally:
+            winreg.CloseKey(key)
+        path = [os.path.join(path, '7z.exe')]
+    log.debug('found \'%s\'', path[0])
+    return path[0]
+
+find_7zip()
+
+def unpack(archive, location, log = EmptyLogger()):
+    '''
+    Extracts a mingw-builds archive into `location` with 7zip
+
+    The standard output of 7zip is discarded. subprocess.CalledProcessError
+    is raised if 7zip exits with a non-zero status.
+    '''
+    extractor = find_7zip(log)
+    log.info('unpacking %s', os.path.basename(archive))
+    # x: extract with full paths, -o<dir>: output folder, -y: yes to all prompts
+    command = [extractor, 'x', archive, '-o' + location, '-y']
+    log.debug(' - %r', command)
+    sink = open(os.devnull, 'w')
+    try:
+        subprocess.check_call(command, stdout = sink)
+    finally:
+        sink.close()
+
+def download(url, location, log = EmptyLogger()):
+    '''
+    Downloads and unpacks a mingw-builds archive
+
+    The archive is saved into `location` (created if missing), unpacked there
+    and then deleted. Returns the unpacked 'mingw64' or 'mingw32' folder and
+    raises ValueError if neither exists afterwards.
+    '''
+    log.info('downloading MinGW')
+    log.debug(' - url: %s', url)
+    log.debug(' - location: %s', location)
+
+    re_content = re.compile(r'attachment;[ \t]*filename=(")?([^"]*)(")?[\r\n]*')
+
+    stream = request.urlopen(url)
+    # try/finally so the connection is closed even if the transfer fails
+    try:
+        try:
+            content = stream.getheader('Content-Disposition') or ''
+        except AttributeError:
+            # Fallback for response objects without getheader(), such as the
+            # ones returned by the Python 2 urllib
+            content = stream.headers.getheader('Content-Disposition') or ''
+        matches = re_content.match(content)
+        if matches:
+            # The name is chosen by the server: keep only its last component
+            # so that it cannot point outside of `location`
+            filename = os.path.basename(matches.group(2))
+        else:
+            parsed = parse.urlparse(stream.geturl())
+            filename = os.path.basename(parsed.path)
+
+        try:
+            os.makedirs(location)
+        except OSError as e:
+            # An already existing folder is fine, anything else is an error
+            if e.errno == errno.EEXIST and os.path.isdir(location):
+                pass
+            else:
+                raise
+
+        archive = os.path.join(location, filename)
+        with open(archive, 'wb') as out:
+            while True:
+                buf = stream.read(1024)
+                if not buf:
+                    break
+                out.write(buf)
+    finally:
+        stream.close()
+    unpack(archive, location, log = log)
+    os.remove(archive)
+
+    possible = os.path.join(location, 'mingw64')
+    if not os.path.exists(possible):
+        possible = os.path.join(location, 'mingw32')
+        if not os.path.exists(possible):
+            raise ValueError('Failed to find unpacked MinGW: ' + possible)
+    return possible
+
+def root(location = None, arch = None, version = None, threading = None,
+        exceptions = None, revision = None, log = EmptyLogger()):
+    '''
+    Returns the root folder of a specific version of the mingw-builds variant
+    of gcc. Will download the compiler if needed
+
+    Any selector left unset is defaulted: `version` to the newest version in
+    the repository, `arch` from platform.machine(), `threading` to 'posix'
+    then 'win32', `exceptions` to 'seh' then 'sjlj', `revision` to the highest
+    one available and `location` to a 'mingw-builds' folder inside the
+    temporary directory.
+
+    Raises ValueError for an unknown `arch` or when the downloaded archive
+    does not unpack to the expected folder.
+    '''
+
+    # The repository is always required: besides supplying the defaults it
+    # maps the selected variant to its download url, which is looked up below
+    # even when every selector was given by the caller
+    versions = repository(log = log)
+
+    # Determine some defaults
+    version = version or max(versions.keys())
+    if not arch:
+        arch = platform.machine().lower()
+        if arch == 'x86':
+            arch = 'i686'
+        elif arch == 'amd64':
+            arch = 'x86_64'
+    if not threading:
+        keys = versions[version][arch].keys()
+        if 'posix' in keys:
+            threading = 'posix'
+        elif 'win32' in keys:
+            threading = 'win32'
+        else:
+            # dict views cannot be indexed on Python 3
+            threading = list(keys)[0]
+    if not exceptions:
+        keys = versions[version][arch][threading].keys()
+        if 'seh' in keys:
+            exceptions = 'seh'
+        elif 'sjlj' in keys:
+            exceptions = 'sjlj'
+        else:
+            # dict views cannot be indexed on Python 3
+            exceptions = list(keys)[0]
+    # Revision 0 is a valid choice, so only None means "not specified"
+    if revision is None:
+        revision = max(versions[version][arch][threading][exceptions].keys())
+    if not location:
+        location = os.path.join(tempfile.gettempdir(), 'mingw-builds')
+
+    # Get the download url
+    url = versions[version][arch][threading][exceptions][revision]
+
+    # Tell the user whatzzup
+    log.info('finding MinGW %s', '.'.join(str(v) for v in version))
+    log.debug(' - arch: %s', arch)
+    log.debug(' - threading: %s', threading)
+    log.debug(' - exceptions: %s', exceptions)
+    log.debug(' - revision: %s', revision)
+    log.debug(' - url: %s', url)
+
+    # Store each specific revision differently
+    slug = '{version}-{arch}-{threading}-{exceptions}-rev{revision}'
+    slug = slug.format(
+        version = '.'.join(str(v) for v in version),
+        arch = arch,
+        threading = threading,
+        exceptions = exceptions,
+        revision = revision
+    )
+    if arch == 'x86_64':
+        root_dir = os.path.join(location, slug, 'mingw64')
+    elif arch == 'i686':
+        root_dir = os.path.join(location, slug, 'mingw32')
+    else:
+        raise ValueError('Unknown MinGW arch: ' + arch)
+
+    # Download if needed
+    if not os.path.exists(root_dir):
+        downloaded = download(url, os.path.join(location, slug), log = log)
+        if downloaded != root_dir:
+            raise ValueError('The location of mingw did not match\n%s\n%s'
+                % (downloaded, root_dir))
+
+    return root_dir
+
+def str2ver(string):
+    '''
+    Converts a version string such as '4.9.2' into a tuple of three ints
+
+    Used as an argparse `type`: argparse.ArgumentTypeError is raised when the
+    string does not consist of exactly three '.' separated integers.
+    '''
+    try:
+        version = tuple(int(v) for v in string.split('.'))
+        # Compare by value; 'is not' tests object identity, which is not
+        # guaranteed to hold for equal ints
+        if len(version) != 3:
+            raise ValueError()
+    except ValueError:
+        raise argparse.ArgumentTypeError(
+            'please provide a three digit version string')
+    return version
+
+def main():
+    '''
+    Command line entry point: parses the arguments, fetches the requested
+    MinGW and writes the path of its bin folder to stdout
+    '''
+    parser = argparse.ArgumentParser(
+        description = 'Downloads a specific version of MinGW',
+        formatter_class = argparse.ArgumentDefaultsHelpFormatter
+    )
+    default_location = os.path.join(tempfile.gettempdir(), 'mingw-builds')
+    parser.add_argument('--location', default = default_location,
+        help = 'the location to download the compiler to')
+    parser.add_argument('--arch', required = True, choices = ['i686', 'x86_64'],
+        help = 'the target MinGW architecture string')
+    parser.add_argument('--version', type = str2ver,
+        help = 'the version of GCC to download')
+    parser.add_argument('--threading', choices = ['posix', 'win32'],
+        help = 'the threading type of the compiler')
+    parser.add_argument('--exceptions', choices = ['sjlj', 'seh', 'dwarf'],
+        help = 'the method to throw exceptions')
+    parser.add_argument('--revision', type = int,
+        help = 'the revision of the MinGW release')
+    verbosity = parser.add_mutually_exclusive_group()
+    verbosity.add_argument('-v', '--verbose', action = 'store_true',
+        help = 'increase the script output verbosity')
+    verbosity.add_argument('-q', '--quiet', action = 'store_true',
+        help = 'only print errors and warning')
+    options = parser.parse_args()
+
+    # Log bare messages through a stream handler
+    logger = logging.getLogger('mingw')
+    handler = logging.StreamHandler()
+    handler.setFormatter(logging.Formatter('%(message)s'))
+    logger.addHandler(handler)
+    # --verbose and --quiet are mutually exclusive; INFO is the default level
+    if options.verbose:
+        level = logging.DEBUG
+    elif options.quiet:
+        level = logging.WARN
+    else:
+        level = logging.INFO
+    logger.setLevel(level)
+
+    # Fetch (or reuse) the compiler and report where its binaries live
+    root_dir = root(
+        location = options.location,
+        arch = options.arch,
+        version = options.version,
+        threading = options.threading,
+        exceptions = options.exceptions,
+        revision = options.revision,
+        log = logger)
+
+    sys.stdout.write('%s\n' % os.path.join(root_dir, 'bin'))
+
+if __name__ == '__main__':
+    # Report the failures handled below as a one-line message on stderr and
+    # exit with status 1; any other exception propagates unchanged
+    try:
+        main()
+    except IOError as e:
+        # On Python 3 IOError is an alias of OSError, so this clause handles
+        # every OSError there
+        sys.stderr.write('IO error: %s\n' % e)
+        sys.exit(1)
+    except OSError as e:
+        sys.stderr.write('OS error: %s\n' % e)
+        sys.exit(1)
+    except KeyboardInterrupt as e:
+        sys.stderr.write('Killed\n')
+        sys.exit(1)
--- a/benchmarks/thirdparty/benchmark/releasing.md
+++ b/benchmarks/thirdparty/benchmark/releasing.md
@ -0,0 +1,16 @@
+# How to release
+
+* Make sure you're on master and synced to HEAD
+* Ensure the project builds and tests run (sanity check only, obviously)
+    * `parallel -j0 exec ::: test/*_test` can help ensure everything at least
+      passes
+* Prepare release notes
+    * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
+      commits between the last annotated tag and HEAD
+    * Pick the most interesting.
+* Create a release through github's interface
+    * Note this will create a lightweight tag.
+    * Update this to an annotated tag:
+      * `git pull --tags`
+      * `git tag -a -f <tag> <tag>`
+      * `git push --force origin <tag>`
--- a/benchmarks/thirdparty/benchmark/src/CMakeLists.txt
+++ b/benchmarks/thirdparty/benchmark/src/CMakeLists.txt
@ -11,6 +11,7 @@ file(GLOB
    *.cc
    ${PROJECT_SOURCE_DIR}/include/benchmark/*.h
    ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
+list(FILTER SOURCE_FILES EXCLUDE REGEX "benchmark_main\\.cc")

 add_library(benchmark ${SOURCE_FILES})
 set_target_properties(benchmark PROPERTIES
@ -34,6 +35,23 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
  target_link_libraries(benchmark Shlwapi)
 endif()

+# We need extra libraries on Solaris
+if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
+  target_link_libraries(benchmark kstat)
+endif()
+
+# Benchmark main library
+# Built from benchmark_main.cc alone; that file is filtered out of the
+# sources of the `benchmark` target.
+add_library(benchmark_main "benchmark_main.cc")
+set_target_properties(benchmark_main PROPERTIES
+  OUTPUT_NAME "benchmark_main"
+  VERSION ${GENERIC_LIB_VERSION}
+  SOVERSION ${GENERIC_LIB_SOVERSION}
+)
+# NOTE: the include directory is attached to `benchmark`, not to
+# `benchmark_main`; benchmark_main inherits it through the link to
+# `benchmark` on the line after this call.
+target_include_directories(benchmark PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
+    )
+target_link_libraries(benchmark_main benchmark)
+
 set(include_install_dir "include")
 set(lib_install_dir "lib/")
 set(bin_install_dir "bin/")
@ -60,7 +78,7 @@ configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ON
 if (BENCHMARK_ENABLE_INSTALL)
  # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
  install(
-    TARGETS benchmark
+    TARGETS benchmark benchmark_main
    EXPORT ${targets_export_name}
    ARCHIVE DESTINATION ${lib_install_dir}
    LIBRARY DESTINATION ${lib_install_dir}
--- a/benchmarks/thirdparty/benchmark/src/arraysize.h
+++ b/benchmarks/thirdparty/benchmark/src/arraysize.h
--- a/benchmarks/thirdparty/benchmark/src/benchmark.cc
+++ b/benchmarks/thirdparty/benchmark/src/benchmark.cc
@ -17,7 +17,9 @@
 #include "internal_macros.h"

 #ifndef BENCHMARK_OS_WINDOWS
+#ifndef BENCHMARK_OS_FUCHSIA
 #include <sys/resource.h>
+#endif
 #include <sys/time.h>
 #include <unistd.h>
 #endif
@ -27,10 +29,10 @@
 #include <condition_variable>
 #include <cstdio>
 #include <cstdlib>
-#include <cstring>
 #include <fstream>
 #include <iostream>
 #include <memory>
+#include <string>
 #include <thread>

 #include "check.h"
@ -44,7 +46,8 @@
 #include "re.h"
 #include "statistics.h"
 #include "string_util.h"
-#include "timers.h"
+#include "thread_manager.h"
+#include "thread_timer.h"

 DEFINE_bool(benchmark_list_tests, false,
            "Print a list of benchmarks. This option overrides all other "
@ -82,7 +85,7 @@ DEFINE_string(benchmark_out_format, "json",
              "The format to use for file output. Valid values are "
              "'console', 'json', or 'csv'.");

-DEFINE_string(benchmark_out, "", "The file to write additonal output to");
+DEFINE_string(benchmark_out, "", "The file to write additional output to");

 DEFINE_string(benchmark_color, "auto",
              "Whether to use colors in the output.  Valid values: "
@ -108,118 +111,11 @@ namespace internal {

 void UseCharPointer(char const volatile*) {}

-class ThreadManager {
- public:
-  ThreadManager(int num_threads)
-      : alive_threads_(num_threads), start_stop_barrier_(num_threads) {}
-
-  Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) {
-    return benchmark_mutex_;
-  }
-
-  bool StartStopBarrier() EXCLUDES(end_cond_mutex_) {
-    return start_stop_barrier_.wait();
-  }
-
-  void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) {
-    start_stop_barrier_.removeThread();
-    if (--alive_threads_ == 0) {
-      MutexLock lock(end_cond_mutex_);
-      end_condition_.notify_all();
-    }
-  }
-
-  void WaitForAllThreads() EXCLUDES(end_cond_mutex_) {
-    MutexLock lock(end_cond_mutex_);
-    end_condition_.wait(lock.native_handle(),
-                        [this]() { return alive_threads_ == 0; });
-  }
-
- public:
-  struct Result {
-    double real_time_used = 0;
-    double cpu_time_used = 0;
-    double manual_time_used = 0;
-    int64_t bytes_processed = 0;
-    int64_t items_processed = 0;
-    int complexity_n = 0;
-    std::string report_label_;
-    std::string error_message_;
-    bool has_error_ = false;
-    UserCounters counters;
-  };
-  GUARDED_BY(GetBenchmarkMutex()) Result results;
-
- private:
-  mutable Mutex benchmark_mutex_;
-  std::atomic<int> alive_threads_;
-  Barrier start_stop_barrier_;
-  Mutex end_cond_mutex_;
-  Condition end_condition_;
-};
-
-// Timer management class
-class ThreadTimer {
- public:
-  ThreadTimer() = default;
-
-  // Called by each thread
-  void StartTimer() {
-    running_ = true;
-    start_real_time_ = ChronoClockNow();
-    start_cpu_time_ = ThreadCPUUsage();
-  }
-
-  // Called by each thread
-  void StopTimer() {
-    CHECK(running_);
-    running_ = false;
-    real_time_used_ += ChronoClockNow() - start_real_time_;
-    // Floating point error can result in the subtraction producing a negative
-    // time. Guard against that.
-    cpu_time_used_ += std::max<double>(ThreadCPUUsage() - start_cpu_time_, 0);
-  }
-
-  // Called by each thread
-  void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
-
-  bool running() const { return running_; }
-
-  // REQUIRES: timer is not running
-  double real_time_used() {
-    CHECK(!running_);
-    return real_time_used_;
-  }
-
-  // REQUIRES: timer is not running
-  double cpu_time_used() {
-    CHECK(!running_);
-    return cpu_time_used_;
-  }
-
-  // REQUIRES: timer is not running
-  double manual_time_used() {
-    CHECK(!running_);
-    return manual_time_used_;
-  }
-
- private:
-  bool running_ = false;        // Is the timer running
-  double start_real_time_ = 0;  // If running_
-  double start_cpu_time_ = 0;   // If running_
-
-  // Accumulated time so far (does not contain current slice if running_)
-  double real_time_used_ = 0;
-  double cpu_time_used_ = 0;
-  // Manually set iteration time. User sets this with SetIterationTime(seconds).
-  double manual_time_used_ = 0;
-};
-
 namespace {

 BenchmarkReporter::Run CreateRunReport(
    const benchmark::internal::Benchmark::Instance& b,
-    const internal::ThreadManager::Result& results, size_t iters,
+    const internal::ThreadManager::Result& results,
    double seconds) {
  // Create report about this benchmark run.
  BenchmarkReporter::Run report;
@ -228,8 +124,8 @@ BenchmarkReporter::Run CreateRunReport(
  report.error_occurred = results.has_error_;
  report.error_message = results.error_message_;
  report.report_label = results.report_label_;
-  // Report the total iterations across all threads.
-  report.iterations = static_cast<int64_t>(iters) * b.threads;
+  // This is the total iterations across all threads.
+  report.iterations = results.iterations;
  report.time_unit = b.time_unit;

  if (!report.error_occurred) {
@ -268,11 +164,12 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b,
  internal::ThreadTimer timer;
  State st(iters, b->arg, thread_id, b->threads, &timer, manager);
  b->benchmark->Run(st);
-  CHECK(st.iterations() == st.max_iterations)
+  CHECK(st.iterations() >= st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
    internal::ThreadManager::Result& results = manager->results;
+    results.iterations += st.iterations();
    results.cpu_time_used += timer.cpu_time_used();
    results.real_time_used += timer.real_time_used();
    results.manual_time_used += timer.manual_time_used();
@ -340,18 +237,17 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
      // Determine if this run should be reported; Either it has
      // run for a sufficient amount of time or because an error was reported.
      const bool should_report =  repetition_num > 0
-        || has_explicit_iteration_count // An exact iteration count was requested
+        || has_explicit_iteration_count  // An exact iteration count was requested
        || results.has_error_
-        || iters >= kMaxIterations
-        || seconds >= min_time // the elapsed time is large enough
+        || iters >= kMaxIterations  // No chance to try again, we hit the limit.
+        || seconds >= min_time  // the elapsed time is large enough
        // CPU time is specified but the elapsed real time greatly exceeds the
        // minimum time. Note that user provided timers are exempt from this
        // sanity check.
        || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time);

      if (should_report) {
-        BenchmarkReporter::Run report =
-            CreateRunReport(b, results, iters, seconds);
+        BenchmarkReporter::Run report = CreateRunReport(b, results, seconds);
        if (!report.error_occurred && b.complexity != oNone)
          complexity_reports->push_back(report);
        reports.push_back(report);
@ -394,26 +290,44 @@ std::vector<BenchmarkReporter::Run> RunBenchmark(
 }  // namespace
 }  // namespace internal

-State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
+State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
             int n_threads, internal::ThreadTimer* timer,
             internal::ThreadManager* manager)
-    : started_(false),
+    : total_iterations_(0),
+      batch_leftover_(0),
+      max_iterations(max_iters),
+      started_(false),
      finished_(false),
-      total_iterations_(max_iters + 1),
+      error_occurred_(false),
      range_(ranges),
      bytes_processed_(0),
      items_processed_(0),
      complexity_n_(0),
-      error_occurred_(false),
      counters(),
      thread_index(thread_i),
      threads(n_threads),
-      max_iterations(max_iters),
      timer_(timer),
      manager_(manager) {
  CHECK(max_iterations != 0) << "At least one iteration must be run";
-  CHECK(total_iterations_ != 0) << "max iterations wrapped around";
  CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
+
+  // Note: The use of offsetof below is technically undefined until C++17
+  // because State is not a standard layout type. However, all compilers
+  // currently provide well-defined behavior as an extension (which is
+  // demonstrated since constexpr evaluation must diagnose all undefined
+  // behavior). However, GCC and Clang also warn about this use of offsetof,
+  // which must be suppressed.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winvalid-offsetof"
+#endif
+  // Offset tests to ensure commonly accessed data is on the first cache line.
+  const int cache_line_size = 64;
+  static_assert(offsetof(State, error_occurred_) <=
+                (cache_line_size - sizeof(error_occurred_)), "");
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
 }

 void State::PauseTiming() {
@ -437,7 +351,7 @@ void State::SkipWithError(const char* msg) {
      manager_->results.has_error_ = true;
    }
  }
-  total_iterations_ = 1;
+  total_iterations_ = 0;
  if (timer_->running()) timer_->StopTimer();
 }

@ -453,6 +367,7 @@ void State::SetLabel(const char* label) {
 void State::StartKeepRunning() {
  CHECK(!started_ && !finished_);
  started_ = true;
+  total_iterations_ = error_occurred_ ? 0 : max_iterations;
  manager_->StartStopBarrier();
  if (!error_occurred_) ResumeTiming();
 }
@ -462,8 +377,8 @@ void State::FinishKeepRunning() {
  if (!error_occurred_) {
    PauseTiming();
  }
-  // Total iterations has now wrapped around zero. Fix this.
-  total_iterations_ = 1;
+  // Total iterations has now wrapped around past 0. Fix this.
+  total_iterations_ = 0;
  finished_ = true;
  manager_->StartStopBarrier();
 }
@ -495,7 +410,7 @@ void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
  BenchmarkReporter::Context context;
  context.name_field_width = name_field_width;

-  // Keep track of runing times of all instances of current benchmark
+  // Keep track of running times of all instances of current benchmark
  std::vector<BenchmarkReporter::Run> complexity_reports;

  // We flush streams after invoking reporter methods that write to them. This
@ -653,6 +568,7 @@ void PrintUsageAndExit() {

 void ParseCommandLineFlags(int* argc, char** argv) {
  using namespace benchmark;
+  BenchmarkReporter::Context::executable_name = argv[0];
  for (int i = 1; i < *argc; ++i) {
    if (ParseBoolFlag(argv[i], "benchmark_list_tests",
                      &FLAGS_benchmark_list_tests) ||
--- a/benchmarks/thirdparty/benchmark/src/benchmark_api_internal.h
+++ b/benchmarks/thirdparty/benchmark/src/benchmark_api_internal.h
@ -17,7 +17,7 @@ struct Benchmark::Instance {
  std::string name;
  Benchmark* benchmark;
  ReportMode report_mode;
-  std::vector<int> arg;
+  std::vector<int64_t> arg;
  TimeUnit time_unit;
  int range_multiplier;
  bool use_real_time;
--- a/benchmarks/thirdparty/benchmark/src/benchmark_main.cc
+++ b/benchmarks/thirdparty/benchmark/src/benchmark_main.cc
@ -0,0 +1,17 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/benchmark.h"
+
+BENCHMARK_MAIN();
--- a/benchmarks/thirdparty/benchmark/src/benchmark_register.cc
+++ b/benchmarks/thirdparty/benchmark/src/benchmark_register.cc
@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "benchmark/benchmark.h"
-#include "benchmark_api_internal.h"
-#include "internal_macros.h"
+#include "benchmark_register.h"

 #ifndef BENCHMARK_OS_WINDOWS
+#ifndef BENCHMARK_OS_FUCHSIA
 #include <sys/resource.h>
+#endif
 #include <sys/time.h>
 #include <unistd.h>
 #endif
@ -34,13 +34,16 @@
 #include <sstream>
 #include <thread>

+#include "benchmark/benchmark.h"
+#include "benchmark_api_internal.h"
 #include "check.h"
 #include "commandlineflags.h"
 #include "complexity.h"
-#include "statistics.h"
+#include "internal_macros.h"
 #include "log.h"
 #include "mutex.h"
 #include "re.h"
+#include "statistics.h"
 #include "string_util.h"
 #include "timers.h"

@ -74,7 +77,7 @@ class BenchmarkFamilies {

  // Extract the list of benchmark instances that match the specified
  // regular expression.
-  bool FindBenchmarks(const std::string& re,
+  bool FindBenchmarks(std::string re,
                      std::vector<Benchmark::Instance>* benchmarks,
                      std::ostream* Err);

@ -104,13 +107,18 @@ void BenchmarkFamilies::ClearBenchmarks() {
 }

 bool BenchmarkFamilies::FindBenchmarks(
-    const std::string& spec, std::vector<Benchmark::Instance>* benchmarks,
+    std::string spec, std::vector<Benchmark::Instance>* benchmarks,
    std::ostream* ErrStream) {
  CHECK(ErrStream);
  auto& Err = *ErrStream;
  // Make regular expression out of command-line flag
  std::string error_msg;
  Regex re;
+  bool isNegativeFilter = false;
+  if(spec[0] == '-') {
+      spec.replace(0, 1, "");
+      isNegativeFilter = true;
+  }
  if (!re.Init(spec, &error_msg)) {
    Err << "Could not compile benchmark re: " << error_msg << std::endl;
    return false;
@ -170,20 +178,20 @@ bool BenchmarkFamilies::FindBenchmarks(
            const auto& arg_name = family->arg_names_[arg_i];
            if (!arg_name.empty()) {
              instance.name +=
-                  StringPrintF("%s:", family->arg_names_[arg_i].c_str());
+                  StrFormat("%s:", family->arg_names_[arg_i].c_str());
            }
          }
-          
-          instance.name += StringPrintF("%d", arg);
+
+          instance.name += StrFormat("%d", arg);
          ++arg_i;
        }

        if (!IsZero(family->min_time_))
-          instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
+          instance.name += StrFormat("/min_time:%0.3f", family->min_time_);
        if (family->iterations_ != 0)
-          instance.name += StringPrintF("/iterations:%d", family->iterations_);
+          instance.name += StrFormat("/iterations:%d", family->iterations_);
        if (family->repetitions_ != 0)
-          instance.name += StringPrintF("/repeats:%d", family->repetitions_);
+          instance.name += StrFormat("/repeats:%d", family->repetitions_);

        if (family->use_manual_time_) {
          instance.name += "/manual_time";
@ -193,10 +201,11 @@ bool BenchmarkFamilies::FindBenchmarks(

        // Add the number of threads used to the name
        if (!family->thread_counts_.empty()) {
-          instance.name += StringPrintF("/threads:%d", instance.threads);
+          instance.name += StrFormat("/threads:%d", instance.threads);
        }

-        if (re.Match(instance.name)) {
+        if ((re.Match(instance.name) && !isNegativeFilter) ||
+            (!re.Match(instance.name) && isNegativeFilter)) {
          instance.last_benchmark_instance = (&args == &family->args_.back());
          benchmarks->push_back(std::move(instance));
        }
@ -244,30 +253,7 @@ Benchmark::Benchmark(const char* name)

 Benchmark::~Benchmark() {}

-void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
-  CHECK_GE(lo, 0);
-  CHECK_GE(hi, lo);
-  CHECK_GE(mult, 2);
-
-  // Add "lo"
-  dst->push_back(lo);
-
-  static const int kint32max = std::numeric_limits<int32_t>::max();
-
-  // Now space out the benchmarks in multiples of "mult"
-  for (int32_t i = 1; i < kint32max / mult; i *= mult) {
-    if (i >= hi) break;
-    if (i > lo) {
-      dst->push_back(i);
-    }
-  }
-  // Add "hi" (if different from "lo")
-  if (hi != lo) {
-    dst->push_back(hi);
-  }
-}
-
-Benchmark* Benchmark::Arg(int x) {
+Benchmark* Benchmark::Arg(int64_t x) {
  CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
  args_.push_back({x});
  return this;
@ -278,20 +264,21 @@ Benchmark* Benchmark::Unit(TimeUnit unit) {
  return this;
 }

-Benchmark* Benchmark::Range(int start, int limit) {
+Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
  CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
-  std::vector<int> arglist;
+  std::vector<int64_t> arglist;
  AddRange(&arglist, start, limit, range_multiplier_);

-  for (int i : arglist) {
+  for (int64_t i : arglist) {
    args_.push_back({i});
  }
  return this;
 }

-Benchmark* Benchmark::Ranges(const std::vector<std::pair<int, int>>& ranges) {
+Benchmark* Benchmark::Ranges(
+    const std::vector<std::pair<int64_t, int64_t>>& ranges) {
  CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
-  std::vector<std::vector<int>> arglists(ranges.size());
+  std::vector<std::vector<int64_t>> arglists(ranges.size());
  std::size_t total = 1;
  for (std::size_t i = 0; i < ranges.size(); i++) {
    AddRange(&arglists[i], ranges[i].first, ranges[i].second,
@ -302,7 +289,7 @@ Benchmark* Benchmark::Ranges(const std::vector<std::pair<int, int>>& ranges) {
  std::vector<std::size_t> ctr(arglists.size(), 0);

  for (std::size_t i = 0; i < total; i++) {
-    std::vector<int> tmp;
+    std::vector<int64_t> tmp;
    tmp.reserve(arglists.size());

    for (std::size_t j = 0; j < arglists.size(); j++) {
@ -334,17 +321,17 @@ Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
  return this;
 }

-Benchmark* Benchmark::DenseRange(int start, int limit, int step) {
+Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
  CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
  CHECK_GE(start, 0);
  CHECK_LE(start, limit);
-  for (int arg = start; arg <= limit; arg += step) {
+  for (int64_t arg = start; arg <= limit; arg += step) {
    args_.push_back({arg});
  }
  return this;
 }

-Benchmark* Benchmark::Args(const std::vector<int>& args) {
+Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
  CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
  args_.push_back(args);
  return this;
@ -361,7 +348,6 @@ Benchmark* Benchmark::RangeMultiplier(int multiplier) {
  return this;
 }

-
 Benchmark* Benchmark::MinTime(double t) {
  CHECK(t > 0.0);
  CHECK(iterations_ == 0);
@ -369,7 +355,6 @@ Benchmark* Benchmark::MinTime(double t) {
  return this;
 }

-
 Benchmark* Benchmark::Iterations(size_t n) {
  CHECK(n > 0);
  CHECK(IsZero(min_time_));
--- a/benchmarks/thirdparty/benchmark/src/benchmark_register.h
+++ b/benchmarks/thirdparty/benchmark/src/benchmark_register.h
@ -0,0 +1,33 @@
+#ifndef BENCHMARK_REGISTER_H
+#define BENCHMARK_REGISTER_H
+
+#include <limits>
+#include <vector>
+
+#include "check.h"
+
+// Appends the argument values for the range [lo, hi] to *dst: lo itself, then
+// every power of mult that lies strictly between lo and hi (as far as the
+// overflow guard below allows), then hi unless it equals lo.
+//   - dst  : vector that receives the values; existing contents are kept.
+//   - lo   : first value, must be >= 0.
+//   - hi   : last value, must be >= lo.
+//   - mult : factor between successive intermediate values, must be >= 2.
+template <typename T>
+void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
+  CHECK_GE(lo, 0);
+  CHECK_GE(hi, lo);
+  CHECK_GE(mult, 2);
+
+  // Add "lo"
+  dst->push_back(lo);
+
+  static const T kmax = std::numeric_limits<T>::max();
+
+  // Now space out the benchmarks in multiples of "mult".
+  // Looping only while i < kmax / mult keeps "i *= mult" within the range of T.
+  for (T i = 1; i < kmax / mult; i *= mult) {
+    if (i >= hi) break;
+    if (i > lo) {
+      dst->push_back(i);
+    }
+  }
+
+  // Add "hi" (if different from "lo")
+  if (hi != lo) {
+    dst->push_back(hi);
+  }
+}
+
+#endif  // BENCHMARK_REGISTER_H
--- a/benchmarks/thirdparty/benchmark/src/check.h
+++ b/benchmarks/thirdparty/benchmark/src/check.h
--- a/benchmarks/thirdparty/benchmark/src/colorprint.cc
+++ b/benchmarks/thirdparty/benchmark/src/colorprint.cc
--- a/benchmarks/thirdparty/benchmark/src/colorprint.h
+++ b/benchmarks/thirdparty/benchmark/src/colorprint.h
--- a/benchmarks/thirdparty/benchmark/src/commandlineflags.cc
+++ b/benchmarks/thirdparty/benchmark/src/commandlineflags.cc
--- a/benchmarks/thirdparty/benchmark/src/commandlineflags.h
+++ b/benchmarks/thirdparty/benchmark/src/commandlineflags.h
--- a/benchmarks/thirdparty/benchmark/src/complexity.cc
+++ b/benchmarks/thirdparty/benchmark/src/complexity.cc
@ -28,18 +28,18 @@ namespace benchmark {
 BigOFunc* FittingCurve(BigO complexity) {
  switch (complexity) {
    case oN:
-      return [](int n) -> double { return n; };
+      return [](int64_t n) -> double { return static_cast<double>(n); };
    case oNSquared:
-      return [](int n) -> double { return std::pow(n, 2); };
+      return [](int64_t n) -> double { return std::pow(n, 2); };
    case oNCubed:
-      return [](int n) -> double { return std::pow(n, 3); };
+      return [](int64_t n) -> double { return std::pow(n, 3); };
    case oLogN:
-      return [](int n) { return log2(n); };
+      return [](int64_t n) { return log2(n); };
    case oNLogN:
-      return [](int n) { return n * log2(n); };
+      return [](int64_t n) { return n * log2(n); };
    case o1:
    default:
-      return [](int) { return 1.0; };
+      return [](int64_t) { return 1.0; };
  }
 }

@ -65,15 +65,15 @@ std::string GetBigOString(BigO complexity) {

 // Find the coefficient for the high-order term in the running time, by
 // minimizing the sum of squares of relative error, for the fitting curve
-// given by the lambda expresion.
+// given by the lambda expression.
 //   - n             : Vector containing the size of the benchmark tests.
 //   - time          : Vector containing the times for the benchmark tests.
-//   - fitting_curve : lambda expresion (e.g. [](int n) {return n; };).
+//   - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };).

 // For a deeper explanation on the algorithm logic, look the README file at
 // http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit

-LeastSq MinimalLeastSq(const std::vector<int>& n,
+LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
                       const std::vector<double>& time,
                       BigOFunc* fitting_curve) {
  double sigma_gn = 0.0;
@ -117,7 +117,7 @@ LeastSq MinimalLeastSq(const std::vector<int>& n,
 //   - complexity : If different than oAuto, the fitting curve will stick to
 //                  this one. If it is oAuto, it will be calculated the best
 //                  fitting curve.
-LeastSq MinimalLeastSq(const std::vector<int>& n,
+LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
                       const std::vector<double>& time, const BigO complexity) {
  CHECK_EQ(n.size(), time.size());
  CHECK_GE(n.size(), 2);  // Do not compute fitting curve is less than two
@ -157,7 +157,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
  if (reports.size() < 2) return results;

  // Accumulators.
-  std::vector<int> n;
+  std::vector<int64_t> n;
  std::vector<double> real_time;
  std::vector<double> cpu_time;

--- a/benchmarks/thirdparty/benchmark/src/complexity.h
+++ b/benchmarks/thirdparty/benchmark/src/complexity.h
--- a/benchmarks/thirdparty/benchmark/src/console_reporter.cc
+++ b/benchmarks/thirdparty/benchmark/src/console_reporter.cc
--- a/benchmarks/thirdparty/benchmark/src/counter.cc
+++ b/benchmarks/thirdparty/benchmark/src/counter.cc
--- a/benchmarks/thirdparty/benchmark/src/counter.h
+++ b/benchmarks/thirdparty/benchmark/src/counter.h
--- a/benchmarks/thirdparty/benchmark/src/csv_reporter.cc
+++ b/benchmarks/thirdparty/benchmark/src/csv_reporter.cc
--- a/benchmarks/thirdparty/benchmark/src/cycleclock.h
+++ b/benchmarks/thirdparty/benchmark/src/cycleclock.h
@ -159,6 +159,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#elif defined(__s390__) // Covers both s390 and s390x.
+  // Return the CPU clock.
+  uint64_t tsc;
+  asm("stck %0" : "=Q" (tsc) : : "cc");
+  return tsc;
 #else
 // The soft failover to a generic implementation is automatic only for ARM.
 // For other platforms the developer is expected to make an attempt to create
--- a/benchmarks/thirdparty/benchmark/src/internal_macros.h
+++ b/benchmarks/thirdparty/benchmark/src/internal_macros.h
@ -39,6 +39,7 @@
 #elif defined(_WIN32)
  #define BENCHMARK_OS_WINDOWS 1
 #elif defined(__APPLE__)
+  #define BENCHMARK_OS_APPLE 1
  #include "TargetConditionals.h"
  #if defined(TARGET_OS_MAC)
    #define BENCHMARK_OS_MACOSX 1
@ -50,14 +51,20 @@
  #define BENCHMARK_OS_FREEBSD 1
 #elif defined(__NetBSD__)
  #define BENCHMARK_OS_NETBSD 1
+#elif defined(__OpenBSD__)
+  #define BENCHMARK_OS_OPENBSD 1
 #elif defined(__linux__)
  #define BENCHMARK_OS_LINUX 1
 #elif defined(__native_client__)
  #define BENCHMARK_OS_NACL 1
-#elif defined(EMSCRIPTEN)
+#elif defined(__EMSCRIPTEN__)
  #define BENCHMARK_OS_EMSCRIPTEN 1
 #elif defined(__rtems__)
  #define BENCHMARK_OS_RTEMS 1
+#elif defined(__Fuchsia__)
+#define BENCHMARK_OS_FUCHSIA 1
+#elif defined (__SVR4) && defined (__sun)
+#define BENCHMARK_OS_SOLARIS 1
 #endif

 #if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \
--- a/benchmarks/thirdparty/benchmark/src/json_reporter.cc
+++ b/benchmarks/thirdparty/benchmark/src/json_reporter.cc
@ -32,15 +32,15 @@ namespace benchmark {
 namespace {

 std::string FormatKV(std::string const& key, std::string const& value) {
-  return StringPrintF("\"%s\": \"%s\"", key.c_str(), value.c_str());
+  return StrFormat("\"%s\": \"%s\"", key.c_str(), value.c_str());
 }

 std::string FormatKV(std::string const& key, const char* value) {
-  return StringPrintF("\"%s\": \"%s\"", key.c_str(), value);
+  return StrFormat("\"%s\": \"%s\"", key.c_str(), value);
 }

 std::string FormatKV(std::string const& key, bool value) {
-  return StringPrintF("\"%s\": %s", key.c_str(), value ? "true" : "false");
+  return StrFormat("\"%s\": %s", key.c_str(), value ? "true" : "false");
 }

 std::string FormatKV(std::string const& key, int64_t value) {
@ -77,6 +77,10 @@ bool JSONReporter::ReportContext(const Context& context) {
  std::string walltime_value = LocalDateTimeString();
  out << indent << FormatKV("date", walltime_value) << ",\n";

+  if (Context::executable_name) {
+    out << indent << FormatKV("executable", Context::executable_name) << ",\n";
+  }
+
  CPUInfo const& info = context.cpu_info;
  out << indent << FormatKV("num_cpus", static_cast<int64_t>(info.num_cpus))
      << ",\n";
--- a/benchmarks/thirdparty/benchmark/src/log.h
+++ b/benchmarks/thirdparty/benchmark/src/log.h
--- a/benchmarks/thirdparty/benchmark/src/mutex.h
+++ b/benchmarks/thirdparty/benchmark/src/mutex.h
--- a/benchmarks/thirdparty/benchmark/src/re.h
+++ b/benchmarks/thirdparty/benchmark/src/re.h
@ -17,19 +17,31 @@

 #include "internal_macros.h"

+#if !defined(HAVE_STD_REGEX) && \
+    !defined(HAVE_GNU_POSIX_REGEX) && \
+    !defined(HAVE_POSIX_REGEX)
+  // No explicit regex selection; detect based on builtin hints.
+  #if defined(BENCHMARK_OS_LINUX) || defined(BENCHMARK_OS_APPLE)
+    #define HAVE_POSIX_REGEX 1
+  #elif __cplusplus >= 199711L
+    #define HAVE_STD_REGEX 1
+  #endif
+#endif
+
 // Prefer C regex libraries when compiling w/o exceptions so that we can
 // correctly report errors.
-#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && defined(HAVE_STD_REGEX) && \
+// The macro checked here must be HAVE_STD_REGEX, the same name that is
+// #undef'd below and tested when choosing the header to include.
+#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \
+    defined(HAVE_STD_REGEX) && \
    (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
-#undef HAVE_STD_REGEX
+  #undef HAVE_STD_REGEX
 #endif

 #if defined(HAVE_STD_REGEX)
-#include <regex>
+  #include <regex>
 #elif defined(HAVE_GNU_POSIX_REGEX)
-#include <gnuregex.h>
+  #include <gnuregex.h>
 #elif defined(HAVE_POSIX_REGEX)
-#include <regex.h>
+  #include <regex.h>
 #else
 #error No regular expression backend was found!
 #endif
@ -64,7 +76,7 @@ class Regex {
 #elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX)
  regex_t re_;
 #else
-#error No regular expression backend implementation available
+  #error No regular expression backend implementation available
 #endif
 };

--- a/benchmarks/thirdparty/benchmark/src/reporter.cc
+++ b/benchmarks/thirdparty/benchmark/src/reporter.cc
@ -37,6 +37,9 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,

  Out << LocalDateTimeString() << "\n";

+  if (context.executable_name)
+    Out << "Running " << context.executable_name << "\n";
+
  const CPUInfo &info = context.cpu_info;
  Out << "Run on (" << info.num_cpus << " X "
      << (info.cycles_per_second / 1000000.0) << " MHz CPU "
@ -64,6 +67,9 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
 #endif
 }

+// No initializer because it's already initialized to NULL.
+const char* BenchmarkReporter::Context::executable_name;
+
 BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()) {}

 double BenchmarkReporter::Run::GetAdjustedRealTime() const {
--- a/benchmarks/thirdparty/benchmark/src/sleep.cc
+++ b/benchmarks/thirdparty/benchmark/src/sleep.cc
--- a/benchmarks/thirdparty/benchmark/src/sleep.h
+++ b/benchmarks/thirdparty/benchmark/src/sleep.h
--- a/benchmarks/thirdparty/benchmark/src/statistics.cc
+++ b/benchmarks/thirdparty/benchmark/src/statistics.cc
@ -30,22 +30,25 @@ auto StatisticsSum = [](const std::vector<double>& v) {
 };

 double StatisticsMean(const std::vector<double>& v) {
-  if (v.size() == 0) return 0.0;
+  if (v.empty()) return 0.0;
  return StatisticsSum(v) * (1.0 / v.size());
 }

 double StatisticsMedian(const std::vector<double>& v) {
  if (v.size() < 3) return StatisticsMean(v);
-  std::vector<double> partial;
-  // we need roundDown(count/2)+1 slots
-  partial.resize(1 + (v.size() / 2));
-  std::partial_sort_copy(v.begin(), v.end(), partial.begin(), partial.end());
-  // did we have odd number of samples?
-  // if yes, then the last element of partially-sorted vector is the median
-  // it no, then the average of the last two elements is the median
+  // Work on a copy: nth_element reorders its range and v is const.
+  std::vector<double> copy(v);
+
+  // Move the upper-middle element into its sorted position. Afterwards every
+  // element before center is <= *center, in unspecified order.
+  auto center = copy.begin() + v.size() / 2;
+  std::nth_element(copy.begin(), center, copy.end());
+
+  // did we have an odd number of samples?
+  // if yes, then center is the median
+  // if no, then we are looking for the average between center and the value before
  if(v.size() % 2 == 1)
-    return partial.back();
-  return (partial[partial.size() - 2] + partial[partial.size() - 1]) / 2.0;
+    return *center;
+  // The value before center in sorted order is the largest element of the
+  // lower partition. Running nth_element over the whole range a second time
+  // would be allowed to move a different element into center, so only the
+  // half-open range [begin, center) is inspected here.
+  auto center2 = std::max_element(copy.begin(), center);
+  return (*center + *center2) / 2.0;
 }

 // Return the sum of the squares of this sample set
@ -62,7 +65,7 @@ auto Sqrt = [](const double dat) {

 double StatisticsStdDev(const std::vector<double>& v) {
  const auto mean = StatisticsMean(v);
-  if (v.size() == 0) return mean;
+  if (v.empty()) return mean;

  // Sample standard deviation is undefined for n = 1
  if (v.size() == 1)
--- a/benchmarks/thirdparty/benchmark/src/statistics.h
+++ b/benchmarks/thirdparty/benchmark/src/statistics.h
--- a/benchmarks/thirdparty/benchmark/src/string_util.cc
+++ b/benchmarks/thirdparty/benchmark/src/string_util.cc
@ -122,7 +122,7 @@ std::string HumanReadableNumber(double n, double one_k) {
  return ToBinaryStringFullySpecified(n, 1.1, 1, one_k);
 }

-std::string StringPrintFImp(const char* msg, va_list args) {
+std::string StrFormatImp(const char* msg, va_list args) {
  // we might need a second shot at this, so pre-emptively make a copy
  va_list args_cp;
  va_copy(args_cp, args);
@ -152,10 +152,10 @@ std::string StringPrintFImp(const char* msg, va_list args) {
  return std::string(buff_ptr.get());
 }

-std::string StringPrintF(const char* format, ...) {
+std::string StrFormat(const char* format, ...) {
  va_list args;
  va_start(args, format);
-  std::string tmp = StringPrintFImp(format, args);
+  std::string tmp = StrFormatImp(format, args);
  va_end(args);
  return tmp;
 }
--- a/benchmarks/thirdparty/benchmark/src/string_util.h
+++ b/benchmarks/thirdparty/benchmark/src/string_util.h
@ -12,23 +12,23 @@ void AppendHumanReadable(int n, std::string* str);

 std::string HumanReadableNumber(double n, double one_k = 1024.0);

-std::string StringPrintF(const char* format, ...);
+std::string StrFormat(const char* format, ...);

-inline std::ostream& StringCatImp(std::ostream& out) BENCHMARK_NOEXCEPT {
+inline std::ostream& StrCatImp(std::ostream& out) BENCHMARK_NOEXCEPT {
  return out;
 }

 template <class First, class... Rest>
-inline std::ostream& StringCatImp(std::ostream& out, First&& f,
+inline std::ostream& StrCatImp(std::ostream& out, First&& f,
                                  Rest&&... rest) {
  out << std::forward<First>(f);
-  return StringCatImp(out, std::forward<Rest>(rest)...);
+  return StrCatImp(out, std::forward<Rest>(rest)...);
 }

 template <class... Args>
 inline std::string StrCat(Args&&... args) {
  std::ostringstream ss;
-  StringCatImp(ss, std::forward<Args>(args)...);
+  StrCatImp(ss, std::forward<Args>(args)...);
  return ss.str();
 }

--- a/benchmarks/thirdparty/benchmark/src/sysinfo.cc
+++ b/benchmarks/thirdparty/benchmark/src/sysinfo.cc
@ -16,20 +16,26 @@

 #ifdef BENCHMARK_OS_WINDOWS
 #include <Shlwapi.h>
+#undef StrCat  // Don't let StrCat in string_util.h be renamed to lstrcatA
 #include <VersionHelpers.h>
 #include <Windows.h>
 #else
 #include <fcntl.h>
+#ifndef BENCHMARK_OS_FUCHSIA
 #include <sys/resource.h>
+#endif
 #include <sys/time.h>
 #include <sys/types.h>  // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
 #include <unistd.h>
 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
-    defined BENCHMARK_OS_NETBSD
+    defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD
 #define BENCHMARK_HAS_SYSCTL
 #include <sys/sysctl.h>
 #endif
 #endif
+#if defined(BENCHMARK_OS_SOLARIS)
+#include <kstat.h>
+#endif

 #include <algorithm>
 #include <array>
@ -130,6 +136,26 @@ struct ValueUnion {
 };

 ValueUnion GetSysctlImp(std::string const& Name) {
+#if defined BENCHMARK_OS_OPENBSD
+  int mib[2];
+
+  mib[0] = CTL_HW;
+  if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){
+    ValueUnion buff(sizeof(int));
+
+    if (Name == "hw.ncpu") {
+      mib[1] = HW_NCPU;
+    } else {
+      mib[1] = HW_CPUSPEED;
+    }
+
+    if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) {
+      return ValueUnion();
+    }
+    return buff;
+  }
+  return ValueUnion();
+#else
  size_t CurBuffSize = 0;
  if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
    return ValueUnion();
@ -138,6 +164,7 @@ ValueUnion GetSysctlImp(std::string const& Name) {
  if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
    return buff;
  return ValueUnion();
+#endif
 }

 BENCHMARK_MAYBE_UNUSED
@ -303,7 +330,7 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
    if (!B.test(0)) continue;
    CInfo* Cache = &it->Cache;
    CPUInfo::CacheInfo C;
-    C.num_sharing = B.count();
+    C.num_sharing = static_cast<int>(B.count());
    C.level = Cache->Level;
    C.size = Cache->Size;
    switch (Cache->Type) {
@ -354,6 +381,15 @@ int GetNumCPUs() {
  return sysinfo.dwNumberOfProcessors;  // number of logical
                                        // processors in the current
                                        // group
+#elif defined(BENCHMARK_OS_SOLARIS)
+  // Returns -1 in case of a failure.
+  int NumCPU = sysconf(_SC_NPROCESSORS_ONLN);
+  if (NumCPU < 0) {
+    fprintf(stderr,
+            "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n",
+            strerror(errno));
+  }
+  return NumCPU;
 #else
  int NumCPUs = 0;
  int MaxID = -1;
@ -441,7 +477,7 @@ double GetCPUCyclesPerSecond() {
    std::string value;
    if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
    // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
-    // accept postive values. Some environments (virtual machines) report zero,
+    // accept positive values. Some environments (virtual machines) report zero,
    // which would cause infinite looping in WallTime_Init.
    if (startsWithKey(ln, "cpu MHz")) {
      if (!value.empty()) {
@ -473,12 +509,17 @@ double GetCPUCyclesPerSecond() {
  constexpr auto* FreqStr =
 #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
      "machdep.tsc_freq";
+#elif defined BENCHMARK_OS_OPENBSD
+      "hw.cpuspeed";
 #else
      "hw.cpufrequency";
 #endif
  unsigned long long hz = 0;
+#if defined BENCHMARK_OS_OPENBSD
+  if (GetSysctl(FreqStr, &hz)) return hz * 1000000;
+#else
  if (GetSysctl(FreqStr, &hz)) return hz;
-
+#endif
  fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
          FreqStr, strerror(errno));

@ -493,6 +534,35 @@ double GetCPUCyclesPerSecond() {
                      "~MHz", nullptr, &data, &data_size)))
    return static_cast<double>((int64_t)data *
                               (int64_t)(1000 * 1000));  // was mhz
+#elif defined (BENCHMARK_OS_SOLARIS)
+  kstat_ctl_t *kc = kstat_open();
+  if (!kc) {
+    std::cerr << "failed to open /dev/kstat\n";
+    return -1;
+  }
+  kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0");
+  if (!ksp) {
+    std::cerr << "failed to lookup in /dev/kstat\n";
+    return -1;
+  }
+  if (kstat_read(kc, ksp, NULL) < 0) {
+    std::cerr << "failed to read from /dev/kstat\n";
+    return -1;
+  }
+  kstat_named_t *knp =
+      (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz");
+  if (!knp) {
+    std::cerr << "failed to lookup data in /dev/kstat\n";
+    return -1;
+  }
+  if (knp->data_type != KSTAT_DATA_UINT64) {
+    std::cerr << "current_clock_Hz is of unexpected data type: "
+              << knp->data_type << "\n";
+    return -1;
+  }
+  double clock_hz = knp->value.ui64;
+  kstat_close(kc);
+  return clock_hz;
 #endif
  // If we've fallen through, attempt to roughly estimate the CPU clock rate.
  const int estimate_time_ms = 1000;
--- a/benchmarks/thirdparty/benchmark/src/thread_manager.h
+++ b/benchmarks/thirdparty/benchmark/src/thread_manager.h
@ -0,0 +1,66 @@
+#ifndef BENCHMARK_THREAD_MANAGER_H
+#define BENCHMARK_THREAD_MANAGER_H
+
+#include <atomic>
+
+#include "benchmark/benchmark.h"
+#include "mutex.h"
+
+namespace benchmark {
+namespace internal {
+
+// Shared bookkeeping for the threads of one benchmark run: a count of threads
+// still alive, a start/stop barrier, the accumulated Result, and the
+// mutex/condition pair used to wait until every thread has finished.
+class ThreadManager {
+ public:
+  // Both the alive-thread counter and the barrier start at num_threads.
+  ThreadManager(int num_threads)
+      : alive_threads_(num_threads), start_stop_barrier_(num_threads) {}
+
+  // Returns the mutex that guards `results`. benchmark_mutex_ is declared
+  // mutable, which is what lets this const accessor hand out a non-const
+  // reference.
+  Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) {
+    return benchmark_mutex_;
+  }
+
+  // Forwards to the barrier's wait() and returns its result.
+  // NOTE(review): the meaning of the returned bool is defined by Barrier,
+  // which is not visible here - confirm before relying on it.
+  bool StartStopBarrier() EXCLUDES(end_cond_mutex_) {
+    return start_stop_barrier_.wait();
+  }
+
+  // Called when a thread is done: removes it from the barrier, decrements the
+  // alive counter and, when the counter reaches zero, notifies all waiters on
+  // end_condition_ while holding end_cond_mutex_.
+  void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) {
+    start_stop_barrier_.removeThread();
+    if (--alive_threads_ == 0) {
+      MutexLock lock(end_cond_mutex_);
+      end_condition_.notify_all();
+    }
+  }
+
+  // Blocks on end_condition_ (under end_cond_mutex_) until alive_threads_ is
+  // zero.
+  void WaitForAllThreads() EXCLUDES(end_cond_mutex_) {
+    MutexLock lock(end_cond_mutex_);
+    end_condition_.wait(lock.native_handle(),
+                        [this]() { return alive_threads_ == 0; });
+  }
+
+ public:
+  // Values collected for the run; every numeric field starts at zero and
+  // has_error_ starts false.
+  struct Result {
+    int64_t iterations = 0;
+    double real_time_used = 0;
+    double cpu_time_used = 0;
+    double manual_time_used = 0;
+    int64_t bytes_processed = 0;
+    int64_t items_processed = 0;
+    int64_t complexity_n = 0;
+    std::string report_label_;
+    std::string error_message_;
+    bool has_error_ = false;
+    UserCounters counters;
+  };
+  // Annotated as guarded by the mutex returned from GetBenchmarkMutex().
+  GUARDED_BY(GetBenchmarkMutex()) Result results;
+
+ private:
+  mutable Mutex benchmark_mutex_;   // guards `results`
+  std::atomic<int> alive_threads_;  // threads that have not yet completed
+  Barrier start_stop_barrier_;
+  Mutex end_cond_mutex_;            // held while notifying/waiting below
+  Condition end_condition_;         // signalled when alive_threads_ hits 0
+};
+
+}  // namespace internal
+}  // namespace benchmark
+
+#endif  // BENCHMARK_THREAD_MANAGER_H
--- a/benchmarks/thirdparty/benchmark/src/thread_timer.h
+++ b/benchmarks/thirdparty/benchmark/src/thread_timer.h
@ -0,0 +1,69 @@
+#ifndef BENCHMARK_THREAD_TIMER_H
+#define BENCHMARK_THREAD_TIMER_H
+
+#include <algorithm>  // std::max
+
+#include "check.h"
+#include "timers.h"
+
+namespace benchmark {
+namespace internal {
+
+// Accumulates the real, CPU and manually reported time of one thread across
+// any number of StartTimer()/StopTimer() slices.
+class ThreadTimer {
+ public:
+  ThreadTimer() = default;
+
+  // Called by each thread. Marks the timer as running and records the
+  // current real and thread-CPU clock readings as the start of the slice.
+  void StartTimer() {
+    running_ = true;
+    start_real_time_ = ChronoClockNow();
+    start_cpu_time_ = ThreadCPUUsage();
+  }
+
+  // Called by each thread. Adds the slice that began at the last StartTimer()
+  // to the accumulated totals.
+  // REQUIRES: timer is running
+  void StopTimer() {
+    CHECK(running_);
+    running_ = false;
+    real_time_used_ += ChronoClockNow() - start_real_time_;
+    // Floating point error can result in the subtraction producing a negative
+    // time. Guard against that.
+    cpu_time_used_ += std::max<double>(ThreadCPUUsage() - start_cpu_time_, 0);
+  }
+
+  // Called by each thread. Adds `seconds` to the manually reported time.
+  void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
+
+  bool running() const { return running_; }
+
+  // Accumulated real time over all completed slices.
+  // REQUIRES: timer is not running
+  double real_time_used() const {
+    CHECK(!running_);
+    return real_time_used_;
+  }
+
+  // Accumulated CPU time over all completed slices.
+  // REQUIRES: timer is not running
+  double cpu_time_used() const {
+    CHECK(!running_);
+    return cpu_time_used_;
+  }
+
+  // Sum of the values passed to SetIterationTime().
+  // REQUIRES: timer is not running
+  double manual_time_used() const {
+    CHECK(!running_);
+    return manual_time_used_;
+  }
+
+ private:
+  bool running_ = false;        // Is the timer running
+  double start_real_time_ = 0;  // If running_
+  double start_cpu_time_ = 0;   // If running_
+
+  // Accumulated time so far (does not contain current slice if running_)
+  double real_time_used_ = 0;
+  double cpu_time_used_ = 0;
+  // Manually set iteration time. User sets this with SetIterationTime(seconds).
+  double manual_time_used_ = 0;
+};
+
+}  // namespace internal
+}  // namespace benchmark
+
+#endif  // BENCHMARK_THREAD_TIMER_H
--- a/benchmarks/thirdparty/benchmark/src/timers.cc
+++ b/benchmarks/thirdparty/benchmark/src/timers.cc
@ -17,11 +17,14 @@

 #ifdef BENCHMARK_OS_WINDOWS
 #include <Shlwapi.h>
+#undef StrCat  // Don't let StrCat in string_util.h be renamed to lstrcatA
 #include <VersionHelpers.h>
 #include <Windows.h>
 #else
 #include <fcntl.h>
+#ifndef BENCHMARK_OS_FUCHSIA
 #include <sys/resource.h>
+#endif
 #include <sys/time.h>
 #include <sys/types.h>  // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
 #include <unistd.h>
@ -74,7 +77,7 @@ double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) {
          static_cast<double>(user.QuadPart)) *
         1e-7;
 }
-#else
+#elif !defined(BENCHMARK_OS_FUCHSIA)
 double MakeTime(struct rusage const& ru) {
  return (static_cast<double>(ru.ru_utime.tv_sec) +
          static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
@ -162,6 +165,10 @@ double ThreadCPUUsage() {
  // RTEMS doesn't support CLOCK_THREAD_CPUTIME_ID. See
  // https://github.com/RTEMS/rtems/blob/master/cpukit/posix/src/clockgettime.c
  return ProcessCPUUsage();
+#elif defined(BENCHMARK_OS_SOLARIS)
+  struct rusage ru;
+  if (getrusage(RUSAGE_LWP, &ru) == 0) return MakeTime(ru);
+  DiagnoseAndExit("getrusage(RUSAGE_LWP, ...) failed");
 #elif defined(CLOCK_THREAD_CPUTIME_ID)
  struct timespec ts;
  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
@ -186,7 +193,6 @@ std::string DateTimeString(bool local) {
        std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
 #else
    std::tm timeinfo;
-    std::memset(&timeinfo, 0, sizeof(std::tm));
    ::localtime_r(&now, &timeinfo);
    written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
 #endif
@ -195,7 +201,6 @@ std::string DateTimeString(bool local) {
    written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
 #else
    std::tm timeinfo;
-    std::memset(&timeinfo, 0, sizeof(std::tm));
    ::gmtime_r(&now, &timeinfo);
    written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
 #endif
--- a/benchmarks/thirdparty/benchmark/src/timers.h
+++ b/benchmarks/thirdparty/benchmark/src/timers.h
--- a/benchmarks/thirdparty/benchmark/tools/compare.py
+++ b/benchmarks/thirdparty/benchmark/tools/compare.py
@ -0,0 +1,316 @@
+#!/usr/bin/env python
+
+"""
+compare.py - versatile benchmark output compare tool
+"""
+
+import argparse
+from argparse import ArgumentParser
+import sys
+import gbench
+from gbench import util, report
+from gbench.util import *
+
+
+def check_inputs(in1, in2, flags):
+    """
+    Sanity-check the two inputs and the extra benchmark flags.
+
+    Prints a warning when '--benchmark_out=' would be handed to two
+    executables, or when flags are given although both inputs are JSON, and
+    exits with status 1 when a non-JSON '--benchmark_out_format=' is requested.
+    """
+    kind1, _ = classify_input_file(in1)
+    kind2, _ = classify_input_file(in2)
+    out_file = find_benchmark_flag('--benchmark_out=', flags)
+    out_format = find_benchmark_flag('--benchmark_out_format=', flags)
+
+    both_executables = kind1 == IT_Executable and kind2 == IT_Executable
+    both_json = kind1 == IT_JSON and kind2 == IT_JSON
+
+    if both_executables and out_file:
+        # Each executable would write the same file, one after the other.
+        print(("WARNING: '--benchmark_out=%s' will be passed to both "
+               "benchmarks causing it to be overwritten") % out_file)
+    if both_json and len(flags) > 0:
+        # Nothing gets executed, so there is nothing to pass the flags to.
+        print("WARNING: passing optional flags has no effect since both "
+              "inputs are JSON")
+    if out_format is None or out_format == 'json':
+        return
+    print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
+           " is not supported.") % out_format)
+    sys.exit(1)
+
+
+def create_parser():
+    """
+    Build the command-line parser for compare.py.
+
+    The parser has three sub-commands; the chosen one is stored in the 'mode'
+    attribute of the parsed namespace:
+      - 'benchmarks':         test_baseline test_contender
+      - 'filters':            test filter_baseline filter_contender
+      - 'benchmarksfiltered': test_baseline filter_baseline
+                              test_contender filter_contender
+    Each sub-command also collects all remaining arguments in
+    'benchmark_options'. Positional values are declared with nargs=1, so each
+    one is stored as a one-element list; the test inputs use
+    argparse.FileType('r'), so they are stored as opened file objects.
+    """
+    parser = ArgumentParser(
+        description='versatile benchmark output compare tool')
+    subparsers = parser.add_subparsers(
+        help='This tool has multiple modes of operation:',
+        dest='mode')
+
+    # Mode 'benchmarks': compare two whole benchmarks.
+    parser_a = subparsers.add_parser(
+        'benchmarks',
+        help='The most simple use-case, compare all the output of these two benchmarks')
+    baseline = parser_a.add_argument_group(
+        'baseline', 'The benchmark baseline')
+    baseline.add_argument(
+        'test_baseline',
+        metavar='test_baseline',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='A benchmark executable or JSON output file')
+    contender = parser_a.add_argument_group(
+        'contender', 'The benchmark that will be compared against the baseline')
+    contender.add_argument(
+        'test_contender',
+        metavar='test_contender',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='A benchmark executable or JSON output file')
+    parser_a.add_argument(
+        'benchmark_options',
+        metavar='benchmark_options',
+        nargs=argparse.REMAINDER,
+        help='Arguments to pass when running benchmark executables')
+
+    # Mode 'filters': compare two filters applied to the same benchmark.
+    parser_b = subparsers.add_parser(
+        'filters', help='Compare filter one with the filter two of benchmark')
+    baseline = parser_b.add_argument_group(
+        'baseline', 'The benchmark baseline')
+    baseline.add_argument(
+        'test',
+        metavar='test',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='A benchmark executable or JSON output file')
+    baseline.add_argument(
+        'filter_baseline',
+        metavar='filter_baseline',
+        type=str,
+        nargs=1,
+        help='The first filter, that will be used as baseline')
+    contender = parser_b.add_argument_group(
+        'contender', 'The benchmark that will be compared against the baseline')
+    contender.add_argument(
+        'filter_contender',
+        metavar='filter_contender',
+        type=str,
+        nargs=1,
+        help='The second filter, that will be compared against the baseline')
+    parser_b.add_argument(
+        'benchmark_options',
+        metavar='benchmark_options',
+        nargs=argparse.REMAINDER,
+        help='Arguments to pass when running benchmark executables')
+
+    # Mode 'benchmarksfiltered': one filter per benchmark, two benchmarks.
+    parser_c = subparsers.add_parser(
+        'benchmarksfiltered',
+        help='Compare filter one of first benchmark with filter two of the second benchmark')
+    baseline = parser_c.add_argument_group(
+        'baseline', 'The benchmark baseline')
+    baseline.add_argument(
+        'test_baseline',
+        metavar='test_baseline',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='A benchmark executable or JSON output file')
+    baseline.add_argument(
+        'filter_baseline',
+        metavar='filter_baseline',
+        type=str,
+        nargs=1,
+        help='The first filter, that will be used as baseline')
+    contender = parser_c.add_argument_group(
+        'contender', 'The benchmark that will be compared against the baseline')
+    contender.add_argument(
+        'test_contender',
+        metavar='test_contender',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='The second benchmark executable or JSON output file, that will be compared against the baseline')
+    contender.add_argument(
+        'filter_contender',
+        metavar='filter_contender',
+        type=str,
+        nargs=1,
+        help='The second filter, that will be compared against the baseline')
+    parser_c.add_argument(
+        'benchmark_options',
+        metavar='benchmark_options',
+        nargs=argparse.REMAINDER,
+        help='Arguments to pass when running benchmark executables')
+
+    return parser
+
+
+def main():
+    """
+    Command-line entry point.
+
+    Parses the arguments, works out the baseline/contender inputs and filters
+    for the selected mode, runs or loads both benchmarks, and prints the
+    difference report. Exits with status 1 when no mode (or an unrecognized
+    one) was given.
+    """
+    # Parse the command line flags
+    parser = create_parser()
+    args, unknown_args = parser.parse_known_args()
+    if args.mode is None:
+        parser.print_help()
+        # sys.exit rather than the site-provided exit(), which is meant for
+        # the interactive interpreter and is missing under 'python -S'.
+        sys.exit(1)
+    assert not unknown_args
+    benchmark_options = args.benchmark_options
+
+    if args.mode == 'benchmarks':
+        test_baseline = args.test_baseline[0].name
+        test_contender = args.test_contender[0].name
+        filter_baseline = ''
+        filter_contender = ''
+
+        # NOTE: if test_baseline == test_contender, you are analyzing the stdev
+
+        description = 'Comparing %s to %s' % (test_baseline, test_contender)
+    elif args.mode == 'filters':
+        test_baseline = args.test[0].name
+        test_contender = args.test[0].name
+        filter_baseline = args.filter_baseline[0]
+        filter_contender = args.filter_contender[0]
+
+        # NOTE: if filter_baseline == filter_contender, you are analyzing the
+        # stdev
+
+        description = 'Comparing %s to %s (from %s)' % (
+            filter_baseline, filter_contender, args.test[0].name)
+    elif args.mode == 'benchmarksfiltered':
+        test_baseline = args.test_baseline[0].name
+        test_contender = args.test_contender[0].name
+        filter_baseline = args.filter_baseline[0]
+        filter_contender = args.filter_contender[0]
+
+        # NOTE: if test_baseline == test_contender and
+        # filter_baseline == filter_contender, you are analyzing the stdev
+
+        description = 'Comparing %s (from %s) to %s (from %s)' % (
+            filter_baseline, test_baseline, filter_contender, test_contender)
+    else:
+        # should never happen
+        print("Unrecognized mode of operation: '%s'" % args.mode)
+        parser.print_help()
+        sys.exit(1)
+
+    check_inputs(test_baseline, test_contender, benchmark_options)
+
+    options_baseline = []
+    options_contender = []
+
+    # Filters are only forwarded when both sides have one.
+    if filter_baseline and filter_contender:
+        options_baseline = ['--benchmark_filter=%s' % filter_baseline]
+        options_contender = ['--benchmark_filter=%s' % filter_contender]
+
+    # Run the benchmarks and report the results
+    json1 = json1_orig = gbench.util.run_or_load_benchmark(
+        test_baseline, benchmark_options + options_baseline)
+    json2 = json2_orig = gbench.util.run_or_load_benchmark(
+        test_contender, benchmark_options + options_contender)
+
+    # Now, filter the benchmarks so that the difference report can work
+    if filter_baseline and filter_contender:
+        replacement = '[%s vs. %s]' % (filter_baseline, filter_contender)
+        json1 = gbench.report.filter_benchmark(
+            json1_orig, filter_baseline, replacement)
+        json2 = gbench.report.filter_benchmark(
+            json2_orig, filter_contender, replacement)
+
+    # Diff and output
+    output_lines = gbench.report.generate_difference_report(json1, json2)
+    print(description)
+    for ln in output_lines:
+        print(ln)
+
+
+import unittest
+
+
+class TestParser(unittest.TestCase):
+    """
+    Checks how the parser built by create_parser() splits the command line
+    in each mode: plain arguments, a trailing remainder, and a remainder
+    given after '--'.
+    """
+
+    def setUp(self):
+        self.parser = create_parser()
+        script_dir = os.path.dirname(os.path.realpath(__file__))
+        inputs_dir = os.path.join(script_dir, 'gbench', 'Inputs')
+        self.testInput0 = os.path.join(inputs_dir, 'test1_run1.json')
+        self.testInput1 = os.path.join(inputs_dir, 'test1_run2.json')
+
+    def _parse(self, *argv):
+        """Run the parser over the given argument strings."""
+        return self.parser.parse_args(list(argv))
+
+    def _check_benchmarks(self, parsed):
+        """Assertions shared by every 'benchmarks' mode test."""
+        self.assertEqual(parsed.mode, 'benchmarks')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+
+    def _check_filters(self, parsed):
+        """Assertions shared by every 'filters' mode test."""
+        self.assertEqual(parsed.mode, 'filters')
+        self.assertEqual(parsed.test[0].name, self.testInput0)
+        self.assertEqual(parsed.filter_baseline[0], 'c')
+        self.assertEqual(parsed.filter_contender[0], 'd')
+
+    def _check_benchmarksfiltered(self, parsed):
+        """Assertions shared by every 'benchmarksfiltered' mode test."""
+        self.assertEqual(parsed.mode, 'benchmarksfiltered')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.filter_baseline[0], 'c')
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertEqual(parsed.filter_contender[0], 'e')
+
+    def test_benchmarks_basic(self):
+        parsed = self._parse('benchmarks', self.testInput0, self.testInput1)
+        self._check_benchmarks(parsed)
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_benchmarks_with_remainder(self):
+        parsed = self._parse(
+            'benchmarks', self.testInput0, self.testInput1, 'd')
+        self._check_benchmarks(parsed)
+        self.assertEqual(parsed.benchmark_options, ['d'])
+
+    def test_benchmarks_with_remainder_after_doubleminus(self):
+        parsed = self._parse(
+            'benchmarks', self.testInput0, self.testInput1, '--', 'e')
+        self._check_benchmarks(parsed)
+        self.assertEqual(parsed.benchmark_options, ['e'])
+
+    def test_filters_basic(self):
+        parsed = self._parse('filters', self.testInput0, 'c', 'd')
+        self._check_filters(parsed)
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_filters_with_remainder(self):
+        parsed = self._parse('filters', self.testInput0, 'c', 'd', 'e')
+        self._check_filters(parsed)
+        self.assertEqual(parsed.benchmark_options, ['e'])
+
+    def test_filters_with_remainder_after_doubleminus(self):
+        parsed = self._parse('filters', self.testInput0, 'c', 'd', '--', 'f')
+        self._check_filters(parsed)
+        self.assertEqual(parsed.benchmark_options, ['f'])
+
+    def test_benchmarksfiltered_basic(self):
+        parsed = self._parse(
+            'benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e')
+        self._check_benchmarksfiltered(parsed)
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_benchmarksfiltered_with_remainder(self):
+        parsed = self._parse(
+            'benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e',
+            'f')
+        self._check_benchmarksfiltered(parsed)
+        self.assertEqual(parsed.benchmark_options[0], 'f')
+
+    def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
+        parsed = self._parse(
+            'benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e',
+            '--', 'g')
+        self._check_benchmarksfiltered(parsed)
+        self.assertEqual(parsed.benchmark_options[0], 'g')
+
+
+if __name__ == '__main__':
+    # unittest.main()
+    main()
+
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
+# kate: indent-mode python; remove-trailing-spaces modified;
--- a/benchmarks/thirdparty/benchmark/tools/compare_bench.py
+++ b/benchmarks/thirdparty/benchmark/tools/compare_bench.py
@ -0,0 +1,67 @@
+#!/usr/bin/env python
+"""
+compare_bench.py - Compare two benchmarks or their results and report the
+                   difference.
+"""
+import argparse
+from argparse import ArgumentParser
+import sys
+import gbench
+from gbench import util, report
+from gbench.util import *
+
+def check_inputs(in1, in2, flags):
+    """
+    Perform checking on the user provided inputs and diagnose any abnormalities.
+
+    'in1' and 'in2' are the two input paths; 'flags' is the list of extra
+    benchmark flags. Two situations only produce a warning:
+      * both inputs are executables and '--benchmark_out=' is given, so both
+        runs would write to the same file;
+      * both inputs are JSON files and flags were given, so the flags are
+        never used.
+    A '--benchmark_out_format=' other than 'json' is an error: a message is
+    printed and the program exits with status 1.
+    """
+    # Only the classification is needed here; the accompanying error message
+    # returned by classify_input_file is not used by this function.
+    in1_kind, _ = classify_input_file(in1)
+    in2_kind, _ = classify_input_file(in2)
+    output_file = find_benchmark_flag('--benchmark_out=', flags)
+    output_type = find_benchmark_flag('--benchmark_out_format=', flags)
+    if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
+        print(("WARNING: '--benchmark_out=%s' will be passed to both "
+              "benchmarks causing it to be overwritten") % output_file)
+    if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
+        print("WARNING: passing --benchmark flags has no effect since both "
+              "inputs are JSON")
+    if output_type is not None and output_type != 'json':
+        # The script name is now quoted with a matching pair of quotes.
+        print(("ERROR: passing '--benchmark_out_format=%s' to 'compare_bench.py'"
+              " is not supported.") % output_type)
+        sys.exit(1)
+
+
+def main():
+    """
+    Command-line entry point: compare two benchmarks (executables or JSON
+    result files) and print the difference report.
+
+    Everything after the two positional inputs is collected as
+    'benchmark_options' and handed to both benchmark runs. Exits with
+    status 1 if the parser reports arguments it could not place.
+    """
+    parser = ArgumentParser(
+        description='compare the results of two benchmarks')
+    parser.add_argument(
+        'test1', metavar='test1', type=str, nargs=1,
+        help='A benchmark executable or JSON output file')
+    parser.add_argument(
+        'test2', metavar='test2', type=str, nargs=1,
+        help='A benchmark executable or JSON output file')
+    parser.add_argument(
+        'benchmark_options', metavar='benchmark_options', nargs=argparse.REMAINDER,
+        help='Arguments to pass when running benchmark executables'
+    )
+    # Parse the command line flags
+    args, unknown_args = parser.parse_known_args()
+    if unknown_args:
+        # should never happen
+        print("Unrecognized positional arguments: '%s'"
+              % unknown_args)
+        # sys.exit rather than the site-provided exit(), which is not
+        # guaranteed to exist (e.g. when Python is started with -S).
+        sys.exit(1)
+    # nargs=1 yields one-element lists
+    test1 = args.test1[0]
+    test2 = args.test2[0]
+    benchmark_options = args.benchmark_options
+    check_inputs(test1, test2, benchmark_options)
+    # Run the benchmarks and report the results
+    json1 = gbench.util.run_or_load_benchmark(test1, benchmark_options)
+    json2 = gbench.util.run_or_load_benchmark(test2, benchmark_options)
+    output_lines = gbench.report.generate_difference_report(json1, json2)
+    print('Comparing %s to %s' % (test1, test2))
+    for ln in output_lines:
+        print(ln)
+
+
+if __name__ == '__main__':
+    main()
--- a/benchmarks/thirdparty/benchmark/tools/gbench/Inputs/test1_run1.json
+++ b/benchmarks/thirdparty/benchmark/tools/gbench/Inputs/test1_run1.json
@ -0,0 +1,102 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_SameTimes",
+      "iterations": 1000,
+      "real_time": 10,
+      "cpu_time": 10,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_2xFaster",
+      "iterations": 1000,
+      "real_time": 50,
+      "cpu_time": 50,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_2xSlower",
+      "iterations": 1000,
+      "real_time": 50,
+      "cpu_time": 50,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_1PercentFaster",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_1PercentSlower",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentFaster",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentSlower",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_100xSlower",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_100xFaster",
+      "iterations": 1000,
+      "real_time": 10000,
+      "cpu_time": 10000,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentCPUToTime",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_ThirdFaster",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_BadTimeUnit",
+      "iterations": 1000,
+      "real_time": 0.4,
+      "cpu_time": 0.5,
+      "time_unit": "s"
+    },
+    {
+      "name": "BM_DifferentTimeUnit",
+      "iterations": 1,
+      "real_time": 1,
+      "cpu_time": 1,
+      "time_unit": "s"
+    }
+  ]
+}
--- a/benchmarks/thirdparty/benchmark/tools/gbench/Inputs/test1_run2.json
+++ b/benchmarks/thirdparty/benchmark/tools/gbench/Inputs/test1_run2.json
@ -0,0 +1,102 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_SameTimes",
+      "iterations": 1000,
+      "real_time": 10,
+      "cpu_time": 10,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_2xFaster",
+      "iterations": 1000,
+      "real_time": 25,
+      "cpu_time": 25,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_2xSlower",
+      "iterations": 20833333,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_1PercentFaster",
+      "iterations": 1000,
+      "real_time": 98.9999999,
+      "cpu_time": 98.9999999,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_1PercentSlower",
+      "iterations": 1000,
+      "real_time": 100.9999999,
+      "cpu_time": 100.9999999,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentFaster",
+      "iterations": 1000,
+      "real_time": 90,
+      "cpu_time": 90,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentSlower",
+      "iterations": 1000,
+      "real_time": 110,
+      "cpu_time": 110,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_100xSlower",
+      "iterations": 1000,
+      "real_time": 1.0000e+04,
+      "cpu_time": 1.0000e+04,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_100xFaster",
+      "iterations": 1000,
+      "real_time": 100,
+      "cpu_time": 100,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_10PercentCPUToTime",
+      "iterations": 1000,
+      "real_time": 110,
+      "cpu_time": 90,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_ThirdFaster",
+      "iterations": 1000,
+      "real_time": 66.665,
+      "cpu_time": 66.664,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_BadTimeUnit",
+      "iterations": 1000,
+      "real_time": 0.04,
+      "cpu_time": 0.6,
+      "time_unit": "s"
+    },
+    {
+      "name": "BM_DifferentTimeUnit",
+      "iterations": 1,
+      "real_time": 1,
+      "cpu_time": 1,
+      "time_unit": "ns"
+    }
+  ]
+}
--- a/benchmarks/thirdparty/benchmark/tools/gbench/Inputs/test2_run.json
+++ b/benchmarks/thirdparty/benchmark/tools/gbench/Inputs/test2_run.json
@ -0,0 +1,81 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_Hi",
+      "iterations": 1234,
+      "real_time": 42,
+      "cpu_time": 24,
+      "time_unit": "ms"
+    },
+    {
+      "name": "BM_Zero",
+      "iterations": 1000,
+      "real_time": 10,
+      "cpu_time": 10,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_Zero/4",
+      "iterations": 4000,
+      "real_time": 40,
+      "cpu_time": 40,
+      "time_unit": "ns"
+    },
+    {
+      "name": "Prefix/BM_Zero",
+      "iterations": 2000,
+      "real_time": 20,
+      "cpu_time": 20,
+      "time_unit": "ns"
+    },
+    {
+      "name": "Prefix/BM_Zero/3",
+      "iterations": 3000,
+      "real_time": 30,
+      "cpu_time": 30,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_One",
+      "iterations": 5000,
+      "real_time": 5,
+      "cpu_time": 5,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_One/4",
+      "iterations": 2000,
+      "real_time": 20,
+      "cpu_time": 20,
+      "time_unit": "ns"
+    },
+    {
+      "name": "Prefix/BM_One",
+      "iterations": 1000,
+      "real_time": 10,
+      "cpu_time": 10,
+      "time_unit": "ns"
+    },
+    {
+      "name": "Prefix/BM_One/3",
+      "iterations": 1500,
+      "real_time": 15,
+      "cpu_time": 15,
+      "time_unit": "ns"
+    },
+    {
+      "name": "BM_Bye",
+      "iterations": 5321,
+      "real_time": 11,
+      "cpu_time": 63,
+      "time_unit": "ns"
+    }
+  ]
+}
--- a/benchmarks/thirdparty/benchmark/tools/gbench/init.py
+++ b/benchmarks/thirdparty/benchmark/tools/gbench/init.py
@ -0,0 +1,8 @@
+"""Google Benchmark tooling"""
+
+__author__ = 'Eric Fiselier'
+__email__ = 'eric@efcs.ca'
+__versioninfo__ = (0, 5, 0)
+__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
+
+__all__ = []
--- a/benchmarks/thirdparty/benchmark/tools/gbench/report.py
+++ b/benchmarks/thirdparty/benchmark/tools/gbench/report.py
@ -0,0 +1,208 @@
+"""report.py - Utilities for reporting statistics about benchmark results
+"""
+import os
+import re
+import copy
+
+class BenchmarkColor(object):
+    """
+    A named color: pairs a human-readable 'name' with the 'code' string that
+    is emitted whenever the object is formatted.
+    """
+
+    def __init__(self, name, code):
+        self.code = code
+        self.name = name
+
+    def __repr__(self):
+        fields = (self.name, self.code)
+        return '%s%r' % (type(self).__name__, fields)
+
+    def __format__(self, format):
+        # The format spec is ignored on purpose: the object always renders
+        # as its raw code.
+        return self.code
+
+# Benchmark Colors Enumeration
+BC_NONE = BenchmarkColor('NONE', '')
+BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
+BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
+BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
+BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
+BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
+BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
+BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
+BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
+BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
+BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
+
+def color_format(use_color, fmt_str, *args, **kwargs):
+    """
+    Format 'fmt_str' with 'args' and 'kwargs', optionally stripping colors.
+
+    With 'use_color' True the arguments are passed through untouched. With
+    'use_color' False every BenchmarkColor among the positional and keyword
+    arguments is swapped for BC_NONE (whose code is the empty string) before
+    formatting. 'use_color' must be an actual bool.
+    """
+    assert isinstance(use_color, bool)
+    if use_color:
+        return fmt_str.format(*args, **kwargs)
+
+    def without_color(value):
+        return BC_NONE if isinstance(value, BenchmarkColor) else value
+
+    plain_args = [without_color(value) for value in args]
+    plain_kwargs = dict(
+        (key, without_color(value)) for key, value in kwargs.items())
+    return fmt_str.format(*plain_args, **plain_kwargs)
+
+
+def find_longest_name(benchmark_list):
+    """
+    Return the length of the longest 'name' among the benchmark JSON objects
+    in 'benchmark_list'. The result is never smaller than 1, which is also
+    what an empty list yields.
+    """
+    name_lengths = [len(bench['name']) for bench in benchmark_list]
+    return max([1] + name_lengths)
+
+
+def calculate_change(old_val, new_val):
+    """
+    Return a float representing the decimal change between old_val and new_val.
+
+    Normally this is the difference relative to the magnitude of 'old_val'.
+    A zero 'old_val' cannot be divided by: the change is 0.0 if 'new_val' is
+    zero as well, otherwise the difference is taken relative to the mean of
+    the two values.
+    """
+    delta = float(new_val - old_val)
+    if old_val != 0:
+        return delta / abs(old_val)
+    if new_val == 0:
+        return 0.0
+    return delta / (float(old_val + new_val) / 2)
+
+
+def filter_benchmark(json_orig, family, replacement=""):
+    """
+    Apply a filter to the json, and only leave the 'family' of benchmarks.
+
+    'family' is a regular expression. Benchmarks whose name it matches
+    anywhere are kept, with every match in the name substituted by
+    'replacement'. The input is left untouched: the result is a new dict
+    holding only a 'benchmarks' list of deep copies.
+    """
+    pattern = re.compile(family)
+    kept = []
+    for bench in json_orig['benchmarks']:
+        if pattern.search(bench['name']):
+            # Work on a copy. Do NOT modify the old name!
+            renamed = copy.deepcopy(bench)
+            renamed['name'] = pattern.sub(replacement, renamed['name'])
+            kept.append(renamed)
+    return {'benchmarks': kept}
+
+
+def generate_difference_report(json1, json2, use_color=True):
+    """
+    Calculate and report the difference between each test of two benchmarks
+    runs specified as 'json1' and 'json2'.
+
+    Returns a list of strings: a header line, a dashed rule of the same
+    length, then one row per benchmark of 'json1' that has both 'real_time'
+    and 'cpu_time', has a same-named counterpart in 'json2', and uses the
+    same 'time_unit' as that counterpart. Benchmarks failing any of these
+    conditions are left out without notice.
+    """
+    def pick_color(change):
+        # More than +5% is flagged as a failure, -7% or lower is highlighted,
+        # anything in between is shown plain.
+        if change > 0.05:
+            return BC_FAIL
+        if change > -0.07:
+            return BC_WHITE
+        return BC_CYAN
+
+    def find_contender(name):
+        # First benchmark in json2 with this exact name, if any.
+        for candidate in json2['benchmarks']:
+            if candidate['name'] == name:
+                return candidate
+        return None
+
+    name_width = max(find_longest_name(json1['benchmarks']), len('Benchmark'))
+    header = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
+        'Benchmark', 12 + name_width)
+    report = [header, '-' * len(header)]
+    row_fmt = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
+
+    for base in json1['benchmarks']:
+        if 'real_time' not in base or 'cpu_time' not in base:
+            continue
+        contender = find_contender(base['name'])
+        if not contender:
+            continue
+        if base['time_unit'] != contender['time_unit']:
+            continue
+
+        time_change = calculate_change(
+            base['real_time'], contender['real_time'])
+        cpu_change = calculate_change(
+            base['cpu_time'], contender['cpu_time'])
+        report.append(color_format(
+            use_color, row_fmt,
+            BC_HEADER, base['name'], name_width,
+            pick_color(time_change), time_change,
+            pick_color(cpu_change), cpu_change,
+            base['real_time'], contender['real_time'],
+            base['cpu_time'], contender['cpu_time'],
+            endc=BC_ENDC))
+    return report
+
+###############################################################################
+# Unit tests
+
+import unittest
+
+class TestReportDifference(unittest.TestCase):
+    """
+    Compares the two recorded runs in 'Inputs' and checks every row of the
+    generated (uncolored) difference report.
+    """
+
+    def load_results(self):
+        import json
+        inputs_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
+        loaded = []
+        for fname in ('test1_run1.json', 'test1_run2.json'):
+            with open(os.path.join(inputs_dir, fname), 'r') as f:
+                loaded.append(json.load(f))
+        return loaded[0], loaded[1]
+
+    def test_basic(self):
+        # One entry per expected row:
+        # name, time change, cpu change, time old, time new, cpu old, cpu new.
+        # The absolute times are printed with no decimals, hence the rounded
+        # values for BM_BadTimeUnit.
+        expect_lines = [
+            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
+            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
+            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
+            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
+            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
+            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
+            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
+            ['BM_100xSlower', '+99.0000', '+99.0000', '100', '10000', '100', '10000'],
+            ['BM_100xFaster', '-0.9900', '-0.9900', '10000', '100', '10000', '100'],
+            ['BM_10PercentCPUToTime', '+0.1000', '-0.1000', '100', '110', '100', '90'],
+            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
+            ['BM_BadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
+        ]
+        baseline, contender = self.load_results()
+        full_report = generate_difference_report(baseline, contender, use_color=False)
+        # Drop the header line and the dashed rule.
+        rows = full_report[2:]
+        print("\n".join(full_report))
+        self.assertEqual(len(rows), len(expect_lines))
+        for row, expected in zip(rows, expect_lines):
+            parts = [field for field in row.split(' ') if field]
+            self.assertEqual(len(parts), 7)
+            self.assertEqual(parts, expected)
+
+
+class TestReportDifferenceBetweenFamilies(unittest.TestCase):
+    """
+    Splits a single recorded run into two benchmark families with
+    filter_benchmark() and checks the report comparing one against the other.
+    """
+
+    def load_result(self):
+        import json
+        inputs_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
+        with open(os.path.join(inputs_dir, 'test2_run.json'), 'r') as f:
+            results = json.load(f)
+        return results
+
+    def test_basic(self):
+        # Both families have the matched part of the name replaced by '.',
+        # which is why the expected names below contain a dot.
+        expect_lines = [
+            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
+            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
+            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
+            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
+        ]
+        results = self.load_result()
+        family_zero = filter_benchmark(results, "BM_Z.ro", ".")
+        family_one = filter_benchmark(results, "BM_O.e", ".")
+        full_report = generate_difference_report(family_zero, family_one, use_color=False)
+        # Drop the header line and the dashed rule.
+        rows = full_report[2:]
+        print("\n")
+        print("\n".join(full_report))
+        self.assertEqual(len(rows), len(expect_lines))
+        for row, expected in zip(rows, expect_lines):
+            parts = [field for field in row.split(' ') if field]
+            self.assertEqual(len(parts), 7)
+            self.assertEqual(parts, expected)
+
+
+if __name__ == '__main__':
+    unittest.main()
+
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
+# kate: indent-mode python; remove-trailing-spaces modified;
--- a/benchmarks/thirdparty/benchmark/tools/gbench/util.py
+++ b/benchmarks/thirdparty/benchmark/tools/gbench/util.py
@ -0,0 +1,159 @@
+"""util.py - General utilities for running, loading, and processing benchmarks
+"""
+import json
+import os
+import tempfile
+import subprocess
+import sys
+
+# Input file type enumeration
+IT_Invalid    = 0
+IT_JSON       = 1
+IT_Executable = 2
+
+# Length of the signature read from the start of a file: 2 bytes on Windows
+# ('MZ'), 4 bytes everywhere else.
+_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
+def is_executable_file(filename):
+    """
+    Return 'True' if 'filename' names an existing regular file that looks
+    like an executable, judged by its leading magic bytes.
+
+    Only the format of the platform the script runs on is recognized:
+    the Mach-O signatures on macOS, 'MZ' on Windows, ELF everywhere else.
+    """
+    if not os.path.isfile(filename):
+        return False
+    with open(filename, mode='rb') as f:
+        signature = f.read(_num_magic_bytes)
+
+    platform = sys.platform
+    if platform == 'darwin':
+        mach_o_signatures = (
+            b'\xfe\xed\xfa\xce',  # MH_MAGIC
+            b'\xce\xfa\xed\xfe',  # MH_CIGAM
+            b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
+            b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
+            b'\xca\xfe\xba\xbe',  # FAT_MAGIC
+            b'\xbe\xba\xfe\xca',  # FAT_CIGAM
+        )
+        return signature in mach_o_signatures
+    if platform.startswith('win'):
+        return signature == b'MZ'
+    return signature == b'\x7FELF'
+
+
+def is_json_file(filename):
+    """
+    Returns 'True' if 'filename' names a valid JSON output file.
+    'False' otherwise.
+
+    This is a best-effort probe: any ordinary failure to open or parse the
+    file simply means "not JSON".
+    """
+    try:
+        with open(filename, 'r') as f:
+            json.load(f)
+    except Exception:
+        # 'Exception' rather than a bare 'except', so that KeyboardInterrupt
+        # and SystemExit still propagate instead of being reported as
+        # "not a JSON file".
+        return False
+    return True
+
+
+def classify_input_file(filename):
+    """
+    Return a tuple (type, msg) where 'type' specifies the classified type
+    of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
+    string representing the error; otherwise 'msg' is None.
+
+    The executable check runs before the JSON check.
+    """
+    if not os.path.exists(filename):
+        return IT_Invalid, "'%s' does not exist" % filename
+    if not os.path.isfile(filename):
+        return IT_Invalid, "'%s' does not name a file" % filename
+    if is_executable_file(filename):
+        return IT_Executable, None
+    if is_json_file(filename):
+        return IT_JSON, None
+    return IT_Invalid, "'%s' does not name a valid benchmark executable or JSON file" % filename
+
+
+def check_input_file(filename):
+    """
+    Classify the file named by 'filename' and return its classification.
+
+    An 'IT_Invalid' classification is fatal: the reason is printed and the
+    program exits with status 1.
+    """
+    kind, problem = classify_input_file(filename)
+    if kind == IT_Invalid:
+        print("Invalid input file: %s" % problem)
+        sys.exit(1)
+    return kind
+
+def find_benchmark_flag(prefix, benchmark_flags):
+    """
+    Look through 'benchmark_flags' for flags of the form `<prefix><arg>` and
+    return the `<arg>` part. When the flag occurs several times the last
+    occurrence wins; when it does not occur at all the result is None.
+    'prefix' must start with '--' and end with '='.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    values = [flag[len(prefix):] for flag in benchmark_flags
+              if flag.startswith(prefix)]
+    if not values:
+        return None
+    return values[-1]
+
+def remove_benchmark_flags(prefix, benchmark_flags):
+    """
+    Build a new list from 'benchmark_flags' that leaves out every flag
+    starting with 'prefix'; the input list is not modified.
+    'prefix' must start with '--' and end with '='.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    remaining = []
+    for flag in benchmark_flags:
+        if not flag.startswith(prefix):
+            remaining.append(flag)
+    return remaining
+
+def load_benchmark_results(fname):
+    """
+    Parse the file 'fname' as JSON and return the resulting object.
+    REQUIRES: 'fname' names a file containing JSON benchmark output.
+    """
+    with open(fname, 'r') as results_file:
+        results = json.load(results_file)
+    return results
+
+
+def run_benchmark(exe_name, benchmark_flags):
+    """
+    Run a benchmark specified by 'exe_name' with the specified
+    'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
+    real time console output.
+
+    If the flags carry no '--benchmark_out=', a temporary file is created to
+    receive the output and is removed again before returning, including when
+    the benchmark fails or its output cannot be loaded.
+
+    RETURNS: A JSON object representing the benchmark output
+    Exits the program with the benchmark's exit code if that code is non-zero.
+    """
+    output_name = find_benchmark_flag('--benchmark_out=',
+                                      benchmark_flags)
+    is_temp_output = output_name is None
+    if is_temp_output:
+        # mkstemp hands back an open descriptor; only the name is needed
+        # since the benchmark writes the file itself.
+        thandle, output_name = tempfile.mkstemp()
+        os.close(thandle)
+        benchmark_flags = list(benchmark_flags) + \
+                          ['--benchmark_out=%s' % output_name]
+
+    try:
+        cmd = [exe_name] + benchmark_flags
+        print("RUNNING: %s" % ' '.join(cmd))
+        exitCode = subprocess.call(cmd)
+        if exitCode != 0:
+            print('TEST FAILED...')
+            sys.exit(exitCode)
+        return load_benchmark_results(output_name)
+    finally:
+        # Runs on success, on sys.exit() and on load errors alike, so the
+        # temporary file is never left behind.
+        if is_temp_output and os.path.exists(output_name):
+            os.unlink(output_name)
+
+
+def run_or_load_benchmark(filename, benchmark_flags):
+    """
+    Produce the results for the benchmark named by 'filename'.
+
+    An executable is run with 'benchmark_flags' and its output returned; a
+    JSON output file is simply loaded, in which case the flags are not used.
+    Any other input is rejected by check_input_file before a result is needed.
+    """
+    kind = check_input_file(filename)
+    if kind == IT_Executable:
+        return run_benchmark(filename, benchmark_flags)
+    if kind == IT_JSON:
+        return load_benchmark_results(filename)
+    assert False # This branch is unreachable
--- a/benchmarks/thirdparty/benchmark/tools/strip_asm.py
+++ b/benchmarks/thirdparty/benchmark/tools/strip_asm.py
@ -0,0 +1,151 @@
+#!/usr/bin/env python
+
+"""
+strip_asm.py - Cleanup ASM output for the specified file
+"""
+
+from argparse import ArgumentParser
+import sys
+import os
+import re
+
+def find_used_labels(asm):
+    """Collect the '.L<name>' labels that jump instructions in 'asm' target."""
+    # Raw string: '\s' and '\.' are invalid escapes in a plain str literal and
+    # trigger a DeprecationWarning/SyntaxWarning on modern Python.
+    jump_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
+    # match() anchors at line start, so only a leading j* instruction counts.
+    hits = (jump_re.match(line) for line in asm.splitlines())
+    return set('.L%s' % m.group(1) for m in hits if m)
+
+
+def normalize_labels(asm):
+    """
+    Give every local label in 'asm' the '.L' prefix: labels declared as
+    'L<name>:' (no leading dot) are renamed to '.L<name>' wherever they
+    appear at the start of the text or after whitespace.
+    """
+    # Raw strings: '\s' is an invalid escape in a plain str literal.
+    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
+    found = (label_decl.match(line) for line in asm.splitlines())
+    # Only dot-less declarations need work; deciding from one arbitrary set
+    # element would make the result depend on hash order for mixed input.
+    for ld in set(m.group(0) for m in found if m and m.group(0)[0] != '.'):
+        # '\\1' restores the whitespace (or start of text) matched before it.
+        asm = re.sub(r"(^|\s+)" + ld + r"(?=:|\s)", '\\1.' + ld, asm)
+    return asm
+
+
+def transform_labels(asm):
+    """
+    Normalize labels, then drop '.L' declarations that no jump targets.
+    """
+    asm = normalize_labels(asm)
+    used_decls = find_used_labels(asm)
+    # Raw string: '\.' is an invalid escape in a plain str literal.
+    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
+    found = ((line, label_decl.match(line)) for line in asm.splitlines())
+    kept = [line for line, m in found if not m or m.group(0) in used_decls]
+    return ''.join(line + '\n' for line in kept)
+
+
+def is_identifier(tk):
+    """
+    Tell whether 'tk' is a non-empty token that starts with a letter or an
+    underscore and otherwise consists only of alphanumerics and underscores.
+    """
+    if len(tk) < 1:
+        return False
+    head, tail = tk[0], tk[1:]
+    if not (head.isalpha() or head == '_'):
+        return False
+    return all(ch.isalnum() or ch == '_' for ch in tail)
+
+def process_identifiers(l):
+    """
+    process_identifiers - rewrite the identifiers on line 'l' so that their
+    names are consistent across platforms, specifically ELF and MachO.
+    MachO prepends an extra underscore to names; that underscore is
+    removed here. All other text on the line is passed through untouched.
+    """
+    pieces = []
+    for tk in re.split(r'([a-zA-Z0-9_]+)', l):
+        if is_identifier(tk) and tk.startswith('_'):
+            # '__Z...' loses one underscore; '_Z...' is left alone; any
+            # other '_<letter>...' name loses its leading underscore.
+            has_dunder_z = tk.startswith('__Z')
+            has_plain = len(tk) > 1 and tk[1].isalpha() and tk[1] != 'Z'
+            if has_dunder_z or has_plain:
+                tk = tk[1:]
+        pieces.append(tk)
+    return ''.join(pieces)
+
+
+def process_asm(asm):
+    """
+    Strip the ASM of unwanted directives and lines, normalize labels and
+    identifiers, and put a blank line in front of every function label
+    except when it is the first line of the output.
+    RETURNS: the cleaned-up assembly, every kept line newline-terminated
+    """
+    asm = transform_labels(asm)
+
+    # TODO: Add more things we want to remove
+    # Raw strings throughout: '\s', '\.' are invalid escapes in plain literals.
+    discard_regexes = [
+        re.compile(r"\s+\..*$"), # directive
+        re.compile(r"\s*#(NO_APP|APP)$"), #inline ASM
+        re.compile(r"\s*#.*$"), # comment line
+        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive
+        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
+    ]
+    # Lines matching one of these are kept even if a discard pattern matched.
+    keep_regexes = [
+
+    ]
+    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")
+    kept = []  # joined once at the end instead of repeated str concatenation
+    for line in asm.splitlines():
+        # Remove Mach-O attribute
+        line = line.replace('@GOTPCREL', '')
+        # match() only anchors at the start of each line.
+        add_line = not any(reg.match(line) for reg in discard_regexes)
+        if any(reg.match(line) for reg in keep_regexes):
+            add_line = True
+        if not add_line:
+            continue
+        # Separate functions with an empty line (never as the first line).
+        if fn_label_def.match(line) and kept:
+            kept.append('\n')
+        kept.append(process_identifiers(line) + '\n')
+    return ''.join(kept)
+
+def main():
+    """
+    Command-line entry point: read the assembly file named by the first
+    positional argument, strip it with process_asm() and write the result
+    to the file named by the second. Exits with status 1 if the input
+    file does not exist. Unrecognized arguments are ignored.
+    """
+    parser = ArgumentParser(description='generate a stripped assembly file')
+    parser.add_argument('input', metavar='input', type=str, nargs=1,
+                        help='An input assembly file')
+    parser.add_argument('out', metavar='output', type=str, nargs=1,
+                        help='The output file')
+    options, _ignored = parser.parse_known_args()
+    src_path, dst_path = options.input[0], options.out[0]
+    if not os.path.isfile(src_path):
+        print(("ERROR: input file '%s' does not exist") % src_path)
+        sys.exit(1)
+    with open(src_path, 'r') as src:
+        stripped = process_asm(src.read())
+    with open(dst_path, 'w') as dst:
+        dst.write(stripped)
+
+
+if __name__ == '__main__':
+    main()  # run the stripper only when executed as a script, not on import
+
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
+# kate: indent-mode python; remove-trailing-spaces modified;
--- a/test/thirdparty/catch/catch.hpp
+++ b/test/thirdparty/catch/catch.hpp
@ -1,6 +1,6 @@
 /*
- *  Catch v1.12.0
- *  Generated: 2018-01-11 21:56:34.893972
+ *  Catch v1.12.2
+ *  Generated: 2018-05-14 15:10:01.112442
 *  ----------------------------------------------------------
 *  This file has been merged from multiple headers. Please don't edit it directly
 *  Copyright (c) 2012 Two Blue Cubes Ltd. All rights reserved.
@ -6599,6 +6599,7 @@ namespace Catch {

 #endif // not Windows

+#include <cassert>
 #include <set>
 #include <string>

@ -7292,9 +7293,9 @@ namespace Catch {
        result_type operator()( result_type n ) const { return std::rand() % n; }

 #ifdef CATCH_CONFIG_CPP11_SHUFFLE
-        static constexpr result_type min() { return 0; }
-        static constexpr result_type max() { return 1000000; }
-        result_type operator()() const { return std::rand() % max(); }
+        static constexpr result_type (min)() { return 0; }
+        static constexpr result_type (max)() { return 1000000; }
+        result_type operator()() const { return std::rand() % (max)(); }
 #endif
        template<typename V>
        static void shuffle( V& vector ) {
@ -8466,7 +8467,7 @@ namespace Catch {
    }

    inline Version libraryVersion() {
-        static Version version( 1, 12, 0, "", 0 );
+        static Version version( 1, 12, 2, "", 0 );
        return version;
    }

@ -8501,11 +8502,18 @@ namespace Catch {
    : m_info( other.m_info )
    {}

+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable:4996) // std::uncaught_exception is deprecated in C++17
+#endif
    ScopedMessage::~ScopedMessage() {
        if ( !std::uncaught_exception() ){
            getResultCapture().popScopedMessage(m_info);
        }
    }
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif

 } // end namespace Catch

@ -9164,6 +9172,8 @@ std::string toString( std::nullptr_t ) {
 // #included from: catch_result_builder.hpp
 #define TWOBLUECUBES_CATCH_RESULT_BUILDER_HPP_INCLUDED

+#include <cassert>
+
 namespace Catch {

    ResultBuilder::ResultBuilder(   char const* macroName,
@ -10657,6 +10667,7 @@ namespace Catch {
 // #included from: ../reporters/catch_reporter_console.hpp
 #define TWOBLUECUBES_CATCH_REPORTER_CONSOLE_HPP_INCLUDED

+#include <cassert>
 #include <cfloat>
 #include <cstdio>