➕ using Google Benchmark #921
This commit is contained in:
parent
a8f711a2f1
commit
b406e3704b
53 changed files with 8039 additions and 0 deletions
517
benchmarks/thirdparty/benchmark/src/sysinfo.cc
vendored
Normal file
517
benchmarks/thirdparty/benchmark/src/sysinfo.cc
vendored
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "internal_macros.h"
|
||||
|
||||
#ifdef BENCHMARK_OS_WINDOWS
|
||||
#include <Shlwapi.h>
|
||||
#include <VersionHelpers.h>
|
||||
#include <Windows.h>
|
||||
#else
|
||||
#include <fcntl.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
|
||||
#include <unistd.h>
|
||||
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
|
||||
defined BENCHMARK_OS_NETBSD
|
||||
#define BENCHMARK_HAS_SYSCTL
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
#include "check.h"
|
||||
#include "cycleclock.h"
|
||||
#include "internal_macros.h"
|
||||
#include "log.h"
|
||||
#include "sleep.h"
|
||||
#include "string_util.h"
|
||||
|
||||
namespace benchmark {
|
||||
namespace {
|
||||
|
||||
void PrintImp(std::ostream& out) { out << std::endl; }
|
||||
|
||||
template <class First, class... Rest>
|
||||
void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
|
||||
out << std::forward<First>(f);
|
||||
PrintImp(out, std::forward<Rest>(rest)...);
|
||||
}
|
||||
|
||||
template <class... Args>
|
||||
BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
|
||||
PrintImp(std::cerr, std::forward<Args>(args)...);
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
#ifdef BENCHMARK_HAS_SYSCTL
|
||||
|
||||
/// ValueUnion - A type used to correctly alias the byte-for-byte output of
|
||||
/// `sysctl` with the result type it's to be interpreted as.
|
||||
struct ValueUnion {
|
||||
union DataT {
|
||||
uint32_t uint32_value;
|
||||
uint64_t uint64_value;
|
||||
// For correct aliasing of union members from bytes.
|
||||
char bytes[8];
|
||||
};
|
||||
using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
|
||||
|
||||
// The size of the data union member + its trailing array size.
|
||||
size_t Size;
|
||||
DataPtr Buff;
|
||||
|
||||
public:
|
||||
ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
|
||||
|
||||
explicit ValueUnion(size_t BuffSize)
|
||||
: Size(sizeof(DataT) + BuffSize),
|
||||
Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
|
||||
|
||||
ValueUnion(ValueUnion&& other) = default;
|
||||
|
||||
explicit operator bool() const { return bool(Buff); }
|
||||
|
||||
char* data() const { return Buff->bytes; }
|
||||
|
||||
std::string GetAsString() const { return std::string(data()); }
|
||||
|
||||
int64_t GetAsInteger() const {
|
||||
if (Size == sizeof(Buff->uint32_value))
|
||||
return static_cast<int32_t>(Buff->uint32_value);
|
||||
else if (Size == sizeof(Buff->uint64_value))
|
||||
return static_cast<int64_t>(Buff->uint64_value);
|
||||
BENCHMARK_UNREACHABLE();
|
||||
}
|
||||
|
||||
uint64_t GetAsUnsigned() const {
|
||||
if (Size == sizeof(Buff->uint32_value))
|
||||
return Buff->uint32_value;
|
||||
else if (Size == sizeof(Buff->uint64_value))
|
||||
return Buff->uint64_value;
|
||||
BENCHMARK_UNREACHABLE();
|
||||
}
|
||||
|
||||
template <class T, int N>
|
||||
std::array<T, N> GetAsArray() {
|
||||
const int ArrSize = sizeof(T) * N;
|
||||
CHECK_LE(ArrSize, Size);
|
||||
std::array<T, N> Arr;
|
||||
std::memcpy(Arr.data(), data(), ArrSize);
|
||||
return Arr;
|
||||
}
|
||||
};
|
||||
|
||||
ValueUnion GetSysctlImp(std::string const& Name) {
|
||||
size_t CurBuffSize = 0;
|
||||
if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
|
||||
return ValueUnion();
|
||||
|
||||
ValueUnion buff(CurBuffSize);
|
||||
if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
|
||||
return buff;
|
||||
return ValueUnion();
|
||||
}
|
||||
|
||||
BENCHMARK_MAYBE_UNUSED
|
||||
bool GetSysctl(std::string const& Name, std::string* Out) {
|
||||
Out->clear();
|
||||
auto Buff = GetSysctlImp(Name);
|
||||
if (!Buff) return false;
|
||||
Out->assign(Buff.data());
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Tp,
|
||||
class = typename std::enable_if<std::is_integral<Tp>::value>::type>
|
||||
bool GetSysctl(std::string const& Name, Tp* Out) {
|
||||
*Out = 0;
|
||||
auto Buff = GetSysctlImp(Name);
|
||||
if (!Buff) return false;
|
||||
*Out = static_cast<Tp>(Buff.GetAsUnsigned());
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Tp, size_t N>
|
||||
bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
|
||||
auto Buff = GetSysctlImp(Name);
|
||||
if (!Buff) return false;
|
||||
*Out = Buff.GetAsArray<Tp, N>();
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class ArgT>
|
||||
bool ReadFromFile(std::string const& fname, ArgT* arg) {
|
||||
*arg = ArgT();
|
||||
std::ifstream f(fname.c_str());
|
||||
if (!f.is_open()) return false;
|
||||
f >> *arg;
|
||||
return f.good();
|
||||
}
|
||||
|
||||
bool CpuScalingEnabled(int num_cpus) {
|
||||
// We don't have a valid CPU count, so don't even bother.
|
||||
if (num_cpus <= 0) return false;
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
|
||||
// local file system. If reading the exported files fails, then we may not be
|
||||
// running on Linux, so we silently ignore all the read errors.
|
||||
std::string res;
|
||||
for (int cpu = 0; cpu < num_cpus; ++cpu) {
|
||||
std::string governor_file =
|
||||
StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
|
||||
if (ReadFromFile(governor_file, &res) && res != "performance") return true;
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
int CountSetBitsInCPUMap(std::string Val) {
|
||||
auto CountBits = [](std::string Part) {
|
||||
using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
|
||||
Part = "0x" + Part;
|
||||
CPUMask Mask(std::stoul(Part, nullptr, 16));
|
||||
return static_cast<int>(Mask.count());
|
||||
};
|
||||
size_t Pos;
|
||||
int total = 0;
|
||||
while ((Pos = Val.find(',')) != std::string::npos) {
|
||||
total += CountBits(Val.substr(0, Pos));
|
||||
Val = Val.substr(Pos + 1);
|
||||
}
|
||||
if (!Val.empty()) {
|
||||
total += CountBits(Val);
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
BENCHMARK_MAYBE_UNUSED
|
||||
std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
|
||||
std::vector<CPUInfo::CacheInfo> res;
|
||||
std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
|
||||
int Idx = 0;
|
||||
while (true) {
|
||||
CPUInfo::CacheInfo info;
|
||||
std::string FPath = StrCat(dir, "index", Idx++, "/");
|
||||
std::ifstream f(StrCat(FPath, "size").c_str());
|
||||
if (!f.is_open()) break;
|
||||
std::string suffix;
|
||||
f >> info.size;
|
||||
if (f.fail())
|
||||
PrintErrorAndDie("Failed while reading file '", FPath, "size'");
|
||||
if (f.good()) {
|
||||
f >> suffix;
|
||||
if (f.bad())
|
||||
PrintErrorAndDie(
|
||||
"Invalid cache size format: failed to read size suffix");
|
||||
else if (f && suffix != "K")
|
||||
PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
|
||||
else if (suffix == "K")
|
||||
info.size *= 1000;
|
||||
}
|
||||
if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
|
||||
PrintErrorAndDie("Failed to read from file ", FPath, "type");
|
||||
if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
|
||||
PrintErrorAndDie("Failed to read from file ", FPath, "level");
|
||||
std::string map_str;
|
||||
if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
|
||||
PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
|
||||
info.num_sharing = CountSetBitsInCPUMap(map_str);
|
||||
res.push_back(info);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#ifdef BENCHMARK_OS_MACOSX
|
||||
std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
|
||||
std::vector<CPUInfo::CacheInfo> res;
|
||||
std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
|
||||
GetSysctl("hw.cacheconfig", &CacheCounts);
|
||||
|
||||
struct {
|
||||
std::string name;
|
||||
std::string type;
|
||||
int level;
|
||||
size_t num_sharing;
|
||||
} Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
|
||||
{"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
|
||||
{"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
|
||||
{"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
|
||||
for (auto& C : Cases) {
|
||||
int val;
|
||||
if (!GetSysctl(C.name, &val)) continue;
|
||||
CPUInfo::CacheInfo info;
|
||||
info.type = C.type;
|
||||
info.level = C.level;
|
||||
info.size = val;
|
||||
info.num_sharing = static_cast<int>(C.num_sharing);
|
||||
res.push_back(std::move(info));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#elif defined(BENCHMARK_OS_WINDOWS)
|
||||
std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
|
||||
std::vector<CPUInfo::CacheInfo> res;
|
||||
DWORD buffer_size = 0;
|
||||
using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
|
||||
using CInfo = CACHE_DESCRIPTOR;
|
||||
|
||||
using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
|
||||
GetLogicalProcessorInformation(nullptr, &buffer_size);
|
||||
UPtr buff((PInfo*)malloc(buffer_size), &std::free);
|
||||
if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
|
||||
PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
|
||||
GetLastError());
|
||||
|
||||
PInfo* it = buff.get();
|
||||
PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
|
||||
|
||||
for (; it != end; ++it) {
|
||||
if (it->Relationship != RelationCache) continue;
|
||||
using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
|
||||
BitSet B(it->ProcessorMask);
|
||||
// To prevent duplicates, only consider caches where CPU 0 is specified
|
||||
if (!B.test(0)) continue;
|
||||
CInfo* Cache = &it->Cache;
|
||||
CPUInfo::CacheInfo C;
|
||||
C.num_sharing = B.count();
|
||||
C.level = Cache->Level;
|
||||
C.size = Cache->Size;
|
||||
switch (Cache->Type) {
|
||||
case CacheUnified:
|
||||
C.type = "Unified";
|
||||
break;
|
||||
case CacheInstruction:
|
||||
C.type = "Instruction";
|
||||
break;
|
||||
case CacheData:
|
||||
C.type = "Data";
|
||||
break;
|
||||
case CacheTrace:
|
||||
C.type = "Trace";
|
||||
break;
|
||||
default:
|
||||
C.type = "Unknown";
|
||||
break;
|
||||
}
|
||||
res.push_back(C);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
|
||||
#ifdef BENCHMARK_OS_MACOSX
|
||||
return GetCacheSizesMacOSX();
|
||||
#elif defined(BENCHMARK_OS_WINDOWS)
|
||||
return GetCacheSizesWindows();
|
||||
#else
|
||||
return GetCacheSizesFromKVFS();
|
||||
#endif
|
||||
}
|
||||
|
||||
int GetNumCPUs() {
|
||||
#ifdef BENCHMARK_HAS_SYSCTL
|
||||
int NumCPU = -1;
|
||||
if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
|
||||
fprintf(stderr, "Err: %s\n", strerror(errno));
|
||||
std::exit(EXIT_FAILURE);
|
||||
#elif defined(BENCHMARK_OS_WINDOWS)
|
||||
SYSTEM_INFO sysinfo;
|
||||
// Use memset as opposed to = {} to avoid GCC missing initializer false
|
||||
// positives.
|
||||
std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
|
||||
GetSystemInfo(&sysinfo);
|
||||
return sysinfo.dwNumberOfProcessors; // number of logical
|
||||
// processors in the current
|
||||
// group
|
||||
#else
|
||||
int NumCPUs = 0;
|
||||
int MaxID = -1;
|
||||
std::ifstream f("/proc/cpuinfo");
|
||||
if (!f.is_open()) {
|
||||
std::cerr << "failed to open /proc/cpuinfo\n";
|
||||
return -1;
|
||||
}
|
||||
const std::string Key = "processor";
|
||||
std::string ln;
|
||||
while (std::getline(f, ln)) {
|
||||
if (ln.empty()) continue;
|
||||
size_t SplitIdx = ln.find(':');
|
||||
std::string value;
|
||||
if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
|
||||
if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
|
||||
NumCPUs++;
|
||||
if (!value.empty()) {
|
||||
int CurID = std::stoi(value);
|
||||
MaxID = std::max(CurID, MaxID);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (f.bad()) {
|
||||
std::cerr << "Failure reading /proc/cpuinfo\n";
|
||||
return -1;
|
||||
}
|
||||
if (!f.eof()) {
|
||||
std::cerr << "Failed to read to end of /proc/cpuinfo\n";
|
||||
return -1;
|
||||
}
|
||||
f.close();
|
||||
|
||||
if ((MaxID + 1) != NumCPUs) {
|
||||
fprintf(stderr,
|
||||
"CPU ID assignments in /proc/cpuinfo seem messed up."
|
||||
" This is usually caused by a bad BIOS.\n");
|
||||
}
|
||||
return NumCPUs;
|
||||
#endif
|
||||
BENCHMARK_UNREACHABLE();
|
||||
}
|
||||
|
||||
double GetCPUCyclesPerSecond() {
|
||||
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
|
||||
long freq;
|
||||
|
||||
// If the kernel is exporting the tsc frequency use that. There are issues
|
||||
// where cpuinfo_max_freq cannot be relied on because the BIOS may be
|
||||
// exporintg an invalid p-state (on x86) or p-states may be used to put the
|
||||
// processor in a new mode (turbo mode). Essentially, those frequencies
|
||||
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
|
||||
// well.
|
||||
if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
|
||||
// If CPU scaling is in effect, we want to use the *maximum* frequency,
|
||||
// not whatever CPU speed some random processor happens to be using now.
|
||||
|| ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
|
||||
&freq)) {
|
||||
// The value is in kHz (as the file name suggests). For example, on a
|
||||
// 2GHz warpstation, the file contains the value "2000000".
|
||||
return freq * 1000.0;
|
||||
}
|
||||
|
||||
const double error_value = -1;
|
||||
double bogo_clock = error_value;
|
||||
|
||||
std::ifstream f("/proc/cpuinfo");
|
||||
if (!f.is_open()) {
|
||||
std::cerr << "failed to open /proc/cpuinfo\n";
|
||||
return error_value;
|
||||
}
|
||||
|
||||
auto startsWithKey = [](std::string const& Value, std::string const& Key) {
|
||||
if (Key.size() > Value.size()) return false;
|
||||
auto Cmp = [&](char X, char Y) {
|
||||
return std::tolower(X) == std::tolower(Y);
|
||||
};
|
||||
return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp);
|
||||
};
|
||||
|
||||
std::string ln;
|
||||
while (std::getline(f, ln)) {
|
||||
if (ln.empty()) continue;
|
||||
size_t SplitIdx = ln.find(':');
|
||||
std::string value;
|
||||
if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
|
||||
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
|
||||
// accept postive values. Some environments (virtual machines) report zero,
|
||||
// which would cause infinite looping in WallTime_Init.
|
||||
if (startsWithKey(ln, "cpu MHz")) {
|
||||
if (!value.empty()) {
|
||||
double cycles_per_second = std::stod(value) * 1000000.0;
|
||||
if (cycles_per_second > 0) return cycles_per_second;
|
||||
}
|
||||
} else if (startsWithKey(ln, "bogomips")) {
|
||||
if (!value.empty()) {
|
||||
bogo_clock = std::stod(value) * 1000000.0;
|
||||
if (bogo_clock < 0.0) bogo_clock = error_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (f.bad()) {
|
||||
std::cerr << "Failure reading /proc/cpuinfo\n";
|
||||
return error_value;
|
||||
}
|
||||
if (!f.eof()) {
|
||||
std::cerr << "Failed to read to end of /proc/cpuinfo\n";
|
||||
return error_value;
|
||||
}
|
||||
f.close();
|
||||
// If we found the bogomips clock, but nothing better, we'll use it (but
|
||||
// we're not happy about it); otherwise, fallback to the rough estimation
|
||||
// below.
|
||||
if (bogo_clock >= 0.0) return bogo_clock;
|
||||
|
||||
#elif defined BENCHMARK_HAS_SYSCTL
|
||||
constexpr auto* FreqStr =
|
||||
#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
|
||||
"machdep.tsc_freq";
|
||||
#else
|
||||
"hw.cpufrequency";
|
||||
#endif
|
||||
unsigned long long hz = 0;
|
||||
if (GetSysctl(FreqStr, &hz)) return hz;
|
||||
|
||||
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
|
||||
FreqStr, strerror(errno));
|
||||
|
||||
#elif defined BENCHMARK_OS_WINDOWS
|
||||
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
|
||||
// then make a crude estimate.
|
||||
DWORD data, data_size = sizeof(data);
|
||||
if (IsWindowsXPOrGreater() &&
|
||||
SUCCEEDED(
|
||||
SHGetValueA(HKEY_LOCAL_MACHINE,
|
||||
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
|
||||
"~MHz", nullptr, &data, &data_size)))
|
||||
return static_cast<double>((int64_t)data *
|
||||
(int64_t)(1000 * 1000)); // was mhz
|
||||
#endif
|
||||
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
|
||||
const int estimate_time_ms = 1000;
|
||||
const auto start_ticks = cycleclock::Now();
|
||||
SleepForMilliseconds(estimate_time_ms);
|
||||
return static_cast<double>(cycleclock::Now() - start_ticks);
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
const CPUInfo& CPUInfo::Get() {
|
||||
static const CPUInfo* info = new CPUInfo();
|
||||
return *info;
|
||||
}
|
||||
|
||||
CPUInfo::CPUInfo()
|
||||
: num_cpus(GetNumCPUs()),
|
||||
cycles_per_second(GetCPUCyclesPerSecond()),
|
||||
caches(GetCacheSizes()),
|
||||
scaling_enabled(CpuScalingEnabled(num_cpus)) {}
|
||||
|
||||
} // end namespace benchmark
|
||||
Loading…
Add table
Add a link
Reference in a new issue