517 lines
16 KiB
C++
517 lines
16 KiB
C++
// Copyright 2015 Google Inc. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "internal_macros.h"
|
|
|
|
#ifdef BENCHMARK_OS_WINDOWS
|
|
#include <Shlwapi.h>
|
|
#include <VersionHelpers.h>
|
|
#include <Windows.h>
|
|
#else
|
|
#include <fcntl.h>
|
|
#include <sys/resource.h>
|
|
#include <sys/time.h>
|
|
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
|
|
#include <unistd.h>
|
|
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
|
|
defined BENCHMARK_OS_NETBSD
|
|
#define BENCHMARK_HAS_SYSCTL
|
|
#include <sys/sysctl.h>
|
|
#endif
|
|
#endif
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <bitset>
|
|
#include <cerrno>
|
|
#include <climits>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <iterator>
|
|
#include <limits>
|
|
#include <memory>
|
|
#include <sstream>
|
|
|
|
#include "check.h"
|
|
#include "cycleclock.h"
|
|
#include "internal_macros.h"
|
|
#include "log.h"
|
|
#include "sleep.h"
|
|
#include "string_util.h"
|
|
|
|
namespace benchmark {
|
|
namespace {
|
|
|
|
void PrintImp(std::ostream& out) { out << std::endl; }
|
|
|
|
template <class First, class... Rest>
|
|
void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
|
|
out << std::forward<First>(f);
|
|
PrintImp(out, std::forward<Rest>(rest)...);
|
|
}
|
|
|
|
template <class... Args>
|
|
BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
|
|
PrintImp(std::cerr, std::forward<Args>(args)...);
|
|
std::exit(EXIT_FAILURE);
|
|
}
|
|
|
|
#ifdef BENCHMARK_HAS_SYSCTL
|
|
|
|
/// ValueUnion - A type used to correctly alias the byte-for-byte output of
|
|
/// `sysctl` with the result type it's to be interpreted as.
|
|
struct ValueUnion {
|
|
union DataT {
|
|
uint32_t uint32_value;
|
|
uint64_t uint64_value;
|
|
// For correct aliasing of union members from bytes.
|
|
char bytes[8];
|
|
};
|
|
using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
|
|
|
|
// The size of the data union member + its trailing array size.
|
|
size_t Size;
|
|
DataPtr Buff;
|
|
|
|
public:
|
|
ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
|
|
|
|
explicit ValueUnion(size_t BuffSize)
|
|
: Size(sizeof(DataT) + BuffSize),
|
|
Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
|
|
|
|
ValueUnion(ValueUnion&& other) = default;
|
|
|
|
explicit operator bool() const { return bool(Buff); }
|
|
|
|
char* data() const { return Buff->bytes; }
|
|
|
|
std::string GetAsString() const { return std::string(data()); }
|
|
|
|
int64_t GetAsInteger() const {
|
|
if (Size == sizeof(Buff->uint32_value))
|
|
return static_cast<int32_t>(Buff->uint32_value);
|
|
else if (Size == sizeof(Buff->uint64_value))
|
|
return static_cast<int64_t>(Buff->uint64_value);
|
|
BENCHMARK_UNREACHABLE();
|
|
}
|
|
|
|
uint64_t GetAsUnsigned() const {
|
|
if (Size == sizeof(Buff->uint32_value))
|
|
return Buff->uint32_value;
|
|
else if (Size == sizeof(Buff->uint64_value))
|
|
return Buff->uint64_value;
|
|
BENCHMARK_UNREACHABLE();
|
|
}
|
|
|
|
template <class T, int N>
|
|
std::array<T, N> GetAsArray() {
|
|
const int ArrSize = sizeof(T) * N;
|
|
CHECK_LE(ArrSize, Size);
|
|
std::array<T, N> Arr;
|
|
std::memcpy(Arr.data(), data(), ArrSize);
|
|
return Arr;
|
|
}
|
|
};
|
|
|
|
ValueUnion GetSysctlImp(std::string const& Name) {
|
|
size_t CurBuffSize = 0;
|
|
if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
|
|
return ValueUnion();
|
|
|
|
ValueUnion buff(CurBuffSize);
|
|
if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
|
|
return buff;
|
|
return ValueUnion();
|
|
}
|
|
|
|
BENCHMARK_MAYBE_UNUSED
|
|
bool GetSysctl(std::string const& Name, std::string* Out) {
|
|
Out->clear();
|
|
auto Buff = GetSysctlImp(Name);
|
|
if (!Buff) return false;
|
|
Out->assign(Buff.data());
|
|
return true;
|
|
}
|
|
|
|
template <class Tp,
|
|
class = typename std::enable_if<std::is_integral<Tp>::value>::type>
|
|
bool GetSysctl(std::string const& Name, Tp* Out) {
|
|
*Out = 0;
|
|
auto Buff = GetSysctlImp(Name);
|
|
if (!Buff) return false;
|
|
*Out = static_cast<Tp>(Buff.GetAsUnsigned());
|
|
return true;
|
|
}
|
|
|
|
template <class Tp, size_t N>
|
|
bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
|
|
auto Buff = GetSysctlImp(Name);
|
|
if (!Buff) return false;
|
|
*Out = Buff.GetAsArray<Tp, N>();
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
template <class ArgT>
|
|
bool ReadFromFile(std::string const& fname, ArgT* arg) {
|
|
*arg = ArgT();
|
|
std::ifstream f(fname.c_str());
|
|
if (!f.is_open()) return false;
|
|
f >> *arg;
|
|
return f.good();
|
|
}
|
|
|
|
bool CpuScalingEnabled(int num_cpus) {
|
|
// We don't have a valid CPU count, so don't even bother.
|
|
if (num_cpus <= 0) return false;
|
|
#ifndef BENCHMARK_OS_WINDOWS
|
|
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
|
|
// local file system. If reading the exported files fails, then we may not be
|
|
// running on Linux, so we silently ignore all the read errors.
|
|
std::string res;
|
|
for (int cpu = 0; cpu < num_cpus; ++cpu) {
|
|
std::string governor_file =
|
|
StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
|
|
if (ReadFromFile(governor_file, &res) && res != "performance") return true;
|
|
}
|
|
#endif
|
|
return false;
|
|
}
|
|
|
|
int CountSetBitsInCPUMap(std::string Val) {
|
|
auto CountBits = [](std::string Part) {
|
|
using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
|
|
Part = "0x" + Part;
|
|
CPUMask Mask(std::stoul(Part, nullptr, 16));
|
|
return static_cast<int>(Mask.count());
|
|
};
|
|
size_t Pos;
|
|
int total = 0;
|
|
while ((Pos = Val.find(',')) != std::string::npos) {
|
|
total += CountBits(Val.substr(0, Pos));
|
|
Val = Val.substr(Pos + 1);
|
|
}
|
|
if (!Val.empty()) {
|
|
total += CountBits(Val);
|
|
}
|
|
return total;
|
|
}
|
|
|
|
BENCHMARK_MAYBE_UNUSED
|
|
std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
|
|
std::vector<CPUInfo::CacheInfo> res;
|
|
std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
|
|
int Idx = 0;
|
|
while (true) {
|
|
CPUInfo::CacheInfo info;
|
|
std::string FPath = StrCat(dir, "index", Idx++, "/");
|
|
std::ifstream f(StrCat(FPath, "size").c_str());
|
|
if (!f.is_open()) break;
|
|
std::string suffix;
|
|
f >> info.size;
|
|
if (f.fail())
|
|
PrintErrorAndDie("Failed while reading file '", FPath, "size'");
|
|
if (f.good()) {
|
|
f >> suffix;
|
|
if (f.bad())
|
|
PrintErrorAndDie(
|
|
"Invalid cache size format: failed to read size suffix");
|
|
else if (f && suffix != "K")
|
|
PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
|
|
else if (suffix == "K")
|
|
info.size *= 1000;
|
|
}
|
|
if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
|
|
PrintErrorAndDie("Failed to read from file ", FPath, "type");
|
|
if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
|
|
PrintErrorAndDie("Failed to read from file ", FPath, "level");
|
|
std::string map_str;
|
|
if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
|
|
PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
|
|
info.num_sharing = CountSetBitsInCPUMap(map_str);
|
|
res.push_back(info);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
#ifdef BENCHMARK_OS_MACOSX
|
|
std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
|
|
std::vector<CPUInfo::CacheInfo> res;
|
|
std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
|
|
GetSysctl("hw.cacheconfig", &CacheCounts);
|
|
|
|
struct {
|
|
std::string name;
|
|
std::string type;
|
|
int level;
|
|
size_t num_sharing;
|
|
} Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
|
|
{"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
|
|
{"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
|
|
{"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
|
|
for (auto& C : Cases) {
|
|
int val;
|
|
if (!GetSysctl(C.name, &val)) continue;
|
|
CPUInfo::CacheInfo info;
|
|
info.type = C.type;
|
|
info.level = C.level;
|
|
info.size = val;
|
|
info.num_sharing = static_cast<int>(C.num_sharing);
|
|
res.push_back(std::move(info));
|
|
}
|
|
return res;
|
|
}
|
|
#elif defined(BENCHMARK_OS_WINDOWS)
|
|
std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
|
|
std::vector<CPUInfo::CacheInfo> res;
|
|
DWORD buffer_size = 0;
|
|
using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
|
|
using CInfo = CACHE_DESCRIPTOR;
|
|
|
|
using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
|
|
GetLogicalProcessorInformation(nullptr, &buffer_size);
|
|
UPtr buff((PInfo*)malloc(buffer_size), &std::free);
|
|
if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
|
|
PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
|
|
GetLastError());
|
|
|
|
PInfo* it = buff.get();
|
|
PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
|
|
|
|
for (; it != end; ++it) {
|
|
if (it->Relationship != RelationCache) continue;
|
|
using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
|
|
BitSet B(it->ProcessorMask);
|
|
// To prevent duplicates, only consider caches where CPU 0 is specified
|
|
if (!B.test(0)) continue;
|
|
CInfo* Cache = &it->Cache;
|
|
CPUInfo::CacheInfo C;
|
|
C.num_sharing = B.count();
|
|
C.level = Cache->Level;
|
|
C.size = Cache->Size;
|
|
switch (Cache->Type) {
|
|
case CacheUnified:
|
|
C.type = "Unified";
|
|
break;
|
|
case CacheInstruction:
|
|
C.type = "Instruction";
|
|
break;
|
|
case CacheData:
|
|
C.type = "Data";
|
|
break;
|
|
case CacheTrace:
|
|
C.type = "Trace";
|
|
break;
|
|
default:
|
|
C.type = "Unknown";
|
|
break;
|
|
}
|
|
res.push_back(C);
|
|
}
|
|
return res;
|
|
}
|
|
#endif
|
|
|
|
std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
|
|
#ifdef BENCHMARK_OS_MACOSX
|
|
return GetCacheSizesMacOSX();
|
|
#elif defined(BENCHMARK_OS_WINDOWS)
|
|
return GetCacheSizesWindows();
|
|
#else
|
|
return GetCacheSizesFromKVFS();
|
|
#endif
|
|
}
|
|
|
|
int GetNumCPUs() {
|
|
#ifdef BENCHMARK_HAS_SYSCTL
|
|
int NumCPU = -1;
|
|
if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
|
|
fprintf(stderr, "Err: %s\n", strerror(errno));
|
|
std::exit(EXIT_FAILURE);
|
|
#elif defined(BENCHMARK_OS_WINDOWS)
|
|
SYSTEM_INFO sysinfo;
|
|
// Use memset as opposed to = {} to avoid GCC missing initializer false
|
|
// positives.
|
|
std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
|
|
GetSystemInfo(&sysinfo);
|
|
return sysinfo.dwNumberOfProcessors; // number of logical
|
|
// processors in the current
|
|
// group
|
|
#else
|
|
int NumCPUs = 0;
|
|
int MaxID = -1;
|
|
std::ifstream f("/proc/cpuinfo");
|
|
if (!f.is_open()) {
|
|
std::cerr << "failed to open /proc/cpuinfo\n";
|
|
return -1;
|
|
}
|
|
const std::string Key = "processor";
|
|
std::string ln;
|
|
while (std::getline(f, ln)) {
|
|
if (ln.empty()) continue;
|
|
size_t SplitIdx = ln.find(':');
|
|
std::string value;
|
|
if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
|
|
if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
|
|
NumCPUs++;
|
|
if (!value.empty()) {
|
|
int CurID = std::stoi(value);
|
|
MaxID = std::max(CurID, MaxID);
|
|
}
|
|
}
|
|
}
|
|
if (f.bad()) {
|
|
std::cerr << "Failure reading /proc/cpuinfo\n";
|
|
return -1;
|
|
}
|
|
if (!f.eof()) {
|
|
std::cerr << "Failed to read to end of /proc/cpuinfo\n";
|
|
return -1;
|
|
}
|
|
f.close();
|
|
|
|
if ((MaxID + 1) != NumCPUs) {
|
|
fprintf(stderr,
|
|
"CPU ID assignments in /proc/cpuinfo seem messed up."
|
|
" This is usually caused by a bad BIOS.\n");
|
|
}
|
|
return NumCPUs;
|
|
#endif
|
|
BENCHMARK_UNREACHABLE();
|
|
}
|
|
|
|
double GetCPUCyclesPerSecond() {
|
|
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
|
|
long freq;
|
|
|
|
// If the kernel is exporting the tsc frequency use that. There are issues
|
|
// where cpuinfo_max_freq cannot be relied on because the BIOS may be
|
|
// exporintg an invalid p-state (on x86) or p-states may be used to put the
|
|
// processor in a new mode (turbo mode). Essentially, those frequencies
|
|
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
|
|
// well.
|
|
if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
|
|
// If CPU scaling is in effect, we want to use the *maximum* frequency,
|
|
// not whatever CPU speed some random processor happens to be using now.
|
|
|| ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
|
|
&freq)) {
|
|
// The value is in kHz (as the file name suggests). For example, on a
|
|
// 2GHz warpstation, the file contains the value "2000000".
|
|
return freq * 1000.0;
|
|
}
|
|
|
|
const double error_value = -1;
|
|
double bogo_clock = error_value;
|
|
|
|
std::ifstream f("/proc/cpuinfo");
|
|
if (!f.is_open()) {
|
|
std::cerr << "failed to open /proc/cpuinfo\n";
|
|
return error_value;
|
|
}
|
|
|
|
auto startsWithKey = [](std::string const& Value, std::string const& Key) {
|
|
if (Key.size() > Value.size()) return false;
|
|
auto Cmp = [&](char X, char Y) {
|
|
return std::tolower(X) == std::tolower(Y);
|
|
};
|
|
return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp);
|
|
};
|
|
|
|
std::string ln;
|
|
while (std::getline(f, ln)) {
|
|
if (ln.empty()) continue;
|
|
size_t SplitIdx = ln.find(':');
|
|
std::string value;
|
|
if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
|
|
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
|
|
// accept postive values. Some environments (virtual machines) report zero,
|
|
// which would cause infinite looping in WallTime_Init.
|
|
if (startsWithKey(ln, "cpu MHz")) {
|
|
if (!value.empty()) {
|
|
double cycles_per_second = std::stod(value) * 1000000.0;
|
|
if (cycles_per_second > 0) return cycles_per_second;
|
|
}
|
|
} else if (startsWithKey(ln, "bogomips")) {
|
|
if (!value.empty()) {
|
|
bogo_clock = std::stod(value) * 1000000.0;
|
|
if (bogo_clock < 0.0) bogo_clock = error_value;
|
|
}
|
|
}
|
|
}
|
|
if (f.bad()) {
|
|
std::cerr << "Failure reading /proc/cpuinfo\n";
|
|
return error_value;
|
|
}
|
|
if (!f.eof()) {
|
|
std::cerr << "Failed to read to end of /proc/cpuinfo\n";
|
|
return error_value;
|
|
}
|
|
f.close();
|
|
// If we found the bogomips clock, but nothing better, we'll use it (but
|
|
// we're not happy about it); otherwise, fallback to the rough estimation
|
|
// below.
|
|
if (bogo_clock >= 0.0) return bogo_clock;
|
|
|
|
#elif defined BENCHMARK_HAS_SYSCTL
|
|
constexpr auto* FreqStr =
|
|
#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
|
|
"machdep.tsc_freq";
|
|
#else
|
|
"hw.cpufrequency";
|
|
#endif
|
|
unsigned long long hz = 0;
|
|
if (GetSysctl(FreqStr, &hz)) return hz;
|
|
|
|
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
|
|
FreqStr, strerror(errno));
|
|
|
|
#elif defined BENCHMARK_OS_WINDOWS
|
|
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
|
|
// then make a crude estimate.
|
|
DWORD data, data_size = sizeof(data);
|
|
if (IsWindowsXPOrGreater() &&
|
|
SUCCEEDED(
|
|
SHGetValueA(HKEY_LOCAL_MACHINE,
|
|
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
|
|
"~MHz", nullptr, &data, &data_size)))
|
|
return static_cast<double>((int64_t)data *
|
|
(int64_t)(1000 * 1000)); // was mhz
|
|
#endif
|
|
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
|
|
const int estimate_time_ms = 1000;
|
|
const auto start_ticks = cycleclock::Now();
|
|
SleepForMilliseconds(estimate_time_ms);
|
|
return static_cast<double>(cycleclock::Now() - start_ticks);
|
|
}
|
|
|
|
} // end namespace
|
|
|
|
const CPUInfo& CPUInfo::Get() {
|
|
static const CPUInfo* info = new CPUInfo();
|
|
return *info;
|
|
}
|
|
|
|
CPUInfo::CPUInfo()
|
|
: num_cpus(GetNumCPUs()),
|
|
cycles_per_second(GetCPUCyclesPerSecond()),
|
|
caches(GetCacheSizes()),
|
|
scaling_enabled(CpuScalingEnabled(num_cpus)) {}
|
|
|
|
} // end namespace benchmark
|