/src/abseil-cpp/absl/base/internal/sysinfo.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2017 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/base/internal/sysinfo.h" |
16 | | |
17 | | #include "absl/base/attributes.h" |
18 | | |
19 | | #ifdef _WIN32 |
20 | | #include <windows.h> |
21 | | #else |
22 | | #include <fcntl.h> |
23 | | #include <pthread.h> |
24 | | #include <sys/stat.h> |
25 | | #include <sys/types.h> |
26 | | #include <unistd.h> |
27 | | #endif |
28 | | |
29 | | #ifdef __linux__ |
30 | | #include <sys/syscall.h> |
31 | | #endif |
32 | | |
33 | | #if defined(__APPLE__) || defined(__FreeBSD__) |
34 | | #include <sys/sysctl.h> |
35 | | #endif |
36 | | |
37 | | #ifdef __FreeBSD__ |
38 | | #include <pthread_np.h> |
39 | | #endif |
40 | | |
41 | | #ifdef __NetBSD__ |
42 | | #include <lwp.h> |
43 | | #endif |
44 | | |
45 | | #if defined(__myriad2__) |
46 | | #include <rtems.h> |
47 | | #endif |
48 | | |
49 | | #include <string.h> |
50 | | |
51 | | #include <cassert> |
52 | | #include <cerrno> |
53 | | #include <cstdint> |
54 | | #include <cstdio> |
55 | | #include <cstdlib> |
56 | | #include <ctime> |
57 | | #include <limits> |
58 | | #include <thread> // NOLINT(build/c++11) |
59 | | #include <utility> |
60 | | #include <vector> |
61 | | |
62 | | #include "absl/base/call_once.h" |
63 | | #include "absl/base/config.h" |
64 | | #include "absl/base/internal/raw_logging.h" |
65 | | #include "absl/base/internal/spinlock.h" |
66 | | #include "absl/base/internal/unscaledcycleclock.h" |
67 | | #include "absl/base/thread_annotations.h" |
68 | | |
69 | | namespace absl { |
70 | | ABSL_NAMESPACE_BEGIN |
71 | | namespace base_internal { |
72 | | |
73 | | namespace { |
74 | | |
75 | | #if defined(_WIN32) |
76 | | |
77 | | // Returns number of bits set in `bitMask` |
78 | | DWORD Win32CountSetBits(ULONG_PTR bitMask) { |
79 | | for (DWORD bitSetCount = 0; ; ++bitSetCount) { |
80 | | if (bitMask == 0) return bitSetCount; |
81 | | bitMask &= bitMask - 1; |
82 | | } |
83 | | } |
84 | | |
85 | | // Returns the number of logical CPUs using GetLogicalProcessorInformation(), or |
86 | | // 0 if the number of processors is not available or can not be computed. |
87 | | // https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation |
88 | | int Win32NumCPUs() { |
89 | | #pragma comment(lib, "kernel32.lib") |
90 | | using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; |
91 | | |
92 | | DWORD info_size = sizeof(Info); |
93 | | Info* info(static_cast<Info*>(malloc(info_size))); |
94 | | if (info == nullptr) return 0; |
95 | | |
96 | | bool success = GetLogicalProcessorInformation(info, &info_size); |
97 | | if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) { |
98 | | free(info); |
99 | | info = static_cast<Info*>(malloc(info_size)); |
100 | | if (info == nullptr) return 0; |
101 | | success = GetLogicalProcessorInformation(info, &info_size); |
102 | | } |
103 | | |
104 | | DWORD logicalProcessorCount = 0; |
105 | | if (success) { |
106 | | Info* ptr = info; |
107 | | DWORD byteOffset = 0; |
108 | | while (byteOffset + sizeof(Info) <= info_size) { |
109 | | switch (ptr->Relationship) { |
110 | | case RelationProcessorCore: |
111 | | logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask); |
112 | | break; |
113 | | |
114 | | case RelationNumaNode: |
115 | | case RelationCache: |
116 | | case RelationProcessorPackage: |
117 | | // Ignore other entries |
118 | | break; |
119 | | |
120 | | default: |
121 | | // Ignore unknown entries |
122 | | break; |
123 | | } |
124 | | byteOffset += sizeof(Info); |
125 | | ptr++; |
126 | | } |
127 | | } |
128 | | free(info); |
129 | | return static_cast<int>(logicalProcessorCount); |
130 | | } |
131 | | |
132 | | #endif |
133 | | |
134 | | } // namespace |
135 | | |
// Returns the number of logical CPUs, selected per platform at compile time.
// NumCPUs() below documents that this may run before malloc is usable, so no
// branch here allocates.
static int GetNumCPUs() {
#if defined(__myriad2__)
  // Single result hard-coded for the Myriad2 embedded target.
  return 1;
#elif defined(_WIN32)
  // Win32NumCPUs() returns 0 when the count cannot be computed; report 1 in
  // that case rather than an unusable 0.
  const int hardware_concurrency = Win32NumCPUs();
  return hardware_concurrency ? hardware_concurrency : 1;
#elif defined(_AIX)
  return sysconf(_SC_NPROCESSORS_ONLN);
#else
  // Other possibilities:
  // - Read /sys/devices/system/cpu/online and use cpumask_parse()
  // - sysconf(_SC_NPROCESSORS_ONLN)
  // NOTE: hardware_concurrency() may return 0 if it cannot determine the
  // count; callers see that 0 unchanged on this branch.
  return static_cast<int>(std::thread::hardware_concurrency());
#endif
}
151 | | |
152 | | #if defined(_WIN32) |
153 | | |
154 | | static double GetNominalCPUFrequency() { |
155 | | #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \ |
156 | | !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) |
157 | | // UWP apps don't have access to the registry and currently don't provide an |
158 | | // API informing about CPU nominal frequency. |
159 | | return 1.0; |
160 | | #else |
161 | | #pragma comment(lib, "advapi32.lib") // For Reg* functions. |
162 | | HKEY key; |
163 | | // Use the Reg* functions rather than the SH functions because shlwapi.dll |
164 | | // pulls in gdi32.dll which makes process destruction much more costly. |
165 | | if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, |
166 | | "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, |
167 | | KEY_READ, &key) == ERROR_SUCCESS) { |
168 | | DWORD type = 0; |
169 | | DWORD data = 0; |
170 | | DWORD data_size = sizeof(data); |
171 | | auto result = RegQueryValueExA(key, "~MHz", nullptr, &type, |
172 | | reinterpret_cast<LPBYTE>(&data), &data_size); |
173 | | RegCloseKey(key); |
174 | | if (result == ERROR_SUCCESS && type == REG_DWORD && |
175 | | data_size == sizeof(data)) { |
176 | | return data * 1e6; // Value is MHz. |
177 | | } |
178 | | } |
179 | | return 1.0; |
180 | | #endif // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP |
181 | | } |
182 | | |
183 | | #elif defined(CTL_HW) && defined(HW_CPU_FREQ) |
184 | | |
185 | | static double GetNominalCPUFrequency() { |
186 | | unsigned freq; |
187 | | size_t size = sizeof(freq); |
188 | | int mib[2] = {CTL_HW, HW_CPU_FREQ}; |
189 | | if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) { |
190 | | return static_cast<double>(freq); |
191 | | } |
192 | | return 1.0; |
193 | | } |
194 | | |
195 | | #else |
196 | | |
// Helper function for reading a long from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
// `*value` is left untouched on failure. Only the first 1023 bytes of the
// file are considered, and the parsed number must be the whole line
// (terminated by '\n' or NUL).
static bool ReadLongFromFile(const char *file, long *value) {
#if defined(_POSIX_C_SOURCE)
  // Avoid leaking the descriptor into exec'd children where supported.
  const int file_mode = (O_RDONLY | O_CLOEXEC);
#else
  const int file_mode = O_RDONLY;
#endif

  const int fd = open(file, file_mode);
  if (fd == -1) return false;

  char buf[1024] = {'\0'};  // Zero-filled so the read is NUL-terminated.
  ssize_t n;
  do {  // Retry the read if a signal interrupts it.
    n = read(fd, buf, sizeof(buf) - 1);
  } while (n < 0 && errno == EINTR);

  bool ok = false;
  if (n > 0) {
    char *end;
    const long parsed = strtol(buf, &end, 10);
    // Accept only if something was present and parsing consumed the whole
    // line (stopping at end-of-line or end-of-data).
    if (buf[0] != '\0' && (*end == '\n' || *end == '\0')) {
      *value = parsed;
      ok = true;
    }
  }
  close(fd);
  return ok;
}
229 | | |
230 | | #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY) |
231 | | |
232 | | // Reads a monotonic time source and returns a value in |
233 | | // nanoseconds. The returned value uses an arbitrary epoch, not the |
234 | | // Unix epoch. |
235 | 0 | static int64_t ReadMonotonicClockNanos() { |
236 | 0 | struct timespec t; |
237 | 0 | #ifdef CLOCK_MONOTONIC_RAW |
238 | 0 | int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t); |
239 | | #else |
240 | | int rc = clock_gettime(CLOCK_MONOTONIC, &t); |
241 | | #endif |
242 | 0 | if (rc != 0) { |
243 | 0 | ABSL_INTERNAL_LOG( |
244 | 0 | FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")"); |
245 | 0 | } |
246 | 0 | return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec; |
247 | 0 | } |
248 | | |
// Thin wrapper over UnscaledCycleClock::Now() used by the frequency
// measurement code below.
class UnscaledCycleClockWrapperForInitializeFrequency {
 public:
  // Returns the current unscaled cycle-clock (TSC) reading.
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};
253 | | |
// A matched pair of readings from the kernel clock and the cycle clock,
// taken as close together in time as GetTimeTscPair() can manage.
struct TimeTscPair {
  int64_t time;  // From ReadMonotonicClockNanos().
  int64_t tsc;   // From UnscaledCycleClock::Now().
};
258 | | |
259 | | // Returns a pair of values (monotonic kernel time, TSC ticks) that |
260 | | // approximately correspond to each other. This is accomplished by |
261 | | // doing several reads and picking the reading with the lowest |
262 | | // latency. This approach is used to minimize the probability that |
263 | | // our thread was preempted between clock reads. |
264 | 0 | static TimeTscPair GetTimeTscPair() { |
265 | 0 | int64_t best_latency = std::numeric_limits<int64_t>::max(); |
266 | 0 | TimeTscPair best; |
267 | 0 | for (int i = 0; i < 10; ++i) { |
268 | 0 | int64_t t0 = ReadMonotonicClockNanos(); |
269 | 0 | int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now(); |
270 | 0 | int64_t t1 = ReadMonotonicClockNanos(); |
271 | 0 | int64_t latency = t1 - t0; |
272 | 0 | if (latency < best_latency) { |
273 | 0 | best_latency = latency; |
274 | 0 | best.time = t0; |
275 | 0 | best.tsc = tsc; |
276 | 0 | } |
277 | 0 | } |
278 | 0 | return best; |
279 | 0 | } |
280 | | |
281 | | // Measures and returns the TSC frequency by taking a pair of |
282 | | // measurements approximately `sleep_nanoseconds` apart. |
283 | 0 | static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) { |
284 | 0 | auto t0 = GetTimeTscPair(); |
285 | 0 | struct timespec ts; |
286 | 0 | ts.tv_sec = 0; |
287 | 0 | ts.tv_nsec = sleep_nanoseconds; |
288 | 0 | while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {} |
289 | 0 | auto t1 = GetTimeTscPair(); |
290 | 0 | double elapsed_ticks = t1.tsc - t0.tsc; |
291 | 0 | double elapsed_time = (t1.time - t0.time) * 1e-9; |
292 | 0 | return elapsed_ticks / elapsed_time; |
293 | 0 | } |
294 | | |
295 | | // Measures and returns the TSC frequency by calling |
296 | | // MeasureTscFrequencyWithSleep(), doubling the sleep interval until the |
297 | | // frequency measurement stabilizes. |
298 | 0 | static double MeasureTscFrequency() { |
299 | 0 | double last_measurement = -1.0; |
300 | 0 | int sleep_nanoseconds = 1000000; // 1 millisecond. |
301 | 0 | for (int i = 0; i < 8; ++i) { |
302 | 0 | double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds); |
303 | 0 | if (measurement * 0.99 < last_measurement && |
304 | 0 | last_measurement < measurement * 1.01) { |
305 | | // Use the current measurement if it is within 1% of the |
306 | | // previous measurement. |
307 | 0 | return measurement; |
308 | 0 | } |
309 | 0 | last_measurement = measurement; |
310 | 0 | sleep_nanoseconds *= 2; |
311 | 0 | } |
312 | 0 | return last_measurement; |
313 | 0 | } |
314 | | |
315 | | #endif // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY |
316 | | |
// Returns the nominal CPU frequency in Hz: first a kernel-exported TSC rate,
// then (on TSC platforms) a direct measurement, otherwise cpufreq sysfs, and
// finally 1.0 so callers may divide by the result without checking.
static double GetNominalCPUFrequency() {
  long freq = 0;

  // Google's production kernel has a patch to export the TSC
  // frequency through sysfs. If the kernel is exporting the TSC
  // frequency use that. There are issues where cpuinfo_max_freq
  // cannot be relied on because the BIOS may be exporting an invalid
  // p-state (on x86) or p-states may be used to put the processor in
  // a new mode (turbo mode). Essentially, those frequencies cannot
  // always be relied upon. The same reasons apply to /proc/cpuinfo as
  // well.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  // On these platforms, the TSC frequency is the nominal CPU
  // frequency. But without having the kernel export it directly
  // though /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no
  // other way to reliably get the TSC frequency, so we have to
  // measure it ourselves. Some CPUs abuse cpuinfo_max_freq by
  // exporting "fake" frequencies for implementing new features. For
  // example, Intel's turbo mode is enabled by exposing a p-state
  // value with a higher frequency than that of the real TSC
  // rate. Because of this, we prefer to measure the TSC rate
  // ourselves on i386 and x86-64.
  return MeasureTscFrequency();
#else

  // If CPU scaling is in effect, we want to use the *maximum*
  // frequency, not whatever CPU speed some random processor happens
  // to be using now.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                       &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

  return 1.0;
#endif  // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
}
357 | | |
358 | | #endif |
359 | | |
// Cached result of GetNumCPUs(), computed at most once on first use.
ABSL_CONST_INIT static once_flag init_num_cpus_once;
ABSL_CONST_INIT static int num_cpus = 0;

// NumCPUs() may be called before main() and before malloc is properly
// initialized, therefore this must not allocate memory.
int NumCPUs() {
  base_internal::LowLevelCallOnce(
      &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); });
  return num_cpus;
}
370 | | |
// A default frequency of 0.0 might be dangerous if it is used in division.
// Cached result of GetNominalCPUFrequency(), computed at most once.
ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once;
ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0;

// NominalCPUFrequency() may be called before main() and before malloc is
// properly initialized, therefore this must not allocate memory.
double NominalCPUFrequency() {
  base_internal::LowLevelCallOnce(
      &init_nominal_cpu_frequency_once,
      []() { nominal_cpu_frequency = GetNominalCPUFrequency(); });
  return nominal_cpu_frequency;
}
383 | | |
#if defined(_WIN32)

// Windows: the OS thread ID is already a process-wide integer.
pid_t GetTID() {
  return pid_t{GetCurrentThreadId()};
}

#elif defined(__linux__)

// Define SYS_gettid from the raw syscall number if the libc headers do not
// provide it, so the syscall() below always compiles.
#ifndef SYS_gettid
#define SYS_gettid __NR_gettid
#endif

// Linux: issue the gettid syscall directly.
pid_t GetTID() {
  return static_cast<pid_t>(syscall(SYS_gettid));
}

#elif defined(__akaros__)

pid_t GetTID() {
  // Akaros has a concept of "vcore context", which is the state the program
  // is forced into when we need to make a user-level scheduling decision, or
  // run a signal handler. This is analogous to the interrupt context that a
  // CPU might enter if it encounters some kind of exception.
  //
  // There is no current thread context in vcore context, but we need to give
  // a reasonable answer if asked for a thread ID (e.g., in a signal handler).
  // Thread 0 always exists, so if we are in vcore context, we return that.
  //
  // Otherwise, we know (since we are using pthreads) that the uthread struct
  // current_uthread is pointing to is the first element of a
  // struct pthread_tcb, so we extract and return the thread ID from that.
  //
  // TODO(dcross): Akaros anticipates moving the thread ID to the uthread
  // structure at some point. We should modify this code to remove the cast
  // when that happens.
  if (in_vcore_context())
    return 0;
  return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id;
}

#elif defined(__myriad2__)

// RTEMS (Myriad2): identify the currently running task.
pid_t GetTID() {
  uint32_t tid;
  rtems_task_ident(RTEMS_SELF, 0, &tid);
  return tid;
}

#elif defined(__APPLE__)

pid_t GetTID() {
  uint64_t tid;
  // `nullptr` here implies this thread. This only fails if the specified
  // thread is invalid or the pointer-to-tid is null, so we needn't worry about
  // it.
  pthread_threadid_np(nullptr, &tid);
  // NOTE(review): the 64-bit ID is narrowed to pid_t here — presumably safe
  // for real thread IDs on this platform; confirm before relying on the
  // high bits.
  return static_cast<pid_t>(tid);
}

#elif defined(__FreeBSD__)

pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); }

#elif defined(__OpenBSD__)

pid_t GetTID() { return getthrid(); }

#elif defined(__NetBSD__)

pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); }

#elif defined(__native_client__)

pid_t GetTID() {
  auto* thread = pthread_self();
  static_assert(sizeof(pid_t) == sizeof(thread),
                "In NaCL int expected to be the same size as a pointer");
  return reinterpret_cast<pid_t>(thread);
}

#else

// Fallback implementation of `GetTID` using `pthread_self`.
pid_t GetTID() {
  // `pthread_t` need not be arithmetic per POSIX; platforms where it isn't
  // should be handled above.
  return static_cast<pid_t>(pthread_self());
}

#endif
474 | | |
// GetCachedTID() caches the thread ID in thread-local storage (which is a
// userspace construct) to avoid unnecessary system calls. Without this caching,
// it can take roughly 98ns, while it takes roughly 1ns with this caching.
pid_t GetCachedTID() {
#ifdef ABSL_HAVE_THREAD_LOCAL
  // The thread_local initializer runs once per thread on first call, so
  // GetTID()'s syscall happens at most once per thread.
  static thread_local pid_t thread_id = GetTID();
  return thread_id;
#else
  // No thread_local support: fall back to the uncached GetTID() each call.
  return GetTID();
#endif  // ABSL_HAVE_THREAD_LOCAL
}
486 | | |
487 | | } // namespace base_internal |
488 | | ABSL_NAMESPACE_END |
489 | | } // namespace absl |