/src/abseil-cpp/absl/base/internal/sysinfo.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2017 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/base/internal/sysinfo.h" |
16 | | |
17 | | #include "absl/base/attributes.h" |
18 | | |
19 | | #ifdef _WIN32 |
20 | | #include <windows.h> |
21 | | #else |
22 | | #include <fcntl.h> |
23 | | #include <pthread.h> |
24 | | #include <sys/stat.h> |
25 | | #include <sys/types.h> |
26 | | #include <unistd.h> |
27 | | #endif |
28 | | |
29 | | #ifdef __linux__ |
30 | | #include <sys/syscall.h> |
31 | | #endif |
32 | | |
33 | | #if defined(__APPLE__) || defined(__FreeBSD__) |
34 | | #include <sys/sysctl.h> |
35 | | #endif |
36 | | |
37 | | #ifdef __FreeBSD__ |
38 | | #include <pthread_np.h> |
39 | | #endif |
40 | | |
41 | | #ifdef __NetBSD__ |
42 | | #include <lwp.h> |
43 | | #endif |
44 | | |
45 | | #if defined(__myriad2__) |
46 | | #include <rtems.h> |
47 | | #endif |
48 | | |
49 | | #include <string.h> |
50 | | |
51 | | #include <cassert> |
52 | | #include <cerrno> |
53 | | #include <cstdint> |
54 | | #include <cstdio> |
55 | | #include <cstdlib> |
56 | | #include <ctime> |
57 | | #include <limits> |
58 | | #include <thread> // NOLINT(build/c++11) |
59 | | #include <utility> |
60 | | #include <vector> |
61 | | |
62 | | #include "absl/base/call_once.h" |
63 | | #include "absl/base/config.h" |
64 | | #include "absl/base/internal/raw_logging.h" |
65 | | #include "absl/base/internal/spinlock.h" |
66 | | #include "absl/base/internal/unscaledcycleclock.h" |
67 | | #include "absl/base/thread_annotations.h" |
68 | | |
69 | | namespace absl { |
70 | | ABSL_NAMESPACE_BEGIN |
71 | | namespace base_internal { |
72 | | |
73 | | namespace { |
74 | | |
75 | | #if defined(_WIN32) |
76 | | |
77 | | // Returns number of bits set in `bitMask` |
78 | | DWORD Win32CountSetBits(ULONG_PTR bitMask) { |
79 | | for (DWORD bitSetCount = 0; ; ++bitSetCount) { |
80 | | if (bitMask == 0) return bitSetCount; |
81 | | bitMask &= bitMask - 1; |
82 | | } |
83 | | } |
84 | | |
85 | | // Returns the number of logical CPUs using GetLogicalProcessorInformation(), or |
86 | | // 0 if the number of processors is not available or can not be computed. |
87 | | // https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation |
88 | | int Win32NumCPUs() { |
89 | | #pragma comment(lib, "kernel32.lib") |
90 | | using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; |
91 | | |
92 | | DWORD info_size = sizeof(Info); |
93 | | Info* info(static_cast<Info*>(malloc(info_size))); |
94 | | if (info == nullptr) return 0; |
95 | | |
96 | | bool success = GetLogicalProcessorInformation(info, &info_size); |
97 | | if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) { |
98 | | free(info); |
99 | | info = static_cast<Info*>(malloc(info_size)); |
100 | | if (info == nullptr) return 0; |
101 | | success = GetLogicalProcessorInformation(info, &info_size); |
102 | | } |
103 | | |
104 | | DWORD logicalProcessorCount = 0; |
105 | | if (success) { |
106 | | Info* ptr = info; |
107 | | DWORD byteOffset = 0; |
108 | | while (byteOffset + sizeof(Info) <= info_size) { |
109 | | switch (ptr->Relationship) { |
110 | | case RelationProcessorCore: |
111 | | logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask); |
112 | | break; |
113 | | |
114 | | case RelationNumaNode: |
115 | | case RelationCache: |
116 | | case RelationProcessorPackage: |
117 | | // Ignore other entries |
118 | | break; |
119 | | |
120 | | default: |
121 | | // Ignore unknown entries |
122 | | break; |
123 | | } |
124 | | byteOffset += sizeof(Info); |
125 | | ptr++; |
126 | | } |
127 | | } |
128 | | free(info); |
129 | | return static_cast<int>(logicalProcessorCount); |
130 | | } |
131 | | |
132 | | #endif |
133 | | |
134 | | } // namespace |
135 | | |
// Returns the number of logical CPUs, selected per platform at compile time.
// NumCPUs() below documents that this may run before malloc is usable, so no
// branch here allocates.
static int GetNumCPUs() {
#if defined(__myriad2__)
  // Single result hard-coded for the Myriad2 embedded target.
  return 1;
#elif defined(_WIN32)
  // Win32NumCPUs() returns 0 when the count cannot be computed; report 1 in
  // that case rather than an unusable 0.
  const int hardware_concurrency = Win32NumCPUs();
  return hardware_concurrency ? hardware_concurrency : 1;
#elif defined(_AIX)
  return sysconf(_SC_NPROCESSORS_ONLN);
#else
  // Other possibilities:
  // - Read /sys/devices/system/cpu/online and use cpumask_parse()
  // - sysconf(_SC_NPROCESSORS_ONLN)
  // NOTE: hardware_concurrency() may return 0 if it cannot determine the
  // count; callers see that 0 unchanged on this branch.
  return static_cast<int>(std::thread::hardware_concurrency());
#endif
}
151 | | |
152 | | #if defined(_WIN32) |
153 | | |
154 | | static double GetNominalCPUFrequency() { |
155 | | #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \ |
156 | | !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) |
157 | | // UWP apps don't have access to the registry and currently don't provide an |
158 | | // API informing about CPU nominal frequency. |
159 | | return 1.0; |
160 | | #else |
161 | | #pragma comment(lib, "advapi32.lib") // For Reg* functions. |
162 | | HKEY key; |
163 | | // Use the Reg* functions rather than the SH functions because shlwapi.dll |
164 | | // pulls in gdi32.dll which makes process destruction much more costly. |
165 | | if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, |
166 | | "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, |
167 | | KEY_READ, &key) == ERROR_SUCCESS) { |
168 | | DWORD type = 0; |
169 | | DWORD data = 0; |
170 | | DWORD data_size = sizeof(data); |
171 | | auto result = RegQueryValueExA(key, "~MHz", nullptr, &type, |
172 | | reinterpret_cast<LPBYTE>(&data), &data_size); |
173 | | RegCloseKey(key); |
174 | | if (result == ERROR_SUCCESS && type == REG_DWORD && |
175 | | data_size == sizeof(data)) { |
176 | | return data * 1e6; // Value is MHz. |
177 | | } |
178 | | } |
179 | | return 1.0; |
180 | | #endif // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP |
181 | | } |
182 | | |
183 | | #elif defined(CTL_HW) && defined(HW_CPU_FREQ) |
184 | | |
185 | | static double GetNominalCPUFrequency() { |
186 | | unsigned freq; |
187 | | size_t size = sizeof(freq); |
188 | | int mib[2] = {CTL_HW, HW_CPU_FREQ}; |
189 | | if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) { |
190 | | return static_cast<double>(freq); |
191 | | } |
192 | | return 1.0; |
193 | | } |
194 | | |
195 | | #else |
196 | | |
// Helper function for reading a long from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
// `*value` is left untouched on failure. Only the first 1023 bytes of the
// file are considered, and the parsed number must be the whole line
// (terminated by '\n' or NUL).
static bool ReadLongFromFile(const char *file, long *value) {
#if defined(_POSIX_C_SOURCE)
  // Avoid leaking the descriptor into exec'd children where supported.
  const int file_mode = (O_RDONLY | O_CLOEXEC);
#else
  const int file_mode = O_RDONLY;
#endif

  const int fd = open(file, file_mode);
  if (fd == -1) return false;

  char buf[1024] = {'\0'};  // Zero-filled so the read is NUL-terminated.
  ssize_t n;
  do {  // Retry the read if a signal interrupts it.
    n = read(fd, buf, sizeof(buf) - 1);
  } while (n < 0 && errno == EINTR);

  bool ok = false;
  if (n > 0) {
    char *end;
    const long parsed = strtol(buf, &end, 10);
    // Accept only if something was present and parsing consumed the whole
    // line (stopping at end-of-line or end-of-data).
    if (buf[0] != '\0' && (*end == '\n' || *end == '\0')) {
      *value = parsed;
      ok = true;
    }
  }
  close(fd);
  return ok;
}
229 | | |
230 | | #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY) |
231 | | |
232 | | // Reads a monotonic time source and returns a value in |
233 | | // nanoseconds. The returned value uses an arbitrary epoch, not the |
234 | | // Unix epoch. |
235 | 0 | static int64_t ReadMonotonicClockNanos() { |
236 | 0 | struct timespec t; |
237 | 0 | #ifdef CLOCK_MONOTONIC_RAW |
238 | 0 | int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t); |
239 | | #else |
240 | | int rc = clock_gettime(CLOCK_MONOTONIC, &t); |
241 | | #endif |
242 | 0 | if (rc != 0) { |
243 | 0 | ABSL_INTERNAL_LOG( |
244 | 0 | FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")"); |
245 | 0 | } |
246 | 0 | return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec; |
247 | 0 | } |
248 | | |
// Thin wrapper over UnscaledCycleClock::Now() used by the frequency
// measurement code below.
class UnscaledCycleClockWrapperForInitializeFrequency {
 public:
  // Returns the current unscaled cycle-clock (TSC) reading.
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};
253 | | |
// A matched pair of readings from the kernel clock and the cycle clock,
// taken as close together in time as GetTimeTscPair() can manage.
struct TimeTscPair {
  int64_t time;  // From ReadMonotonicClockNanos().
  int64_t tsc;   // From UnscaledCycleClock::Now().
};
258 | | |
259 | | // Returns a pair of values (monotonic kernel time, TSC ticks) that |
260 | | // approximately correspond to each other. This is accomplished by |
261 | | // doing several reads and picking the reading with the lowest |
262 | | // latency. This approach is used to minimize the probability that |
263 | | // our thread was preempted between clock reads. |
264 | 0 | static TimeTscPair GetTimeTscPair() { |
265 | 0 | int64_t best_latency = std::numeric_limits<int64_t>::max(); |
266 | 0 | TimeTscPair best; |
267 | 0 | for (int i = 0; i < 10; ++i) { |
268 | 0 | int64_t t0 = ReadMonotonicClockNanos(); |
269 | 0 | int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now(); |
270 | 0 | int64_t t1 = ReadMonotonicClockNanos(); |
271 | 0 | int64_t latency = t1 - t0; |
272 | 0 | if (latency < best_latency) { |
273 | 0 | best_latency = latency; |
274 | 0 | best.time = t0; |
275 | 0 | best.tsc = tsc; |
276 | 0 | } |
277 | 0 | } |
278 | 0 | return best; |
279 | 0 | } |
280 | | |
281 | | // Measures and returns the TSC frequency by taking a pair of |
282 | | // measurements approximately `sleep_nanoseconds` apart. |
283 | 0 | static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) { |
284 | 0 | auto t0 = GetTimeTscPair(); |
285 | 0 | struct timespec ts; |
286 | 0 | ts.tv_sec = 0; |
287 | 0 | ts.tv_nsec = sleep_nanoseconds; |
288 | 0 | while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {} |
289 | 0 | auto t1 = GetTimeTscPair(); |
290 | 0 | double elapsed_ticks = t1.tsc - t0.tsc; |
291 | 0 | double elapsed_time = (t1.time - t0.time) * 1e-9; |
292 | 0 | return elapsed_ticks / elapsed_time; |
293 | 0 | } |
294 | | |
295 | | // Measures and returns the TSC frequency by calling |
296 | | // MeasureTscFrequencyWithSleep(), doubling the sleep interval until the |
297 | | // frequency measurement stabilizes. |
298 | 0 | static double MeasureTscFrequency() { |
299 | 0 | double last_measurement = -1.0; |
300 | 0 | int sleep_nanoseconds = 1000000; // 1 millisecond. |
301 | 0 | for (int i = 0; i < 8; ++i) { |
302 | 0 | double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds); |
303 | 0 | if (measurement * 0.99 < last_measurement && |
304 | 0 | last_measurement < measurement * 1.01) { |
305 | | // Use the current measurement if it is within 1% of the |
306 | | // previous measurement. |
307 | 0 | return measurement; |
308 | 0 | } |
309 | 0 | last_measurement = measurement; |
310 | 0 | sleep_nanoseconds *= 2; |
311 | 0 | } |
312 | 0 | return last_measurement; |
313 | 0 | } |
314 | | |
315 | | #endif // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY |
316 | | |
// Returns the nominal CPU frequency in Hz: first a kernel-exported TSC rate,
// then (on TSC platforms) a direct measurement, otherwise cpufreq sysfs, and
// finally 1.0 so callers may divide by the result without checking.
static double GetNominalCPUFrequency() {
  long freq = 0;

  // Google's production kernel has a patch to export the TSC
  // frequency through sysfs. If the kernel is exporting the TSC
  // frequency use that. There are issues where cpuinfo_max_freq
  // cannot be relied on because the BIOS may be exporting an invalid
  // p-state (on x86) or p-states may be used to put the processor in
  // a new mode (turbo mode). Essentially, those frequencies cannot
  // always be relied upon. The same reasons apply to /proc/cpuinfo as
  // well.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  // On these platforms, the TSC frequency is the nominal CPU
  // frequency. But without having the kernel export it directly
  // though /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no
  // other way to reliably get the TSC frequency, so we have to
  // measure it ourselves. Some CPUs abuse cpuinfo_max_freq by
  // exporting "fake" frequencies for implementing new features. For
  // example, Intel's turbo mode is enabled by exposing a p-state
  // value with a higher frequency than that of the real TSC
  // rate. Because of this, we prefer to measure the TSC rate
  // ourselves on i386 and x86-64.
  return MeasureTscFrequency();
#else

  // If CPU scaling is in effect, we want to use the *maximum*
  // frequency, not whatever CPU speed some random processor happens
  // to be using now.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                       &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

  return 1.0;
#endif  // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
}
357 | | |
358 | | #endif |
359 | | |
// Cached result of GetNumCPUs(), computed at most once on first use.
ABSL_CONST_INIT static once_flag init_num_cpus_once;
ABSL_CONST_INIT static int num_cpus = 0;

// NumCPUs() may be called before main() and before malloc is properly
// initialized, therefore this must not allocate memory.
int NumCPUs() {
  base_internal::LowLevelCallOnce(
      &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); });
  return num_cpus;
}
370 | | |
// A default frequency of 0.0 might be dangerous if it is used in division.
// Cached result of GetNominalCPUFrequency(), computed at most once.
ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once;
ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0;

// NominalCPUFrequency() may be called before main() and before malloc is
// properly initialized, therefore this must not allocate memory.
double NominalCPUFrequency() {
  base_internal::LowLevelCallOnce(
      &init_nominal_cpu_frequency_once,
      []() { nominal_cpu_frequency = GetNominalCPUFrequency(); });
  return nominal_cpu_frequency;
}
383 | | |
#if defined(_WIN32)

// Windows: the OS thread ID is already a process-wide integer.
pid_t GetTID() {
  return pid_t{GetCurrentThreadId()};
}

#elif defined(__linux__)

// Define SYS_gettid from the raw syscall number if the libc headers do not
// provide it, so the syscall() below always compiles.
#ifndef SYS_gettid
#define SYS_gettid __NR_gettid
#endif

// Linux: issue the gettid syscall directly.
pid_t GetTID() {
  return static_cast<pid_t>(syscall(SYS_gettid));
}

#elif defined(__akaros__)

pid_t GetTID() {
  // Akaros has a concept of "vcore context", which is the state the program
  // is forced into when we need to make a user-level scheduling decision, or
  // run a signal handler. This is analogous to the interrupt context that a
  // CPU might enter if it encounters some kind of exception.
  //
  // There is no current thread context in vcore context, but we need to give
  // a reasonable answer if asked for a thread ID (e.g., in a signal handler).
  // Thread 0 always exists, so if we are in vcore context, we return that.
  //
  // Otherwise, we know (since we are using pthreads) that the uthread struct
  // current_uthread is pointing to is the first element of a
  // struct pthread_tcb, so we extract and return the thread ID from that.
  //
  // TODO(dcross): Akaros anticipates moving the thread ID to the uthread
  // structure at some point. We should modify this code to remove the cast
  // when that happens.
  if (in_vcore_context())
    return 0;
  return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id;
}

#elif defined(__myriad2__)

// RTEMS (Myriad2): identify the currently running task.
pid_t GetTID() {
  uint32_t tid;
  rtems_task_ident(RTEMS_SELF, 0, &tid);
  return tid;
}

#elif defined(__APPLE__)

pid_t GetTID() {
  uint64_t tid;
  // `nullptr` here implies this thread. This only fails if the specified
  // thread is invalid or the pointer-to-tid is null, so we needn't worry about
  // it.
  pthread_threadid_np(nullptr, &tid);
  // NOTE(review): the 64-bit ID is narrowed to pid_t here — presumably safe
  // for real thread IDs on this platform; confirm before relying on the
  // high bits.
  return static_cast<pid_t>(tid);
}

#elif defined(__FreeBSD__)

pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); }

#elif defined(__OpenBSD__)

pid_t GetTID() { return getthrid(); }

#elif defined(__NetBSD__)

pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); }

#elif defined(__native_client__)

pid_t GetTID() {
  auto* thread = pthread_self();
  static_assert(sizeof(pid_t) == sizeof(thread),
                "In NaCL int expected to be the same size as a pointer");
  return reinterpret_cast<pid_t>(thread);
}

#else

// Fallback implementation of `GetTID` using `pthread_self`.
pid_t GetTID() {
  // `pthread_t` need not be arithmetic per POSIX; platforms where it isn't
  // should be handled above.
  return static_cast<pid_t>(pthread_self());
}

#endif
474 | | |
// GetCachedTID() caches the thread ID in thread-local storage (which is a
// userspace construct) to avoid unnecessary system calls. Without this caching,
// it can take roughly 98ns, while it takes roughly 1ns with this caching.
pid_t GetCachedTID() {
#ifdef ABSL_HAVE_THREAD_LOCAL
  // The thread_local initializer runs once per thread on first call, so
  // GetTID()'s syscall happens at most once per thread.
  static thread_local pid_t thread_id = GetTID();
  return thread_id;
#else
  // No thread_local support: fall back to the uncached GetTID() each call.
  return GetTID();
#endif  // ABSL_HAVE_THREAD_LOCAL
}
486 | | |
487 | | } // namespace base_internal |
488 | | ABSL_NAMESPACE_END |
489 | | } // namespace absl |