/src/abseil-cpp/absl/base/internal/sysinfo.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2017 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/base/internal/sysinfo.h" |
16 | | |
17 | | #include "absl/base/attributes.h" |
18 | | |
19 | | #ifdef _WIN32 |
20 | | #include <windows.h> |
21 | | #else |
22 | | #include <fcntl.h> |
23 | | #include <pthread.h> |
24 | | #include <sys/stat.h> |
25 | | #include <sys/types.h> |
26 | | #include <unistd.h> |
27 | | #endif |
28 | | |
29 | | #ifdef __linux__ |
30 | | #include <sys/syscall.h> |
31 | | #endif |
32 | | |
33 | | #if defined(__APPLE__) || defined(__FreeBSD__) |
34 | | #include <sys/sysctl.h> |
35 | | #endif |
36 | | |
37 | | #ifdef __FreeBSD__ |
38 | | #include <pthread_np.h> |
39 | | #endif |
40 | | |
41 | | #ifdef __NetBSD__ |
42 | | #include <lwp.h> |
43 | | #endif |
44 | | |
45 | | #if defined(__myriad2__) |
46 | | #include <rtems.h> |
47 | | #endif |
48 | | |
49 | | #if defined(__Fuchsia__) |
50 | | #include <zircon/process.h> |
51 | | #endif |
52 | | |
53 | | #include <string.h> |
54 | | |
55 | | #include <cassert> |
56 | | #include <cerrno> |
57 | | #include <cstdint> |
58 | | #include <cstdio> |
59 | | #include <cstdlib> |
60 | | #include <ctime> |
61 | | #include <limits> |
62 | | #include <thread> // NOLINT(build/c++11) |
63 | | #include <utility> |
64 | | #include <vector> |
65 | | |
66 | | #include "absl/base/call_once.h" |
67 | | #include "absl/base/config.h" |
68 | | #include "absl/base/internal/raw_logging.h" |
69 | | #include "absl/base/internal/spinlock.h" |
70 | | #include "absl/base/internal/unscaledcycleclock.h" |
71 | | #include "absl/base/thread_annotations.h" |
72 | | |
73 | | namespace absl { |
74 | | ABSL_NAMESPACE_BEGIN |
75 | | namespace base_internal { |
76 | | |
77 | | namespace { |
78 | | |
79 | | #if defined(_WIN32) |
80 | | |
81 | | // Returns number of bits set in `bitMask` |
82 | | DWORD Win32CountSetBits(ULONG_PTR bitMask) { |
83 | | for (DWORD bitSetCount = 0; ; ++bitSetCount) { |
84 | | if (bitMask == 0) return bitSetCount; |
85 | | bitMask &= bitMask - 1; |
86 | | } |
87 | | } |
88 | | |
// Returns the number of logical CPUs using GetLogicalProcessorInformation(), or
// 0 if the number of processors is not available or can not be computed.
// https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation
int Win32NumCPUs() {
#pragma comment(lib, "kernel32.lib")
  using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;

  DWORD info_size = sizeof(Info);
  Info* info(static_cast<Info*>(malloc(info_size)));
  if (info == nullptr) return 0;

  // First attempt with room for a single record. If the buffer is too small
  // the API reports the required size through `info_size`; reallocate and
  // retry once with that size.
  bool success = GetLogicalProcessorInformation(info, &info_size);
  if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
    free(info);
    info = static_cast<Info*>(malloc(info_size));
    if (info == nullptr) return 0;
    success = GetLogicalProcessorInformation(info, &info_size);
  }

  DWORD logicalProcessorCount = 0;
  if (success) {
    Info* ptr = info;
    DWORD byteOffset = 0;
    // Walk the array of records; only processor-core records contribute,
    // with one bit per logical CPU in ProcessorMask.
    while (byteOffset + sizeof(Info) <= info_size) {
      switch (ptr->Relationship) {
        case RelationProcessorCore:
          logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask);
          break;

        case RelationNumaNode:
        case RelationCache:
        case RelationProcessorPackage:
          // Ignore other entries
          break;

        default:
          // Ignore unknown entries
          break;
      }
      byteOffset += sizeof(Info);
      ptr++;
    }
  }
  free(info);
  return static_cast<int>(logicalProcessorCount);
}
135 | | |
136 | | #endif |
137 | | |
138 | | } // namespace |
139 | | |
// Returns the number of logical CPUs for the current platform. May return 0
// in the default branch when std::thread::hardware_concurrency() cannot
// determine the count.
static int GetNumCPUs() {
#if defined(__myriad2__)
  // Myriad2 is treated as a single-CPU target.
  return 1;
#elif defined(_WIN32)
  const int hardware_concurrency = Win32NumCPUs();
  return hardware_concurrency ? hardware_concurrency : 1;
#elif defined(_AIX)
  return sysconf(_SC_NPROCESSORS_ONLN);
#else
  // Other possibilities:
  // - Read /sys/devices/system/cpu/online and use cpumask_parse()
  // - sysconf(_SC_NPROCESSORS_ONLN)
  return static_cast<int>(std::thread::hardware_concurrency());
#endif
}
155 | | |
156 | | #if defined(_WIN32) |
157 | | |
// Returns the CPU's nominal frequency in Hz, read from the registry's "~MHz"
// value for processor 0, or 1.0 if it cannot be determined. A non-zero
// fallback keeps callers that divide by the result safe.
static double GetNominalCPUFrequency() {
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \
    !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
  // UWP apps don't have access to the registry and currently don't provide an
  // API informing about CPU nominal frequency.
  return 1.0;
#else
#pragma comment(lib, "advapi32.lib")  // For Reg* functions.
  HKEY key;
  // Use the Reg* functions rather than the SH functions because shlwapi.dll
  // pulls in gdi32.dll which makes process destruction much more costly.
  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
                    "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0,
                    KEY_READ, &key) == ERROR_SUCCESS) {
    DWORD type = 0;
    DWORD data = 0;
    DWORD data_size = sizeof(data);
    auto result = RegQueryValueExA(key, "~MHz", nullptr, &type,
                                   reinterpret_cast<LPBYTE>(&data), &data_size);
    RegCloseKey(key);
    // Only trust the value when it has the expected type and size.
    if (result == ERROR_SUCCESS && type == REG_DWORD &&
        data_size == sizeof(data)) {
      return data * 1e6;  // Value is MHz.
    }
  }
  return 1.0;
#endif  // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP
}
186 | | |
187 | | #elif defined(CTL_HW) && defined(HW_CPU_FREQ) |
188 | | |
// Returns the nominal CPU frequency in Hz via the HW_CPU_FREQ sysctl
// (Apple/FreeBSD), or 1.0 if the query fails. A non-zero fallback keeps
// callers that divide by the result safe.
static double GetNominalCPUFrequency() {
  unsigned freq;
  size_t size = sizeof(freq);
  int mib[2] = {CTL_HW, HW_CPU_FREQ};
  if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) {
    return static_cast<double>(freq);
  }
  return 1.0;
}
198 | | |
199 | | #else |
200 | | |
// Helper function for reading a long from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
// The value must be the only content of the (single) read, optionally
// terminated by a newline; anything else leaves *value untouched and
// returns false.
static bool ReadLongFromFile(const char *file, long *value) {
#if defined(_POSIX_C_SOURCE)
  const int file_mode = (O_RDONLY | O_CLOEXEC);
#else
  const int file_mode = O_RDONLY;
#endif

  const int fd = open(file, file_mode);
  if (fd == -1) return false;

  char buf[1024];
  memset(buf, '\0', sizeof(buf));
  // Retry the read if it is interrupted by a signal.
  ssize_t bytes_read;
  do {
    bytes_read = read(fd, buf, sizeof(buf) - 1);
  } while (bytes_read < 0 && errno == EINTR);
  close(fd);

  if (bytes_read <= 0) return false;

  char *end;
  const long parsed = strtol(buf, &end, 10);
  // Accept only if something was present and the number is followed by a
  // newline or the end of the buffer.
  if (buf[0] == '\0' || (*end != '\n' && *end != '\0')) return false;
  *value = parsed;
  return true;
}
233 | | |
234 | | #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY) |
235 | | |
// Reads a monotonic time source and returns a value in
// nanoseconds. The returned value uses an arbitrary epoch, not the
// Unix epoch.
static int64_t ReadMonotonicClockNanos() {
  struct timespec t;
#ifdef CLOCK_MONOTONIC_RAW
  // Prefer the raw clock when available: it is not subject to NTP rate
  // adjustment, which matters because this feeds a frequency measurement.
  int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t);
#else
  int rc = clock_gettime(CLOCK_MONOTONIC, &t);
#endif
  if (rc != 0) {
    // A failing monotonic clock leaves no sane fallback; abort loudly.
    ABSL_INTERNAL_LOG(
        FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")");
  }
  return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec;
}
252 | | |
// Thin wrapper so the frequency-initialization code below has a single,
// locally-named Now() entry point for the unscaled cycle counter.
class UnscaledCycleClockWrapperForInitializeFrequency {
 public:
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};

// A paired reading of the kernel clock and the cycle counter, taken as close
// together in time as possible.
struct TimeTscPair {
  int64_t time;  // From ReadMonotonicClockNanos().
  int64_t tsc;   // From UnscaledCycleClock::Now().
};
262 | | |
263 | | // Returns a pair of values (monotonic kernel time, TSC ticks) that |
264 | | // approximately correspond to each other. This is accomplished by |
265 | | // doing several reads and picking the reading with the lowest |
266 | | // latency. This approach is used to minimize the probability that |
267 | | // our thread was preempted between clock reads. |
268 | 0 | static TimeTscPair GetTimeTscPair() { |
269 | 0 | int64_t best_latency = std::numeric_limits<int64_t>::max(); |
270 | 0 | TimeTscPair best; |
271 | 0 | for (int i = 0; i < 10; ++i) { |
272 | 0 | int64_t t0 = ReadMonotonicClockNanos(); |
273 | 0 | int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now(); |
274 | 0 | int64_t t1 = ReadMonotonicClockNanos(); |
275 | 0 | int64_t latency = t1 - t0; |
276 | 0 | if (latency < best_latency) { |
277 | 0 | best_latency = latency; |
278 | 0 | best.time = t0; |
279 | 0 | best.tsc = tsc; |
280 | 0 | } |
281 | 0 | } |
282 | 0 | return best; |
283 | 0 | } |
284 | | |
285 | | // Measures and returns the TSC frequency by taking a pair of |
286 | | // measurements approximately `sleep_nanoseconds` apart. |
287 | 0 | static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) { |
288 | 0 | auto t0 = GetTimeTscPair(); |
289 | 0 | struct timespec ts; |
290 | 0 | ts.tv_sec = 0; |
291 | 0 | ts.tv_nsec = sleep_nanoseconds; |
292 | 0 | while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {} |
293 | 0 | auto t1 = GetTimeTscPair(); |
294 | 0 | double elapsed_ticks = t1.tsc - t0.tsc; |
295 | 0 | double elapsed_time = (t1.time - t0.time) * 1e-9; |
296 | 0 | return elapsed_ticks / elapsed_time; |
297 | 0 | } |
298 | | |
299 | | // Measures and returns the TSC frequency by calling |
300 | | // MeasureTscFrequencyWithSleep(), doubling the sleep interval until the |
301 | | // frequency measurement stabilizes. |
302 | 0 | static double MeasureTscFrequency() { |
303 | 0 | double last_measurement = -1.0; |
304 | 0 | int sleep_nanoseconds = 1000000; // 1 millisecond. |
305 | 0 | for (int i = 0; i < 8; ++i) { |
306 | 0 | double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds); |
307 | 0 | if (measurement * 0.99 < last_measurement && |
308 | 0 | last_measurement < measurement * 1.01) { |
309 | | // Use the current measurement if it is within 1% of the |
310 | | // previous measurement. |
311 | 0 | return measurement; |
312 | 0 | } |
313 | 0 | last_measurement = measurement; |
314 | 0 | sleep_nanoseconds *= 2; |
315 | 0 | } |
316 | 0 | return last_measurement; |
317 | 0 | } |
318 | | |
319 | | #endif // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY |
320 | | |
// Returns the nominal CPU frequency in Hz for Linux and other POSIX-like
// systems, or 1.0 if it cannot be determined. A non-zero fallback keeps
// callers that divide by the result safe.
static double GetNominalCPUFrequency() {
  long freq = 0;

  // Google's production kernel has a patch to export the TSC
  // frequency through sysfs. If the kernel is exporting the TSC
  // frequency use that. There are issues where cpuinfo_max_freq
  // cannot be relied on because the BIOS may be exporting an invalid
  // p-state (on x86) or p-states may be used to put the processor in
  // a new mode (turbo mode). Essentially, those frequencies cannot
  // always be relied upon. The same reasons apply to /proc/cpuinfo as
  // well.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  // On these platforms, the TSC frequency is the nominal CPU
  // frequency. But without having the kernel export it directly
  // though /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no
  // other way to reliably get the TSC frequency, so we have to
  // measure it ourselves. Some CPUs abuse cpuinfo_max_freq by
  // exporting "fake" frequencies for implementing new features. For
  // example, Intel's turbo mode is enabled by exposing a p-state
  // value with a higher frequency than that of the real TSC
  // rate. Because of this, we prefer to measure the TSC rate
  // ourselves on i386 and x86-64.
  return MeasureTscFrequency();
#else

  // If CPU scaling is in effect, we want to use the *maximum*
  // frequency, not whatever CPU speed some random processor happens
  // to be using now.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                       &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

  return 1.0;
#endif  // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
}
361 | | |
362 | | #endif |
363 | | |
ABSL_CONST_INIT static once_flag init_num_cpus_once;
ABSL_CONST_INIT static int num_cpus = 0;

// NumCPUs() may be called before main() and before malloc is properly
// initialized, therefore this must not allocate memory.
// The count is computed once by GetNumCPUs() and cached for all later calls.
int NumCPUs() {
  base_internal::LowLevelCallOnce(
      &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); });
  return num_cpus;
}
374 | | |
// A default frequency of 0.0 might be dangerous if it is used in division.
ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once;
ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0;

// NominalCPUFrequency() may be called before main() and before malloc is
// properly initialized, therefore this must not allocate memory.
// The frequency is determined once by GetNominalCPUFrequency() and cached.
double NominalCPUFrequency() {
  base_internal::LowLevelCallOnce(
      &init_nominal_cpu_frequency_once,
      []() { nominal_cpu_frequency = GetNominalCPUFrequency(); });
  return nominal_cpu_frequency;
}
387 | | |
388 | | #if defined(_WIN32) |
389 | | |
// Returns the calling thread's ID from the Win32 API.
pid_t GetTID() {
  // Brace-initialization converts the DWORD thread ID into pid_t.
  return pid_t{GetCurrentThreadId()};
}
393 | | |
394 | | #elif defined(__linux__) |
395 | | |
396 | | #ifndef SYS_gettid |
397 | | #define SYS_gettid __NR_gettid |
398 | | #endif |
399 | | |
// Returns the kernel thread ID of the calling thread. gettid() historically
// had no glibc wrapper, so the syscall is invoked directly.
pid_t GetTID() {
  const long kernel_tid = syscall(SYS_gettid);
  return static_cast<pid_t>(kernel_tid);
}
403 | | |
#elif defined(__akaros__)

pid_t GetTID() {
  // Akaros has a concept of "vcore context", which is the state the program
  // is forced into when we need to make a user-level scheduling decision, or
  // run a signal handler. This is analogous to the interrupt context that a
  // CPU might enter if it encounters some kind of exception.
  //
  // There is no current thread context in vcore context, but we need to give
  // a reasonable answer if asked for a thread ID (e.g., in a signal handler).
  // Thread 0 always exists, so if we are in vcore context, we return that.
  //
  // Otherwise, we know (since we are using pthreads) that the uthread struct
  // current_uthread is pointing to is the first element of a
  // struct pthread_tcb, so we extract and return the thread ID from that.
  //
  // TODO(dcross): Akaros anticipates moving the thread ID to the uthread
  // structure at some point. We should modify this code to remove the cast
  // when that happens.
  if (in_vcore_context())
    return 0;
  return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id;
}

#elif defined(__myriad2__)

pid_t GetTID() {
  uint32_t tid;
  // NOTE(review): the rtems_task_ident() status code is ignored, so `tid`
  // would be returned uninitialized if the call could fail -- confirm that
  // an RTEMS_SELF lookup cannot fail.
  rtems_task_ident(RTEMS_SELF, 0, &tid);
  return tid;
}

#elif defined(__APPLE__)

pid_t GetTID() {
  uint64_t tid;
  // `nullptr` here implies this thread. This only fails if the specified
  // thread is invalid or the pointer-to-tid is null, so we needn't worry about
  // it.
  pthread_threadid_np(nullptr, &tid);
  return static_cast<pid_t>(tid);
}

#elif defined(__FreeBSD__)

// pthread_getthreadid_np() (from <pthread_np.h>) returns the calling
// thread's integral ID.
pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); }

#elif defined(__OpenBSD__)

// getthrid() returns the calling thread's ID.
pid_t GetTID() { return getthrid(); }

#elif defined(__NetBSD__)

// _lwp_self() (from <lwp.h>) returns the calling light-weight process's ID.
pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); }

#elif defined(__native_client__)

pid_t GetTID() {
  auto* thread = pthread_self();
  static_assert(sizeof(pid_t) == sizeof(thread),
                "In NaCL int expected to be the same size as a pointer");
  return reinterpret_cast<pid_t>(thread);
}

#elif defined(__Fuchsia__)

pid_t GetTID() {
  // Use our thread handle as the TID, which should be unique within this
  // process (but may not be globally unique). The handle value was chosen over
  // a kernel object ID (KOID) because zx_handle_t (32-bits) can be cast to a
  // pid_t type without loss of precision, but a zx_koid_t (64-bits) cannot.
  return static_cast<pid_t>(zx_thread_self());
}

#else

// Fallback implementation of `GetTID` using `pthread_self`.
pid_t GetTID() {
  // `pthread_t` need not be arithmetic per POSIX; platforms where it isn't
  // should be handled above.
  return static_cast<pid_t>(pthread_self());
}

#endif
488 | | |
// GetCachedTID() caches the thread ID in thread-local storage (which is a
// userspace construct) to avoid unnecessary system calls. Without this caching,
// it can take roughly 98ns, while it takes roughly 1ns with this caching.
pid_t GetCachedTID() {
#ifdef ABSL_HAVE_THREAD_LOCAL
  // The first call on each thread pays for one GetTID(); subsequent calls
  // just read the thread-local copy.
  static thread_local pid_t thread_id = GetTID();
  return thread_id;
#else
  // No thread_local support: query the OS on every call.
  return GetTID();
#endif  // ABSL_HAVE_THREAD_LOCAL
}
500 | | |
501 | | } // namespace base_internal |
502 | | ABSL_NAMESPACE_END |
503 | | } // namespace absl |