/src/highwayhash/highwayhash/arch_specific.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2017 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
#include "highwayhash/arch_specific.h"

#include <stdint.h>

#if HH_ARCH_X64 && !HH_MSC_VERSION
#include <cpuid.h>
#endif

#if HH_ARCH_PPC
#if __GLIBC__
#include <sys/platform/ppc.h>  // __ppc_get_timebase_freq
#elif __FreeBSD__
// clang-format off
#include <sys/types.h>
#include <sys/sysctl.h>  /* must come after sys/types.h */
// clang-format on
#endif
#endif

#include <stdlib.h>  // strtod
#include <string.h>  // memcpy
#include <string>
36 | | |
37 | | namespace highwayhash { |
38 | | |
39 | 0 | const char* TargetName(const TargetBits target_bit) { |
40 | 0 | switch (target_bit) { |
41 | 0 | case HH_TARGET_Portable: |
42 | 0 | return "Portable"; |
43 | 0 | case HH_TARGET_SSE41: |
44 | 0 | return "SSE41"; |
45 | 0 | case HH_TARGET_AVX2: |
46 | 0 | return "AVX2"; |
47 | 0 | case HH_TARGET_VSX: |
48 | 0 | return "VSX"; |
49 | 0 | case HH_TARGET_NEON: |
50 | 0 | return "NEON"; |
51 | 0 | default: |
52 | 0 | return nullptr; // zero, multiple, or unknown bits |
53 | 0 | } |
54 | 0 | } |
55 | | |
56 | | #if HH_ARCH_X64 |
57 | | |
58 | | namespace { |
59 | | |
60 | 0 | std::string BrandString() { |
61 | 0 | char brand_string[49]; |
62 | 0 | uint32_t abcd[4]; |
63 | | |
64 | | // Check if brand string is supported (it is on all reasonable Intel/AMD) |
65 | 0 | Cpuid(0x80000000U, 0, abcd); |
66 | 0 | if (abcd[0] < 0x80000004U) { |
67 | 0 | return std::string(); |
68 | 0 | } |
69 | | |
70 | 0 | for (int i = 0; i < 3; ++i) { |
71 | 0 | Cpuid(0x80000002U + i, 0, abcd); |
72 | 0 | memcpy(brand_string + i * 16, &abcd, sizeof(abcd)); |
73 | 0 | } |
74 | 0 | brand_string[48] = 0; |
75 | 0 | return brand_string; |
76 | 0 | } |
77 | | |
78 | | } // namespace |
79 | | |
80 | | void Cpuid(const uint32_t level, const uint32_t count, |
81 | 4 | uint32_t* HH_RESTRICT abcd) { |
82 | | #if HH_MSC_VERSION |
83 | | int regs[4]; |
84 | | __cpuidex(regs, level, count); |
85 | | for (int i = 0; i < 4; ++i) { |
86 | | abcd[i] = regs[i]; |
87 | | } |
88 | | #else |
89 | 4 | uint32_t a, b, c, d; |
90 | 4 | __cpuid_count(level, count, a, b, c, d); |
91 | 4 | abcd[0] = a; |
92 | 4 | abcd[1] = b; |
93 | 4 | abcd[2] = c; |
94 | 4 | abcd[3] = d; |
95 | 4 | #endif |
96 | 4 | } |
97 | | |
98 | 0 | uint32_t ApicId() { |
99 | 0 | uint32_t abcd[4]; |
100 | 0 | Cpuid(1, 0, abcd); |
101 | 0 | return abcd[1] >> 24; // ebx |
102 | 0 | } |
103 | | |
104 | | #endif // HH_ARCH_X64 |
105 | | |
106 | | namespace { |
107 | | |
#if HH_ARCH_X64
// Extracts the frequency embedded in a CPUID brand string such as
// "... CPU @ 2.60GHz" and returns it in Hz. Returns 0.0 if no
// "<number><unit>" token with unit MHz/GHz/THz is found.
//
// Never throws: a brand string whose unit is not directly preceded by a
// number (e.g. "2.8 GHz", or a unit at the very start of the string) is
// skipped instead of raising from the parser.
double ClockRateFromBrandString(const std::string& brand_string) {
  // Brand strings include the maximum configured frequency. These units are
  // defined by Intel CPUID documentation.
  const char* const units[3] = {"MHz", "GHz", "THz"};
  const double multipliers[3] = {1E6, 1E9, 1E12};
  for (size_t i = 0; i < 3; ++i) {
    const size_t pos_unit = brand_string.find(units[i]);
    // A unit at position 0 has no room for digits, and `pos_unit - 1` would
    // wrap around to npos.
    if (pos_unit == std::string::npos || pos_unit == 0) continue;

    const size_t pos_space = brand_string.rfind(' ', pos_unit - 1);
    if (pos_space == std::string::npos) continue;

    const std::string digits =
        brand_string.substr(pos_space + 1, pos_unit - pos_space - 1);
    char* end = nullptr;
    const double value = strtod(digits.c_str(), &end);
    if (end == digits.c_str()) continue;  // no number before the unit
    return value * multipliers[i];
  }
  return 0.0;
}
#endif  // HH_ARCH_X64

// Returns the nominal CPU clock rate in Hz, or 0.0 if it cannot be
// determined on this platform.
//   x64:         parsed from the CPUID brand string.
//   PPC Linux:   parsed from the "clock" line of /proc/cpuinfo (MHz); returns
//                -1 if the file was opened but no such line was parsed.
//   PPC FreeBSD: read from the dev.cpu.0.freq sysctl (MHz).
double DetectNominalClockRate() {
#if HH_ARCH_X64
  const double brand_rate = ClockRateFromBrandString(BrandString());
  if (brand_rate != 0.0) return brand_rate;
#elif HH_ARCH_PPC
#if __linux__
  double freq = -1;
  char line[200];
  char* s;
  char* value;

  FILE* f = fopen("/proc/cpuinfo", "r");
  if (f != nullptr) {
    while (fgets(line, sizeof(line), f) != nullptr) {
      // NOTE: the ':' is the only character we can rely on
      if (!(value = strchr(line, ':'))) continue;
      // terminate the valuename
      *value++ = '\0';
      // skip any leading spaces
      while (*value == ' ') value++;
      if ((s = strchr(value, '\n'))) *s = '\0';

      if (!strncasecmp(line, "clock", strlen("clock")) &&
          sscanf(value, "%lf", &freq) == 1) {
        freq *= 1E6;
        break;
      }
    }
    fclose(f);
    return freq;
  }
#elif __FreeBSD__
  // dev.cpu.0.freq is an integer sysctl (MHz); it must be read into an int,
  // not into the storage of a double.
  int freq_mhz = 0;
  size_t length = sizeof(freq_mhz);
  if (sysctlbyname("dev.cpu.0.freq", &freq_mhz, &length, nullptr, 0) == 0 &&
      length == sizeof(freq_mhz)) {
    return freq_mhz * 1E6;
  }
#endif
#endif

  return 0.0;
}
163 | | |
164 | | } // namespace |
165 | | |
166 | 0 | double NominalClockRate() { |
167 | | // Thread-safe caching - this is called several times. |
168 | 0 | static const double cycles_per_second = DetectNominalClockRate(); |
169 | 0 | return cycles_per_second; |
170 | 0 | } |
171 | | |
172 | 0 | double InvariantTicksPerSecond() { |
173 | | #if HH_ARCH_PPC |
174 | | #if __GLIBC__ |
175 | | static const double cycles_per_second = __ppc_get_timebase_freq(); |
176 | | #elif __FreeBSD__ |
177 | | double cycles_per_second = 0; |
178 | | size_t length = sizeof(cycles_per_second); |
179 | | sysctlbyname("kern.timecounter.tc.timebase.frequency", &cycles_per_second, |
180 | | &length, NULL, 0); |
181 | | #elif __OpenBSD__ |
182 | | /* There is currently no method of retrieving this via userland. |
183 | | * This value is correct for Power8 and Power9. |
184 | | */ |
185 | | static const double cycles_per_second = 512000000; |
186 | | #endif |
187 | | return cycles_per_second; |
188 | | #else |
189 | 0 | return NominalClockRate(); |
190 | 0 | #endif |
191 | 0 | } |
192 | | |
193 | | } // namespace highwayhash |