/src/openh264/codec/common/src/cpu.cpp
Line | Count | Source |
1 | | /*! |
2 | | * \copy |
3 | | * Copyright (c) 2009-2013, Cisco Systems |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions |
8 | | * are met: |
9 | | * |
10 | | * * Redistributions of source code must retain the above copyright |
11 | | * notice, this list of conditions and the following disclaimer. |
12 | | * |
13 | | * * Redistributions in binary form must reproduce the above copyright |
14 | | * notice, this list of conditions and the following disclaimer in |
15 | | * the documentation and/or other materials provided with the |
16 | | * distribution. |
17 | | * |
18 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
21 | | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
22 | | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
23 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
24 | | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
25 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
26 | | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
28 | | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | | * POSSIBILITY OF SUCH DAMAGE. |
30 | | * |
31 | | * |
32 | | * \file cpu.cpp |
33 | | * |
34 | | * \brief CPU compatibility detection |
35 | | * |
36 | | * \date 04/29/2009 Created |
37 | | * |
38 | | ************************************************************************************* |
39 | | */ |
40 | | #include <string.h> |
41 | | #include <stdio.h> |
42 | | #ifdef ANDROID_NDK |
43 | | #include <cpu-features.h> |
44 | | #endif |
45 | | #include "cpu.h" |
46 | | #include "cpu_core.h" |
47 | | |
48 | | |
49 | | |
50 | | #define CPU_Vendor_AMD "AuthenticAMD" |
51 | | #define CPU_Vendor_INTEL "GenuineIntel" |
52 | | #define CPU_Vendor_CYRIX "CyrixInstead" |
53 | | |
54 | | #if defined(X86_ASM) |
55 | | |
56 | | uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
57 | | uint32_t uiCPU = 0; |
58 | | uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0; |
59 | | int32_t CacheLineSize = 0; |
60 | | int8_t chVendorName[16] = { 0 }; |
61 | | uint32_t uiMaxCpuidLevel = 0; |
62 | | |
63 | | if (!WelsCPUIdVerify()) { |
64 | | /* cpuid is not supported in cpu */ |
65 | | return 0; |
66 | | } |
67 | | |
68 | | WelsCPUId (0, &uiFeatureA, (uint32_t*)&chVendorName[0], (uint32_t*)&chVendorName[8], (uint32_t*)&chVendorName[4]); |
69 | | uiMaxCpuidLevel = uiFeatureA; |
70 | | if (uiMaxCpuidLevel == 0) { |
71 | | /* maximum input value for basic cpuid information */ |
72 | | return 0; |
73 | | } |
74 | | |
75 | | WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD); |
76 | | if ((uiFeatureD & 0x00800000) == 0) { |
77 | | /* Basic MMX technology is not support in cpu, mean nothing for us so return here */ |
78 | | return 0; |
79 | | } |
80 | | |
81 | | uiCPU = WELS_CPU_MMX; |
82 | | if (uiFeatureD & 0x02000000) { |
83 | | /* SSE technology is identical to AMD MMX extensions */ |
84 | | uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE; |
85 | | } |
86 | | if (uiFeatureD & 0x04000000) { |
87 | | /* SSE2 support here */ |
88 | | uiCPU |= WELS_CPU_SSE2; |
89 | | } |
90 | | if (uiFeatureD & 0x00000001) { |
91 | | /* x87 FPU on-chip checking */ |
92 | | uiCPU |= WELS_CPU_FPU; |
93 | | } |
94 | | if (uiFeatureD & 0x00008000) { |
95 | | /* CMOV instruction checking */ |
96 | | uiCPU |= WELS_CPU_CMOV; |
97 | | } |
98 | | if ((!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) || |
99 | | (!strcmp ((const char*)chVendorName, CPU_Vendor_AMD))) { // confirmed_safe_unsafe_usage |
100 | | if (uiFeatureD & 0x10000000) { |
101 | | /* Multi-Threading checking: contains of multiple logic processors */ |
102 | | uiCPU |= WELS_CPU_HTT; |
103 | | } |
104 | | } |
105 | | |
106 | | if (uiFeatureC & 0x00000001) { |
107 | | /* SSE3 support here */ |
108 | | uiCPU |= WELS_CPU_SSE3; |
109 | | } |
110 | | if (uiFeatureC & 0x00000200) { |
111 | | /* SSSE3 support here */ |
112 | | uiCPU |= WELS_CPU_SSSE3; |
113 | | } |
114 | | if (uiFeatureC & 0x00080000) { |
115 | | /* SSE4.1 support here, 45nm Penryn processor */ |
116 | | uiCPU |= WELS_CPU_SSE41; |
117 | | } |
118 | | if (uiFeatureC & 0x00100000) { |
119 | | /* SSE4.2 support here, next generation Nehalem processor */ |
120 | | uiCPU |= WELS_CPU_SSE42; |
121 | | } |
122 | | if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) { |
123 | | /* AVX supported */ |
124 | | uiCPU |= WELS_CPU_AVX; |
125 | | } |
126 | | if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) { |
127 | | /* AVX FMA supported */ |
128 | | uiCPU |= WELS_CPU_FMA; |
129 | | } |
130 | | if (uiFeatureC & 0x02000000) { |
131 | | /* AES checking */ |
132 | | uiCPU |= WELS_CPU_AES; |
133 | | } |
134 | | if (uiFeatureC & 0x00400000) { |
135 | | /* MOVBE checking */ |
136 | | uiCPU |= WELS_CPU_MOVBE; |
137 | | } |
138 | | |
139 | | if (uiMaxCpuidLevel >= 7) { |
140 | | uiFeatureC = 0; |
141 | | WelsCPUId (7, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD); |
142 | | if ((uiCPU & WELS_CPU_AVX) && (uiFeatureB & 0x00000020)) { |
143 | | /* AVX2 supported */ |
144 | | uiCPU |= WELS_CPU_AVX2; |
145 | | } |
146 | | } |
147 | | |
148 | | if (uiMaxCpuidLevel >= 7) { |
149 | | uiFeatureC = WelsCPUDetectAVX512(); |
150 | | if (uiFeatureC & 0x10000) uiCPU |= WELS_CPU_AVX512F; |
151 | | if (uiFeatureC & 0x10000000) uiCPU |= WELS_CPU_AVX512CD; |
152 | | if (uiFeatureC & 0x20000) uiCPU |= WELS_CPU_AVX512DQ; |
153 | | if (uiFeatureC & 0x40000000) uiCPU |= WELS_CPU_AVX512BW; |
154 | | if (uiFeatureC & 0x80000000) uiCPU |= WELS_CPU_AVX512VL; |
155 | | } |
156 | | |
157 | | if (pNumberOfLogicProcessors != NULL) { |
158 | | if (uiCPU & WELS_CPU_HTT) { |
159 | | *pNumberOfLogicProcessors = (uiFeatureB & 0x00ff0000) >> 16; // feature bits: 23-16 on returned EBX |
160 | | } else { |
161 | | *pNumberOfLogicProcessors = 0; |
162 | | } |
163 | | if (!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) { |
164 | | if (uiMaxCpuidLevel >= 4) { |
165 | | uiFeatureC = 0; |
166 | | WelsCPUId (0x4, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD); |
167 | | if (uiFeatureA != 0) { |
168 | | *pNumberOfLogicProcessors = ((uiFeatureA & 0xfc000000) >> 26) + 1; |
169 | | } |
170 | | } |
171 | | } |
172 | | } |
173 | | |
174 | | WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD); |
175 | | |
176 | | if ((!strcmp ((const char*)chVendorName, CPU_Vendor_AMD)) |
177 | | && (uiFeatureA >= 0x80000001)) { // confirmed_safe_unsafe_usage |
178 | | WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD); |
179 | | if (uiFeatureD & 0x00400000) { |
180 | | uiCPU |= WELS_CPU_MMXEXT; |
181 | | } |
182 | | if (uiFeatureD & 0x80000000) { |
183 | | uiCPU |= WELS_CPU_3DNOW; |
184 | | } |
185 | | } |
186 | | |
187 | | if (!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) { // confirmed_safe_unsafe_usage |
188 | | int32_t family, model; |
189 | | |
190 | | WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD); |
191 | | family = ((uiFeatureA >> 8) & 0xf) + ((uiFeatureA >> 20) & 0xff); |
192 | | model = ((uiFeatureA >> 4) & 0xf) + ((uiFeatureA >> 12) & 0xf0); |
193 | | |
194 | | if ((family == 6) && (model == 9 || model == 13 || model == 14)) { |
195 | | uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3); |
196 | | } |
197 | | } |
198 | | |
199 | | // get cache line size |
200 | | if ((!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) |
201 | | || ! (strcmp ((const char*)chVendorName, CPU_Vendor_CYRIX))) { // confirmed_safe_unsafe_usage |
202 | | WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD); |
203 | | |
204 | | CacheLineSize = (uiFeatureB & 0xff00) >> |
205 | | 5; // ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte |
206 | | |
207 | | if (CacheLineSize == 128) { |
208 | | uiCPU |= WELS_CPU_CACHELINE_128; |
209 | | } else if (CacheLineSize == 64) { |
210 | | uiCPU |= WELS_CPU_CACHELINE_64; |
211 | | } else if (CacheLineSize == 32) { |
212 | | uiCPU |= WELS_CPU_CACHELINE_32; |
213 | | } else if (CacheLineSize == 16) { |
214 | | uiCPU |= WELS_CPU_CACHELINE_16; |
215 | | } |
216 | | } |
217 | | |
218 | | return uiCPU; |
219 | | } |
220 | | |
221 | | |
222 | | void WelsCPURestore (const uint32_t kuiCPU) { |
223 | | if (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) { |
224 | | WelsEmms(); |
225 | | } |
226 | | } |
227 | | |
228 | | #elif defined(HAVE_NEON) //For supporting both android platform and iOS platform |
229 | | #if defined(ANDROID_NDK) |
230 | | uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
231 | | uint32_t uiCPU = 0; |
232 | | AndroidCpuFamily cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; |
233 | | uint64_t uiFeatures = 0; |
234 | | cpuFamily = android_getCpuFamily(); |
235 | | if (cpuFamily == ANDROID_CPU_FAMILY_ARM) { |
236 | | uiFeatures = android_getCpuFeatures(); |
237 | | if (uiFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) { |
238 | | uiCPU |= WELS_CPU_ARMv7; |
239 | | } |
240 | | if (uiFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) { |
241 | | uiCPU |= WELS_CPU_VFPv3; |
242 | | } |
243 | | if (uiFeatures & ANDROID_CPU_ARM_FEATURE_NEON) { |
244 | | uiCPU |= WELS_CPU_NEON; |
245 | | } |
246 | | } |
247 | | |
248 | | if (pNumberOfLogicProcessors != NULL) { |
249 | | *pNumberOfLogicProcessors = android_getCpuCount(); |
250 | | } |
251 | | |
252 | | return uiCPU; |
253 | | } |
254 | | |
255 | | #elif defined(__APPLE__) |
/*!
 * \brief   Apple flavour of the CPU feature detection (compile-time only).
 *
 * \param   pNumberOfLogicProcessors  not read or written by this variant.
 *
 * \return  WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON when the compiler
 *          defines __ARM_NEON__, otherwise 0.
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
#if defined(__ARM_NEON__)
  return WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON;
#else
  return 0;
#endif
}
266 | | #elif defined(__linux__) |
267 | | |
268 | | /* Generic arm/linux cpu feature detection */ |
269 | | uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
270 | | int flags = 0; |
271 | | FILE* f = fopen ("/proc/cpuinfo", "r"); |
272 | | |
273 | | #if defined(__chromeos__) |
274 | | flags |= WELS_CPU_NEON; |
275 | | #endif |
276 | | |
277 | | if (!f) { |
278 | | return flags; |
279 | | } |
280 | | |
281 | | char buf[200]; |
282 | | while (fgets (buf, sizeof (buf), f)) { |
283 | | if (!strncmp (buf, "Features", strlen ("Features"))) { |
284 | | // The asimd and fp features are listed on 64 bit ARMv8 kernels |
285 | | if (strstr (buf, " neon ") || strstr (buf, " asimd ")) |
286 | | flags |= WELS_CPU_NEON; |
287 | | if (strstr (buf, " vfpv3 ") || strstr (buf, " fp ")) |
288 | | flags |= WELS_CPU_VFPv3; |
289 | | break; |
290 | | } |
291 | | } |
292 | | fclose (f); |
293 | | return flags; |
294 | | } |
295 | | |
296 | | #else /* HAVE_NEON enabled but no runtime detection */ |
297 | | |
298 | | /* No runtime feature detection available, but built with HAVE_NEON - assume |
299 | | * that NEON and all associated features are available. */ |
300 | | |
301 | | uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
302 | | return WELS_CPU_ARMv7 | |
303 | | WELS_CPU_VFPv3 | |
304 | | WELS_CPU_NEON; |
305 | | } |
306 | | #endif |
307 | | #elif defined(HAVE_NEON_AARCH64) |
308 | | |
309 | | /* For AArch64, no runtime detection actually is necessary for now, since |
310 | | * NEON and VFPv3 is mandatory on all such CPUs. (/proc/cpuinfo doesn't |
311 | | * contain neon, and the android cpufeatures library doesn't return it |
312 | | * either.) */ |
313 | | |
314 | | uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
315 | | return WELS_CPU_VFPv3 | |
316 | | WELS_CPU_NEON; |
317 | | } |
318 | | |
319 | | #elif defined(mips) |
320 | | /* Get cpu features from cpuinfo. */ |
321 | | static uint32_t get_cpu_flags_from_cpuinfo(void) |
322 | | { |
323 | | uint32_t flags = 0; |
324 | | |
325 | | # ifdef __linux__ |
326 | | FILE* fp = fopen("/proc/cpuinfo", "r"); |
327 | | if (!fp) |
328 | | return flags; |
329 | | |
330 | | char buf[200]; |
331 | | memset(buf, 0, sizeof(buf)); |
332 | | while (fgets(buf, sizeof(buf), fp)) { |
333 | | if (!strncmp(buf, "model name", strlen("model name"))) { |
334 | | if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B") || |
335 | | strstr(buf, "Loongson-2K")) { |
336 | | flags |= WELS_CPU_MMI; |
337 | | } |
338 | | break; |
339 | | } |
340 | | } |
341 | | while (fgets(buf, sizeof(buf), fp)) { |
342 | | if(!strncmp(buf, "ASEs implemented", strlen("ASEs implemented"))) { |
343 | | if (strstr(buf, "loongson-mmi") && strstr(buf, "loongson-ext")) { |
344 | | flags |= WELS_CPU_MMI; |
345 | | } |
346 | | if (strstr(buf, "msa")) { |
347 | | flags |= WELS_CPU_MSA; |
348 | | } |
349 | | break; |
350 | | } |
351 | | } |
352 | | fclose(fp); |
353 | | # endif |
354 | | |
355 | | return flags; |
356 | | } |
357 | | |
358 | | uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
359 | | return get_cpu_flags_from_cpuinfo(); |
360 | | } |
361 | | |
362 | | #elif defined(__loongarch__) && defined(__linux__) |
363 | | /* The getauxval is used to dynamically identify the characteristics |
364 | | * of the loongarch in the running processor during software execution. */ |
365 | | #include <sys/auxv.h> |
366 | | #define LA_HWCAP_LSX (1 << 4) |
367 | | #define LA_HWCAP_LASX (1 << 5) |
368 | | |
369 | | uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
370 | | uint32_t flags = 0; |
371 | | uint32_t flag = (uint32_t)getauxval(AT_HWCAP); |
372 | | if (flag & LA_HWCAP_LSX) |
373 | | flags |= WELS_CPU_LSX; |
374 | | if (flag & LA_HWCAP_LASX) |
375 | | flags |= WELS_CPU_LASX; |
376 | | return flags; |
377 | | } |
378 | | |
379 | | #else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64, loongarch nor mips */ |
380 | | |
381 | 0 | uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { |
382 | 0 | return 0; |
383 | 0 | } |
384 | | |
385 | | #endif |