/src/libwebp/src/dsp/cpu.c
Line | Count | Source |
1 | | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style license |
4 | | // that can be found in the COPYING file in the root of the source |
5 | | // tree. An additional intellectual property rights grant can be found |
6 | | // in the file PATENTS. All contributing project authors may |
7 | | // be found in the AUTHORS file in the root of the source tree. |
8 | | // ----------------------------------------------------------------------------- |
9 | | // |
10 | | // CPU detection |
11 | | // |
12 | | // Author: Christian Duvivier (cduvivier@google.com) |
13 | | |
14 | | #include "src/dsp/cpu.h" |
15 | | |
16 | | #if defined(WEBP_HAVE_NEON_RTCD) |
17 | | #include <stdio.h> |
18 | | #include <string.h> |
19 | | #endif |
20 | | |
21 | | #if defined(WEBP_ANDROID_NEON) |
22 | | #include <cpu-features.h> |
23 | | #endif |
24 | | |
25 | | #include <stddef.h> |
26 | | |
27 | | #include "src/webp/types.h" |
28 | | |
29 | | //------------------------------------------------------------------------------ |
30 | | // SSE2 detection. |
31 | | // |
32 | | |
33 | | // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC. |
34 | | #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__) |
35 | | static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { |
36 | | __asm__ volatile( |
37 | | "mov %%ebx, %%edi\n" |
38 | | "cpuid\n" |
39 | | "xchg %%edi, %%ebx\n" |
40 | | : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), |
41 | | "=d"(cpu_info[3]) |
42 | | : "a"(info_type), "c"(0)); |
43 | | } |
44 | | #elif defined(__i386__) || defined(__x86_64__) |
45 | 38 | static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { |
46 | 38 | __asm__ volatile("cpuid\n" |
47 | 38 | : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), |
48 | 38 | "=d"(cpu_info[3]) |
49 | 38 | : "a"(info_type), "c"(0)); |
50 | 38 | } Line | Count | Source | 45 | 38 | static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { | 46 | 38 | __asm__ volatile("cpuid\n" | 47 | 38 | : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), | 48 | 38 | "=d"(cpu_info[3]) | 49 | 38 | : "a"(info_type), "c"(0)); | 50 | 38 | } |
Unexecuted instantiation: sharpyuv_cpu.c:GetCPUInfo |
51 | | #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) |
52 | | |
53 | | #if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 |
54 | | #include <intrin.h> |
55 | | #define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0 |
56 | | #define WEBP_HAVE_MSC_CPUID |
57 | | #elif _MSC_VER > 1310 |
58 | | #include <intrin.h> |
59 | | #define GetCPUInfo __cpuid |
60 | | #define WEBP_HAVE_MSC_CPUID |
61 | | #endif |
62 | | |
63 | | #endif |
64 | | |
65 | | // NaCl has no support for xgetbv or the raw opcode. |
66 | | #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) |
67 | 0 | static WEBP_INLINE uint64_t xgetbv(void) { |
68 | 0 | const uint32_t ecx = 0; |
69 | 0 | uint32_t eax, edx; |
70 | | // Use the raw opcode for xgetbv for compatibility with older toolchains. |
71 | 0 | __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" |
72 | 0 | : "=a"(eax), "=d"(edx) |
73 | 0 | : "c"(ecx)); |
74 | 0 | return ((uint64_t)edx << 32) | eax; |
75 | 0 | } Unexecuted instantiation: cpu.c:xgetbv Unexecuted instantiation: sharpyuv_cpu.c:xgetbv |
76 | | #elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \ |
77 | | _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 |
78 | | #include <immintrin.h> |
79 | | #define xgetbv() _xgetbv(0) |
80 | | #elif defined(_MSC_VER) && defined(_M_IX86) |
81 | | static WEBP_INLINE uint64_t xgetbv(void) { |
82 | | uint32_t eax_, edx_; |
83 | | __asm { |
84 | | xor ecx, ecx // ecx = 0 |
85 | | // Use the raw opcode for xgetbv for compatibility with older toolchains. |
86 | | __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 |
87 | | mov eax_, eax |
88 | | mov edx_, edx |
89 | | } |
90 | | return ((uint64_t)edx_ << 32) | eax_; |
91 | | } |
92 | | #else |
93 | | #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. |
94 | | #endif |
95 | | |
96 | | #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID) |
97 | | |
98 | | // helper function for run-time detection of slow SSSE3 platforms |
// Helper for the run-time detection of slow SSSE3 platforms.
// 'info' is the EAX value returned by CPUID leaf 1 (family/model/stepping).
// Returns 1 if the family is 0x06 and the combined model number is one of
// the entries of kSlowModels[], 0 otherwise.
static int CheckSlowModel(int info) {
  // Table listing display models with longer latencies for the bsr
  // instruction (i.e. 10/16 cycles instead of 2) and for some SSSE3
  // instructions like pshufb.
  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
  static const uint8_t kSlowModels[] = {
      0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
      0x1c, 0x26, 0x27   // Atom Microarchitecture
  };
  // Extract the bit-fields from an unsigned copy: right-shifting a negative
  // 'int' is implementation-defined, whereas unsigned shifts are not.
  const uint32_t eax = (uint32_t)info;
  const uint32_t family = (eax >> 8) & 0xf;
  // Bits 19..16 (extended model) form the high nibble, bits 7..4 (model) the
  // low nibble.
  const uint32_t model = ((eax >> 12) & 0xf0) | ((eax >> 4) & 0xf);
  size_t i;

  if (family != 0x06) return 0;
  for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) {
    if (model == kSlowModels[i]) return 1;
  }
  return 0;
}
117 | | |
118 | 19 | static int x86CPUInfo(CPUFeature feature) { |
119 | 19 | int max_cpuid_value; |
120 | 19 | int cpu_info[4]; |
121 | 19 | int is_intel = 0; |
122 | | |
123 | | // get the highest feature value cpuid supports |
124 | 19 | GetCPUInfo(cpu_info, 0); |
125 | 19 | max_cpuid_value = cpu_info[0]; |
126 | 19 | if (max_cpuid_value < 1) { |
127 | 0 | return 0; |
128 | 19 | } else { |
129 | 19 | const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG |
130 | 19 | const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni |
131 | 19 | const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn |
132 | 19 | is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX && |
133 | 0 | cpu_info[2] == VENDOR_ID_INTEL_ECX && |
134 | 0 | cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel? |
135 | 19 | } |
136 | | |
137 | 19 | GetCPUInfo(cpu_info, 1); |
138 | 19 | if (feature == kSSE2) { |
139 | 10 | return !!(cpu_info[3] & (1 << 26)); |
140 | 10 | } |
141 | 9 | if (feature == kSSE3) { |
142 | 0 | return !!(cpu_info[2] & (1 << 0)); |
143 | 0 | } |
144 | 9 | if (feature == kSlowSSSE3) { |
145 | 1 | if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3? |
146 | 0 | return CheckSlowModel(cpu_info[0]); |
147 | 0 | } |
148 | 1 | return 0; |
149 | 1 | } |
150 | | |
151 | 8 | if (feature == kSSE4_1) { |
152 | 8 | return !!(cpu_info[2] & (1 << 19)); |
153 | 8 | } |
154 | 0 | if (feature == kAVX) { |
155 | | // bits 27 (OSXSAVE) & 28 (256-bit AVX) |
156 | 0 | if ((cpu_info[2] & 0x18000000) == 0x18000000) { |
157 | | // XMM state and YMM state enabled by the OS. |
158 | 0 | return (xgetbv() & 0x6) == 0x6; |
159 | 0 | } |
160 | 0 | } |
161 | 0 | if (feature == kAVX2) { |
162 | 0 | if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) { |
163 | 0 | GetCPUInfo(cpu_info, 7); |
164 | 0 | return !!(cpu_info[1] & (1 << 5)); |
165 | 0 | } |
166 | 0 | } |
167 | 0 | return 0; |
168 | 0 | } Line | Count | Source | 118 | 19 | static int x86CPUInfo(CPUFeature feature) { | 119 | 19 | int max_cpuid_value; | 120 | 19 | int cpu_info[4]; | 121 | 19 | int is_intel = 0; | 122 | | | 123 | | // get the highest feature value cpuid supports | 124 | 19 | GetCPUInfo(cpu_info, 0); | 125 | 19 | max_cpuid_value = cpu_info[0]; | 126 | 19 | if (max_cpuid_value < 1) { | 127 | 0 | return 0; | 128 | 19 | } else { | 129 | 19 | const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG | 130 | 19 | const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni | 131 | 19 | const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn | 132 | 19 | is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX && | 133 | 0 | cpu_info[2] == VENDOR_ID_INTEL_ECX && | 134 | 0 | cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel? | 135 | 19 | } | 136 | | | 137 | 19 | GetCPUInfo(cpu_info, 1); | 138 | 19 | if (feature == kSSE2) { | 139 | 10 | return !!(cpu_info[3] & (1 << 26)); | 140 | 10 | } | 141 | 9 | if (feature == kSSE3) { | 142 | 0 | return !!(cpu_info[2] & (1 << 0)); | 143 | 0 | } | 144 | 9 | if (feature == kSlowSSSE3) { | 145 | 1 | if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3? | 146 | 0 | return CheckSlowModel(cpu_info[0]); | 147 | 0 | } | 148 | 1 | return 0; | 149 | 1 | } | 150 | | | 151 | 8 | if (feature == kSSE4_1) { | 152 | 8 | return !!(cpu_info[2] & (1 << 19)); | 153 | 8 | } | 154 | 0 | if (feature == kAVX) { | 155 | | // bits 27 (OSXSAVE) & 28 (256-bit AVX) | 156 | 0 | if ((cpu_info[2] & 0x18000000) == 0x18000000) { | 157 | | // XMM state and YMM state enabled by the OS. | 158 | 0 | return (xgetbv() & 0x6) == 0x6; | 159 | 0 | } | 160 | 0 | } | 161 | 0 | if (feature == kAVX2) { | 162 | 0 | if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) { | 163 | 0 | GetCPUInfo(cpu_info, 7); | 164 | 0 | return !!(cpu_info[1] & (1 << 5)); | 165 | 0 | } | 166 | 0 | } | 167 | 0 | return 0; | 168 | 0 | } |
Unexecuted instantiation: sharpyuv_cpu.c:x86CPUInfo |
169 | | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
170 | | VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; |
171 | | #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. |
172 | | static int AndroidCPUInfo(CPUFeature feature) { |
173 | | const AndroidCpuFamily cpu_family = android_getCpuFamily(); |
174 | | const uint64_t cpu_features = android_getCpuFeatures(); |
175 | | if (feature == kNEON) { |
176 | | return cpu_family == ANDROID_CPU_FAMILY_ARM && |
177 | | (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0; |
178 | | } |
179 | | return 0; |
180 | | } |
181 | | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
182 | | VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; |
183 | | #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test |
184 | | // Use compile flags as an indicator of SIMD support instead of a runtime check. |
185 | | static int wasmCPUInfo(CPUFeature feature) { |
186 | | switch (feature) { |
187 | | #ifdef WEBP_HAVE_SSE2 |
188 | | case kSSE2: |
189 | | return 1; |
190 | | #endif |
191 | | #ifdef WEBP_HAVE_SSE41 |
192 | | case kSSE3: |
193 | | case kSlowSSSE3: |
194 | | case kSSE4_1: |
195 | | return 1; |
196 | | #endif |
197 | | #ifdef WEBP_HAVE_NEON |
198 | | case kNEON: |
199 | | return 1; |
200 | | #endif |
201 | | default: |
202 | | break; |
203 | | } |
204 | | return 0; |
205 | | } |
206 | | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
207 | | VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; |
208 | | #elif defined(WEBP_HAVE_NEON) |
209 | | // In most cases this function doesn't check for NEON support (it's assumed by |
210 | | // the configuration), but enables turning off NEON at runtime, for testing |
211 | | // purposes, by setting VP8GetCPUInfo = NULL. |
212 | | static int armCPUInfo(CPUFeature feature) { |
213 | | if (feature != kNEON) return 0; |
214 | | #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD) |
215 | | { |
216 | | int has_neon = 0; |
217 | | char line[200]; |
218 | | FILE* const cpuinfo = fopen("/proc/cpuinfo", "r"); |
219 | | if (cpuinfo == NULL) return 0; |
220 | | while (fgets(line, sizeof(line), cpuinfo)) { |
221 | | if (!strncmp(line, "Features", 8)) { |
222 | | if (strstr(line, " neon ") != NULL) { |
223 | | has_neon = 1; |
224 | | break; |
225 | | } |
226 | | } |
227 | | } |
228 | | fclose(cpuinfo); |
229 | | return has_neon; |
230 | | } |
231 | | #else |
232 | | return 1; |
233 | | #endif |
234 | | } |
235 | | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
236 | | VP8CPUInfo VP8GetCPUInfo = armCPUInfo; |
237 | | #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \ |
238 | | defined(WEBP_USE_MSA) |
239 | | static int mipsCPUInfo(CPUFeature feature) { |
240 | | if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) { |
241 | | return 1; |
242 | | } else { |
243 | | return 0; |
244 | | } |
245 | | } |
246 | | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
247 | | VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; |
248 | | #else |
// Fallback when none of the platform branches above applies: the query hook
// is defined but left NULL, i.e. no detection function is installed.
249 | | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
250 | | VP8CPUInfo VP8GetCPUInfo = NULL; |
251 | | #endif |