/src/gdal/port/cpl_cpu_features.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: CPL - Common Portability Library |
4 | | * Purpose: CPU features detection |
5 | | * Author: Even Rouault, <even dot rouault at spatialys dot com> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2016, Even Rouault <even dot rouault at spatialys dot com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "cpl_port.h" |
14 | | #include "cpl_string.h" |
15 | | #include "cpl_cpu_features.h" |
16 | | |
17 | | //! @cond Doxygen_Suppress |
18 | | |
// Bit positions tested in ECX after CPL_CPUID(1, ...).
#define CPUID_SSSE3_ECX_BIT 9
#define CPUID_OSXSAVE_ECX_BIT 27
#define CPUID_AVX_ECX_BIT 28

// Bit position tested in EDX after CPL_CPUID(1, ...).
#define CPUID_SSE_EDX_BIT 25

// Masks tested against the low 32 bits returned by XGETBV.
#define BIT_XMM_STATE (1 << 1)
#define BIT_YMM_STATE (1 << 2)

// Indices into the 4-element int array filled by CPL_CPUID().
#define REG_EAX 0
#define REG_EBX 1
#define REG_ECX 2
#define REG_EDX 3
32 | | |
#if defined(__GNUC__)

// Instruction that exchanges (R/E)BX with asm operand %1. It is emitted
// both before and after CPUID, so that (R/E)BX holds its original value
// again when the asm statement ends while the EBX result of CPUID ends up
// in operand %1.
// NOTE(review): presumably needed because EBX can be reserved by the
// compiler (e.g. as the PIC register) - confirm.
#if defined(__x86_64)
#define GCC_CPUID_XCHG_BX "xchgq %%rbx, %q1"
#else
#define GCC_CPUID_XCHG_BX "xchgl %%ebx, %1"
#endif

// Execute CPUID with EAX = level and store EAX/EBX/ECX/EDX in a/b/c/d.
#define GCC_CPUID(level, a, b, c, d)                                           \
    __asm__(GCC_CPUID_XCHG_BX "\n"                                             \
            "cpuid\n" GCC_CPUID_XCHG_BX                                        \
            : "=a"(a), "=r"(b), "=c"(c), "=d"(d)                               \
            : "0"(level))

// Fill array[0..3] with the EAX, EBX, ECX and EDX results of CPUID(level).
#define CPL_CPUID(level, array)                                                \
    GCC_CPUID(level, array[0], array[1], array[2], array[3])

#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))

#include <intrin.h>

// Same contract as the GCC variant, delegated to the MSVC intrinsic.
#define CPL_CPUID(level, array) __cpuid(array, level)

#endif
59 | | |
60 | | #if defined(HAVE_SSE_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSE) |
61 | | |
62 | | /************************************************************************/ |
63 | | /* CPLHaveRuntimeSSE() */ |
64 | | /************************************************************************/ |
65 | | |
66 | | bool CPLHaveRuntimeSSE() |
67 | | { |
68 | | int cpuinfo[4] = {0, 0, 0, 0}; |
69 | | CPL_CPUID(1, cpuinfo); |
70 | | return (cpuinfo[REG_EDX] & (1 << CPUID_SSE_EDX_BIT)) != 0; |
71 | | } |
72 | | |
73 | | #endif |
74 | | |
75 | | #if defined(HAVE_SSSE3_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSSE3) |
76 | | |
77 | | /************************************************************************/ |
78 | | /* CPLHaveRuntimeSSSE3() */ |
79 | | /************************************************************************/ |
80 | | |
81 | | static inline bool CPLDetectSSSE3() |
82 | 0 | { |
83 | 0 | int cpuinfo[4] = {0, 0, 0, 0}; |
84 | 0 | CPL_CPUID(1, cpuinfo); |
85 | 0 | return (cpuinfo[REG_ECX] & (1 << CPUID_SSSE3_ECX_BIT)) != 0; |
86 | 0 | } |
87 | | |
88 | | #if defined(__GNUC__) && !defined(DEBUG) |
89 | | bool bCPLHasSSSE3 = false; |
90 | | static void CPLHaveRuntimeSSSE3Initialize() __attribute__((constructor)); |
91 | | |
92 | | static void CPLHaveRuntimeSSSE3Initialize() |
93 | | { |
94 | | bCPLHasSSSE3 = CPLDetectSSSE3(); |
95 | | } |
96 | | #else |
97 | | bool CPLHaveRuntimeSSSE3() |
98 | 0 | { |
99 | 0 | #ifdef DEBUG |
100 | 0 | if (!CPLTestBool(CPLGetConfigOption("GDAL_USE_SSSE3", "YES"))) |
101 | 0 | return false; |
102 | 0 | #endif |
103 | 0 | return CPLDetectSSSE3(); |
104 | 0 | } |
105 | | #endif |
106 | | |
107 | | #endif // defined(HAVE_SSSE3_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSSE3) |
108 | | |
109 | | #if defined(HAVE_AVX_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX) |
110 | | |
111 | | /************************************************************************/ |
112 | | /* CPLHaveRuntimeAVX() */ |
113 | | /************************************************************************/ |
114 | | |
115 | | #if defined(__GNUC__) |
116 | | |
117 | | static bool CPLDetectRuntimeAVX() |
118 | 6 | { |
119 | 6 | int cpuinfo[4] = {0, 0, 0, 0}; |
120 | 6 | CPL_CPUID(1, cpuinfo); |
121 | | |
122 | | // Check OSXSAVE feature. |
123 | 6 | if ((cpuinfo[REG_ECX] & (1 << CPUID_OSXSAVE_ECX_BIT)) == 0) |
124 | 0 | { |
125 | 0 | return false; |
126 | 0 | } |
127 | | |
128 | | // Check AVX feature. |
129 | 6 | if ((cpuinfo[REG_ECX] & (1 << CPUID_AVX_ECX_BIT)) == 0) |
130 | 0 | { |
131 | 0 | return false; |
132 | 0 | } |
133 | | |
134 | | // Issue XGETBV and check the XMM and YMM state bit. |
135 | 6 | unsigned int nXCRLow; |
136 | 6 | unsigned int nXCRHigh; |
137 | 6 | __asm__("xgetbv" : "=a"(nXCRLow), "=d"(nXCRHigh) : "c"(0)); |
138 | 6 | if ((nXCRLow & (BIT_XMM_STATE | BIT_YMM_STATE)) != |
139 | 6 | (BIT_XMM_STATE | BIT_YMM_STATE)) |
140 | 0 | { |
141 | 0 | return false; |
142 | 0 | } |
143 | 6 | CPL_IGNORE_RET_VAL(nXCRHigh); // unused |
144 | | |
145 | 6 | return true; |
146 | 6 | } |
147 | | |
148 | | bool bCPLHasAVX = false; |
149 | | static void CPLHaveRuntimeAVXInitialize() __attribute__((constructor(101))); |
150 | | |
151 | | static void CPLHaveRuntimeAVXInitialize() |
152 | 6 | { |
153 | 6 | bCPLHasAVX = CPLDetectRuntimeAVX(); |
154 | 6 | } |
155 | | |
156 | | #elif defined(_MSC_VER) |
157 | | bool CPLHaveRuntimeAVX() |
158 | | { |
159 | | static const bool bHasAVX = []() -> bool |
160 | | { |
161 | | int cpuinfo[4] = {0, 0, 0, 0}; |
162 | | CPL_CPUID(1, cpuinfo); |
163 | | |
164 | | // Check OSXSAVE feature. |
165 | | if ((cpuinfo[REG_ECX] & (1 << CPUID_OSXSAVE_ECX_BIT)) == 0) |
166 | | { |
167 | | return false; |
168 | | } |
169 | | |
170 | | // Check AVX feature. |
171 | | if ((cpuinfo[REG_ECX] & (1 << CPUID_AVX_ECX_BIT)) == 0) |
172 | | { |
173 | | return false; |
174 | | } |
175 | | |
176 | | // Issue XGETBV and check the XMM and YMM state bit. |
177 | | unsigned __int64 xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); |
178 | | if ((xcrFeatureMask & (BIT_XMM_STATE | BIT_YMM_STATE)) != |
179 | | (BIT_XMM_STATE | BIT_YMM_STATE)) |
180 | | { |
181 | | return false; |
182 | | } |
183 | | |
184 | | return true; |
185 | | }(); |
186 | | |
187 | | return bHasAVX; |
188 | | } |
189 | | |
190 | | #else |
191 | | |
// Fallback for compilers that are neither GCC-compatible nor MSVC: no
// detection is attempted and AVX is always reported as unavailable.
bool CPLHaveRuntimeAVX()
{
    const bool bAVXAvailable = false;
    return bAVXAvailable;
}
196 | | |
197 | | #endif |
198 | | |
199 | | #endif // defined(HAVE_AVX_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX) |
200 | | |
201 | | #if defined(HAVE_AVX2_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX2) |
202 | | |
203 | | #if defined(__GNUC__) |
204 | | |
205 | | bool bCPLHasAVX2 = false; |
206 | | // Use 102 because CPLHaveRuntimeAVXInitialize() uses 101, so we are run |
207 | | // afterwards |
208 | | static void CPLHaveRuntimeAVX2Initialize() __attribute__((constructor(102))); |
209 | | |
210 | | static void CPLHaveRuntimeAVX2Initialize() |
211 | 6 | { |
212 | 6 | bCPLHasAVX2 = CPLHaveRuntimeAVX() && __builtin_cpu_supports("avx2"); |
213 | 6 | } |
214 | | |
215 | | #else |
216 | | |
217 | | /************************************************************************/ |
218 | | /* CPLHaveRuntimeAVX2() */ |
219 | | /************************************************************************/ |
220 | | |
// Reports whether AVX2 is usable (non-GCC implementation). The detection is
// wrapped in the initializer of a function-local static, so it runs once,
// on the first call. Compilers other than MSVC always get false.
bool CPLHaveRuntimeAVX2()
{
    static const bool bAVX2Usable = []() -> bool
    {
#if defined(_MSC_VER)
        // CPUID leaf 7, sub-leaf 0: EBX bit 5 = AVX2.
        const int nAVX2Mask = 1 << 5;

        // AVX2 is only considered when AVX itself is usable.
        if (CPLHaveRuntimeAVX())
        {
            int anRegs[4] = {};
            __cpuidex(anRegs, 7, 0);
            return (anRegs[REG_EBX] & nAVX2Mask) != 0;
        }
        return false;
#else
        return false;
#endif
    }();

    return bAVX2Usable;
}
237 | | |
238 | | #endif |
239 | | |
240 | | #endif // defined(HAVE_AVX2_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX2) |
241 | | |
242 | | //! @endcond |