/src/vvdec/source/Lib/CommonLib/x86/CommonDefX86.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | /* |
44 | | * \ingroup CommonLib |
 * \file CommonDefX86.cpp 
 * \brief This file contains the commonly used SIMD x86 functions. 
47 | | */ |
48 | | |
49 | | #include "CommonDefX86.h" |
50 | | |
51 | | #include <array> |
52 | | #include <cstdint> |
53 | | #include "CommonLib/CommonDef.h" |
54 | | |
55 | | |
56 | | #ifdef REAL_TARGET_X86 |
57 | | # if defined( _WIN32 ) && !defined( __MINGW32__ ) |
58 | | # include <intrin.h> |
59 | | # else |
60 | | # include <x86intrin.h> // needed for _xgetbv, which is not provided by simd-everywhere |
61 | | # include <cpuid.h> |
62 | | # endif |
63 | | #endif |
64 | | |
65 | | #if defined(TARGET_SIMD_X86) && ENABLE_SIMD_OPT |
66 | | |
67 | | namespace vvdec |
68 | | { |
69 | | constexpr std::array<const char*, 6> vext_names{ "SCALAR", "SSE41", "SSE42", "AVX", "AVX2", "AVX512" }; |
70 | | |
71 | | #if __GNUC__ // valid for GCC and clang |
72 | | # define NO_OPT_SIMD __attribute__( ( optimize( "no-tree-vectorize" ) ) ) |
73 | | #else |
74 | | # define NO_OPT_SIMD |
75 | | #endif |
76 | | |
77 | | #ifdef REAL_TARGET_X86 |
78 | | |
79 | | #if defined( __MINGW32__ ) && !defined( __MINGW64__ ) |
80 | | # define SIMD_UP_TO_SSE42 1 |
81 | | #else |
82 | | # define SIMD_UP_TO_SSE42 0 |
83 | | #endif |
84 | | |
85 | | |
86 | | /* use __cpuid for windows or inline assembler for gcc and clang */ |
87 | | #if defined( _WIN32 ) && !defined( __MINGW32__ ) |
88 | | # define doCpuid __cpuid |
89 | | # define doCpuidex __cpuidex |
90 | | #else // !_WIN32 |
91 | | static inline void doCpuid( int CPUInfo[4], int InfoType ) |
92 | 0 | { |
93 | 0 | __get_cpuid( (unsigned) InfoType, (unsigned*) &CPUInfo[0], (unsigned*) &CPUInfo[1], (unsigned*) &CPUInfo[2], (unsigned*) &CPUInfo[3] ); |
94 | 0 | } |
95 | | # if !SIMD_UP_TO_SSE42 |
96 | | static inline void doCpuidex( int CPUInfo[4], int InfoType0, int InfoType1 ) |
97 | 0 | { |
98 | 0 | __cpuid_count( InfoType0, InfoType1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3] ); |
99 | 0 | } |
100 | | # endif // !SIMD_UP_TO_SSE42 |
101 | | #endif // !_WIN32 |
102 | | |
103 | | static inline int64_t xgetbv( int ctr ) |
104 | 0 | { |
105 | 0 | #if( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 ) \ |
106 | 0 | || ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 ) \ |
107 | 0 | || GCC_VERSION_AT_LEAST( 8, 0 ) \ |
108 | | || CLANG_VERSION_AT_LEAST( 9, 0 ) // Microsoft, Intel, newer GCC or newer Clang compiler supporting _xgetbv intrinsic |
109 | |
|
110 | 0 | return _xgetbv( ctr ); // intrinsic function for XGETBV |
111 | |
|
112 | | #elif defined( __GNUC__ ) // use inline assembly, Gnu/AT&T syntax |
113 | | |
114 | | uint32_t a, d; |
115 | | #if GCC_VERSION_AT_LEAST( 4, 4 ) || CLANG_VERSION_AT_LEAST( 3, 3 ) |
116 | | __asm( "xgetbv" : "=a"( a ), "=d"( d ) : "c"( ctr ) : ); |
117 | | #else |
118 | | __asm( ".byte 0x0f, 0x01, 0xd0" : "=a"( a ), "=d"( d ) : "c"( ctr ) : ); |
119 | | #endif |
120 | | return a | ( uint64_t( d ) << 32 ); |
121 | | |
122 | | #else // #elif defined (_MSC_FULL_VER) || (defined (__INTEL_COMPILER)...) // other compiler. try inline assembly with masm/intel/MS syntax |
123 | | |
124 | | uint32_t a, d; |
125 | | __asm { |
126 | | mov ecx, ctr |
127 | | _emit 0x0f |
128 | | _emit 0x01 |
129 | | _emit 0xd0 ; // xgetbv |
130 | | mov a, eax |
131 | | mov d, edx |
132 | | } |
133 | | return a | ( uint64_t( d ) << 32 ); |
134 | | |
135 | | #endif |
136 | 0 | } |
137 | | |
138 | | |
139 | | #define BIT_HAS_MMX (1 << 23) |
140 | | #define BIT_HAS_SSE (1 << 25) |
141 | | #define BIT_HAS_SSE2 (1 << 26) |
142 | | #define BIT_HAS_SSE3 (1 << 0) |
143 | | #define BIT_HAS_SSSE3 (1 << 9) |
144 | 0 | #define BIT_HAS_SSE41 (1 << 19) |
145 | 0 | #define BIT_HAS_SSE42 (1 << 20) |
146 | | #define BIT_HAS_SSE4a (1 << 6) |
147 | 0 | #define BIT_HAS_OSXSAVE (1 << 27) |
148 | 0 | #define BIT_HAS_AVX ((1 << 28)|BIT_HAS_OSXSAVE) |
149 | 0 | #define BIT_HAS_AVX2 (1 << 5) |
150 | | #define BIT_HAS_AVX512F (1 << 16) |
151 | | #define BIT_HAS_AVX512DQ (1 << 17) |
152 | | #define BIT_HAS_AVX512BW (1 << 30) |
153 | | #define BIT_HAS_FMA3 (1 << 12) |
154 | | #define BIT_HAS_FMA4 (1 << 16) |
155 | | #define BIT_HAS_X64 (1 << 29) |
156 | | #define BIT_HAS_XOP (1 << 11) |
157 | | |
158 | | /** |
159 | | * \brief Read instruction set extension support flags from CPU register; |
160 | | */ |
161 | | NO_OPT_SIMD |
162 | | static X86_VEXT _get_x86_extensions() |
163 | 0 | { |
164 | 0 | int regs[4] = { 0, 0, 0, 0 }; |
165 | 0 | X86_VEXT ext; |
166 | 0 | ext = SCALAR; |
167 | |
|
168 | 0 | doCpuid( regs, 0 ); |
169 | 0 | if( regs[0] == 0 ) |
170 | 0 | return ext; |
171 | | |
172 | 0 | doCpuid( regs, 1 ); |
173 | 0 | if( !( regs[2] & BIT_HAS_SSE41 ) ) |
174 | 0 | return ext; |
175 | 0 | ext = SSE41; |
176 | |
|
177 | 0 | if( !( regs[2] & BIT_HAS_SSE42 ) ) |
178 | 0 | return ext; |
179 | 0 | ext = SSE42; |
180 | |
|
181 | | # if SIMD_UP_TO_SSE42 |
182 | | return ext; |
183 | | # else // !SIMD_UP_TO_SSE42 |
184 | |
|
185 | 0 | doCpuidex( regs, 1, 1 ); |
186 | 0 | if( !( ( regs[2] & BIT_HAS_AVX ) == BIT_HAS_AVX ) ) |
187 | 0 | return ext; // first check if the cpu supports avx |
188 | 0 | if( ( xgetbv( 0 ) & 6 ) != 6 ) |
189 | 0 | return ext; // then see if the os uses YMM state management via XSAVE etc... |
190 | 0 | # ifndef _WIN32 |
 // don't detect AVX, as there are problems with MSVC producing illegal ops for AVX 
192 | 0 | ext = AVX; |
193 | 0 | # endif |
194 | | |
195 | | // #ifdef USE_AVX2 |
196 | 0 | doCpuidex( regs, 7, 0 ); |
197 | 0 | if( !( regs[1] & BIT_HAS_AVX2 ) ) |
198 | 0 | return ext; |
199 | 0 | ext = AVX2; |
200 | | // #endif |
201 | |
|
202 | | # ifdef USE_AVX512 |
203 | | if( ( xgetbv( 0 ) & 0xE0 ) != 0xE0 ) |
 return ext; // see if OPMASK state and ZMM are available and enabled 
205 | | doCpuidex( regs, 7, 0 ); |
206 | | if( !( regs[1] & BIT_HAS_AVX512F ) ) |
207 | | return ext; |
208 | | if( !( regs[1] & BIT_HAS_AVX512DQ ) ) |
209 | | return ext; |
210 | | if( !( regs[1] & BIT_HAS_AVX512BW ) ) |
211 | | return ext; |
212 | | ext = AVX512; |
213 | | # endif // USE_AVX512 |
214 | 0 | # endif // !SIMD_UP_TO_SSE42 |
215 | |
|
216 | 0 | return ext; |
217 | 0 | } |
218 | | |
219 | | #endif // REAL_TARGET_X86 |
220 | | |
221 | | NO_OPT_SIMD |
222 | | X86_VEXT read_x86_extension_flags( X86_VEXT request ) |
223 | 0 | { |
224 | 0 | #ifdef REAL_TARGET_X86 |
225 | 0 | static const X86_VEXT max_supported = _get_x86_extensions(); |
226 | 0 | static X86_VEXT ext_flags = max_supported; |
227 | | #else |
228 | | static const X86_VEXT max_supported = AVX; // disable AVX2 for non-x86 because the SIMD-Everywhere implementation is buggy |
229 | | static X86_VEXT ext_flags = SIMD_EVERYWHERE_EXTENSION_LEVEL; // default to SSE42 for WASM and SIMD-everywhere |
230 | | #endif |
231 | |
|
232 | 0 | if( request != UNDEFINED ) |
233 | 0 | { |
234 | 0 | if( request > max_supported ) |
235 | 0 | { |
236 | 0 | #ifdef REAL_TARGET_X86 |
237 | 0 | THROW_UNSUPPORTED( "requested SIMD level (" << request << ") not supported by current CPU (max " << max_supported << ")." ); |
238 | | #else |
239 | | THROW_UNSUPPORTED( "requested SIMD level (" << request << ") not supported because the SIMD-Everywhere implementation for AVX2 is buggy." ); |
240 | | #endif |
241 | 0 | } |
242 | | |
243 | 0 | ext_flags = request; |
244 | 0 | } |
245 | | |
246 | 0 | return ext_flags; |
247 | 0 | } |
248 | | |
249 | | std::string read_x86_extension_name() |
250 | 0 | { |
251 | 0 | X86_VEXT vext = read_x86_extension_flags(); |
252 | 0 | if( vext < 0 || vext >= vext_names.size() ) |
253 | 0 | { |
254 | 0 | static const char extension_not_available[] = "NA"; |
255 | 0 | return extension_not_available; |
256 | 0 | } |
257 | | |
258 | 0 | # if REAL_TARGET_X86 |
259 | | |
260 | 0 | return vext_names[vext]; |
261 | |
|
262 | | # else // !REAL_TARGET_X86 |
263 | | if( vext == SCALAR ) |
264 | | { |
265 | | return vext_names[vext]; |
266 | | } |
267 | | else |
268 | | { |
269 | | # if defined( REAL_TARGET_ARM ) |
270 | | return std::string( "NEON/SIMDE(" ) + vext_names[vext] + ")"; |
271 | | # elif defined( REAL_TARGET_WASM ) |
272 | | return std::string( "WASM/Emscripten(" ) + vext_names[vext] + ")"; |
273 | | # elif defined( REAL_TARGET_LOONGARCH ) |
274 | | return std::string( "LSX/SIMDE(" ) + vext_names[vext] + ")"; |
275 | | # else |
276 | | return std::string( "SIMDE(" ) + vext_names[vext] + ")" ; |
277 | | # endif |
278 | | } |
279 | | # endif // !REAL_TARGET_X86 |
280 | 0 | } |
281 | | |
282 | | } // namespace vvdec |
283 | | |
284 | | #endif // TARGET_SIMD_X86 |