/src/vvenc/source/Lib/CommonLib/x86/CommonDefX86.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | /* |
44 | | * \ingroup CommonLib |
45 | | * \file CommonDefX86.cpp |
46 | | * \brief This file contains the commonly used SIMD x86 functions. |
47 | | */ |
48 | | |
49 | | #include "CommonDefX86.h" |
50 | | |
51 | | #include <map> |
52 | | #include <cstdint> |
53 | | #include "CommonLib/CommonDef.h" |
54 | | |
55 | | |
56 | | #ifdef REAL_TARGET_X86 |
57 | | # if defined( _WIN32 ) && !defined( __MINGW32__ ) |
58 | | # include <intrin.h> |
59 | | # else |
60 | | # include <x86intrin.h> // needed for _xgetbv, which is not provided by simd-everywhere |
61 | | # include <cpuid.h> |
62 | | # endif |
63 | | #endif |
64 | | |
65 | | #if defined(TARGET_SIMD_X86) && ENABLE_SIMD_OPT |
66 | | |
67 | | namespace vvenc |
68 | | { |
69 | | |
70 | | # if REAL_TARGET_X86 |
71 | | const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SSE41, "SSE41" }, { SSE42, "SSE42" }, { AVX, "AVX" }, { AVX2, "AVX2" }, { AVX512, "AVX512" } }; |
72 | | # else // !REAL_TARGET_X86 |
73 | | # if defined( REAL_TARGET_ARM ) |
74 | | const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SIMD_EVERYWHERE_EXTENSION_LEVEL, "NEON" } }; |
75 | | # elif defined( REAL_TARGET_WASM ) |
76 | | const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SIMD_EVERYWHERE_EXTENSION_LEVEL, "WASM" } }; |
77 | | # elif defined( REAL_TARGET_LOONGARCH ) |
78 | | const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SIMD_EVERYWHERE_EXTENSION_LEVEL, "LSX" } }; |
79 | | # else |
80 | | const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SIMD_EVERYWHERE_EXTENSION_LEVEL, "SIMDE" } }; |
81 | | # endif |
82 | | # endif // !REAL_TARGET_X86 |
83 | | |
84 | | const std::string& x86_vext_to_string( X86_VEXT vext ) |
85 | 0 | { |
86 | 0 | try |
87 | 0 | { |
88 | 0 | return vext_names.at( vext ); |
89 | 0 | } |
90 | 0 | catch( std::out_of_range& ) |
91 | 0 | { |
92 | 0 | THROW( "Invalid SIMD extension value " << vext ); |
93 | 0 | } |
94 | 0 | } |
95 | | |
96 | | X86_VEXT string_to_x86_vext( const std::string& ext_name ) |
97 | 0 | { |
98 | 0 | if( ext_name.empty() ) |
99 | 0 | { |
100 | 0 | return UNDEFINED; |
101 | 0 | } |
102 | | |
103 | 0 | for( auto& it: vext_names ) |
104 | 0 | { |
105 | 0 | if( it.second == ext_name ) |
106 | 0 | { |
107 | 0 | return it.first; |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | 0 | THROW( "Invalid SIMD Mode string: \"" << ext_name << "\"" ); |
112 | 0 | } |
113 | | |
// NO_OPT_SIMD disables auto-vectorization for the function it annotates.
// The optimize attribute is a GCC extension: Clang also defines __GNUC__ but does not
// implement it and only reports the attribute as unknown/ignored, so it is applied
// for non-Clang GNU compilers only and expands to nothing everywhere else.
#if defined( __GNUC__ ) && !defined( __clang__ )
# define NO_OPT_SIMD __attribute__( ( optimize( "no-tree-vectorize" ) ) )
#else
# define NO_OPT_SIMD
#endif
119 | | |
120 | | #ifdef REAL_TARGET_X86 |
121 | | |
// SIMD_UP_TO_SSE42 is 1 only when __MINGW32__ is defined without __MINGW64__;
// in that configuration the extension detection stops after SSE4.2.
#if !defined( __MINGW32__ ) || defined( __MINGW64__ )
# define SIMD_UP_TO_SSE42 0
#else
# define SIMD_UP_TO_SSE42 1
#endif
127 | | |
128 | | |
/* use the __cpuid/__cpuidex intrinsics for windows or the <cpuid.h> helpers for gcc and clang */
#if defined( _WIN32 ) && !defined( __MINGW32__ )
# define doCpuid   __cpuid
# define doCpuidex __cpuidex
#else   // !_WIN32
/**
 * \brief Query one CPUID leaf.
 * \param CPUInfo  receives the four result registers in the order EAX, EBX, ECX, EDX
 * \param InfoType leaf number to query
 *
 * __get_cpuid() reports an unsupported leaf through its return value and leaves the
 * outputs untouched in that case. The results are therefore collected in zero
 * initialised locals, so an unsupported leaf yields all zeros instead of whatever
 * CPUInfo contained before the call.
 */
static inline void doCpuid( int CPUInfo[4], int InfoType )
{
  unsigned eax = 0;
  unsigned ebx = 0;
  unsigned ecx = 0;
  unsigned edx = 0;

  // the return value only tells whether the registers were written; they stay zero if not
  (void) __get_cpuid( static_cast<unsigned>( InfoType ), &eax, &ebx, &ecx, &edx );

  CPUInfo[0] = static_cast<int>( eax );
  CPUInfo[1] = static_cast<int>( ebx );
  CPUInfo[2] = static_cast<int>( ecx );
  CPUInfo[3] = static_cast<int>( edx );
}
# if !SIMD_UP_TO_SSE42
/**
 * \brief Query one CPUID leaf with an explicit sub-leaf.
 * \param CPUInfo   receives the four result registers in the order EAX, EBX, ECX, EDX
 * \param InfoType0 leaf number
 * \param InfoType1 sub-leaf number
 *
 * __cpuid_count() performs no range check, the caller has to make sure the leaf exists.
 */
static inline void doCpuidex( int CPUInfo[4], int InfoType0, int InfoType1 )
{
  __cpuid_count( InfoType0, InfoType1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3] );
}
# endif   // !SIMD_UP_TO_SSE42
#endif   // !_WIN32
145 | | |
146 | | static inline int64_t xgetbv( int ctr ) |
147 | 0 | { |
148 | 0 | #if( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 ) \ |
149 | 0 | || ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 ) \ |
150 | 0 | || GCC_VERSION_AT_LEAST( 8, 0 ) \ |
151 | | || CLANG_VERSION_AT_LEAST( 9, 0 ) // Microsoft, Intel, newer GCC or newer Clang compiler supporting _xgetbv intrinsic |
152 | |
|
153 | 0 | return _xgetbv( ctr ); // intrinsic function for XGETBV |
154 | |
|
155 | | #elif defined( __GNUC__ ) // use inline assembly, Gnu/AT&T syntax |
156 | | |
157 | | uint32_t a, d; |
158 | | #if GCC_VERSION_AT_LEAST( 4, 4 ) || CLANG_VERSION_AT_LEAST( 3, 3 ) |
159 | | __asm( "xgetbv" : "=a"( a ), "=d"( d ) : "c"( ctr ) : ); |
160 | | #else |
161 | | __asm( ".byte 0x0f, 0x01, 0xd0" : "=a"( a ), "=d"( d ) : "c"( ctr ) : ); |
162 | | #endif |
163 | | return a | ( uint64_t( d ) << 32 ); |
164 | | |
165 | | #else // #elif defined (_MSC_FULL_VER) || (defined (__INTEL_COMPILER)...) // other compiler. try inline assembly with masm/intel/MS syntax |
166 | | |
167 | | uint32_t a, d; |
168 | | __asm { |
169 | | mov ecx, ctr |
170 | | _emit 0x0f |
171 | | _emit 0x01 |
172 | | _emit 0xd0 ; // xgetbv |
173 | | mov a, eax |
174 | | mov d, edx |
175 | | } |
176 | | return a | ( uint64_t( d ) << 32 ); |
177 | | |
178 | | #endif |
179 | 0 | } |
180 | | |
181 | | |
// Feature bit masks for the CPUID result registers.
// NOTE(review): only the SSE4.1, SSE4.2, OSXSAVE/AVX, AVX2 and AVX512 masks are used by the
// detection code in this file; the leaf/register grouping of the remaining masks follows
// the vendor CPUID documentation and should be confirmed before they are put to use.

// leaf 1, EDX
#define BIT_HAS_MMX      ( 1 << 23 )
#define BIT_HAS_SSE      ( 1 << 25 )
#define BIT_HAS_SSE2     ( 1 << 26 )

// leaf 1, ECX
#define BIT_HAS_SSE3     ( 1 << 0 )
#define BIT_HAS_SSSE3    ( 1 << 9 )
#define BIT_HAS_FMA3     ( 1 << 12 )
#define BIT_HAS_SSE41    ( 1 << 19 )
#define BIT_HAS_SSE42    ( 1 << 20 )
#define BIT_HAS_OSXSAVE  ( 1 << 27 )
#define BIT_HAS_AVX      ( ( 1 << 28 ) | BIT_HAS_OSXSAVE )   // AVX flag combined with OSXSAVE

// leaf 7 (sub-leaf 0), EBX
#define BIT_HAS_AVX2     ( 1 << 5 )
#define BIT_HAS_AVX512F  ( 1 << 16 )
#define BIT_HAS_AVX512DQ ( 1 << 17 )
#define BIT_HAS_AVX512BW ( 1 << 30 )

// presumably extended leaf 0x80000001 (AMD specific features and long mode) - not used here
#define BIT_HAS_SSE4a    ( 1 << 6 )
#define BIT_HAS_XOP      ( 1 << 11 )
#define BIT_HAS_FMA4     ( 1 << 16 )
#define BIT_HAS_X64      ( 1 << 29 )
200 | | |
201 | | /** |
202 | | * \brief Read instruction set extension support flags from CPU register; |
203 | | */ |
204 | | NO_OPT_SIMD |
205 | | static X86_VEXT _get_x86_extensions() |
206 | 0 | { |
207 | 0 | int regs[4] = { 0, 0, 0, 0 }; |
208 | 0 | X86_VEXT ext; |
209 | 0 | ext = SCALAR; |
210 | |
|
211 | 0 | doCpuid( regs, 0 ); |
212 | 0 | if( regs[0] == 0 ) |
213 | 0 | return ext; |
214 | | |
215 | 0 | doCpuid( regs, 1 ); |
216 | 0 | if( !( regs[2] & BIT_HAS_SSE41 ) ) |
217 | 0 | return ext; |
218 | 0 | ext = SSE41; |
219 | |
|
220 | 0 | if( !( regs[2] & BIT_HAS_SSE42 ) ) |
221 | 0 | return ext; |
222 | 0 | ext = SSE42; |
223 | |
|
224 | | # if SIMD_UP_TO_SSE42 |
225 | | return ext; |
226 | | # else // !SIMD_UP_TO_SSE42 |
227 | |
|
228 | 0 | doCpuidex( regs, 1, 1 ); |
229 | 0 | if( !( ( regs[2] & BIT_HAS_AVX ) == BIT_HAS_AVX ) ) |
230 | 0 | return ext; // first check if the cpu supports avx |
231 | 0 | if( ( xgetbv( 0 ) & 6 ) != 6 ) |
232 | 0 | return ext; // then see if the os uses YMM state management via XSAVE etc... |
233 | 0 | # ifndef _WIN32 |
234 | | // don't detect AVX, as there are problems with MSVC production illegal ops for AVX |
235 | 0 | ext = AVX; |
236 | 0 | # endif |
237 | | |
238 | | // #ifdef USE_AVX2 |
239 | 0 | doCpuidex( regs, 7, 0 ); |
240 | 0 | if( !( regs[1] & BIT_HAS_AVX2 ) ) |
241 | 0 | return ext; |
242 | 0 | ext = AVX2; |
243 | | // #endif |
244 | |
|
245 | | # ifdef USE_AVX512 |
246 | | if( ( xgetbv( 0 ) & 0xE0 ) != 0xE0 ) |
247 | | return ext; // see if OPMASK state and ZMM are availabe and enabled |
248 | | doCpuidex( regs, 7, 0 ); |
249 | | if( !( regs[1] & BIT_HAS_AVX512F ) ) |
250 | | return ext; |
251 | | if( !( regs[1] & BIT_HAS_AVX512DQ ) ) |
252 | | return ext; |
253 | | if( !( regs[1] & BIT_HAS_AVX512BW ) ) |
254 | | return ext; |
255 | | ext = AVX512; |
256 | | # endif // USE_AVX512 |
257 | 0 | # endif // !SIMD_UP_TO_SSE42 |
258 | |
|
259 | 0 | return ext; |
260 | 0 | } |
261 | | |
262 | | #endif // REAL_TARGET_X86 |
263 | | |
264 | | NO_OPT_SIMD |
265 | | X86_VEXT read_x86_extension_flags( X86_VEXT request ) |
266 | 0 | { |
267 | 0 | #ifdef REAL_TARGET_X86 |
268 | 0 | static const X86_VEXT max_supported = _get_x86_extensions(); |
269 | 0 | static X86_VEXT ext_flags = max_supported; |
270 | | #else |
271 | | static const X86_VEXT max_supported = AVX2; // disable AVX2 for non-x86 because the SIMD-Everywhere implementation is buggy |
272 | | static X86_VEXT ext_flags = SIMD_EVERYWHERE_EXTENSION_LEVEL; // default to SSE42 for WASM and SIMD-everywhere |
273 | | #endif |
274 | |
|
275 | 0 | if( request != UNDEFINED ) |
276 | 0 | { |
277 | 0 | if( request > max_supported ) |
278 | 0 | { |
279 | 0 | #ifdef REAL_TARGET_X86 |
280 | 0 | THROW( "requested SIMD level (" << x86_vext_to_string( request ) << ") not supported by current CPU (max " << x86_vext_to_string( max_supported ) << ")." ); |
281 | 0 | #endif |
282 | 0 | } |
283 | | |
284 | 0 | ext_flags = request; |
285 | 0 | } |
286 | | |
287 | 0 | #ifdef REAL_TARGET_X86 |
288 | 0 | if( max_supported < X86_SIMD_SSE41 ) |
289 | 0 | { |
290 | 0 | THROW( "maximum SIMD level of current CPU is " << x86_vext_to_string( max_supported ) << " but at least SSE4.1 is required." ); |
291 | 0 | } |
292 | 0 | #endif |
293 | | |
294 | 0 | return ext_flags; |
295 | 0 | } |
296 | | |
297 | | const std::string& read_x86_extension_name() |
298 | 0 | { |
299 | 0 | return x86_vext_to_string( read_x86_extension_flags() ); |
300 | 0 | } |
301 | | |
302 | | } // namespace vvenc |
303 | | |
304 | | #endif // TARGET_SIMD_X86 |