Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/CommonLib/x86/CommonDefX86.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
/*
44
 * \ingroup CommonLib
45
 * \file    CommonDefX86.cpp
46
 * \brief   This file contains the SIMD x86 common used functions.
47
 */
48
49
#include "CommonDefX86.h"
50
51
#include <map>
52
#include <cstdint>
53
#include "CommonLib/CommonDef.h"
54
55
56
#ifdef REAL_TARGET_X86
57
# if defined( _WIN32 ) && !defined( __MINGW32__ )
58
#  include <intrin.h>
59
# else
60
#  include <x86intrin.h>  // needed for _xgetbv, which is not provided by simd-everywhere
61
#  include <cpuid.h>
62
# endif
63
#endif
64
65
#if defined(TARGET_SIMD_X86)  && ENABLE_SIMD_OPT
66
67
namespace vvenc
68
{
69
70
#  if REAL_TARGET_X86
71
const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SSE41, "SSE41" }, { SSE42, "SSE42" }, { AVX, "AVX" }, { AVX2, "AVX2" }, { AVX512, "AVX512" } };
72
#  else   // !REAL_TARGET_X86
73
#    if defined( REAL_TARGET_ARM )
74
const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SIMD_EVERYWHERE_EXTENSION_LEVEL, "NEON" } };
75
#    elif defined( REAL_TARGET_WASM )
76
const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SIMD_EVERYWHERE_EXTENSION_LEVEL, "WASM" } };
77
#    elif defined( REAL_TARGET_LOONGARCH )
78
const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SIMD_EVERYWHERE_EXTENSION_LEVEL, "LSX" } };
79
#    else
80
const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SCALAR, "SCALAR" }, { SIMD_EVERYWHERE_EXTENSION_LEVEL, "SIMDE" } };
81
#    endif
82
#  endif   // !REAL_TARGET_X86
83
84
const std::string& x86_vext_to_string( X86_VEXT vext )
85
0
{
86
0
  try
87
0
  {
88
0
    return vext_names.at( vext );
89
0
  }
90
0
  catch( std::out_of_range& )
91
0
  {
92
0
    THROW( "Invalid SIMD extension value " << vext );
93
0
  }
94
0
}
95
96
X86_VEXT string_to_x86_vext( const std::string& ext_name )
97
0
{
98
0
  if( ext_name.empty() )
99
0
  {
100
0
    return UNDEFINED;
101
0
  }
102
103
0
  for( auto& it: vext_names )
104
0
  {
105
0
    if( it.second == ext_name )
106
0
    {
107
0
      return it.first;
108
0
    }
109
0
  }
110
111
0
  THROW( "Invalid SIMD Mode string: \"" << ext_name << "\"" );
112
0
}
113
114
#if __GNUC__   // valid for GCC and clang
115
# define NO_OPT_SIMD __attribute__( ( optimize( "no-tree-vectorize" ) ) )
116
#else
117
# define NO_OPT_SIMD
118
#endif
119
120
#ifdef REAL_TARGET_X86
121
122
#if defined( __MINGW32__ ) && !defined( __MINGW64__ )
123
# define SIMD_UP_TO_SSE42 1
124
#else
125
# define SIMD_UP_TO_SSE42 0
126
#endif
127
128
129
/* use __cpuid for windows or inline assembler for gcc and clang */
130
#if defined( _WIN32 ) && !defined( __MINGW32__ )
131
# define doCpuid   __cpuid
132
# define doCpuidex __cpuidex
133
#else   // !_WIN32
134
static inline void doCpuid( int CPUInfo[4], int InfoType )
135
0
{
136
0
  __get_cpuid( (unsigned) InfoType, (unsigned*) &CPUInfo[0], (unsigned*) &CPUInfo[1], (unsigned*) &CPUInfo[2], (unsigned*) &CPUInfo[3] );
137
0
}
138
# if !SIMD_UP_TO_SSE42
139
static inline void doCpuidex( int CPUInfo[4], int InfoType0, int InfoType1 )
140
0
{
141
0
  __cpuid_count( InfoType0, InfoType1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3] );
142
0
}
143
# endif  // !SIMD_UP_TO_SSE42
144
#endif  // !_WIN32
145
146
static inline int64_t xgetbv( int ctr )
147
0
{
148
0
#if( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 )    \
149
0
  || ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 ) \
150
0
  || GCC_VERSION_AT_LEAST( 8, 0 )                                \
151
  || CLANG_VERSION_AT_LEAST( 9, 0 )   // Microsoft, Intel, newer GCC or newer Clang compiler supporting _xgetbv intrinsic
152
153
0
  return _xgetbv( ctr );   // intrinsic function for XGETBV
154
155
#elif defined( __GNUC__ )   // use inline assembly, Gnu/AT&T syntax
156
157
  uint32_t a, d;
158
#if GCC_VERSION_AT_LEAST( 4, 4 ) || CLANG_VERSION_AT_LEAST( 3, 3 )
159
  __asm( "xgetbv" : "=a"( a ), "=d"( d ) : "c"( ctr ) : );
160
#else
161
  __asm( ".byte 0x0f, 0x01, 0xd0" : "=a"( a ), "=d"( d ) : "c"( ctr ) : );
162
#endif
163
  return a | ( uint64_t( d ) << 32 );
164
165
#else  // #elif defined (_MSC_FULL_VER) || (defined (__INTEL_COMPILER)...) // other compiler. try inline assembly with masm/intel/MS syntax
166
167
  uint32_t a, d;
168
  __asm {
169
        mov ecx, ctr
170
        _emit 0x0f
171
        _emit 0x01
172
        _emit 0xd0 ; // xgetbv
173
        mov a, eax
174
        mov d, edx
175
  }
176
  return a | ( uint64_t( d ) << 32 );
177
178
#endif
179
0
}
180
181
182
#define BIT_HAS_MMX                    (1 << 23)
183
#define BIT_HAS_SSE                    (1 << 25)
184
#define BIT_HAS_SSE2                   (1 << 26)
185
#define BIT_HAS_SSE3                   (1 <<  0)
186
#define BIT_HAS_SSSE3                  (1 <<  9)
187
0
#define BIT_HAS_SSE41                  (1 << 19)
188
0
#define BIT_HAS_SSE42                  (1 << 20)
189
#define BIT_HAS_SSE4a                  (1 <<  6)
190
0
#define BIT_HAS_OSXSAVE                (1 << 27)
191
0
#define BIT_HAS_AVX                   ((1 << 28)|BIT_HAS_OSXSAVE)
192
0
#define BIT_HAS_AVX2                   (1 <<  5)
193
#define BIT_HAS_AVX512F                (1 << 16)
194
#define BIT_HAS_AVX512DQ               (1 << 17)
195
#define BIT_HAS_AVX512BW               (1 << 30)
196
#define BIT_HAS_FMA3                   (1 << 12)
197
#define BIT_HAS_FMA4                   (1 << 16)
198
#define BIT_HAS_X64                    (1 << 29)
199
#define BIT_HAS_XOP                    (1 << 11)
200
201
/**
202
 * \brief Read instruction set extension support flags from CPU register;
203
 */
204
NO_OPT_SIMD
205
static X86_VEXT _get_x86_extensions()
206
0
{
207
0
  int      regs[4] = { 0, 0, 0, 0 };
208
0
  X86_VEXT ext;
209
0
  ext = SCALAR;
210
211
0
  doCpuid( regs, 0 );
212
0
  if( regs[0] == 0 )
213
0
    return ext;
214
215
0
  doCpuid( regs, 1 );
216
0
  if( !( regs[2] & BIT_HAS_SSE41 ) )
217
0
    return ext;
218
0
  ext = SSE41;
219
220
0
  if( !( regs[2] & BIT_HAS_SSE42 ) )
221
0
    return ext;
222
0
  ext = SSE42;
223
224
#  if SIMD_UP_TO_SSE42
225
  return ext;
226
#  else   //  !SIMD_UP_TO_SSE42
227
228
0
  doCpuidex( regs, 1, 1 );
229
0
  if( !( ( regs[2] & BIT_HAS_AVX ) == BIT_HAS_AVX ) )
230
0
    return ext;   // first check if the cpu supports avx
231
0
  if( ( xgetbv( 0 ) & 6 ) != 6 )
232
0
    return ext;   // then see if the os uses YMM state management via XSAVE etc...
233
0
#    ifndef _WIN32
234
  // don't detect AVX, as there are problems with MSVC producing illegal ops for AVX
235
0
  ext = AVX;
236
0
#    endif
237
238
// #ifdef USE_AVX2
239
0
  doCpuidex( regs, 7, 0 );
240
0
  if( !( regs[1] & BIT_HAS_AVX2 ) )
241
0
    return ext;
242
0
  ext = AVX2;
243
// #endif
244
245
#    ifdef USE_AVX512
246
  if( ( xgetbv( 0 ) & 0xE0 ) != 0xE0 )
247
    return ext;   // see if OPMASK state and ZMM are available and enabled
248
  doCpuidex( regs, 7, 0 );
249
  if( !( regs[1] & BIT_HAS_AVX512F ) )
250
    return ext;
251
  if( !( regs[1] & BIT_HAS_AVX512DQ ) )
252
    return ext;
253
  if( !( regs[1] & BIT_HAS_AVX512BW ) )
254
    return ext;
255
  ext = AVX512;
256
#    endif   //  USE_AVX512
257
0
#  endif     // !SIMD_UP_TO_SSE42
258
259
0
  return ext;
260
0
}
261
262
#endif   // REAL_TARGET_X86
263
264
NO_OPT_SIMD
265
X86_VEXT read_x86_extension_flags( X86_VEXT request )
266
0
{
267
0
#ifdef REAL_TARGET_X86
268
0
  static const X86_VEXT max_supported = _get_x86_extensions();
269
0
  static X86_VEXT       ext_flags     = max_supported;
270
#else
271
  static const X86_VEXT max_supported = AVX2;                               // disable AVX2 for non-x86 because the SIMD-Everywhere implementation is buggy
272
  static X86_VEXT       ext_flags     = SIMD_EVERYWHERE_EXTENSION_LEVEL;   // default to SSE42 for WASM and SIMD-everywhere
273
#endif
274
275
0
  if( request != UNDEFINED )
276
0
  {
277
0
    if( request > max_supported )
278
0
    {
279
0
#ifdef REAL_TARGET_X86
280
0
      THROW( "requested SIMD level (" << x86_vext_to_string( request ) << ") not supported by current CPU (max " << x86_vext_to_string( max_supported ) << ")." );
281
0
#endif
282
0
    }
283
284
0
    ext_flags = request;
285
0
  }
286
287
0
#ifdef REAL_TARGET_X86
288
0
  if( max_supported < X86_SIMD_SSE41 )
289
0
  {
290
0
    THROW( "maximum SIMD level of current CPU is " << x86_vext_to_string( max_supported ) << " but at least SSE4.1 is required." );
291
0
  }
292
0
#endif
293
294
0
  return ext_flags;
295
0
}
296
297
const std::string& read_x86_extension_name()
298
0
{
299
0
  return x86_vext_to_string( read_x86_extension_flags() );
300
0
}
301
302
}   // namespace vvenc
303
304
#endif // TARGET_SIMD_X86