Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvdec/source/Lib/CommonLib/x86/CommonDefX86.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
/*
44
 * \ingroup CommonLib
45
 * \file    CommonDefX86.cpp
46
 * \brief   This file contains the commonly used SIMD x86 functions.
47
 */
48
49
#include "CommonDefX86.h"
50
51
#include <array>
52
#include <cstdint>
53
#include "CommonLib/CommonDef.h"
54
55
56
#ifdef REAL_TARGET_X86
57
# if defined( _WIN32 ) && !defined( __MINGW32__ )
58
#  include <intrin.h>
59
# else
60
#  include <x86intrin.h>  // needed for _xgetbv, which is not provided by simd-everywhere
61
#  include <cpuid.h>
62
# endif
63
#endif
64
65
#if defined(TARGET_SIMD_X86) && ENABLE_SIMD_OPT
66
67
namespace vvdec
68
{
69
// Printable names of the SIMD extension levels; read_x86_extension_name() indexes this table with the X86_VEXT
// value returned by read_x86_extension_flags().
constexpr std::array<const char*, 6> vext_names{ "SCALAR", "SSE41", "SSE42", "AVX", "AVX2", "AVX512" };
70
71
// NO_OPT_SIMD switches off auto-vectorization ("no-tree-vectorize") for the function it decorates; in this file
// it is applied to the CPU feature detection functions.
// The optimize attribute is GCC specific: clang defines __GNUC__ as well, but does not implement the attribute
// and only reports it as unknown, so clang gets the empty definition.
#if defined( __GNUC__ ) && !defined( __clang__ )
# define NO_OPT_SIMD __attribute__( ( optimize( "no-tree-vectorize" ) ) )
#else
# define NO_OPT_SIMD
#endif
76
77
#ifdef REAL_TARGET_X86
78
79
#if defined( __MINGW32__ ) && !defined( __MINGW64__ )
80
# define SIMD_UP_TO_SSE42 1
81
#else
82
# define SIMD_UP_TO_SSE42 0
83
#endif
84
85
86
/* use the __cpuid/__cpuidex intrinsics for windows or the <cpuid.h> helpers for gcc and clang */
87
#if defined( _WIN32 ) && !defined( __MINGW32__ )
88
# define doCpuid   __cpuid
89
# define doCpuidex __cpuidex
90
#else   // !_WIN32
91
/**
 * \brief Query a CPUID leaf through the <cpuid.h> helper __get_cpuid().
 *
 * \param CPUInfo   receives the EAX, EBX, ECX and EDX results (in this order)
 * \param InfoType  CPUID leaf to query
 *
 * __get_cpuid() returns 0 for a leaf the CPU does not support and leaves its outputs untouched in that case.
 * All four entries are then set to zero, so that callers never evaluate stale or uninitialized values.
 */
static inline void doCpuid( int CPUInfo[4], int InfoType )
{
  unsigned int eax = 0;
  unsigned int ebx = 0;
  unsigned int ecx = 0;
  unsigned int edx = 0;

  if( !__get_cpuid( static_cast<unsigned int>( InfoType ), &eax, &ebx, &ecx, &edx ) )
  {
    // unsupported leaf: report "no information" instead of whatever the buffer held before
    eax = ebx = ecx = edx = 0;
  }

  CPUInfo[0] = static_cast<int>( eax );
  CPUInfo[1] = static_cast<int>( ebx );
  CPUInfo[2] = static_cast<int>( ecx );
  CPUInfo[3] = static_cast<int>( edx );
}
95
# if !SIMD_UP_TO_SSE42
96
/**
 * \brief Query a CPUID leaf together with a sub-leaf index via the <cpuid.h> macro __cpuid_count().
 *
 * \param CPUInfo    receives the four result registers, written to entries 0..3 in the macro's output order
 * \param InfoType0  CPUID leaf
 * \param InfoType1  sub-leaf index
 */
static inline void doCpuidex( int CPUInfo[4], int InfoType0, int InfoType1 )
{
  int& out0 = CPUInfo[0];
  int& out1 = CPUInfo[1];
  int& out2 = CPUInfo[2];
  int& out3 = CPUInfo[3];

  __cpuid_count( InfoType0, InfoType1, out0, out1, out2, out3 );
}
100
# endif  // !SIMD_UP_TO_SSE42
101
#endif  // !_WIN32
102
103
/**
 * \brief Read an extended control register using the XGETBV instruction.
 *
 * \param ctr  register index, handed to the instruction in ECX
 * \return     the EDX:EAX result combined into one 64 bit value
 *
 * The _xgetbv intrinsic is used where the compiler version check below allows it; otherwise the instruction is
 * issued through inline assembly (GNU/AT&T syntax for GNU compatible compilers, MASM syntax for the rest).
 */
static inline int64_t xgetbv( int ctr )
{
#if( defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000 )    \
  || ( defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200 ) \
  || GCC_VERSION_AT_LEAST( 8, 0 )                                \
  || CLANG_VERSION_AT_LEAST( 9, 0 )   // Microsoft, Intel, newer GCC or newer Clang compiler supporting _xgetbv intrinsic

  // intrinsic function for XGETBV
  return _xgetbv( ctr );

#elif defined( __GNUC__ )   // use inline assembly, Gnu/AT&T syntax

  uint32_t eaxVal, edxVal;
#if GCC_VERSION_AT_LEAST( 4, 4 ) || CLANG_VERSION_AT_LEAST( 3, 3 )
  __asm( "xgetbv" : "=a"( eaxVal ), "=d"( edxVal ) : "c"( ctr ) : );
#else
  // older toolchains: emit the XGETBV opcode bytes directly
  __asm( ".byte 0x0f, 0x01, 0xd0" : "=a"( eaxVal ), "=d"( edxVal ) : "c"( ctr ) : );
#endif
  return eaxVal | ( uint64_t( edxVal ) << 32 );

#else  // other compiler. try inline assembly with masm/intel/MS syntax

  uint32_t eaxVal, edxVal;
  __asm {
        mov ecx, ctr
        _emit 0x0f
        _emit 0x01
        _emit 0xd0 ; // xgetbv
        mov eaxVal, eax
        mov edxVal, edx
  }
  return eaxVal | ( uint64_t( edxVal ) << 32 );

#endif
}
137
138
139
// CPUID feature flag masks. _get_x86_extensions() tests BIT_HAS_SSE41, BIT_HAS_SSE42 and BIT_HAS_AVX against
// regs[2] of leaf 1, and BIT_HAS_AVX2 / BIT_HAS_AVX512* against regs[1] of leaf 7. The remaining masks are not
// evaluated by _get_x86_extensions().
#define BIT_HAS_MMX                    (1 << 23)
140
#define BIT_HAS_SSE                    (1 << 25)
141
#define BIT_HAS_SSE2                   (1 << 26)
142
#define BIT_HAS_SSE3                   (1 <<  0)
143
#define BIT_HAS_SSSE3                  (1 <<  9)
144
0
#define BIT_HAS_SSE41                  (1 << 19)
145
0
#define BIT_HAS_SSE42                  (1 << 20)
146
#define BIT_HAS_SSE4a                  (1 <<  6)
147
0
#define BIT_HAS_OSXSAVE                (1 << 27)
148
0
#define BIT_HAS_AVX                   ((1 << 28)|BIT_HAS_OSXSAVE)
149
0
#define BIT_HAS_AVX2                   (1 <<  5)
150
#define BIT_HAS_AVX512F                (1 << 16)
151
#define BIT_HAS_AVX512DQ               (1 << 17)
152
#define BIT_HAS_AVX512BW               (1 << 30)
153
#define BIT_HAS_FMA3                   (1 << 12)
154
#define BIT_HAS_FMA4                   (1 << 16)
155
#define BIT_HAS_X64                    (1 << 29)
156
#define BIT_HAS_XOP                    (1 << 11)
157
158
/**
159
 * \brief Read instruction set extension support flags from CPU register;
160
 */
161
NO_OPT_SIMD
162
static X86_VEXT _get_x86_extensions()
163
0
{
164
0
  int      regs[4] = { 0, 0, 0, 0 };
165
0
  X86_VEXT ext;
166
0
  ext = SCALAR;
167
168
0
  doCpuid( regs, 0 );
169
0
  if( regs[0] == 0 )
170
0
    return ext;
171
172
0
  doCpuid( regs, 1 );
173
0
  if( !( regs[2] & BIT_HAS_SSE41 ) )
174
0
    return ext;
175
0
  ext = SSE41;
176
177
0
  if( !( regs[2] & BIT_HAS_SSE42 ) )
178
0
    return ext;
179
0
  ext = SSE42;
180
181
#  if SIMD_UP_TO_SSE42
182
  return ext;
183
#  else   //  !SIMD_UP_TO_SSE42
184
185
0
  doCpuidex( regs, 1, 1 );
186
0
  if( !( ( regs[2] & BIT_HAS_AVX ) == BIT_HAS_AVX ) )
187
0
    return ext;   // first check if the cpu supports avx
188
0
  if( ( xgetbv( 0 ) & 6 ) != 6 )
189
0
    return ext;   // then see if the os uses YMM state management via XSAVE etc...
190
0
#    ifndef _WIN32
191
  // don't detect AVX, as there are problems with MSVC production illegal ops for AVX
192
0
  ext = AVX;
193
0
#    endif
194
195
// #ifdef USE_AVX2
196
0
  doCpuidex( regs, 7, 0 );
197
0
  if( !( regs[1] & BIT_HAS_AVX2 ) )
198
0
    return ext;
199
0
  ext = AVX2;
200
// #endif
201
202
#    ifdef USE_AVX512
203
  if( ( xgetbv( 0 ) & 0xE0 ) != 0xE0 )
204
    return ext;   // see if OPMASK state and ZMM are availabe and enabled
205
  doCpuidex( regs, 7, 0 );
206
  if( !( regs[1] & BIT_HAS_AVX512F ) )
207
    return ext;
208
  if( !( regs[1] & BIT_HAS_AVX512DQ ) )
209
    return ext;
210
  if( !( regs[1] & BIT_HAS_AVX512BW ) )
211
    return ext;
212
  ext = AVX512;
213
#    endif   //  USE_AVX512
214
0
#  endif     // !SIMD_UP_TO_SSE42
215
216
0
  return ext;
217
0
}
218
219
#endif   // REAL_TARGET_X86
220
221
/**
 * \brief Return the SIMD extension level in use, optionally switching to another level first.
 *
 * \param request  level to switch to; UNDEFINED leaves the current level unchanged
 * \return         the level in effect after the call
 *
 * A request above the supported maximum is reported through THROW_UNSUPPORTED. Both the maximum and the
 * current level are kept in function-local statics and are initialized on the first call.
 */
NO_OPT_SIMD
X86_VEXT read_x86_extension_flags( X86_VEXT request )
{
#ifdef REAL_TARGET_X86
  static const X86_VEXT max_supported = _get_x86_extensions();
  static X86_VEXT       current_level = max_supported;
#else
  static const X86_VEXT max_supported = AVX;                               // disable AVX2 for non-x86 because the SIMD-Everywhere implementation is buggy
  static X86_VEXT       current_level = SIMD_EVERYWHERE_EXTENSION_LEVEL;   // default to SSE42 for WASM and SIMD-everywhere
#endif

  // pure query: nothing to validate or change
  if( request == UNDEFINED )
  {
    return current_level;
  }

  if( request > max_supported )
  {
#ifdef REAL_TARGET_X86
    THROW_UNSUPPORTED( "requested SIMD level (" << request << ") not supported by current CPU (max " << max_supported << ")." );
#else
    THROW_UNSUPPORTED( "requested SIMD level (" << request << ") not supported because the SIMD-Everywhere implementation for AVX2 is buggy." );
#endif
  }

  current_level = request;
  return current_level;
}
248
249
std::string read_x86_extension_name()
250
0
{
251
0
  X86_VEXT vext = read_x86_extension_flags();
252
0
  if( vext < 0 || vext >= vext_names.size() )
253
0
  {
254
0
    static const char extension_not_available[] = "NA";
255
0
    return extension_not_available;
256
0
  }
257
258
0
# if REAL_TARGET_X86
259
260
0
  return vext_names[vext];
261
262
# else   // !REAL_TARGET_X86
263
  if( vext == SCALAR )
264
  {
265
    return vext_names[vext];
266
  }
267
  else
268
  {
269
#  if defined( REAL_TARGET_ARM )
270
    return std::string( "NEON/SIMDE(" ) + vext_names[vext] + ")";
271
#  elif defined( REAL_TARGET_WASM )
272
    return std::string( "WASM/Emscripten(" ) + vext_names[vext] + ")";
273
#  elif defined( REAL_TARGET_LOONGARCH )
274
    return std::string( "LSX/SIMDE(" ) + vext_names[vext] + ")";
275
#  else
276
    return std::string( "SIMDE(" ) + vext_names[vext] + ")" ;
277
#  endif
278
  }
279
# endif   // !REAL_TARGET_X86
280
0
}
281
282
}   // namespace vvdec
283
284
#endif   // TARGET_SIMD_X86