Coverage Report

Created: 2025-07-11 07:08

/src/libvpx/vpx_ports/x86.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#ifndef VPX_VPX_PORTS_X86_H_
12
#define VPX_VPX_PORTS_X86_H_
13
#include <stdlib.h>
14
15
#if defined(_MSC_VER)
16
#include <intrin.h> /* For __cpuidex, __rdtsc */
17
#endif
18
19
#include "vpx_config.h"
20
#include "vpx/vpx_integer.h"
21
22
#ifdef __cplusplus
23
extern "C" {
24
#endif
25
26
typedef enum {
27
  VPX_CPU_UNKNOWN = -1,
28
  VPX_CPU_AMD,
29
  VPX_CPU_AMD_OLD,
30
  VPX_CPU_CENTAUR,
31
  VPX_CPU_CYRIX,
32
  VPX_CPU_INTEL,
33
  VPX_CPU_NEXGEN,
34
  VPX_CPU_NSC,
35
  VPX_CPU_RISE,
36
  VPX_CPU_SIS,
37
  VPX_CPU_TRANSMETA,
38
  VPX_CPU_TRANSMETA_OLD,
39
  VPX_CPU_UMC,
40
  VPX_CPU_VIA,
41
42
  VPX_CPU_LAST
43
} vpx_cpu_t;
44
45
#if defined(__GNUC__) || defined(__ANDROID__)
46
#if VPX_ARCH_X86_64
47
#define cpuid(func, func2, ax, bx, cx, dx)                      \
48
18
  __asm__ __volatile__("cpuid           \n\t"                   \
49
18
                       : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
50
18
                       : "a"(func), "c"(func2))
51
#else
52
#define cpuid(func, func2, ax, bx, cx, dx)     \
53
  __asm__ __volatile__(                        \
54
      "mov %%ebx, %%edi   \n\t"                \
55
      "cpuid              \n\t"                \
56
      "xchg %%edi, %%ebx  \n\t"                \
57
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
58
      : "a"(func), "c"(func2))
59
#endif
60
#elif defined(__SUNPRO_C) || \
61
    defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
62
#if VPX_ARCH_X86_64
63
#define cpuid(func, func2, ax, bx, cx, dx)     \
64
  asm volatile(                                \
65
      "xchg %rsi, %rbx \n\t"                   \
66
      "cpuid           \n\t"                   \
67
      "movl %ebx, %edi \n\t"                   \
68
      "xchg %rsi, %rbx \n\t"                   \
69
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
70
      : "a"(func), "c"(func2))
71
#else
72
#define cpuid(func, func2, ax, bx, cx, dx)     \
73
  asm volatile(                                \
74
      "pushl %ebx       \n\t"                  \
75
      "cpuid            \n\t"                  \
76
      "movl %ebx, %edi  \n\t"                  \
77
      "popl %ebx        \n\t"                  \
78
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
79
      : "a"(func), "c"(func2))
80
#endif
81
#else /* end __SUNPRO__ */
82
#if VPX_ARCH_X86_64
83
#if defined(_MSC_VER) && _MSC_VER > 1500
84
#define cpuid(func, func2, a, b, c, d) \
85
  do {                                 \
86
    int regs[4];                       \
87
    __cpuidex(regs, func, func2);      \
88
    a = regs[0];                       \
89
    b = regs[1];                       \
90
    c = regs[2];                       \
91
    d = regs[3];                       \
92
  } while (0)
93
#else
94
#define cpuid(func, func2, a, b, c, d) \
95
  do {                                 \
96
    int regs[4];                       \
97
    __cpuid(regs, func);               \
98
    a = regs[0];                       \
99
    b = regs[1];                       \
100
    c = regs[2];                       \
101
    d = regs[3];                       \
102
  } while (0)
103
#endif
104
#else
105
#define cpuid(func, func2, a, b, c, d)                              \
106
  __asm mov eax, func __asm mov ecx, func2 __asm cpuid __asm mov a, \
107
      eax __asm mov b, ebx __asm mov c, ecx __asm mov d, edx
108
#endif
109
#endif /* end others */
110
111
// NaCl has no support for xgetbv or the raw opcode.
112
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
113
6
static INLINE uint64_t xgetbv(void) {
114
6
  const uint32_t ecx = 0;
115
6
  uint32_t eax, edx;
116
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
117
6
  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
118
6
                   : "=a"(eax), "=d"(edx)
119
6
                   : "c"(ecx));
120
6
  return ((uint64_t)edx << 32) | eax;
121
6
}
vpx_scale_rtcd.c:xgetbv
Line
Count
Source
113
2
static INLINE uint64_t xgetbv(void) {
114
2
  const uint32_t ecx = 0;
115
2
  uint32_t eax, edx;
116
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
117
2
  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
118
2
                   : "=a"(eax), "=d"(edx)
119
2
                   : "c"(ecx));
120
2
  return ((uint64_t)edx << 32) | eax;
121
2
}
vpx_dsp_rtcd.c:xgetbv
Line
Count
Source
113
2
static INLINE uint64_t xgetbv(void) {
114
2
  const uint32_t ecx = 0;
115
2
  uint32_t eax, edx;
116
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
117
2
  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
118
2
                   : "=a"(eax), "=d"(edx)
119
2
                   : "c"(ecx));
120
2
  return ((uint64_t)edx << 32) | eax;
121
2
}
vp9_rtcd.c:xgetbv
Line
Count
Source
113
1
static INLINE uint64_t xgetbv(void) {
114
1
  const uint32_t ecx = 0;
115
1
  uint32_t eax, edx;
116
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
117
1
  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
118
1
                   : "=a"(eax), "=d"(edx)
119
1
                   : "c"(ecx));
120
1
  return ((uint64_t)edx << 32) | eax;
121
1
}
Unexecuted instantiation: vp8_dx_iface.c:xgetbv
Unexecuted instantiation: onyxd_if.c:xgetbv
Unexecuted instantiation: threading.c:xgetbv
Unexecuted instantiation: systemdependent.c:xgetbv
rtcd.c:xgetbv
Line
Count
Source
113
1
static INLINE uint64_t xgetbv(void) {
114
1
  const uint32_t ecx = 0;
115
1
  uint32_t eax, edx;
116
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
117
1
  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
118
1
                   : "=a"(eax), "=d"(edx)
119
1
                   : "c"(ecx));
120
1
  return ((uint64_t)edx << 32) | eax;
121
1
}
Unexecuted instantiation: decodeframe.c:xgetbv
Unexecuted instantiation: detokenize.c:xgetbv
Unexecuted instantiation: decodemv.c:xgetbv
122
#elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
123
    _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
124
#include <immintrin.h>
125
#define xgetbv() _xgetbv(0)
126
#elif defined(_MSC_VER) && defined(_M_IX86)
127
static INLINE uint64_t xgetbv(void) {
128
  uint32_t eax_, edx_;
129
  __asm {
130
    xor ecx, ecx  // ecx = 0
131
    // Use the raw opcode for xgetbv for compatibility with older toolchains.
132
    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
133
    mov eax_, eax
134
    mov edx_, edx
135
  }
136
  return ((uint64_t)edx_ << 32) | eax_;
137
}
138
#else
139
#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
140
#endif
141
142
#if defined(_MSC_VER) && _MSC_VER >= 1700
143
#undef NOMINMAX
144
#define NOMINMAX
145
#ifndef WIN32_LEAN_AND_MEAN
146
#define WIN32_LEAN_AND_MEAN
147
#endif
148
#include <windows.h>
149
#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
150
#define getenv(x) NULL
151
#endif
152
#endif
153
154
6
#define HAS_MMX 0x001
155
6
#define HAS_SSE 0x002
156
6
#define HAS_SSE2 0x004
157
6
#define HAS_SSE3 0x008
158
148
#define HAS_SSSE3 0x010
159
27
#define HAS_SSE4_1 0x020
160
6
#define HAS_AVX 0x040
161
82
#define HAS_AVX2 0x080
162
0
#define HAS_AVX512 0x100
163
#ifndef BIT
164
168
#define BIT(n) (1u << (n))
165
#endif
166
167
12
#define MMX_BITS BIT(23)
168
12
#define SSE_BITS BIT(25)
169
12
#define SSE2_BITS BIT(26)
170
12
#define SSE3_BITS BIT(0)
171
12
#define SSSE3_BITS BIT(9)
172
12
#define SSE4_1_BITS BIT(19)
173
// Bits 27 (OSXSAVE) & 28 (256-bit AVX)
174
12
#define AVX_BITS (BIT(27) | BIT(28))
175
12
#define AVX2_BITS BIT(5)
176
// Bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) & 30 (AVX-512BW)
177
// & 31 (AVX-512VL)
178
12
#define AVX512_BITS (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))
179
180
#define FEATURE_SET(reg, feature) \
181
54
  (((reg) & (feature##_BITS)) == (feature##_BITS))
182
183
6
static INLINE int x86_simd_caps(void) {
184
6
  unsigned int flags = 0;
185
6
  unsigned int mask = ~0u;
186
6
  unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
187
6
  char *env;
188
6
  (void)reg_ebx;
189
190
  /* See if the CPU capabilities are being overridden by the environment */
191
6
  env = getenv("VPX_SIMD_CAPS");
192
6
  if (env && *env) return (int)strtol(env, NULL, 0);
193
194
6
  env = getenv("VPX_SIMD_CAPS_MASK");
195
6
  if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
196
197
  /* Ensure that the CPUID instruction supports extended features */
198
6
  cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
199
6
  if (max_cpuid_val < 1) return 0;
200
201
  /* Get the standard feature flags */
202
6
  cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
203
204
6
  flags |= FEATURE_SET(reg_edx, MMX) ? HAS_MMX : 0;
205
6
  flags |= FEATURE_SET(reg_edx, SSE) ? HAS_SSE : 0;
206
6
  flags |= FEATURE_SET(reg_edx, SSE2) ? HAS_SSE2 : 0;
207
6
  flags |= FEATURE_SET(reg_ecx, SSE3) ? HAS_SSE3 : 0;
208
6
  flags |= FEATURE_SET(reg_ecx, SSSE3) ? HAS_SSSE3 : 0;
209
6
  flags |= FEATURE_SET(reg_ecx, SSE4_1) ? HAS_SSE4_1 : 0;
210
211
6
  if (FEATURE_SET(reg_ecx, AVX)) {
212
    // Check for OS-support of YMM state. Necessary for AVX and AVX2.
213
6
    if ((xgetbv() & 0x6) == 0x6) {
214
6
      flags |= HAS_AVX;
215
6
      if (max_cpuid_val >= 7) {
216
        /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
217
6
        cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
218
6
        flags |= FEATURE_SET(reg_ebx, AVX2) ? HAS_AVX2 : 0;
219
6
        if (FEATURE_SET(reg_ebx, AVX512)) {
220
          // Check for OS-support of ZMM and YMM state. Necessary for AVX-512.
221
0
          if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512;
222
0
        }
223
6
      }
224
6
    }
225
6
  }
226
6
  (void)reg_eax;  // Avoid compiler warning on unused-but-set variable.
227
6
  return flags & mask;
228
6
}
vpx_scale_rtcd.c:x86_simd_caps
Line
Count
Source
183
2
static INLINE int x86_simd_caps(void) {
184
2
  unsigned int flags = 0;
185
2
  unsigned int mask = ~0u;
186
2
  unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
187
2
  char *env;
188
2
  (void)reg_ebx;
189
190
  /* See if the CPU capabilities are being overridden by the environment */
191
2
  env = getenv("VPX_SIMD_CAPS");
192
2
  if (env && *env) return (int)strtol(env, NULL, 0);
193
194
2
  env = getenv("VPX_SIMD_CAPS_MASK");
195
2
  if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
196
197
  /* Ensure that the CPUID instruction supports extended features */
198
2
  cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
199
2
  if (max_cpuid_val < 1) return 0;
200
201
  /* Get the standard feature flags */
202
2
  cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
203
204
2
  flags |= FEATURE_SET(reg_edx, MMX) ? HAS_MMX : 0;
205
2
  flags |= FEATURE_SET(reg_edx, SSE) ? HAS_SSE : 0;
206
2
  flags |= FEATURE_SET(reg_edx, SSE2) ? HAS_SSE2 : 0;
207
2
  flags |= FEATURE_SET(reg_ecx, SSE3) ? HAS_SSE3 : 0;
208
2
  flags |= FEATURE_SET(reg_ecx, SSSE3) ? HAS_SSSE3 : 0;
209
2
  flags |= FEATURE_SET(reg_ecx, SSE4_1) ? HAS_SSE4_1 : 0;
210
211
2
  if (FEATURE_SET(reg_ecx, AVX)) {
212
    // Check for OS-support of YMM state. Necessary for AVX and AVX2.
213
2
    if ((xgetbv() & 0x6) == 0x6) {
214
2
      flags |= HAS_AVX;
215
2
      if (max_cpuid_val >= 7) {
216
        /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
217
2
        cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
218
2
        flags |= FEATURE_SET(reg_ebx, AVX2) ? HAS_AVX2 : 0;
219
2
        if (FEATURE_SET(reg_ebx, AVX512)) {
220
          // Check for OS-support of ZMM and YMM state. Necessary for AVX-512.
221
0
          if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512;
222
0
        }
223
2
      }
224
2
    }
225
2
  }
226
2
  (void)reg_eax;  // Avoid compiler warning on unused-but-set variable.
227
2
  return flags & mask;
228
2
}
vpx_dsp_rtcd.c:x86_simd_caps
Line
Count
Source
183
2
static INLINE int x86_simd_caps(void) {
184
2
  unsigned int flags = 0;
185
2
  unsigned int mask = ~0u;
186
2
  unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
187
2
  char *env;
188
2
  (void)reg_ebx;
189
190
  /* See if the CPU capabilities are being overridden by the environment */
191
2
  env = getenv("VPX_SIMD_CAPS");
192
2
  if (env && *env) return (int)strtol(env, NULL, 0);
193
194
2
  env = getenv("VPX_SIMD_CAPS_MASK");
195
2
  if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
196
197
  /* Ensure that the CPUID instruction supports extended features */
198
2
  cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
199
2
  if (max_cpuid_val < 1) return 0;
200
201
  /* Get the standard feature flags */
202
2
  cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
203
204
2
  flags |= FEATURE_SET(reg_edx, MMX) ? HAS_MMX : 0;
205
2
  flags |= FEATURE_SET(reg_edx, SSE) ? HAS_SSE : 0;
206
2
  flags |= FEATURE_SET(reg_edx, SSE2) ? HAS_SSE2 : 0;
207
2
  flags |= FEATURE_SET(reg_ecx, SSE3) ? HAS_SSE3 : 0;
208
2
  flags |= FEATURE_SET(reg_ecx, SSSE3) ? HAS_SSSE3 : 0;
209
2
  flags |= FEATURE_SET(reg_ecx, SSE4_1) ? HAS_SSE4_1 : 0;
210
211
2
  if (FEATURE_SET(reg_ecx, AVX)) {
212
    // Check for OS-support of YMM state. Necessary for AVX and AVX2.
213
2
    if ((xgetbv() & 0x6) == 0x6) {
214
2
      flags |= HAS_AVX;
215
2
      if (max_cpuid_val >= 7) {
216
        /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
217
2
        cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
218
2
        flags |= FEATURE_SET(reg_ebx, AVX2) ? HAS_AVX2 : 0;
219
2
        if (FEATURE_SET(reg_ebx, AVX512)) {
220
          // Check for OS-support of ZMM and YMM state. Necessary for AVX-512.
221
0
          if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512;
222
0
        }
223
2
      }
224
2
    }
225
2
  }
226
2
  (void)reg_eax;  // Avoid compiler warning on unused-but-set variable.
227
2
  return flags & mask;
228
2
}
vp9_rtcd.c:x86_simd_caps
Line
Count
Source
183
1
static INLINE int x86_simd_caps(void) {
184
1
  unsigned int flags = 0;
185
1
  unsigned int mask = ~0u;
186
1
  unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
187
1
  char *env;
188
1
  (void)reg_ebx;
189
190
  /* See if the CPU capabilities are being overridden by the environment */
191
1
  env = getenv("VPX_SIMD_CAPS");
192
1
  if (env && *env) return (int)strtol(env, NULL, 0);
193
194
1
  env = getenv("VPX_SIMD_CAPS_MASK");
195
1
  if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
196
197
  /* Ensure that the CPUID instruction supports extended features */
198
1
  cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
199
1
  if (max_cpuid_val < 1) return 0;
200
201
  /* Get the standard feature flags */
202
1
  cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
203
204
1
  flags |= FEATURE_SET(reg_edx, MMX) ? HAS_MMX : 0;
205
1
  flags |= FEATURE_SET(reg_edx, SSE) ? HAS_SSE : 0;
206
1
  flags |= FEATURE_SET(reg_edx, SSE2) ? HAS_SSE2 : 0;
207
1
  flags |= FEATURE_SET(reg_ecx, SSE3) ? HAS_SSE3 : 0;
208
1
  flags |= FEATURE_SET(reg_ecx, SSSE3) ? HAS_SSSE3 : 0;
209
1
  flags |= FEATURE_SET(reg_ecx, SSE4_1) ? HAS_SSE4_1 : 0;
210
211
1
  if (FEATURE_SET(reg_ecx, AVX)) {
212
    // Check for OS-support of YMM state. Necessary for AVX and AVX2.
213
1
    if ((xgetbv() & 0x6) == 0x6) {
214
1
      flags |= HAS_AVX;
215
1
      if (max_cpuid_val >= 7) {
216
        /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
217
1
        cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
218
1
        flags |= FEATURE_SET(reg_ebx, AVX2) ? HAS_AVX2 : 0;
219
1
        if (FEATURE_SET(reg_ebx, AVX512)) {
220
          // Check for OS-support of ZMM and YMM state. Necessary for AVX-512.
221
0
          if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512;
222
0
        }
223
1
      }
224
1
    }
225
1
  }
226
1
  (void)reg_eax;  // Avoid compiler warning on unused-but-set variable.
227
1
  return flags & mask;
228
1
}
Unexecuted instantiation: vp8_dx_iface.c:x86_simd_caps
Unexecuted instantiation: onyxd_if.c:x86_simd_caps
Unexecuted instantiation: threading.c:x86_simd_caps
Unexecuted instantiation: systemdependent.c:x86_simd_caps
rtcd.c:x86_simd_caps
Line
Count
Source
183
1
static INLINE int x86_simd_caps(void) {
184
1
  unsigned int flags = 0;
185
1
  unsigned int mask = ~0u;
186
1
  unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
187
1
  char *env;
188
1
  (void)reg_ebx;
189
190
  /* See if the CPU capabilities are being overridden by the environment */
191
1
  env = getenv("VPX_SIMD_CAPS");
192
1
  if (env && *env) return (int)strtol(env, NULL, 0);
193
194
1
  env = getenv("VPX_SIMD_CAPS_MASK");
195
1
  if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
196
197
  /* Ensure that the CPUID instruction supports extended features */
198
1
  cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
199
1
  if (max_cpuid_val < 1) return 0;
200
201
  /* Get the standard feature flags */
202
1
  cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
203
204
1
  flags |= FEATURE_SET(reg_edx, MMX) ? HAS_MMX : 0;
205
1
  flags |= FEATURE_SET(reg_edx, SSE) ? HAS_SSE : 0;
206
1
  flags |= FEATURE_SET(reg_edx, SSE2) ? HAS_SSE2 : 0;
207
1
  flags |= FEATURE_SET(reg_ecx, SSE3) ? HAS_SSE3 : 0;
208
1
  flags |= FEATURE_SET(reg_ecx, SSSE3) ? HAS_SSSE3 : 0;
209
1
  flags |= FEATURE_SET(reg_ecx, SSE4_1) ? HAS_SSE4_1 : 0;
210
211
1
  if (FEATURE_SET(reg_ecx, AVX)) {
212
    // Check for OS-support of YMM state. Necessary for AVX and AVX2.
213
1
    if ((xgetbv() & 0x6) == 0x6) {
214
1
      flags |= HAS_AVX;
215
1
      if (max_cpuid_val >= 7) {
216
        /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
217
1
        cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
218
1
        flags |= FEATURE_SET(reg_ebx, AVX2) ? HAS_AVX2 : 0;
219
1
        if (FEATURE_SET(reg_ebx, AVX512)) {
220
          // Check for OS-support of ZMM and YMM state. Necessary for AVX-512.
221
0
          if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512;
222
0
        }
223
1
      }
224
1
    }
225
1
  }
226
1
  (void)reg_eax;  // Avoid compiler warning on unused-but-set variable.
227
1
  return flags & mask;
228
1
}
Unexecuted instantiation: decodeframe.c:x86_simd_caps
Unexecuted instantiation: detokenize.c:x86_simd_caps
Unexecuted instantiation: decodemv.c:x86_simd_caps
229
230
// Fine-Grain Measurement Functions
231
//
232
// If you are timing a small region of code, access the timestamp counter
233
// (TSC) via:
234
//
235
// unsigned int start = x86_tsc_start();
236
//   ...
237
// unsigned int end = x86_tsc_end();
238
// unsigned int diff = end - start;
239
//
240
// The start/end functions introduce a few more instructions than using
241
// x86_readtsc directly, but prevent the CPU's out-of-order execution from
242
// affecting the measurement (by having earlier/later instructions be evaluated
243
// in the time interval). See the white paper, "How to Benchmark Code
244
// Execution Times on Intel(R) IA-32 and IA-64 Instruction Set Architectures" by
245
// Gabriele Paoloni for more information.
246
//
247
// If you are timing a large function (CPU time > a couple of seconds), use
248
// x86_readtsc64 to read the timestamp counter in a 64-bit integer. The
249
// out-of-order leakage that can occur is minimal compared to total runtime.
250
0
static INLINE unsigned int x86_readtsc(void) {
251
0
#if defined(__GNUC__)
252
0
  unsigned int tsc;
253
0
  __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) :);
254
0
  return tsc;
255
0
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
256
0
  unsigned int tsc;
257
0
  asm volatile("rdtsc\n\t" : "=a"(tsc) :);
258
0
  return tsc;
259
0
#else
260
0
#if VPX_ARCH_X86_64
261
0
  return (unsigned int)__rdtsc();
262
0
#else
263
0
  __asm rdtsc;
264
0
#endif
265
0
#endif
266
0
}
Unexecuted instantiation: vpx_scale_rtcd.c:x86_readtsc
Unexecuted instantiation: vpx_dsp_rtcd.c:x86_readtsc
Unexecuted instantiation: vp9_rtcd.c:x86_readtsc
Unexecuted instantiation: vp8_dx_iface.c:x86_readtsc
Unexecuted instantiation: onyxd_if.c:x86_readtsc
Unexecuted instantiation: threading.c:x86_readtsc
Unexecuted instantiation: systemdependent.c:x86_readtsc
Unexecuted instantiation: rtcd.c:x86_readtsc
Unexecuted instantiation: decodeframe.c:x86_readtsc
Unexecuted instantiation: detokenize.c:x86_readtsc
Unexecuted instantiation: decodemv.c:x86_readtsc
267
// 64-bit CPU cycle counter
268
0
static INLINE uint64_t x86_readtsc64(void) {
269
0
#if defined(__GNUC__)
270
0
  uint32_t hi, lo;
271
0
  __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
272
0
  return ((uint64_t)hi << 32) | lo;
273
0
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
274
0
  uint_t hi, lo;
275
0
  asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
276
0
  return ((uint64_t)hi << 32) | lo;
277
0
#else
278
0
#if VPX_ARCH_X86_64
279
0
  return (uint64_t)__rdtsc();
280
0
#else
281
0
  __asm rdtsc;
282
0
#endif
283
0
#endif
284
0
}
Unexecuted instantiation: vpx_scale_rtcd.c:x86_readtsc64
Unexecuted instantiation: vpx_dsp_rtcd.c:x86_readtsc64
Unexecuted instantiation: vp9_rtcd.c:x86_readtsc64
Unexecuted instantiation: vp8_dx_iface.c:x86_readtsc64
Unexecuted instantiation: onyxd_if.c:x86_readtsc64
Unexecuted instantiation: threading.c:x86_readtsc64
Unexecuted instantiation: systemdependent.c:x86_readtsc64
Unexecuted instantiation: rtcd.c:x86_readtsc64
Unexecuted instantiation: decodeframe.c:x86_readtsc64
Unexecuted instantiation: detokenize.c:x86_readtsc64
Unexecuted instantiation: decodemv.c:x86_readtsc64
285
286
// 32-bit CPU cycle counter with a partial fence against out-of-order execution.
287
0
static INLINE unsigned int x86_readtscp(void) {
288
0
#if defined(__GNUC__)
289
0
  unsigned int tscp;
290
0
  __asm__ __volatile__("rdtscp\n\t" : "=a"(tscp) :);
291
0
  return tscp;
292
0
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
293
0
  unsigned int tscp;
294
0
  asm volatile("rdtscp\n\t" : "=a"(tscp) :);
295
0
  return tscp;
296
0
#elif defined(_MSC_VER)
297
0
  unsigned int ui;
298
0
  return (unsigned int)__rdtscp(&ui);
299
0
#else
300
0
#if VPX_ARCH_X86_64
301
0
  return (unsigned int)__rdtscp();
302
0
#else
303
0
  __asm rdtscp;
304
0
#endif
305
0
#endif
306
0
}
Unexecuted instantiation: vpx_scale_rtcd.c:x86_readtscp
Unexecuted instantiation: vpx_dsp_rtcd.c:x86_readtscp
Unexecuted instantiation: vp9_rtcd.c:x86_readtscp
Unexecuted instantiation: vp8_dx_iface.c:x86_readtscp
Unexecuted instantiation: onyxd_if.c:x86_readtscp
Unexecuted instantiation: threading.c:x86_readtscp
Unexecuted instantiation: systemdependent.c:x86_readtscp
Unexecuted instantiation: rtcd.c:x86_readtscp
Unexecuted instantiation: decodeframe.c:x86_readtscp
Unexecuted instantiation: detokenize.c:x86_readtscp
Unexecuted instantiation: decodemv.c:x86_readtscp
307
308
0
static INLINE unsigned int x86_tsc_start(void) {
309
0
  unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
310
0
  // This call should not be removed. See function notes above.
311
0
  cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
312
0
  // Avoid compiler warnings on unused-but-set variables.
313
0
  (void)reg_eax;
314
0
  (void)reg_ebx;
315
0
  (void)reg_ecx;
316
0
  (void)reg_edx;
317
0
  return x86_readtsc();
318
0
}
Unexecuted instantiation: vpx_scale_rtcd.c:x86_tsc_start
Unexecuted instantiation: vpx_dsp_rtcd.c:x86_tsc_start
Unexecuted instantiation: vp9_rtcd.c:x86_tsc_start
Unexecuted instantiation: vp8_dx_iface.c:x86_tsc_start
Unexecuted instantiation: onyxd_if.c:x86_tsc_start
Unexecuted instantiation: threading.c:x86_tsc_start
Unexecuted instantiation: systemdependent.c:x86_tsc_start
Unexecuted instantiation: rtcd.c:x86_tsc_start
Unexecuted instantiation: decodeframe.c:x86_tsc_start
Unexecuted instantiation: detokenize.c:x86_tsc_start
Unexecuted instantiation: decodemv.c:x86_tsc_start
319
320
0
static INLINE unsigned int x86_tsc_end(void) {
321
0
  uint32_t v = x86_readtscp();
322
0
  unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
323
0
  // This call should not be removed. See function notes above.
324
0
  cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
325
0
  // Avoid compiler warnings on unused-but-set variables.
326
0
  (void)reg_eax;
327
0
  (void)reg_ebx;
328
0
  (void)reg_ecx;
329
0
  (void)reg_edx;
330
0
  return v;
331
0
}
Unexecuted instantiation: vpx_scale_rtcd.c:x86_tsc_end
Unexecuted instantiation: vpx_dsp_rtcd.c:x86_tsc_end
Unexecuted instantiation: vp9_rtcd.c:x86_tsc_end
Unexecuted instantiation: vp8_dx_iface.c:x86_tsc_end
Unexecuted instantiation: onyxd_if.c:x86_tsc_end
Unexecuted instantiation: threading.c:x86_tsc_end
Unexecuted instantiation: systemdependent.c:x86_tsc_end
Unexecuted instantiation: rtcd.c:x86_tsc_end
Unexecuted instantiation: decodeframe.c:x86_tsc_end
Unexecuted instantiation: detokenize.c:x86_tsc_end
Unexecuted instantiation: decodemv.c:x86_tsc_end
332
333
#if defined(__GNUC__)
334
32.7M
#define x86_pause_hint() __asm__ __volatile__("pause \n\t")
335
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
336
#define x86_pause_hint() asm volatile("pause \n\t")
337
#else
338
#if VPX_ARCH_X86_64
339
#define x86_pause_hint() _mm_pause();
340
#else
341
#define x86_pause_hint() __asm pause
342
#endif
343
#endif
344
345
#if defined(__GNUC__)
346
0
static void x87_set_control_word(unsigned short mode) {
347
0
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
348
0
}
Unexecuted instantiation: vpx_scale_rtcd.c:x87_set_control_word
Unexecuted instantiation: vpx_dsp_rtcd.c:x87_set_control_word
Unexecuted instantiation: vp9_rtcd.c:x87_set_control_word
Unexecuted instantiation: vp8_dx_iface.c:x87_set_control_word
Unexecuted instantiation: onyxd_if.c:x87_set_control_word
Unexecuted instantiation: threading.c:x87_set_control_word
Unexecuted instantiation: systemdependent.c:x87_set_control_word
Unexecuted instantiation: rtcd.c:x87_set_control_word
Unexecuted instantiation: decodeframe.c:x87_set_control_word
Unexecuted instantiation: detokenize.c:x87_set_control_word
Unexecuted instantiation: decodemv.c:x87_set_control_word
349
0
static unsigned short x87_get_control_word(void) {
350
0
  unsigned short mode;
351
0
  __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&mode) :);
352
0
  return mode;
353
0
}
Unexecuted instantiation: vpx_scale_rtcd.c:x87_get_control_word
Unexecuted instantiation: vpx_dsp_rtcd.c:x87_get_control_word
Unexecuted instantiation: vp9_rtcd.c:x87_get_control_word
Unexecuted instantiation: vp8_dx_iface.c:x87_get_control_word
Unexecuted instantiation: onyxd_if.c:x87_get_control_word
Unexecuted instantiation: threading.c:x87_get_control_word
Unexecuted instantiation: systemdependent.c:x87_get_control_word
Unexecuted instantiation: rtcd.c:x87_get_control_word
Unexecuted instantiation: decodeframe.c:x87_get_control_word
Unexecuted instantiation: detokenize.c:x87_get_control_word
Unexecuted instantiation: decodemv.c:x87_get_control_word
354
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
355
static void x87_set_control_word(unsigned short mode) {
356
  asm volatile("fldcw %0" : : "m"(*&mode));
357
}
358
static unsigned short x87_get_control_word(void) {
359
  unsigned short mode;
360
  asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
361
  return mode;
362
}
363
#elif VPX_ARCH_X86_64
364
/* No fldcw intrinsics on Windows x64, punt to external asm */
365
extern void vpx_winx64_fldcw(unsigned short mode);
366
extern unsigned short vpx_winx64_fstcw(void);
367
#define x87_set_control_word vpx_winx64_fldcw
368
#define x87_get_control_word vpx_winx64_fstcw
369
#else
370
static void x87_set_control_word(unsigned short mode) {
371
  __asm { fldcw mode }
372
}
373
static unsigned short x87_get_control_word(void) {
374
  unsigned short mode;
375
  __asm { fstcw mode }
376
  return mode;
377
}
378
#endif
379
380
0
static INLINE unsigned int x87_set_double_precision(void) {
381
0
  unsigned int mode = x87_get_control_word();
382
0
  // Intel 64 and IA-32 Architectures Developer's Manual: Vol. 1
383
0
  // https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
384
0
  // 8.1.5.2 Precision Control Field
385
0
  // Bits 8 and 9 (0x300) of the x87 FPU Control Word ("Precision Control")
386
0
  // determine the number of bits used in floating point calculations. To match
387
0
  // later SSE instructions restrict x87 operations to Double Precision (0x200).
388
0
  // Precision                     PC Field
389
0
  // Single Precision (24-Bits)    00B
390
0
  // Reserved                      01B
391
0
  // Double Precision (53-Bits)    10B
392
0
  // Extended Precision (64-Bits)  11B
393
0
  x87_set_control_word((mode & ~0x300u) | 0x200u);
394
0
  return mode;
395
0
}
Unexecuted instantiation: vpx_scale_rtcd.c:x87_set_double_precision
Unexecuted instantiation: vpx_dsp_rtcd.c:x87_set_double_precision
Unexecuted instantiation: vp9_rtcd.c:x87_set_double_precision
Unexecuted instantiation: vp8_dx_iface.c:x87_set_double_precision
Unexecuted instantiation: onyxd_if.c:x87_set_double_precision
Unexecuted instantiation: threading.c:x87_set_double_precision
Unexecuted instantiation: systemdependent.c:x87_set_double_precision
Unexecuted instantiation: rtcd.c:x87_set_double_precision
Unexecuted instantiation: decodeframe.c:x87_set_double_precision
Unexecuted instantiation: detokenize.c:x87_set_double_precision
Unexecuted instantiation: decodemv.c:x87_set_double_precision
396
397
#ifdef __cplusplus
398
}  // extern "C"
399
#endif
400
401
#endif  // VPX_VPX_PORTS_X86_H_