/src/libvpx/vpx_ports/x86.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #ifndef VPX_VPX_PORTS_X86_H_ |
12 | | #define VPX_VPX_PORTS_X86_H_ |
13 | | #include <stdlib.h> |
14 | | |
15 | | #if defined(_MSC_VER) |
16 | | #include <intrin.h> /* For __cpuidex, __rdtsc */ |
17 | | #endif |
18 | | |
19 | | #include "vpx_config.h" |
20 | | #include "vpx/vpx_integer.h" |
21 | | |
22 | | #ifdef __cplusplus |
23 | | extern "C" { |
24 | | #endif |
25 | | |
// Identifiers for x86 CPU makers. The code that maps a processor to one of
// these values is not part of this header; judging by the names, each entry
// presumably corresponds to a CPUID vendor string -- confirm against the user.
// VPX_CPU_UNKNOWN is pinned to -1, so the named entries count up from 0 and
// VPX_CPU_LAST ends up equal to the number of named entries.
26 | | typedef enum { |
27 | | VPX_CPU_UNKNOWN = -1, |
28 | | VPX_CPU_AMD, |
29 | | VPX_CPU_AMD_OLD, |
30 | | VPX_CPU_CENTAUR, |
31 | | VPX_CPU_CYRIX, |
32 | | VPX_CPU_INTEL, |
33 | | VPX_CPU_NEXGEN, |
34 | | VPX_CPU_NSC, |
35 | | VPX_CPU_RISE, |
36 | | VPX_CPU_SIS, |
37 | | VPX_CPU_TRANSMETA, |
38 | | VPX_CPU_TRANSMETA_OLD, |
39 | | VPX_CPU_UMC, |
40 | | VPX_CPU_VIA, |
41 | | |
42 | | VPX_CPU_LAST |
43 | | } vpx_cpu_t; |
44 | | |
// cpuid(func, func2, ax, bx, cx, dx)
//
// Executes the CPUID instruction with eax = func and ecx = func2 and stores
// the resulting eax/ebx/ecx/edx into the four output lvalues. Exactly one
// definition is selected, by toolchain first and target width second.
//
// GCC-compatible compilers, 64-bit: all four registers are bound directly as
// asm outputs.
45 | | #if defined(__GNUC__) || defined(__ANDROID__) |
46 | | #if VPX_ARCH_X86_64 |
47 | | #define cpuid(func, func2, ax, bx, cx, dx) \ |
48 | 18 | __asm__ __volatile__("cpuid \n\t" \ |
49 | 18 | : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \ |
50 | 18 | : "a"(func), "c"(func2)) |
51 | | #else |
// GCC-compatible compilers, 32-bit: ebx is copied to edi before CPUID and
// swapped back afterwards, so ebx is unchanged on exit and the CPUID ebx
// result is delivered through edi ("=D"). Presumably this is done because ebx
// can be reserved by the compiler on 32-bit targets (e.g. as the PIC
// register) -- confirm.
52 | | #define cpuid(func, func2, ax, bx, cx, dx) \ |
53 | | __asm__ __volatile__( \ |
54 | | "mov %%ebx, %%edi \n\t" \ |
55 | | "cpuid \n\t" \ |
56 | | "xchg %%edi, %%ebx \n\t" \ |
57 | | : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \ |
58 | | : "a"(func), "c"(func2)) |
59 | | #endif |
// Sun compilers. The 64-bit form parks rbx in rsi around CPUID and returns the
// ebx result through edi; the 32-bit form saves/restores ebx on the stack.
// NOTE(review): after the second xchg in the 64-bit form rsi holds the CPUID
// rbx value rather than its original contents, and rsi is not listed as an
// output or clobber -- confirm whether that is intentional.
60 | | #elif defined(__SUNPRO_C) || \ |
61 | | defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ |
62 | | #if VPX_ARCH_X86_64 |
63 | | #define cpuid(func, func2, ax, bx, cx, dx) \ |
64 | | asm volatile( \ |
65 | | "xchg %rsi, %rbx \n\t" \ |
66 | | "cpuid \n\t" \ |
67 | | "movl %ebx, %edi \n\t" \ |
68 | | "xchg %rsi, %rbx \n\t" \ |
69 | | : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \ |
70 | | : "a"(func), "c"(func2)) |
71 | | #else |
72 | | #define cpuid(func, func2, ax, bx, cx, dx) \ |
73 | | asm volatile( \ |
74 | | "pushl %ebx \n\t" \ |
75 | | "cpuid \n\t" \ |
76 | | "movl %ebx, %edi \n\t" \ |
77 | | "popl %ebx \n\t" \ |
78 | | : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \ |
79 | | : "a"(func), "c"(func2)) |
80 | | #endif |
// Everything else is treated as MSVC-style. On 64-bit targets the compiler
// intrinsics are used and the results are copied out of a local int[4].
81 | | #else /* end __SUNPRO__ */ |
82 | | #if VPX_ARCH_X86_64 |
83 | | #if defined(_MSC_VER) && _MSC_VER > 1500 |
84 | | #define cpuid(func, func2, a, b, c, d) \ |
85 | | do { \ |
86 | | int regs[4]; \ |
87 | | __cpuidex(regs, func, func2); \ |
88 | | a = regs[0]; \ |
89 | | b = regs[1]; \ |
90 | | c = regs[2]; \ |
91 | | d = regs[3]; \ |
92 | | } while (0) |
93 | | #else |
// NOTE(review): this fallback calls __cpuid(regs, func) and never uses func2,
// so a sub-leaf request (such as the leaf 7 query in x86_simd_caps) does not
// set ecx explicitly on this path.
94 | | #define cpuid(func, func2, a, b, c, d) \ |
95 | | do { \ |
96 | | int regs[4]; \ |
97 | | __cpuid(regs, func); \ |
98 | | a = regs[0]; \ |
99 | | b = regs[1]; \ |
100 | | c = regs[2]; \ |
101 | | d = regs[3]; \ |
102 | | } while (0) |
103 | | #endif |
104 | | #else |
// 32-bit MSVC-style inline assembly. This expansion is a bare statement
// sequence (no do/while wrapper), so it should only be used as a full
// statement on its own.
105 | | #define cpuid(func, func2, a, b, c, d) \ |
106 | | __asm mov eax, func __asm mov ecx, func2 __asm cpuid __asm mov a, \ |
107 | | eax __asm mov b, ebx __asm mov c, ecx __asm mov d, edx |
108 | | #endif |
109 | | #endif /* end others */ |
110 | | |
111 | | // NaCl has no support for xgetbv or the raw opcode. |
112 | | #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) |
113 | 6 | static INLINE uint64_t xgetbv(void) { |
114 | 6 | const uint32_t ecx = 0; |
115 | 6 | uint32_t eax, edx; |
116 | | // Use the raw opcode for xgetbv for compatibility with older toolchains. |
117 | 6 | __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" |
118 | 6 | : "=a"(eax), "=d"(edx) |
119 | 6 | : "c"(ecx)); |
120 | 6 | return ((uint64_t)edx << 32) | eax; |
121 | 6 | } Unexecuted instantiation: vpx_encoder.c:xgetbv Unexecuted instantiation: vp8_cx_iface.c:xgetbv Unexecuted instantiation: ethreading.c:xgetbv Unexecuted instantiation: onyx_if.c:xgetbv Unexecuted instantiation: pickinter.c:xgetbv Unexecuted instantiation: picklpf.c:xgetbv Unexecuted instantiation: vp8_quantize.c:xgetbv Unexecuted instantiation: ratectrl.c:xgetbv Unexecuted instantiation: rdopt.c:xgetbv Unexecuted instantiation: segmentation.c:xgetbv Unexecuted instantiation: vp8_skin_detection.c:xgetbv Unexecuted instantiation: tokenize.c:xgetbv Unexecuted instantiation: temporal_filter.c:xgetbv Unexecuted instantiation: vp8_enc_stubs_sse2.c:xgetbv Line | Count | Source | 113 | 2 | static INLINE uint64_t xgetbv(void) { | 114 | 2 | const uint32_t ecx = 0; | 115 | 2 | uint32_t eax, edx; | 116 | | // Use the raw opcode for xgetbv for compatibility with older toolchains. | 117 | 2 | __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" | 118 | 2 | : "=a"(eax), "=d"(edx) | 119 | 2 | : "c"(ecx)); | 120 | 2 | return ((uint64_t)edx << 32) | eax; | 121 | 2 | } |
Line | Count | Source | 113 | 2 | static INLINE uint64_t xgetbv(void) { | 114 | 2 | const uint32_t ecx = 0; | 115 | 2 | uint32_t eax, edx; | 116 | | // Use the raw opcode for xgetbv for compatibility with older toolchains. | 117 | 2 | __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" | 118 | 2 | : "=a"(eax), "=d"(edx) | 119 | 2 | : "c"(ecx)); | 120 | 2 | return ((uint64_t)edx << 32) | eax; | 121 | 2 | } |
Unexecuted instantiation: systemdependent.c:xgetbv Line | Count | Source | 113 | 1 | static INLINE uint64_t xgetbv(void) { | 114 | 1 | const uint32_t ecx = 0; | 115 | 1 | uint32_t eax, edx; | 116 | | // Use the raw opcode for xgetbv for compatibility with older toolchains. | 117 | 1 | __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" | 118 | 1 | : "=a"(eax), "=d"(edx) | 119 | 1 | : "c"(ecx)); | 120 | 1 | return ((uint64_t)edx << 32) | eax; | 121 | 1 | } |
Unexecuted instantiation: bitstream.c:xgetbv Unexecuted instantiation: encodeframe.c:xgetbv Unexecuted instantiation: encodeintra.c:xgetbv Unexecuted instantiation: encodemb.c:xgetbv Unexecuted instantiation: encodemv.c:xgetbv Unexecuted instantiation: firstpass.c:xgetbv Unexecuted instantiation: mcomp.c:xgetbv Unexecuted instantiation: modecosts.c:xgetbv Unexecuted instantiation: vp8_quantize_sse2.c:xgetbv Line | Count | Source | 113 | 1 | static INLINE uint64_t xgetbv(void) { | 114 | 1 | const uint32_t ecx = 0; | 115 | 1 | uint32_t eax, edx; | 116 | | // Use the raw opcode for xgetbv for compatibility with older toolchains. | 117 | 1 | __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" | 118 | 1 | : "=a"(eax), "=d"(edx) | 119 | 1 | : "c"(ecx)); | 120 | 1 | return ((uint64_t)edx << 32) | eax; | 121 | 1 | } |
|
122 | | #elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \ |
123 | | _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 |
124 | | #include <immintrin.h> |
125 | | #define xgetbv() _xgetbv(0) |
126 | | #elif defined(_MSC_VER) && defined(_M_IX86) |
127 | | static INLINE uint64_t xgetbv(void) { |
128 | | uint32_t eax_, edx_; |
129 | | __asm { |
130 | | xor ecx, ecx // ecx = 0 |
131 | | // Use the raw opcode for xgetbv for compatibility with older toolchains. |
132 | | __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 |
133 | | mov eax_, eax |
134 | | mov edx_, edx |
135 | | } |
136 | | return ((uint64_t)edx_ << 32) | eax_; |
137 | | } |
138 | | #else |
139 | | #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. |
140 | | #endif |
141 | | |
// MSVC with _MSC_VER >= 1700 only: pull in <windows.h> (with NOMINMAX forced
// on and WIN32_LEAN_AND_MEAN defined if it was not already) so that
// WINAPI_FAMILY_PARTITION can be evaluated. When building for the
// WINAPI_FAMILY_APP partition, getenv(x) is replaced by NULL, which means the
// environment lookups in x86_simd_caps() always see "not set" there.
142 | | #if defined(_MSC_VER) && _MSC_VER >= 1700 |
143 | | #undef NOMINMAX |
144 | | #define NOMINMAX |
145 | | #ifndef WIN32_LEAN_AND_MEAN |
146 | | #define WIN32_LEAN_AND_MEAN |
147 | | #endif |
148 | | #include <windows.h> |
149 | | #if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP) |
150 | | #define getenv(x) NULL |
151 | | #endif |
152 | | #endif |
153 | | |
// Capability flags OR-ed together by x86_simd_caps(). Each flag is a distinct
// single bit, so a caller can test for a feature with (caps & HAS_xxx).
154 | 6 | #define HAS_MMX 0x001 |
155 | 6 | #define HAS_SSE 0x002 |
156 | 6 | #define HAS_SSE2 0x004 |
157 | 7 | #define HAS_SSE3 0x008 |
158 | 158 | #define HAS_SSSE3 0x010 |
159 | 34 | #define HAS_SSE4_1 0x020 |
160 | 10 | #define HAS_AVX 0x040 |
161 | 242 | #define HAS_AVX2 0x080 |
162 | 2 | #define HAS_AVX512 0x100 |
// BIT(n) yields an unsigned value with only bit n set. It is defined here only
// if no earlier header already provided a BIT macro.
163 | | #ifndef BIT |
164 | 126 | #define BIT(n) (1u << (n)) |
165 | | #endif |
166 | | |
167 | 6 | static INLINE int x86_simd_caps(void) { |
168 | 6 | unsigned int flags = 0; |
169 | 6 | unsigned int mask = ~0u; |
170 | 6 | unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; |
171 | 6 | char *env; |
172 | 6 | (void)reg_ebx; |
173 | | |
174 | | /* See if the CPU capabilities are being overridden by the environment */ |
175 | 6 | env = getenv("VPX_SIMD_CAPS"); |
176 | | |
177 | 6 | if (env && *env) return (int)strtol(env, NULL, 0); |
178 | | |
179 | 6 | env = getenv("VPX_SIMD_CAPS_MASK"); |
180 | | |
181 | 6 | if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0); |
182 | | |
183 | | /* Ensure that the CPUID instruction supports extended features */ |
184 | 6 | cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); |
185 | | |
186 | 6 | if (max_cpuid_val < 1) return 0; |
187 | | |
188 | | /* Get the standard feature flags */ |
189 | 6 | cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
190 | | |
191 | 6 | if (reg_edx & BIT(23)) flags |= HAS_MMX; |
192 | | |
193 | 6 | if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ |
194 | | |
195 | 6 | if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ |
196 | | |
197 | 6 | if (reg_ecx & BIT(0)) flags |= HAS_SSE3; |
198 | | |
199 | 6 | if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; |
200 | | |
201 | 6 | if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; |
202 | | |
203 | | // bits 27 (OSXSAVE) & 28 (256-bit AVX) |
204 | 6 | if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { |
205 | | // Check for OS-support of YMM state. Necessary for AVX and AVX2. |
206 | 6 | if ((xgetbv() & 0x6) == 0x6) { |
207 | 6 | flags |= HAS_AVX; |
208 | | |
209 | 6 | if (max_cpuid_val >= 7) { |
210 | | /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ |
211 | 6 | cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
212 | | |
213 | 6 | if (reg_ebx & BIT(5)) flags |= HAS_AVX2; |
214 | | |
215 | | // bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) & |
216 | | // 30 (AVX-512BW) & 32 (AVX-512VL) |
217 | 6 | if ((reg_ebx & (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) == |
218 | 6 | (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) { |
219 | | // Check for OS-support of ZMM and YMM state. Necessary for AVX-512. |
220 | 0 | if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512; |
221 | 0 | } |
222 | 6 | } |
223 | 6 | } |
224 | 6 | } |
225 | | |
226 | 6 | (void)reg_eax; // Avoid compiler warning on unused-but-set variable. |
227 | | |
228 | 6 | return flags & mask; |
229 | 6 | } Unexecuted instantiation: vpx_encoder.c:x86_simd_caps Unexecuted instantiation: vp8_cx_iface.c:x86_simd_caps Unexecuted instantiation: ethreading.c:x86_simd_caps Unexecuted instantiation: onyx_if.c:x86_simd_caps Unexecuted instantiation: pickinter.c:x86_simd_caps Unexecuted instantiation: picklpf.c:x86_simd_caps Unexecuted instantiation: vp8_quantize.c:x86_simd_caps Unexecuted instantiation: ratectrl.c:x86_simd_caps Unexecuted instantiation: rdopt.c:x86_simd_caps Unexecuted instantiation: segmentation.c:x86_simd_caps Unexecuted instantiation: vp8_skin_detection.c:x86_simd_caps Unexecuted instantiation: tokenize.c:x86_simd_caps Unexecuted instantiation: temporal_filter.c:x86_simd_caps Unexecuted instantiation: vp8_enc_stubs_sse2.c:x86_simd_caps vpx_scale_rtcd.c:x86_simd_caps Line | Count | Source | 167 | 2 | static INLINE int x86_simd_caps(void) { | 168 | 2 | unsigned int flags = 0; | 169 | 2 | unsigned int mask = ~0u; | 170 | 2 | unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; | 171 | 2 | char *env; | 172 | 2 | (void)reg_ebx; | 173 | | | 174 | | /* See if the CPU capabilities are being overridden by the environment */ | 175 | 2 | env = getenv("VPX_SIMD_CAPS"); | 176 | | | 177 | 2 | if (env && *env) return (int)strtol(env, NULL, 0); | 178 | | | 179 | 2 | env = getenv("VPX_SIMD_CAPS_MASK"); | 180 | | | 181 | 2 | if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0); | 182 | | | 183 | | /* Ensure that the CPUID instruction supports extended features */ | 184 | 2 | cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); | 185 | | | 186 | 2 | if (max_cpuid_val < 1) return 0; | 187 | | | 188 | | /* Get the standard feature flags */ | 189 | 2 | cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 190 | | | 191 | 2 | if (reg_edx & BIT(23)) flags |= HAS_MMX; | 192 | | | 193 | 2 | if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ | 194 | | | 195 | 2 | if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ | 196 | | | 197 | 2 | if (reg_ecx 
& BIT(0)) flags |= HAS_SSE3; | 198 | | | 199 | 2 | if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; | 200 | | | 201 | 2 | if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; | 202 | | | 203 | | // bits 27 (OSXSAVE) & 28 (256-bit AVX) | 204 | 2 | if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { | 205 | | // Check for OS-support of YMM state. Necessary for AVX and AVX2. | 206 | 2 | if ((xgetbv() & 0x6) == 0x6) { | 207 | 2 | flags |= HAS_AVX; | 208 | | | 209 | 2 | if (max_cpuid_val >= 7) { | 210 | | /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ | 211 | 2 | cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 212 | | | 213 | 2 | if (reg_ebx & BIT(5)) flags |= HAS_AVX2; | 214 | | | 215 | | // bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) & | 216 | | // 30 (AVX-512BW) & 32 (AVX-512VL) | 217 | 2 | if ((reg_ebx & (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) == | 218 | 2 | (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) { | 219 | | // Check for OS-support of ZMM and YMM state. Necessary for AVX-512. | 220 | 0 | if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512; | 221 | 0 | } | 222 | 2 | } | 223 | 2 | } | 224 | 2 | } | 225 | | | 226 | 2 | (void)reg_eax; // Avoid compiler warning on unused-but-set variable. | 227 | | | 228 | 2 | return flags & mask; | 229 | 2 | } |
vpx_dsp_rtcd.c:x86_simd_caps Line | Count | Source | 167 | 2 | static INLINE int x86_simd_caps(void) { | 168 | 2 | unsigned int flags = 0; | 169 | 2 | unsigned int mask = ~0u; | 170 | 2 | unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; | 171 | 2 | char *env; | 172 | 2 | (void)reg_ebx; | 173 | | | 174 | | /* See if the CPU capabilities are being overridden by the environment */ | 175 | 2 | env = getenv("VPX_SIMD_CAPS"); | 176 | | | 177 | 2 | if (env && *env) return (int)strtol(env, NULL, 0); | 178 | | | 179 | 2 | env = getenv("VPX_SIMD_CAPS_MASK"); | 180 | | | 181 | 2 | if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0); | 182 | | | 183 | | /* Ensure that the CPUID instruction supports extended features */ | 184 | 2 | cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); | 185 | | | 186 | 2 | if (max_cpuid_val < 1) return 0; | 187 | | | 188 | | /* Get the standard feature flags */ | 189 | 2 | cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 190 | | | 191 | 2 | if (reg_edx & BIT(23)) flags |= HAS_MMX; | 192 | | | 193 | 2 | if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ | 194 | | | 195 | 2 | if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ | 196 | | | 197 | 2 | if (reg_ecx & BIT(0)) flags |= HAS_SSE3; | 198 | | | 199 | 2 | if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; | 200 | | | 201 | 2 | if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; | 202 | | | 203 | | // bits 27 (OSXSAVE) & 28 (256-bit AVX) | 204 | 2 | if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { | 205 | | // Check for OS-support of YMM state. Necessary for AVX and AVX2. | 206 | 2 | if ((xgetbv() & 0x6) == 0x6) { | 207 | 2 | flags |= HAS_AVX; | 208 | | | 209 | 2 | if (max_cpuid_val >= 7) { | 210 | | /* Get the leaf 7 feature flags. 
Needed to check for AVX2 support */ | 211 | 2 | cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 212 | | | 213 | 2 | if (reg_ebx & BIT(5)) flags |= HAS_AVX2; | 214 | | | 215 | | // bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) & | 216 | | // 30 (AVX-512BW) & 32 (AVX-512VL) | 217 | 2 | if ((reg_ebx & (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) == | 218 | 2 | (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) { | 219 | | // Check for OS-support of ZMM and YMM state. Necessary for AVX-512. | 220 | 0 | if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512; | 221 | 0 | } | 222 | 2 | } | 223 | 2 | } | 224 | 2 | } | 225 | | | 226 | 2 | (void)reg_eax; // Avoid compiler warning on unused-but-set variable. | 227 | | | 228 | 2 | return flags & mask; | 229 | 2 | } |
Unexecuted instantiation: systemdependent.c:x86_simd_caps Line | Count | Source | 167 | 1 | static INLINE int x86_simd_caps(void) { | 168 | 1 | unsigned int flags = 0; | 169 | 1 | unsigned int mask = ~0u; | 170 | 1 | unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; | 171 | 1 | char *env; | 172 | 1 | (void)reg_ebx; | 173 | | | 174 | | /* See if the CPU capabilities are being overridden by the environment */ | 175 | 1 | env = getenv("VPX_SIMD_CAPS"); | 176 | | | 177 | 1 | if (env && *env) return (int)strtol(env, NULL, 0); | 178 | | | 179 | 1 | env = getenv("VPX_SIMD_CAPS_MASK"); | 180 | | | 181 | 1 | if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0); | 182 | | | 183 | | /* Ensure that the CPUID instruction supports extended features */ | 184 | 1 | cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); | 185 | | | 186 | 1 | if (max_cpuid_val < 1) return 0; | 187 | | | 188 | | /* Get the standard feature flags */ | 189 | 1 | cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 190 | | | 191 | 1 | if (reg_edx & BIT(23)) flags |= HAS_MMX; | 192 | | | 193 | 1 | if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ | 194 | | | 195 | 1 | if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ | 196 | | | 197 | 1 | if (reg_ecx & BIT(0)) flags |= HAS_SSE3; | 198 | | | 199 | 1 | if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; | 200 | | | 201 | 1 | if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; | 202 | | | 203 | | // bits 27 (OSXSAVE) & 28 (256-bit AVX) | 204 | 1 | if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { | 205 | | // Check for OS-support of YMM state. Necessary for AVX and AVX2. | 206 | 1 | if ((xgetbv() & 0x6) == 0x6) { | 207 | 1 | flags |= HAS_AVX; | 208 | | | 209 | 1 | if (max_cpuid_val >= 7) { | 210 | | /* Get the leaf 7 feature flags. 
Needed to check for AVX2 support */ | 211 | 1 | cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 212 | | | 213 | 1 | if (reg_ebx & BIT(5)) flags |= HAS_AVX2; | 214 | | | 215 | | // bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) & | 216 | | // 30 (AVX-512BW) & 32 (AVX-512VL) | 217 | 1 | if ((reg_ebx & (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) == | 218 | 1 | (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) { | 219 | | // Check for OS-support of ZMM and YMM state. Necessary for AVX-512. | 220 | 0 | if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512; | 221 | 0 | } | 222 | 1 | } | 223 | 1 | } | 224 | 1 | } | 225 | | | 226 | 1 | (void)reg_eax; // Avoid compiler warning on unused-but-set variable. | 227 | | | 228 | 1 | return flags & mask; | 229 | 1 | } |
Unexecuted instantiation: bitstream.c:x86_simd_caps Unexecuted instantiation: encodeframe.c:x86_simd_caps Unexecuted instantiation: encodeintra.c:x86_simd_caps Unexecuted instantiation: encodemb.c:x86_simd_caps Unexecuted instantiation: encodemv.c:x86_simd_caps Unexecuted instantiation: firstpass.c:x86_simd_caps Unexecuted instantiation: mcomp.c:x86_simd_caps Unexecuted instantiation: modecosts.c:x86_simd_caps Unexecuted instantiation: vp8_quantize_sse2.c:x86_simd_caps Line | Count | Source | 167 | 1 | static INLINE int x86_simd_caps(void) { | 168 | 1 | unsigned int flags = 0; | 169 | 1 | unsigned int mask = ~0u; | 170 | 1 | unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; | 171 | 1 | char *env; | 172 | 1 | (void)reg_ebx; | 173 | | | 174 | | /* See if the CPU capabilities are being overridden by the environment */ | 175 | 1 | env = getenv("VPX_SIMD_CAPS"); | 176 | | | 177 | 1 | if (env && *env) return (int)strtol(env, NULL, 0); | 178 | | | 179 | 1 | env = getenv("VPX_SIMD_CAPS_MASK"); | 180 | | | 181 | 1 | if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0); | 182 | | | 183 | | /* Ensure that the CPUID instruction supports extended features */ | 184 | 1 | cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); | 185 | | | 186 | 1 | if (max_cpuid_val < 1) return 0; | 187 | | | 188 | | /* Get the standard feature flags */ | 189 | 1 | cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 190 | | | 191 | 1 | if (reg_edx & BIT(23)) flags |= HAS_MMX; | 192 | | | 193 | 1 | if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ | 194 | | | 195 | 1 | if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ | 196 | | | 197 | 1 | if (reg_ecx & BIT(0)) flags |= HAS_SSE3; | 198 | | | 199 | 1 | if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; | 200 | | | 201 | 1 | if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; | 202 | | | 203 | | // bits 27 (OSXSAVE) & 28 (256-bit AVX) | 204 | 1 | if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { | 205 | | // Check for 
OS-support of YMM state. Necessary for AVX and AVX2. | 206 | 1 | if ((xgetbv() & 0x6) == 0x6) { | 207 | 1 | flags |= HAS_AVX; | 208 | | | 209 | 1 | if (max_cpuid_val >= 7) { | 210 | | /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ | 211 | 1 | cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 212 | | | 213 | 1 | if (reg_ebx & BIT(5)) flags |= HAS_AVX2; | 214 | | | 215 | | // bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) & | 216 | | // 30 (AVX-512BW) & 32 (AVX-512VL) | 217 | 1 | if ((reg_ebx & (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) == | 218 | 1 | (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) { | 219 | | // Check for OS-support of ZMM and YMM state. Necessary for AVX-512. | 220 | 0 | if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512; | 221 | 0 | } | 222 | 1 | } | 223 | 1 | } | 224 | 1 | } | 225 | | | 226 | 1 | (void)reg_eax; // Avoid compiler warning on unused-but-set variable. | 227 | | | 228 | 1 | return flags & mask; | 229 | 1 | } |
|
230 | | |
231 | | // Fine-Grain Measurement Functions |
232 | | // |
233 | | // If you are timing a small region of code, access the timestamp counter |
234 | | // (TSC) via: |
235 | | // |
236 | | // unsigned int start = x86_tsc_start(); |
237 | | // ... |
238 | | // unsigned int end = x86_tsc_end(); |
239 | | // unsigned int diff = end - start; |
240 | | // |
241 | | // The start/end functions introduce a few more instructions than using |
242 | | // x86_readtsc directly, but prevent the CPU's out-of-order execution from |
243 | | // affecting the measurement (by having earlier/later instructions be evaluated |
244 | | // in the time interval). See the white paper, "How to Benchmark Code |
245 | | // Execution Times on Intel(R) IA-32 and IA-64 Instruction Set Architectures" by |
246 | | // Gabriele Paoloni for more information. |
247 | | // |
248 | | // If you are timing a large function (CPU time > a couple of seconds), use |
249 | | // x86_readtsc64 to read the timestamp counter in a 64-bit integer. The |
250 | | // out-of-order leakage that can occur is minimal compared to total runtime. |
251 | 0 | static INLINE unsigned int x86_readtsc(void) { |
252 | 0 | #if defined(__GNUC__) |
253 | 0 | unsigned int tsc; |
254 | 0 | __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) :); |
255 | 0 | return tsc; |
256 | 0 | #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
257 | 0 | unsigned int tsc; |
258 | 0 | asm volatile("rdtsc\n\t" : "=a"(tsc) :); |
259 | 0 | return tsc; |
260 | 0 | #else |
261 | 0 | #if VPX_ARCH_X86_64 |
262 | 0 | return (unsigned int)__rdtsc(); |
263 | 0 | #else |
264 | 0 | __asm rdtsc; |
265 | 0 | #endif |
266 | 0 | #endif |
267 | 0 | } Unexecuted instantiation: vpx_encoder.c:x86_readtsc Unexecuted instantiation: vp8_cx_iface.c:x86_readtsc Unexecuted instantiation: ethreading.c:x86_readtsc Unexecuted instantiation: onyx_if.c:x86_readtsc Unexecuted instantiation: pickinter.c:x86_readtsc Unexecuted instantiation: picklpf.c:x86_readtsc Unexecuted instantiation: vp8_quantize.c:x86_readtsc Unexecuted instantiation: ratectrl.c:x86_readtsc Unexecuted instantiation: rdopt.c:x86_readtsc Unexecuted instantiation: segmentation.c:x86_readtsc Unexecuted instantiation: vp8_skin_detection.c:x86_readtsc Unexecuted instantiation: tokenize.c:x86_readtsc Unexecuted instantiation: temporal_filter.c:x86_readtsc Unexecuted instantiation: vp8_enc_stubs_sse2.c:x86_readtsc Unexecuted instantiation: vpx_scale_rtcd.c:x86_readtsc Unexecuted instantiation: vpx_dsp_rtcd.c:x86_readtsc Unexecuted instantiation: systemdependent.c:x86_readtsc Unexecuted instantiation: rtcd.c:x86_readtsc Unexecuted instantiation: bitstream.c:x86_readtsc Unexecuted instantiation: encodeframe.c:x86_readtsc Unexecuted instantiation: encodeintra.c:x86_readtsc Unexecuted instantiation: encodemb.c:x86_readtsc Unexecuted instantiation: encodemv.c:x86_readtsc Unexecuted instantiation: firstpass.c:x86_readtsc Unexecuted instantiation: mcomp.c:x86_readtsc Unexecuted instantiation: modecosts.c:x86_readtsc Unexecuted instantiation: vp8_quantize_sse2.c:x86_readtsc Unexecuted instantiation: vp9_rtcd.c:x86_readtsc |
268 | | // 64-bit CPU cycle counter |
269 | 0 | static INLINE uint64_t x86_readtsc64(void) { |
270 | 0 | #if defined(__GNUC__) |
271 | 0 | uint32_t hi, lo; |
272 | 0 | __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); |
273 | 0 | return ((uint64_t)hi << 32) | lo; |
274 | 0 | #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
275 | 0 | uint_t hi, lo; |
276 | 0 | asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); |
277 | 0 | return ((uint64_t)hi << 32) | lo; |
278 | 0 | #else |
279 | 0 | #if VPX_ARCH_X86_64 |
280 | 0 | return (uint64_t)__rdtsc(); |
281 | 0 | #else |
282 | 0 | __asm rdtsc; |
283 | 0 | #endif |
284 | 0 | #endif |
285 | 0 | } Unexecuted instantiation: vpx_encoder.c:x86_readtsc64 Unexecuted instantiation: vp8_cx_iface.c:x86_readtsc64 Unexecuted instantiation: ethreading.c:x86_readtsc64 Unexecuted instantiation: onyx_if.c:x86_readtsc64 Unexecuted instantiation: pickinter.c:x86_readtsc64 Unexecuted instantiation: picklpf.c:x86_readtsc64 Unexecuted instantiation: vp8_quantize.c:x86_readtsc64 Unexecuted instantiation: ratectrl.c:x86_readtsc64 Unexecuted instantiation: rdopt.c:x86_readtsc64 Unexecuted instantiation: segmentation.c:x86_readtsc64 Unexecuted instantiation: vp8_skin_detection.c:x86_readtsc64 Unexecuted instantiation: tokenize.c:x86_readtsc64 Unexecuted instantiation: temporal_filter.c:x86_readtsc64 Unexecuted instantiation: vp8_enc_stubs_sse2.c:x86_readtsc64 Unexecuted instantiation: vpx_scale_rtcd.c:x86_readtsc64 Unexecuted instantiation: vpx_dsp_rtcd.c:x86_readtsc64 Unexecuted instantiation: systemdependent.c:x86_readtsc64 Unexecuted instantiation: rtcd.c:x86_readtsc64 Unexecuted instantiation: bitstream.c:x86_readtsc64 Unexecuted instantiation: encodeframe.c:x86_readtsc64 Unexecuted instantiation: encodeintra.c:x86_readtsc64 Unexecuted instantiation: encodemb.c:x86_readtsc64 Unexecuted instantiation: encodemv.c:x86_readtsc64 Unexecuted instantiation: firstpass.c:x86_readtsc64 Unexecuted instantiation: mcomp.c:x86_readtsc64 Unexecuted instantiation: modecosts.c:x86_readtsc64 Unexecuted instantiation: vp8_quantize_sse2.c:x86_readtsc64 Unexecuted instantiation: vp9_rtcd.c:x86_readtsc64 |
286 | | |
287 | | // 32-bit CPU cycle counter with a partial fence against out-of-order execution. |
288 | 0 | static INLINE unsigned int x86_readtscp(void) { |
289 | 0 | #if defined(__GNUC__) |
290 | 0 | unsigned int tscp; |
291 | 0 | __asm__ __volatile__("rdtscp\n\t" : "=a"(tscp) :); |
292 | 0 | return tscp; |
293 | 0 | #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
294 | 0 | unsigned int tscp; |
295 | 0 | asm volatile("rdtscp\n\t" : "=a"(tscp) :); |
296 | 0 | return tscp; |
297 | 0 | #elif defined(_MSC_VER) |
298 | 0 | unsigned int ui; |
299 | 0 | return (unsigned int)__rdtscp(&ui); |
300 | 0 | #else |
301 | 0 | #if VPX_ARCH_X86_64 |
302 | 0 | return (unsigned int)__rdtscp(); |
303 | 0 | #else |
304 | 0 | __asm rdtscp; |
305 | 0 | #endif |
306 | 0 | #endif |
307 | 0 | } Unexecuted instantiation: vpx_encoder.c:x86_readtscp Unexecuted instantiation: vp8_cx_iface.c:x86_readtscp Unexecuted instantiation: ethreading.c:x86_readtscp Unexecuted instantiation: onyx_if.c:x86_readtscp Unexecuted instantiation: pickinter.c:x86_readtscp Unexecuted instantiation: picklpf.c:x86_readtscp Unexecuted instantiation: vp8_quantize.c:x86_readtscp Unexecuted instantiation: ratectrl.c:x86_readtscp Unexecuted instantiation: rdopt.c:x86_readtscp Unexecuted instantiation: segmentation.c:x86_readtscp Unexecuted instantiation: vp8_skin_detection.c:x86_readtscp Unexecuted instantiation: tokenize.c:x86_readtscp Unexecuted instantiation: temporal_filter.c:x86_readtscp Unexecuted instantiation: vp8_enc_stubs_sse2.c:x86_readtscp Unexecuted instantiation: vpx_scale_rtcd.c:x86_readtscp Unexecuted instantiation: vpx_dsp_rtcd.c:x86_readtscp Unexecuted instantiation: systemdependent.c:x86_readtscp Unexecuted instantiation: rtcd.c:x86_readtscp Unexecuted instantiation: bitstream.c:x86_readtscp Unexecuted instantiation: encodeframe.c:x86_readtscp Unexecuted instantiation: encodeintra.c:x86_readtscp Unexecuted instantiation: encodemb.c:x86_readtscp Unexecuted instantiation: encodemv.c:x86_readtscp Unexecuted instantiation: firstpass.c:x86_readtscp Unexecuted instantiation: mcomp.c:x86_readtscp Unexecuted instantiation: modecosts.c:x86_readtscp Unexecuted instantiation: vp8_quantize_sse2.c:x86_readtscp Unexecuted instantiation: vp9_rtcd.c:x86_readtscp |
308 | | |
309 | 0 | static INLINE unsigned int x86_tsc_start(void) { |
310 | 0 | unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; |
311 | 0 | // This call should not be removed. See function notes above. |
312 | 0 | cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
313 | 0 | // Avoid compiler warnings on unused-but-set variables. |
314 | 0 | (void)reg_eax; |
315 | 0 | (void)reg_ebx; |
316 | 0 | (void)reg_ecx; |
317 | 0 | (void)reg_edx; |
318 | 0 | return x86_readtsc(); |
319 | 0 | } Unexecuted instantiation: vpx_encoder.c:x86_tsc_start Unexecuted instantiation: vp8_cx_iface.c:x86_tsc_start Unexecuted instantiation: ethreading.c:x86_tsc_start Unexecuted instantiation: onyx_if.c:x86_tsc_start Unexecuted instantiation: pickinter.c:x86_tsc_start Unexecuted instantiation: picklpf.c:x86_tsc_start Unexecuted instantiation: vp8_quantize.c:x86_tsc_start Unexecuted instantiation: ratectrl.c:x86_tsc_start Unexecuted instantiation: rdopt.c:x86_tsc_start Unexecuted instantiation: segmentation.c:x86_tsc_start Unexecuted instantiation: vp8_skin_detection.c:x86_tsc_start Unexecuted instantiation: tokenize.c:x86_tsc_start Unexecuted instantiation: temporal_filter.c:x86_tsc_start Unexecuted instantiation: vp8_enc_stubs_sse2.c:x86_tsc_start Unexecuted instantiation: vpx_scale_rtcd.c:x86_tsc_start Unexecuted instantiation: vpx_dsp_rtcd.c:x86_tsc_start Unexecuted instantiation: systemdependent.c:x86_tsc_start Unexecuted instantiation: rtcd.c:x86_tsc_start Unexecuted instantiation: bitstream.c:x86_tsc_start Unexecuted instantiation: encodeframe.c:x86_tsc_start Unexecuted instantiation: encodeintra.c:x86_tsc_start Unexecuted instantiation: encodemb.c:x86_tsc_start Unexecuted instantiation: encodemv.c:x86_tsc_start Unexecuted instantiation: firstpass.c:x86_tsc_start Unexecuted instantiation: mcomp.c:x86_tsc_start Unexecuted instantiation: modecosts.c:x86_tsc_start Unexecuted instantiation: vp8_quantize_sse2.c:x86_tsc_start Unexecuted instantiation: vp9_rtcd.c:x86_tsc_start |
320 | | |
321 | 0 | static INLINE unsigned int x86_tsc_end(void) { |
322 | 0 | uint32_t v = x86_readtscp(); |
323 | 0 | unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; |
324 | 0 | // This call should not be removed. See function notes above. |
325 | 0 | cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
326 | 0 | // Avoid compiler warnings on unused-but-set variables. |
327 | 0 | (void)reg_eax; |
328 | 0 | (void)reg_ebx; |
329 | 0 | (void)reg_ecx; |
330 | 0 | (void)reg_edx; |
331 | 0 | return v; |
332 | 0 | } Unexecuted instantiation: vpx_encoder.c:x86_tsc_end Unexecuted instantiation: vp8_cx_iface.c:x86_tsc_end Unexecuted instantiation: ethreading.c:x86_tsc_end Unexecuted instantiation: onyx_if.c:x86_tsc_end Unexecuted instantiation: pickinter.c:x86_tsc_end Unexecuted instantiation: picklpf.c:x86_tsc_end Unexecuted instantiation: vp8_quantize.c:x86_tsc_end Unexecuted instantiation: ratectrl.c:x86_tsc_end Unexecuted instantiation: rdopt.c:x86_tsc_end Unexecuted instantiation: segmentation.c:x86_tsc_end Unexecuted instantiation: vp8_skin_detection.c:x86_tsc_end Unexecuted instantiation: tokenize.c:x86_tsc_end Unexecuted instantiation: temporal_filter.c:x86_tsc_end Unexecuted instantiation: vp8_enc_stubs_sse2.c:x86_tsc_end Unexecuted instantiation: vpx_scale_rtcd.c:x86_tsc_end Unexecuted instantiation: vpx_dsp_rtcd.c:x86_tsc_end Unexecuted instantiation: systemdependent.c:x86_tsc_end Unexecuted instantiation: rtcd.c:x86_tsc_end Unexecuted instantiation: bitstream.c:x86_tsc_end Unexecuted instantiation: encodeframe.c:x86_tsc_end Unexecuted instantiation: encodeintra.c:x86_tsc_end Unexecuted instantiation: encodemb.c:x86_tsc_end Unexecuted instantiation: encodemv.c:x86_tsc_end Unexecuted instantiation: firstpass.c:x86_tsc_end Unexecuted instantiation: mcomp.c:x86_tsc_end Unexecuted instantiation: modecosts.c:x86_tsc_end Unexecuted instantiation: vp8_quantize_sse2.c:x86_tsc_end Unexecuted instantiation: vp9_rtcd.c:x86_tsc_end |
333 | | |
// x86_pause_hint(): issues the x86 `pause` instruction (spin-wait hint).
//
// Every variant expands to a single statement WITHOUT a trailing semicolon;
// the caller supplies it, i.e. `x86_pause_hint();`. Keeping the semicolon out
// of the macro lets it be used safely as the body of an unbraced if/else.
#if defined(__GNUC__)
#define x86_pause_hint() __asm__ __volatile__("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint() asm volatile("pause \n\t")
#else
#if VPX_ARCH_X86_64
// 64-bit builds on the remaining compilers use the _mm_pause() intrinsic.
#define x86_pause_hint() _mm_pause()
#else
#define x86_pause_hint() __asm pause
#endif
#endif
345 | | |
// Raw accessors for the x87 FPU control word, one implementation per
// toolchain:
//   x87_set_control_word(mode) loads |mode| with `fldcw`.
//   x87_get_control_word()     stores the current word with `fstcw` and
//                              returns it.
#if defined(__GNUC__)
static void x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}
static unsigned short x87_get_control_word(void) {
  unsigned short control_word;
  __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&control_word) :);
  return control_word;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static void x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(*&mode));
}
static unsigned short x87_get_control_word(void) {
  unsigned short control_word;
  asm volatile("fstcw %0\n\t" : "=m"(*&control_word) :);
  return control_word;
}
#elif VPX_ARCH_X86_64
/* Windows x64 has no fldcw/fstcw intrinsics, so both accessors are aliases
 * for routines implemented in external assembly. */
extern void vpx_winx64_fldcw(unsigned short mode);
#define x87_set_control_word vpx_winx64_fldcw
extern unsigned short vpx_winx64_fstcw(void);
#define x87_get_control_word vpx_winx64_fstcw
#else
static void x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}
static unsigned short x87_get_control_word(void) {
  unsigned short control_word;
  __asm { fstcw control_word }
  return control_word;
}
#endif
380 | | |
381 | 144k | static INLINE unsigned int x87_set_double_precision(void) { |
382 | 144k | unsigned int mode = x87_get_control_word(); |
383 | | // Intel 64 and IA-32 Architectures Developer's Manual: Vol. 1 |
384 | | // https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf |
385 | | // 8.1.5.2 Precision Control Field |
386 | | // Bits 8 and 9 (0x300) of the x87 FPU Control Word ("Precision Control") |
387 | | // determine the number of bits used in floating point calculations. To match |
388 | | // later SSE instructions restrict x87 operations to Double Precision (0x200). |
389 | | // Precision PC Field |
390 | | // Single Precision (24-Bits) 00B |
391 | | // Reserved 01B |
392 | | // Double Precision (53-Bits) 10B |
393 | | // Extended Precision (64-Bits) 11B |
394 | 144k | x87_set_control_word((mode & ~0x300u) | 0x200u); |
395 | 144k | return mode; |
396 | 144k | } vpx_encoder.c:x87_set_double_precision Line | Count | Source | 381 | 144k | static INLINE unsigned int x87_set_double_precision(void) { | 382 | 144k | unsigned int mode = x87_get_control_word(); | 383 | | // Intel 64 and IA-32 Architectures Developer's Manual: Vol. 1 | 384 | | // https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf | 385 | | // 8.1.5.2 Precision Control Field | 386 | | // Bits 8 and 9 (0x300) of the x87 FPU Control Word ("Precision Control") | 387 | | // determine the number of bits used in floating point calculations. To match | 388 | | // later SSE instructions restrict x87 operations to Double Precision (0x200). | 389 | | // Precision PC Field | 390 | | // Single Precision (24-Bits) 00B | 391 | | // Reserved 01B | 392 | | // Double Precision (53-Bits) 10B | 393 | | // Extended Precision (64-Bits) 11B | 394 | 144k | x87_set_control_word((mode & ~0x300u) | 0x200u); | 395 | 144k | return mode; | 396 | 144k | } |
Unexecuted instantiation: vp8_cx_iface.c:x87_set_double_precision Unexecuted instantiation: ethreading.c:x87_set_double_precision Unexecuted instantiation: onyx_if.c:x87_set_double_precision Unexecuted instantiation: pickinter.c:x87_set_double_precision Unexecuted instantiation: picklpf.c:x87_set_double_precision Unexecuted instantiation: vp8_quantize.c:x87_set_double_precision Unexecuted instantiation: ratectrl.c:x87_set_double_precision Unexecuted instantiation: rdopt.c:x87_set_double_precision Unexecuted instantiation: segmentation.c:x87_set_double_precision Unexecuted instantiation: vp8_skin_detection.c:x87_set_double_precision Unexecuted instantiation: tokenize.c:x87_set_double_precision Unexecuted instantiation: temporal_filter.c:x87_set_double_precision Unexecuted instantiation: vp8_enc_stubs_sse2.c:x87_set_double_precision Unexecuted instantiation: vpx_scale_rtcd.c:x87_set_double_precision Unexecuted instantiation: vpx_dsp_rtcd.c:x87_set_double_precision Unexecuted instantiation: systemdependent.c:x87_set_double_precision Unexecuted instantiation: rtcd.c:x87_set_double_precision Unexecuted instantiation: bitstream.c:x87_set_double_precision Unexecuted instantiation: encodeframe.c:x87_set_double_precision Unexecuted instantiation: encodeintra.c:x87_set_double_precision Unexecuted instantiation: encodemb.c:x87_set_double_precision Unexecuted instantiation: encodemv.c:x87_set_double_precision Unexecuted instantiation: firstpass.c:x87_set_double_precision Unexecuted instantiation: mcomp.c:x87_set_double_precision Unexecuted instantiation: modecosts.c:x87_set_double_precision Unexecuted instantiation: vp8_quantize_sse2.c:x87_set_double_precision Unexecuted instantiation: vp9_rtcd.c:x87_set_double_precision |
397 | | |
398 | | #ifdef __cplusplus |
399 | | } // extern "C" |
400 | | #endif |
401 | | |
402 | | #endif // VPX_VPX_PORTS_X86_H_ |