/src/mozilla-central/mozglue/build/SSE.cpp
Line | Count | Source |
1 | | /* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | /* compile-time and runtime tests for whether to use SSE instructions */ |
7 | | |
8 | | #include "SSE.h" |
9 | | |
10 | | #ifdef HAVE_CPUID_H |
11 | | // cpuid.h is available on gcc 4.3 and higher on i386 and x86_64 |
12 | | #include <cpuid.h> |
13 | | #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64)) |
14 | | // MSVC 2005 or newer on x86-32 or x86-64 |
15 | | #include <intrin.h> |
16 | | #endif |
17 | | |
18 | | namespace { |
19 | | |
20 | | // SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION. |
21 | | // We can't declare these functions in the header file, however, because |
22 | | // <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to |
23 | | // include both SSE.h and <windows.h>. |
24 | | |
25 | | #ifdef HAVE_CPUID_H |
26 | | |
// Index of each CPUID output register inside the regs[] array that
// has_cpuid_bits() fills (regs[0] = eax ... regs[3] = edx).
27 | | enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; |
28 | | |
// Returns true when CPUID leaf `level` (sub-leaf 0) is supported by the CPU
// and every bit of `bits` is set in the output register selected by `reg`.
// Returns false when the leaf is not supported or any requested bit is clear.
29 | | static bool |
30 | | has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits) |
31 | 27 | { |
32 | 27 | unsigned int regs[4]; |
// These locals shadow the CPUIDRegister enumerators of the same name inside
// this function; `reg` already carries the numeric index, so that is harmless.
33 | 27 | unsigned int eax, ebx, ecx, edx; |
// Ask for the highest leaf of the range `level` belongs to: 0 selects the
// basic range, 0x80000000 the extended range.  Querying the basic range
// unconditionally made every extended leaf (such as 0x80000001, used for
// SSE4a) compare greater than `max`, so those probes always returned false.
34 | 27 | unsigned max = __get_cpuid_max(level & 0x80000000u, NULL); |
35 | 27 | if (level > max) |
36 | 3 | return false; |
// Sub-leaf (ECX input) is fixed at 0.
37 | 24 | __cpuid_count(level, 0, eax, ebx, ecx, edx); |
38 | 24 | regs[0] = eax; |
39 | 24 | regs[1] = ebx; |
40 | 24 | regs[2] = ecx; |
41 | 24 | regs[3] = edx; |
42 | 24 | return (regs[reg] & bits) == bits; |
43 | 24 | } |
44 | | |
45 | | #if !defined(MOZILLA_PRESUME_AVX) |
// Reads extended control register `xcr` and returns its 64-bit value,
// assembled from EDX (high 32 bits) and EAX (low 32 bits).
// The instruction is emitted as the raw opcode bytes 0f 01 d0 (XGETBV)
// instead of by mnemonic -- presumably so that assemblers which do not know
// the mnemonic still accept it; confirm before switching to the mnemonic.
46 | 6 | static uint64_t xgetbv(uint32_t xcr) { |
47 | 6 | uint32_t eax, edx; |
// Constraints: ECX carries the register number in; EAX and EDX carry the
// result out.
48 | 6 | __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr)); |
49 | 6 | return (uint64_t)(edx) << 32 | eax; |
50 | 6 | } |
51 | | #endif |
52 | | |
53 | | #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64)) |
54 | | |
// Index of each CPUID output register inside the four-element array that
// has_cpuid_bits() passes to the __cpuid intrinsic and then indexes by `reg`.
55 | | enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; |
56 | | |
// Returns true when CPUID leaf `level` is supported by the CPU and every bit
// of `bits` is set in the output register selected by `reg`; false otherwise.
57 | | static bool |
58 | | has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits) |
59 | | { |
60 | | // Check that the level in question is supported. |
61 | | int regs[4]; |
// `level & 0x80000000u` is 0 for a basic leaf and 0x80000000 for an extended
// leaf; that query reports the highest leaf of the matching range in
// regs[0] (EAX).
62 | | __cpuid(regs, level & 0x80000000u); |
// Compare as unsigned: extended leaf numbers have the top bit set and would
// be negative as int.
63 | | if (unsigned(regs[0]) < level) |
64 | | return false; |
65 | | |
66 | | // "The __cpuid intrinsic clears the ECX register before calling the cpuid instruction." |
67 | | __cpuid(regs, level); |
68 | | return (unsigned(regs[reg]) & bits) == bits; |
69 | | } |
70 | | |
71 | | #if !defined(MOZILLA_PRESUME_AVX) |
// Thin wrapper that forwards to the compiler's _xgetbv intrinsic, giving
// this branch the same xgetbv(uint32_t) -> uint64_t helper as the
// HAVE_CPUID_H branch.
72 | | static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); } |
73 | | #endif |
74 | | |
75 | | #elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && (defined(__i386) || defined(__x86_64__)) |
76 | | |
// Index of each CPUID output register inside the four-int buffer written by
// moz_cpuid(): eax at offset 0, ebx at 4, ecx at 8, edx at 12 bytes.
77 | | enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; |
78 | | |
79 | | #ifdef __i386 |
// 32-bit x86 variant.  Executes CPUID with EAX = InfoType and ECX = 0 and
// stores the resulting EAX, EBX, ECX, EDX into CPUInfo[0..3].
//
// EBX is swapped into ESI before the instruction sequence and swapped back
// afterwards, so its incoming value is preserved -- presumably because EBX
// can be reserved by the compiler on i386 (e.g. as the PIC register); confirm.
//
// NOTE(review): the asm stores through CPUInfo but declares neither outputs
// nor a "memory" clobber, and CPUID overwrites EAX although it is listed only
// as an input.  The caller in this file reads the buffer through a volatile
// array, which looks like the workaround; verify before reusing this helper.
80 | | static void |
81 | | moz_cpuid(int CPUInfo[4], int InfoType) |
82 | | { |
83 | | asm ( |
84 | | "xchg %esi, %ebx\n" |
85 | | "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0) |
86 | | "cpuid\n" |
87 | | "movl %eax, (%edi)\n" |
88 | | "movl %ebx, 4(%edi)\n" |
89 | | "movl %ecx, 8(%edi)\n" |
90 | | "movl %edx, 12(%edi)\n" |
91 | | "xchg %esi, %ebx\n" |
92 | | : |
93 | | : "a"(InfoType), // %eax |
94 | | "D"(CPUInfo) // %edi |
95 | | : "%ecx", "%edx", "%esi" |
96 | | ); |
97 | | } |
98 | | #else |
// x86-64 variant.  Executes CPUID with EAX = InfoType and ECX = 0 and stores
// the resulting EAX, EBX, ECX, EDX into CPUInfo[0..3].
//
// RBX is swapped into RSI before the instruction sequence and swapped back
// afterwards, so its incoming value is preserved across CPUID.
//
// NOTE(review): the asm stores through CPUInfo but declares neither outputs
// nor a "memory" clobber, and CPUID overwrites EAX although it is listed only
// as an input.  The caller in this file reads the buffer through a volatile
// array, which looks like the workaround; verify before reusing this helper.
99 | | static void |
100 | | moz_cpuid(int CPUInfo[4], int InfoType) |
101 | | { |
102 | | asm ( |
103 | | "xchg %rsi, %rbx\n" |
104 | | "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0) |
105 | | "cpuid\n" |
106 | | "movl %eax, (%rdi)\n" |
107 | | "movl %ebx, 4(%rdi)\n" |
108 | | "movl %ecx, 8(%rdi)\n" |
109 | | "movl %edx, 12(%rdi)\n" |
110 | | "xchg %rsi, %rbx\n" |
111 | | : |
112 | | : "a"(InfoType), // %eax |
113 | | "D"(CPUInfo) // %rdi |
114 | | : "%ecx", "%edx", "%rsi" |
115 | | ); |
116 | | } |
117 | | #endif |
118 | | |
// Returns true when CPUID leaf `level` is supported by the CPU and every bit
// of `bits` is set in the output register selected by `reg`; false otherwise.
119 | | static bool |
120 | | has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits) |
121 | | { |
122 | | // Check that the level in question is supported. |
// NOTE(review): `volatile` presumably forces the values written by
// moz_cpuid's asm to be re-read from memory, since that asm declares no
// memory clobber -- confirm before removing it.
123 | | volatile int regs[4]; |
// `level & 0x80000000u` is 0 for a basic leaf and 0x80000000 for an extended
// leaf; that query reports the highest leaf of the matching range in
// regs[0] (EAX).
124 | | moz_cpuid((int *)regs, level & 0x80000000u); |
// Compare as unsigned: extended leaf numbers have the top bit set and would
// be negative as int.
125 | | if (unsigned(regs[0]) < level) |
126 | | return false; |
127 | | |
128 | | moz_cpuid((int *)regs, level); |
129 | | return (unsigned(regs[reg]) & bits) == bits; |
130 | | } |
131 | | |
132 | | #endif // end CPUID declarations |
133 | | |
134 | | } // namespace |
135 | | |
136 | | namespace mozilla { |
137 | | |
138 | | namespace sse_private { |
139 | | |
140 | | #if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) |
141 | | |
// Runtime feature flags.  Each variable is defined only when the matching
// MOZILLA_PRESUME_* macro is NOT defined, and is initialised during static
// initialisation by probing CPUID through has_cpuid_bits().
// (Presumably SSE.h treats a presumed feature as always available -- that
// header is not shown here.)

// MMX: CPUID leaf 1, EDX bit 23.
142 | | #if !defined(MOZILLA_PRESUME_MMX) |
143 | | bool mmx_enabled = has_cpuid_bits(1u, edx, (1u<<23)); |
144 | | #endif |
145 | | |
// SSE: CPUID leaf 1, EDX bit 25.
146 | | #if !defined(MOZILLA_PRESUME_SSE) |
147 | | bool sse_enabled = has_cpuid_bits(1u, edx, (1u<<25)); |
148 | | #endif |
149 | | |
// SSE2: CPUID leaf 1, EDX bit 26.
150 | | #if !defined(MOZILLA_PRESUME_SSE2) |
151 | | bool sse2_enabled = has_cpuid_bits(1u, edx, (1u<<26)); |
152 | | #endif |
153 | | |
// SSE3: CPUID leaf 1, ECX bit 0.
154 | | #if !defined(MOZILLA_PRESUME_SSE3) |
155 | | bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u<<0)); |
156 | | #endif |
157 | | |
// SSSE3: CPUID leaf 1, ECX bit 9.
158 | | #if !defined(MOZILLA_PRESUME_SSSE3) |
159 | | bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u<<9)); |
160 | | #endif |
161 | | |
// SSE4a: the only flag here read from an extended leaf (0x80000001, ECX bit 6).
162 | | #if !defined(MOZILLA_PRESUME_SSE4A) |
163 | | bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u<<6)); |
164 | | #endif |
165 | | |
// SSE4.1: CPUID leaf 1, ECX bit 19.
166 | | #if !defined(MOZILLA_PRESUME_SSE4_1) |
167 | | bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u<<19)); |
168 | | #endif |
169 | | |
// SSE4.2: CPUID leaf 1, ECX bit 20.
170 | | #if !defined(MOZILLA_PRESUME_SSE4_2) |
171 | | bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u<<20)); |
172 | | #endif |
173 | | |
174 | | #if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2) |
// Reports whether AVX can be used: the CPU must advertise AVX, XSAVE and
// OSXSAVE, and the OS must have enabled saving of both XMM and YMM state.
// Compiled whenever at least one of AVX / AVX2 still needs a runtime check;
// returns true unconditionally when AVX itself is presumed.
175 | | static bool has_avx() |
176 | 6 | { |
177 | | #if defined(MOZILLA_PRESUME_AVX) |
178 | | return true; |
179 | | #else |
// Feature bits in CPUID leaf 1, ECX.
180 | | const unsigned AVX = 1u << 28; |
181 | 6 | const unsigned OSXSAVE = 1u << 27; |
182 | 6 | const unsigned XSAVE = 1u << 26; |
183 | 6 | |
// State-component bits in the value returned by xgetbv(0).
184 | 6 | const unsigned XMM_STATE = 1u << 1; |
185 | 6 | const unsigned YMM_STATE = 1u << 2; |
186 | 6 | const unsigned AVX_STATE = XMM_STATE | YMM_STATE; |
187 | 6 | |
// Order matters: && short-circuits, so xgetbv(0) only executes once CPUID
// has reported OSXSAVE; XGETBV is not usable without it.
188 | 6 | return has_cpuid_bits(1u, ecx, AVX | OSXSAVE | XSAVE) && |
189 | 6 | // ensure the OS supports XSAVE of YMM registers |
190 | 6 | (xgetbv(0) & AVX_STATE) == AVX_STATE; |
191 | 6 | #endif // MOZILLA_PRESUME_AVX |
192 | 6 | } |
193 | | #endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2 |
194 | | |
// Runtime feature flags for AVX, AVX2 and AES.  Each variable is defined only
// when the matching MOZILLA_PRESUME_* macro is NOT defined and is initialised
// during static initialisation.  has_avx() is evaluated separately for
// avx_enabled and avx2_enabled.

// AVX: CPU and OS support as determined by has_avx().
195 | | #if !defined(MOZILLA_PRESUME_AVX) |
196 | | bool avx_enabled = has_avx(); |
197 | | #endif |
198 | | |
// AVX2: requires usable AVX plus CPUID leaf 7 (sub-leaf 0), EBX bit 5.
199 | | #if !defined(MOZILLA_PRESUME_AVX2) |
200 | | bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u<<5)); |
201 | | #endif |
202 | | |
// AES: CPUID leaf 1, ECX bit 25.
203 | | #if !defined(MOZILLA_PRESUME_AES) |
204 | | bool aes_enabled = has_cpuid_bits(1u, ecx, (1u<<25)); |
205 | | #endif |
206 | | |
207 | | #endif |
208 | | |
209 | | } // namespace sse_private |
210 | | } // namespace mozilla |