/src/theora/lib/x86/x86cpu.c
Line | Count | Source |
1 | | /******************************************************************** |
2 | | * * |
3 | | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
4 | | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
5 | | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
6 | | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
7 | | * * |
8 | | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
9 | | * by the Xiph.Org Foundation and contributors * |
10 | | * https://www.xiph.org/ * |
11 | | * * |
12 | | ******************************************************************** |
13 | | |
14 | | CPU capability detection for x86 processors. |
15 | | Originally written by Rudolf Marek. |
16 | | |
17 | | function: |
18 | | |
19 | | ********************************************************************/ |
20 | | |
21 | | #include "x86cpu.h" |
22 | | |
23 | | #if defined(_WIN32) |
24 | | # define WIN32_LEAN_AND_MEAN |
25 | | # define WIN32_EXTRA_LEAN |
26 | | # include <windows.h> |
27 | | #endif |
28 | | |
29 | | #if !defined(OC_X86_ASM) |
30 | | ogg_uint32_t oc_cpu_flags_get(void){ |
31 | | return 0; |
32 | | } |
33 | | #else |
34 | | # if defined(__amd64__)||defined(__x86_64__) |
/*Executes the cpuid instruction with _op loaded into %eax, and stores the
   resulting %eax, %ebx, %ecx, and %edx values in _eax, _ebx, _ecx, and _edx
   (all four must be lvalues).
  %eax is the only input operand; no value is loaded into %ecx before the
   instruction executes.
  The condition codes are declared as clobbered.
  On x86-64, gcc seems to be able to figure out how to save %rbx for us when
   compiling with -fPIC, so %ebx is named directly as an output operand.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
  __asm__ __volatile__( \
   "cpuid\n\t" \
   :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
   :"a"(_op) \
   :"cc" \
  )
44 | | # else |
/*On x86-32, not so much: gcc does not save %ebx for us here, so this variant
   does it by hand.
  Executes the cpuid instruction with _op loaded into %eax, and stores the
   resulting %eax, %ebx, %ecx, and %edx values in _eax, _ebx, _ecx, and _edx
   (all four must be lvalues).
  The _ebx operand lives in a register chosen by the compiler ("=r").
  The first xchgl parks the caller's %ebx in that register, and the second
   xchgl (after cpuid) swaps them back, so that %ebx holds its original value
   again and the operand register holds the %ebx result of cpuid.
  %eax is the only input operand; no value is loaded into %ecx before the
   instruction executes.
  The condition codes are declared as clobbered.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
  __asm__ __volatile__( \
   "xchgl %%ebx,%[ebx]\n\t" \
   "cpuid\n\t" \
   "xchgl %%ebx,%[ebx]\n\t" \
   :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
   :"a"(_op) \
   :"cc" \
  )
55 | | # endif |
56 | | |
57 | 2.87k | static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ |
58 | 2.87k | ogg_uint32_t flags; |
59 | | /*If there isn't even MMX, give up.*/ |
60 | 2.87k | if(!(_edx&0x00800000))return 0; |
61 | 2.87k | flags=OC_CPU_X86_MMX; |
62 | 2.87k | if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE; |
63 | 2.87k | if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2; |
64 | 2.87k | if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI; |
65 | 2.87k | if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3; |
66 | 2.87k | if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1; |
67 | 2.87k | if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2; |
68 | 2.87k | return flags; |
69 | 2.87k | } |
70 | | |
71 | 2.87k | static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ |
72 | 2.87k | ogg_uint32_t flags; |
73 | | /*If there isn't even MMX, give up.*/ |
74 | 2.87k | if(!(_edx&0x00800000))return 0; |
75 | 2.87k | flags=OC_CPU_X86_MMX; |
76 | 2.87k | if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT; |
77 | 2.87k | if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW; |
78 | 2.87k | if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT; |
79 | 2.87k | if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A; |
80 | 2.87k | if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5; |
81 | 2.87k | return flags; |
82 | 2.87k | } |
83 | | |
84 | 2.87k | ogg_uint32_t oc_cpu_flags_get(void){ |
85 | 2.87k | ogg_uint32_t flags; |
86 | 2.87k | ogg_uint32_t eax; |
87 | 2.87k | ogg_uint32_t ebx; |
88 | 2.87k | ogg_uint32_t ecx; |
89 | 2.87k | ogg_uint32_t edx; |
90 | | # if !defined(__amd64__)&&!defined(__x86_64__) |
91 | | /*Not all x86-32 chips support cpuid, so we have to check.*/ |
92 | | __asm__ __volatile__( |
93 | | "pushfl\n\t" |
94 | | "pushfl\n\t" |
95 | | "popl %[a]\n\t" |
96 | | "movl %[a],%[b]\n\t" |
97 | | "xorl $0x200000,%[a]\n\t" |
98 | | "pushl %[a]\n\t" |
99 | | "popfl\n\t" |
100 | | "pushfl\n\t" |
101 | | "popl %[a]\n\t" |
102 | | "popfl\n\t" |
103 | | :[a]"=r"(eax),[b]"=r"(ebx) |
104 | | : |
105 | | :"cc" |
106 | | ); |
107 | | /*No cpuid.*/ |
108 | | if(eax==ebx)return 0; |
109 | | # endif |
110 | 2.87k | cpuid(0,eax,ebx,ecx,edx); |
111 | | /* l e t n I e n i u n e G*/ |
112 | 2.87k | if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547|| |
113 | | /* 6 8 x M T e n i u n e G*/ |
114 | 2.87k | ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){ |
115 | 0 | int family; |
116 | 0 | int model; |
117 | | /*Intel, Transmeta (tested with Crusoe TM5800):*/ |
118 | 0 | cpuid(1,eax,ebx,ecx,edx); |
119 | 0 | flags=oc_parse_intel_flags(edx,ecx); |
120 | 0 | family=(eax>>8)&0xF; |
121 | 0 | model=(eax>>4)&0xF; |
122 | | /*The SSE unit on the Pentium M and Core Duo is much slower than the MMX |
123 | | unit, so don't use it.*/ |
124 | 0 | if(family==6&&(model==9||model==13||model==14)){ |
125 | 0 | flags&=~(OC_CPU_X86_SSE2|OC_CPU_X86_PNI); |
126 | 0 | } |
127 | 0 | } |
128 | | /* D M A c i t n e h t u A*/ |
129 | 2.87k | else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541|| |
130 | | /* C S N y b e d o e G*/ |
131 | 2.87k | ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){ |
132 | | /*AMD, Geode:*/ |
133 | 2.87k | cpuid(0x80000000,eax,ebx,ecx,edx); |
134 | 2.87k | if(eax<0x80000001)flags=0; |
135 | 2.87k | else{ |
136 | 2.87k | cpuid(0x80000001,eax,ebx,ecx,edx); |
137 | 2.87k | flags=oc_parse_amd_flags(edx,ecx); |
138 | 2.87k | } |
139 | | /*Also check for SSE.*/ |
140 | 2.87k | cpuid(1,eax,ebx,ecx,edx); |
141 | 2.87k | flags|=oc_parse_intel_flags(edx,ecx); |
142 | 2.87k | } |
143 | | /*Technically some VIA chips can be configured in the BIOS to return any |
144 | | string here the user wants. |
145 | | There is a special detection method that can be used to identify such |
146 | | processors, but in my opinion, if the user really wants to change it, they |
147 | | deserve what they get.*/ |
148 | | /* s l u a H r u a t n e C*/ |
149 | 0 | else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){ |
150 | | /*VIA:*/ |
151 | | /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming) |
152 | | chips (thanks to the engineers from Centaur Technology who provided it). |
153 | | These chips support Intel-like cpuid info. |
154 | | The C3-2 (Nehemiah) cores appear to, as well.*/ |
155 | 0 | cpuid(1,eax,ebx,ecx,edx); |
156 | 0 | flags=oc_parse_intel_flags(edx,ecx); |
157 | 0 | if(eax>=0x80000001){ |
158 | | /*The (non-Nehemiah) C3 processors support AMD-like cpuid info. |
159 | | We need to check this even if the Intel test succeeds to pick up 3DNow! |
160 | | support on these processors. |
161 | | Unlike actual AMD processors, we cannot _rely_ on this info, since |
162 | | some cores (e.g., the 693 stepping of the Nehemiah) claim to support |
163 | | this function, yet return edx=0, despite the Intel test indicating |
164 | | MMX support. |
165 | | Therefore the features detected here are strictly added to those |
166 | | detected by the Intel test.*/ |
167 | | /*TODO: How about earlier chips?*/ |
168 | 0 | cpuid(0x80000001,eax,ebx,ecx,edx); |
169 | | /*Note: As of the C7, this function returns Intel-style extended feature |
170 | | flags, not AMD-style. |
171 | | Currently, this only defines bits 11, 20, and 29 (0x20100800), which |
172 | | do not conflict with any of the AMD flags we inspect. |
173 | | For the remaining bits, Intel tells us, "Do not count on their value", |
174 | | but VIA assures us that they will all be zero (at least on the C7 and |
175 | | Isaiah chips). |
176 | | In the (unlikely) event a future processor uses bits 18, 19, 30, or 31 |
177 | | (0xC0C00000) for something else, we will have to add code to detect |
178 | | the model to decide when it is appropriate to inspect them.*/ |
179 | 0 | flags|=oc_parse_amd_flags(edx,ecx); |
180 | 0 | } |
181 | 0 | } |
182 | 0 | else{ |
183 | | /*Implement me.*/ |
184 | 0 | flags=0; |
185 | 0 | } |
186 | | #if defined(_WIN32) |
187 | | OSVERSIONINFO win_version_info; |
188 | | memset(&win_version_info, 0, sizeof(win_version_info)); |
189 | | win_version_info.dwOSVersionInfoSize = sizeof(win_version_info); |
190 | | GetVersionEx(&win_version_info); |
191 | | |
192 | | if (win_version_info.dwMajorVersion < 4 || |
193 | | (win_version_info.dwMajorVersion == 4 && win_version_info.dwMinorVersion == 0)) { |
194 | | // Windows 95 and NT4 or before don't backup SSE+ XMM registers when switching tasks |
195 | | // Disable SSE and stick to MMX to avoid possible corruption |
196 | | flags &= OC_CPU_X86_MMX|OC_CPU_X86_3DNOW|OC_CPU_X86_3DNOWEXT|OC_CPU_X86_MMXEXT; |
197 | | } |
198 | | |
199 | | #endif |
200 | 2.87k | return flags; |
201 | 2.87k | } |
202 | | #endif |