/src/mozilla-central/security/nss/lib/freebl/mpi/mpcpucache.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
2 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | | |
5 | | #include "mpi.h" |
6 | | #include "prtypes.h" |
7 | | |
8 | | /* |
9 | | * This file implements a single function: s_mpi_getProcessorLineSize(); |
10 | | * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line |
11 | | * if a cache exists, or zero if there is no cache. If more than one |
12 | | * cache line exists, it should return the smallest line size (which is |
13 | | * usually the L1 cache). |
14 | | * |
15 | | * mp_modexp uses this information to make sure that private key information |
16 | | * isn't being leaked through the cache. |
17 | | * |
18 | | * Currently the file returns good data for most modern x86 processors, and |
19 | | * reasonable data on 64-bit ppc processors. All other processors are assumed |
20 | | * to have a cache line size of 32 bytes. |
21 | | * |
22 | | */ |
23 | | |
24 | | #if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
25 | | /* X86 processors have special instructions that tell us about the cache */ |
26 | | #include "string.h" |
27 | | |
28 | | #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
29 | | #define AMD_64 1 |
30 | | #endif |
31 | | |
32 | | /* Generic CPUID function */ |
33 | | #if defined(AMD_64) |
34 | | |
35 | | #if defined(__GNUC__) |
36 | | |
/*
 * Execute the CPUID instruction for leaf 'op' (x86-64, GCC-style inline
 * assembly) and store the resulting EAX, EBX, ECX and EDX register values
 * through the four output pointers.
 *
 * ECX is cleared before the instruction is issued, exactly as the 32-bit
 * x86 variant of this function does. Leaves that take a sub-leaf index in
 * ECX are therefore always queried at sub-leaf 0 instead of with whatever
 * value happened to be in the register, and processors that leave ECX
 * untouched report 0 rather than stale bits.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    __asm__("xor %%ecx, %%ecx\n\t"
            "cpuid\n\t"
            : "=a"(*eax),
              "=b"(*ebx),
              "=c"(*ecx),
              "=d"(*edx)
            : "0"(op));
}
49 | | |
50 | | #elif defined(_MSC_VER) |
51 | | |
52 | | #include <intrin.h> |
53 | | |
/*
 * CPUID wrapper for 64-bit MSVC builds. The __cpuid intrinsic fills a
 * four-element int array; the elements are copied out, in order, to the
 * eax, ebx, ecx and edx output pointers.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    int regs[4];

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
67 | | |
68 | | #endif |
69 | | |
70 | | #else /* !defined(AMD_64) */ |
71 | | |
72 | | /* x86 */ |
73 | | |
74 | | #if defined(__GNUC__) |
/*
 * CPUID wrapper for 32-bit x86 builds using GCC-style inline assembly.
 *
 * op   - value placed in eax before cpuid is executed
 * eax, ebx, ecx, edx - receive the register values left by cpuid
 *
 * The ebx result is handed back through edi (constraint "=D") because ebx
 * itself is saved and restored around the instruction.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    /* Some older processors don't fill the ecx register with cpuid, so clobber it
     * before calling cpuid, so that there's no risk of picking random bits that
     * erroneously indicate that absent CPU features are present.
     * Also, GCC isn't smart enough to save the ebx PIC register on its own
     * in this case, so do it by hand. Use edi to store ebx and pass the
     * value returned in ebx from cpuid through edi. */
    __asm__("xor %%ecx, %%ecx\n\t"
            "mov %%ebx,%%edi\n\t"
            "cpuid\n\t"
            "xchgl %%ebx,%%edi\n\t"
            : "=a"(*eax),
              "=D"(*ebx),
              "=c"(*ecx),
              "=d"(*edx)
            : "0"(op));
}
96 | | |
/*
 * try flipping a processor flag to determine CPU type
 *
 * Reads the flags register, XORs it with 'flag', writes the result back,
 * reads the flags register again and finally restores the original value.
 *
 * Returns the XOR of the re-read flags and the original flags: the bits
 * that actually changed. A return of 0 means the requested bit(s) could
 * not be toggled.
 */
static unsigned long
changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm__("pushfl\n\t" /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t" /* save the original flags */
            "xorl %2,%0\n\t" /* flip the bit */
            "pushl %0\n\t"   /* set the flags */
            "popfl\n\t"
            "pushfl\n\t" /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t" /* restore the original flags */
            "popfl\n\t"
            : "=r"(changedFlags),
              "=r"(originalFlags),
              "=r"(flag)
            : "2"(flag));
    return changedFlags ^ originalFlags;
}
120 | | |
121 | | #elif defined(_MSC_VER) |
122 | | |
123 | | /* |
124 | | * windows versions of the above assembler |
125 | | */ |
/* Emits the two bytes 0x0f 0xa2 directly into the instruction stream
 * (used below where the cpuid instruction is wanted). */
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
/*
 * CPUID wrapper for 32-bit MSVC builds.
 *
 * op   - value loaded into eax before the instruction is issued
 * Reax, Rebx, Recx, Redx - receive the eax, ebx, ecx and edx results
 *
 * All general registers are saved with pushad and restored with popad, so
 * the results are first parked in locals and copied out afterwards.
 */
void
freebl_cpuid(unsigned long op, unsigned long *Reax,
             unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
{
    unsigned long Leax, Lebx, Lecx, Ledx;
    __asm {
        pushad
        xor     ecx,ecx /* start from a zeroed ecx */
        mov     eax,op
        wcpuid
        mov     Leax,eax
        mov     Lebx,ebx
        mov     Lecx,ecx
        mov     Ledx,edx
        popad
    }
    *Reax = Leax;
    *Rebx = Lebx;
    *Recx = Lecx;
    *Redx = Ledx;
}
148 | | |
/*
 * MSVC inline-assembly version of changeFlag(): XOR 'flag' into the flags
 * register, read the flags back, then restore the original flags.
 *
 * Returns changedFlags ^ originalFlags, i.e. the bits that really changed;
 * 0 means the requested bit(s) could not be toggled.
 */
static unsigned long
changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm {
        push eax
        push ebx
        pushfd /* get the flags */
        pop  eax
        push eax /* save the flags on the stack */
        mov  originalFlags,eax /* save the original flags */
        mov  ebx,flag
        xor  eax,ebx /* flip the bit */
        push eax /* set the flags */
        popfd
        pushfd /* get the flags again (for return) */
        pop  eax
        popfd /* restore the original flags */
        mov changedFlags,eax
        pop ebx
        pop eax
    }
    return changedFlags ^ originalFlags;
}
173 | | #endif |
174 | | |
175 | | #endif |
176 | | |
177 | | #if !defined(AMD_64) |
178 | | #define AC_FLAG 0x40000 |
179 | | #define ID_FLAG 0x200000 |
180 | | |
181 | | /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ |
182 | | static int |
183 | | is386() |
184 | | { |
185 | | return changeFlag(AC_FLAG) == 0; |
186 | | } |
187 | | |
188 | | /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ |
189 | | static int |
190 | | is486() |
191 | | { |
192 | | return changeFlag(ID_FLAG) == 0; |
193 | | } |
194 | | #endif |
195 | | |
196 | | /* |
197 | | * table for Intel Cache. |
198 | | * See Intel Application Note AP-485 for more information |
199 | | */ |
200 | | |
/* One-byte storage type used for a CacheType value inside the
 * descriptor table entries. */
typedef unsigned char CacheTypeEntry;

/*
 * Kinds of structure a cache descriptor can describe. Values are
 * consecutive starting from zero:
 *   NONE, UNKNOWN, the TLB kinds, the trace cache, then unified /
 *   instruction / data variants for cache levels 1, 2 and 3.
 */
typedef enum {
    Cache_NONE = 0,
    Cache_UNKNOWN, /* 1 */
    Cache_TLB,     /* 2 */
    Cache_TLBi,    /* 3 */
    Cache_TLBd,    /* 4 */
    Cache_Trace,   /* 5 */
    Cache_L1,      /* 6 */
    Cache_L1i,     /* 7 */
    Cache_L1d,     /* 8 */
    Cache_L2,      /* 9 */
    Cache_L2i,     /* 10 */
    Cache_L2d,     /* 11 */
    Cache_L3,      /* 12 */
    Cache_L3i,     /* 13 */
    Cache_L3d      /* 14 */
} CacheType;
220 | | |
/* One row of the descriptor table: what kind of structure a descriptor
 * byte denotes and, where the table records one, its line size. */
struct _cache {
    CacheTypeEntry type;    /* a CacheType value stored in one byte */
    unsigned char lineSize; /* line size; 0 makes the lookup code skip the entry */
};
/*
 * Descriptor lookup table, indexed directly by the descriptor byte
 * (0x00 - 0xff). Entries whose lineSize is 0 are ignored by
 * getIntelCacheEntryLineSize(); that includes every Cache_UNKNOWN row,
 * the TLB rows and descriptor 0x40. Entry 0x04 omits lineSize, which
 * therefore takes the implicit initializer value 0.
 */
static const struct _cache CacheMap[256] = {
    /* 00 */ { Cache_NONE, 0 },
    /* 01 */ { Cache_TLBi, 0 },
    /* 02 */ { Cache_TLBi, 0 },
    /* 03 */ { Cache_TLBd, 0 },
    /* 04 */ {
        Cache_TLBd,
    },
    /* 05 */ { Cache_UNKNOWN, 0 },
    /* 06 */ { Cache_L1i, 32 },
    /* 07 */ { Cache_UNKNOWN, 0 },
    /* 08 */ { Cache_L1i, 32 },
    /* 09 */ { Cache_UNKNOWN, 0 },
    /* 0a */ { Cache_L1d, 32 },
    /* 0b */ { Cache_UNKNOWN, 0 },
    /* 0c */ { Cache_L1d, 32 },
    /* 0d */ { Cache_UNKNOWN, 0 },
    /* 0e */ { Cache_UNKNOWN, 0 },
    /* 0f */ { Cache_UNKNOWN, 0 },
    /* 10 */ { Cache_UNKNOWN, 0 },
    /* 11 */ { Cache_UNKNOWN, 0 },
    /* 12 */ { Cache_UNKNOWN, 0 },
    /* 13 */ { Cache_UNKNOWN, 0 },
    /* 14 */ { Cache_UNKNOWN, 0 },
    /* 15 */ { Cache_UNKNOWN, 0 },
    /* 16 */ { Cache_UNKNOWN, 0 },
    /* 17 */ { Cache_UNKNOWN, 0 },
    /* 18 */ { Cache_UNKNOWN, 0 },
    /* 19 */ { Cache_UNKNOWN, 0 },
    /* 1a */ { Cache_UNKNOWN, 0 },
    /* 1b */ { Cache_UNKNOWN, 0 },
    /* 1c */ { Cache_UNKNOWN, 0 },
    /* 1d */ { Cache_UNKNOWN, 0 },
    /* 1e */ { Cache_UNKNOWN, 0 },
    /* 1f */ { Cache_UNKNOWN, 0 },
    /* 20 */ { Cache_UNKNOWN, 0 },
    /* 21 */ { Cache_UNKNOWN, 0 },
    /* 22 */ { Cache_L3, 64 },
    /* 23 */ { Cache_L3, 64 },
    /* 24 */ { Cache_UNKNOWN, 0 },
    /* 25 */ { Cache_L3, 64 },
    /* 26 */ { Cache_UNKNOWN, 0 },
    /* 27 */ { Cache_UNKNOWN, 0 },
    /* 28 */ { Cache_UNKNOWN, 0 },
    /* 29 */ { Cache_L3, 64 },
    /* 2a */ { Cache_UNKNOWN, 0 },
    /* 2b */ { Cache_UNKNOWN, 0 },
    /* 2c */ { Cache_L1d, 64 },
    /* 2d */ { Cache_UNKNOWN, 0 },
    /* 2e */ { Cache_UNKNOWN, 0 },
    /* 2f */ { Cache_UNKNOWN, 0 },
    /* 30 */ { Cache_L1i, 64 },
    /* 31 */ { Cache_UNKNOWN, 0 },
    /* 32 */ { Cache_UNKNOWN, 0 },
    /* 33 */ { Cache_UNKNOWN, 0 },
    /* 34 */ { Cache_UNKNOWN, 0 },
    /* 35 */ { Cache_UNKNOWN, 0 },
    /* 36 */ { Cache_UNKNOWN, 0 },
    /* 37 */ { Cache_UNKNOWN, 0 },
    /* 38 */ { Cache_UNKNOWN, 0 },
    /* 39 */ { Cache_L2, 64 },
    /* 3a */ { Cache_UNKNOWN, 0 },
    /* 3b */ { Cache_L2, 64 },
    /* 3c */ { Cache_L2, 64 },
    /* 3d */ { Cache_UNKNOWN, 0 },
    /* 3e */ { Cache_UNKNOWN, 0 },
    /* 3f */ { Cache_UNKNOWN, 0 },
    /* 40 */ { Cache_L2, 0 },
    /* 41 */ { Cache_L2, 32 },
    /* 42 */ { Cache_L2, 32 },
    /* 43 */ { Cache_L2, 32 },
    /* 44 */ { Cache_L2, 32 },
    /* 45 */ { Cache_L2, 32 },
    /* 46 */ { Cache_UNKNOWN, 0 },
    /* 47 */ { Cache_UNKNOWN, 0 },
    /* 48 */ { Cache_UNKNOWN, 0 },
    /* 49 */ { Cache_UNKNOWN, 0 },
    /* 4a */ { Cache_UNKNOWN, 0 },
    /* 4b */ { Cache_UNKNOWN, 0 },
    /* 4c */ { Cache_UNKNOWN, 0 },
    /* 4d */ { Cache_UNKNOWN, 0 },
    /* 4e */ { Cache_UNKNOWN, 0 },
    /* 4f */ { Cache_UNKNOWN, 0 },
    /* 50 */ { Cache_TLBi, 0 },
    /* 51 */ { Cache_TLBi, 0 },
    /* 52 */ { Cache_TLBi, 0 },
    /* 53 */ { Cache_UNKNOWN, 0 },
    /* 54 */ { Cache_UNKNOWN, 0 },
    /* 55 */ { Cache_UNKNOWN, 0 },
    /* 56 */ { Cache_UNKNOWN, 0 },
    /* 57 */ { Cache_UNKNOWN, 0 },
    /* 58 */ { Cache_UNKNOWN, 0 },
    /* 59 */ { Cache_UNKNOWN, 0 },
    /* 5a */ { Cache_UNKNOWN, 0 },
    /* 5b */ { Cache_TLBd, 0 },
    /* 5c */ { Cache_TLBd, 0 },
    /* 5d */ { Cache_TLBd, 0 },
    /* 5e */ { Cache_UNKNOWN, 0 },
    /* 5f */ { Cache_UNKNOWN, 0 },
    /* 60 */ { Cache_UNKNOWN, 0 },
    /* 61 */ { Cache_UNKNOWN, 0 },
    /* 62 */ { Cache_UNKNOWN, 0 },
    /* 63 */ { Cache_UNKNOWN, 0 },
    /* 64 */ { Cache_UNKNOWN, 0 },
    /* 65 */ { Cache_UNKNOWN, 0 },
    /* 66 */ { Cache_L1d, 64 },
    /* 67 */ { Cache_L1d, 64 },
    /* 68 */ { Cache_L1d, 64 },
    /* 69 */ { Cache_UNKNOWN, 0 },
    /* 6a */ { Cache_UNKNOWN, 0 },
    /* 6b */ { Cache_UNKNOWN, 0 },
    /* 6c */ { Cache_UNKNOWN, 0 },
    /* 6d */ { Cache_UNKNOWN, 0 },
    /* 6e */ { Cache_UNKNOWN, 0 },
    /* 6f */ { Cache_UNKNOWN, 0 },
    /* 70 */ { Cache_Trace, 1 },
    /* 71 */ { Cache_Trace, 1 },
    /* 72 */ { Cache_Trace, 1 },
    /* 73 */ { Cache_UNKNOWN, 0 },
    /* 74 */ { Cache_UNKNOWN, 0 },
    /* 75 */ { Cache_UNKNOWN, 0 },
    /* 76 */ { Cache_UNKNOWN, 0 },
    /* 77 */ { Cache_UNKNOWN, 0 },
    /* 78 */ { Cache_UNKNOWN, 0 },
    /* 79 */ { Cache_L2, 64 },
    /* 7a */ { Cache_L2, 64 },
    /* 7b */ { Cache_L2, 64 },
    /* 7c */ { Cache_L2, 64 },
    /* 7d */ { Cache_UNKNOWN, 0 },
    /* 7e */ { Cache_UNKNOWN, 0 },
    /* 7f */ { Cache_UNKNOWN, 0 },
    /* 80 */ { Cache_UNKNOWN, 0 },
    /* 81 */ { Cache_UNKNOWN, 0 },
    /* 82 */ { Cache_L2, 32 },
    /* 83 */ { Cache_L2, 32 },
    /* 84 */ { Cache_L2, 32 },
    /* 85 */ { Cache_L2, 32 },
    /* 86 */ { Cache_L2, 64 },
    /* 87 */ { Cache_L2, 64 },
    /* 88 */ { Cache_UNKNOWN, 0 },
    /* 89 */ { Cache_UNKNOWN, 0 },
    /* 8a */ { Cache_UNKNOWN, 0 },
    /* 8b */ { Cache_UNKNOWN, 0 },
    /* 8c */ { Cache_UNKNOWN, 0 },
    /* 8d */ { Cache_UNKNOWN, 0 },
    /* 8e */ { Cache_UNKNOWN, 0 },
    /* 8f */ { Cache_UNKNOWN, 0 },
    /* 90 */ { Cache_UNKNOWN, 0 },
    /* 91 */ { Cache_UNKNOWN, 0 },
    /* 92 */ { Cache_UNKNOWN, 0 },
    /* 93 */ { Cache_UNKNOWN, 0 },
    /* 94 */ { Cache_UNKNOWN, 0 },
    /* 95 */ { Cache_UNKNOWN, 0 },
    /* 96 */ { Cache_UNKNOWN, 0 },
    /* 97 */ { Cache_UNKNOWN, 0 },
    /* 98 */ { Cache_UNKNOWN, 0 },
    /* 99 */ { Cache_UNKNOWN, 0 },
    /* 9a */ { Cache_UNKNOWN, 0 },
    /* 9b */ { Cache_UNKNOWN, 0 },
    /* 9c */ { Cache_UNKNOWN, 0 },
    /* 9d */ { Cache_UNKNOWN, 0 },
    /* 9e */ { Cache_UNKNOWN, 0 },
    /* 9f */ { Cache_UNKNOWN, 0 },
    /* a0 */ { Cache_UNKNOWN, 0 },
    /* a1 */ { Cache_UNKNOWN, 0 },
    /* a2 */ { Cache_UNKNOWN, 0 },
    /* a3 */ { Cache_UNKNOWN, 0 },
    /* a4 */ { Cache_UNKNOWN, 0 },
    /* a5 */ { Cache_UNKNOWN, 0 },
    /* a6 */ { Cache_UNKNOWN, 0 },
    /* a7 */ { Cache_UNKNOWN, 0 },
    /* a8 */ { Cache_UNKNOWN, 0 },
    /* a9 */ { Cache_UNKNOWN, 0 },
    /* aa */ { Cache_UNKNOWN, 0 },
    /* ab */ { Cache_UNKNOWN, 0 },
    /* ac */ { Cache_UNKNOWN, 0 },
    /* ad */ { Cache_UNKNOWN, 0 },
    /* ae */ { Cache_UNKNOWN, 0 },
    /* af */ { Cache_UNKNOWN, 0 },
    /* b0 */ { Cache_TLBi, 0 },
    /* b1 */ { Cache_UNKNOWN, 0 },
    /* b2 */ { Cache_UNKNOWN, 0 },
    /* b3 */ { Cache_TLBd, 0 },
    /* b4 */ { Cache_UNKNOWN, 0 },
    /* b5 */ { Cache_UNKNOWN, 0 },
    /* b6 */ { Cache_UNKNOWN, 0 },
    /* b7 */ { Cache_UNKNOWN, 0 },
    /* b8 */ { Cache_UNKNOWN, 0 },
    /* b9 */ { Cache_UNKNOWN, 0 },
    /* ba */ { Cache_UNKNOWN, 0 },
    /* bb */ { Cache_UNKNOWN, 0 },
    /* bc */ { Cache_UNKNOWN, 0 },
    /* bd */ { Cache_UNKNOWN, 0 },
    /* be */ { Cache_UNKNOWN, 0 },
    /* bf */ { Cache_UNKNOWN, 0 },
    /* c0 */ { Cache_UNKNOWN, 0 },
    /* c1 */ { Cache_UNKNOWN, 0 },
    /* c2 */ { Cache_UNKNOWN, 0 },
    /* c3 */ { Cache_UNKNOWN, 0 },
    /* c4 */ { Cache_UNKNOWN, 0 },
    /* c5 */ { Cache_UNKNOWN, 0 },
    /* c6 */ { Cache_UNKNOWN, 0 },
    /* c7 */ { Cache_UNKNOWN, 0 },
    /* c8 */ { Cache_UNKNOWN, 0 },
    /* c9 */ { Cache_UNKNOWN, 0 },
    /* ca */ { Cache_UNKNOWN, 0 },
    /* cb */ { Cache_UNKNOWN, 0 },
    /* cc */ { Cache_UNKNOWN, 0 },
    /* cd */ { Cache_UNKNOWN, 0 },
    /* ce */ { Cache_UNKNOWN, 0 },
    /* cf */ { Cache_UNKNOWN, 0 },
    /* d0 */ { Cache_UNKNOWN, 0 },
    /* d1 */ { Cache_UNKNOWN, 0 },
    /* d2 */ { Cache_UNKNOWN, 0 },
    /* d3 */ { Cache_UNKNOWN, 0 },
    /* d4 */ { Cache_UNKNOWN, 0 },
    /* d5 */ { Cache_UNKNOWN, 0 },
    /* d6 */ { Cache_UNKNOWN, 0 },
    /* d7 */ { Cache_UNKNOWN, 0 },
    /* d8 */ { Cache_UNKNOWN, 0 },
    /* d9 */ { Cache_UNKNOWN, 0 },
    /* da */ { Cache_UNKNOWN, 0 },
    /* db */ { Cache_UNKNOWN, 0 },
    /* dc */ { Cache_UNKNOWN, 0 },
    /* dd */ { Cache_UNKNOWN, 0 },
    /* de */ { Cache_UNKNOWN, 0 },
    /* df */ { Cache_UNKNOWN, 0 },
    /* e0 */ { Cache_UNKNOWN, 0 },
    /* e1 */ { Cache_UNKNOWN, 0 },
    /* e2 */ { Cache_UNKNOWN, 0 },
    /* e3 */ { Cache_UNKNOWN, 0 },
    /* e4 */ { Cache_UNKNOWN, 0 },
    /* e5 */ { Cache_UNKNOWN, 0 },
    /* e6 */ { Cache_UNKNOWN, 0 },
    /* e7 */ { Cache_UNKNOWN, 0 },
    /* e8 */ { Cache_UNKNOWN, 0 },
    /* e9 */ { Cache_UNKNOWN, 0 },
    /* ea */ { Cache_UNKNOWN, 0 },
    /* eb */ { Cache_UNKNOWN, 0 },
    /* ec */ { Cache_UNKNOWN, 0 },
    /* ed */ { Cache_UNKNOWN, 0 },
    /* ee */ { Cache_UNKNOWN, 0 },
    /* ef */ { Cache_UNKNOWN, 0 },
    /* f0 */ { Cache_UNKNOWN, 0 },
    /* f1 */ { Cache_UNKNOWN, 0 },
    /* f2 */ { Cache_UNKNOWN, 0 },
    /* f3 */ { Cache_UNKNOWN, 0 },
    /* f4 */ { Cache_UNKNOWN, 0 },
    /* f5 */ { Cache_UNKNOWN, 0 },
    /* f6 */ { Cache_UNKNOWN, 0 },
    /* f7 */ { Cache_UNKNOWN, 0 },
    /* f8 */ { Cache_UNKNOWN, 0 },
    /* f9 */ { Cache_UNKNOWN, 0 },
    /* fa */ { Cache_UNKNOWN, 0 },
    /* fb */ { Cache_UNKNOWN, 0 },
    /* fc */ { Cache_UNKNOWN, 0 },
    /* fd */ { Cache_UNKNOWN, 0 },
    /* fe */ { Cache_UNKNOWN, 0 },
    /* ff */ { Cache_UNKNOWN, 0 }
};
485 | | |
486 | | /* |
487 | | * use the above table to determine the CacheEntryLineSize. |
488 | | */ |
489 | | static void |
490 | | getIntelCacheEntryLineSize(unsigned long val, int *level, |
491 | | unsigned long *lineSize) |
492 | 0 | { |
493 | 0 | CacheType type; |
494 | 0 |
|
495 | 0 | type = CacheMap[val].type; |
496 | 0 | /* only interested in data caches */ |
497 | 0 | /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. |
498 | 0 | * this data check has the side effect of rejecting that entry. If |
499 | 0 | * that wasn't the case, we could have to reject it explicitly */ |
500 | 0 | if (CacheMap[val].lineSize == 0) { |
501 | 0 | return; |
502 | 0 | } |
503 | 0 | /* look at the caches, skip types we aren't interested in. |
504 | 0 | * if we already have a value for a lower level cache, skip the |
505 | 0 | * current entry */ |
506 | 0 | if ((type == Cache_L1) || (type == Cache_L1d)) { |
507 | 0 | *level = 1; |
508 | 0 | *lineSize = CacheMap[val].lineSize; |
509 | 0 | } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { |
510 | 0 | *level = 2; |
511 | 0 | *lineSize = CacheMap[val].lineSize; |
512 | 0 | } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { |
513 | 0 | *level = 3; |
514 | 0 | *lineSize = CacheMap[val].lineSize; |
515 | 0 | } |
516 | 0 | return; |
517 | 0 | } |
518 | | |
/*
 * Split one 32-bit register value into its four descriptor bytes and feed
 * each one to getIntelCacheEntryLineSize(), most significant byte first.
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
528 | | |
529 | | /* |
 * returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor
532 | | */ |
/*
 * Determine the cache line size using cpuid command '2' (descriptor bytes).
 *
 * cpuidLevel - highest standard cpuid command the processor supports.
 *
 * Returns the line size recorded for the lowest-level unified/data cache
 * whose descriptor is present in CacheMap, or 0 when cpuid command '2' is
 * unavailable or no descriptor with a line size was recognized.
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4; /* higher than any real level, so any cache is accepted */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* the count occupies all of 'al' (8 bits), not just its low nibble */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
        if ((eax & 0x80000000) == 0) {
            /* mask off 'al': it is the repeat count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
        /* fetch the next batch of descriptors unless this was the last */
        if (count + 1 != repeat) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return lineSize;
}
576 | | |
577 | | /* |
578 | | * returns '0' if the cache info is not supported by this processor. |
579 | | * This is based on the AMD extended cache commands for cpuid. |
580 | | * (see "AMD Processor Recognition Application Note" Publication 20734). |
581 | | * Some other processors use the identical scheme. |
582 | | * (see "Processor Recognition, Transmeta Corporation"). |
583 | | */ |
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* The value passed in is discarded: what matters here is the highest
     * extended cpuid command, reported in eax by command 0x80000000. */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0; /* extended cache info command not available */
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff; /* line Size, L1 Data Cache */
}
600 | | |
/*
 * Manufacturer ID strings, compared against the twelve-character string
 * returned by cpuid command 0. Each macro names the index of the string
 * that immediately follows it, so macro value and array position must stay
 * in step. (CENTAUR previously duplicated CYRIX's value, which shifted
 * every later macro off by one and made MAN_UNKNOWN equal to a valid
 * index.)
 */
static const char *const manMap[] = {
#define INTEL 0
    "GenuineIntel",
#define AMD 1
    "AuthenticAMD",
#define CYRIX 2
    "CyrixInstead",
#define CENTAUR 3
    "CentaurHauls",
#define NEXGEN 4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE 6
    "RiseRiseRise",
#define UMC 7
    "UMC UMC UMC ",
#define SIS 8
    "Sis Sis Sis ",
#define NATIONAL 9
    "Geode by NSC",
};

/* number of entries in manMap */
static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);

/* sentinel for "no match": one past the last valid manMap index */
#define MAN_UNKNOWN 10
627 | | |
628 | | #if !defined(AMD_64) |
#define SSE2_FLAG (1 << 26)
/*
 * Report whether the processor advertises SSE2.
 *
 * Returns 1 when the SSE2_FLAG bit is set in edx after cpuid command 1,
 * and 0 otherwise (including 386/486 class processors and processors
 * whose highest cpuid command is 0).
 */
unsigned long
s_mpi_is_sse2()
{
    unsigned long eax, ebx, ecx, edx;

    /* the 386/486 checks come first; the cpuid calls follow only after both */
    if (is386()) {
        return 0;
    }
    if (is486()) {
        return 0;
    }

    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
    if (eax == 0) {
        /* command 1 is not available: has no SSE2 extensions */
        return 0;
    }

    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
    return (edx & SSE2_FLAG) != 0;
}
648 | | #endif |
649 | | |
650 | | unsigned long |
651 | | s_mpi_getProcessorLineSize() |
652 | | { |
653 | | unsigned long eax, ebx, ecx, edx; |
654 | | PRUint32 cpuid[3]; |
655 | | unsigned long cpuidLevel; |
656 | | unsigned long cacheLineSize = 0; |
657 | | int manufacturer = MAN_UNKNOWN; |
658 | | int i; |
659 | | char string[13]; |
660 | | |
661 | | #if !defined(AMD_64) |
662 | | if (is386()) { |
663 | | return 0; /* 386 had no cache */ |
664 | | } |
665 | | if (is486()) { |
666 | | return 32; /* really? need more info */ |
667 | | } |
668 | | #endif |
669 | | |
670 | | /* Pentium, cpuid command is available */ |
671 | | freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
672 | | cpuidLevel = eax; |
673 | | /* string holds the CPU's manufacturer ID string - a twelve |
674 | | * character ASCII string stored in ebx, edx, ecx, and |
675 | | * the 32-bit extended feature flags are in edx, ecx. |
676 | | */ |
677 | | cpuid[0] = ebx; |
678 | | cpuid[1] = ecx; |
679 | | cpuid[2] = edx; |
680 | | memcpy(string, cpuid, sizeof(cpuid)); |
681 | | string[12] = 0; |
682 | | |
683 | | manufacturer = MAN_UNKNOWN; |
684 | | for (i = 0; i < n_manufacturers; i++) { |
685 | | if (strcmp(manMap[i], string) == 0) { |
686 | | manufacturer = i; |
687 | | } |
688 | | } |
689 | | |
690 | | if (manufacturer == INTEL) { |
691 | | cacheLineSize = getIntelCacheLineSize(cpuidLevel); |
692 | | } else { |
693 | | cacheLineSize = getOtherCacheLineSize(cpuidLevel); |
694 | | } |
695 | | /* doesn't support cache info based on cpuid. This means |
696 | | * an old pentium class processor, which have cache lines of |
697 | | * 32. If we learn differently, we can use a switch based on |
698 | | * the Manufacturer id */ |
699 | | if (cacheLineSize == 0) { |
700 | | cacheLineSize = 32; |
701 | | } |
702 | | return cacheLineSize; |
703 | | } |
704 | | #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
705 | | #endif |
706 | | |
707 | | #if defined(__ppc64__) |
708 | | /* |
709 | | * Sigh, The PPC has some really nice features to help us determine cache |
710 | | * size, since it had lots of direct control functions to do so. The POWER |
711 | | * processor even has an instruction to do this, but it was dropped in |
712 | | * PowerPC. Unfortunately most of them are not available in user mode. |
713 | | * |
714 | | * The dcbz function would be a great way to determine cache line size except |
715 | | * 1) it only works on write-back memory (it throws an exception otherwise), |
716 | | * and 2) because so many mac programs 'knew' the processor cache size was |
717 | | * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new |
718 | | * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep |
719 | | * these programs happy. dcbzl work if 64 bit instructions are supported. |
720 | | * If you know 64 bit instructions are supported, and that stack is |
721 | | * write-back, you can use this code. |
722 | | */ |
723 | | #include "memory.h" |
724 | | |
/* clear the cache line that contains 'array'
 *
 * 'array' is bound to register r2 and handed to the dcbzl instruction as
 * both input and output operand. The caller below relies on the
 * instruction zeroing the block of memory at that address. */
static inline void
dcbzl(char *array)
{
    register char *a asm("r2") = array;
    __asm__ __volatile__("dcbzl %0,r0"
                         : "=r"(a)
                         : "0"(a));
}
734 | | |
/* round address x up to the next multiple of y (y must be a power of two) */
#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))

/* largest line size the probe below can detect */
#define PPC_MAX_LINE_SIZE 256
/*
 * Measure the cache line size by experiment: fill an aligned buffer with
 * 0xff bytes, clear one cache block at its start with dcbzl(), and see how
 * far the zeroed region reaches.
 *
 * Returns the largest power of two (at most PPC_MAX_LINE_SIZE) whose last
 * byte was cleared, or 0 if not even the first byte was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    /* twice the maximum plus one, so an aligned window always fits */
    char scratch[2 * PPC_MAX_LINE_SIZE + 1];
    char *probe;
    int width;

    /* start the window on a maximum-line-size boundary so the cleared
     * block begins at the window's first byte */
    probe = PPC_DO_ALIGN(scratch, PPC_MAX_LINE_SIZE);

    /* every byte non-zero, then clear one cache block */
    memset(probe, 0xff, PPC_MAX_LINE_SIZE);
    dcbzl(probe);

    /* try candidate sizes from largest to smallest, halving each time */
    width = PPC_MAX_LINE_SIZE;
    while (width != 0) {
        if (probe[width - 1] == 0) {
            return width;
        }
        width = width / 2;
    }
    return 0;
}
761 | | |
762 | | #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
763 | | #endif |
764 | | |
765 | | /* |
766 | | * put other processor and platform specific cache code here |
767 | | * return the smallest cache line size in bytes on the processor |
768 | | * (usually the L1 cache). If the OS has a call, this would be |
 * a great place to put it.
770 | | * |
771 | | * If there is no cache, return 0; |
772 | | * |
773 | | * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions |
774 | | * below aren't compiled. |
775 | | * |
776 | | */ |
777 | | |
778 | | /* If no way to get the processor cache line size has been defined, assume |
779 | | * it's 32 bytes (most common value, does not significantly impact performance) |
780 | | */ |
781 | | #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED |
/* Generic fallback used when no platform-specific probe was compiled in:
 * always reports a 32-byte cache line. */
unsigned long
s_mpi_getProcessorLineSize()
{
    const unsigned long assumedLineSize = 32;

    return assumedLineSize;
}
787 | | #endif |