/src/nss/lib/freebl/mpi/mpcpucache.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
2 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | | |
5 | | #include "mpi.h" |
6 | | #include "prtypes.h" |
7 | | |
8 | | /* |
9 | | * This file implements a single function: s_mpi_getProcessorLineSize(); |
10 | | * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line |
11 | | * if a cache exists, or zero if there is no cache. If more than one |
12 | | * cache line exists, it should return the smallest line size (which is |
13 | | * usually the L1 cache). |
14 | | * |
15 | | * mp_modexp uses this information to make sure that private key information |
16 | | * isn't being leaked through the cache. |
17 | | * |
18 | | * Currently the file returns good data for most modern x86 processors, and |
19 | | * reasonable data on 64-bit ppc processors. All other processors are assumed |
20 | | * to have a cache line size of 32 bytes. |
21 | | * |
22 | | */ |
23 | | |
24 | | #if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
25 | | /* X86 processors have special instructions that tell us about the cache */ |
26 | | #include "string.h" |
27 | | |
28 | | #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
29 | | #define AMD_64 1 |
30 | | #endif |
31 | | |
32 | | /* Generic CPUID function */ |
33 | | #if defined(AMD_64) |
34 | | |
35 | | #if defined(__GNUC__) |
36 | | |
/*
 * Execute the CPUID instruction for leaf 'op' (x86-64, GCC-style inline
 * assembly) and store the four result registers through eax, ebx, ecx
 * and edx.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    /* ecx is cleared before cpuid is issued.  The "0" input constraint
     * places 'op' in the same register as output operand 0 (eax). */
    __asm__("xor %%ecx, %%ecx\n\t"
            "cpuid\n\t"
            : "=a"(*eax),
              "=b"(*ebx),
              "=c"(*ecx),
              "=d"(*edx)
            : "0"(op));
}
50 | | |
51 | | #elif defined(_MSC_VER) |
52 | | |
53 | | #include <intrin.h> |
54 | | |
/*
 * Execute CPUID for leaf 'op' through the compiler's __cpuid intrinsic
 * (x86-64, MSVC) and hand the four register values back to the caller.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    /* __cpuid fills a four-element int array: eax, ebx, ecx, edx. */
    int regs[4];

    __cpuid(regs, op);

    *edx = regs[3];
    *ecx = regs[2];
    *ebx = regs[1];
    *eax = regs[0];
}
68 | | |
69 | | #endif |
70 | | |
71 | | #else /* !defined(AMD_64) */ |
72 | | |
73 | | /* x86 */ |
74 | | |
75 | | #if defined(__GNUC__) |
/*
 * Execute the CPUID instruction for leaf 'op' (32-bit x86, GCC-style
 * inline assembly) and store the four result registers through eax,
 * ebx, ecx and edx.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    /* Some older processors don't fill the ecx register with cpuid, so clobber it
     * before calling cpuid, so that there's no risk of picking random bits that
     * erroneously indicate that absent CPU features are present.
     * Also, GCC isn't smart enough to save the ebx PIC register on its own
     * in this case, so do it by hand. Use edi to store ebx and pass the
     * value returned in ebx from cpuid through edi. */
    __asm__("xor %%ecx, %%ecx\n\t"
            "mov %%ebx,%%edi\n\t"
            "cpuid\n\t"
            "xchgl %%ebx,%%edi\n\t" /* ebx restored; edi now holds cpuid's ebx */
            : "=a"(*eax),
              "=D"(*ebx),
              "=c"(*ecx),
              "=d"(*edx)
            : "0"(op));
}
97 | | |
/*
 * try flipping a processor flag to determine CPU type
 *
 * 'flag' is the EFLAGS bit mask to toggle.  The flags register is read,
 * the requested bit(s) are inverted and written back, the register is
 * read again, and finally the original value is restored.
 *
 * Returns the bits that differ between the original flags and the flags
 * read back after the attempted change; zero means the bit could not be
 * flipped.
 */
static unsigned long
changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm__("pushfl\n\t" /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t" /* save the original flags */
            "xorl %2,%0\n\t" /* flip the bit */
            "pushl %0\n\t"   /* set the flags */
            "popfl\n\t"
            "pushfl\n\t" /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t" /* restore the original flags */
            "popfl\n\t"
            : "=r"(changedFlags),
              "=r"(originalFlags),
              "=r"(flag)
            : "2"(flag));
    /* only the bits that actually toggled survive the xor */
    return changedFlags ^ originalFlags;
}
121 | | |
122 | | #elif defined(_MSC_VER) |
123 | | |
124 | | /* |
125 | | * windows versions of the above assembler |
126 | | */ |
/* Emit the two bytes 0Fh A2h directly into the instruction stream
 * (presumably the CPUID opcode, for assemblers lacking the mnemonic). */
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
/*
 * Execute CPUID for leaf 'op' (32-bit x86, MSVC inline assembly) and
 * store the four result registers through Reax, Rebx, Recx and Redx.
 */
void
freebl_cpuid(unsigned long op, unsigned long *Reax,
             unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
{
    /* results are captured in locals inside the asm block and copied
     * out through the pointers afterwards */
    unsigned long Leax, Lebx, Lecx, Ledx;
    __asm {
        pushad
        xor     ecx,ecx
        mov     eax,op
        wcpuid
        mov     Leax,eax
        mov     Lebx,ebx
        mov     Lecx,ecx
        mov     Ledx,edx
        popad
    }
    *Reax = Leax;
    *Rebx = Lebx;
    *Recx = Lecx;
    *Redx = Ledx;
}
149 | | |
/*
 * Try to toggle the EFLAGS bit(s) given by 'flag' (32-bit x86, MSVC
 * inline assembly).  The original flags are restored before returning.
 *
 * Returns the bits that differ between the original flags and the flags
 * read back after the attempted change; zero means the bit could not be
 * flipped.
 */
static unsigned long
changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm {
        push eax
        push ebx
        pushfd /* get the flags */
        pop  eax
        push eax /* save the flags on the stack */
        mov  originalFlags,eax /* save the original flags */
        mov  ebx,flag
        xor  eax,ebx /* flip the bit */
        push eax /* set the flags */
        popfd
        pushfd /* get the flags again (for return) */
        pop  eax
        popfd /* restore the original flags */
        mov changedFlags,eax
        pop ebx
        pop eax
    }
    /* only the bits that actually toggled survive the xor */
    return changedFlags ^ originalFlags;
}
174 | | #endif |
175 | | |
176 | | #endif |
177 | | |
178 | | #if !defined(AMD_64) |
/* EFLAGS bit masks probed by the CPU-generation checks below. */
#define AC_FLAG 0x40000
#define ID_FLAG 0x200000

/*
 * Returns non-zero when AC_FLAG cannot be toggled, which identifies a
 * 386-class processor (Intel Application Note AP-485).
 */
static int
is386()
{
    unsigned long toggled = changeFlag(AC_FLAG);

    return !toggled;
}

/*
 * Returns non-zero when ID_FLAG cannot be toggled, which identifies a
 * 486-class processor (Intel Application Note AP-485).
 */
static int
is486()
{
    unsigned long toggled = changeFlag(ID_FLAG);

    return !toggled;
}
195 | | #endif |
196 | | |
/*
 * Table for Intel cache descriptors.
 * See Intel Application Note AP-485 for more information.
 *
 * CacheMap is indexed by a one-byte descriptor value and gives the kind
 * of structure that descriptor names together with its line size in
 * bytes.  A lineSize of 0 marks an entry that carries no usable line
 * size (TLBs, unrecognized descriptors, and the 0x40 entry).
 */

/* Compact storage type for a CacheType value inside the table. */
typedef unsigned char CacheTypeEntry;

typedef enum {
    Cache_NONE = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB = 2,
    Cache_TLBi = 3,
    Cache_TLBd = 4,
    Cache_Trace = 5,
    Cache_L1 = 6,
    Cache_L1i = 7,
    Cache_L1d = 8,
    Cache_L2 = 9,
    Cache_L2i = 10,
    Cache_L2d = 11,
    Cache_L3 = 12,
    Cache_L3i = 13,
    Cache_L3d = 14
} CacheType;

struct _cache {
    CacheTypeEntry type;    /* one of the CacheType values */
    unsigned char lineSize; /* line size in bytes, 0 if not applicable */
};
static const struct _cache CacheMap[256] = {
    /* 00 */ { Cache_NONE, 0 },
    /* 01 */ { Cache_TLBi, 0 },
    /* 02 */ { Cache_TLBi, 0 },
    /* 03 */ { Cache_TLBd, 0 },
    /* 04 */ { Cache_TLBd, 0 },
    /* 05 */ { Cache_UNKNOWN, 0 },
    /* 06 */ { Cache_L1i, 32 },
    /* 07 */ { Cache_UNKNOWN, 0 },
    /* 08 */ { Cache_L1i, 32 },
    /* 09 */ { Cache_UNKNOWN, 0 },
    /* 0a */ { Cache_L1d, 32 },
    /* 0b */ { Cache_UNKNOWN, 0 },
    /* 0c */ { Cache_L1d, 32 },
    /* 0d */ { Cache_UNKNOWN, 0 },
    /* 0e */ { Cache_UNKNOWN, 0 },
    /* 0f */ { Cache_UNKNOWN, 0 },
    /* 10 */ { Cache_UNKNOWN, 0 },
    /* 11 */ { Cache_UNKNOWN, 0 },
    /* 12 */ { Cache_UNKNOWN, 0 },
    /* 13 */ { Cache_UNKNOWN, 0 },
    /* 14 */ { Cache_UNKNOWN, 0 },
    /* 15 */ { Cache_UNKNOWN, 0 },
    /* 16 */ { Cache_UNKNOWN, 0 },
    /* 17 */ { Cache_UNKNOWN, 0 },
    /* 18 */ { Cache_UNKNOWN, 0 },
    /* 19 */ { Cache_UNKNOWN, 0 },
    /* 1a */ { Cache_UNKNOWN, 0 },
    /* 1b */ { Cache_UNKNOWN, 0 },
    /* 1c */ { Cache_UNKNOWN, 0 },
    /* 1d */ { Cache_UNKNOWN, 0 },
    /* 1e */ { Cache_UNKNOWN, 0 },
    /* 1f */ { Cache_UNKNOWN, 0 },
    /* 20 */ { Cache_UNKNOWN, 0 },
    /* 21 */ { Cache_UNKNOWN, 0 },
    /* 22 */ { Cache_L3, 64 },
    /* 23 */ { Cache_L3, 64 },
    /* 24 */ { Cache_UNKNOWN, 0 },
    /* 25 */ { Cache_L3, 64 },
    /* 26 */ { Cache_UNKNOWN, 0 },
    /* 27 */ { Cache_UNKNOWN, 0 },
    /* 28 */ { Cache_UNKNOWN, 0 },
    /* 29 */ { Cache_L3, 64 },
    /* 2a */ { Cache_UNKNOWN, 0 },
    /* 2b */ { Cache_UNKNOWN, 0 },
    /* 2c */ { Cache_L1d, 64 },
    /* 2d */ { Cache_UNKNOWN, 0 },
    /* 2e */ { Cache_UNKNOWN, 0 },
    /* 2f */ { Cache_UNKNOWN, 0 },
    /* 30 */ { Cache_L1i, 64 },
    /* 31 */ { Cache_UNKNOWN, 0 },
    /* 32 */ { Cache_UNKNOWN, 0 },
    /* 33 */ { Cache_UNKNOWN, 0 },
    /* 34 */ { Cache_UNKNOWN, 0 },
    /* 35 */ { Cache_UNKNOWN, 0 },
    /* 36 */ { Cache_UNKNOWN, 0 },
    /* 37 */ { Cache_UNKNOWN, 0 },
    /* 38 */ { Cache_UNKNOWN, 0 },
    /* 39 */ { Cache_L2, 64 },
    /* 3a */ { Cache_UNKNOWN, 0 },
    /* 3b */ { Cache_L2, 64 },
    /* 3c */ { Cache_L2, 64 },
    /* 3d */ { Cache_UNKNOWN, 0 },
    /* 3e */ { Cache_UNKNOWN, 0 },
    /* 3f */ { Cache_UNKNOWN, 0 },
    /* 40 */ { Cache_L2, 0 },
    /* 41 */ { Cache_L2, 32 },
    /* 42 */ { Cache_L2, 32 },
    /* 43 */ { Cache_L2, 32 },
    /* 44 */ { Cache_L2, 32 },
    /* 45 */ { Cache_L2, 32 },
    /* 46 */ { Cache_UNKNOWN, 0 },
    /* 47 */ { Cache_UNKNOWN, 0 },
    /* 48 */ { Cache_UNKNOWN, 0 },
    /* 49 */ { Cache_UNKNOWN, 0 },
    /* 4a */ { Cache_UNKNOWN, 0 },
    /* 4b */ { Cache_UNKNOWN, 0 },
    /* 4c */ { Cache_UNKNOWN, 0 },
    /* 4d */ { Cache_UNKNOWN, 0 },
    /* 4e */ { Cache_UNKNOWN, 0 },
    /* 4f */ { Cache_UNKNOWN, 0 },
    /* 50 */ { Cache_TLBi, 0 },
    /* 51 */ { Cache_TLBi, 0 },
    /* 52 */ { Cache_TLBi, 0 },
    /* 53 */ { Cache_UNKNOWN, 0 },
    /* 54 */ { Cache_UNKNOWN, 0 },
    /* 55 */ { Cache_UNKNOWN, 0 },
    /* 56 */ { Cache_UNKNOWN, 0 },
    /* 57 */ { Cache_UNKNOWN, 0 },
    /* 58 */ { Cache_UNKNOWN, 0 },
    /* 59 */ { Cache_UNKNOWN, 0 },
    /* 5a */ { Cache_UNKNOWN, 0 },
    /* 5b */ { Cache_TLBd, 0 },
    /* 5c */ { Cache_TLBd, 0 },
    /* 5d */ { Cache_TLBd, 0 },
    /* 5e */ { Cache_UNKNOWN, 0 },
    /* 5f */ { Cache_UNKNOWN, 0 },
    /* 60 */ { Cache_UNKNOWN, 0 },
    /* 61 */ { Cache_UNKNOWN, 0 },
    /* 62 */ { Cache_UNKNOWN, 0 },
    /* 63 */ { Cache_UNKNOWN, 0 },
    /* 64 */ { Cache_UNKNOWN, 0 },
    /* 65 */ { Cache_UNKNOWN, 0 },
    /* 66 */ { Cache_L1d, 64 },
    /* 67 */ { Cache_L1d, 64 },
    /* 68 */ { Cache_L1d, 64 },
    /* 69 */ { Cache_UNKNOWN, 0 },
    /* 6a */ { Cache_UNKNOWN, 0 },
    /* 6b */ { Cache_UNKNOWN, 0 },
    /* 6c */ { Cache_UNKNOWN, 0 },
    /* 6d */ { Cache_UNKNOWN, 0 },
    /* 6e */ { Cache_UNKNOWN, 0 },
    /* 6f */ { Cache_UNKNOWN, 0 },
    /* 70 */ { Cache_Trace, 1 },
    /* 71 */ { Cache_Trace, 1 },
    /* 72 */ { Cache_Trace, 1 },
    /* 73 */ { Cache_UNKNOWN, 0 },
    /* 74 */ { Cache_UNKNOWN, 0 },
    /* 75 */ { Cache_UNKNOWN, 0 },
    /* 76 */ { Cache_UNKNOWN, 0 },
    /* 77 */ { Cache_UNKNOWN, 0 },
    /* 78 */ { Cache_UNKNOWN, 0 },
    /* 79 */ { Cache_L2, 64 },
    /* 7a */ { Cache_L2, 64 },
    /* 7b */ { Cache_L2, 64 },
    /* 7c */ { Cache_L2, 64 },
    /* 7d */ { Cache_UNKNOWN, 0 },
    /* 7e */ { Cache_UNKNOWN, 0 },
    /* 7f */ { Cache_UNKNOWN, 0 },
    /* 80 */ { Cache_UNKNOWN, 0 },
    /* 81 */ { Cache_UNKNOWN, 0 },
    /* 82 */ { Cache_L2, 32 },
    /* 83 */ { Cache_L2, 32 },
    /* 84 */ { Cache_L2, 32 },
    /* 85 */ { Cache_L2, 32 },
    /* 86 */ { Cache_L2, 64 },
    /* 87 */ { Cache_L2, 64 },
    /* 88 */ { Cache_UNKNOWN, 0 },
    /* 89 */ { Cache_UNKNOWN, 0 },
    /* 8a */ { Cache_UNKNOWN, 0 },
    /* 8b */ { Cache_UNKNOWN, 0 },
    /* 8c */ { Cache_UNKNOWN, 0 },
    /* 8d */ { Cache_UNKNOWN, 0 },
    /* 8e */ { Cache_UNKNOWN, 0 },
    /* 8f */ { Cache_UNKNOWN, 0 },
    /* 90 */ { Cache_UNKNOWN, 0 },
    /* 91 */ { Cache_UNKNOWN, 0 },
    /* 92 */ { Cache_UNKNOWN, 0 },
    /* 93 */ { Cache_UNKNOWN, 0 },
    /* 94 */ { Cache_UNKNOWN, 0 },
    /* 95 */ { Cache_UNKNOWN, 0 },
    /* 96 */ { Cache_UNKNOWN, 0 },
    /* 97 */ { Cache_UNKNOWN, 0 },
    /* 98 */ { Cache_UNKNOWN, 0 },
    /* 99 */ { Cache_UNKNOWN, 0 },
    /* 9a */ { Cache_UNKNOWN, 0 },
    /* 9b */ { Cache_UNKNOWN, 0 },
    /* 9c */ { Cache_UNKNOWN, 0 },
    /* 9d */ { Cache_UNKNOWN, 0 },
    /* 9e */ { Cache_UNKNOWN, 0 },
    /* 9f */ { Cache_UNKNOWN, 0 },
    /* a0 */ { Cache_UNKNOWN, 0 },
    /* a1 */ { Cache_UNKNOWN, 0 },
    /* a2 */ { Cache_UNKNOWN, 0 },
    /* a3 */ { Cache_UNKNOWN, 0 },
    /* a4 */ { Cache_UNKNOWN, 0 },
    /* a5 */ { Cache_UNKNOWN, 0 },
    /* a6 */ { Cache_UNKNOWN, 0 },
    /* a7 */ { Cache_UNKNOWN, 0 },
    /* a8 */ { Cache_UNKNOWN, 0 },
    /* a9 */ { Cache_UNKNOWN, 0 },
    /* aa */ { Cache_UNKNOWN, 0 },
    /* ab */ { Cache_UNKNOWN, 0 },
    /* ac */ { Cache_UNKNOWN, 0 },
    /* ad */ { Cache_UNKNOWN, 0 },
    /* ae */ { Cache_UNKNOWN, 0 },
    /* af */ { Cache_UNKNOWN, 0 },
    /* b0 */ { Cache_TLBi, 0 },
    /* b1 */ { Cache_UNKNOWN, 0 },
    /* b2 */ { Cache_UNKNOWN, 0 },
    /* b3 */ { Cache_TLBd, 0 },
    /* b4 */ { Cache_UNKNOWN, 0 },
    /* b5 */ { Cache_UNKNOWN, 0 },
    /* b6 */ { Cache_UNKNOWN, 0 },
    /* b7 */ { Cache_UNKNOWN, 0 },
    /* b8 */ { Cache_UNKNOWN, 0 },
    /* b9 */ { Cache_UNKNOWN, 0 },
    /* ba */ { Cache_UNKNOWN, 0 },
    /* bb */ { Cache_UNKNOWN, 0 },
    /* bc */ { Cache_UNKNOWN, 0 },
    /* bd */ { Cache_UNKNOWN, 0 },
    /* be */ { Cache_UNKNOWN, 0 },
    /* bf */ { Cache_UNKNOWN, 0 },
    /* c0 */ { Cache_UNKNOWN, 0 },
    /* c1 */ { Cache_UNKNOWN, 0 },
    /* c2 */ { Cache_UNKNOWN, 0 },
    /* c3 */ { Cache_UNKNOWN, 0 },
    /* c4 */ { Cache_UNKNOWN, 0 },
    /* c5 */ { Cache_UNKNOWN, 0 },
    /* c6 */ { Cache_UNKNOWN, 0 },
    /* c7 */ { Cache_UNKNOWN, 0 },
    /* c8 */ { Cache_UNKNOWN, 0 },
    /* c9 */ { Cache_UNKNOWN, 0 },
    /* ca */ { Cache_UNKNOWN, 0 },
    /* cb */ { Cache_UNKNOWN, 0 },
    /* cc */ { Cache_UNKNOWN, 0 },
    /* cd */ { Cache_UNKNOWN, 0 },
    /* ce */ { Cache_UNKNOWN, 0 },
    /* cf */ { Cache_UNKNOWN, 0 },
    /* d0 */ { Cache_UNKNOWN, 0 },
    /* d1 */ { Cache_UNKNOWN, 0 },
    /* d2 */ { Cache_UNKNOWN, 0 },
    /* d3 */ { Cache_UNKNOWN, 0 },
    /* d4 */ { Cache_UNKNOWN, 0 },
    /* d5 */ { Cache_UNKNOWN, 0 },
    /* d6 */ { Cache_UNKNOWN, 0 },
    /* d7 */ { Cache_UNKNOWN, 0 },
    /* d8 */ { Cache_UNKNOWN, 0 },
    /* d9 */ { Cache_UNKNOWN, 0 },
    /* da */ { Cache_UNKNOWN, 0 },
    /* db */ { Cache_UNKNOWN, 0 },
    /* dc */ { Cache_UNKNOWN, 0 },
    /* dd */ { Cache_UNKNOWN, 0 },
    /* de */ { Cache_UNKNOWN, 0 },
    /* df */ { Cache_UNKNOWN, 0 },
    /* e0 */ { Cache_UNKNOWN, 0 },
    /* e1 */ { Cache_UNKNOWN, 0 },
    /* e2 */ { Cache_UNKNOWN, 0 },
    /* e3 */ { Cache_UNKNOWN, 0 },
    /* e4 */ { Cache_UNKNOWN, 0 },
    /* e5 */ { Cache_UNKNOWN, 0 },
    /* e6 */ { Cache_UNKNOWN, 0 },
    /* e7 */ { Cache_UNKNOWN, 0 },
    /* e8 */ { Cache_UNKNOWN, 0 },
    /* e9 */ { Cache_UNKNOWN, 0 },
    /* ea */ { Cache_UNKNOWN, 0 },
    /* eb */ { Cache_UNKNOWN, 0 },
    /* ec */ { Cache_UNKNOWN, 0 },
    /* ed */ { Cache_UNKNOWN, 0 },
    /* ee */ { Cache_UNKNOWN, 0 },
    /* ef */ { Cache_UNKNOWN, 0 },
    /* f0 */ { Cache_UNKNOWN, 0 },
    /* f1 */ { Cache_UNKNOWN, 0 },
    /* f2 */ { Cache_UNKNOWN, 0 },
    /* f3 */ { Cache_UNKNOWN, 0 },
    /* f4 */ { Cache_UNKNOWN, 0 },
    /* f5 */ { Cache_UNKNOWN, 0 },
    /* f6 */ { Cache_UNKNOWN, 0 },
    /* f7 */ { Cache_UNKNOWN, 0 },
    /* f8 */ { Cache_UNKNOWN, 0 },
    /* f9 */ { Cache_UNKNOWN, 0 },
    /* fa */ { Cache_UNKNOWN, 0 },
    /* fb */ { Cache_UNKNOWN, 0 },
    /* fc */ { Cache_UNKNOWN, 0 },
    /* fd */ { Cache_UNKNOWN, 0 },
    /* fe */ { Cache_UNKNOWN, 0 },
    /* ff */ { Cache_UNKNOWN, 0 }
};
486 | | |
487 | | /* |
488 | | * use the above table to determine the CacheEntryLineSize. |
489 | | */ |
490 | | static void |
491 | | getIntelCacheEntryLineSize(unsigned long val, int *level, |
492 | | unsigned long *lineSize) |
493 | 0 | { |
494 | 0 | CacheType type; |
495 | |
|
496 | 0 | type = CacheMap[val].type; |
497 | | /* only interested in data caches */ |
498 | | /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. |
499 | | * this data check has the side effect of rejecting that entry. If |
500 | | * that wasn't the case, we could have to reject it explicitly */ |
501 | 0 | if (CacheMap[val].lineSize == 0) { |
502 | 0 | return; |
503 | 0 | } |
504 | | /* look at the caches, skip types we aren't interested in. |
505 | | * if we already have a value for a lower level cache, skip the |
506 | | * current entry */ |
507 | 0 | if ((type == Cache_L1) || (type == Cache_L1d)) { |
508 | 0 | *level = 1; |
509 | 0 | *lineSize = CacheMap[val].lineSize; |
510 | 0 | } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { |
511 | 0 | *level = 2; |
512 | 0 | *lineSize = CacheMap[val].lineSize; |
513 | 0 | } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { |
514 | 0 | *level = 3; |
515 | 0 | *lineSize = CacheMap[val].lineSize; |
516 | 0 | } |
517 | 0 | return; |
518 | 0 | } |
519 | | |
/*
 * Split a 32-bit register value into its four descriptor bytes and feed
 * each one, most significant byte first, to getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        unsigned long descriptor = (val >> shift) & 0xff;

        getIntelCacheEntryLineSize(descriptor, level, lineSize);
    }
}
529 | | |
/*
 * Determine the cache line size from the descriptors returned by cpuid
 * command '2'.
 *
 * Returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor (cpuidLevel < 2).
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int bestLevel = 4;
    unsigned long bestLineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    unsigned long regs[4];
    int passes, pass, r;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* Command '2' of cpuid is Intel's cache info call.  Every byte of the
     * four registers is a potential cache descriptor, which CacheMap
     * translates into a cache kind and line size.  The low bits of eax
     * hold the number of times cpuid '2' has to be issued to see all
     * descriptors, so that byte is masked off before eax is decoded.
     * A register only carries valid descriptors when its high bit is
     * zero.  Each valid register is handed to
     * getIntelRegisterCacheLineSize(), which breaks it into descriptor
     * bytes; the line size of the lowest-level data cache wins. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    passes = eax & 0xf;

    pass = 0;
    while (pass < passes) {
        regs[0] = eax & 0xffffff00;
        regs[1] = ebx;
        regs[2] = ecx;
        regs[3] = edx;

        for (r = 0; r < 4; r++) {
            if ((regs[r] & 0x80000000) == 0) {
                getIntelRegisterCacheLineSize(regs[r], &bestLevel,
                                              &bestLineSize);
            }
        }

        pass++;
        /* fetch the next batch unless this was the last pass */
        if (pass != passes) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return bestLineSize;
}
577 | | |
/*
 * Determine the L1 data cache line size with the AMD extended cpuid
 * commands (see "AMD Processor Recognition Application Note",
 * Publication 20734).  Some other processors use the identical scheme
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * The incoming cpuidLevel is ignored; it is replaced by the extended
 * cpuid level reported by command 0x80000000.
 *
 * Returns '0' if the cache info is not supported by this processor.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* get the Extended CPUID level */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    /* line Size, L1 Data Cache */
    return ecx & 0xff;
}
601 | | |
/*
 * Known CPU vendor identification strings.  Each macro is the index of
 * the string that follows it, so manMap[INTEL] is "GenuineIntel" and so
 * on.  The indices must stay in step with the array order.
 */
static const char *const manMap[] = {
#define INTEL 0
    "GenuineIntel",
#define AMD 1
    "AuthenticAMD",
#define CYRIX 2
    "CyrixInstead",
#define CENTAUR 3
    "CentaurHauls",
#define NEXGEN 4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE 6
    "RiseRiseRise",
#define UMC 7
    "UMC UMC UMC ",
#define SIS 8
    "Sis Sis Sis ",
#define NATIONAL 9
    "Geode by NSC",
};

/* number of entries in manMap */
static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);

/* Sentinel for "vendor not in manMap"; must not be a valid manMap index. */
#define MAN_UNKNOWN 10
628 | | |
629 | | #if !defined(AMD_64) |
/* bit tested in edx after cpuid command 1 */
#define SSE2_FLAG (1 << 26)
/*
 * Returns 1 when cpuid command 1 reports SSE2_FLAG in edx, 0 otherwise.
 * Processors identified as 386 or 486 class, and processors whose
 * highest cpuid command is 0, are reported as lacking SSE2.
 */
unsigned long
s_mpi_is_sse2()
{
    unsigned long eax, ebx, ecx, edx;

    /* 386/486 class processors: report no SSE2 without probing cpuid */
    if (is386()) {
        return 0;
    }
    if (is486()) {
        return 0;
    }

    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
    if (eax == 0) {
        /* has no SSE2 extensions */
        return 0;
    }

    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
    return (edx & SSE2_FLAG) != 0;
}
649 | | #endif |
650 | | |
651 | | unsigned long |
652 | | s_mpi_getProcessorLineSize() |
653 | 0 | { |
654 | 0 | unsigned long eax, ebx, ecx, edx; |
655 | 0 | PRUint32 cpuid[3]; |
656 | 0 | unsigned long cpuidLevel; |
657 | 0 | unsigned long cacheLineSize = 0; |
658 | 0 | int manufacturer = MAN_UNKNOWN; |
659 | 0 | int i; |
660 | 0 | char string[13]; |
661 | |
|
662 | | #if !defined(AMD_64) |
663 | | if (is386()) { |
664 | | return 0; /* 386 had no cache */ |
665 | | } |
666 | | if (is486()) { |
667 | | return 32; /* really? need more info */ |
668 | | } |
669 | | #endif |
670 | | |
671 | | /* Pentium, cpuid command is available */ |
672 | 0 | freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
673 | 0 | cpuidLevel = eax; |
674 | | /* string holds the CPU's manufacturer ID string - a twelve |
675 | | * character ASCII string stored in ebx, edx, ecx, and |
676 | | * the 32-bit extended feature flags are in edx, ecx. |
677 | | */ |
678 | 0 | cpuid[0] = ebx; |
679 | 0 | cpuid[1] = ecx; |
680 | 0 | cpuid[2] = edx; |
681 | 0 | memcpy(string, cpuid, sizeof(cpuid)); |
682 | 0 | string[12] = 0; |
683 | |
|
684 | 0 | manufacturer = MAN_UNKNOWN; |
685 | 0 | for (i = 0; i < n_manufacturers; i++) { |
686 | 0 | if (strcmp(manMap[i], string) == 0) { |
687 | 0 | manufacturer = i; |
688 | 0 | } |
689 | 0 | } |
690 | |
|
691 | 0 | if (manufacturer == INTEL) { |
692 | 0 | cacheLineSize = getIntelCacheLineSize(cpuidLevel); |
693 | 0 | } else { |
694 | 0 | cacheLineSize = getOtherCacheLineSize(cpuidLevel); |
695 | 0 | } |
696 | | /* doesn't support cache info based on cpuid. This means |
697 | | * an old pentium class processor, which have cache lines of |
698 | | * 32. If we learn differently, we can use a switch based on |
699 | | * the Manufacturer id */ |
700 | 0 | if (cacheLineSize == 0) { |
701 | 0 | cacheLineSize = 32; |
702 | 0 | } |
703 | 0 | return cacheLineSize; |
704 | 0 | } |
705 | | #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
706 | | #endif |
707 | | |
708 | | #if defined(__ppc64__) |
709 | | /* |
710 | | * Sigh, The PPC has some really nice features to help us determine cache |
711 | | * size, since it had lots of direct control functions to do so. The POWER |
712 | | * processor even has an instruction to do this, but it was dropped in |
713 | | * PowerPC. Unfortunately most of them are not available in user mode. |
714 | | * |
715 | | * The dcbz function would be a great way to determine cache line size except |
716 | | * 1) it only works on write-back memory (it throws an exception otherwise), |
717 | | * and 2) because so many mac programs 'knew' the processor cache size was |
718 | | * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new |
719 | | * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep |
720 | | * these programs happy. dcbzl works if 64 bit instructions are supported. |
721 | | * If you know 64 bit instructions are supported, and that stack is |
722 | | * write-back, you can use this code. |
723 | | */ |
724 | | #include "memory.h" |
725 | | |
/* clear the cache line that contains 'array'
 *
 * Issues the dcbzl instruction with 'array' as the base operand and a
 * zero index.  The "memory" clobber tells the compiler that memory
 * contents change behind its back. */
static inline void
dcbzl(char *array)
{
    __asm__("dcbzl %0, %1"
            : /*no result*/
            : "b%"(array), "r"(0)
            : "memory");
}
735 | | |
/* Round address x up to the next multiple of y (y must be a power of 2). */
#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))

/* Largest cache line size, in bytes, that the probe below can detect. */
#define PPC_MAX_LINE_SIZE 256
/*
 * Measure the cache line size on 64-bit PowerPC: fill an aligned buffer
 * with 0xff bytes, clear one cache block at its start with dcbzl(), and
 * report the largest power-of-two prefix whose last byte was zeroed.
 * Returns 0 if no byte was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    char testArray[2 * PPC_MAX_LINE_SIZE + 1];
    char *probe;
    int span;

    /* Align to the maximum line size so that the block being cleared
     * is known to start at 'probe'. */
    probe = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
    /* set all the values to 1's */
    memset(probe, 0xff, PPC_MAX_LINE_SIZE);
    /* clear one cache block starting at 'probe' */
    dcbzl(probe);

    /* The size of the cleared area is the block size: try the largest
     * candidate first and halve until a zeroed byte is found. */
    span = PPC_MAX_LINE_SIZE;
    while (span != 0) {
        if (probe[span - 1] == 0) {
            return span;
        }
        span /= 2;
    }
    return 0;
}
762 | | |
763 | | #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
764 | | #endif |
765 | | |
766 | | /* |
767 | | * put other processor and platform specific cache code here |
768 | | * return the smallest cache line size in bytes on the processor |
769 | | * (usually the L1 cache). If the OS has a call, this would be |
770 | | * a great place to put it. |
771 | | * |
772 | | * If there is no cache, return 0; |
773 | | * |
774 | | * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions |
775 | | * below aren't compiled. |
776 | | * |
777 | | */ |
778 | | |
779 | | /* If no way to get the processor cache line size has been defined, assume |
780 | | * it's 32 bytes (most common value, does not significantly impact performance) |
781 | | */ |
782 | | #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED |
/*
 * Fallback used when no platform-specific probe was compiled in:
 * report the assumed default cache line size of 32 bytes.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    const unsigned long assumedLineSize = 32;

    return assumedLineSize;
}
788 | | #endif |