Coverage Report

Created: 2025-07-01 06:25

/src/nss/lib/freebl/mpi/mpcpucache.c
Line
Count
Source (jump to first uncovered line)
1
/* This Source Code Form is subject to the terms of the Mozilla Public
2
 * License, v. 2.0. If a copy of the MPL was not distributed with this
3
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5
#include "mpi.h"
6
#include "prtypes.h"
7
8
/*
9
 * This file implements a single function: s_mpi_getProcessorLineSize();
10
 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
11
 * if a cache exists, or zero if there is no cache. If more than one
12
 * cache line exists, it should return the smallest line size (which is
13
 * usually the L1 cache).
14
 *
15
 * mp_modexp uses this information to make sure that private key information
16
 * isn't being leaked through the cache.
17
 *
18
 * Currently the file returns good data for most modern x86 processors, and
19
 * reasonable data on 64-bit ppc processors. All other processors are assumed
20
 * to have a cache line size of 32 bytes.
21
 *
22
 */
23
24
#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
25
/* X86 processors have special instructions that tell us about the cache */
26
#include "string.h"
27
28
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
29
#define AMD_64 1
30
#endif
31
32
/* Generic CPUID function */
33
#if defined(AMD_64)
34
35
#if defined(__GNUC__)
36
37
/*
 * Execute the CPUID instruction for leaf 'op' and store the values it
 * leaves in eax, ebx, ecx and edx through the corresponding pointers.
 *
 * The asm is marked volatile because CPUID is not a pure function of its
 * input: leaf 2, for example, is queried several times in a row by
 * getIntelCacheLineSize(). Without volatile the compiler may merge or drop
 * invocations that look identical once this function is inlined.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    /* ecx is zeroed before cpuid so it never carries stale bits into the
     * instruction or back out of it. */
    __asm__ __volatile__("xor %%ecx, %%ecx\n\t"
                         "cpuid\n\t"
                         : "=a"(*eax),
                           "=b"(*ebx),
                           "=c"(*ecx),
                           "=d"(*edx)
                         : "0"(op));
}
50
51
#elif defined(_MSC_VER)
52
53
#include <intrin.h>
54
55
/*
 * Query CPUID leaf 'op' through the __cpuid intrinsic and copy the four
 * values it reports out to eax, ebx, ecx and edx (in that order).
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    int regs[4]; /* filled by __cpuid: [0]=eax [1]=ebx [2]=ecx [3]=edx */

    __cpuid(regs, (int)op);

    *eax = (unsigned long)regs[0];
    *ebx = (unsigned long)regs[1];
    *ecx = (unsigned long)regs[2];
    *edx = (unsigned long)regs[3];
}
68
69
#endif
70
71
#else /* !defined(AMD_64) */
72
73
/* x86 */
74
75
#if defined(__GNUC__)
76
/*
 * Execute CPUID for leaf 'op' with ecx cleared and store the resulting
 * eax, ebx, ecx and edx values through the corresponding pointers.
 *
 * ebx is left unchanged for the surrounding code: its value is parked in
 * edi across the instruction and exchanged back afterwards, so the CPUID
 * result for ebx is delivered to the caller through edi (the "=D" output).
 *
 * NOTE(review): the asm is not declared volatile -- confirm the compiler
 * cannot merge or drop repeated calls (leaf 2 is queried in a loop).
 */
void
77
freebl_cpuid(unsigned long op, unsigned long *eax,
78
             unsigned long *ebx, unsigned long *ecx,
79
             unsigned long *edx)
80
{
81
    /* Some older processors don't fill the ecx register with cpuid, so clobber it
82
     * before calling cpuid, so that there's no risk of picking random bits that
83
     * erroneously indicate that absent CPU features are present.
84
     * Also, GCC isn't smart enough to save the ebx PIC register on its own
85
     * in this case, so do it by hand. Use edi to store ebx and pass the
86
     * value returned in ebx from cpuid through edi. */
87
    __asm__("xor %%ecx, %%ecx\n\t"
88
            "mov %%ebx,%%edi\n\t"
89
            "cpuid\n\t"
90
            "xchgl %%ebx,%%edi\n\t"
91
            : "=a"(*eax),
92
              "=D"(*ebx),
93
              "=c"(*ecx),
94
              "=d"(*edx)
95
            : "0"(op));
96
}
97
98
/*
99
 * try flipping a processor flag to determine CPU type
 *
 * Reads the flags register, XORs it with 'flag', writes the result back,
 * reads the flags again and finally restores the original value.
 * Returns the bits that differ between the re-read flags and the original
 * flags, i.e. zero when the processor refused to change the requested bit
 * (is386()/is486() test for exactly that).
100
 */
101
static unsigned long
102
changeFlag(unsigned long flag)
103
{
104
    unsigned long changedFlags, originalFlags;
105
    __asm__("pushfl\n\t" /* get the flags */
106
            "popl %0\n\t"
107
            "movl %0,%1\n\t" /* save the original flags */
108
            "xorl %2,%0\n\t" /* flip the bit */
109
            "pushl %0\n\t"   /* set the flags */
110
            "popfl\n\t"
111
            "pushfl\n\t" /* get the flags again (for return) */
112
            "popl %0\n\t"
113
            "pushl %1\n\t" /* restore the original flags */
114
            "popfl\n\t"
115
            : "=r"(changedFlags),
116
              "=r"(originalFlags),
117
              "=r"(flag)
118
            : "2"(flag));
119
    return changedFlags ^ originalFlags;
120
}
121
122
#elif defined(_MSC_VER)
123
124
/*
125
 * windows versions of the above assembler
126
 */
127
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
128
/*
 * Execute CPUID for leaf 'op' with ecx cleared and return the resulting
 * eax, ebx, ecx and edx values through Reax, Rebx, Recx and Redx.
 *
 * All general registers are saved with pushad and restored with popad, so
 * the register results are first copied into locals inside the asm block
 * and only stored through the pointers afterwards. The cpuid opcode itself
 * is emitted as raw bytes by the wcpuid macro.
 */
void
129
freebl_cpuid(unsigned long op, unsigned long *Reax,
130
             unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
131
{
132
    unsigned long Leax, Lebx, Lecx, Ledx;
133
    __asm {
134
        pushad
135
        xor     ecx,ecx
136
        mov     eax,op
137
        wcpuid
138
        mov     Leax,eax
139
        mov     Lebx,ebx
140
        mov     Lecx,ecx
141
        mov     Ledx,edx
142
        popad
143
    }
144
    *Reax = Leax;
145
    *Rebx = Lebx;
146
    *Recx = Lecx;
147
    *Redx = Ledx;
148
}
149
150
/*
 * Try to toggle the bit(s) in 'flag' in the processor flags register.
 *
 * The original flags are read, kept both on the stack and in
 * originalFlags, XORed with 'flag' and written back; the flags are then
 * read again and the saved original value is restored from the stack.
 * eax and ebx are preserved around the sequence.
 * Returns the bits that differ between the re-read flags and the original
 * flags, i.e. zero when the requested bit could not be changed.
 */
static unsigned long
151
changeFlag(unsigned long flag)
152
{
153
    unsigned long changedFlags, originalFlags;
154
    __asm {
155
        push eax
156
        push ebx
157
        pushfd /* get the flags */
158
            pop  eax
159
        push eax /* save the flags on the stack */
160
            mov  originalFlags,eax /* save the original flags */
161
        mov  ebx,flag
162
            xor  eax,ebx /* flip the bit */
163
        push eax /* set the flags */
164
            popfd
165
        pushfd /* get the flags again (for return) */
166
        pop  eax
167
        popfd /* restore the original flags */
168
        mov changedFlags,eax
169
        pop ebx
170
        pop eax
171
    }
172
    return changedFlags ^ originalFlags;
173
}
174
#endif
175
176
#endif
177
178
#if !defined(AMD_64)
179
#define AC_FLAG 0x40000
180
#define ID_FLAG 0x200000
181
182
/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
183
static int
184
is386()
185
{
186
    return changeFlag(AC_FLAG) == 0;
187
}
188
189
/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
190
static int
191
is486()
192
{
193
    return changeFlag(ID_FLAG) == 0;
194
}
195
#endif
196
197
/*
198
 * table for Intel Cache.
199
 * See Intel Application Note AP-485 for more information
200
 */
201
202
/* Storage type for a CacheType value inside the (compact) CacheMap table. */
typedef unsigned char CacheTypeEntry;

/* Kind of structure a cpuid leaf 2 descriptor byte refers to. The 'i' and
 * 'd' suffixes distinguish instruction and data variants. */
typedef enum {
    Cache_NONE = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB = 2,
    Cache_TLBi = 3,
    Cache_TLBd = 4,
    Cache_Trace = 5,
    Cache_L1 = 6,
    Cache_L1i = 7,
    Cache_L1d = 8,
    Cache_L2 = 9,
    Cache_L2i = 10,
    Cache_L2d = 11,
    Cache_L3 = 12,
    Cache_L3i = 13,
    Cache_L3d = 14
} CacheType;

/* One CacheMap row: what the descriptor describes and its line size in
 * bytes (0 when the descriptor carries no usable line size). */
struct _cache {
    CacheTypeEntry type;
    unsigned char lineSize;
};

/*
 * Descriptor table indexed by the descriptor byte returned by cpuid leaf 2.
 * Every row spells out both fields explicitly; descriptors this code does
 * not know are Cache_UNKNOWN with a line size of 0.
 */
static const struct _cache CacheMap[256] = {
    /* 00 */ { Cache_NONE, 0 },
    /* 01 */ { Cache_TLBi, 0 },
    /* 02 */ { Cache_TLBi, 0 },
    /* 03 */ { Cache_TLBd, 0 },
    /* 04 */ { Cache_TLBd, 0 },
    /* 05 */ { Cache_UNKNOWN, 0 },
    /* 06 */ { Cache_L1i, 32 },
    /* 07 */ { Cache_UNKNOWN, 0 },
    /* 08 */ { Cache_L1i, 32 },
    /* 09 */ { Cache_UNKNOWN, 0 },
    /* 0a */ { Cache_L1d, 32 },
    /* 0b */ { Cache_UNKNOWN, 0 },
    /* 0c */ { Cache_L1d, 32 },
    /* 0d */ { Cache_UNKNOWN, 0 },
    /* 0e */ { Cache_UNKNOWN, 0 },
    /* 0f */ { Cache_UNKNOWN, 0 },
    /* 10 */ { Cache_UNKNOWN, 0 },
    /* 11 */ { Cache_UNKNOWN, 0 },
    /* 12 */ { Cache_UNKNOWN, 0 },
    /* 13 */ { Cache_UNKNOWN, 0 },
    /* 14 */ { Cache_UNKNOWN, 0 },
    /* 15 */ { Cache_UNKNOWN, 0 },
    /* 16 */ { Cache_UNKNOWN, 0 },
    /* 17 */ { Cache_UNKNOWN, 0 },
    /* 18 */ { Cache_UNKNOWN, 0 },
    /* 19 */ { Cache_UNKNOWN, 0 },
    /* 1a */ { Cache_UNKNOWN, 0 },
    /* 1b */ { Cache_UNKNOWN, 0 },
    /* 1c */ { Cache_UNKNOWN, 0 },
    /* 1d */ { Cache_UNKNOWN, 0 },
    /* 1e */ { Cache_UNKNOWN, 0 },
    /* 1f */ { Cache_UNKNOWN, 0 },
    /* 20 */ { Cache_UNKNOWN, 0 },
    /* 21 */ { Cache_UNKNOWN, 0 },
    /* 22 */ { Cache_L3, 64 },
    /* 23 */ { Cache_L3, 64 },
    /* 24 */ { Cache_UNKNOWN, 0 },
    /* 25 */ { Cache_L3, 64 },
    /* 26 */ { Cache_UNKNOWN, 0 },
    /* 27 */ { Cache_UNKNOWN, 0 },
    /* 28 */ { Cache_UNKNOWN, 0 },
    /* 29 */ { Cache_L3, 64 },
    /* 2a */ { Cache_UNKNOWN, 0 },
    /* 2b */ { Cache_UNKNOWN, 0 },
    /* 2c */ { Cache_L1d, 64 },
    /* 2d */ { Cache_UNKNOWN, 0 },
    /* 2e */ { Cache_UNKNOWN, 0 },
    /* 2f */ { Cache_UNKNOWN, 0 },
    /* 30 */ { Cache_L1i, 64 },
    /* 31 */ { Cache_UNKNOWN, 0 },
    /* 32 */ { Cache_UNKNOWN, 0 },
    /* 33 */ { Cache_UNKNOWN, 0 },
    /* 34 */ { Cache_UNKNOWN, 0 },
    /* 35 */ { Cache_UNKNOWN, 0 },
    /* 36 */ { Cache_UNKNOWN, 0 },
    /* 37 */ { Cache_UNKNOWN, 0 },
    /* 38 */ { Cache_UNKNOWN, 0 },
    /* 39 */ { Cache_L2, 64 },
    /* 3a */ { Cache_UNKNOWN, 0 },
    /* 3b */ { Cache_L2, 64 },
    /* 3c */ { Cache_L2, 64 },
    /* 3d */ { Cache_UNKNOWN, 0 },
    /* 3e */ { Cache_UNKNOWN, 0 },
    /* 3f */ { Cache_UNKNOWN, 0 },
    /* 40 */ { Cache_L2, 0 },
    /* 41 */ { Cache_L2, 32 },
    /* 42 */ { Cache_L2, 32 },
    /* 43 */ { Cache_L2, 32 },
    /* 44 */ { Cache_L2, 32 },
    /* 45 */ { Cache_L2, 32 },
    /* 46 */ { Cache_UNKNOWN, 0 },
    /* 47 */ { Cache_UNKNOWN, 0 },
    /* 48 */ { Cache_UNKNOWN, 0 },
    /* 49 */ { Cache_UNKNOWN, 0 },
    /* 4a */ { Cache_UNKNOWN, 0 },
    /* 4b */ { Cache_UNKNOWN, 0 },
    /* 4c */ { Cache_UNKNOWN, 0 },
    /* 4d */ { Cache_UNKNOWN, 0 },
    /* 4e */ { Cache_UNKNOWN, 0 },
    /* 4f */ { Cache_UNKNOWN, 0 },
    /* 50 */ { Cache_TLBi, 0 },
    /* 51 */ { Cache_TLBi, 0 },
    /* 52 */ { Cache_TLBi, 0 },
    /* 53 */ { Cache_UNKNOWN, 0 },
    /* 54 */ { Cache_UNKNOWN, 0 },
    /* 55 */ { Cache_UNKNOWN, 0 },
    /* 56 */ { Cache_UNKNOWN, 0 },
    /* 57 */ { Cache_UNKNOWN, 0 },
    /* 58 */ { Cache_UNKNOWN, 0 },
    /* 59 */ { Cache_UNKNOWN, 0 },
    /* 5a */ { Cache_UNKNOWN, 0 },
    /* 5b */ { Cache_TLBd, 0 },
    /* 5c */ { Cache_TLBd, 0 },
    /* 5d */ { Cache_TLBd, 0 },
    /* 5e */ { Cache_UNKNOWN, 0 },
    /* 5f */ { Cache_UNKNOWN, 0 },
    /* 60 */ { Cache_UNKNOWN, 0 },
    /* 61 */ { Cache_UNKNOWN, 0 },
    /* 62 */ { Cache_UNKNOWN, 0 },
    /* 63 */ { Cache_UNKNOWN, 0 },
    /* 64 */ { Cache_UNKNOWN, 0 },
    /* 65 */ { Cache_UNKNOWN, 0 },
    /* 66 */ { Cache_L1d, 64 },
    /* 67 */ { Cache_L1d, 64 },
    /* 68 */ { Cache_L1d, 64 },
    /* 69 */ { Cache_UNKNOWN, 0 },
    /* 6a */ { Cache_UNKNOWN, 0 },
    /* 6b */ { Cache_UNKNOWN, 0 },
    /* 6c */ { Cache_UNKNOWN, 0 },
    /* 6d */ { Cache_UNKNOWN, 0 },
    /* 6e */ { Cache_UNKNOWN, 0 },
    /* 6f */ { Cache_UNKNOWN, 0 },
    /* 70 */ { Cache_Trace, 1 },
    /* 71 */ { Cache_Trace, 1 },
    /* 72 */ { Cache_Trace, 1 },
    /* 73 */ { Cache_UNKNOWN, 0 },
    /* 74 */ { Cache_UNKNOWN, 0 },
    /* 75 */ { Cache_UNKNOWN, 0 },
    /* 76 */ { Cache_UNKNOWN, 0 },
    /* 77 */ { Cache_UNKNOWN, 0 },
    /* 78 */ { Cache_UNKNOWN, 0 },
    /* 79 */ { Cache_L2, 64 },
    /* 7a */ { Cache_L2, 64 },
    /* 7b */ { Cache_L2, 64 },
    /* 7c */ { Cache_L2, 64 },
    /* 7d */ { Cache_UNKNOWN, 0 },
    /* 7e */ { Cache_UNKNOWN, 0 },
    /* 7f */ { Cache_UNKNOWN, 0 },
    /* 80 */ { Cache_UNKNOWN, 0 },
    /* 81 */ { Cache_UNKNOWN, 0 },
    /* 82 */ { Cache_L2, 32 },
    /* 83 */ { Cache_L2, 32 },
    /* 84 */ { Cache_L2, 32 },
    /* 85 */ { Cache_L2, 32 },
    /* 86 */ { Cache_L2, 64 },
    /* 87 */ { Cache_L2, 64 },
    /* 88 */ { Cache_UNKNOWN, 0 },
    /* 89 */ { Cache_UNKNOWN, 0 },
    /* 8a */ { Cache_UNKNOWN, 0 },
    /* 8b */ { Cache_UNKNOWN, 0 },
    /* 8c */ { Cache_UNKNOWN, 0 },
    /* 8d */ { Cache_UNKNOWN, 0 },
    /* 8e */ { Cache_UNKNOWN, 0 },
    /* 8f */ { Cache_UNKNOWN, 0 },
    /* 90 */ { Cache_UNKNOWN, 0 },
    /* 91 */ { Cache_UNKNOWN, 0 },
    /* 92 */ { Cache_UNKNOWN, 0 },
    /* 93 */ { Cache_UNKNOWN, 0 },
    /* 94 */ { Cache_UNKNOWN, 0 },
    /* 95 */ { Cache_UNKNOWN, 0 },
    /* 96 */ { Cache_UNKNOWN, 0 },
    /* 97 */ { Cache_UNKNOWN, 0 },
    /* 98 */ { Cache_UNKNOWN, 0 },
    /* 99 */ { Cache_UNKNOWN, 0 },
    /* 9a */ { Cache_UNKNOWN, 0 },
    /* 9b */ { Cache_UNKNOWN, 0 },
    /* 9c */ { Cache_UNKNOWN, 0 },
    /* 9d */ { Cache_UNKNOWN, 0 },
    /* 9e */ { Cache_UNKNOWN, 0 },
    /* 9f */ { Cache_UNKNOWN, 0 },
    /* a0 */ { Cache_UNKNOWN, 0 },
    /* a1 */ { Cache_UNKNOWN, 0 },
    /* a2 */ { Cache_UNKNOWN, 0 },
    /* a3 */ { Cache_UNKNOWN, 0 },
    /* a4 */ { Cache_UNKNOWN, 0 },
    /* a5 */ { Cache_UNKNOWN, 0 },
    /* a6 */ { Cache_UNKNOWN, 0 },
    /* a7 */ { Cache_UNKNOWN, 0 },
    /* a8 */ { Cache_UNKNOWN, 0 },
    /* a9 */ { Cache_UNKNOWN, 0 },
    /* aa */ { Cache_UNKNOWN, 0 },
    /* ab */ { Cache_UNKNOWN, 0 },
    /* ac */ { Cache_UNKNOWN, 0 },
    /* ad */ { Cache_UNKNOWN, 0 },
    /* ae */ { Cache_UNKNOWN, 0 },
    /* af */ { Cache_UNKNOWN, 0 },
    /* b0 */ { Cache_TLBi, 0 },
    /* b1 */ { Cache_UNKNOWN, 0 },
    /* b2 */ { Cache_UNKNOWN, 0 },
    /* b3 */ { Cache_TLBd, 0 },
    /* b4 */ { Cache_UNKNOWN, 0 },
    /* b5 */ { Cache_UNKNOWN, 0 },
    /* b6 */ { Cache_UNKNOWN, 0 },
    /* b7 */ { Cache_UNKNOWN, 0 },
    /* b8 */ { Cache_UNKNOWN, 0 },
    /* b9 */ { Cache_UNKNOWN, 0 },
    /* ba */ { Cache_UNKNOWN, 0 },
    /* bb */ { Cache_UNKNOWN, 0 },
    /* bc */ { Cache_UNKNOWN, 0 },
    /* bd */ { Cache_UNKNOWN, 0 },
    /* be */ { Cache_UNKNOWN, 0 },
    /* bf */ { Cache_UNKNOWN, 0 },
    /* c0 */ { Cache_UNKNOWN, 0 },
    /* c1 */ { Cache_UNKNOWN, 0 },
    /* c2 */ { Cache_UNKNOWN, 0 },
    /* c3 */ { Cache_UNKNOWN, 0 },
    /* c4 */ { Cache_UNKNOWN, 0 },
    /* c5 */ { Cache_UNKNOWN, 0 },
    /* c6 */ { Cache_UNKNOWN, 0 },
    /* c7 */ { Cache_UNKNOWN, 0 },
    /* c8 */ { Cache_UNKNOWN, 0 },
    /* c9 */ { Cache_UNKNOWN, 0 },
    /* ca */ { Cache_UNKNOWN, 0 },
    /* cb */ { Cache_UNKNOWN, 0 },
    /* cc */ { Cache_UNKNOWN, 0 },
    /* cd */ { Cache_UNKNOWN, 0 },
    /* ce */ { Cache_UNKNOWN, 0 },
    /* cf */ { Cache_UNKNOWN, 0 },
    /* d0 */ { Cache_UNKNOWN, 0 },
    /* d1 */ { Cache_UNKNOWN, 0 },
    /* d2 */ { Cache_UNKNOWN, 0 },
    /* d3 */ { Cache_UNKNOWN, 0 },
    /* d4 */ { Cache_UNKNOWN, 0 },
    /* d5 */ { Cache_UNKNOWN, 0 },
    /* d6 */ { Cache_UNKNOWN, 0 },
    /* d7 */ { Cache_UNKNOWN, 0 },
    /* d8 */ { Cache_UNKNOWN, 0 },
    /* d9 */ { Cache_UNKNOWN, 0 },
    /* da */ { Cache_UNKNOWN, 0 },
    /* db */ { Cache_UNKNOWN, 0 },
    /* dc */ { Cache_UNKNOWN, 0 },
    /* dd */ { Cache_UNKNOWN, 0 },
    /* de */ { Cache_UNKNOWN, 0 },
    /* df */ { Cache_UNKNOWN, 0 },
    /* e0 */ { Cache_UNKNOWN, 0 },
    /* e1 */ { Cache_UNKNOWN, 0 },
    /* e2 */ { Cache_UNKNOWN, 0 },
    /* e3 */ { Cache_UNKNOWN, 0 },
    /* e4 */ { Cache_UNKNOWN, 0 },
    /* e5 */ { Cache_UNKNOWN, 0 },
    /* e6 */ { Cache_UNKNOWN, 0 },
    /* e7 */ { Cache_UNKNOWN, 0 },
    /* e8 */ { Cache_UNKNOWN, 0 },
    /* e9 */ { Cache_UNKNOWN, 0 },
    /* ea */ { Cache_UNKNOWN, 0 },
    /* eb */ { Cache_UNKNOWN, 0 },
    /* ec */ { Cache_UNKNOWN, 0 },
    /* ed */ { Cache_UNKNOWN, 0 },
    /* ee */ { Cache_UNKNOWN, 0 },
    /* ef */ { Cache_UNKNOWN, 0 },
    /* f0 */ { Cache_UNKNOWN, 0 },
    /* f1 */ { Cache_UNKNOWN, 0 },
    /* f2 */ { Cache_UNKNOWN, 0 },
    /* f3 */ { Cache_UNKNOWN, 0 },
    /* f4 */ { Cache_UNKNOWN, 0 },
    /* f5 */ { Cache_UNKNOWN, 0 },
    /* f6 */ { Cache_UNKNOWN, 0 },
    /* f7 */ { Cache_UNKNOWN, 0 },
    /* f8 */ { Cache_UNKNOWN, 0 },
    /* f9 */ { Cache_UNKNOWN, 0 },
    /* fa */ { Cache_UNKNOWN, 0 },
    /* fb */ { Cache_UNKNOWN, 0 },
    /* fc */ { Cache_UNKNOWN, 0 },
    /* fd */ { Cache_UNKNOWN, 0 },
    /* fe */ { Cache_UNKNOWN, 0 },
    /* ff */ { Cache_UNKNOWN, 0 }
};
486
487
/*
488
 * use the above table to determine the CacheEntryLineSize.
489
 */
490
static void
491
getIntelCacheEntryLineSize(unsigned long val, int *level,
492
                           unsigned long *lineSize)
493
0
{
494
0
    CacheType type;
495
496
0
    type = CacheMap[val].type;
497
    /* only interested in data caches */
498
    /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
499
     * this data check has the side effect of rejecting that entry. If
500
     * that wasn't the case, we could have to reject it explicitly */
501
0
    if (CacheMap[val].lineSize == 0) {
502
0
        return;
503
0
    }
504
    /* look at the caches, skip types we aren't interested in.
505
     * if we already have a value for a lower level cache, skip the
506
     * current entry */
507
0
    if ((type == Cache_L1) || (type == Cache_L1d)) {
508
0
        *level = 1;
509
0
        *lineSize = CacheMap[val].lineSize;
510
0
    } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
511
0
        *level = 2;
512
0
        *lineSize = CacheMap[val].lineSize;
513
0
    } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
514
0
        *level = 3;
515
0
        *lineSize = CacheMap[val].lineSize;
516
0
    }
517
0
    return;
518
0
}
519
520
/*
 * Split one 32-bit cpuid leaf 2 register value into its four descriptor
 * bytes and feed each of them, most significant byte first, to
 * getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
529
530
/*
 * Determine the cache line size from Intel's cpuid leaf 2 descriptors.
 *
 * cpuidLevel is the highest basic cpuid leaf the processor supports.
 *
 * returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor (cpuidLevel < 2).
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4; /* above every level (1-3) the descriptor parser records */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* the count occupies all eight bits of al, not just its low nibble */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
        /* the first pass uses the values fetched above; every further pass
         * needs a fresh cpuid '2' call */
        if (count != 0) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
        if ((eax & 0x80000000) == 0) {
            /* al holds the count, not a descriptor, so mask it out */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
    }
    return lineSize;
}
577
578
/*
 * Determine the L1 data cache line size through the extended cpuid
 * commands defined by AMD (see "AMD Processor Recognition Application
 * Note" Publication 20734). Some other processors use the identical scheme
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * The value passed in as cpuidLevel is not consulted; it is replaced by the
 * extended cpuid level the processor reports.
 *
 * returns '0' if the cache info is not supported by this processor.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* ask for the highest extended cpuid command */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    /* low byte of ecx: line size of the L1 data cache */
    return ecx & 0xff;
}
601
602
/*
 * CPU vendor identification strings as reported by cpuid command 0.
 * Each #define names the manMap index of the string that follows it, so
 * the macro values must stay in step with the order of the strings.
 */
static const char *const manMap[] = {
#define INTEL 0
    "GenuineIntel",
#define AMD 1
    "AuthenticAMD",
#define CYRIX 2
    "CyrixInstead",
#define CENTAUR 3
    "CentaurHauls",
#define NEXGEN 4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE 6
    "RiseRiseRise",
#define UMC 7
    "UMC UMC UMC ",
#define SIS 8
    "Sis Sis Sis ",
#define NATIONAL 9
    "Geode by NSC",
};

/* number of entries in manMap */
static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);

/* "no match" marker: one past the last valid manMap index so it can never
 * collide with a real vendor */
#define MAN_UNKNOWN 10
628
629
#if !defined(AMD_64)
630
#define SSE2_FLAG (1 << 26)
631
unsigned long
632
s_mpi_is_sse2()
633
{
634
    unsigned long eax, ebx, ecx, edx;
635
636
    if (is386() || is486()) {
637
        return 0;
638
    }
639
    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
640
641
    /* has no SSE2 extensions */
642
    if (eax == 0) {
643
        return 0;
644
    }
645
646
    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
647
    return (edx & SSE2_FLAG) == SSE2_FLAG;
648
}
649
#endif
650
651
unsigned long
652
s_mpi_getProcessorLineSize()
653
0
{
654
0
    unsigned long eax, ebx, ecx, edx;
655
0
    PRUint32 cpuid[3];
656
0
    unsigned long cpuidLevel;
657
0
    unsigned long cacheLineSize = 0;
658
0
    int manufacturer = MAN_UNKNOWN;
659
0
    int i;
660
0
    char string[13];
661
662
#if !defined(AMD_64)
663
    if (is386()) {
664
        return 0; /* 386 had no cache */
665
    }
666
    if (is486()) {
667
        return 32; /* really? need more info */
668
    }
669
#endif
670
671
    /* Pentium, cpuid command is available */
672
0
    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
673
0
    cpuidLevel = eax;
674
    /* string holds the CPU's manufacturer ID string - a twelve
675
     * character ASCII string stored in ebx, edx, ecx, and
676
     * the 32-bit extended feature flags are in edx, ecx.
677
     */
678
0
    cpuid[0] = ebx;
679
0
    cpuid[1] = ecx;
680
0
    cpuid[2] = edx;
681
0
    memcpy(string, cpuid, sizeof(cpuid));
682
0
    string[12] = 0;
683
684
0
    manufacturer = MAN_UNKNOWN;
685
0
    for (i = 0; i < n_manufacturers; i++) {
686
0
        if (strcmp(manMap[i], string) == 0) {
687
0
            manufacturer = i;
688
0
        }
689
0
    }
690
691
0
    if (manufacturer == INTEL) {
692
0
        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
693
0
    } else {
694
0
        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
695
0
    }
696
    /* doesn't support cache info based on cpuid. This means
697
     * an old pentium class processor, which have cache lines of
698
     * 32. If we learn differently, we can use a switch based on
699
     * the Manufacturer id  */
700
0
    if (cacheLineSize == 0) {
701
0
        cacheLineSize = 32;
702
0
    }
703
0
    return cacheLineSize;
704
0
}
705
#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
706
#endif
707
708
#if defined(__ppc64__)
709
/*
710
 *  Sigh, The PPC has some really nice features to help us determine cache
711
 *  size, since it had lots of direct control functions to do so. The POWER
712
 *  processor even has an instruction to do this, but it was dropped in
713
 *  PowerPC. Unfortunately most of them are not available in user mode.
714
 *
715
 *  The dcbz function would be a great way to determine cache line size except
716
 *  1) it only works on write-back memory (it throws an exception otherwise),
717
 *  and 2) because so many mac programs 'knew' the processor cache size was
718
 *  32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
719
 *  G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
720
 *  these programs happy. dcbzl work if 64 bit instructions are supported.
721
 *  If you know 64 bit instructions are supported, and that stack is
722
 *  write-back, you can use this code.
723
 */
724
#include "memory.h"
725
726
/* clear the cache line that contains 'array'
 *
 * Issues the "dcbzl" instruction with 'array' and the constant 0 as its two
 * register operands. The asm produces no outputs; the "memory" clobber
 * tells the compiler that memory is modified behind its back. */
727
static inline void
728
dcbzl(char *array)
729
{
730
    __asm__("dcbzl %0, %1"
731
            : /*no result*/
732
            : "b%"(array), "r"(0)
733
            : "memory");
734
}
735
736
#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
737
738
#define PPC_MAX_LINE_SIZE 256
739
unsigned long
740
s_mpi_getProcessorLineSize()
741
{
742
    char testArray[2 * PPC_MAX_LINE_SIZE + 1];
743
    char *test;
744
    int i;
745
746
    /* align the array on a maximum line size boundary, so we
747
     * know we are starting to clear from the first address */
748
    test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
749
    /* set all the values to 1's */
750
    memset(test, 0xff, PPC_MAX_LINE_SIZE);
751
    /* clear one cache block starting at 'test' */
752
    dcbzl(test);
753
754
    /* find the size of the cleared area, that's our block size */
755
    for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) {
756
        if (test[i - 1] == 0) {
757
            return i;
758
        }
759
    }
760
    return 0;
761
}
762
763
#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
764
#endif
765
766
/*
767
 * put other processor and platform specific cache code here
768
 * return the smallest cache line size in bytes on the processor
769
 * (usually the L1 cache). If the OS has a call, this would be
770
 * a great place to put it.
771
 *
772
 * If there is no cache, return 0;
773
 *
774
 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
775
 * below aren't compiled.
776
 *
777
 */
778
779
/* If no way to get the processor cache line size has been defined, assume
780
 * it's 32 bytes (most common value, does not significantly impact performance)
781
 */
782
#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
783
/* Generic fallback used when no platform-specific probe was compiled in:
 * always reports a cache line size of 32 bytes. */
unsigned long
s_mpi_getProcessorLineSize()
{
    const unsigned long defaultLineSize = 32;

    return defaultLineSize;
}
788
#endif