Coverage Report

Created: 2024-11-21 07:03

/src/SymCrypt/lib/ScsTable.c
Line
Count
Source (jump to first uncovered line)
1
//
2
// ScsTable.c
3
// Side-channel safe table
4
//
5
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
6
//
7
//
8
// These functions implement a table of large elements.
9
// Reading an element from the table is done in a way that does not reveal the
10
// element accessed through memory side channels.
11
// Basically, the whole table is read by the CPU, and the required data is selected
12
// using boolean operations.
13
//
14
15
#include "precomp.h"
16
17
//
18
// Items are a multiple of SYMCRYPT_DIGIT_SIZE long.
19
//
20
// Format:
21
// The memory format is parameterized for optimal implementations on several
22
// different architectures.
23
//
24
// The following parameters define the format:
25
//  - group_size
26
//  - interleave_size
27
//
28
// Let nElements be the number of elements in the table.
29
// If necessary, the size of each element in the table is rounded up to a multiple of interleave_size.
30
// Each whole group of group_size elements is interleaved with each other.
31
// The last (nElements % group_size) elements are simply stored consecutively.
32
// (For now we simply require that nElements is a multiple of group_size.)
33
// Within each group of group_size, the data for the elements are interleaved in natural order
34
// using chunks of interleave_size bytes.
35
//
36
// The choice of group_size and interleave_size depends on the CPU architecture, CPU features,
37
// and even the element size. (E.g. 1024-bit elements might interleave @ 64 bytes on an AVX512
38
// capable CPU, but 256-bit elements would have to interleave at 16 or 32 bytes on that same CPU.)
39
//
40
41
// Currently these are constants as that allows easier optimizations...
42
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64
43
#define SYMCRYPT_SCSTABLE_USE64             1
44
0
#define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE   32
45
0
#define SYMCRYPT_SCSTABLE_GROUP_SIZE        4
46
typedef UINT64 SYMCRYPT_SCSTABLE_TYPE;
47
#else
48
#define SYMCRYPT_SCSTABLE_USE64             0
49
#define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE   16
50
#define SYMCRYPT_SCSTABLE_GROUP_SIZE        4
51
typedef UINT32 SYMCRYPT_SCSTABLE_TYPE;
52
#endif
53
54
UINT32
55
SYMCRYPT_CALL
56
SymCryptScsTableInit(
57
    _Out_   PSYMCRYPT_SCSTABLE  pScsTable,
58
            UINT32              nElements,
59
            UINT32              elementSize )
60
0
{
61
0
    UINT32  groupSize;
62
0
    UINT32  interleaveSize;
63
0
    UINT32  cbBuffer;
64
65
0
    SYMCRYPT_ASSERT( nElements > 0 );
66
67
0
#pragma warning( suppress: 4127 )       // conditional expression is constant
68
0
    if( SYMCRYPT_CPU_AMD64 && elementSize == 128 )
69
0
    {
70
        // Highly optimized assembler mode for 1024-bit entries for RSA-2048...
71
0
        interleaveSize = 128;
72
0
        groupSize = 1;
73
0
    } else {
74
        // Standard C implementation
75
0
        interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE;
76
0
        groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE;
77
0
    }
78
79
    // Right now, we limit ourselves to element sizes that are a multiple of the interleaveSize and
80
    // # elements that are a multiple of the group size.
81
    // We also limit ourselves to sensible input sizes
82
0
    SYMCRYPT_ASSERT( elementSize % interleaveSize == 0 && nElements % groupSize == 0 && (elementSize | nElements) < (1 << 16) && elementSize > 0 );
83
84
0
    cbBuffer = elementSize * nElements; // Each factor is < 2^16, so there is no overflow in the mul
85
86
0
    pScsTable->groupSize = groupSize;
87
0
    pScsTable->interleaveSize = interleaveSize;
88
0
    pScsTable->nElements = nElements;
89
0
    pScsTable->elementSize = elementSize;
90
0
    pScsTable->cbTableData = cbBuffer;
91
0
    pScsTable->pbTableData = NULL;
92
93
0
    return cbBuffer;
94
0
}
95
96
VOID
97
SYMCRYPT_CALL
98
SymCryptScsTableSetBuffer(
99
    _Inout_                             PSYMCRYPT_SCSTABLE  pScsTable,
100
    _Inout_updates_bytes_( cbBuffer )   PBYTE               pbBuffer,
101
                                        UINT32              cbBuffer )
102
0
{
103
0
    SYMCRYPT_ASSERT(cbBuffer >= pScsTable->cbTableData);
104
0
    UNREFERENCED_PARAMETER( cbBuffer );
105
106
0
    pScsTable->pbTableData = pbBuffer;
107
0
}
108
109
110
// Compile-time checks for the assumptions hard-wired into the C store/load routines below.
C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 16 || SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 32 );
// check that an interleave size is exactly 4 words
C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 4 * sizeof( SYMCRYPT_SCSTABLE_TYPE ) );
// check that a group is exactly 4 elements: the C load routine uses four masks and
// reads the group members at fixed word offsets 0, 4, 8 and 12
C_ASSERT( SYMCRYPT_SCSTABLE_GROUP_SIZE == 4 );
113
114
VOID
115
SYMCRYPT_CALL
116
SymCryptScsTableStoreC(
117
    _Inout_                     PSYMCRYPT_SCSTABLE  pScsTable,
118
                                UINT32              iIndex,
119
    _In_reads_bytes_( cbData )  PCBYTE              pbData,
120
                                UINT32              cbData )
121
0
{
122
0
    UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE;
123
0
    UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE;
124
0
    UINT32 elementSize = pScsTable->elementSize;
125
0
    UINT32 groupOffset;
126
127
0
    SYMCRYPT_ASSERT( groupSize ==  pScsTable->groupSize );
128
0
    SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize );
129
130
0
    SYMCRYPT_ASSERT( cbData == elementSize );
131
0
    UNREFERENCED_PARAMETER( cbData );
132
133
0
    SYMCRYPT_ASSERT(iIndex < pScsTable->nElements);
134
135
0
    groupOffset = iIndex % groupSize;
136
137
  // dcl - document why this can't be an integer overflow
138
0
    SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) (pScsTable->pbTableData + (iIndex - groupOffset) * elementSize + groupOffset * interleaveSize);
139
0
    SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pbData;
140
141
0
    UINT32 nInterleaves = elementSize / interleaveSize;
142
143
0
    do
144
0
    {
145
0
        pDst[0] = pSrc[0];
146
0
        pDst[1] = pSrc[1];
147
0
        pDst[2] = pSrc[2];
148
0
        pDst[3] = pSrc[3];
149
150
0
        pDst += interleaveSize * groupSize / sizeof( *pDst );
151
0
        pSrc += interleaveSize / sizeof( *pSrc );
152
0
        nInterleaves--;
153
0
    } while( nInterleaves > 0 );
154
155
0
}
156
157
#if SYMCRYPT_CPU_AMD64
//
// SymCryptScsTableStore128Xmm
//
// Writes one 128-byte element into a table that uses the AMD64 128-byte layout
// (group size 1, i.e. elements are stored one after another, 128 bytes each).
//
//  pScsTable   Table with elementSize == 128 and groupSize == 1.
//  iIndex      Index of the element to write; must be < pScsTable->nElements.
//  pbData      Source data, 128 bytes.
//  cbData      Must be 128.
//
// NOTE(review): both buffers are accessed through __m128i pointers, which presumably
// requires 16-byte alignment of pbData and of the table buffer - confirm with callers.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableStore128Xmm(
    _Inout_                     PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _In_reads_bytes_( cbData )  PCBYTE              pbData,
                                UINT32              cbData )
{
    // iIndex < nElements < 2^16 (see the asserts here and in SymCryptScsTableInit),
    // so iIndex * 128 does not overflow.
    __m128i * pDst = (__m128i *) (pScsTable->pbTableData + iIndex * 128);
    // Keep the const qualifier of the caller's buffer; it is only read.
    const __m128i * pSrc = (const __m128i *) pbData;

    SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 );
    UNREFERENCED_PARAMETER( cbData );

    // 8 x 16 bytes = 128 bytes
    pDst[0] = pSrc[0];
    pDst[1] = pSrc[1];
    pDst[2] = pSrc[2];
    pDst[3] = pSrc[3];
    pDst[4] = pSrc[4];
    pDst[5] = pSrc[5];
    pDst[6] = pSrc[6];
    pDst[7] = pSrc[7];
}
#endif // AMD64
182
183
VOID
184
SYMCRYPT_CALL
185
SymCryptScsTableLoadC(
186
    _In_                        PSYMCRYPT_SCSTABLE  pScsTable,
187
                                UINT32              iIndex,
188
    _Out_writes_bytes_(cbData)  PBYTE               pbData,
189
                                UINT32              cbData )
190
0
{
191
0
    UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE;
192
0
    UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE;
193
0
    UINT32 elementSize = pScsTable->elementSize;
194
195
0
    SYMCRYPT_SCSTABLE_TYPE mask0, mask1, mask2, mask3;
196
0
    UINT32 i;
197
0
    UINT32 j;
198
0
    UINT32 nElements = pScsTable->nElements;
199
200
0
    const SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pScsTable->pbTableData;
201
0
    SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) pbData;
202
0
    SYMCRYPT_SCSTABLE_TYPE * pD;
203
204
0
    UINT32 nInterleaves = elementSize / interleaveSize;
205
206
207
0
    SYMCRYPT_ASSERT( groupSize ==  pScsTable->groupSize );
208
0
    SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize );
209
210
0
    SYMCRYPT_ASSERT( cbData >= sizeof( SYMCRYPT_SCSTABLE_TYPE ) * SYMCRYPT_SCSTABLE_GROUP_SIZE );
211
0
    SYMCRYPT_ASSERT( cbData == pScsTable->elementSize );
212
0
    UNREFERENCED_PARAMETER( cbData );
213
214
0
#if SYMCRYPT_SCSTABLE_USE64
215
0
#define SCS_MASK_EQUAL32( _a, _b )  ( ~(UINT64) ((INT64) ((UINT64)0 - (_a ^ _b)) >> 32 ) )
216
#else
217
#define SCS_MASK_EQUAL32( _a, _b )  (SYMCRYPT_MASK32_EQ( _a, _b ))
218
#endif
219
220
0
    i = 0;
221
222
0
    mask0 = SCS_MASK_EQUAL32( i+0, iIndex );
223
0
    mask1 = SCS_MASK_EQUAL32( i+1, iIndex );
224
0
    mask2 = SCS_MASK_EQUAL32( i+2, iIndex );
225
0
    mask3 = SCS_MASK_EQUAL32( i+3, iIndex );
226
227
0
    j = nInterleaves;
228
0
    pD = pDst;
229
230
0
    do {
231
0
        pD[0] = (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]);
232
0
        pD[1] = (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]);
233
0
        pD[2] = (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]);
234
0
        pD[3] = (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]);
235
0
        pD += interleaveSize / sizeof( *pD );
236
0
        pSrc += interleaveSize * groupSize / sizeof( *pSrc );
237
0
        j--;
238
0
    } while( j > 0 );
239
240
0
    i += groupSize;
241
242
0
    while (i + groupSize <= nElements)
243
0
    {
244
245
0
        mask0 = SCS_MASK_EQUAL32( i+0, iIndex );
246
0
        mask1 = SCS_MASK_EQUAL32( i+1, iIndex );
247
0
        mask2 = SCS_MASK_EQUAL32( i+2, iIndex );
248
0
        mask3 = SCS_MASK_EQUAL32( i+3, iIndex );
249
250
0
        j = nInterleaves;
251
0
        pD = pDst;
252
253
0
        do {
254
0
            pD[0] |= (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]);
255
0
            pD[1] |= (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]);
256
0
            pD[2] |= (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]);
257
0
            pD[3] |= (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]);
258
0
            pD += interleaveSize / sizeof( *pD );
259
0
            pSrc += interleaveSize * groupSize / sizeof( *pSrc );
260
0
            j--;
261
0
        } while( j > 0 );
262
263
0
        i += groupSize;
264
0
    }
265
0
}
266
267
#if SYMCRYPT_CPU_AMD64
//
// SymCryptScsTableLoad128Xmm
//
// Copies the 128-byte element iIndex of the table to pbData, for tables that use the AMD64
// 128-byte layout (elementSize 128, groupSize 1).
// All nElements entries are read in order. A running counter is compared against iIndex to
// form a mask that is all-ones only for the requested entry, and the masked entries are
// OR-ed together into eight 16-byte accumulators that are written out at the end.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableLoad128Xmm(
    _In_                        PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _Out_writes_bytes_(cbData)  PBYTE               pbData,
                                UINT32              cbData )
{
    __m128i * pEntry = (__m128i *) pScsTable->pbTableData;
    __m128i * pOut = (__m128i *) pbData;
    UINT32 nRemaining = pScsTable->nElements;

    // Four 32-bit lanes each of: requested index, the constant 1, and the current index.
    __m128i vWanted = _mm_set1_epi32( iIndex );
    __m128i vOne = _mm_set1_epi32( 1 );
    __m128i vCurrent = _mm_setzero_si128();
    __m128i vSelect;

    __m128i A0, A1, A2, A3, A4, A5, A6, A7;

    SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 );
    UNREFERENCED_PARAMETER( cbData );

    // Entry 0 initializes the accumulators.
    vSelect = _mm_cmpeq_epi32( vCurrent, vWanted );
    vCurrent = _mm_add_epi32( vCurrent, vOne );

    A0 = _mm_and_si128( vSelect, pEntry[0] );
    A1 = _mm_and_si128( vSelect, pEntry[1] );
    A2 = _mm_and_si128( vSelect, pEntry[2] );
    A3 = _mm_and_si128( vSelect, pEntry[3] );
    A4 = _mm_and_si128( vSelect, pEntry[4] );
    A5 = _mm_and_si128( vSelect, pEntry[5] );
    A6 = _mm_and_si128( vSelect, pEntry[6] );
    A7 = _mm_and_si128( vSelect, pEntry[7] );

    pEntry += 8;

    // Every further entry is masked and OR-ed in.
    for( --nRemaining; nRemaining > 0; --nRemaining )
    {
        vSelect = _mm_cmpeq_epi32( vCurrent, vWanted );
        vCurrent = _mm_add_epi32( vCurrent, vOne );

        A0 = _mm_or_si128( A0, _mm_and_si128( vSelect, pEntry[0] ) );
        A1 = _mm_or_si128( A1, _mm_and_si128( vSelect, pEntry[1] ) );
        A2 = _mm_or_si128( A2, _mm_and_si128( vSelect, pEntry[2] ) );
        A3 = _mm_or_si128( A3, _mm_and_si128( vSelect, pEntry[3] ) );
        A4 = _mm_or_si128( A4, _mm_and_si128( vSelect, pEntry[4] ) );
        A5 = _mm_or_si128( A5, _mm_and_si128( vSelect, pEntry[5] ) );
        A6 = _mm_or_si128( A6, _mm_and_si128( vSelect, pEntry[6] ) );
        A7 = _mm_or_si128( A7, _mm_and_si128( vSelect, pEntry[7] ) );

        pEntry += 8;
    }

    pOut[0] = A0;
    pOut[1] = A1;
    pOut[2] = A2;
    pOut[3] = A3;
    pOut[4] = A4;
    pOut[5] = A5;
    pOut[6] = A6;
    pOut[7] = A7;
}
#endif // AMD64
331
332
VOID
333
SYMCRYPT_CALL
334
SymCryptScsTableStore(
335
    _Inout_                     PSYMCRYPT_SCSTABLE  pScsTable,
336
                                UINT32              iIndex,
337
    _In_reads_bytes_( cbData )  PCBYTE              pbData,
338
                                UINT32              cbData )
339
0
{
340
0
#if SYMCRYPT_CPU_AMD64
341
342
0
    if( pScsTable->elementSize == 128 )
343
0
    {
344
0
        SymCryptScsTableStore128Xmm( pScsTable, iIndex, pbData, cbData );
345
0
    } else {
346
0
        SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData );
347
0
    }
348
349
#else
350
351
    SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData );
352
353
#endif
354
0
}
355
356
VOID
357
SYMCRYPT_CALL
358
SymCryptScsTableLoad(
359
    _In_                        PSYMCRYPT_SCSTABLE  pScsTable,
360
                                UINT32              iIndex,
361
    _Out_writes_bytes_(cbData)  PBYTE               pbData,
362
                                UINT32              cbData )
363
0
{
364
    // This is the side-channel safe routine
365
366
0
#if SYMCRYPT_CPU_AMD64
367
368
0
    if( pScsTable->elementSize == 128 )
369
0
    {
370
0
        SymCryptScsTableLoad128Xmm( pScsTable, iIndex, pbData, cbData );
371
0
    } else {
372
0
        SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData );
373
0
    }
374
375
#else
376
377
    SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData );
378
379
#endif
380
0
}
381
382
VOID
383
SYMCRYPT_CALL
384
SymCryptScsTableWipe(
385
    _Inout_ PSYMCRYPT_SCSTABLE pScsTable )
386
0
{
387
0
    SymCryptWipe( pScsTable->pbTableData, pScsTable->cbTableData );
388
0
}