/src/SymCrypt/lib/ScsTable.c
Line | Count | Source (jump to first uncovered line) |
1 | | // |
2 | | // ScsTable.c |
3 | | // Side-channel safe table |
4 | | // |
5 | | // Copyright (c) Microsoft Corporation. Licensed under the MIT license. |
6 | | // |
7 | | // |
8 | | // These functions implement a table of large elements. |
9 | | // Reading an element from the table is done in a way that does not reveal the |
10 | | // element accessed through memory side channels. |
11 | | // Basically, the whole table is read by the CPU, and the required data is selected |
12 | | // using boolean operations. |
13 | | // |
14 | | |
15 | | #include "precomp.h" |
16 | | |
17 | | // |
18 | | // Items are a multiple of SYMCRYPT_DIGIT_SIZE long. |
19 | | // |
20 | | // Format: |
21 | | // The memory format is parameterized for optimal implementations on several |
22 | | // different architectures. |
23 | | // |
24 | | // The following parameters define the format: |
25 | | // - group_size |
26 | | // - interleave_size |
27 | | // |
28 | | // Let nElements be the number of elements in the table. |
29 | | // If necessary, the size of each element in the table is rounded up to a multiple of interleave_size. |
30 | | // Each whole group of group_size elements is interleaved with each other. |
31 | | // The last (nElements % group_size) elements are simply stored consecutively. |
32 | | // (For now we simply require that nElements is a multiple of group_size.) |
33 | | // Within each group of group_size, the data for the elements are interleaved in natural order |
34 | | // using chunks of interleave_size bytes. |
35 | | // |
36 | | // The choice of group_size and interleave_size depends on the CPU architecture, CPU features, |
37 | | // and even the element size. (E.g. 1024-bit elements might interleave @ 64 bytes on an AVX512 |
38 | | // capable CPU, but 256-bit elements would have to interleave at 16 or 32 bytes on that same CPU.) |
39 | | // |
40 | | |
41 | | // Currently these are constants as that allows easier optimizations... |
42 | | #if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 |
43 | | #define SYMCRYPT_SCSTABLE_USE64 1 |
44 | 0 | #define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE 32 |
45 | 0 | #define SYMCRYPT_SCSTABLE_GROUP_SIZE 4 |
46 | | typedef UINT64 SYMCRYPT_SCSTABLE_TYPE; |
47 | | #else |
48 | | #define SYMCRYPT_SCSTABLE_USE64 0 |
49 | | #define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE 16 |
50 | | #define SYMCRYPT_SCSTABLE_GROUP_SIZE 4 |
51 | | typedef UINT32 SYMCRYPT_SCSTABLE_TYPE; |
52 | | #endif |
53 | | |
54 | | UINT32 |
55 | | SYMCRYPT_CALL |
56 | | SymCryptScsTableInit( |
57 | | _Out_ PSYMCRYPT_SCSTABLE pScsTable, |
58 | | UINT32 nElements, |
59 | | UINT32 elementSize ) |
60 | 0 | { |
61 | 0 | UINT32 groupSize; |
62 | 0 | UINT32 interleaveSize; |
63 | 0 | UINT32 cbBuffer; |
64 | |
|
65 | 0 | SYMCRYPT_ASSERT( nElements > 0 ); |
66 | |
|
67 | 0 | #pragma warning( suppress: 4127 ) // conditional expression is constant |
68 | 0 | if( SYMCRYPT_CPU_AMD64 && elementSize == 128 ) |
69 | 0 | { |
70 | | // Highly optimized assembler mode for 1024-bit entries for RSA-2048... |
71 | 0 | interleaveSize = 128; |
72 | 0 | groupSize = 1; |
73 | 0 | } else { |
74 | | // Standard C implementation |
75 | 0 | interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE; |
76 | 0 | groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE; |
77 | 0 | } |
78 | | |
79 | | // Right now, we limit ourselves to element sizes that are a multiple of the interleaveSize and |
80 | | // # elements that are a multiple of the group size. |
81 | | // We also limit ourselves to sensible input sizes |
82 | 0 | SYMCRYPT_ASSERT( elementSize % interleaveSize == 0 && nElements % groupSize == 0 && (elementSize | nElements) < (1 << 16) && elementSize > 0 ); |
83 | |
|
84 | 0 | cbBuffer = elementSize * nElements; // Each factor is < 2^16, so there is no overflow in the mul |
85 | |
|
86 | 0 | pScsTable->groupSize = groupSize; |
87 | 0 | pScsTable->interleaveSize = interleaveSize; |
88 | 0 | pScsTable->nElements = nElements; |
89 | 0 | pScsTable->elementSize = elementSize; |
90 | 0 | pScsTable->cbTableData = cbBuffer; |
91 | 0 | pScsTable->pbTableData = NULL; |
92 | |
|
93 | 0 | return cbBuffer; |
94 | 0 | } |
95 | | |
96 | | VOID |
97 | | SYMCRYPT_CALL |
98 | | SymCryptScsTableSetBuffer( |
99 | | _Inout_ PSYMCRYPT_SCSTABLE pScsTable, |
100 | | _Inout_updates_bytes_( cbBuffer ) PBYTE pbBuffer, |
101 | | UINT32 cbBuffer ) |
102 | 0 | { |
103 | 0 | SYMCRYPT_ASSERT(cbBuffer >= pScsTable->cbTableData); |
104 | 0 | UNREFERENCED_PARAMETER( cbBuffer ); |
105 | |
|
106 | 0 | pScsTable->pbTableData = pbBuffer; |
107 | 0 | } |
108 | | |
109 | | |
110 | | C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 16 || SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 32 ); |
111 | | // check that an interleave size is exactly 4 words |
112 | | C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 4 * sizeof( SYMCRYPT_SCSTABLE_TYPE ) ); |
113 | | |
114 | | VOID |
115 | | SYMCRYPT_CALL |
116 | | SymCryptScsTableStoreC( |
117 | | _Inout_ PSYMCRYPT_SCSTABLE pScsTable, |
118 | | UINT32 iIndex, |
119 | | _In_reads_bytes_( cbData ) PCBYTE pbData, |
120 | | UINT32 cbData ) |
121 | 0 | { |
122 | 0 | UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE; |
123 | 0 | UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE; |
124 | 0 | UINT32 elementSize = pScsTable->elementSize; |
125 | 0 | UINT32 groupOffset; |
126 | |
|
127 | 0 | SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize ); |
128 | 0 | SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize ); |
129 | |
|
130 | 0 | SYMCRYPT_ASSERT( cbData == elementSize ); |
131 | 0 | UNREFERENCED_PARAMETER( cbData ); |
132 | |
|
133 | 0 | SYMCRYPT_ASSERT(iIndex < pScsTable->nElements); |
134 | |
|
135 | 0 | groupOffset = iIndex % groupSize; |
136 | | |
137 | | // dcl - document why this can't be an integer overflow |
138 | 0 | SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) (pScsTable->pbTableData + (iIndex - groupOffset) * elementSize + groupOffset * interleaveSize); |
139 | 0 | SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pbData; |
140 | |
|
141 | 0 | UINT32 nInterleaves = elementSize / interleaveSize; |
142 | |
|
143 | 0 | do |
144 | 0 | { |
145 | 0 | pDst[0] = pSrc[0]; |
146 | 0 | pDst[1] = pSrc[1]; |
147 | 0 | pDst[2] = pSrc[2]; |
148 | 0 | pDst[3] = pSrc[3]; |
149 | |
|
150 | 0 | pDst += interleaveSize * groupSize / sizeof( *pDst ); |
151 | 0 | pSrc += interleaveSize / sizeof( *pSrc ); |
152 | 0 | nInterleaves--; |
153 | 0 | } while( nInterleaves > 0 ); |
154 | |
|
155 | 0 | } |
156 | | |
157 | | #if SYMCRYPT_CPU_AMD64 |
158 | | VOID |
159 | | SYMCRYPT_CALL |
160 | | SymCryptScsTableStore128Xmm( |
161 | | _Inout_ PSYMCRYPT_SCSTABLE pScsTable, |
162 | | UINT32 iIndex, |
163 | | _In_reads_bytes_( cbData ) PCBYTE pbData, |
164 | | UINT32 cbData ) |
165 | 0 | { |
166 | 0 | __m128i * pDst = (__m128i *) (pScsTable->pbTableData + iIndex * 128); |
167 | 0 | __m128i * pSrc = (__m128i *) pbData; |
168 | |
|
169 | 0 | SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 ); |
170 | 0 | UNREFERENCED_PARAMETER( cbData ); |
171 | |
|
172 | 0 | pDst[0] = pSrc[0]; |
173 | 0 | pDst[1] = pSrc[1]; |
174 | 0 | pDst[2] = pSrc[2]; |
175 | 0 | pDst[3] = pSrc[3]; |
176 | 0 | pDst[4] = pSrc[4]; |
177 | 0 | pDst[5] = pSrc[5]; |
178 | 0 | pDst[6] = pSrc[6]; |
179 | 0 | pDst[7] = pSrc[7]; |
180 | 0 | } |
181 | | #endif // AMD64 |
182 | | |
183 | | VOID |
184 | | SYMCRYPT_CALL |
185 | | SymCryptScsTableLoadC( |
186 | | _In_ PSYMCRYPT_SCSTABLE pScsTable, |
187 | | UINT32 iIndex, |
188 | | _Out_writes_bytes_(cbData) PBYTE pbData, |
189 | | UINT32 cbData ) |
190 | 0 | { |
191 | 0 | UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE; |
192 | 0 | UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE; |
193 | 0 | UINT32 elementSize = pScsTable->elementSize; |
194 | |
|
195 | 0 | SYMCRYPT_SCSTABLE_TYPE mask0, mask1, mask2, mask3; |
196 | 0 | UINT32 i; |
197 | 0 | UINT32 j; |
198 | 0 | UINT32 nElements = pScsTable->nElements; |
199 | |
|
200 | 0 | const SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pScsTable->pbTableData; |
201 | 0 | SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) pbData; |
202 | 0 | SYMCRYPT_SCSTABLE_TYPE * pD; |
203 | |
|
204 | 0 | UINT32 nInterleaves = elementSize / interleaveSize; |
205 | | |
206 | |
|
207 | 0 | SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize ); |
208 | 0 | SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize ); |
209 | |
|
210 | 0 | SYMCRYPT_ASSERT( cbData >= sizeof( SYMCRYPT_SCSTABLE_TYPE ) * SYMCRYPT_SCSTABLE_GROUP_SIZE ); |
211 | 0 | SYMCRYPT_ASSERT( cbData == pScsTable->elementSize ); |
212 | 0 | UNREFERENCED_PARAMETER( cbData ); |
213 | |
|
214 | 0 | #if SYMCRYPT_SCSTABLE_USE64 |
215 | 0 | #define SCS_MASK_EQUAL32( _a, _b ) ( ~(UINT64) ((INT64) ((UINT64)0 - (_a ^ _b)) >> 32 ) ) |
216 | | #else |
217 | | #define SCS_MASK_EQUAL32( _a, _b ) (SYMCRYPT_MASK32_EQ( _a, _b )) |
218 | | #endif |
219 | |
|
220 | 0 | i = 0; |
221 | |
|
222 | 0 | mask0 = SCS_MASK_EQUAL32( i+0, iIndex ); |
223 | 0 | mask1 = SCS_MASK_EQUAL32( i+1, iIndex ); |
224 | 0 | mask2 = SCS_MASK_EQUAL32( i+2, iIndex ); |
225 | 0 | mask3 = SCS_MASK_EQUAL32( i+3, iIndex ); |
226 | |
|
227 | 0 | j = nInterleaves; |
228 | 0 | pD = pDst; |
229 | |
|
230 | 0 | do { |
231 | 0 | pD[0] = (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]); |
232 | 0 | pD[1] = (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]); |
233 | 0 | pD[2] = (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]); |
234 | 0 | pD[3] = (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]); |
235 | 0 | pD += interleaveSize / sizeof( *pD ); |
236 | 0 | pSrc += interleaveSize * groupSize / sizeof( *pSrc ); |
237 | 0 | j--; |
238 | 0 | } while( j > 0 ); |
239 | |
|
240 | 0 | i += groupSize; |
241 | |
|
242 | 0 | while (i + groupSize <= nElements) |
243 | 0 | { |
244 | |
|
245 | 0 | mask0 = SCS_MASK_EQUAL32( i+0, iIndex ); |
246 | 0 | mask1 = SCS_MASK_EQUAL32( i+1, iIndex ); |
247 | 0 | mask2 = SCS_MASK_EQUAL32( i+2, iIndex ); |
248 | 0 | mask3 = SCS_MASK_EQUAL32( i+3, iIndex ); |
249 | |
|
250 | 0 | j = nInterleaves; |
251 | 0 | pD = pDst; |
252 | |
|
253 | 0 | do { |
254 | 0 | pD[0] |= (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]); |
255 | 0 | pD[1] |= (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]); |
256 | 0 | pD[2] |= (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]); |
257 | 0 | pD[3] |= (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]); |
258 | 0 | pD += interleaveSize / sizeof( *pD ); |
259 | 0 | pSrc += interleaveSize * groupSize / sizeof( *pSrc ); |
260 | 0 | j--; |
261 | 0 | } while( j > 0 ); |
262 | |
|
263 | 0 | i += groupSize; |
264 | 0 | } |
265 | 0 | } |
266 | | |
267 | | #if SYMCRYPT_CPU_AMD64 |
268 | | VOID |
269 | | SYMCRYPT_CALL |
270 | | SymCryptScsTableLoad128Xmm( |
271 | | _In_ PSYMCRYPT_SCSTABLE pScsTable, |
272 | | UINT32 iIndex, |
273 | | _Out_writes_bytes_(cbData) PBYTE pbData, |
274 | | UINT32 cbData ) |
275 | 0 | { |
276 | 0 | UINT32 nElements = pScsTable->nElements; |
277 | |
|
278 | 0 | __m128i R0, R1, R2, R3, R4, R5, R6, R7; |
279 | 0 | __m128i T0, T1; |
280 | |
|
281 | 0 | __m128i Count = _mm_setzero_si128(); |
282 | 0 | __m128i Ones = _mm_set_epi32( 1, 1, 1, 1 ); |
283 | 0 | __m128i Entry = _mm_set_epi32( iIndex, iIndex, iIndex, iIndex ); |
284 | 0 | __m128i Mask; |
285 | 0 | __m128i * pSrc = (__m128i *) pScsTable->pbTableData; |
286 | 0 | __m128i * pDst = (__m128i *) pbData; |
287 | |
|
288 | 0 | SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 ); |
289 | 0 | UNREFERENCED_PARAMETER( cbData ); |
290 | |
|
291 | 0 | Mask = _mm_cmpeq_epi32( Count, Entry ); |
292 | 0 | Count = _mm_add_epi32( Count, Ones ); |
293 | |
|
294 | 0 | R0 = _mm_and_si128( Mask, pSrc[0] ); |
295 | 0 | R1 = _mm_and_si128( Mask, pSrc[1] ); |
296 | 0 | R2 = _mm_and_si128( Mask, pSrc[2] ); |
297 | 0 | R3 = _mm_and_si128( Mask, pSrc[3] ); |
298 | 0 | R4 = _mm_and_si128( Mask, pSrc[4] ); |
299 | 0 | R5 = _mm_and_si128( Mask, pSrc[5] ); |
300 | 0 | R6 = _mm_and_si128( Mask, pSrc[6] ); |
301 | 0 | R7 = _mm_and_si128( Mask, pSrc[7] ); |
302 | |
|
303 | 0 | pSrc += 8; |
304 | |
|
305 | 0 | while( --nElements > 0 ) |
306 | 0 | { |
307 | 0 | Mask = _mm_cmpeq_epi32( Count, Entry ); |
308 | 0 | Count = _mm_add_epi32( Count, Ones ); |
309 | |
|
310 | 0 | T0 = _mm_and_si128( Mask, pSrc[0] ); R0 = _mm_or_si128( R0, T0 ); |
311 | 0 | T1 = _mm_and_si128( Mask, pSrc[1] ); R1 = _mm_or_si128( R1, T1 ); |
312 | 0 | T0 = _mm_and_si128( Mask, pSrc[2] ); R2 = _mm_or_si128( R2, T0 ); |
313 | 0 | T1 = _mm_and_si128( Mask, pSrc[3] ); R3 = _mm_or_si128( R3, T1 ); |
314 | 0 | T0 = _mm_and_si128( Mask, pSrc[4] ); R4 = _mm_or_si128( R4, T0 ); |
315 | 0 | T1 = _mm_and_si128( Mask, pSrc[5] ); R5 = _mm_or_si128( R5, T1 ); |
316 | 0 | T0 = _mm_and_si128( Mask, pSrc[6] ); R6 = _mm_or_si128( R6, T0 ); |
317 | 0 | T1 = _mm_and_si128( Mask, pSrc[7] ); R7 = _mm_or_si128( R7, T1 ); |
318 | 0 | pSrc += 8; |
319 | 0 | } |
320 | |
|
321 | 0 | pDst[0] = R0; |
322 | 0 | pDst[1] = R1; |
323 | 0 | pDst[2] = R2; |
324 | 0 | pDst[3] = R3; |
325 | 0 | pDst[4] = R4; |
326 | 0 | pDst[5] = R5; |
327 | 0 | pDst[6] = R6; |
328 | 0 | pDst[7] = R7; |
329 | 0 | } |
330 | | #endif // AMD64 |
331 | | |
332 | | VOID |
333 | | SYMCRYPT_CALL |
334 | | SymCryptScsTableStore( |
335 | | _Inout_ PSYMCRYPT_SCSTABLE pScsTable, |
336 | | UINT32 iIndex, |
337 | | _In_reads_bytes_( cbData ) PCBYTE pbData, |
338 | | UINT32 cbData ) |
339 | 0 | { |
340 | 0 | #if SYMCRYPT_CPU_AMD64 |
341 | |
|
342 | 0 | if( pScsTable->elementSize == 128 ) |
343 | 0 | { |
344 | 0 | SymCryptScsTableStore128Xmm( pScsTable, iIndex, pbData, cbData ); |
345 | 0 | } else { |
346 | 0 | SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData ); |
347 | 0 | } |
348 | |
|
349 | | #else |
350 | | |
351 | | SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData ); |
352 | | |
353 | | #endif |
354 | 0 | } |
355 | | |
356 | | VOID |
357 | | SYMCRYPT_CALL |
358 | | SymCryptScsTableLoad( |
359 | | _In_ PSYMCRYPT_SCSTABLE pScsTable, |
360 | | UINT32 iIndex, |
361 | | _Out_writes_bytes_(cbData) PBYTE pbData, |
362 | | UINT32 cbData ) |
363 | 0 | { |
364 | | // This is the side-channel safe routine |
365 | |
|
366 | 0 | #if SYMCRYPT_CPU_AMD64 |
367 | |
|
368 | 0 | if( pScsTable->elementSize == 128 ) |
369 | 0 | { |
370 | 0 | SymCryptScsTableLoad128Xmm( pScsTable, iIndex, pbData, cbData ); |
371 | 0 | } else { |
372 | 0 | SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData ); |
373 | 0 | } |
374 | |
|
375 | | #else |
376 | | |
377 | | SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData ); |
378 | | |
379 | | #endif |
380 | 0 | } |
381 | | |
382 | | VOID |
383 | | SYMCRYPT_CALL |
384 | | SymCryptScsTableWipe( |
385 | | _Inout_ PSYMCRYPT_SCSTABLE pScsTable ) |
386 | 0 | { |
387 | 0 | SymCryptWipe( pScsTable->pbTableData, pScsTable->cbTableData ); |
388 | 0 | } |