/src/SymCrypt/lib/fdef_general.c
Line | Count | Source (jump to first uncovered line) |
1 | | // |
2 | | // fdef_general.c General functions of the default format. |
3 | | // |
4 | | // Copyright (c) Microsoft Corporation. Licensed under the MIT license. |
5 | | // |
6 | | // |
7 | | // |
8 | | |
9 | | #include "precomp.h" |
10 | | |
11 | | #include "smallPrimes32.h" // For SymCryptTestTrialdivisionMaxSmallPrime |
12 | | |
13 | | #if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 |
14 | | |
15 | 0 | #define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (16) // Measured on amd64 |
16 | 0 | #define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (2) // Measured on amd64 |
17 | 0 | #define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (43000) // Measured on amd64 |
18 | | |
19 | | #elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM |
20 | | |
21 | | #define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (18) // Measured on x86 |
22 | | #define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (16) // Measured on x86 |
23 | | #define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (25300) // Measured on x86 |
24 | | |
25 | | #else |
26 | | |
27 | | #define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (18) // Measured on x86 |
28 | | #define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (16) // Measured on x86 |
29 | | #define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (25300) // Measured on x86 |
30 | | |
31 | | #endif |
32 | | |
33 | | |
34 | 0 | #define SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME (1<<22) // Some large limit to bound memory usage |
35 | | C_ASSERT( SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME <= UINT32_MAX ); |
36 | | C_ASSERT( SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME == ((UINT32) SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME) ); |
37 | | |
38 | | VOID |
39 | | SYMCRYPT_CALL |
40 | | SymCryptFdefMaskedCopyC( |
41 | | _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, |
42 | | _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, |
43 | | UINT32 nDigits, |
44 | | UINT32 mask ) |
45 | | /* |
46 | | This function is dangerous, and would create a buffer overflow if nDigits > nDigits for pbDst |
47 | | It also appears that it is never called. Consider removing it if it is not needed. |
48 | | */ |
49 | 0 | { |
50 | 0 | UINT64 m64 = (UINT64)0 - (mask & 1); |
51 | 0 | PUINT64 pSrc = (PUINT64) pbSrc; // should be a const pointer to match pSrc |
52 | 0 | PUINT64 pDst = (PUINT64) pbDst; |
53 | 0 | SIZE_T i; |
54 | | |
55 | | // This allows 0xffffffff and 0, is that what you wanted? |
56 | | // If so, ( mask == 0xffffffff || mask == 0 ) |
57 | | // would be more readable. It is also odd that 1 is not valid, but it results in exactly the |
58 | | // same code flow as ~0. |
59 | 0 | SYMCRYPT_ASSERT( (mask + 1) < 2 ); // Check that mask is valid |
60 | | |
61 | | // This - nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ) |
62 | | // seems to occur often. Consider a macro with a name that explains what you are doing |
63 | | // A comment on the macro which explains why this multiplication is never a problem would be |
64 | | // helpful - I'm fairly sure it is not a problem. |
65 | 0 | for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); i += 2 ) |
66 | 0 | { |
67 | 0 | pDst[i ] = (pSrc[i ] & m64) | (pDst[i ] & ~m64 ); |
68 | 0 | pDst[i+1] = (pSrc[i+1] & m64) | (pDst[i+1] & ~m64 ); |
69 | 0 | } |
70 | 0 | } |
71 | | |
72 | | VOID |
73 | | SYMCRYPT_CALL |
74 | | SymCryptFdefMaskedCopy( |
75 | | _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, |
76 | | _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, |
77 | | UINT32 nDigits, |
78 | | UINT32 mask ) |
79 | 1.56M | { |
80 | 1.56M | #if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM |
81 | 1.56M | SYMCRYPT_ASSERT_ASYM_ALIGNED( pbSrc ); |
82 | 1.56M | SYMCRYPT_ASSERT_ASYM_ALIGNED( pbDst ); |
83 | 1.56M | SymCryptFdefMaskedCopyAsm( pbSrc, pbDst, nDigits, mask ); |
84 | | #else |
85 | | SymCryptFdefMaskedCopyC( pbSrc, pbDst, nDigits, mask ); |
86 | | #endif |
87 | 1.56M | } |
88 | | |
89 | | VOID |
90 | | SYMCRYPT_CALL |
91 | | SymCryptFdefConditionalSwapC( |
92 | | _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc1, |
93 | | _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc2, |
94 | | UINT32 nDigits, |
95 | | UINT32 cond ) |
96 | 0 | { |
97 | | /* |
98 | | Some documentation as to what the cond argument means would be helpful. |
99 | | */ |
100 | 0 | UINT64 m64 = (UINT64)0 - (cond & 1); |
101 | 0 | PUINT64 pSrc1 = (PUINT64) pbSrc1; |
102 | 0 | PUINT64 pSrc2 = (PUINT64) pbSrc2; |
103 | 0 | UINT64 tmp1 = 0; |
104 | 0 | UINT64 tmp2 = 0; |
105 | 0 | SIZE_T i; |
106 | | |
107 | | // Unlike the previous function, this only allows 0 and 1 why? |
108 | 0 | SYMCRYPT_ASSERT( cond < 2 ); // Check that the condition is valid |
109 | |
|
110 | 0 | for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); i += 2 ) |
111 | 0 | { |
112 | 0 | tmp1 = (pSrc1[i ] ^ pSrc2[i ]) & m64; |
113 | 0 | tmp2 = (pSrc1[i+1] ^ pSrc2[i+1]) & m64; |
114 | |
|
115 | 0 | pSrc1[i ] ^= tmp1; pSrc2[i ] ^= tmp1; |
116 | 0 | pSrc1[i+1] ^= tmp2; pSrc2[i+1] ^= tmp2; |
117 | 0 | } |
118 | 0 | } |
119 | | |
120 | | VOID |
121 | | SYMCRYPT_CALL |
122 | | SymCryptFdefConditionalSwap( |
123 | | _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc1, |
124 | | _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc2, |
125 | | UINT32 nDigits, |
126 | | UINT32 cond ) |
127 | 0 | { |
128 | 0 | SymCryptFdefConditionalSwapC( pbSrc1, pbSrc2, nDigits, cond ); |
129 | 0 | } |
130 | | |
131 | | |
132 | | UINT32 |
133 | | SymCryptFdefDigitsFromBits( UINT32 nBits ) |
134 | 15.1k | { |
135 | 15.1k | UINT32 res; |
136 | | |
137 | 15.1k | if( nBits == 0 ) |
138 | 0 | { |
139 | 0 | res = 1; |
140 | 0 | } |
141 | 15.1k | else |
142 | 15.1k | { |
143 | 15.1k | SYMCRYPT_ASSERT( nBits <= SYMCRYPT_INT_MAX_BITS ); |
144 | | |
145 | | // Callers with integers larger than SYMCRYPT_INT_MAX_BITS should not occur in real use cases |
146 | | // To avoid overflow issues, return the 0 digits to indicate an error which can be handled by |
147 | | // callers, or flow through into object allocation which will in turn recognize the invalid |
148 | | // digit count. |
149 | 15.1k | if( nBits > SYMCRYPT_INT_MAX_BITS ) |
150 | 0 | { |
151 | 0 | res = 0; |
152 | 15.1k | } else { |
153 | 15.1k | res = SYMCRYPT_FDEF_DIGITS_FROM_BITS( nBits ); |
154 | 15.1k | } |
155 | 15.1k | } |
156 | | |
157 | 15.1k | return res; |
158 | 15.1k | } |
159 | | |
160 | | // Let's limit max bits to the number of bits we actually test |
161 | | C_ASSERT( SYMCRYPT_INT_MAX_BITS < (1 << 30) ); // Larger values can cause overflows and sign confusion |
162 | | |
163 | | PSYMCRYPT_INT |
164 | | SYMCRYPT_CALL |
165 | | SymCryptFdefIntAllocate( UINT32 nDigits ) |
166 | 196 | { |
167 | 196 | PVOID p = NULL; |
168 | 196 | UINT32 cb; |
169 | 196 | PSYMCRYPT_INT res = NULL; |
170 | | |
171 | | // |
172 | | // The nDigits requirements are enforced by SymCryptFdefSizeofIntFromDigits. Thus |
173 | | // the result does not overflow and is upper bounded by 2^18. |
174 | | // |
175 | 196 | cb = SymCryptFdefSizeofIntFromDigits( nDigits ); |
176 | | |
177 | 196 | if( cb != 0 ) |
178 | 196 | { |
179 | 196 | p = SymCryptCallbackAlloc( cb ); |
180 | 196 | } |
181 | | |
182 | 196 | if( p == NULL ) |
183 | 0 | { |
184 | 0 | goto cleanup; |
185 | 0 | } |
186 | | |
187 | 196 | res = SymCryptIntCreate( p, cb, nDigits ); |
188 | | |
189 | 196 | cleanup: |
190 | 196 | return res; |
191 | 196 | } |
192 | | |
193 | | |
194 | | UINT32 |
195 | | SYMCRYPT_CALL |
196 | | SymCryptFdefSizeofIntFromDigits( UINT32 nDigits ) |
197 | 32.5k | { |
198 | 32.5k | SYMCRYPT_ASSERT( nDigits != 0 ); |
199 | 32.5k | SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS ); |
200 | | |
201 | | // Ensure we do not overflow the following calculation when provided with invalid inputs |
202 | 32.5k | if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS ) |
203 | 0 | { |
204 | 0 | return 0; |
205 | 0 | } |
206 | | |
207 | | // Note: ti stands for 'Type-Int' and it helps catch type errors when type-casting macros are used. |
208 | 32.5k | return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_INT, ti ) + nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; |
209 | 32.5k | } |
210 | | |
211 | | PSYMCRYPT_INT |
212 | | SYMCRYPT_CALL |
213 | | SymCryptFdefIntCreate( |
214 | | _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, |
215 | | SIZE_T cbBuffer, |
216 | | UINT32 nDigits ) |
217 | 12.8k | { |
218 | 12.8k | PSYMCRYPT_INT pInt = NULL; |
219 | 12.8k | UINT32 cb = SymCryptFdefSizeofIntFromDigits( nDigits ); |
220 | | |
221 | 12.8k | SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_INT) ); |
222 | 12.8k | SYMCRYPT_ASSERT( cbBuffer >= cb ); |
223 | 12.8k | if( (cb == 0) || (cbBuffer < cb) ) |
224 | 0 | { |
225 | 0 | goto cleanup; // return NULL |
226 | 0 | } |
227 | | |
228 | 12.8k | SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); |
229 | 12.8k | pInt = (PSYMCRYPT_INT) pbBuffer; |
230 | | |
231 | 12.8k | pInt->type = 'gI' << 16; |
232 | 12.8k | pInt->nDigits = nDigits; |
233 | | |
234 | | // |
235 | | // The nDigits requirements are enforced by SymCryptFdefSizeofIntFromDigits. Thus |
236 | | // the result does not overflow and is upper bounded by 2^18. |
237 | | // |
238 | 12.8k | pInt->cbSize = cb; |
239 | | |
240 | 12.8k | SYMCRYPT_SET_MAGIC( pInt ); |
241 | | |
242 | 12.8k | cleanup: |
243 | 12.8k | return pInt; |
244 | 12.8k | } |
245 | | |
246 | | |
247 | | VOID |
248 | | SymCryptFdefIntCopyFixup( |
249 | | _In_ PCSYMCRYPT_INT pSrc, |
250 | | _Out_ PSYMCRYPT_INT pDst ) |
251 | 0 | { |
252 | 0 | UNREFERENCED_PARAMETER( pSrc ); |
253 | 0 | UNREFERENCED_PARAMETER( pDst ); // not used in FRE builds... |
254 | |
|
255 | 0 | SYMCRYPT_SET_MAGIC( pDst ); |
256 | 0 | } |
257 | | |
258 | | VOID |
259 | | SymCryptFdefIntCopy( |
260 | | _In_ PCSYMCRYPT_INT piSrc, |
261 | | _Out_ PSYMCRYPT_INT piDst ) |
262 | 187k | { |
263 | 187k | SYMCRYPT_CHECK_MAGIC( piSrc ); |
264 | 187k | SYMCRYPT_CHECK_MAGIC( piDst ); |
265 | | |
266 | 187k | SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); |
267 | | |
268 | | // |
269 | | // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. |
270 | | // |
271 | 187k | if( piSrc != piDst ) |
272 | 185k | { |
273 | | // This is normally considered a banned, unsafe function. A note about why it is safe in this use |
274 | | // would be good. |
275 | 185k | memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), SYMCRYPT_OBJ_NBYTES( piDst )); |
276 | 185k | } |
277 | 187k | } |
278 | | |
279 | | VOID |
280 | | SymCryptFdefIntMaskedCopy( |
281 | | _In_ PCSYMCRYPT_INT piSrc, |
282 | | _Inout_ PSYMCRYPT_INT piDst, |
283 | | UINT32 mask ) |
284 | | /* |
285 | | Function notes would be helpful - what is mask, what does it do? |
286 | | */ |
287 | 60.5k | { |
288 | 60.5k | SYMCRYPT_CHECK_MAGIC( piSrc ); |
289 | 60.5k | SYMCRYPT_CHECK_MAGIC( piDst ); |
290 | | |
291 | 60.5k | SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); |
292 | | |
293 | 60.5k | SymCryptFdefMaskedCopy( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piDst ), piSrc->nDigits, mask ); |
294 | 60.5k | } |
295 | | |
296 | | VOID |
297 | | SYMCRYPT_CALL |
298 | | SymCryptFdefIntConditionalCopy( |
299 | | _In_ PCSYMCRYPT_INT piSrc, |
300 | | _Inout_ PSYMCRYPT_INT piDst, |
301 | | UINT32 cond ) |
302 | 0 | { |
303 | 0 | SYMCRYPT_CHECK_MAGIC( piSrc ); |
304 | 0 | SYMCRYPT_CHECK_MAGIC( piDst ); |
305 | |
|
306 | 0 | SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); |
307 | |
|
308 | 0 | SymCryptFdefMaskedCopy( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piDst ), piSrc->nDigits, SYMCRYPT_MASK32_NONZERO( cond ) ); |
309 | 0 | } |
310 | | |
311 | | VOID |
312 | | SYMCRYPT_CALL |
313 | | SymCryptFdefIntConditionalSwap( |
314 | | _Inout_ PSYMCRYPT_INT piSrc1, |
315 | | _Inout_ PSYMCRYPT_INT piSrc2, |
316 | | UINT32 cond ) |
317 | 0 | { |
318 | 0 | SYMCRYPT_CHECK_MAGIC( piSrc1 ); |
319 | 0 | SYMCRYPT_CHECK_MAGIC( piSrc2 ); |
320 | |
|
321 | 0 | SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits ); |
322 | |
|
323 | 0 | SymCryptFdefConditionalSwap( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), piSrc1->nDigits, cond ); |
324 | 0 | } |
325 | | |
326 | | UINT32 |
327 | | SYMCRYPT_CALL |
328 | | SymCryptFdefIntBitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ) |
329 | 0 | { |
330 | | // This does not overflow since the nDigits field is |
331 | | // bounded by SYMCRYPT_FDEF_UPB_DIGITS. |
332 | 0 | return SYMCRYPT_FDEF_DIGIT_BITS * piSrc->nDigits; |
333 | 0 | } |
334 | | |
335 | | UINT32 |
336 | | SYMCRYPT_CALL |
337 | | SymCryptFdefNumberofDigitsFromInt( _In_ PCSYMCRYPT_INT piSrc ) |
338 | 0 | { |
339 | 0 | return piSrc->nDigits; |
340 | 0 | } |
341 | | |
342 | | SYMCRYPT_ERROR |
343 | | SymCryptFdefIntCopyMixedSize( |
344 | | _In_ PCSYMCRYPT_INT piSrc, |
345 | | _Out_ PSYMCRYPT_INT piDst ) |
346 | 0 | { |
347 | 0 | UINT32 n; |
348 | 0 | SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; |
349 | |
|
350 | 0 | SYMCRYPT_CHECK_MAGIC( piSrc ); |
351 | 0 | SYMCRYPT_CHECK_MAGIC( piDst ); |
352 | | |
353 | | // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. |
354 | 0 | if( piSrc == piDst ) |
355 | 0 | { |
356 | 0 | goto cleanup; |
357 | 0 | } |
358 | | |
359 | | // |
360 | | // Copy the digits that are available in both |
361 | | // |
362 | 0 | n = SYMCRYPT_MIN( piSrc->nDigits, piDst->nDigits ); |
363 | 0 | memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), n * SYMCRYPT_FDEF_DIGIT_SIZE ); |
364 | |
|
365 | 0 | if( piDst->nDigits > n ) |
366 | 0 | { |
367 | 0 | SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[n * SYMCRYPT_FDEF_DIGIT_NUINT32], (piDst->nDigits - n) * SYMCRYPT_FDEF_DIGIT_SIZE ); |
368 | 0 | } |
369 | |
|
370 | 0 | if( piSrc->nDigits > n ) |
371 | 0 | { |
372 | | // Check that the rest of the source is zero |
373 | 0 | PUINT64 p = (PUINT64) &SYMCRYPT_FDEF_INT_PUINT32( piSrc )[n * SYMCRYPT_FDEF_DIGIT_NUINT32]; |
374 | 0 | UINT64 v = 0; |
375 | 0 | UINT32 i = (piSrc->nDigits - n) * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); |
376 | 0 | while( i > 0 ) |
377 | 0 | { |
378 | 0 | v |= *p++; |
379 | 0 | i--; |
380 | 0 | } |
381 | | |
382 | | // |
383 | | // If the Src doesn't fit, we are allowed to publish that fact, so we can use an IF. |
384 | | // |
385 | 0 | if( v != 0 ) |
386 | 0 | { |
387 | 0 | scError = SYMCRYPT_BUFFER_TOO_SMALL; |
388 | 0 | goto cleanup; |
389 | 0 | } |
390 | 0 | } |
391 | | |
392 | 0 | cleanup: |
393 | 0 | return scError; |
394 | 0 | } |
395 | | |
396 | | |
397 | | UINT32 |
398 | | SYMCRYPT_CALL |
399 | | SymCryptFdefBitsizeOfUint32( UINT32 v ) |
400 | 6.62k | { |
401 | 6.62k | UINT32 res; |
402 | 6.62k | UINT32 mask; |
403 | 6.62k | UINT32 vUpper; |
404 | 6.62k | UINT32 vBit1; |
405 | | |
406 | | // This is tricky to do side-channel safe using only defined behaviour of the C language. |
407 | | |
408 | | // This is very difficult to make any sense of. A comment containing the C code that one would normally |
409 | | // write to do the same thing would be helpful. I will need to come back to this. |
410 | | // Also, there is no test coverage of this function. There should be a unit test to show that it does the same thing |
411 | | // as the code one would normally write. |
412 | | |
413 | 6.62k | vUpper = v & 0xffff0000; |
414 | 6.62k | mask = (UINT32) ( (0 -(UINT64)(vUpper)) >> 32 ); // mask = 0 or 0xffffffff |
415 | 6.62k | res = mask & 16; // Why do we want the 9th bit? Also, 0x10 would be better here |
416 | 6.62k | v = ((v & 0xffff) & ~mask) | ((vUpper >> 16) & mask); |
417 | | |
418 | 6.62k | vUpper = v & 0xff00; |
419 | 6.62k | mask = (0 - vUpper) >> 16; // mask = 0 or 0xffff |
420 | 6.62k | res |= mask & 8; |
421 | 6.62k | v = ((v & 0xff) & ~mask) | ((v >> 8) & mask); |
422 | | |
423 | 6.62k | vUpper = v & 0xf0; |
424 | 6.62k | mask = (0 - vUpper) >> 16; |
425 | 6.62k | res |= mask & 4; |
426 | 6.62k | v = ((v & 0xf) & ~mask) | ((v >> 4) & mask ); |
427 | | |
428 | 6.62k | vUpper = v & 0xc; |
429 | 6.62k | mask = (0 - vUpper) >> 16; |
430 | 6.62k | res |= mask & 2; |
431 | 6.62k | v = ((v & 0x3) & ~mask) | ((v >> 2) & mask); |
432 | | |
433 | | // |
434 | | // Only 2 bits left. |
435 | | // |
436 | 6.62k | vBit1 = (v >> 1) & 1; |
437 | 6.62k | res |= vBit1; |
438 | | |
439 | | // |
440 | | // Now we have the bit number of the MSbit set in res. |
441 | | // We need to increase this by one if v was nonzero, so that we |
442 | | // get 0 for v==0, and the # bits needed for v > 0 |
443 | | // |
444 | 6.62k | res += (v | vBit1) & 1; |
445 | | |
446 | 6.62k | return res; |
447 | 6.62k | } |
448 | | |
449 | | UINT32 |
450 | | SYMCRYPT_CALL |
451 | | SymCryptFdefIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ) |
452 | 6.62k | { |
453 | 6.62k | UINT32 nUint32 = SYMCRYPT_OBJ_NUINT32( piSrc ); |
454 | | |
455 | 6.62k | UINT32 res = 0; |
456 | 6.62k | UINT32 msNonzeroWord = 0; // most significant nonzero digit |
457 | 6.62k | UINT32 searchingMask = SYMCRYPT_MASK32_SET; // Set if still searching, 0 otherwise |
458 | 6.62k | UINT32 d; |
459 | 6.62k | UINT32 dIsNonzeroMask; |
460 | 6.62k | UINT32 foundMask; |
461 | | |
462 | 6.62k | SYMCRYPT_CHECK_MAGIC( piSrc ); |
463 | | |
464 | | // This while loop reveals the value of nUint32, is that OK? |
465 | | // If so, document why |
466 | 135k | while( nUint32 > 0 ) |
467 | 128k | { |
468 | | // |
469 | | // Invariant: |
470 | | // If no nonzero digit has been found, res = 0 and updateMask = -1. |
471 | | // If a nonzero digit has been found: |
472 | | // msNonzeroDigit = most significant nonzero digit in Src |
473 | | // res = index where most-significant nonzero digit was found |
474 | | // updateMask = 0 |
475 | | // |
476 | | |
477 | 128k | nUint32--; |
478 | 128k | d = SYMCRYPT_FDEF_INT_PUINT32( piSrc )[nUint32]; |
479 | | |
480 | 128k | dIsNonzeroMask = SYMCRYPT_MASK32_NONZERO( d ); |
481 | 128k | foundMask = dIsNonzeroMask & searchingMask; |
482 | 128k | res |= nUint32 & foundMask; |
483 | 128k | msNonzeroWord |= d & foundMask; |
484 | 128k | searchingMask &= ~foundMask; |
485 | 128k | } |
486 | | |
487 | | // |
488 | | // If all words are zero, then res == 0 and msNonzeroDigit == 0. |
489 | | // |
490 | 6.62k | res = res * 8 * sizeof( UINT32 ) + SymCryptFdefBitsizeOfUint32( msNonzeroWord ); |
491 | | |
492 | 6.62k | return res; |
493 | 6.62k | } |
494 | | |
495 | | VOID |
496 | | SYMCRYPT_CALL |
497 | | SymCryptFdefIntSetValueUint32( |
498 | | UINT32 u32Src, |
499 | | _Out_ PSYMCRYPT_INT piDst ) |
500 | 0 | { |
501 | 0 | SYMCRYPT_CHECK_MAGIC( piDst ); |
502 | |
|
503 | 0 | SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_OBJ_NBYTES( piDst ) ); |
504 | 0 | SYMCRYPT_FDEF_INT_PUINT32( piDst )[0] = u32Src; |
505 | 0 | } |
506 | | |
507 | | C_ASSERT( SYMCRYPT_FDEF_DIGIT_SIZE >= 8 ); // Code below fails if this doesn't hold |
508 | | |
509 | | VOID |
510 | | SYMCRYPT_CALL |
511 | | SymCryptFdefIntSetValueUint64( |
512 | | UINT64 u64Src, |
513 | | _Out_ PSYMCRYPT_INT piDst ) |
514 | 0 | { |
515 | 0 | SYMCRYPT_CHECK_MAGIC( piDst ); |
516 | |
|
517 | 0 | SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_OBJ_NBYTES( piDst ) ); |
518 | 0 | SYMCRYPT_FDEF_INT_PUINT32( piDst )[0] = (UINT32) u64Src; |
519 | 0 | SYMCRYPT_FDEF_INT_PUINT32( piDst )[1] = (UINT32)(u64Src >> 32); |
520 | 0 | } |
521 | | |
522 | | SYMCRYPT_ERROR |
523 | | SYMCRYPT_CALL |
524 | | SymCryptFdefRawSetValue( |
525 | | _In_reads_bytes_(cbSrc) PCBYTE pbSrc, |
526 | | SIZE_T cbSrc, |
527 | | SYMCRYPT_NUMBER_FORMAT format, |
528 | | _Out_writes_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst, |
529 | | UINT32 nDigits ) |
530 | 11.2k | { |
531 | 11.2k | SYMCRYPT_ERROR scError; |
532 | 11.2k | UINT32 b; |
533 | 11.2k | INT32 step; |
534 | 11.2k | UINT32 w; |
535 | 11.2k | UINT32 windex; |
536 | 11.2k | UINT32 i; |
537 | 11.2k | UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; |
538 | | |
539 | | // |
540 | | // This is a very simple and slow generic implementation; |
541 | | // We'll create optimized versions for specific CPU platforms |
542 | | // (e.g. use of memcpy) |
543 | | // |
544 | | |
545 | | // I assume the number format is public? |
546 | 11.2k | switch( format ) |
547 | 11.2k | { |
548 | 0 | case SYMCRYPT_NUMBER_FORMAT_LSB_FIRST: |
549 | 0 | step = 1; |
550 | 0 | break; |
551 | 11.2k | case SYMCRYPT_NUMBER_FORMAT_MSB_FIRST: |
552 | 11.2k | step = -1; |
553 | 11.2k | pbSrc += cbSrc; // avoid tripping pointer overflow sanitizer with cbSrc == 0 |
554 | 11.2k | pbSrc--; |
555 | 11.2k | break; |
556 | 0 | default: |
557 | 0 | scError = SYMCRYPT_INVALID_ARGUMENT; |
558 | 0 | goto cleanup; |
559 | 11.2k | } |
560 | | |
561 | 237k | for( windex = 0; windex < nWords; windex++ ) |
562 | 226k | { |
563 | 226k | w = 0; |
564 | 1.13M | for( i=0; i<4; i++ ) |
565 | 904k | { |
566 | | // read the next byte into b |
567 | 904k | if( cbSrc > 0 ) |
568 | 471k | { |
569 | 471k | b = *pbSrc; |
570 | 471k | cbSrc -= 1; |
571 | 471k | pbSrc += step; |
572 | 471k | w |= b << 8*i; |
573 | 471k | } |
574 | 904k | } |
575 | 226k | pDst[windex] = w; |
576 | 226k | } |
577 | | |
578 | | // Inspect any remaining input bytes |
579 | 11.2k | b = 0; |
580 | 21.3k | while( cbSrc > 0 ) |
581 | 10.1k | { |
582 | 10.1k | b |= *pbSrc; |
583 | 10.1k | pbSrc += step; |
584 | 10.1k | cbSrc -= 1; |
585 | 10.1k | } |
586 | | |
587 | 11.2k | if( b > 0 ) |
588 | 29 | { |
589 | 29 | scError = SYMCRYPT_BUFFER_TOO_SMALL; |
590 | 29 | goto cleanup; |
591 | 29 | } |
592 | | |
593 | 11.2k | scError = SYMCRYPT_NO_ERROR; |
594 | | |
595 | 11.2k | cleanup: |
596 | 11.2k | return scError; |
597 | 11.2k | } |
598 | | |
599 | | SYMCRYPT_ERROR |
600 | | SYMCRYPT_CALL |
601 | | SymCryptFdefIntSetValue( |
602 | | _In_reads_bytes_(cbSrc) PCBYTE pbSrc, |
603 | | SIZE_T cbSrc, |
604 | | SYMCRYPT_NUMBER_FORMAT format, |
605 | | _Out_ PSYMCRYPT_INT piDst ) |
606 | 6.90k | { |
607 | 6.90k | SYMCRYPT_ERROR scError; |
608 | | |
609 | 6.90k | SYMCRYPT_CHECK_MAGIC( piDst ); |
610 | | |
611 | 6.90k | scError = SymCryptFdefRawSetValue( pbSrc, cbSrc, format, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); |
612 | | |
613 | 6.90k | return scError; |
614 | 6.90k | } |
615 | | |
616 | | |
617 | | SYMCRYPT_ERROR |
618 | | SYMCRYPT_CALL |
619 | | SymCryptFdefRawGetValue( |
620 | | _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, |
621 | | UINT32 nDigits, |
622 | | _Out_writes_bytes_(cbDst) PBYTE pbDst, |
623 | | SIZE_T cbDst, |
624 | | SYMCRYPT_NUMBER_FORMAT format ) |
625 | 2.05k | { |
626 | 2.05k | SYMCRYPT_ERROR scError; |
627 | 2.05k | UINT32 b; |
628 | 2.05k | INT32 step; |
629 | 2.05k | UINT32 w; |
630 | 2.05k | UINT32 windex; |
631 | 2.05k | UINT32 i; |
632 | 2.05k | UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; |
633 | | |
634 | | // |
635 | | // This is a very simple and slow generic implementation; |
636 | | // We'll create optimized versions for specific CPU platforms |
637 | | // (e.g. use of memcpy) |
638 | | // |
639 | | |
640 | 2.05k | switch( format ) |
641 | 2.05k | { |
642 | 0 | case SYMCRYPT_NUMBER_FORMAT_LSB_FIRST: |
643 | 0 | step = 1; |
644 | 0 | break; |
645 | 2.05k | case SYMCRYPT_NUMBER_FORMAT_MSB_FIRST: |
646 | 2.05k | step = -1; |
647 | 2.05k | pbDst += cbDst; // avoid tripping pointer overflow sanitizer with cbSrc == 0 |
648 | 2.05k | pbDst--; |
649 | 2.05k | break; |
650 | 0 | default: |
651 | 0 | scError = SYMCRYPT_INVALID_ARGUMENT; |
652 | 0 | goto cleanup; |
653 | 2.05k | } |
654 | | |
655 | 43.6k | for( windex = 0; windex < nWords; windex++ ) |
656 | 41.5k | { |
657 | 41.5k | w = pSrc[windex]; |
658 | 207k | for( i=0; i<4; i++ ) |
659 | 166k | { |
660 | 166k | b = w & 0xff; |
661 | 166k | w >>= 8; |
662 | | |
663 | | // write the next byte |
664 | 166k | if( cbDst > 0 ) |
665 | 102k | { |
666 | 102k | *pbDst = (BYTE)b; |
667 | 102k | cbDst -= 1; |
668 | 102k | pbDst += step; |
669 | 102k | } else { |
670 | 63.4k | if( b != 0 ) |
671 | 0 | { |
672 | 0 | scError = SYMCRYPT_BUFFER_TOO_SMALL; |
673 | 0 | goto cleanup; |
674 | 0 | } |
675 | 63.4k | } |
676 | 166k | } |
677 | 41.5k | } |
678 | | |
679 | | // Zero any remaining output bytes |
680 | 2.05k | while( cbDst > 0 ) |
681 | 0 | { |
682 | 0 | *pbDst = 0; |
683 | 0 | pbDst += step; |
684 | 0 | cbDst -= 1; |
685 | 0 | } |
686 | | |
687 | 2.05k | scError = SYMCRYPT_NO_ERROR; |
688 | | |
689 | 2.05k | cleanup: |
690 | 2.05k | return scError; |
691 | 2.05k | } |
692 | | |
693 | | SYMCRYPT_ERROR |
694 | | SYMCRYPT_CALL |
695 | | SymCryptFdefIntGetValue( |
696 | | _In_ PCSYMCRYPT_INT piSrc, |
697 | | _Out_writes_bytes_(cbDst) PBYTE pbDst, |
698 | | SIZE_T cbDst, |
699 | | SYMCRYPT_NUMBER_FORMAT format ) |
700 | 0 | { |
701 | 0 | SYMCRYPT_ERROR scError; |
702 | |
|
703 | 0 | SYMCRYPT_CHECK_MAGIC( piSrc ); |
704 | |
|
705 | 0 | scError = SymCryptFdefRawGetValue( &SYMCRYPT_FDEF_INT_PUINT32( piSrc )[0], piSrc->nDigits, pbDst, cbDst, format ); |
706 | |
|
707 | 0 | return scError; |
708 | 0 | } |
709 | | |
710 | | |
711 | | UINT32 |
712 | | SYMCRYPT_CALL |
713 | | SymCryptFdefIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ) |
714 | 885k | { |
715 | | // nDigits cannot be zero, so we don't have to test |
716 | 885k | return SYMCRYPT_FDEF_INT_PUINT32( piSrc )[0]; |
717 | 885k | } |
718 | | |
719 | | UINT64 |
720 | | SYMCRYPT_CALL |
721 | | SymCryptFdefIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ) |
722 | 1.89k | { |
723 | | // nDigits cannot be zero, so we don't have to test |
724 | 1.89k | PCUINT32 p = SYMCRYPT_FDEF_INT_PUINT32( piSrc ); |
725 | 1.89k | return ((UINT64)(p[1]) << 32) | p[0]; |
726 | 1.89k | } |
727 | | |
728 | | UINT32 |
729 | | SYMCRYPT_CALL |
730 | | SymCryptFdefRawIsEqualUint32( |
731 | | _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, |
732 | | UINT32 nDigits, |
733 | | _In_ UINT32 u32Src2 ) |
734 | 766k | { |
735 | 766k | UINT32 d; |
736 | 766k | UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; |
737 | | |
738 | 766k | d = pSrc1[0] ^ u32Src2; |
739 | 17.6M | for( UINT32 i=1; i<nWords; i++) |
740 | 16.8M | { |
741 | 16.8M | d |= pSrc1[i]; |
742 | 16.8M | } |
743 | | |
744 | 766k | return SYMCRYPT_MASK32_ZERO( d ); |
745 | 766k | } |
746 | | |
747 | | UINT32 |
748 | | SYMCRYPT_CALL |
749 | | SymCryptFdefIntIsEqualUint32( |
750 | | _In_ PCSYMCRYPT_INT piSrc1, |
751 | | _In_ UINT32 u32Src2 ) |
752 | 683k | { |
753 | 683k | return SymCryptFdefRawIsEqualUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[0], piSrc1->nDigits, u32Src2 ); |
754 | 683k | } |
755 | | |
756 | | UINT32 |
757 | | SYMCRYPT_CALL |
758 | | SymCryptFdefIntIsEqual( |
759 | | _In_ PCSYMCRYPT_INT piSrc1, |
760 | | _In_ PCSYMCRYPT_INT piSrc2 ) |
761 | 0 | { |
762 | 0 | UINT32 d; |
763 | 0 | UINT32 n1 = SYMCRYPT_OBJ_NUINT32( piSrc1 ); |
764 | 0 | UINT32 n2 = SYMCRYPT_OBJ_NUINT32( piSrc2 ); |
765 | 0 | UINT32 i; |
766 | 0 | UINT32 n; |
767 | 0 | PCUINT32 pSrc1 = SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ); |
768 | 0 | PCUINT32 pSrc2 = SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ); |
769 | |
|
770 | 0 | n = SYMCRYPT_MIN( n1, n2 ); |
771 | 0 | d = 0; |
772 | 0 | for( i=0; i < n ; i++ ) |
773 | 0 | { |
774 | 0 | d |= pSrc1[i] ^ pSrc2[i]; |
775 | 0 | } |
776 | | |
777 | | // i == n1 or i == n2, so at most one of the 2 loops below is ever run |
778 | |
|
779 | 0 | while( i < n1 ) |
780 | 0 | { |
781 | 0 | d |= pSrc1[i]; |
782 | 0 | i++; |
783 | 0 | } |
784 | |
|
785 | 0 | while( i < n2 ) |
786 | 0 | { |
787 | 0 | d |= pSrc2[i]; |
788 | 0 | i++; |
789 | 0 | } |
790 | |
|
791 | 0 | return SYMCRYPT_MASK32_ZERO( d ); |
792 | 0 | } |
793 | | |
794 | | PSYMCRYPT_DIVISOR |
795 | | SYMCRYPT_CALL |
796 | | SymCryptFdefDivisorAllocate( UINT32 nDigits ) |
797 | 0 | { |
798 | 0 | PVOID p = NULL; |
799 | 0 | UINT32 cb; |
800 | 0 | PSYMCRYPT_DIVISOR res = NULL; |
801 | | |
802 | | // |
803 | | // The nDigits requirements are enforced by SymCryptFdefSizeofDivisorFromDigits. Thus |
804 | | // the result does not overflow and is upper bounded by 2^19. |
805 | | // |
806 | 0 | cb = SymCryptFdefSizeofDivisorFromDigits( nDigits ); |
807 | |
|
808 | 0 | if( cb != 0 ) |
809 | 0 | { |
810 | 0 | p = SymCryptCallbackAlloc( cb ); |
811 | 0 | } |
812 | |
|
813 | 0 | if( p == NULL ) |
814 | 0 | { |
815 | 0 | goto cleanup; |
816 | 0 | } |
817 | | |
818 | 0 | res = SymCryptFdefDivisorCreate( p, cb, nDigits ); |
819 | |
|
820 | 0 | cleanup: |
821 | 0 | return res; |
822 | 0 | } |
823 | | |
824 | | UINT32 |
825 | | SYMCRYPT_CALL |
826 | | SymCryptFdefSizeofDivisorFromDigits( UINT32 nDigits ) |
827 | 6.62k | { |
828 | 6.62k | SYMCRYPT_ASSERT( nDigits != 0 ); |
829 | 6.62k | SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS ); |
830 | | |
831 | | // Ensure we do not overflow the following calculation when provided with invalid inputs |
832 | 6.62k | if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS ) |
833 | 0 | { |
834 | 0 | return 0; |
835 | 0 | } |
836 | | |
837 | 6.62k | return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_DIVISOR, Int ) + SymCryptFdefSizeofIntFromDigits( nDigits ); |
838 | 6.62k | } |
839 | | |
840 | | PSYMCRYPT_DIVISOR |
841 | | SYMCRYPT_CALL |
842 | | SymCryptFdefDivisorCreate( |
843 | | _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, |
844 | | SIZE_T cbBuffer, |
845 | | UINT32 nDigits ) |
846 | 1.89k | { |
847 | 1.89k | PSYMCRYPT_DIVISOR pdDiv = NULL; |
848 | 1.89k | UINT32 cb = SymCryptSizeofDivisorFromDigits( nDigits ); |
849 | | |
850 | 1.89k | SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_DIVISOR) ); |
851 | 1.89k | SYMCRYPT_ASSERT( cbBuffer >= cb ); |
852 | 1.89k | if( (cb == 0) || (cbBuffer < cb) ) |
853 | 0 | { |
854 | 0 | goto cleanup; // return NULL |
855 | 0 | } |
856 | | |
857 | 1.89k | SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); |
858 | 1.89k | pdDiv = (PSYMCRYPT_DIVISOR) pbBuffer; |
859 | | |
860 | 1.89k | pdDiv->type = 'gD' << 16; |
861 | 1.89k | pdDiv->nDigits = nDigits; |
862 | | |
863 | | // |
864 | | // The nDigits requirements are enforced by SymCryptFdefSizeofDivisorFromDigits. Thus |
865 | | // the result does not overflow and is upper bounded by 2^19. |
866 | | // |
867 | 1.89k | pdDiv->cbSize = cb; |
868 | | |
869 | 1.89k | SYMCRYPT_SET_MAGIC( pdDiv ); |
870 | | |
871 | 1.89k | SymCryptIntCreate( (PBYTE)&pdDiv->Int, cbBuffer - SYMCRYPT_FIELD_OFFSET( SYMCRYPT_DIVISOR, Int ), nDigits ); |
872 | | |
873 | 1.89k | cleanup: |
874 | 1.89k | return pdDiv; |
875 | 1.89k | } |
876 | | |
877 | | VOID |
878 | | SymCryptFdefDivisorCopyFixup( |
879 | | _In_ PCSYMCRYPT_DIVISOR pdSrc, |
880 | | _Out_ PSYMCRYPT_DIVISOR pdDst ) |
881 | 0 | { |
882 | 0 | UNREFERENCED_PARAMETER( pdSrc ); |
883 | 0 | UNREFERENCED_PARAMETER( pdDst ); |
884 | |
|
885 | 0 | SymCryptFdefIntCopyFixup( &pdSrc->Int, &pdDst->Int ); |
886 | |
|
887 | 0 | SYMCRYPT_SET_MAGIC( pdDst ); |
888 | 0 | } |
889 | | |
890 | | VOID |
891 | | SymCryptFdefDivisorCopy( |
892 | | _In_ PCSYMCRYPT_DIVISOR pdSrc, |
893 | | _Out_ PSYMCRYPT_DIVISOR pdDst ) |
894 | 0 | { |
895 | 0 | SYMCRYPT_CHECK_MAGIC( pdSrc ); |
896 | 0 | SYMCRYPT_CHECK_MAGIC( pdDst ); |
897 | |
|
898 | 0 | SYMCRYPT_ASSERT( pdSrc->nDigits == pdDst->nDigits ); |
899 | | |
900 | | // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. |
901 | 0 | if( pdSrc != pdDst ) |
902 | 0 | { |
903 | 0 | memcpy( pdDst, pdSrc, pdDst->cbSize ); |
904 | |
|
905 | 0 | SymCryptFdefDivisorCopyFixup( pdSrc, pdDst ); |
906 | 0 | } |
907 | 0 | } |
908 | | |
909 | | |
910 | | VOID |
911 | | SYMCRYPT_CALL |
912 | | SymCryptFdefClaimScratch( PBYTE pbScratch, SIZE_T cbScratch, SIZE_T cbMin ) |
913 | 5.02M | { |
914 | 5.02M | #if SYMCRYPT_DEBUG |
915 | 5.02M | SYMCRYPT_ASSERT( cbScratch >= cbMin ); |
916 | 5.02M | SymCryptWipe( pbScratch, cbMin ); |
917 | | #else |
918 | | UNREFERENCED_PARAMETER( pbScratch ); |
919 | | UNREFERENCED_PARAMETER( cbScratch ); |
920 | | UNREFERENCED_PARAMETER( cbMin ); |
921 | | #endif |
922 | 5.02M | } |
923 | | |
924 | | UINT32 |
925 | | SymCryptTestTrialdivisionMaxSmallPrime( |
926 | | _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ) |
927 | 0 | { |
928 | 0 | return pContext->maxTrialPrime; |
929 | 0 | } |
930 | | |
931 | | UINT64 |
932 | | SymCryptInverseMod2e64( UINT64 m ) |
933 | 1.89k | { |
934 | | // Compute the inv64 value such that inv64 * m = 1 mod 2^64 for odd m. |
935 | | // If m is even, there exists no inverse, this function will return a |
936 | | // useless value in constant time. |
937 | | // |
938 | | // We use Newton's method to search for a zero of f(x) := x^-1 - m, working modulo 2^64 |
939 | | // We get the iteration formula |
940 | | // x_{i+1} = x_i - f(x_i)/f'(x_i) |
941 | | // = x_i - (x_i^-1 - m)/(-x_i^-2) |
942 | | // = x_i + x_i^2(1/x_i - m) |
943 | | // = x_i + x_i - (x_i^2 * m) |
944 | | // = x_i (2 - x_i*m) |
945 | | // |
946 | | // Let x_i = d + 2^n * e where d = inv64 = m^-1 mod 2^64, and 2^n * e is the error term that is zero in the n least |
947 | | // significant bits. We have |
948 | | // x_{i+1} = (d + 2^n * e) (2 - (d + 2^n * e) * m) |
949 | | // = (d + 2^n * e) (2 - d*m - 2^n * e * m) |
950 | | // = (d + 2^n * e) (2 - 1 - 2^n * e * m) |
951 | | // = (d + 2^n * e) (1 - 2^n * e * m) |
952 | | // = d - (2^n * e * (d*m)) + (2^n * e) - (2^{2n} * e^2 * m) |
953 | | // = d - (2^{2n} * e^2 * m) |
954 | | // In other words, the error has been squared and multiplied by m. In our case, working modulo 2^64, the number of correct bits |
955 | | // on the least significant side is doubled. |
956 | | // |
957 | | // To get a 4-bit correct estimate for m^-1 given odd m, we consider the least significant 4 bits of m and inv: |
958 | | // m = ... m_3 m_2 m_1 m_0 |
959 | | // inv = ... i_3 i_2 i_1 i_0 |
960 | | // We want to directly compute i_[3..0] s.t. (m*inv) & 0xf == 1 |
961 | | // working through some simple simultaneous equations it is easily shown that: |
962 | | // i_0 = m_0 = 1 |
963 | | // i_1 = m_1 |
964 | | // i_2 = m_2 |
965 | | // i_3 = m_1 ^ m_2 ^ m_3 |
966 | | // Once we have 4 correct bits, we can double that multiple times using Newton's method. |
967 | | // |
968 | | // We use 32-bit operations for most of the iterations for speed on 32-bit platforms. |
969 | | // |
970 | 1.89k | UINT32 inv32; |
971 | 1.89k | UINT64 inv64; |
972 | 1.89k | UINT32 m32; |
973 | | |
974 | 1.89k | m32 = (UINT32)m; |
975 | | |
976 | 1.89k | inv32 = m32 ^ (((m32 - 1) * 0x6) & 0x8); // sets inv32 bits [3..0] |
977 | 1.89k | SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xf) == 1) ); |
978 | | |
979 | 1.89k | inv32 = inv32 * (2 - inv32 * m32 ); |
980 | 1.89k | SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xff) == 1) ); |
981 | | |
982 | 1.89k | inv32 = inv32 * (2 - inv32 * m32 ); |
983 | 1.89k | SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xffff) == 1) ); |
984 | | |
985 | 1.89k | inv32 = inv32 * (2 - inv32 * m32 ); |
986 | 1.89k | SYMCRYPT_ASSERT( ((m&1) == 0) || ((inv32 * m32) == 1) ); |
987 | | |
988 | 1.89k | inv64 = inv32; |
989 | 1.89k | inv64 = inv64 * (2 - inv64 * m ); |
990 | 1.89k | SYMCRYPT_ASSERT( ((m&1) == 0) || ((inv64 * m) == 1) ); |
991 | | |
992 | 1.89k | return inv64; |
993 | 1.89k | } |
994 | | |
995 | | |
996 | | VOID |
997 | | SYMCRYPT_CALL |
998 | | SymCryptFdefInitTrialdivisionPrime( |
999 | | UINT32 prime, |
1000 | | _Out_ PSYMCRYPT_TRIALDIVISION_PRIME pPrime ) |
1001 | 0 | { |
1002 | | // Compute the inverse of the prime mod 2^64 |
1003 | 0 | pPrime->invMod2e64 = SymCryptInverseMod2e64( prime ); |
1004 | 0 | pPrime->compareLimit = ((UINT64) -1) / prime; |
1005 | 0 | } |
1006 | | |
1007 | | FORCEINLINE |
1008 | | UINT32 |
1009 | | SymCryptIsMultipleOfSmallPrime( UINT64 value, PCSYMCRYPT_TRIALDIVISION_PRIME pPrime ) |
1010 | 0 | { |
1011 | 0 | return (value * pPrime->invMod2e64) <= pPrime->compareLimit; |
1012 | 0 | } |
1013 | | |
1014 | | VOID |
1015 | | SYMCRYPT_CALL |
1016 | | SymCryptFdefInitTrialDivisionGroup( PSYMCRYPT_TRIALDIVISION_GROUP pGroup, UINT32 nPrimes, UINT32 primeProd ) |
1017 | 0 | { |
1018 | 0 | UINT32 f; |
1019 | 0 | UINT32 r; |
1020 | 0 | UINT32 i; |
1021 | |
|
1022 | 0 | pGroup->nPrimes = nPrimes; |
1023 | | |
1024 | | // These % operations are expensive; maybe we can optimize this further. |
1025 | | // In assembler we can do the UINT64 % UINT32 -> UINT32 |
1026 | | // hopefully the compiler is smart enough... |
1027 | |
|
1028 | 0 | f = (UINT32) (((UINT64)1 << 32) % primeProd); |
1029 | 0 | pGroup->factor[0] = f; |
1030 | 0 | r = f; |
1031 | 0 | for( i=1; i<9; i++ ) |
1032 | 0 | { |
1033 | 0 | r = (UINT32) (SYMCRYPT_MUL32x32TO64( r, f ) % primeProd); |
1034 | 0 | pGroup->factor[i] = r; |
1035 | 0 | } |
1036 | 0 | } |
1037 | | |
1038 | | UINT32 |
1039 | | SYMCRYPT_CALL |
1040 | | SymCryptGenerateSmallPrimes( UINT32 maxPrime, PUINT32 * ppList ) |
1041 | 0 | { |
1042 | | // returns a list of small primes, excluding 2, 3, 5, and 17. |
1043 | 0 | UINT32 nPrimes = 0; |
1044 | 0 | PUINT32 pList = NULL; |
1045 | | |
1046 | | // pSieve[i] corresponds to 2*i+1 |
1047 | | // value X is in location X/2 |
1048 | 0 | UINT32 nSieve; |
1049 | 0 | PBYTE pSieve; |
1050 | |
|
1051 | 0 | UINT32 pi; |
1052 | 0 | UINT32 p; |
1053 | 0 | UINT32 si; |
1054 | 0 | UINT32 i; |
1055 | |
|
1056 | 0 | maxPrime = SYMCRYPT_MAX( maxPrime, 32 ); // simplify error handling by always producing primes at least up to 32 |
1057 | 0 | maxPrime = SYMCRYPT_MIN( maxPrime, 1 << 24 ); // Limit prime list to something sane (sieve = 8 MB, list = 4 MB or so). |
1058 | | |
1059 | | // highest index is (maxPrime - 1)/2 which encodes maxPrime if odd, or maxPrime-1 if even |
1060 | 0 | nSieve = (maxPrime - 1) / 2 + 1; |
1061 | |
|
1062 | 0 | pSieve = SymCryptCallbackAlloc( nSieve ); |
1063 | 0 | if( pSieve == NULL ) |
1064 | 0 | { |
1065 | 0 | goto cleanup; |
1066 | 0 | } |
1067 | | |
1068 | 0 | SymCryptWipe( pSieve, nSieve ); |
1069 | | |
1070 | |
|
1071 | 0 | pi = 1; // index of first prime 3 |
1072 | 0 | p = 2*pi + 1; // prime value |
1073 | 0 | for(;;) |
1074 | 0 | { |
1075 | 0 | si = 2*(pi*pi + pi); // index of p^2 |
1076 | 0 | if( si > nSieve ) |
1077 | 0 | { |
1078 | 0 | break; // We're done sieving |
1079 | 0 | } |
1080 | 0 | while( si < nSieve ) |
1081 | 0 | { |
1082 | 0 | pSieve[si] = 1; |
1083 | 0 | si += p; |
1084 | 0 | } |
1085 | | // Search for the next prime |
1086 | 0 | do { |
1087 | 0 | pi += 1; |
1088 | 0 | } while( pSieve[pi] != 0 ); |
1089 | 0 | p = 2*pi + 1; |
1090 | 0 | } |
1091 | | |
1092 | | // Eliminate 3, 5, and 17 |
1093 | 0 | pSieve[1] = 1; |
1094 | 0 | pSieve[2] = 1; |
1095 | 0 | pSieve[8] = 1; |
1096 | |
|
1097 | 0 | for( i=1; i<nSieve; i++ ) |
1098 | 0 | { |
1099 | 0 | nPrimes += 1 - pSieve[i]; |
1100 | 0 | } |
1101 | | |
1102 | | // dcl - I suspect that this is not a problem, but please document |
1103 | | // why this multiplication cannot overflow. I assume there is a practical limit on nPrimes, but unsure |
1104 | | // what that would be. |
1105 | 0 | pList = SymCryptCallbackAlloc( nPrimes * sizeof( UINT32 ) ); |
1106 | 0 | if( pList == NULL ) |
1107 | 0 | { |
1108 | 0 | goto cleanup; |
1109 | 0 | } |
1110 | | |
1111 | 0 | pi = 0; |
1112 | 0 | for( i=1; i<nSieve; i++ ) |
1113 | 0 | { |
1114 | 0 | if( pSieve[i] == 0 ) |
1115 | 0 | { |
1116 | 0 | pList[pi++] = 2*i+1; |
1117 | 0 | } |
1118 | 0 | } |
1119 | |
|
1120 | 0 | SYMCRYPT_ASSERT( pi == nPrimes ); |
1121 | |
|
1122 | 0 | cleanup: |
1123 | 0 | if( pSieve != NULL ) |
1124 | 0 | { |
1125 | 0 | SymCryptWipe( pSieve, nSieve ); |
1126 | 0 | SymCryptCallbackFree( pSieve ); |
1127 | 0 | } |
1128 | |
|
1129 | 0 | *ppList = pList; |
1130 | 0 | return nPrimes; |
1131 | 0 | } |
1132 | | |
1133 | | |
1134 | | PCSYMCRYPT_TRIALDIVISION_CONTEXT |
1135 | | SYMCRYPT_CALL |
1136 | | SymCryptFdefCreateTrialDivisionContext( UINT32 nDigits ) |
1137 | 0 | { |
1138 | 0 | PSYMCRYPT_TRIALDIVISION_CONTEXT pRes = NULL; |
1139 | 0 | PBYTE pAlloc; |
1140 | 0 | UINT32 nBytes; |
1141 | 0 | UINT32 iPrime; |
1142 | 0 | UINT32 iGroup; |
1143 | 0 | UINT32 nPrimes; |
1144 | 0 | UINT32 nGroups; |
1145 | 0 | UINT32 M; |
1146 | 0 | UINT32 iGroupSpec; |
1147 | 0 | UINT32 i; |
1148 | 0 | UINT32 j; |
1149 | 0 | UINT64 cRabinMillerCost; |
1150 | 0 | UINT64 cPerPrimeCost; |
1151 | 0 | UINT64 tmp64; |
1152 | 0 | UINT32 maxPrime; |
1153 | 0 | UINT32 minPrime; |
1154 | 0 | UINT32 nSmallPrimes = 0; |
1155 | 0 | UINT32 n; |
1156 | 0 | UINT32 nP; |
1157 | 0 | UINT32 nG; |
1158 | 0 | PUINT32 pSmallPrimeList = NULL; |
1159 | | |
1160 | | // First we estimate the largest prime we will do trial division with |
1161 | | // Inputs: |
1162 | | // - cycles/digit of reduction per group of primes |
1163 | | // - cycles/prime of divide test |
1164 | | // - cycles per digit^3 for a Rabin-Miller test |
1165 | | // We optimize in this model, which is pretty accurate for large inputs but underestimates the RM cost |
1166 | | // for smaller sizes. |
1167 | | |
1168 | | // Compute the Rabin-Miller cost estimate. We reduce it by 20% because our cost model does not take |
1169 | | // into account some of the trial-division cost such as memory footprint, cache pressure, |
1170 | | // setup cost, etc. Reducing the Rabin-Miller cost leads us to do fewer trial divisions to approximately |
1171 | | // balance the hidden costs. |
1172 | |
|
1173 | 0 | if( nDigits <= 1000 ) |
1174 | 0 | { |
1175 | | // nDigits is small enough to not have any overflows in this computation |
1176 | 0 | if( nDigits == 0 ) |
1177 | 0 | { |
1178 | 0 | goto cleanup; // return NULL |
1179 | 0 | } |
1180 | | |
1181 | 0 | cRabinMillerCost = (UINT64) nDigits * nDigits * nDigits * (SYMCRYPT_RABINMILLER_DIGIT_CYCLES * 8 / 10); |
1182 | 0 | i = 0; |
1183 | 0 | minPrime = 0; |
1184 | 0 | for(;;) |
1185 | 0 | { |
1186 | 0 | nPrimes = g_SymCryptSmallPrimeGroupsSpec[i].nPrimes; |
1187 | 0 | maxPrime = g_SymCryptSmallPrimeGroupsSpec[i].maxPrime; |
1188 | 0 | nGroups = g_SymCryptSmallPrimeGroupsSpec[i].nGroups; |
1189 | 0 | cPerPrimeCost = (UINT64) nDigits * SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES / nPrimes + SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES; |
1190 | | |
1191 | | // If the last group isn't worth it, we shouldn't go to even fewer primes |
1192 | 0 | if( nGroups == 0 || maxPrime * cPerPrimeCost >= cRabinMillerCost) |
1193 | 0 | { |
1194 | 0 | break; |
1195 | 0 | } |
1196 | 0 | i++; |
1197 | 0 | minPrime = maxPrime; |
1198 | 0 | } |
1199 | | |
1200 | | // Now we know how many primes are in the last groups, let's find out how large the largest prime should be |
1201 | 0 | tmp64 = cRabinMillerCost / cPerPrimeCost; |
1202 | 0 | tmp64 = SYMCRYPT_MIN( tmp64, SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME ); |
1203 | 0 | maxPrime = (UINT32) tmp64; |
1204 | 0 | maxPrime = SYMCRYPT_MAX( maxPrime, minPrime ); // Make sure we don't fall into the previous group size that we don't want |
1205 | 0 | } |
1206 | 0 | else |
1207 | 0 | { |
1208 | 0 | maxPrime = SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME; |
1209 | 0 | } |
1210 | | |
1211 | 0 | nSmallPrimes = SymCryptGenerateSmallPrimes( maxPrime, &pSmallPrimeList ); |
1212 | | |
1213 | | // Find out how many groups we'll have, and how many actual primes we'll use |
1214 | 0 | n = nSmallPrimes; |
1215 | 0 | nG = 0; |
1216 | 0 | nP = 0; |
1217 | 0 | i = 0; |
1218 | 0 | for(;;) |
1219 | 0 | { |
1220 | 0 | nPrimes = g_SymCryptSmallPrimeGroupsSpec[i].nPrimes; |
1221 | 0 | nGroups = g_SymCryptSmallPrimeGroupsSpec[i].nGroups; |
1222 | |
|
1223 | 0 | if( n < nPrimes * nGroups || nGroups == 0 ) |
1224 | 0 | { |
1225 | | // At the right nPrimes, compute exactly how many groups to add |
1226 | 0 | n = n / nPrimes; |
1227 | 0 | nG += n; |
1228 | 0 | nP += n * nPrimes; |
1229 | 0 | n = 0; // No primes left |
1230 | 0 | break; |
1231 | 0 | } |
1232 | | |
1233 | | // Use up all the groups of this size... |
1234 | 0 | nG += nGroups; |
1235 | 0 | nP += nPrimes * nGroups; |
1236 | 0 | n -= nPrimes * nGroups; |
1237 | 0 | i++; |
1238 | 0 | } |
1239 | | |
1240 | | // dcl - Potential integer overflow |
1241 | | // Need to document sizes, and limits of nG, nP, and confirm |
1242 | | // an overflow is not possible, also recall that size_t varies in size, but nBytes is 32-bit |
1243 | 0 | nBytes = sizeof( SYMCRYPT_TRIALDIVISION_CONTEXT ) |
1244 | 0 | + (nG + 1) * sizeof( SYMCRYPT_TRIALDIVISION_GROUP ) // + 1 for 0 sentinel |
1245 | 0 | + (nP + 1) * sizeof( SYMCRYPT_TRIALDIVISION_PRIME ) // + 1 for 0 sentinel |
1246 | 0 | + (nP + 1) * sizeof( UINT32 ); // + 1 for 0 sentinel |
1247 | |
|
1248 | 0 | pAlloc = SymCryptCallbackAlloc( nBytes ); |
1249 | 0 | if( pAlloc == NULL ) |
1250 | 0 | { |
1251 | 0 | goto cleanup; |
1252 | 0 | } |
1253 | | |
1254 | 0 | pRes = (PSYMCRYPT_TRIALDIVISION_CONTEXT) pAlloc; |
1255 | 0 | pAlloc += sizeof( *pRes ); |
1256 | |
|
1257 | 0 | pRes->nBytesAlloc = nBytes; |
1258 | |
|
1259 | 0 | pRes->pGroupList = (PSYMCRYPT_TRIALDIVISION_GROUP)pAlloc; |
1260 | 0 | pAlloc += (nG + 1) * sizeof( SYMCRYPT_TRIALDIVISION_GROUP ); |
1261 | |
|
1262 | 0 | pRes->pPrimeList = (PSYMCRYPT_TRIALDIVISION_PRIME) pAlloc; |
1263 | 0 | pAlloc += (nP + 1) * sizeof( SYMCRYPT_TRIALDIVISION_PRIME ); |
1264 | |
|
1265 | 0 | pRes->pPrimes = (PUINT32) pAlloc; |
1266 | 0 | pAlloc += (nP + 1) * sizeof( UINT32 ); |
1267 | |
|
1268 | 0 | SYMCRYPT_ASSERT( nBytes == (SIZE_T)(pAlloc - (PBYTE)pRes) ); |
1269 | | |
1270 | | // Initialize the primes 3, 5, and 17 |
1271 | 0 | SymCryptFdefInitTrialdivisionPrime( 3, &pRes->Primes3_5_17[0] ); |
1272 | 0 | SymCryptFdefInitTrialdivisionPrime( 5, &pRes->Primes3_5_17[1] ); |
1273 | 0 | SymCryptFdefInitTrialdivisionPrime( 17, &pRes->Primes3_5_17[2] ); |
1274 | |
|
1275 | 0 | memcpy( pRes->pPrimes, pSmallPrimeList, nP * sizeof( UINT32 ) ); |
1276 | 0 | pRes->pPrimes[nP] = 0; |
1277 | 0 | pRes->maxTrialPrime = pRes->pPrimes[nP-1]; |
1278 | | |
1279 | | /* |
1280 | | *** Old code to decrypt the nibble encoding. Keep in case we want it back later... |
1281 | | // Generate the other primes from the difference table. |
1282 | | // We initialize the prime structures, and a list of the primes that is used to compute the group specs |
1283 | | |
1284 | | pNibs = &g_SymCryptSmallPrimeDifferenceNibbles[0]; |
1285 | | |
1286 | | smallPrime = 3; |
1287 | | nPrimes = 0; |
1288 | | while( smallPrime < SYMCRYPT_MAX_SMALL_PRIME ) |
1289 | | { |
1290 | | b = *pNibs++; |
1291 | | nib = b & 0xf; |
1292 | | |
1293 | | if( nib == 0 ) |
1294 | | { |
1295 | | smallPrime += 30; |
1296 | | // No check for termination here as we wouldn't encode a 0 if there wasn't another prime. |
1297 | | } else { |
1298 | | smallPrime += 2*nib; |
1299 | | pRes->pPrimes[nPrimes] = smallPrime; |
1300 | | SymCryptFdefInitTrialdivisionPrime( smallPrime, &pRes->pPrimeList[nPrimes] ); |
1301 | | nPrimes++; |
1302 | | if( smallPrime >= SYMCRYPT_MAX_SMALL_PRIME ) |
1303 | | { |
1304 | | break; |
1305 | | } |
1306 | | } |
1307 | | nib = b >> 4; |
1308 | | if( nib == 0 ) |
1309 | | { |
1310 | | smallPrime += 30; |
1311 | | } else { |
1312 | | smallPrime += 2*nib; |
1313 | | pRes->pPrimes[nPrimes] = smallPrime; |
1314 | | SymCryptFdefInitTrialdivisionPrime( smallPrime, &pRes->pPrimeList[nPrimes] ); |
1315 | | nPrimes++; |
1316 | | } |
1317 | | } |
1318 | | SYMCRYPT_ASSERT( smallPrime == SYMCRYPT_MAX_SMALL_PRIME && nPrimes == SYMCRYPT_N_SMALL_PRIMES_ENCODED ); |
1319 | | */ |
1320 | |
|
1321 | 0 | for( iPrime = 0; iPrime < nP; iPrime++ ) |
1322 | 0 | { |
1323 | 0 | SymCryptFdefInitTrialdivisionPrime( pRes->pPrimes[iPrime], &pRes->pPrimeList[iPrime] ); |
1324 | 0 | } |
1325 | | |
1326 | | // Add the trailing 0s |
1327 | 0 | pRes->pPrimeList[nP].invMod2e64 = 0; |
1328 | 0 | pRes->pPrimeList[nP].compareLimit = 0; |
1329 | | |
1330 | | // Make sure we have the 32-bit tables, not the 64-bit ones. |
1331 | | // dcl - warning suppression is not portable. Also, if it is a compile time constant, shouldn't it be a compile assert? |
1332 | 0 | #pragma warning( suppress: 4127 ) // conditional expression is constant |
1333 | 0 | SYMCRYPT_ASSERT( SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT <= (UINT32)-1 ); |
1334 | |
|
1335 | 0 | iGroup = 0; |
1336 | 0 | iPrime = 0; |
1337 | 0 | iGroupSpec = 0; |
1338 | 0 | nPrimes = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nPrimes; |
1339 | 0 | nGroups = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nGroups; |
1340 | 0 | while( iPrime < nP ) |
1341 | 0 | { |
1342 | 0 | if( nGroups == 0 ) |
1343 | 0 | { |
1344 | 0 | iGroupSpec +=1 ; |
1345 | 0 | nPrimes = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nPrimes; |
1346 | 0 | nGroups = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nGroups; |
1347 | 0 | if( nGroups == 0 ) |
1348 | 0 | { |
1349 | 0 | nGroups = nG - iGroup; |
1350 | 0 | } |
1351 | 0 | } |
1352 | |
|
1353 | 0 | SYMCRYPT_ASSERT( iPrime + nPrimes <= nP ); |
1354 | 0 | M = pRes->pPrimes[iPrime++]; |
1355 | 0 | for( j=1; j<nPrimes; j++ ) |
1356 | 0 | { |
1357 | 0 | SYMCRYPT_ASSERT( M <= SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT / pRes->pPrimes[iPrime] ); |
1358 | 0 | M *= pRes->pPrimes[iPrime++]; |
1359 | 0 | } |
1360 | 0 | SymCryptFdefInitTrialDivisionGroup( &pRes->pGroupList[iGroup], nPrimes, M ); |
1361 | 0 | iGroup++; |
1362 | |
|
1363 | 0 | nGroups--; |
1364 | 0 | } |
1365 | |
|
1366 | 0 | SYMCRYPT_ASSERT( iPrime == nP && iGroup == nG ); |
1367 | | |
1368 | | // Add the trailing sentinel group |
1369 | 0 | pRes->pGroupList[iGroup].nPrimes = 0; |
1370 | |
|
1371 | 0 | cleanup: |
1372 | 0 | if( pSmallPrimeList != NULL ) |
1373 | 0 | { |
1374 | 0 | SymCryptWipe( pSmallPrimeList, nSmallPrimes * sizeof( UINT32 ) ); |
1375 | 0 | SymCryptCallbackFree( pSmallPrimeList ); |
1376 | 0 | pSmallPrimeList = NULL; |
1377 | 0 | } |
1378 | 0 | return pRes; |
1379 | 0 | } |
1380 | | |
1381 | | VOID |
1382 | | SYMCRYPT_CALL |
1383 | | SymCryptFdefFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ) |
1384 | 0 | { |
1385 | | // No security reason to wipe it, but our test code verifies that we wipe everything... |
1386 | | // Perf cost is minor |
1387 | 0 | SymCryptWipe( (PBYTE) pContext, pContext->nBytesAlloc ); |
1388 | 0 | SymCryptCallbackFree( (PSYMCRYPT_TRIALDIVISION_CONTEXT) pContext ); |
1389 | 0 | } |
1390 | | |
1391 | | UINT32 |
1392 | | SYMCRYPT_CALL |
1393 | | SymCryptFdefIntFindSmallDivisor( |
1394 | | _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext, |
1395 | | _In_ PCSYMCRYPT_INT piSrc, |
1396 | | _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, |
1397 | | SIZE_T cbScratch ) |
1398 | 0 | { |
1399 | 0 | PCUINT32 pSrc = SYMCRYPT_FDEF_INT_PUINT32( piSrc ); |
1400 | 0 | PCUINT32 p; |
1401 | 0 | UINT32 nDigits = piSrc->nDigits; |
1402 | 0 | UINT32 nUint32 = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; |
1403 | 0 | UINT64 Acc; |
1404 | 0 | PCSYMCRYPT_TRIALDIVISION_GROUP pGroup; |
1405 | 0 | PCSYMCRYPT_TRIALDIVISION_PRIME pPrime; |
1406 | 0 | UINT32 nPrimes; |
1407 | 0 | UINT32 res; |
1408 | | |
1409 | | // Check for 2. Not really needed for prime generation, but it makes the function easier to test/document/describe. |
1410 | 0 | if( (*pSrc & 1) == 0 ) |
1411 | 0 | { |
1412 | 0 | res = 2; |
1413 | 0 | goto cleanup; |
1414 | 0 | } |
1415 | | |
1416 | | // Check the factors 3, 5, 17. These are special as they divide 2^32 - 1 |
1417 | | // (We could also do 257 and 65537 but that doesn't seem worth the added complexity.) |
1418 | 0 | Acc = 0; |
1419 | 0 | p = pSrc; |
1420 | 0 | do { |
1421 | | #if SYMCRYPT_FDEF_DIGIT_SIZE == 16 |
1422 | | Acc = Acc + p[0] + p[1] + p[2] + p[3]; |
1423 | | p += 4; |
1424 | | #elif (SYMCRYPT_FDEF_DIGIT_SIZE % 32) == 0 |
1425 | 0 | Acc = Acc + p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7]; |
1426 | 0 | p += 8; |
1427 | | #else |
1428 | | // dcl - ideally, #error would have a descriptive message so it is easily found in code if encountered, same below |
1429 | | #error ?? |
1430 | | #endif |
1431 | 0 | } while( p < pSrc + nUint32 ); |
1432 | |
|
1433 | 0 | if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[0] ) ) |
1434 | 0 | { |
1435 | 0 | res = 3; |
1436 | 0 | goto cleanup; |
1437 | 0 | } |
1438 | | |
1439 | 0 | if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[1] ) ) |
1440 | 0 | { |
1441 | 0 | res = 5; |
1442 | 0 | goto cleanup; |
1443 | 0 | } |
1444 | | |
1445 | 0 | if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[2] ) ) |
1446 | 0 | { |
1447 | 0 | res = 17; |
1448 | 0 | goto cleanup; |
1449 | 0 | } |
1450 | | |
1451 | 0 | pGroup = pContext->pGroupList; |
1452 | 0 | pPrime = pContext->pPrimeList; |
1453 | 0 | while( (nPrimes = pGroup->nPrimes) != 0 ) |
1454 | 0 | { |
1455 | | // Reduce Src modulo the group product to a 64-bit value |
1456 | 0 | Acc = 0; |
1457 | 0 | p = pSrc + nUint32; |
1458 | |
|
1459 | | #if SYMCRYPT_FDEF_DIGIT_SIZE == 16 |
1460 | | if( (nUint32 & 4) != 0 ) |
1461 | | { |
1462 | | // nUInt32 is 4 mod 8, process the top 4 words only |
1463 | | p -= 4; |
1464 | | Acc = |
1465 | | p[0] + |
1466 | | SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + |
1467 | | SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + |
1468 | | SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ); |
1469 | | } else { |
1470 | | // Process 8 words to start |
1471 | | p -= 8; |
1472 | | Acc = |
1473 | | p[0] + |
1474 | | SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + |
1475 | | SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + |
1476 | | SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + |
1477 | | SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + |
1478 | | SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + |
1479 | | SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + |
1480 | | SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ); |
1481 | | } |
1482 | | #elif (SYMCRYPT_FDEF_DIGIT_SIZE % 32) == 0 |
1483 | |
|
1484 | 0 | p -= 8; |
1485 | 0 | Acc = |
1486 | 0 | p[0] + |
1487 | 0 | SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + |
1488 | 0 | SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + |
1489 | 0 | SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + |
1490 | 0 | SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + |
1491 | 0 | SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + |
1492 | 0 | SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + |
1493 | 0 | SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ); |
1494 | |
|
1495 | | #else |
1496 | | #error ?? |
1497 | | #endif |
1498 | 0 | while( p > pSrc ) |
1499 | 0 | { |
1500 | 0 | p -= 8; |
1501 | 0 | Acc = |
1502 | 0 | p[0] + |
1503 | 0 | SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + |
1504 | 0 | SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + |
1505 | 0 | SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + |
1506 | 0 | SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + |
1507 | 0 | SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + |
1508 | 0 | SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + |
1509 | 0 | SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ) + |
1510 | 0 | SYMCRYPT_MUL32x32TO64( (UINT32) Acc , pGroup->factor[7] ) + |
1511 | 0 | SYMCRYPT_MUL32x32TO64( (UINT32)(Acc >> 32), pGroup->factor[8] ); |
1512 | 0 | } |
1513 | | |
1514 | | // Now we check whether we have a multiple of one of the primes |
1515 | 0 | while( nPrimes > 0 ) |
1516 | 0 | { |
1517 | 0 | if( SymCryptIsMultipleOfSmallPrime( Acc, pPrime ) ) |
1518 | 0 | { |
1519 | 0 | res = pContext->pPrimes[ (pPrime - pContext->pPrimeList) ]; // pointer subtraction auto-divides by size... |
1520 | 0 | goto cleanup; |
1521 | 0 | } |
1522 | 0 | pPrime++; |
1523 | 0 | nPrimes--; |
1524 | 0 | } |
1525 | | |
1526 | 0 | pGroup++; |
1527 | 0 | } |
1528 | | |
1529 | 0 | UNREFERENCED_PARAMETER( pbScratch ); |
1530 | 0 | UNREFERENCED_PARAMETER( cbScratch ); |
1531 | | |
1532 | | // Did not find a small factor, return zero |
1533 | 0 | res = 0; |
1534 | |
|
1535 | 0 | cleanup: |
1536 | 0 | return res; |
1537 | 0 | } |