Coverage Report

Created: 2024-11-21 07:03

/src/SymCrypt/lib/fdef_int.c
Line
Count
Source (jump to first uncovered line)
1
//
2
// fdef_int.c   INT functions for default number format
3
//
4
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
5
//
6
7
#include "precomp.h"
8
9
//
10
// Default big-number format:
11
// INT objects are stored in two parts:
12
//  a SYMCRYPT_FDEF_INT structure
13
//  an array of UINT32; the # elements in the array is a multiple of SYMCRYPT_FDEF_DIGIT_SIZE/4.
14
//
15
// The pointer passed points to the start of the UINT32 array, just after the SYMCRYPT_FDEF_INT structure.
16
//
17
// The generic implementation accesses the digits as an array of UINT32, but on 64-bit CPUs
18
// the code can also view it as an array of UINT64.
19
//
20
21
UINT32
22
SYMCRYPT_CALL
23
SymCryptFdefRawAddC(
24
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
25
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc2,
26
    _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32     pDst,
27
                                                            UINT32      nDigits )
28
0
{
29
0
    UINT32 i;
30
0
    UINT64 t;
31
32
0
    t = 0;
33
0
    for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ )
34
0
    {
35
0
        t = t + pSrc1[i] + pSrc2[i];
36
0
        pDst[i] = (UINT32) t;
37
0
        t >>= 32;
38
0
    }
39
40
0
    return (UINT32) t;
41
0
}
42
43
UINT32
44
SYMCRYPT_CALL
45
SymCryptFdefRawAdd(
46
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
47
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc2,
48
    _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32     pDst,
49
                                                            UINT32      nDigits )
50
449k
{
51
449k
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM
52
449k
    return SymCryptFdefRawAddAsm( pSrc1, pSrc2, pDst, nDigits );
53
#else
54
    return SymCryptFdefRawAddC( pSrc1, pSrc2, pDst, nDigits );
55
#endif
56
449k
}
57
58
59
UINT32
60
SYMCRYPT_CALL
61
SymCryptFdefRawAddUint32(
62
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    Src1,
63
                                                            UINT32      Src2,
64
    _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32     Dst,
65
                                                            UINT32      nDigits )
66
69.9k
{
67
69.9k
    UINT32 i;
68
69.9k
    UINT64 t;
69
70
69.9k
    t = Src2;
71
1.60M
    for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ )
72
1.53M
    {
73
1.53M
        t = t + Src1[i];
74
1.53M
        Dst[i] = (UINT32) t;
75
1.53M
        t >>= 32;
76
1.53M
    }
77
78
69.9k
    return (UINT32) t;
79
69.9k
}
80
81
UINT32
82
SYMCRYPT_CALL
83
SymCryptFdefIntAddUint32(
84
    _In_    PCSYMCRYPT_INT  piSrc1,
85
            UINT32          u32Src2,
86
    _Out_   PSYMCRYPT_INT   piDst )
87
69.6k
{
88
69.6k
    SYMCRYPT_CHECK_MAGIC( piSrc1 );
89
69.6k
    SYMCRYPT_CHECK_MAGIC( piDst );
90
91
69.6k
    SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits );
92
93
69.6k
    return SymCryptFdefRawAddUint32( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), u32Src2, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits );
94
69.6k
}
95
96
UINT32
97
SYMCRYPT_CALL
98
SymCryptFdefIntAddSameSize(
99
    _In_    PCSYMCRYPT_INT piSrc1,
100
    _In_    PCSYMCRYPT_INT piSrc2,
101
    _Out_   PSYMCRYPT_INT  piDst )
102
0
{
103
0
    SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits && piSrc2->nDigits == piDst->nDigits );
104
105
0
    return SymCryptFdefRawAdd(  SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ),
106
0
                                SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ),
107
0
                                SYMCRYPT_FDEF_INT_PUINT32( piDst ),
108
0
                                piDst->nDigits );
109
0
}
110
111
UINT32
112
SYMCRYPT_CALL
113
SymCryptFdefIntAddMixedSize(
114
    _In_    PCSYMCRYPT_INT piSrc1,
115
    _In_    PCSYMCRYPT_INT piSrc2,
116
    _Out_   PSYMCRYPT_INT  piDst )
117
0
{
118
0
    UINT32      nS1 = piSrc1->nDigits;
119
0
    UINT32      nS2 = piSrc2->nDigits;
120
0
    UINT32      nD  = piDst->nDigits;
121
0
    UINT32      c;
122
0
    UINT32      nW;
123
124
0
    SYMCRYPT_ASSERT( nD >= nS1 && nD >= nS2 );
125
126
0
    if( nS1 < nS2 )
127
0
    {
128
0
        c = SymCryptFdefRawAdd( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS1 );
129
0
        c = SymCryptFdefRawAddUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS2 - nS1 );
130
0
        nW = nS2;
131
0
    } else {
132
        // nS2 < nS1
133
0
        c = SymCryptFdefRawAdd( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS2 );
134
0
        c = SymCryptFdefRawAddUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS1 - nS2 );
135
0
        nW = nS1;
136
0
    }
137
138
0
    if( nW < nD )
139
0
    {
140
0
        SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nW * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - nW) * SYMCRYPT_FDEF_DIGIT_SIZE );
141
0
        SYMCRYPT_FDEF_INT_PUINT32( piDst )[nW * SYMCRYPT_FDEF_DIGIT_NUINT32] = c;
142
0
        c = 0;
143
0
    }
144
145
0
    return c;
146
0
}
147
148
UINT32
149
SYMCRYPT_CALL
150
SymCryptFdefRawSubC(
151
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
152
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc2,
153
    _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32     pDst,
154
                                                            UINT32      nDigits )
155
0
{
156
0
    UINT32 i;
157
0
    UINT64 t;
158
0
    UINT32 c;
159
160
0
    c = 0;
161
0
    for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ )
162
0
    {
163
        // c == 1 for carry, 0 for no carry
164
0
        t = (UINT64) pSrc1[i] - pSrc2[i] - c;
165
0
        pDst[i] = (UINT32) t;
166
0
        c = (UINT32)(t >> 32) & 1;
167
0
    }
168
169
0
    return c;
170
0
}
171
172
UINT32
173
SYMCRYPT_CALL
174
SymCryptFdefRawSub(
175
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
176
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc2,
177
    _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32     pDst,
178
                                                            UINT32      nDigits )
179
870k
{
180
870k
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM
181
870k
    return SymCryptFdefRawSubAsm( pSrc1, pSrc2, pDst, nDigits );
182
#else
183
    return SymCryptFdefRawSubC( pSrc1, pSrc2, pDst, nDigits );
184
#endif
185
870k
}
186
187
188
UINT32
189
SYMCRYPT_CALL
190
SymCryptFdefRawSubUint32(
191
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
192
                                                            UINT32      Src2,
193
    _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32     pDst,
194
                                                            UINT32      nDigits )
195
71.2k
{
196
71.2k
    UINT32 i;
197
71.2k
    UINT64 t;
198
71.2k
    UINT32 c;
199
200
71.2k
    c = Src2;
201
1.63M
    for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ )
202
1.56M
    {
203
1.56M
        t = (UINT64)pSrc1[i] - c;
204
1.56M
        pDst[i] = (UINT32) t;
205
1.56M
        c = (UINT32)(t >> 32) & 1;
206
1.56M
    }
207
208
71.2k
    return c;
209
71.2k
}
210
211
UINT32
212
SYMCRYPT_CALL
213
SymCryptFdefRawNeg(
214
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
215
                                                            UINT32      carryIn,
216
    _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32     pDst,
217
                                                            UINT32      nDigits )
218
154k
{
219
154k
    UINT32 i;
220
154k
    UINT64 t;
221
154k
    UINT32 c;
222
223
154k
    c = carryIn;
224
3.74M
    for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ )
225
3.59M
    {
226
3.59M
        t = (UINT64)0 - pSrc1[i] - c;
227
3.59M
        pDst[i] = (UINT32) t;
228
3.59M
        c = (UINT32)(t >> 32) & 1;
229
3.59M
    }
230
231
154k
    return c;
232
154k
}
233
234
UINT32
235
SYMCRYPT_CALL
236
SymCryptFdefIntSubUint32(
237
    _In_    PCSYMCRYPT_INT  piSrc1,
238
            UINT32          u32Src2,
239
    _Out_   PSYMCRYPT_INT   piDst )
240
70.1k
{
241
70.1k
    SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits );
242
243
70.1k
    return SymCryptFdefRawSubUint32( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), u32Src2, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits );
244
70.1k
}
245
246
UINT32
247
SYMCRYPT_CALL
248
SymCryptFdefIntSubSameSize(
249
    _In_    PCSYMCRYPT_INT piSrc1,
250
    _In_    PCSYMCRYPT_INT piSrc2,
251
    _Out_   PSYMCRYPT_INT  piDst )
252
337k
{
253
337k
    SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits && piSrc1->nDigits == piDst->nDigits );
254
255
337k
    return SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits );
256
337k
}
257
258
UINT32
259
SYMCRYPT_CALL
260
SymCryptFdefIntSubMixedSize(
261
    _In_    PCSYMCRYPT_INT piSrc1,
262
    _In_    PCSYMCRYPT_INT piSrc2,
263
    _Out_   PSYMCRYPT_INT  piDst )
264
0
{
265
0
    UINT32      nS1 = piSrc1->nDigits;
266
0
    UINT32      nS2 = piSrc2->nDigits;
267
0
    UINT32      nD  = piDst->nDigits;
268
0
    UINT32      c;
269
0
    UINT32      n;
270
271
0
    SYMCRYPT_ASSERT( nD >= nS1 && nD >= nS2 );
272
273
0
    if( nS1 < nS2 )
274
0
    {
275
0
        c = SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS1 );
276
0
        c = SymCryptFdefRawNeg( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS2 - nS1 );
277
0
        n = nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32;
278
0
    } else {
279
        // nS2 < nS1
280
0
        c = SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS2 );
281
0
        c = SymCryptFdefRawSubUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS1 - nS2 );
282
0
        n = nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32;
283
0
    }
284
285
    //
286
    // Set the rest of the result to 0s or 1s
287
    //
288
0
    while( n < nD * SYMCRYPT_FDEF_DIGIT_NUINT32 )
289
0
    {
290
0
        SYMCRYPT_FDEF_INT_PUINT32( piDst )[n++] = 0 - c;
291
0
    }
292
293
0
    return c;
294
0
}
295
296
UINT32
297
SYMCRYPT_CALL
298
SymCryptFdefRawIsLessThanC(
299
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
300
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc2,
301
                                                            UINT32      nDigits )
302
4.49k
{
303
4.49k
    UINT32 i;
304
4.49k
    UINT64 t;
305
4.49k
    UINT32 c;
306
307
    // We just do a subtraction without writing and return the carry
308
4.49k
    c = 0;
309
97.6k
    for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ )
310
93.2k
    {
311
        // c == 1 for carry, 0 for no carry
312
93.2k
        t = (UINT64) pSrc1[i] - pSrc2[i] - c;
313
93.2k
        c = (UINT32)(t >> 32) & 1;
314
93.2k
    }
315
316
    // All booleans are returned as masks
317
4.49k
    return 0 - c;
318
4.49k
}
319
320
UINT32
321
SYMCRYPT_CALL
322
SymCryptFdefRawIsLessThan(
323
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
324
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc2,
325
                                                            UINT32      nDigits )
326
4.49k
{
327
#if 0 & SYMCRYPT_CPU_AMD64
328
//    return SymCryptFdefRawIsLessThanAsm( pSrc1, pSrc2, nDigits );
329
#else
330
4.49k
    return SymCryptFdefRawIsLessThanC( pSrc1, pSrc2, nDigits );
331
4.49k
#endif
332
4.49k
}
333
334
UINT32
335
SYMCRYPT_CALL
336
SymCryptFdefRawIsZeroC(
337
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
338
                                                            UINT32      nDigits )
339
0
{
340
0
    UINT32 i;
341
0
    UINT32 c;
342
343
0
    c = 0;
344
0
    for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ )
345
0
    {
346
0
        c |= pSrc1[i];
347
0
    }
348
349
    // All booleans are returned as masks
350
0
    return SYMCRYPT_MASK32_ZERO( c );
351
0
}
352
353
UINT32
354
SYMCRYPT_CALL
355
SymCryptFdefRawIsZero(
356
    _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE )   PCUINT32    pSrc1,
357
                                                            UINT32      nDigits )
358
0
{
359
#if 0 & SYMCRYPT_CPU_AMD64
360
//    return SymCryptFdefRawIsZeroAsm( pSrc1, nDigits );
361
#else
362
0
    return SymCryptFdefRawIsZeroC( pSrc1, nDigits );
363
0
#endif
364
0
}
365
366
UINT32
367
SYMCRYPT_CALL
368
SymCryptFdefIntIsLessThan(
369
    _In_    PCSYMCRYPT_INT  piSrc1,
370
    _In_    PCSYMCRYPT_INT  piSrc2 )
371
4.22k
{
372
4.22k
    UINT32  nD1 = piSrc1->nDigits;
373
4.22k
    UINT32  nD2 = piSrc2->nDigits;
374
375
4.22k
    UINT32 res;
376
377
4.22k
    if( nD1 == nD2 )
378
4.22k
    {
379
4.22k
        res = SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD1 );
380
4.22k
    } else if( nD1 < nD2 ) {
381
0
        res =  SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD1 );
382
0
        res |= ~SymCryptFdefRawIsZero( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[ nD1 * SYMCRYPT_FDEF_DIGIT_NUINT32 ], nD2 - nD1 );
383
0
    } else {
384
0
        res =  SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD2 );
385
0
        res &= SymCryptFdefRawIsZero( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[ nD2 * SYMCRYPT_FDEF_DIGIT_NUINT32 ], nD1 - nD2 );
386
0
    }
387
388
4.22k
    return res;
389
4.22k
}
390
391
392
VOID
393
SYMCRYPT_CALL
394
SymCryptFdefIntNeg(
395
    _In_    PCSYMCRYPT_INT  piSrc,
396
    _Out_   PSYMCRYPT_INT   piDst )
397
153k
{
398
153k
    UINT32 nDigits = piDst->nDigits;
399
153k
    SYMCRYPT_ASSERT( piSrc->nDigits == nDigits );
400
401
153k
    SymCryptFdefRawNeg( SYMCRYPT_FDEF_INT_PUINT32( piSrc ), 0, SYMCRYPT_FDEF_INT_PUINT32( piDst ), nDigits );
402
153k
}
403
404
405
VOID
406
SYMCRYPT_CALL
407
SymCryptFdefIntMulPow2(
408
    _In_    PCSYMCRYPT_INT  piSrc,
409
            SIZE_T          Exp,
410
    _Out_   PSYMCRYPT_INT   piDst )
411
0
{
412
0
    SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits );
413
414
0
    SIZE_T  shiftWords = Exp / (8 * sizeof( UINT32 ) );
415
0
    SIZE_T  shiftBits  = Exp % (8 * sizeof( UINT32 ) );
416
417
0
    UINT32  nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
418
419
0
    if( shiftWords >= nWords )
420
0
    {
421
0
        SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), nWords * sizeof( UINT32 ) );
422
0
        goto cleanup;
423
0
    }
424
425
0
    SIZE_T i = nWords;
426
0
    while( i > shiftWords )
427
0
    {
428
0
        i--;
429
0
        UINT64 t = (UINT64)SYMCRYPT_FDEF_INT_PUINT32( piSrc )[i - shiftWords] << 32;
430
0
        if( i > shiftWords )
431
0
        {
432
0
            t |= SYMCRYPT_FDEF_INT_PUINT32( piSrc )[i - shiftWords - 1];
433
0
        }
434
0
        SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = (UINT32)(t >> (32 - shiftBits));
435
0
    }
436
437
0
    while( i > 0 )
438
0
    {
439
0
        i--;
440
0
        SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = 0;
441
0
    }
442
443
0
cleanup:
444
0
    ;
445
0
}
446
447
// For shift-based operations we have no assembly, and we'd like to use 32-bit words
448
// on 32-bit architectures and 64-bit words on 64-bit architectures. So we use NATIVE_UINT &
449
// friends.
450
451
// Note that accessing the FDEF uint32 array as an array of NATIVE_UINTs relies on
452
// the little-endianness of the target if NATIVE_UINT is larger than 32 bits.
453
// AMD64 is little endian and ARM64 code is always expected to execute in little
454
// endian mode, but this is not true in general for an arbitrary 64 bit platform.
455
//
456
// If we need to support a 64 bit big endian platform, we need to either
457
// restrict its NATIVE_UINT to 32 bits, or introduce load and store macros.
458
13.0M
// Casts the UINT32 word-array pointer of an INT object to a NATIVE_UINT pointer.
#define SYMCRYPT_FDEF_INT_PNATIVE_UINT(p) ((NATIVE_UINT*) SYMCRYPT_FDEF_INT_PUINT32( p ))
459
// Ensure that sizeof(NATIVE_UINT) > 4 only when compiling for known little endian target
460
C_ASSERT( (NATIVE_BYTES <= 4) || SYMCRYPT_CPU_AMD64 || SYMCRYPT_CPU_ARM64 );
461
462
538k
// Number of NATIVE_UINT words in one digit (digit size in bytes / native word size in bytes).
#define SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT  ((NATIVE_UINT)(SYMCRYPT_FDEF_DIGIT_SIZE / NATIVE_BYTES))
463
464
// Ensure that digit is divisible by native word size!
465
C_ASSERT(SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT * NATIVE_BYTES == SYMCRYPT_FDEF_DIGIT_SIZE);
466
467
VOID
468
SYMCRYPT_CALL
469
SymCryptFdefIntDivPow2(
470
    _In_    PCSYMCRYPT_INT  piSrc,
471
            SIZE_T          exp,
472
    _Out_   PSYMCRYPT_INT   piDst )
473
538k
{
474
538k
    SIZE_T  shiftWords = exp / NATIVE_BITS;
475
538k
    SIZE_T  shiftRightBits  = exp % NATIVE_BITS;
476
538k
    SIZE_T  shiftLeftBits   = (NATIVE_BITS-1) - shiftRightBits;
477
538k
    NATIVE_UINT lowWord, highWord, highPart;
478
538k
    SIZE_T i = 0;
479
480
538k
    NATIVE_UINT nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT;
481
482
538k
    SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits );
483
484
538k
    shiftWords = SYMCRYPT_MIN(shiftWords, nWords);
485
538k
    if( shiftWords < nWords )
486
538k
    {
487
538k
        lowWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[shiftWords];
488
6.23M
        while( i+shiftWords+1 < nWords )
489
5.69M
        {
490
5.69M
            highWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[i+shiftWords+1];
491
492
            // We always shift highWord left by 1 to keep variable shiftLeftBits in range [0,NATIVE_BITS-1]
493
5.69M
            highPart = (highWord << shiftLeftBits)<<1;
494
495
5.69M
            SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> shiftRightBits) | highPart;
496
497
5.69M
            lowWord = highWord;
498
5.69M
            i++;
499
5.69M
        }
500
538k
        SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> shiftRightBits);
501
538k
        i++;
502
538k
    }
503
504
538k
    SYMCRYPT_ASSERT(i + shiftWords == nWords);
505
506
538k
    SymCryptWipe( &SYMCRYPT_FDEF_INT_PNATIVE_UINT( piDst )[nWords-shiftWords], shiftWords * NATIVE_BYTES );
507
538k
}
508
509
VOID
510
SYMCRYPT_CALL
511
SymCryptFdefIntShr1(
512
            UINT32          highestBit,
513
    _In_    PCSYMCRYPT_INT  piSrc,
514
    _Out_   PSYMCRYPT_INT   piDst )
515
0
{
516
0
    UINT32  nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT;
517
518
0
    SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits );
519
0
    SYMCRYPT_ASSERT( highestBit < 2 );
520
521
0
    SIZE_T i = 0;
522
0
    NATIVE_UINT lowWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[0];
523
0
    NATIVE_UINT highWord = 0;
524
0
    while( i+1 < nWords )
525
0
    {
526
0
        highWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[i+1];
527
528
0
        SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> 1) | (highWord << (NATIVE_BITS - 1));
529
530
0
        lowWord = highWord;
531
0
        i++;
532
0
    }
533
    
534
0
    SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> 1) | ((NATIVE_UINT)highestBit) << (NATIVE_BITS - 1);
535
0
}
536
537
VOID
538
SYMCRYPT_CALL
539
SymCryptFdefIntModPow2(
540
    _In_    PCSYMCRYPT_INT  piSrc,
541
            SIZE_T          exp,
542
    _Out_   PSYMCRYPT_INT   piDst )
543
0
{
544
0
    SIZE_T  expWords = exp / 32;        // index of word with the partial mask
545
0
    SIZE_T  expBits  = exp % 32;        // # bits to leave in that word
546
547
0
    UINT32  nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
548
549
0
    SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits );
550
551
0
    if( piSrc != piDst )
552
0
    {
553
0
        memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), nWords * sizeof( UINT32 ) );
554
0
    }
555
556
0
    if( expWords >= nWords )
557
0
    {
558
        // exp is so large that Dst = Src is sufficient.
559
0
        goto cleanup;
560
0
    }
561
562
0
    for( SIZE_T i=expWords + 1; i < nWords; i++ )
563
0
    {
564
0
        SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = 0;
565
0
    }
566
567
0
    if( expBits != 0 )
568
0
    {
569
0
        SYMCRYPT_FDEF_INT_PUINT32( piDst )[expWords] &= ((UINT32) -1) >> (32 - expBits );
570
0
    } else {
571
0
        SYMCRYPT_FDEF_INT_PUINT32( piDst )[expWords] = 0;
572
0
    }
573
574
0
cleanup:
575
0
    ;
576
0
}
577
578
UINT32
579
SYMCRYPT_CALL
580
SymCryptFdefIntGetBit(
581
    _In_    PCSYMCRYPT_INT  piSrc,
582
            UINT32          iBit )
583
795
{
584
795
    SYMCRYPT_ASSERT( iBit < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS );
585
586
795
    return (((SYMCRYPT_FDEF_INT_PUINT32( piSrc)[iBit / 32]) >> (iBit % 32)) & 1);
587
795
}
588
589
UINT32
590
SYMCRYPT_CALL
591
SymCryptFdefIntGetBits(
592
    _In_    PCSYMCRYPT_INT  piSrc,
593
            UINT32          iBit,
594
            UINT32          nBits )
595
0
{
596
0
    UINT32 mainMask = 0;
597
0
    UINT32 result = 0;
598
599
0
    SYMCRYPT_ASSERT( (nBits > 0) &&
600
0
                     (nBits < 33) &&
601
0
                     (iBit < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) &&
602
0
                     (iBit + nBits <= piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) );
603
604
0
    mainMask = (UINT32)(-1) >> (32-nBits);
605
606
    // Get the lower word first (it exists since iBit is smaller than the max bit)
607
0
    result = SYMCRYPT_FDEF_INT_PUINT32(piSrc)[iBit/32];
608
609
    // Shift to the right accordingly
610
0
    result >>= (iBit%32);
611
612
    // Get the upper word (if we need it)
613
    // Note: the iBit and nBits values are public
614
0
    if ((iBit%32!=0) && ( iBit/32 + 1 < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ))
615
0
    {
616
0
        result |= ( SYMCRYPT_FDEF_INT_PUINT32(piSrc)[iBit/32+1] << (32 - iBit%32) );
617
0
    }
618
619
    // Mask out the top bits
620
0
    result &= mainMask;
621
622
0
    return result;
623
0
}
624
625
VOID
626
SYMCRYPT_CALL
627
SymCryptFdefIntSetBits(
628
    _In_    PSYMCRYPT_INT   piDst,
629
            UINT32          value,
630
            UINT32          iBit,
631
            UINT32          nBits )
632
0
{
633
0
    UINT32 mainMask = 0;
634
635
0
    UINT32 alignedVal = 0;
636
0
    UINT32 alignedMask = 0;
637
638
0
    SYMCRYPT_ASSERT( (nBits > 0) &&
639
0
                     (nBits < 33) &&
640
0
                     (iBit < piDst->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) &&
641
0
                     (iBit + nBits <= piDst->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) );
642
643
    // Zero out the not needed bits of the value
644
0
    mainMask = (UINT32)(-1) >> (32-nBits);
645
0
    value &= mainMask;
646
647
    //
648
    // Lower word
649
    //
650
651
    // Create the needed mask
652
0
    alignedMask = mainMask << (iBit%32);
653
654
    // Align the value
655
0
    alignedVal = value << (iBit%32);
656
657
    // Set the lower word first (it exists since iBit is smaller than the max bit)
658
0
    SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32] = (SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32] & ~alignedMask) | alignedVal;
659
660
    //
661
    // Upper word
662
    //
663
664
0
    if ((iBit%32!=0) && ( iBit/32 + 1 < piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ))
665
0
    {
666
        // Create the needed mask
667
0
        alignedMask = mainMask >> (32 - iBit%32);
668
669
        // Align the value
670
0
        alignedVal = value >> (32 - iBit%32);
671
672
        // Set the upper word
673
0
        SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32 + 1] = (SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32 + 1] & ~alignedMask) | alignedVal;
674
0
    }
675
676
0
}
677
678
679
UINT32
680
SYMCRYPT_CALL
681
SymCryptFdefIntMulUint32(
682
    _In_                            PCSYMCRYPT_INT  piSrc1,
683
                                    UINT32          Src2,
684
    _Out_                           PSYMCRYPT_INT   piDst )
685
0
{
686
0
    UINT32  nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
687
688
0
    SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits );
689
690
0
    UINT64 c = 0;
691
0
    for( UINT32 i=0; i<nWords; i++ )
692
0
    {
693
0
        c += SYMCRYPT_MUL32x32TO64( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[i], Src2 );
694
0
        SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = (UINT32) c;
695
0
        c >>= 32;
696
0
    }
697
698
0
    return (UINT32) c;
699
0
}
700
701
702
VOID
703
SYMCRYPT_CALL
704
SymCryptFdefIntMulSameSize(
705
    _In_                            PCSYMCRYPT_INT  piSrc1,
706
    _In_                            PCSYMCRYPT_INT  piSrc2,
707
    _Out_                           PSYMCRYPT_INT   piDst,
708
    _Out_writes_bytes_( cbScratch ) PBYTE           pbScratch,
709
                                    SIZE_T          cbScratch )
710
0
{
711
0
    SymCryptFdefIntMulMixedSize( piSrc1, piSrc2, piDst, pbScratch, cbScratch );
712
0
}
713
714
VOID
715
SYMCRYPT_CALL
716
SymCryptFdefIntSquare(
717
    _In_                            PCSYMCRYPT_INT  piSrc,
718
    _Out_                           PSYMCRYPT_INT   piDst,
719
    _Out_writes_bytes_( cbScratch ) PBYTE           pbScratch,
720
                                    SIZE_T          cbScratch )
721
0
{
722
0
    UINT32      nS = piSrc->nDigits;
723
0
    UINT32      nD = piDst->nDigits;
724
725
0
    SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( piDst->nDigits ) );
726
727
0
    SYMCRYPT_ASSERT( 2*nS <= nD );
728
729
0
    SymCryptFdefRawSquare( SYMCRYPT_FDEF_INT_PUINT32( piSrc ), nS, SYMCRYPT_FDEF_INT_PUINT32( piDst ) );
730
731
0
    if( 2*nS < nD )
732
0
    {
733
0
        SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[2 * nS * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - 2*nS) * SYMCRYPT_FDEF_DIGIT_SIZE );
734
0
    }
735
0
}
736
737
738
VOID
739
SYMCRYPT_CALL
740
SymCryptFdefRawMulC(
741
    _In_reads_(nDigits1 * SYMCRYPT_FDEF_DIGIT_NUINT32)              PCUINT32    pSrc1,
742
                                                                    UINT32      nDigits1,
743
    _In_reads_(nDigits2 * SYMCRYPT_FDEF_DIGIT_NUINT32)              PCUINT32    pSrc2,
744
                                                                    UINT32      nDigits2,
745
    _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32)   PUINT32     pDst )
746
0
{
747
0
    UINT32 nWords1 = nDigits1 * SYMCRYPT_FDEF_DIGIT_NUINT32;
748
0
    UINT32 nWords2 = nDigits2 * SYMCRYPT_FDEF_DIGIT_NUINT32;
749
750
    // Set Dst to zero
751
0
    SymCryptWipe( pDst, (nDigits1+nDigits2) * SYMCRYPT_FDEF_DIGIT_SIZE );
752
753
0
    for( UINT32 i = 0; i < nWords1; i++ )
754
0
    {
755
0
        UINT32 m = pSrc1[i];
756
0
        UINT64 c = 0;
757
0
        for( UINT32 j = 0; j < nWords2; j++ )
758
0
        {
759
            // Invariant: c < 2^32
760
0
            c += SYMCRYPT_MUL32x32TO64( pSrc2[j], m );
761
0
            c += pDst[i+j];
762
            // There is no overflow on C because the max value is
763
            // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1.
764
0
            pDst[i+j] = (UINT32) c;
765
0
            c >>= 32;
766
0
        }
767
0
        pDst[i + nWords2] = (UINT32) c;
768
0
    }
769
0
}
770
771
VOID
772
SYMCRYPT_CALL
773
SymCryptFdefRawMul(
774
    _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32)                PCUINT32    pSrc1,
775
                                                                    UINT32      nDigits1,
776
    _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32)                PCUINT32    pSrc2,
777
                                                                    UINT32      nDigits2,
778
    _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32)   PUINT32     pDst )
779
302k
{
780
302k
#if SYMCRYPT_CPU_AMD64
781
302k
    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) )
782
0
    {
783
0
        SymCryptFdefRawMulMulx( pSrc1, nDigits1, pSrc2, nDigits2, pDst );
784
302k
    } else {
785
302k
        SymCryptFdefRawMulAsm( pSrc1, nDigits1, pSrc2, nDigits2, pDst );
786
302k
    }
787
#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM
788
    SymCryptFdefRawMulAsm( pSrc1, nDigits1, pSrc2, nDigits2, pDst );
789
#else
790
    SymCryptFdefRawMulC( pSrc1, nDigits1, pSrc2, nDigits2, pDst );
791
#endif
792
302k
}
793
794
VOID
795
SYMCRYPT_CALL
796
SymCryptFdefRawSquareC(
797
    _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32)   PCUINT32    pSrc,
798
                                                        UINT32      nDigits,
799
    _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32     pDst )
800
0
{
801
0
    UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
802
803
0
    UINT32 m = 0;
804
0
    UINT64 c = 0;
805
806
    // Set Dst to zero
807
0
    SymCryptWipe( pDst, (2*nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE );
808
809
    // First Pass - Addition of the cross products x_i*x_j with i!=j
810
0
    for( UINT32 i = 0; i < nWords; i++ )
811
0
    {
812
0
        m = pSrc[i];
813
0
        c = 0;
814
0
        for( UINT32 j = i+1; j < nWords; j++ )
815
0
        {
816
            // Invariant: c < 2^32
817
0
            c += SYMCRYPT_MUL32x32TO64( pSrc[j], m );
818
0
            c += pDst[i+j];
819
            // There is no overflow on C because the max value is
820
            // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1.
821
0
            pDst[i+j] = (UINT32) c;
822
0
            c >>= 32;
823
0
        }
824
0
        pDst[i + nWords] = (UINT32) c;
825
0
    }
826
827
    // Second Pass - Shifting all results 1 bit left
828
0
    c = 0;
829
0
    for( UINT32 i = 1; i < 2*nWords; i++ )
830
0
    {
831
0
        c |= (((UINT64)pDst[i])<<1);
832
0
        pDst[i] = (UINT32)c;
833
0
        c >>= 32;
834
0
    }
835
836
    // Third Pass - Adding the squares on the even columns and propagating the sum
837
0
    c = 0;
838
0
    for( UINT32 i = 0; i < nWords; i++ )
839
0
    {
840
        //
841
        // Even column
842
        //
843
0
        m = pSrc[i];
844
0
        c += SYMCRYPT_MUL32x32TO64( m, m );
845
0
        c += pDst[2*i];
846
        // There is no overflow on C because the max value is
847
        // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1
848
849
0
        pDst[2*i] = (UINT32) c;
850
0
        c >>= 32;
851
852
        //
853
        // Odd column
854
        //
855
0
        c += pDst[2*i+1];
856
        // There is no overflow on C because the max value is
857
        // 2^32 - 1 + 2^32 - 1 = 2^33 - 2
858
859
0
        pDst[2*i+1] = (UINT32) c;
860
0
        c >>= 32;
861
0
    }
862
0
}
863
864
VOID
865
SYMCRYPT_CALL
866
SymCryptFdefRawSquare(
867
    _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32)         PCUINT32    pSrc,
868
                                                            UINT32      nDigits,
869
    _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32)     PUINT32     pDst )
870
155k
{
871
155k
#if SYMCRYPT_CPU_AMD64
872
155k
    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) )
873
0
    {
874
0
        SymCryptFdefRawSquareMulx( pSrc, nDigits, pDst );
875
155k
    } else {
876
155k
        SymCryptFdefRawSquareAsm( pSrc, nDigits, pDst );
877
155k
    }
878
#elif SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM
879
    SymCryptFdefRawSquareAsm( pSrc, nDigits, pDst );
880
#elif SYMCRYPT_CPU_X86
881
    SymCryptFdefRawMulAsm( pSrc, nDigits, pSrc, nDigits, pDst );
882
#else
883
    SymCryptFdefRawSquareC( pSrc, nDigits, pDst );
884
#endif
885
155k
}
886
887
VOID
888
SYMCRYPT_CALL
889
SymCryptFdefIntMulMixedSize(
890
    _In_                            PCSYMCRYPT_INT  piSrc1,
891
    _In_                            PCSYMCRYPT_INT  piSrc2,
892
    _Out_                           PSYMCRYPT_INT   piDst,
893
    _Out_writes_bytes_( cbScratch ) PBYTE           pbScratch,
894
                                    SIZE_T          cbScratch )
895
0
{
896
0
    UINT32      nS1 = piSrc1->nDigits;
897
0
    UINT32      nS2 = piSrc2->nDigits;
898
0
    UINT32      nD  = piDst ->nDigits;
899
900
0
    SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( piDst->nDigits ) );
901
902
0
    SYMCRYPT_ASSERT( nS1 + nS2 <= nD );
903
904
0
    SymCryptFdefRawMul( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), nS1, SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nS2, SYMCRYPT_FDEF_INT_PUINT32( piDst ) );
905
906
0
    if( nS1 + nS2 < nD )
907
0
    {
908
0
        SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[(nS1 + nS2) * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - (nS1 + nS2)) * SYMCRYPT_FDEF_DIGIT_SIZE );
909
0
    }
910
0
}
911
912
913
PSYMCRYPT_INT
914
SYMCRYPT_CALL
915
SymCryptFdefIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc )
916
8.22k
{
917
8.22k
    return &pdSrc->Int;
918
8.22k
}
919
920
VOID
921
SYMCRYPT_CALL
922
SymCryptFdefIntToDivisor(
923
    _In_                            PCSYMCRYPT_INT      piSrc,
924
    _Out_                           PSYMCRYPT_DIVISOR   pdDst,
925
                                    UINT32              totalOperations,
926
                                    UINT32              flags,
927
    _Out_writes_bytes_( cbScratch ) PBYTE               pbScratch,
928
                                    SIZE_T              cbScratch )
929
1.89k
{
930
1.89k
    UINT32      W;
931
1.89k
    UINT32      nBits;
932
1.89k
    UINT32      nWords;
933
1.89k
    UINT32      bitToTest;
934
1.89k
    UINT64      P;
935
936
1.89k
    UNREFERENCED_PARAMETER( totalOperations );
937
1.89k
    UNREFERENCED_PARAMETER( flags );
938
939
1.89k
    SYMCRYPT_CHECK_MAGIC( piSrc );
940
1.89k
    SYMCRYPT_CHECK_MAGIC( pdDst );
941
942
1.89k
    SYMCRYPT_ASSERT( piSrc->nDigits == pdDst->nDigits );
943
944
1.89k
    SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( piSrc->nDigits ) );
945
946
    //
947
    // Copy the Int.
948
    //
949
1.89k
    SymCryptFdefIntCopy( piSrc, &pdDst->Int );
950
951
    //
952
    // For an N-bit divisor M, and D-bit divisor digit size,
953
    // the value W is defined as
954
    //     floor( (2^{N+D} - 1) / M } - 2^D
955
    // which is the largest W such that (W * M + 2^D * M )< 2^{N+D}
956
    // To compute W we use a binary search.
957
    // This can be optimized, but this is the simplest side-channel safe solution.
958
    // We can compute the upper bits of W * M + 2^D * M in a simple loop.
959
    //
960
    // For now we only compute a 32-bit W for a 32-bit digit divisor size.
961
    //
962
963
1.89k
    nBits = SymCryptIntBitsizeOfValue( &pdDst->Int );
964
965
1.89k
    SYMCRYPT_ASSERT( nBits != 0 );
966
1.89k
    if( nBits == 0 )
967
0
    {
968
        // Can't create a divisor from a Int whose value is 0
969
970
        // We really should not have any callers which get here (it is a requirement that Src != 0)
971
        // We assert in CHKed builds
972
        // In release set the divisor to 1 instead
973
0
        SymCryptIntSetValueUint32( 1, &pdDst->Int );
974
0
    }
975
976
1.89k
    pdDst->nBits = nBits;
977
978
1.89k
    nWords = (nBits + 31)/32;
979
1.89k
    bitToTest = (UINT32)1 << 31;
980
1.89k
    W = 0;
981
62.4k
    while( bitToTest > 0 )
982
60.5k
    {
983
60.5k
        W |= bitToTest;
984
        // Do the multiplication
985
60.5k
        P = 0;
986
761k
        for( UINT32 i=0; i<nWords; i++ )
987
700k
        {
988
            // Invariant:
989
            // P <= 2^{2D} - 2 which ensures the mul-add doesn't generate an overflow
990
            // P  = floor( (W + 2^32)*M[0..i-1] / 2^{32*i} )
991
700k
            P += SYMCRYPT_MUL32x32TO64( W, SYMCRYPT_FDEF_INT_PUINT32( &pdDst->Int )[i] );
992
700k
            P >>= 32;
993
700k
            P += SYMCRYPT_FDEF_INT_PUINT32( &pdDst->Int )[i];
994
700k
        }
995
        // We are interested in bit N+D, and P[0] is bit nWords*D, this shift brings the relevant bit to position 0
996
60.5k
        P >>= ((nBits+31) % 32) + 1;
997
        // If the bit is 1, W*M is too large and we reset the corresponding bit in W.
998
60.5k
        W ^= bitToTest & (0 - ((UINT32)P & 1));
999
60.5k
        bitToTest >>= 1;
1000
60.5k
    }
1001
1.89k
    pdDst->td.fdef.W = W;
1002
1003
1.89k
    SYMCRYPT_SET_MAGIC( pdDst );
1004
1.89k
}
1005
1006
UINT32
1007
SYMCRYPT_CALL
1008
SymCryptFdefRawMultSubUint32(
1009
    _Inout_updates_( nUint32 + 1 )  PUINT32     pAcc,
1010
    _In_reads_( nUint32 )           PCUINT32    pSrc1,
1011
                                    UINT32      Src2,
1012
                                    UINT32      nUint32 )
1013
144k
{
1014
    //
1015
    // pAcc -= pSrc1 * Src2
1016
    // BEWARE: this is only used by the DivMod routine, and works in Words rather than Digits
1017
    // making optimizations hard.
1018
    //
1019
1020
144k
    UINT32 i;
1021
144k
    UINT64 tmul;
1022
144k
    UINT64 tsub;
1023
144k
    UINT32 c;
1024
1025
144k
    tmul = 0;
1026
144k
    c = 0;
1027
2.04M
    for( i=0; i<nUint32; i++ )
1028
1.90M
    {
1029
1.90M
        tmul += SYMCRYPT_MUL32x32TO64( pSrc1[i], Src2 );
1030
1.90M
        tsub  = (UINT64)pAcc[i] - (UINT32) tmul - c;
1031
1.90M
        pAcc[i] = (UINT32) tsub;
1032
1.90M
        c = (tsub >> 32) & 1;
1033
1.90M
        tmul >>= 32;
1034
1.90M
    }
1035
1036
    // Writing the last word is strictly speaking not necessary, but a really good check that things are going right.
1037
    // We can remove the write, but still need the computation of c so it gains very little.
1038
1039
144k
    tsub = (UINT64) pAcc[i] - (UINT32) tmul - c;
1040
144k
    pAcc[i] = (UINT32) tsub;
1041
144k
    c = (tsub >> 32) & 1;
1042
1043
144k
    return c;
1044
144k
}
1045
1046
UINT32
1047
SYMCRYPT_CALL
1048
SymCryptFdefRawMaskedAddSubdigit(
1049
    _Inout_updates_( nUint32 )  PUINT32     pAcc,
1050
    _In_reads_( nUint32 )       PCUINT32    pSrc,
1051
                                UINT32      mask,
1052
                                UINT32      nUint32 )
1053
144k
{
1054
144k
    UINT32 i;
1055
144k
    UINT64 t;
1056
1057
144k
    t = 0;
1058
2.04M
    for( i=0; i<nUint32; i++ )
1059
1.90M
    {
1060
1.90M
        t = t + pAcc[i] + (mask & pSrc[i]);
1061
1.90M
        pAcc[i] = (UINT32) t;
1062
1.90M
        t >>= 32;
1063
1.90M
    }
1064
1065
144k
    return (UINT32) t;
1066
144k
}
1067
1068
UINT32
1069
SYMCRYPT_CALL
1070
SymCryptFdefRawMaskedAdd(
1071
    _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 )  PUINT32     pAcc,
1072
    _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 )       PCUINT32    pSrc,
1073
                                                            UINT32      mask,
1074
                                                            UINT32      nDigits )
1075
0
{
1076
0
    return SymCryptFdefRawMaskedAddSubdigit( pAcc, pSrc, mask, nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 );
1077
0
}
1078
1079
UINT32
1080
SYMCRYPT_CALL
1081
SymCryptFdefRawMaskedSub(
1082
    _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 )  PUINT32     pAcc,
1083
    _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 )       PCUINT32    pSrc,
1084
                                                            UINT32      mask,
1085
                                                            UINT32      nDigits )
1086
0
{
1087
0
    UINT32 i;
1088
0
    UINT64 t;
1089
0
    UINT32 c;
1090
1091
0
    c = 0;
1092
0
    for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ )
1093
0
    {
1094
0
        t = (UINT64) pAcc[i] - (mask & pSrc[i]) - c;
1095
0
        pAcc[i] = (UINT32) t;
1096
0
        c = (UINT32)(t >>= 32) & 1;
1097
0
    }
1098
1099
0
    return c;
1100
0
}
1101
1102
1103
1104
VOID
1105
SYMCRYPT_CALL
1106
SymCryptFdefRawDivMod(
1107
    _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32)           PCUINT32                pNum,
1108
                                                                UINT32                  nDigits,
1109
    _In_                                                        PCSYMCRYPT_DIVISOR      pdDivisor,
1110
    _Out_writes_opt_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32)     PUINT32                 pQuotient,
1111
    _Out_writes_opt_(SYMCRYPT_OBJ_NUINT32(pdDivisor))           PUINT32                 pRemainder,
1112
    _Out_writes_bytes_( cbScratch )                             PBYTE                   pbScratch,
1113
                                                                SIZE_T                  cbScratch )
1114
8.32k
{
1115
8.32k
    UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32;
1116
8.32k
    UINT32 activeDivWords = (pdDivisor->nBits + 8 * sizeof(UINT32) - 1) / (8 * sizeof( UINT32 ) );
1117
8.32k
    UINT32 remainderWords = SYMCRYPT_OBJ_NUINT32( pdDivisor );
1118
1119
8.32k
    UINT32 cbScratchNeeded = (nWords+4) * sizeof( UINT32 );
1120
8.32k
    PUINT32 pTmp = (PUINT32) pbScratch;
1121
8.32k
    UINT32 Qest;
1122
8.32k
    UINT32 Q;
1123
8.32k
    UINT32 c;
1124
8.32k
    UINT32 d;
1125
8.32k
    UINT32 shift;
1126
8.32k
    UINT32 X0, X1;
1127
8.32k
    UINT32 W;
1128
8.32k
    UINT64 T;
1129
8.32k
    UINT32 nQ;
1130
1131
8.32k
    SYMCRYPT_ASSERT( cbScratch >= cbScratchNeeded );
1132
8.32k
    SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch );
1133
1134
8.32k
    if( nWords < activeDivWords )
1135
0
    {
1136
        //
1137
        // input is smaller in size than the significant size of the divisor, no division to do.
1138
        // Note that both values in the if() statement are public, so this does not create a side channel.
1139
        //
1140
1141
        // Set quotient to zero, and the remainder to the input value
1142
0
        if( pQuotient != NULL )
1143
0
        {
1144
0
            SymCryptWipe( pQuotient, nDigits * SYMCRYPT_FDEF_DIGIT_SIZE );
1145
0
        }
1146
1147
0
        if( pRemainder != NULL )
1148
0
        {
1149
0
            SYMCRYPT_ASSERT( remainderWords >= nWords );
1150
0
            memcpy( pRemainder, pNum, nWords * sizeof( UINT32 ) );
1151
0
            SymCryptWipe( &pRemainder[nWords], (remainderWords - nWords) * sizeof( UINT32 ) );        // clear the rest of the remainder words
1152
0
        }
1153
1154
0
        SymCryptFdefClaimScratch( pbScratch, cbScratch, cbScratchNeeded );
1155
0
        goto cleanup;
1156
0
    }
1157
1158
    //
1159
    // We have two zero words in front and two zero words behind the tmp value to allow unrestricted accesses.
1160
    // We keep the explicit offset of 2 rather than adjust the pTmp pointer to avoid negative indexes which appear
1161
    // to be buffer overflows, and cause trouble with unsigned computations of negative index values that overflow
1162
    // to 2^32 - 1 on a 64-bit CPU.
1163
    //
1164
8.32k
    pTmp[0] = pTmp[1] = 0;
1165
8.32k
    memcpy( &pTmp[2], pNum, nWords * sizeof( UINT32 ) );
1166
8.32k
    pTmp[nWords + 2] = pTmp[nWords + 3] = 0;
1167
8.32k
    shift = (0 - pdDivisor->nBits) & 31;   // # bits we have to shift top words to the left to align with the W value
1168
1169
    // We generate the quotient words one at a time, starting at the most significant position
1170
    // The top (divWords - 1) words are always zero
1171
1172
8.32k
    if( pQuotient != NULL )
1173
0
    {
1174
0
        SymCryptWipe( &pQuotient[nWords - activeDivWords + 1], (activeDivWords - 1) * sizeof( UINT32 ) );
1175
0
    }
1176
1177
8.32k
    nQ = nWords - activeDivWords + 1;
1178
1179
    // There is always at least one word of Q to be computed, so we can use a do-while loop which
1180
    // also avoids the UINT32 underflow.
1181
8.32k
    do
1182
144k
    {
1183
144k
        nQ--;
1184
144k
        X0 = ( ((UINT64) pTmp[nQ + activeDivWords + 2] << 32) + pTmp[nQ + activeDivWords + 1] ) >> (32 - shift);
1185
144k
        X1 = ( ((UINT64) pTmp[nQ + activeDivWords + 1] << 32) + pTmp[nQ + activeDivWords + 0] ) >> (32 - shift);
1186
1187
144k
        W = (UINT32) pdDivisor->td.fdef.W;
1188
144k
        T = SYMCRYPT_MUL32x32TO64( W, X0 ) + (((UINT64)X0) << 32) + X1 + ((W>>1) & ((UINT32)0 - (X1 >> 31)));
1189
144k
        Qest = (UINT32)(T >> 32);
1190
        // At this point the estimator is correct or one too small, add one but don't overflow
1191
144k
        Qest += 1;
1192
144k
        Qest += SYMCRYPT_MASK32_ZERO( Qest );
1193
1194
144k
        c = SymCryptFdefRawMultSubUint32( &pTmp[nQ+2], SYMCRYPT_FDEF_INT_PUINT32( &pdDivisor->Int ), Qest, activeDivWords );
1195
144k
        Q = Qest - c;
1196
144k
        d = SymCryptFdefRawMaskedAddSubdigit( &pTmp[nQ+2], SYMCRYPT_FDEF_INT_PUINT32( &pdDivisor->Int ), (0-c), activeDivWords );
1197
144k
        SYMCRYPT_ASSERT( c == d );
1198
144k
        SYMCRYPT_ASSERT( pTmp[nQ + activeDivWords+2] == (0 - c) );
1199
1200
144k
        if( pQuotient != NULL )
1201
0
        {
1202
0
            pQuotient[nQ] = Q;
1203
0
        }
1204
144k
    } while( nQ > 0 );
1205
1206
8.32k
    if( pRemainder != NULL )
1207
8.32k
    {
1208
8.32k
        memcpy( pRemainder, pTmp+2, activeDivWords * sizeof( UINT32 ) );
1209
8.32k
        SymCryptWipe( &pRemainder[activeDivWords], (remainderWords - activeDivWords) * sizeof( UINT32 ) );
1210
8.32k
    }
1211
1212
8.32k
cleanup:
1213
8.32k
    return;         // label needs a statement to follow it...
1214
8.32k
}
1215
1216
1217
VOID
1218
SYMCRYPT_CALL
1219
SymCryptFdefIntDivMod(
1220
    _In_                            PCSYMCRYPT_INT      piSrc,
1221
    _In_                            PCSYMCRYPT_DIVISOR  pdDivisor,
1222
    _Out_opt_                       PSYMCRYPT_INT       piQuotient,
1223
    _Out_opt_                       PSYMCRYPT_INT       piRemainder,
1224
    _Out_writes_bytes_( cbScratch ) PBYTE               pbScratch,
1225
                                    SIZE_T              cbScratch )
1226
603
{
1227
603
    UINT32  nDigits = SYMCRYPT_OBJ_NDIGITS( piSrc );
1228
1229
603
    SYMCRYPT_ASSERT( piQuotient  == NULL || piQuotient->nDigits >= piSrc->nDigits );
1230
603
    SYMCRYPT_ASSERT( piRemainder == NULL || piRemainder->nDigits >= pdDivisor->nDigits );
1231
1232
603
    SymCryptFdefRawDivMod(
1233
603
        SYMCRYPT_FDEF_INT_PUINT32( piSrc ),
1234
603
        nDigits,
1235
603
        pdDivisor,
1236
603
        piQuotient  == NULL ? NULL : SYMCRYPT_FDEF_INT_PUINT32( piQuotient ),
1237
603
        piRemainder == NULL ? NULL : SYMCRYPT_FDEF_INT_PUINT32( piRemainder ),
1238
603
        pbScratch,
1239
603
        cbScratch
1240
603
        );
1241
1242
603
    if ((piQuotient != NULL) && (piQuotient->nDigits > piSrc->nDigits))
1243
0
    {
1244
0
        SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piQuotient )[piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32], (piQuotient->nDigits - piSrc->nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE );
1245
0
    }
1246
1247
603
    if ((piRemainder != NULL) && (piRemainder->nDigits > pdDivisor->nDigits))
1248
0
    {
1249
0
        SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piRemainder )[pdDivisor->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32], (piRemainder->nDigits - pdDivisor->nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE );
1250
0
    }
1251
603
}