Coverage Report

Created: 2024-11-21 07:03

/src/mpdecimal-4.0.0/libmpdec/umodarith.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2008-2024 Stefan Krah. All rights reserved.
3
 *
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions
6
 * are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
 * SUCH DAMAGE.
25
 */
26
27
28
#ifndef LIBMPDEC_UMODARITH_H_
29
#define LIBMPDEC_UMODARITH_H_
30
31
32
#include "constants.h"
33
#include "mpdecimal.h"
34
#include "typearith.h"
35
36
37
/* Bignum: Low level routines for unsigned modular arithmetic. These are
38
   used in the fast convolution functions for very large coefficients. */
39
40
41
/**************************************************************************/
42
/*                        ANSI modular arithmetic                         */
43
/**************************************************************************/
44
45
/*
46
 * Restrictions: a < m and b < m
47
 * ACL2 proof: umodarith.lisp: addmod-correct
48
 */
49
static inline mpd_uint_t
50
addmod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
51
0
{
52
0
    mpd_uint_t s;
53
54
0
    s = a + b;
55
0
    s = (s < a) ? s - m : s;
56
0
    s = (s >= m) ? s - m : s;
57
58
0
    return s;
59
0
}
Unexecuted instantiation: convolute.c:addmod
Unexecuted instantiation: crt.c:addmod
Unexecuted instantiation: fourstep.c:addmod
Unexecuted instantiation: numbertheory.c:addmod
Unexecuted instantiation: sixstep.c:addmod
Unexecuted instantiation: difradix2.c:addmod
60
61
/*
62
 * Restrictions: a < m and b < m
63
 * ACL2 proof: umodarith.lisp: submod-2-correct
64
 */
65
static inline mpd_uint_t
66
submod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
67
0
{
68
0
    mpd_uint_t d;
69
70
0
    d = a - b;
71
0
    d = (a < b) ? d + m : d;
72
73
0
    return d;
74
0
}
Unexecuted instantiation: convolute.c:submod
Unexecuted instantiation: crt.c:submod
Unexecuted instantiation: fourstep.c:submod
Unexecuted instantiation: numbertheory.c:submod
Unexecuted instantiation: sixstep.c:submod
Unexecuted instantiation: difradix2.c:submod
75
76
/*
77
 * Restrictions: a < 2m and b < 2m
78
 * ACL2 proof: umodarith.lisp: section ext-submod
79
 */
80
static inline mpd_uint_t
81
ext_submod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
82
0
{
83
0
    mpd_uint_t d;
84
85
0
    a = (a >= m) ? a - m : a;
86
0
    b = (b >= m) ? b - m : b;
87
88
0
    d = a - b;
89
0
    d = (a < b) ? d + m : d;
90
91
0
    return d;
92
0
}
Unexecuted instantiation: convolute.c:ext_submod
Unexecuted instantiation: crt.c:ext_submod
Unexecuted instantiation: fourstep.c:ext_submod
Unexecuted instantiation: numbertheory.c:ext_submod
Unexecuted instantiation: sixstep.c:ext_submod
Unexecuted instantiation: difradix2.c:ext_submod
93
94
/*
95
 * Reduce double word modulo m.
96
 * Restrictions: m != 0
97
 * ACL2 proof: umodarith.lisp: section dw-reduce
98
 */
99
static inline mpd_uint_t
100
dw_reduce(mpd_uint_t hi, mpd_uint_t lo, mpd_uint_t m)
101
0
{
102
0
    mpd_uint_t r1, r2, w;
103
104
0
    _mpd_div_word(&w, &r1, hi, m);
105
0
    _mpd_div_words(&w, &r2, r1, lo, m);
106
107
0
    return r2;
108
0
}
Unexecuted instantiation: convolute.c:dw_reduce
Unexecuted instantiation: crt.c:dw_reduce
Unexecuted instantiation: fourstep.c:dw_reduce
Unexecuted instantiation: numbertheory.c:dw_reduce
Unexecuted instantiation: sixstep.c:dw_reduce
Unexecuted instantiation: difradix2.c:dw_reduce
109
110
/*
111
 * Subtract double word from a.
112
 * Restrictions: a < m
113
 * ACL2 proof: umodarith.lisp: section dw-submod
114
 */
115
static inline mpd_uint_t
116
dw_submod(mpd_uint_t a, mpd_uint_t hi, mpd_uint_t lo, mpd_uint_t m)
117
0
{
118
0
    mpd_uint_t d, r;
119
120
0
    r = dw_reduce(hi, lo, m);
121
0
    d = a - r;
122
0
    d = (a < r) ? d + m : d;
123
124
0
    return d;
125
0
}
Unexecuted instantiation: convolute.c:dw_submod
Unexecuted instantiation: crt.c:dw_submod
Unexecuted instantiation: fourstep.c:dw_submod
Unexecuted instantiation: numbertheory.c:dw_submod
Unexecuted instantiation: sixstep.c:dw_submod
Unexecuted instantiation: difradix2.c:dw_submod
126
127
#ifdef CONFIG_64
128
129
/**************************************************************************/
130
/*                        64-bit modular arithmetic                       */
131
/**************************************************************************/
132
133
/*
134
 * A proof of the algorithm is in literature/mulmod-64.txt. An ACL2
135
 * proof is in umodarith.lisp: section "Fast modular reduction".
136
 *
137
 * Algorithm: calculate (a * b) % p:
138
 *
139
 *   a) hi, lo <- a * b       # Calculate a * b.
140
 *
141
 *   b) hi, lo <-  R(hi, lo)  # Reduce modulo p.
142
 *
143
 *   c) Repeat step b) until 0 <= hi * 2**64 + lo < 2*p.
144
 *
145
 *   d) If the result is less than p, return lo. Otherwise return lo - p.
146
 */
147
148
static inline mpd_uint_t
149
x64_mulmod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
150
0
{
151
0
    mpd_uint_t hi, lo, x, y;
152
153
154
0
    _mpd_mul_words(&hi, &lo, a, b);
155
156
0
    if (m & (1ULL<<32)) { /* P1 */
157
158
        /* first reduction */
159
0
        x = y = hi;
160
0
        hi >>= 32;
161
162
0
        x = lo - x;
163
0
        if (x > lo) hi--;
164
165
0
        y <<= 32;
166
0
        lo = y + x;
167
0
        if (lo < y) hi++;
168
169
        /* second reduction */
170
0
        x = y = hi;
171
0
        hi >>= 32;
172
173
0
        x = lo - x;
174
0
        if (x > lo) hi--;
175
176
0
        y <<= 32;
177
0
        lo = y + x;
178
0
        if (lo < y) hi++;
179
180
0
        return (hi || lo >= m ? lo - m : lo);
181
0
    }
182
0
    else if (m & (1ULL<<34)) { /* P2 */
183
184
        /* first reduction */
185
0
        x = y = hi;
186
0
        hi >>= 30;
187
188
0
        x = lo - x;
189
0
        if (x > lo) hi--;
190
191
0
        y <<= 34;
192
0
        lo = y + x;
193
0
        if (lo < y) hi++;
194
195
        /* second reduction */
196
0
        x = y = hi;
197
0
        hi >>= 30;
198
199
0
        x = lo - x;
200
0
        if (x > lo) hi--;
201
202
0
        y <<= 34;
203
0
        lo = y + x;
204
0
        if (lo < y) hi++;
205
206
        /* third reduction */
207
0
        x = y = hi;
208
0
        hi >>= 30;
209
210
0
        x = lo - x;
211
0
        if (x > lo) hi--;
212
213
0
        y <<= 34;
214
0
        lo = y + x;
215
0
        if (lo < y) hi++;
216
217
0
        return (hi || lo >= m ? lo - m : lo);
218
0
    }
219
0
    else { /* P3 */
220
221
        /* first reduction */
222
0
        x = y = hi;
223
0
        hi >>= 24;
224
225
0
        x = lo - x;
226
0
        if (x > lo) hi--;
227
228
0
        y <<= 40;
229
0
        lo = y + x;
230
0
        if (lo < y) hi++;
231
232
        /* second reduction */
233
0
        x = y = hi;
234
0
        hi >>= 24;
235
236
0
        x = lo - x;
237
0
        if (x > lo) hi--;
238
239
0
        y <<= 40;
240
0
        lo = y + x;
241
0
        if (lo < y) hi++;
242
243
        /* third reduction */
244
0
        x = y = hi;
245
0
        hi >>= 24;
246
247
0
        x = lo - x;
248
0
        if (x > lo) hi--;
249
250
0
        y <<= 40;
251
0
        lo = y + x;
252
0
        if (lo < y) hi++;
253
254
0
        return (hi || lo >= m ? lo - m : lo);
255
0
    }
256
0
}
Unexecuted instantiation: convolute.c:x64_mulmod
Unexecuted instantiation: crt.c:x64_mulmod
Unexecuted instantiation: fourstep.c:x64_mulmod
Unexecuted instantiation: numbertheory.c:x64_mulmod
Unexecuted instantiation: sixstep.c:x64_mulmod
Unexecuted instantiation: difradix2.c:x64_mulmod
257
258
static inline void
259
x64_mulmod2c(mpd_uint_t *a, mpd_uint_t *b, mpd_uint_t w, mpd_uint_t m)
260
0
{
261
0
    *a = x64_mulmod(*a, w, m);
262
0
    *b = x64_mulmod(*b, w, m);
263
0
}
Unexecuted instantiation: convolute.c:x64_mulmod2c
Unexecuted instantiation: crt.c:x64_mulmod2c
Unexecuted instantiation: fourstep.c:x64_mulmod2c
Unexecuted instantiation: numbertheory.c:x64_mulmod2c
Unexecuted instantiation: sixstep.c:x64_mulmod2c
Unexecuted instantiation: difradix2.c:x64_mulmod2c
264
265
static inline void
266
x64_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
267
            mpd_uint_t m)
268
0
{
269
0
    *a0 = x64_mulmod(*a0, b0, m);
270
0
    *a1 = x64_mulmod(*a1, b1, m);
271
0
}
Unexecuted instantiation: convolute.c:x64_mulmod2
Unexecuted instantiation: crt.c:x64_mulmod2
Unexecuted instantiation: fourstep.c:x64_mulmod2
Unexecuted instantiation: numbertheory.c:x64_mulmod2
Unexecuted instantiation: sixstep.c:x64_mulmod2
Unexecuted instantiation: difradix2.c:x64_mulmod2
272
273
static inline mpd_uint_t
274
x64_powmod(mpd_uint_t base, mpd_uint_t exp, mpd_uint_t umod)
275
0
{
276
0
    mpd_uint_t r = 1;
277
278
0
    while (exp > 0) {
279
0
        if (exp & 1)
280
0
            r = x64_mulmod(r, base, umod);
281
0
        base = x64_mulmod(base, base, umod);
282
0
        exp >>= 1;
283
0
    }
284
285
0
    return r;
286
0
}
Unexecuted instantiation: convolute.c:x64_powmod
Unexecuted instantiation: crt.c:x64_powmod
Unexecuted instantiation: fourstep.c:x64_powmod
Unexecuted instantiation: numbertheory.c:x64_powmod
Unexecuted instantiation: sixstep.c:x64_powmod
Unexecuted instantiation: difradix2.c:x64_powmod
287
288
/* END CONFIG_64 */
289
#else /* CONFIG_32 */
290
291
292
/**************************************************************************/
293
/*                        32-bit modular arithmetic                       */
294
/**************************************************************************/
295
296
#if defined(ANSI)
297
#if !defined(LEGACY_COMPILER)
298
/* HAVE_UINT64_T */
299
static inline mpd_uint_t
300
std_mulmod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
301
{
302
    return ((mpd_uuint_t) a * b) % m;
303
}
304
305
static inline void
306
std_mulmod2c(mpd_uint_t *a, mpd_uint_t *b, mpd_uint_t w, mpd_uint_t m)
307
{
308
    *a = ((mpd_uuint_t) *a * w) % m;
309
    *b = ((mpd_uuint_t) *b * w) % m;
310
}
311
312
static inline void
313
std_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
314
            mpd_uint_t m)
315
{
316
    *a0 = ((mpd_uuint_t) *a0 * b0) % m;
317
    *a1 = ((mpd_uuint_t) *a1 * b1) % m;
318
}
319
/* END HAVE_UINT64_T */
320
#else
321
/* LEGACY_COMPILER */
322
static inline mpd_uint_t
323
std_mulmod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
324
{
325
    mpd_uint_t hi, lo, q, r;
326
    _mpd_mul_words(&hi, &lo, a, b);
327
    _mpd_div_words(&q, &r, hi, lo, m);
328
    return r;
329
}
330
331
static inline void
332
std_mulmod2c(mpd_uint_t *a, mpd_uint_t *b, mpd_uint_t w, mpd_uint_t m)
333
{
334
    *a = std_mulmod(*a, w, m);
335
    *b = std_mulmod(*b, w, m);
336
}
337
338
static inline void
339
std_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
340
            mpd_uint_t m)
341
{
342
    *a0 = std_mulmod(*a0, b0, m);
343
    *a1 = std_mulmod(*a1, b1, m);
344
}
345
/* END LEGACY_COMPILER */
346
#endif
347
348
static inline mpd_uint_t
349
std_powmod(mpd_uint_t base, mpd_uint_t exp, mpd_uint_t umod)
350
{
351
    mpd_uint_t r = 1;
352
353
    while (exp > 0) {
354
        if (exp & 1)
355
            r = std_mulmod(r, base, umod);
356
        base = std_mulmod(base, base, umod);
357
        exp >>= 1;
358
    }
359
360
    return r;
361
}
362
#endif /* ANSI CONFIG_32 */
363
364
365
/**************************************************************************/
366
/*                    Pentium Pro modular arithmetic                      */
367
/**************************************************************************/
368
369
/*
370
 * A proof of the algorithm is in literature/mulmod-ppro.txt. The FPU
371
 * control word must be set to 64-bit precision and truncation mode
372
 * prior to using these functions.
373
 *
374
 * Algorithm: calculate (a * b) % p:
375
 *
376
 *   p    := prime < 2**31
377
 *   pinv := (long double)1.0 / p (precalculated)
378
 *
379
 *   a) n = a * b              # Calculate exact product.
380
 *   b) qest = n * pinv        # Calculate estimate for q = n / p.
381
 *   c) q = (qest+2**63)-2**63 # Truncate qest to the exact quotient.
382
 *   d) r = n - q * p          # Calculate remainder.
383
 *
384
 * Remarks:
385
 *
386
 *   - p = dmod and pinv = dinvmod.
387
 *   - dinvmod points to an array of three uint32_t, which is interpreted
388
 *     as an 80 bit long double by fldt.
389
 *   - Intel compilers prior to version 11 do not seem to handle the
390
 *     __GNUC__ inline assembly correctly.
391
 *   - random tests are provided in tests/extended/ppro_mulmod.c
392
 */
393
394
#if defined(PPRO)
395
#if defined(ASM)
396
397
/* Return (a * b) % dmod */
398
static inline mpd_uint_t
399
ppro_mulmod(mpd_uint_t a, mpd_uint_t b, double *dmod, uint32_t *dinvmod)
400
{
401
    mpd_uint_t retval;
402
403
    __asm__ (
404
            "fildl  %2\n\t"
405
            "fildl  %1\n\t"
406
            "fmulp  %%st, %%st(1)\n\t"
407
            "fldt   (%4)\n\t"
408
            "fmul   %%st(1), %%st\n\t"
409
            "flds   %5\n\t"
410
            "fadd   %%st, %%st(1)\n\t"
411
            "fsubrp %%st, %%st(1)\n\t"
412
            "fldl   (%3)\n\t"
413
            "fmulp  %%st, %%st(1)\n\t"
414
            "fsubrp %%st, %%st(1)\n\t"
415
            "fistpl %0\n\t"
416
            : "=m" (retval)
417
            : "m" (a), "m" (b), "r" (dmod), "r" (dinvmod), "m" (MPD_TWO63)
418
            : "st", "memory"
419
    );
420
421
    return retval;
422
}
423
424
/*
425
 * Two modular multiplications in parallel:
426
 *      *a0 = (*a0 * w) % dmod
427
 *      *a1 = (*a1 * w) % dmod
428
 */
429
static inline void
430
ppro_mulmod2c(mpd_uint_t *a0, mpd_uint_t *a1, mpd_uint_t w,
431
              double *dmod, uint32_t *dinvmod)
432
{
433
    __asm__ (
434
            "fildl  %2\n\t"
435
            "fildl  (%1)\n\t"
436
            "fmul   %%st(1), %%st\n\t"
437
            "fxch   %%st(1)\n\t"
438
            "fildl  (%0)\n\t"
439
            "fmulp  %%st, %%st(1) \n\t"
440
            "fldt   (%4)\n\t"
441
            "flds   %5\n\t"
442
            "fld    %%st(2)\n\t"
443
            "fmul   %%st(2)\n\t"
444
            "fadd   %%st(1)\n\t"
445
            "fsub   %%st(1)\n\t"
446
            "fmull  (%3)\n\t"
447
            "fsubrp %%st, %%st(3)\n\t"
448
            "fxch   %%st(2)\n\t"
449
            "fistpl (%0)\n\t"
450
            "fmul   %%st(2)\n\t"
451
            "fadd   %%st(1)\n\t"
452
            "fsubp  %%st, %%st(1)\n\t"
453
            "fmull  (%3)\n\t"
454
            "fsubrp %%st, %%st(1)\n\t"
455
            "fistpl (%1)\n\t"
456
            : : "r" (a0), "r" (a1), "m" (w),
457
                "r" (dmod), "r" (dinvmod),
458
                "m" (MPD_TWO63)
459
            : "st", "memory"
460
    );
461
}
462
463
/*
464
 * Two modular multiplications in parallel:
465
 *      *a0 = (*a0 * b0) % dmod
466
 *      *a1 = (*a1 * b1) % dmod
467
 */
468
static inline void
469
ppro_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
470
             double *dmod, uint32_t *dinvmod)
471
{
472
    __asm__ (
473
            "fildl  %3\n\t"
474
            "fildl  (%2)\n\t"
475
            "fmulp  %%st, %%st(1)\n\t"
476
            "fildl  %1\n\t"
477
            "fildl  (%0)\n\t"
478
            "fmulp  %%st, %%st(1)\n\t"
479
            "fldt   (%5)\n\t"
480
            "fld    %%st(2)\n\t"
481
            "fmul   %%st(1), %%st\n\t"
482
            "fxch   %%st(1)\n\t"
483
            "fmul   %%st(2), %%st\n\t"
484
            "flds   %6\n\t"
485
            "fldl   (%4)\n\t"
486
            "fxch   %%st(3)\n\t"
487
            "fadd   %%st(1), %%st\n\t"
488
            "fxch   %%st(2)\n\t"
489
            "fadd   %%st(1), %%st\n\t"
490
            "fxch   %%st(2)\n\t"
491
            "fsub   %%st(1), %%st\n\t"
492
            "fxch   %%st(2)\n\t"
493
            "fsubp  %%st, %%st(1)\n\t"
494
            "fxch   %%st(1)\n\t"
495
            "fmul   %%st(2), %%st\n\t"
496
            "fxch   %%st(1)\n\t"
497
            "fmulp  %%st, %%st(2)\n\t"
498
            "fsubrp %%st, %%st(3)\n\t"
499
            "fsubrp %%st, %%st(1)\n\t"
500
            "fxch   %%st(1)\n\t"
501
            "fistpl (%2)\n\t"
502
            "fistpl (%0)\n\t"
503
            : : "r" (a0), "m" (b0), "r" (a1), "m" (b1),
504
                "r" (dmod), "r" (dinvmod),
505
                "m" (MPD_TWO63)
506
            : "st", "memory"
507
    );
508
}
509
/* END PPRO GCC ASM */
510
#elif defined(MASM)
511
512
/* Return (a * b) % dmod */
513
static inline mpd_uint_t __cdecl
514
ppro_mulmod(mpd_uint_t a, mpd_uint_t b, double *dmod, uint32_t *dinvmod)
515
{
516
    mpd_uint_t retval;
517
518
    __asm {
519
        mov     eax, dinvmod
520
        mov     edx, dmod
521
        fild    b
522
        fild    a
523
        fmulp   st(1), st
524
        fld     TBYTE PTR [eax]
525
        fmul    st, st(1)
526
        fld     MPD_TWO63
527
        fadd    st(1), st
528
        fsubp   st(1), st
529
        fld     QWORD PTR [edx]
530
        fmulp   st(1), st
531
        fsubp   st(1), st
532
        fistp   retval
533
    }
534
535
    return retval;
536
}
537
538
/*
539
 * Two modular multiplications in parallel:
540
 *      *a0 = (*a0 * w) % dmod
541
 *      *a1 = (*a1 * w) % dmod
542
 */
543
static inline mpd_uint_t __cdecl
544
ppro_mulmod2c(mpd_uint_t *a0, mpd_uint_t *a1, mpd_uint_t w,
545
              double *dmod, uint32_t *dinvmod)
546
{
547
    __asm {
548
        mov     ecx, dmod
549
        mov     edx, a1
550
        mov     ebx, dinvmod
551
        mov     eax, a0
552
        fild    w
553
        fild    DWORD PTR [edx]
554
        fmul    st, st(1)
555
        fxch    st(1)
556
        fild    DWORD PTR [eax]
557
        fmulp   st(1), st
558
        fld     TBYTE PTR [ebx]
559
        fld     MPD_TWO63
560
        fld     st(2)
561
        fmul    st, st(2)
562
        fadd    st, st(1)
563
        fsub    st, st(1)
564
        fmul    QWORD PTR [ecx]
565
        fsubp   st(3), st
566
        fxch    st(2)
567
        fistp   DWORD PTR [eax]
568
        fmul    st, st(2)
569
        fadd    st, st(1)
570
        fsubrp  st(1), st
571
        fmul    QWORD PTR [ecx]
572
        fsubp   st(1), st
573
        fistp   DWORD PTR [edx]
574
    }
575
}
576
577
/*
578
 * Two modular multiplications in parallel:
579
 *      *a0 = (*a0 * b0) % dmod
580
 *      *a1 = (*a1 * b1) % dmod
581
 */
582
static inline void __cdecl
583
ppro_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
584
             double *dmod, uint32_t *dinvmod)
585
{
586
    __asm {
587
        mov     ecx, dmod
588
        mov     edx, a1
589
        mov     ebx, dinvmod
590
        mov     eax, a0
591
        fild    b1
592
        fild    DWORD PTR [edx]
593
        fmulp   st(1), st
594
        fild    b0
595
        fild    DWORD PTR [eax]
596
        fmulp   st(1), st
597
        fld     TBYTE PTR [ebx]
598
        fld     st(2)
599
        fmul    st, st(1)
600
        fxch    st(1)
601
        fmul    st, st(2)
602
        fld     DWORD PTR MPD_TWO63
603
        fld     QWORD PTR [ecx]
604
        fxch    st(3)
605
        fadd    st, st(1)
606
        fxch    st(2)
607
        fadd    st, st(1)
608
        fxch    st(2)
609
        fsub    st, st(1)
610
        fxch    st(2)
611
        fsubrp  st(1), st
612
        fxch    st(1)
613
        fmul    st, st(2)
614
        fxch    st(1)
615
        fmulp   st(2), st
616
        fsubp   st(3), st
617
        fsubp   st(1), st
618
        fxch    st(1)
619
        fistp   DWORD PTR [edx]
620
        fistp   DWORD PTR [eax]
621
    }
622
}
623
#endif /* PPRO MASM (_MSC_VER) */
624
625
626
/* Return (base ** exp) % dmod */
627
static inline mpd_uint_t
628
ppro_powmod(mpd_uint_t base, mpd_uint_t exp, double *dmod, uint32_t *dinvmod)
629
{
630
    mpd_uint_t r = 1;
631
632
    while (exp > 0) {
633
        if (exp & 1)
634
            r = ppro_mulmod(r, base, dmod, dinvmod);
635
        base = ppro_mulmod(base, base, dmod, dinvmod);
636
        exp >>= 1;
637
    }
638
639
    return r;
640
}
641
#endif /* PPRO */
642
#endif /* CONFIG_32 */
643
644
645
#endif /* LIBMPDEC_UMODARITH_H_ */