Coverage Report

Created: 2025-03-06 06:58

/src/gmp/mpn/mulmod_bknp1.c
Line
Count
Source (jump to first uncovered line)
1
/* Mulptiplication mod B^n+1, for small operands.
2
3
   Contributed to the GNU project by Marco Bodrato.
4
5
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2020-2022 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
#include "gmp-impl.h"
38
39
#ifndef MOD_BKNP1_USE11
40
#define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0))
41
#endif
42
#ifndef MOD_BKNP1_ONLY3
43
#define MOD_BKNP1_ONLY3 0
44
#endif
45
46
/* {rp, (k - 1) * n} = {op, k * n + 1} % (B^{k*n}+1) / (B^n+1) */
47
static void
48
_mpn_modbknp1dbnp1_n (mp_ptr rp, mp_srcptr op, mp_size_t n, unsigned k)
49
0
{
50
0
  mp_limb_t hl;
51
0
  mp_srcptr hp;
52
0
  unsigned i;
53
54
#if MOD_BKNP1_ONLY3
55
  ASSERT (k == 3);
56
  k = 3;
57
#endif
58
0
  ASSERT (k > 2);
59
0
  ASSERT (k % 2 == 1);
60
61
0
  --k;
62
63
0
  rp += k * n;
64
0
  op += k * n;
65
0
  hp = op;
66
0
  hl = hp[n]; /* initial op[k*n]. */
67
0
  ASSERT (hl < GMP_NUMB_MAX - 1);
68
69
0
#if MOD_BKNP1_ONLY3 == 0
70
  /* The first MPN_INCR_U (rp + n, 1, cy); in the loop should be
71
     rp[n] = cy;            */
72
0
  *rp = 0;
73
0
#endif
74
75
0
  i = k >> 1;
76
0
  do
77
0
   {
78
0
     mp_limb_t cy, bw;
79
0
     rp -= n;
80
0
     op -= n;
81
0
     cy = hl + mpn_add_n (rp, op, hp, n);
82
#if MOD_BKNP1_ONLY3
83
     rp[n] = cy;
84
#else
85
0
     MPN_INCR_U (rp + n, (k - i * 2) * n + 1, cy);
86
0
#endif
87
0
     rp -= n;
88
0
     op -= n;
89
0
     bw = hl + mpn_sub_n (rp, op, hp, n);
90
0
     MPN_DECR_U (rp + n, (k - i * 2 + 1) * n + 1, bw);
91
0
   }
92
0
  while (--i != 0);
93
94
0
  for (; (hl = *(rp += k * n)) != 0; ) /* Should run only once... */
95
0
    {
96
0
      *rp = 0;
97
0
      i = k >> 1;
98
0
      do
99
0
  {
100
0
    rp -= n;
101
0
    MPN_INCR_U (rp, (k - i * 2 + 1) * n + 1, hl);
102
0
    rp -= n;
103
0
    MPN_DECR_U (rp, (k - i * 2 + 2) * n + 1, hl);
104
0
  }
105
0
      while (--i != 0);
106
0
    }
107
0
}
108
109
static void
110
_mpn_modbnp1_pn_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
111
0
{
112
0
  ASSERT (r[n] == h);
113
114
  /* Fully normalise */
115
0
  MPN_DECR_U (r, n + 1, h);
116
0
  h -= r[n];
117
0
  r[n] = 0;
118
0
  MPN_INCR_U (r, n + 1, h);
119
0
}
120
121
static void
122
_mpn_modbnp1_neg_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
123
0
{
124
0
  r[n] = 0;
125
0
  MPN_INCR_U (r, n + 1, -h);
126
0
  if (UNLIKELY (r[n] != 0))
127
0
    _mpn_modbnp1_pn_ip (r, n, 1);
128
0
}
129
130
static void
131
_mpn_modbnp1_nc_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
132
0
{
133
0
  if (h & GMP_NUMB_HIGHBIT) /* This means h < 0 */
134
0
    {
135
0
      _mpn_modbnp1_neg_ip (r, n, h);
136
0
    }
137
0
  else
138
0
    {
139
0
      r[n] = h;
140
0
      if (h)
141
0
  _mpn_modbnp1_pn_ip(r, n, h);
142
0
    }
143
0
}
144
145
/* {rp, rn + 1} = {op, on} mod (B^{rn}+1) */
146
/* Used when rn < on < 2*rn. */
147
static void
148
_mpn_modbnp1 (mp_ptr rp, mp_size_t rn, mp_srcptr op, mp_size_t on)
149
0
{
150
0
  mp_limb_t bw;
151
152
#if 0
153
  if (UNLIKELY (on <= rn))
154
    {
155
      MPN_COPY (rp, op, on);
156
      MPN_ZERO (rp + on, rn - on);
157
      return;
158
    }
159
#endif
160
161
0
  ASSERT (on > rn);
162
0
  ASSERT (on <= 2 * rn);
163
164
0
  bw = mpn_sub (rp, op, rn, op + rn, on - rn);
165
0
  rp[rn] = 0;
166
0
  MPN_INCR_U (rp, rn + 1, bw);
167
0
}
168
169
/* {rp, rn + 1} = {op, k * rn + 1} % (B^{rn}+1) */
170
/* With odd k >= 3. */
171
static void
172
_mpn_modbnp1_kn (mp_ptr rp, mp_srcptr op, mp_size_t rn, unsigned k)
173
0
{
174
0
  mp_limb_t cy;
175
176
#if MOD_BKNP1_ONLY3
177
  ASSERT (k == 3);
178
  k = 3;
179
#endif
180
0
  ASSERT (k & 1);
181
0
  k >>= 1;
182
0
  ASSERT (0 < k && k < GMP_NUMB_HIGHBIT - 3);
183
0
  ASSERT (op[(1 + 2 * k) * rn] < GMP_NUMB_HIGHBIT - 2 - k);
184
185
0
  cy = - mpn_sub_n (rp, op, op + rn, rn);
186
0
  for (;;) {
187
0
    op += 2 * rn;
188
0
    cy += mpn_add_n (rp, rp, op, rn);
189
0
    if (--k == 0)
190
0
      break;
191
0
    cy -= mpn_sub_n (rp, rp, op + rn, rn);
192
0
  };
193
194
0
  cy += op[rn];
195
0
  _mpn_modbnp1_nc_ip (rp, rn, cy);
196
0
}
197
198
/* For the various mpn_divexact_byN here, fall back to using either
199
   mpn_pi1_bdiv_q_1 or mpn_divexact_1.  The former has less overhead and is
200
   faster if it is native.  For now, since mpn_divexact_1 is native on
201
   platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
202
   mpn_pi1_bdiv_q_1 unconditionally.  FIXME.  */
203
204
#ifndef mpn_divexact_by5
205
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
206
#define BINVERT_5 \
207
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 5 * 3 << 3) + 5) & GMP_NUMB_MAX)
208
#define mpn_divexact_by5(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,5,BINVERT_5,0)
209
#else
210
#define mpn_divexact_by5(dst,src,size) mpn_divexact_1(dst,src,size,5)
211
#endif
212
#endif
213
214
#ifndef mpn_divexact_by7
215
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
216
#define BINVERT_7 \
217
0
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 3)) / 7 * 3 << 4) + 7) & GMP_NUMB_MAX)
218
0
#define mpn_divexact_by7(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,7,BINVERT_7,0)
219
#else
220
#define mpn_divexact_by7(dst,src,size) mpn_divexact_1(dst,src,size,7)
221
#endif
222
#endif
223
224
#ifndef mpn_divexact_by11
225
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
226
#define BINVERT_11 \
227
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 10)) / 11 << 5) + 3) & GMP_NUMB_MAX)
228
#define mpn_divexact_by11(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,11,BINVERT_11,0)
229
#else
230
#define mpn_divexact_by11(dst,src,size) mpn_divexact_1(dst,src,size,11)
231
#endif
232
#endif
233
234
#ifndef mpn_divexact_by13
235
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
236
#define BINVERT_13 \
237
0
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 12)) / 13 * 3 << 14) + 3781) & GMP_NUMB_MAX)
238
0
#define mpn_divexact_by13(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,13,BINVERT_13,0)
239
#else
240
#define mpn_divexact_by13(dst,src,size) mpn_divexact_1(dst,src,size,13)
241
#endif
242
#endif
243
244
#ifndef mpn_divexact_by17
245
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
246
#define BINVERT_17 \
247
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 8)) / 17 * 15 << 7) + 113) & GMP_NUMB_MAX)
248
#define mpn_divexact_by17(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,17,BINVERT_17,0)
249
#else
250
#define mpn_divexact_by17(dst,src,size) mpn_divexact_1(dst,src,size,17)
251
#endif
252
#endif
253
254
/* Thanks to Chinese remainder theorem, store
255
   in {rp, k*n+1} the value mod (B^(k*n)+1), given
256
   {ap, k*n+1} mod ((B^(k*n)+1)/(B^n+1)) and
257
   {bp, n+1} mod (B^n+1) .
258
   {tp, n+1} is a scratch area.
259
   tp == rp or rp == ap are possible.
260
*/
261
static void
262
_mpn_crt (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
263
    mp_size_t n, unsigned k, mp_ptr tp)
264
0
{
265
0
  mp_limb_t mod;
266
0
  unsigned i;
267
268
#if MOD_BKNP1_ONLY3
269
  ASSERT (k == 3);
270
  k = 3;
271
#endif
272
0
  _mpn_modbnp1_kn (tp, ap, n, k);
273
0
  if (mpn_sub_n (tp, bp, tp, n + 1))
274
0
    _mpn_modbnp1_neg_ip (tp, n, tp[n]);
275
276
#if MOD_BKNP1_USE11
277
  if (UNLIKELY (k == 11))
278
    {
279
      ASSERT (GMP_NUMB_BITS % 2 == 0);
280
      /* mod <- -Mod(B^n+1,11)^-1 */
281
      mod = n * (GMP_NUMB_BITS % 5) % 5;
282
      if ((mod > 2) || UNLIKELY (mod == 0))
283
  mod += 5;
284
285
      mod *= mpn_mod_1 (tp, n + 1, 11);
286
    }
287
  else
288
#endif
289
0
    {
290
0
#if GMP_NUMB_BITS % 8 == 0
291
  /* (2^6 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1)  */
292
  /* (2^6 - 1) = 3^2 * 7      */
293
0
  mod = mpn_mod_34lsub1 (tp, n + 1);
294
0
  ASSERT ((GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2)) % k == 0);
295
  /* (2^12 - 1) = 3^2 * 5 * 7 * 13    */
296
  /* (2^24 - 1) = 3^2 * 5 * 7 * 13 * 17 * 241 */
297
0
  ASSERT (k == 3 || k == 5 || k == 7 || k == 13 || k == 17);
298
299
0
#if GMP_NUMB_BITS % 3 != 0
300
0
  if (UNLIKELY (k != 3))
301
0
    {
302
0
      ASSERT ((GMP_NUMB_MAX % k == 0) || (n % 3 != 0));
303
0
      if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
304
0
  mod <<= 1; /* k >> 1 = 1 << 1 */
305
0
      else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7))
306
0
  mod <<= (n << (GMP_NUMB_BITS % 3 >> 1)) % 3;
307
0
      else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
308
0
  mod *= ((n << (GMP_NUMB_BITS % 3 >> 1)) % 3 == 1) ? 3 : 9;
309
0
      else /* k == 17 */
310
0
  mod <<= 3; /* k >> 1 = 1 << 3 */
311
#if 0
312
      if ((GMP_NUMB_BITS == 8) /* && (k == 7) */ ||
313
    (GMP_NUMB_BITS == 16) && (k == 13))
314
  mod = ((mod & (GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2))) +
315
         (mod >> (3 * GMP_NUMB_BITS >> 2)));
316
#endif
317
0
    }
318
#else
319
  ASSERT (GMP_NUMB_MAX % k == 0);
320
  /* 2^{GMP_NUMB_BITS} - 1  = 0 (mod k) */
321
  /* 2^{GMP_NUMB_BITS}    = 1 (mod k) */
322
  /* 2^{n*GMP_NUMB_BITS} + 1  = 2 (mod k) */
323
  /* -2^{-1}  = k >> 1 (mod k) */
324
  mod *= k >> 1;
325
#endif
326
#else
327
  ASSERT_ALWAYS (k == 0); /* Not implemented, should not be used. */
328
#endif
329
0
    }
330
331
0
  MPN_INCR_U (tp, n + 1, mod);
332
0
  tp[n] += mod;
333
334
0
  if (LIKELY (k == 3))
335
0
    ASSERT_NOCARRY (mpn_divexact_by3 (tp, tp, n + 1));
336
0
  else if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
337
0
    mpn_divexact_by5 (tp, tp, n + 1);
338
0
  else if (((! MOD_BKNP1_USE11) && (GMP_NUMB_BITS % 16 != 0))
339
0
     || LIKELY (k == 7))
340
0
    mpn_divexact_by7 (tp, tp, n + 1);
341
#if MOD_BKNP1_USE11
342
  else if (k == 11)
343
    mpn_divexact_by11 (tp, tp, n + 1);
344
#endif
345
0
  else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
346
0
    mpn_divexact_by13 (tp, tp, n + 1);
347
0
  else /* (k == 17) */
348
0
    mpn_divexact_by17 (tp, tp, n + 1);
349
350
0
  rp += k * n;
351
0
  ap += k * n; /* tp - 1 */
352
353
0
  rp -= n;
354
0
  ap -= n;
355
0
  ASSERT_NOCARRY (mpn_add_n (rp, ap, tp, n + 1));
356
357
0
  i = k >> 1;
358
0
  do
359
0
   {
360
0
      mp_limb_t cy, bw;
361
0
      rp -= n;
362
0
      ap -= n;
363
0
      bw = mpn_sub_n (rp, ap, tp, n) + tp[n];
364
0
      MPN_DECR_U (rp + n, (k - i * 2) * n + 1, bw);
365
0
      rp -= n;
366
0
      ap -= n;
367
0
      cy = mpn_add_n (rp, ap, tp, n) + tp[n];
368
0
      MPN_INCR_U (rp + n, (k - i * 2 + 1) * n + 1, cy);
369
0
    }
370
0
  while (--i != 0);
371
372
  /* if (LIKELY (rp[k * n])) */
373
0
    _mpn_modbnp1_pn_ip (rp, k * n, rp[k * n]);
374
0
}
375
376
377
static void
378
_mpn_mulmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
379
        mp_ptr tp)
380
0
{
381
0
  mp_limb_t cy;
382
0
  unsigned k;
383
384
0
  ASSERT (0 < rn);
385
0
  ASSERT ((ap[rn] | bp[rn]) <= 1);
386
387
0
  if (UNLIKELY (ap[rn] | bp[rn]))
388
0
    {
389
0
      if (ap[rn])
390
0
  cy = bp[rn] + mpn_neg (rp, bp, rn);
391
0
      else /* ap[rn] == 0 */
392
0
  cy = mpn_neg (rp, ap, rn);
393
0
    }
394
0
  else if (MPN_MULMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
395
0
    {
396
0
      rn /= k;
397
0
      mpn_mulmod_bknp1 (rp, ap, bp, rn, k, tp);
398
0
      return;
399
0
    }
400
0
  else
401
0
    {
402
0
      mpn_mul_n (tp, ap, bp, rn);
403
0
      cy = mpn_sub_n (rp, tp, tp + rn, rn);
404
0
    }
405
0
  rp[rn] = 0;
406
0
  MPN_INCR_U (rp, rn + 1, cy);
407
0
}
408
409
/* {rp, kn + 1} = {ap, kn + 1} * {bp, kn + 1} % (B^kn + 1) */
410
/* tp must point to at least 4*(k-1)*n+1 limbs*/
411
void
412
mpn_mulmod_bknp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
413
      mp_size_t n, unsigned k, mp_ptr tp)
414
0
{
415
0
  mp_ptr hp;
416
417
#if MOD_BKNP1_ONLY3
418
  ASSERT (k == 3);
419
  k = 3;
420
#endif
421
0
  ASSERT (k > 2);
422
0
  ASSERT (k % 2 == 1);
423
424
  /* a % (B^{nn}+1)/(B^{nn/k}+1) */
425
0
  _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
426
  /* b % (B^{nn}+1)/(B^{nn/k}+1) */
427
0
  _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 3, bp, n, k);
428
0
  mpn_mul_n (tp, tp + (k - 1) * n * 2, tp + (k - 1) * n * 3, (k - 1) * n);
429
0
  _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
430
431
0
  hp = tp + k * n + 1;
432
  /* a % (B^{nn/k}+1) */
433
0
  ASSERT (ap[k * n] <= 1);
434
0
  _mpn_modbnp1_kn (hp, ap, n, k);
435
  /* b % (B^{nn/k}+1) */
436
0
  ASSERT (bp[k * n] <= 1);
437
0
  _mpn_modbnp1_kn (hp + n + 1, bp, n, k);
438
0
  _mpn_mulmod_bnp1_tp (hp + (n + 1) * 2, hp, hp + n + 1, n, hp + (n + 1) * 2);
439
440
0
  _mpn_crt (rp, tp, hp + (n + 1) * 2, n, k, hp);
441
0
}
442
443
444
static void
445
_mpn_sqrmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_size_t rn,
446
        mp_ptr tp)
447
0
{
448
0
  mp_limb_t cy;
449
0
  unsigned k;
450
451
0
  ASSERT (0 < rn);
452
453
0
  if (UNLIKELY (ap[rn]))
454
0
    {
455
0
      ASSERT (ap[rn] == 1);
456
0
      *rp = 1;
457
0
      MPN_FILL (rp + 1, rn, 0);
458
0
      return;
459
0
    }
460
0
  else if (MPN_SQRMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
461
0
    {
462
0
      rn /= k;
463
0
      mpn_sqrmod_bknp1 (rp, ap, rn, k, tp);
464
0
      return;
465
0
    }
466
0
  else
467
0
    {
468
0
      mpn_sqr (tp, ap, rn);
469
0
      cy = mpn_sub_n (rp, tp, tp + rn, rn);
470
0
    }
471
0
  rp[rn] = 0;
472
0
  MPN_INCR_U (rp, rn + 1, cy);
473
0
}
474
475
/* {rp, kn + 1} = {ap, kn + 1}^2 % (B^kn + 1) */
476
/* tp must point to at least 3*(k-1)*n+1 limbs*/
477
void
478
mpn_sqrmod_bknp1 (mp_ptr rp, mp_srcptr ap,
479
      mp_size_t n, unsigned k, mp_ptr tp)
480
0
{
481
0
  mp_ptr hp;
482
483
#if MOD_BKNP1_ONLY3
484
  ASSERT (k == 3);
485
  k = 3;
486
#endif
487
0
  ASSERT (k > 2);
488
0
  ASSERT (k % 2 == 1);
489
490
  /* a % (B^{nn}+1)/(B^{nn/k}+1) */
491
0
  _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
492
0
  mpn_sqr (tp, tp + (k - 1) * n * 2, (k - 1) * n);
493
0
  _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
494
495
0
  hp = tp + k * n + 1;
496
  /* a % (B^{nn/k}+1) */
497
0
  ASSERT (ap[k * n] <= 1);
498
0
  _mpn_modbnp1_kn (hp, ap, n, k);
499
0
  _mpn_sqrmod_bnp1_tp (hp + (n + 1), hp, n, hp + (n + 1));
500
501
0
  _mpn_crt (rp, tp, hp + (n + 1), n, k, hp);
502
0
}