Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpn/toom8h_mul.c
Line
Count
Source (jump to first uncovered line)
1
/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2
3
   Contributed to the GNU project by Marco Bodrato.
4
5
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
38
#include "gmp-impl.h"
39
40
41
#if GMP_NUMB_BITS < 29
42
#error Not implemented.
43
#endif
44
45
#if GMP_NUMB_BITS < 43
46
#define BIT_CORRECTION 1
47
#define CORRECTION_BITS GMP_NUMB_BITS
48
#else
49
41.2k
#define BIT_CORRECTION 0
50
#define CORRECTION_BITS 0
51
#endif
52
53
54
#if TUNE_PROGRAM_BUILD
55
#define MAYBE_mul_basecase 1
56
#define MAYBE_mul_toom22   1
57
#define MAYBE_mul_toom33   1
58
#define MAYBE_mul_toom44   1
59
#define MAYBE_mul_toom8h   1
60
#else
61
#define MAYBE_mul_basecase            \
62
329k
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
63
#define MAYBE_mul_toom22            \
64
329k
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
65
#define MAYBE_mul_toom33            \
66
166k
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
67
#define MAYBE_mul_toom44            \
68
23.7k
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
69
#define MAYBE_mul_toom8h            \
70
14.1k
  (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
71
#endif
72
73
#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)     \
74
164k
  do {                 \
75
164k
    if (MAYBE_mul_basecase            \
76
164k
  && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {     \
77
0
      mpn_mul_basecase (p, a, n, b, n);         \
78
0
      if (f) mpn_mul_basecase (p2, a2, n, b2, n);     \
79
164k
    } else if (MAYBE_mul_toom22            \
80
164k
       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {   \
81
81.7k
      mpn_toom22_mul (p, a, n, b, n, ws);       \
82
81.7k
      if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);     \
83
83.2k
    } else if (MAYBE_mul_toom33            \
84
83.2k
       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {   \
85
71.3k
      mpn_toom33_mul (p, a, n, b, n, ws);       \
86
71.3k
      if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);     \
87
71.3k
    } else if (MAYBE_mul_toom44            \
88
11.8k
       && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {   \
89
4.79k
      mpn_toom44_mul (p, a, n, b, n, ws);       \
90
4.79k
      if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);     \
91
7.07k
    } else if (! MAYBE_mul_toom8h          \
92
7.07k
       || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {   \
93
3.88k
      mpn_toom6h_mul (p, a, n, b, n, ws);       \
94
3.88k
      if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);     \
95
3.88k
    } else {               \
96
3.19k
      mpn_toom8h_mul (p, a, n, b, n, ws);       \
97
3.19k
      if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);     \
98
3.19k
    }                  \
99
164k
  } while (0)
100
101
#define TOOM8H_MUL_REC(p, a, na, b, nb, ws)   \
102
192
  do { mpn_mul (p, a, na, b, nb); } while (0)
103
104
/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
105
   With: an >= bn >= 86, an*5 <  bn * 11.
106
   It _may_ work with bn<=?? and bn*?? < an*? < bn*??
107
108
   Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
109
*/
110
/* Estimate on needed scratch:
111
   S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
112
   since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
113
 */
114
115
void
116
mpn_toom8h_mul   (mp_ptr pp,
117
      mp_srcptr ap, mp_size_t an,
118
      mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
119
20.6k
{
120
20.6k
  mp_size_t n, s, t;
121
20.6k
  int p, q, half;
122
20.6k
  int sign;
123
124
  /***************************** decomposition *******************************/
125
126
20.6k
  ASSERT (an >= bn);
127
  /* Can not handle too small operands */
128
20.6k
  ASSERT (bn >= 86);
129
  /* Can not handle too much unbalancement */
130
20.6k
  ASSERT (an <= bn*4);
131
20.6k
  ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
132
20.6k
  ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
133
20.6k
  ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
134
135
  /* Limit num/den is a rational number between
136
     (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
137
20.6k
#define LIMIT_numerator (21)
138
20.6k
#define LIMIT_denominat (20)
139
140
20.6k
  if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
141
20.3k
    {
142
20.3k
      half = 0;
143
20.3k
      n = 1 + ((an - 1)>>3);
144
20.3k
      p = q = 7;
145
20.3k
      s = an - 7 * n;
146
20.3k
      t = bn - 7 * n;
147
20.3k
    }
148
237
  else
149
237
    {
150
237
      if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
151
192
  { p = 9; q = 8; }
152
45
      else if (GMP_NUMB_BITS <= 9*3 ||
153
45
         an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
154
45
  { p = 9; q = 7; }
155
0
      else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
156
0
  { p =10; q = 7; }
157
0
      else if (GMP_NUMB_BITS <= 10*3 ||
158
0
         an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
159
0
  { p =10; q = 6; }
160
0
      else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
161
0
  { p =11; q = 6; }
162
0
      else if (GMP_NUMB_BITS <= 11*3 ||
163
0
         an * 4 < 9 * bn)
164
0
  { p =11; q = 5; }
165
0
      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
166
0
  { p =12; q = 5; }
167
0
      else if (GMP_NUMB_BITS <= 12*3 ||
168
0
         an * 9 < 28 * bn )  /* is 4*... <12*... */
169
0
  { p =12; q = 4; }
170
0
      else
171
0
  { p =13; q = 4; }
172
173
237
      half = (p+q)&1;
174
237
      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
175
237
      p--; q--;
176
177
237
      s = an - p * n;
178
237
      t = bn - q * n;
179
180
237
      if(half) { /* Recover from badly chosen splitting */
181
192
  if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
182
192
  else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
183
192
      }
184
237
    }
185
20.6k
#undef LIMIT_numerator
186
20.6k
#undef LIMIT_denominat
187
188
20.6k
  ASSERT (0 < s && s <= n);
189
20.6k
  ASSERT (0 < t && t <= n);
190
20.6k
  ASSERT (half || s + t > 3);
191
20.6k
  ASSERT (n > 2);
192
193
20.6k
#define   r6    (pp + 3 * n)      /* 3n+1 */
194
20.6k
#define   r4    (pp + 7 * n)      /* 3n+1 */
195
20.6k
#define   r2    (pp +11 * n)      /* 3n+1 */
196
20.6k
#define   r0    (pp +15 * n)      /* s+t <= 2*n */
197
41.2k
#define   r7    (scratch)      /* 3n+1 */
198
41.2k
#define   r5    (scratch + 3 * n + 1)    /* 3n+1 */
199
41.2k
#define   r3    (scratch + 6 * n + 2)    /* 3n+1 */
200
41.2k
#define   r1    (scratch + 9 * n + 3)    /* 3n+1 */
201
144k
#define   v0    (pp +11 * n)      /* n+1 */
202
144k
#define   v1    (pp +12 * n+1)      /* n+1 */
203
144k
#define   v2    (pp +13 * n+2)      /* n+1 */
204
144k
#define   v3    (scratch +12 * n + 4)    /* n+1 */
205
20.6k
#define   wsi   (scratch +12 * n + 4)    /* 3n+1 */
206
20.6k
#define   wse   (scratch +13 * n + 5)   /* 2n+1 */
207
208
  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
209
     need all of them  */
210
/*   if (scratch == NULL) */
211
/*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
212
20.6k
  ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
213
20.6k
  ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
214
215
  /********************** evaluation and recursive calls *********************/
216
217
  /* $\pm1/8$ */
218
20.6k
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
219
20.6k
   mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
220
  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
221
20.6k
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
222
20.6k
  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
223
224
  /* $\pm1/4$ */
225
20.6k
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
226
20.6k
   mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
227
  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
228
20.6k
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
229
20.6k
  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
230
231
  /* $\pm2$ */
232
20.6k
  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
233
20.6k
   mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
234
  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
235
20.6k
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
236
20.6k
  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
237
238
  /* $\pm8$ */
239
20.6k
  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
240
20.6k
   mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
241
  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
242
20.6k
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
243
20.6k
  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
244
245
  /* $\pm1/2$ */
246
20.6k
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
247
20.6k
   mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
248
  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
249
20.6k
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
250
20.6k
  mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
251
252
  /* $\pm1$ */
253
20.6k
  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
254
20.6k
  if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
255
0
    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
256
20.6k
  else
257
20.6k
    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
258
  /* A(-1)*B(-1) */ /* A(1)*B(1) */
259
20.6k
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
260
20.6k
  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
261
262
  /* $\pm4$ */
263
20.6k
  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
264
20.6k
   mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
265
  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
266
20.6k
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
267
20.6k
  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
268
269
20.6k
#undef v0
270
20.6k
#undef v1
271
20.6k
#undef v2
272
20.6k
#undef v3
273
20.6k
#undef wse
274
275
  /* A(0)*B(0) */
276
20.6k
  TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
277
278
  /* Infinity */
279
20.6k
  if (UNLIKELY (half != 0)) {
280
192
    if (s > t) {
281
121
      TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
282
121
    } else {
283
71
      TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
284
71
    };
285
192
  };
286
287
20.6k
  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
288
289
20.6k
#undef r0
290
20.6k
#undef r1
291
20.6k
#undef r2
292
20.6k
#undef r3
293
20.6k
#undef r4
294
20.6k
#undef r5
295
20.6k
#undef r6
296
20.6k
#undef wsi
297
20.6k
}
298
299
#undef TOOM8H_MUL_N_REC
300
#undef TOOM8H_MUL_REC
301
#undef MAYBE_mul_basecase
302
#undef MAYBE_mul_toom22
303
#undef MAYBE_mul_toom33
304
#undef MAYBE_mul_toom44
305
#undef MAYBE_mul_toom8h