Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpn/toom8h_mul.c
Line
Count
Source (jump to first uncovered line)
1
/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2
3
   Contributed to the GNU project by Marco Bodrato.
4
5
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
38
#include "gmp-impl.h"
39
40
41
#if GMP_NUMB_BITS < 29
42
#error Not implemented.
43
#endif
44
45
#if GMP_NUMB_BITS < 43
46
#define BIT_CORRECTION 1
47
#define CORRECTION_BITS GMP_NUMB_BITS
48
#else
49
988
#define BIT_CORRECTION 0
50
#define CORRECTION_BITS 0
51
#endif
52
53
54
#if TUNE_PROGRAM_BUILD
55
#define MAYBE_mul_basecase 1
56
#define MAYBE_mul_toom22   1
57
#define MAYBE_mul_toom33   1
58
#define MAYBE_mul_toom44   1
59
#define MAYBE_mul_toom8h   1
60
#else
61
#define MAYBE_mul_basecase            \
62
7.90k
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
63
#define MAYBE_mul_toom22            \
64
7.90k
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
65
#define MAYBE_mul_toom33            \
66
2.03k
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
67
#define MAYBE_mul_toom44            \
68
640
  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
69
#define MAYBE_mul_toom8h            \
70
224
  (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
71
#endif
72
73
#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)     \
74
3.95k
  do {                 \
75
3.95k
    if (MAYBE_mul_basecase            \
76
3.95k
  && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {     \
77
0
      mpn_mul_basecase (p, a, n, b, n);         \
78
0
      if (f) mpn_mul_basecase (p2, a2, n, b2, n);     \
79
3.95k
    } else if (MAYBE_mul_toom22            \
80
3.95k
       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {   \
81
2.93k
      mpn_toom22_mul (p, a, n, b, n, ws);       \
82
2.93k
      if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);     \
83
2.93k
    } else if (MAYBE_mul_toom33            \
84
1.01k
       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {   \
85
696
      mpn_toom33_mul (p, a, n, b, n, ws);       \
86
696
      if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);     \
87
696
    } else if (MAYBE_mul_toom44            \
88
320
       && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {   \
89
208
      mpn_toom44_mul (p, a, n, b, n, ws);       \
90
208
      if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);     \
91
208
    } else if (! MAYBE_mul_toom8h          \
92
112
       || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {   \
93
8
      mpn_toom6h_mul (p, a, n, b, n, ws);       \
94
8
      if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);     \
95
104
    } else {               \
96
104
      mpn_toom8h_mul (p, a, n, b, n, ws);       \
97
104
      if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);     \
98
104
    }                  \
99
3.95k
  } while (0)
100
101
#define TOOM8H_MUL_REC(p, a, na, b, nb, ws)   \
102
1
  do { mpn_mul (p, a, na, b, nb); } while (0)
103
104
/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
105
   With: an >= bn >= 86, an*5 <  bn * 11.
106
   It _may_ work with bn<=?? and bn*?? < an*? < bn*??
107
108
   Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
109
*/
110
/* Estimate on needed scratch:
111
   S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
112
   since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
113
 */
114
115
void
116
mpn_toom8h_mul   (mp_ptr pp,
117
      mp_srcptr ap, mp_size_t an,
118
      mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
119
494
{
120
494
  mp_size_t n, s, t;
121
494
  int p, q, half;
122
494
  unsigned sign;
123
124
  /***************************** decomposition *******************************/
125
126
494
  ASSERT (an >= bn);
127
  /* Can not handle too small operands */
128
494
  ASSERT (bn >= 86);
129
  /* Can not handle too much unbalancement */
130
494
  ASSERT (an <= bn*4);
131
494
  ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
132
494
  ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
133
494
  ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
134
135
  /* Limit num/den is a rational number between
136
     (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
137
494
#define LIMIT_numerator (21)
138
494
#define LIMIT_denominat (20)
139
140
494
  if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
141
493
    {
142
493
      half = 0;
143
493
      n = 1 + ((an - 1)>>3);
144
493
      p = q = 7;
145
493
      s = an - 7 * n;
146
493
      t = bn - 7 * n;
147
493
    }
148
1
  else
149
1
    {
150
1
      if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
151
1
  { p = 9; q = 8; }
152
0
      else if (GMP_NUMB_BITS <= 9*3 ||
153
0
         an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
154
0
  { p = 9; q = 7; }
155
0
      else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
156
0
  { p =10; q = 7; }
157
0
      else if (GMP_NUMB_BITS <= 10*3 ||
158
0
         an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
159
0
  { p =10; q = 6; }
160
0
      else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
161
0
  { p =11; q = 6; }
162
0
      else if (GMP_NUMB_BITS <= 11*3 ||
163
0
         an * 4 < 9 * bn)
164
0
  { p =11; q = 5; }
165
0
      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
166
0
  { p =12; q = 5; }
167
0
      else if (GMP_NUMB_BITS <= 12*3 ||
168
0
         an * 9 < 28 * bn )  /* is 4*... <12*... */
169
0
  { p =12; q = 4; }
170
0
      else
171
0
  { p =13; q = 4; }
172
173
1
      half = (p+q)&1;
174
1
      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
175
1
      p--; q--;
176
177
1
      s = an - p * n;
178
1
      t = bn - q * n;
179
180
1
      if(half) { /* Recover from badly chosen splitting */
181
1
  if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
182
1
  else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
183
1
      }
184
1
    }
185
494
#undef LIMIT_numerator
186
494
#undef LIMIT_denominat
187
188
494
  ASSERT (0 < s && s <= n);
189
494
  ASSERT (0 < t && t <= n);
190
494
  ASSERT (half || s + t > 3);
191
494
  ASSERT (n > 2);
192
193
494
#define   r6    (pp + 3 * n)      /* 3n+1 */
194
494
#define   r4    (pp + 7 * n)      /* 3n+1 */
195
494
#define   r2    (pp +11 * n)      /* 3n+1 */
196
494
#define   r0    (pp +15 * n)      /* s+t <= 2*n */
197
988
#define   r7    (scratch)      /* 3n+1 */
198
988
#define   r5    (scratch + 3 * n + 1)    /* 3n+1 */
199
988
#define   r3    (scratch + 6 * n + 2)    /* 3n+1 */
200
988
#define   r1    (scratch + 9 * n + 3)    /* 3n+1 */
201
3.45k
#define   v0    (pp +11 * n)      /* n+1 */
202
3.45k
#define   v1    (pp +12 * n+1)      /* n+1 */
203
3.45k
#define   v2    (pp +13 * n+2)      /* n+1 */
204
3.45k
#define   v3    (scratch +12 * n + 4)    /* n+1 */
205
494
#define   wsi   (scratch +12 * n + 4)    /* 3n+1 */
206
494
#define   wse   (scratch +13 * n + 5)   /* 2n+1 */
207
208
  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
209
     need all of them  */
210
/*   if (scratch == NULL) */
211
/*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
212
494
  ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
213
494
  ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
214
215
  /********************** evaluation and recursive calls *********************/
216
217
  /* $\pm1/8$ */
218
494
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
219
494
   mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
220
  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
221
494
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
222
494
  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
223
224
  /* $\pm1/4$ */
225
494
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
226
494
   mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
227
  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
228
494
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
229
494
  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
230
231
  /* $\pm2$ */
232
494
  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
233
494
   mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
234
  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
235
494
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
236
494
  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
237
238
  /* $\pm8$ */
239
494
  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
240
494
   mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
241
  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
242
494
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
243
494
  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
244
245
  /* $\pm1/2$ */
246
494
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
247
494
   mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
248
  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
249
494
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
250
494
  mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
251
252
  /* $\pm1$ */
253
494
  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
254
494
  if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
255
0
    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
256
494
  else
257
494
    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
258
  /* A(-1)*B(-1) */ /* A(1)*B(1) */
259
494
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
260
494
  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
261
262
  /* $\pm4$ */
263
494
  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
264
494
   mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
265
  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
266
494
  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
267
494
  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
268
269
494
#undef v0
270
494
#undef v1
271
494
#undef v2
272
494
#undef v3
273
494
#undef wse
274
275
  /* A(0)*B(0) */
276
494
  TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
277
278
  /* Infinity */
279
494
  if (UNLIKELY (half != 0)) {
280
1
    if (s > t) {
281
1
      TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
282
1
    } else {
283
0
      TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
284
0
    };
285
1
  };
286
287
494
  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
288
289
494
#undef r0
290
494
#undef r1
291
494
#undef r2
292
494
#undef r3
293
494
#undef r4
294
494
#undef r5
295
494
#undef r6
296
494
#undef wsi
297
494
}
298
299
#undef TOOM8H_MUL_N_REC
300
#undef TOOM8H_MUL_REC
301
#undef MAYBE_mul_basecase
302
#undef MAYBE_mul_toom22
303
#undef MAYBE_mul_toom33
304
#undef MAYBE_mul_toom44
305
#undef MAYBE_mul_toom8h