Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpn/toom6h_mul.c
Line
Count
Source (jump to first uncovered line)
1
/* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
2
3
   Contributed to the GNU project by Marco Bodrato.
4
5
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
38
#include "gmp-impl.h"
39
40
41
#if GMP_NUMB_BITS < 21
42
#error Not implemented.
43
#endif
44
45
#if TUNE_PROGRAM_BUILD
46
#define MAYBE_mul_basecase 1
47
#define MAYBE_mul_toom22   1
48
#define MAYBE_mul_toom33   1
49
#define MAYBE_mul_toom6h   1
50
#else
51
#define MAYBE_mul_basecase            \
52
229k
  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
53
#define MAYBE_mul_toom22            \
54
229k
  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
55
#define MAYBE_mul_toom33            \
56
2.96k
  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
57
#define MAYBE_mul_toom6h            \
58
0
  (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
59
#endif
60
61
#define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)     \
62
114k
  do {                 \
63
114k
    if (MAYBE_mul_basecase            \
64
114k
  && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {     \
65
0
      mpn_mul_basecase (p, a, n, b, n);         \
66
0
      if (f)               \
67
0
  mpn_mul_basecase (p2, a2, n, b2, n);       \
68
114k
    } else if (MAYBE_mul_toom22            \
69
114k
         && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {   \
70
113k
      mpn_toom22_mul (p, a, n, b, n, ws);       \
71
113k
      if (f)               \
72
113k
  mpn_toom22_mul (p2, a2, n, b2, n, ws);       \
73
113k
    } else if (MAYBE_mul_toom33            \
74
1.48k
         && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {   \
75
1.48k
      mpn_toom33_mul (p, a, n, b, n, ws);       \
76
1.48k
      if (f)               \
77
1.48k
  mpn_toom33_mul (p2, a2, n, b2, n, ws);       \
78
1.48k
    } else if (! MAYBE_mul_toom6h          \
79
0
         || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {   \
80
0
      mpn_toom44_mul (p, a, n, b, n, ws);       \
81
0
      if (f)               \
82
0
  mpn_toom44_mul (p2, a2, n, b2, n, ws);       \
83
0
    } else {               \
84
0
      mpn_toom6h_mul (p, a, n, b, n, ws);       \
85
0
      if (f)               \
86
0
  mpn_toom6h_mul (p2, a2, n, b2, n, ws);       \
87
0
    }                  \
88
114k
  } while (0)
89
90
#define TOOM6H_MUL_REC(p, a, na, b, nb, ws)   \
91
392
  do { mpn_mul (p, a, na, b, nb);     \
92
392
  } while (0)
93
94
/* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
95
   With: an >= bn >= 46, an*6 <  bn * 17.
96
   It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
97
98
   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
99
*/
100
/* Estimate on needed scratch:
101
   S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
102
   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
103
 */
104
105
void
106
mpn_toom6h_mul   (mp_ptr pp,
107
      mp_srcptr ap, mp_size_t an,
108
      mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
109
19.1k
{
110
19.1k
  mp_size_t n, s, t;
111
19.1k
  int p, q, half;
112
19.1k
  int sign;
113
114
  /***************************** decomposition *******************************/
115
116
19.1k
  ASSERT (an >= bn);
117
  /* Can not handle too much unbalancement */
118
19.1k
  ASSERT (bn >= 42);
119
  /* Can not handle too much unbalancement */
120
19.1k
  ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
121
122
  /* Limit num/den is a rational number between
123
     (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
124
19.1k
#define LIMIT_numerator (18)
125
19.1k
#define LIMIT_denominat (17)
126
127
19.1k
  if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
128
18.6k
    {
129
18.6k
      n = 1 + (an - 1) / (size_t) 6;
130
18.6k
      p = q = 5;
131
18.6k
      half = 0;
132
133
18.6k
      s = an - 5 * n;
134
18.6k
      t = bn - 5 * n;
135
18.6k
    }
136
403
  else {
137
403
    if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
138
392
      { p = 7; q = 6; }
139
11
    else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
140
11
      { p = 7; q = 5; }
141
0
    else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
142
0
      { p = 8; q = 5; }
143
0
    else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
144
0
      { p = 8; q = 4; }
145
0
    else
146
0
      { p = 9; q = 4; }
147
148
403
    half = (p ^ q) & 1;
149
403
    n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
150
403
    p--; q--;
151
152
403
    s = an - p * n;
153
403
    t = bn - q * n;
154
155
    /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
156
403
    if (half) { /* Recover from badly chosen splitting */
157
392
      if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
158
392
      else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
159
392
    }
160
403
  }
161
19.1k
#undef LIMIT_numerator
162
19.1k
#undef LIMIT_denominat
163
164
19.1k
  ASSERT (0 < s && s <= n);
165
19.1k
  ASSERT (0 < t && t <= n);
166
19.1k
  ASSERT (half || s + t > 3);
167
19.1k
  ASSERT (n > 2);
168
169
19.1k
#define   r4    (pp + 3 * n)      /* 3n+1 */
170
19.1k
#define   r2    (pp + 7 * n)      /* 3n+1 */
171
19.1k
#define   r0    (pp +11 * n)      /* s+t <= 2*n */
172
38.2k
#define   r5    (scratch)      /* 3n+1 */
173
38.2k
#define   r3    (scratch + 3 * n + 1)    /* 3n+1 */
174
38.2k
#define   r1    (scratch + 6 * n + 2)    /* 3n+1 */
175
95.5k
#define   v0    (pp + 7 * n)      /* n+1 */
176
95.5k
#define   v1    (pp + 8 * n+1)      /* n+1 */
177
95.5k
#define   v2    (pp + 9 * n+2)      /* n+1 */
178
95.5k
#define   v3    (scratch + 9 * n + 3)    /* n+1 */
179
19.1k
#define   wsi   (scratch + 9 * n + 3)    /* 3n+1 */
180
19.1k
#define   wse   (scratch +10 * n + 4)   /* 2n+1 */
181
182
  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
183
     need all of them  */
184
/*   if (scratch == NULL) */
185
/*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
186
19.1k
  ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
187
19.1k
  ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
188
189
  /********************** evaluation and recursive calls *********************/
190
  /* $\pm1/2$ */
191
19.1k
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
192
19.1k
   mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
193
  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
194
19.1k
  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
195
19.1k
  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
196
197
  /* $\pm1$ */
198
19.1k
  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
199
19.1k
  if (UNLIKELY (q == 3))
200
0
    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
201
19.1k
  else
202
19.1k
    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
203
  /* A(-1)*B(-1) */ /* A(1)*B(1) */
204
19.1k
  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
205
19.1k
  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
206
207
  /* $\pm4$ */
208
19.1k
  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
209
19.1k
   mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
210
  /* A(-4)*B(-4) */
211
19.1k
  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
212
19.1k
  mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
213
214
  /* $\pm1/4$ */
215
19.1k
  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
216
19.1k
   mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
217
  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
218
19.1k
  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
219
19.1k
  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
220
221
  /* $\pm2$ */
222
19.1k
  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
223
19.1k
   mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
224
  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
225
19.1k
  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
226
19.1k
  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
227
228
19.1k
#undef v0
229
19.1k
#undef v1
230
19.1k
#undef v2
231
19.1k
#undef v3
232
19.1k
#undef wse
233
234
  /* A(0)*B(0) */
235
19.1k
  TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
236
237
  /* Infinity */
238
19.1k
  if (UNLIKELY (half != 0)) {
239
392
    if (s > t) {
240
88
      TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
241
304
    } else {
242
304
      TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
243
304
    };
244
392
  };
245
246
19.1k
  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
247
248
19.1k
#undef r0
249
19.1k
#undef r1
250
19.1k
#undef r2
251
19.1k
#undef r3
252
19.1k
#undef r4
253
19.1k
#undef r5
254
19.1k
#undef wsi
255
19.1k
}
256
257
#undef TOOM6H_MUL_N_REC
258
#undef TOOM6H_MUL_REC
259
#undef MAYBE_mul_basecase
260
#undef MAYBE_mul_toom22
261
#undef MAYBE_mul_toom33
262
#undef MAYBE_mul_toom6h