Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpn/toom63_mul.c
Line
Count
Source (jump to first uncovered line)
1
/* Implementation of the algorithm for Toom-Cook 4.5-way.
2
3
   Contributed to the GNU project by Marco Bodrato.
4
5
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2009, 2012 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
38
#include "gmp-impl.h"
39
40
/* Stores |{ap,n}-{bp,n}| in {rp,n}. */
41
/* It returns 0 or ~0, depending on the sign of the result. */
42
static unsigned
43
abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
44
308
{
45
308
  mp_limb_t  x, y;
46
389
  while (--n >= 0)
47
389
    {
48
389
      x = ap[n];
49
389
      y = bp[n];
50
389
      if (x != y)
51
308
  {
52
308
    n++;
53
308
    if (x > y)
54
33
      {
55
33
        mpn_sub_n (rp, ap, bp, n);
56
33
        return 0;
57
33
      }
58
275
    else
59
275
      {
60
275
        mpn_sub_n (rp, bp, ap, n);
61
275
        return ~ (unsigned) 0;
62
275
      }
63
308
  }
64
81
      rp[n] = 0;
65
81
    }
66
0
  return 0;
67
308
}
68
69
/* It returns 0 or ~0, depending on the sign of the result rm. */
70
static unsigned
71
308
abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
72
308
  unsigned result;
73
308
  result = abs_sub_n (rm, rp, rs, n);
74
308
  ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
75
308
  return result;
76
308
}
77
78
79
/* Toom-4.5, the splitting 6x3 unbalanced version.
80
   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
81
82
  <--s-><--n--><--n--><--n--><--n--><--n-->
83
   ____ ______ ______ ______ ______ ______
84
  |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
85
      |b2_|__b1__|__b0__|
86
      <-t-><--n--><--n-->
87
88
*/
89
#define TOOM_63_MUL_N_REC(p, a, b, n, ws)   \
90
1.07k
  do { mpn_mul_n (p, a, b, n);        \
91
1.07k
  } while (0)
92
93
#define TOOM_63_MUL_REC(p, a, na, b, nb, ws)    \
94
154
  do { mpn_mul (p, a, na, b, nb);     \
95
154
  } while (0)
96
97
void
98
mpn_toom63_mul (mp_ptr pp,
99
    mp_srcptr ap, mp_size_t an,
100
    mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
101
154
{
102
154
  mp_size_t n, s, t;
103
154
  mp_limb_t cy;
104
154
  unsigned sign;
105
106
  /***************************** decomposition *******************************/
107
154
#define a5  (ap + 5 * n)
108
462
#define b0  (bp + 0 * n)
109
770
#define b1  (bp + 1 * n)
110
462
#define b2  (bp + 2 * n)
111
112
154
  ASSERT (an >= bn);
113
154
  n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
114
115
154
  s = an - 5 * n;
116
154
  t = bn - 2 * n;
117
118
154
  ASSERT (0 < s && s <= n);
119
154
  ASSERT (0 < t && t <= n);
120
  /* WARNING! it assumes s+t>=n */
121
154
  ASSERT ( s + t >= n );
122
154
  ASSERT ( s + t > 4);
123
  /* WARNING! it assumes n>1 */
124
154
  ASSERT ( n > 2);
125
126
154
#define   r8    pp        /* 2n   */
127
308
#define   r7    scratch        /* 3n+1 */
128
154
#define   r5    (pp + 3*n)      /* 3n+1 */
129
462
#define   v0    (pp + 3*n)      /* n+1 */
130
616
#define   v1    (pp + 4*n+1)      /* n+1 */
131
462
#define   v2    (pp + 5*n+2)      /* n+1 */
132
2.15k
#define   v3    (pp + 6*n+3)      /* n+1 */
133
308
#define   r3    (scratch + 3 * n + 1)    /* 3n+1 */
134
154
#define   r1    (pp + 7*n)      /* s+t <= 2*n */
135
770
#define   ws    (scratch + 6 * n + 2)    /* ??? */
136
137
  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
138
     need all of them, when DO_mpn_sublsh_n usea a scratch  */
139
/*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
140
141
  /********************** evaluation and recursive calls *********************/
142
  /* $\pm4$ */
143
154
  sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
144
154
  pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
145
  /* FIXME: use addlsh */
146
154
  v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
147
154
  if ( n == t )
148
4
    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
149
150
  else
150
150
    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
151
154
  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
152
154
  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
153
154
  TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
154
154
  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
155
156
  /* $\pm1$ */
157
154
  sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
158
  /* Compute bs1 and bsm1. Code taken from toom33 */
159
154
  cy = mpn_add (ws, b0, n, b2, t);
160
#if HAVE_NATIVE_mpn_add_n_sub_n
161
  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
162
    {
163
      cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
164
      v3[n] = cy >> 1;
165
      v1[n] = 0;
166
      sign = ~sign;
167
    }
168
  else
169
    {
170
      mp_limb_t cy2;
171
      cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
172
      v3[n] = cy + (cy2 >> 1);
173
      v1[n] = cy - (cy2 & 1);
174
    }
175
#else
176
154
  v3[n] = cy + mpn_add_n (v3, ws, b1, n);
177
154
  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
178
64
    {
179
64
      mpn_sub_n (v1, b1, ws, n);
180
64
      v1[n] = 0;
181
64
      sign = ~sign;
182
64
    }
183
90
  else
184
90
    {
185
90
      cy -= mpn_sub_n (v1, ws, b1, n);
186
90
      v1[n] = cy;
187
90
    }
188
154
#endif
189
154
  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
190
154
  TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
191
154
  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
192
193
  /* $\pm2$ */
194
154
  sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
195
154
  pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
196
  /* FIXME: use addlsh or addlsh2 */
197
154
  v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
198
154
  if ( n == t )
199
4
    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
200
150
  else
201
150
    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
202
154
  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
203
154
  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
204
154
  TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
205
154
  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
206
207
  /* A(0)*B(0) */
208
154
  TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
209
210
  /* Infinity */
211
154
  if (s > t) {
212
1
    TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
213
153
  } else {
214
153
    TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
215
153
  };
216
217
154
  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
218
219
154
#undef a5
220
154
#undef b0
221
154
#undef b1
222
154
#undef b2
223
154
#undef r1
224
154
#undef r3
225
154
#undef r5
226
154
#undef v0
227
154
#undef v1
228
154
#undef v2
229
154
#undef v3
230
154
#undef r7
231
154
#undef r8
232
154
#undef ws
233
154
}