Coverage Report

Created: 2025-12-31 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gmp/mpn/toom_interpolate_6pts.c
Line
Count
Source
1
/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
2
3
   Contributed to the GNU project by Marco Bodrato.
4
5
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
#include "gmp-impl.h"
38
39
#define BINVERT_3 MODLIMB_INVERSE_3
40
41
/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
42
#ifndef mpn_divexact_by3
43
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
44
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
45
#else
46
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
47
#endif
48
#endif
49
50
/* Interpolation for Toom-3.5, using the evaluation points: infinity,
51
   1, -1, 2, -2. More precisely, we want to compute
52
   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
53
   six values
54
55
     w5 = f(0),
56
     w4 = f(-1),
57
     w3 = f(1)
58
     w2 = f(-2),
59
     w1 = f(2),
60
     w0 = limit at infinity of f(x) / x^5,
61
62
   The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
63
   {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
64
   {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
65
   significant limbs small). f(-1) and f(-2) may be negative, signs
66
   determined by the flag bits. All intermediate results are positive.
67
   Inputs are destroyed.
68
69
   Interpolation sequence was taken from the paper: "Integer and
70
   Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
71
   Some slight variations were introduced: adaptation to "gmp
72
   instruction set", and a final saving of an operation by interlacing
73
   interpolation and recomposition phases.
74
*/
75
76
void
77
mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
78
         mp_ptr w4, mp_ptr w2, mp_ptr w1,
79
         mp_size_t w0n)
80
508
{
81
508
  mp_limb_t cy;
82
  /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
83
508
  mp_limb_t cy4, cy6, embankment;
84
85
508
  ASSERT( n > 0 );
86
508
  ASSERT( 2*n >= w0n && w0n > 0 );
87
88
1.01k
#define w5  pp          /* 2n   */
89
4.06k
#define w3  (pp + 2 * n)      /* 2n+1 */
90
3.04k
#define w0  (pp + 5 * n)      /* w0n  */
91
92
  /* Interpolate with sequence:
93
     W2 =(W1 - W2)>>2
94
     W1 =(W1 - W5)>>1
95
     W1 =(W1 - W2)>>1
96
     W4 =(W3 - W4)>>1
97
     W2 =(W2 - W4)/3
98
     W3 = W3 - W4 - W5
99
     W1 =(W1 - W3)/3
100
     // Last steps are mixed with recomposition...
101
     W2 = W2 - W0<<2
102
     W4 = W4 - W2
103
     W3 = W3 - W1
104
     W2 = W2 - W0
105
  */
106
107
  /* W2 =(W1 - W2)>>2 */
108
508
  if (flags & toom6_vm2_neg)
109
231
    mpn_add_n (w2, w1, w2, 2 * n + 1);
110
277
  else
111
277
    mpn_sub_n (w2, w1, w2, 2 * n + 1);
112
508
  mpn_rshift (w2, w2, 2 * n + 1, 2);
113
114
  /* W1 =(W1 - W5)>>1 */
115
508
  w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
116
508
  mpn_rshift (w1, w1, 2 * n + 1, 1);
117
118
  /* W1 =(W1 - W2)>>1 */
119
#if HAVE_NATIVE_mpn_rsh1sub_n
120
  mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
121
#else
122
508
  mpn_sub_n (w1, w1, w2, 2 * n + 1);
123
508
  mpn_rshift (w1, w1, 2 * n + 1, 1);
124
508
#endif
125
126
  /* W4 =(W3 - W4)>>1 */
127
508
  if (flags & toom6_vm1_neg)
128
250
    {
129
#if HAVE_NATIVE_mpn_rsh1add_n
130
      mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
131
#else
132
250
      mpn_add_n (w4, w3, w4, 2 * n + 1);
133
250
      mpn_rshift (w4, w4, 2 * n + 1, 1);
134
250
#endif
135
250
    }
136
258
  else
137
258
    {
138
#if HAVE_NATIVE_mpn_rsh1sub_n
139
      mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
140
#else
141
258
      mpn_sub_n (w4, w3, w4, 2 * n + 1);
142
258
      mpn_rshift (w4, w4, 2 * n + 1, 1);
143
258
#endif
144
258
    }
145
146
  /* W2 =(W2 - W4)/3 */
147
508
  mpn_sub_n (w2, w2, w4, 2 * n + 1);
148
508
  mpn_divexact_by3 (w2, w2, 2 * n + 1);
149
150
  /* W3 = W3 - W4 - W5 */
151
508
  mpn_sub_n (w3, w3, w4, 2 * n + 1);
152
508
  w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
153
154
  /* W1 =(W1 - W3)/3 */
155
508
  mpn_sub_n (w1, w1, w3, 2 * n + 1);
156
508
  mpn_divexact_by3 (w1, w1, 2 * n + 1);
157
158
  /*
159
    [1 0 0 0 0 0;
160
     0 1 0 0 0 0;
161
     1 0 1 0 0 0;
162
     0 1 0 1 0 0;
163
     1 0 1 0 1 0;
164
     0 0 0 0 0 1]
165
166
    pp[] prior to operations:
167
     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
168
169
    summation scheme for remaining operations:
170
     |______________5|n_____4|n_____3|n_____2|n______|n______|pp
171
     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
172
            || H w4  | L w4  |
173
        || H w2  | L w2  |
174
      || H w1  | L w1  |
175
          ||-H w1  |-L w1  |
176
         |-H w0  |-L w0 ||-H w2  |-L w2  |
177
  */
178
508
  cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
179
508
  MPN_INCR_U (pp + 3 * n + 1, n, cy);
180
181
  /* W2 -= W0<<2 */
182
#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
183
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
184
  cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
185
#else
186
  cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
187
#endif
188
#else
189
  /* {W4,2*n+1} is now free and can be overwritten. */
190
508
  cy = mpn_lshift(w4, w0, w0n, 2);
191
508
  cy+= mpn_sub_n(w2, w2, w4, w0n);
192
508
#endif
193
508
  MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
194
195
  /* W4L = W4L - W2L */
196
508
  cy = mpn_sub_n (pp + n, pp + n, w2, n);
197
508
  MPN_DECR_U (w3, 2 * n + 1, cy);
198
199
  /* W3H = W3H + W2L */
200
508
  cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
201
  /* W1L + W2H */
202
508
  cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
203
508
  MPN_INCR_U (w1 + n, n + 1, cy);
204
205
  /* W0 = W0 + W1H */
206
508
  if (LIKELY (w0n > n))
207
508
    cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
208
0
  else
209
0
    cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
210
211
  /*
212
    summation scheme for the next operation:
213
     |...____5|n_____4|n_____3|n_____2|n______|n______|pp
214
     |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
215
         ...-w0___|-w1_w2 |
216
  */
217
  /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
218
508
  cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
219
220
  /* embankment is a "dirty trick" to avoid carry/borrow propagation
221
     beyond allocated memory */
222
508
  embankment = w0[w0n - 1] - 1;
223
508
  w0[w0n - 1] = 1;
224
508
  if (LIKELY (w0n > n)) {
225
508
    if (cy4 > cy6)
226
508
      MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
227
400
    else
228
508
      MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
229
508
    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
230
508
    MPN_INCR_U (w0 + n, w0n - n, cy6);
231
508
  } else {
232
0
    MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
233
0
    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
234
0
  }
235
508
  w0[w0n - 1] += embankment;
236
237
508
#undef w5
238
508
#undef w3
239
508
#undef w0
240
241
508
}