Coverage Report

Created: 2025-03-18 06:55

/src/gmp/mpn/toom_interpolate_8pts.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2
3
   Contributed to the GNU project by Marco Bodrato.
4
5
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
#include "gmp-impl.h"
38
39
0
#define BINVERT_3 MODLIMB_INVERSE_3
40
41
#define BINVERT_15 \
42
0
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
43
44
0
#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
45
46
#ifndef mpn_divexact_by3
47
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
48
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
49
#else
50
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
51
#endif
52
#endif
53
54
#ifndef mpn_divexact_by45
55
#if GMP_NUMB_BITS % 12 == 0
56
#define mpn_divexact_by45(dst,src,size) \
57
  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
58
#else
59
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
60
0
#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
61
#else
62
#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
63
#endif
64
#endif
65
#endif
66
67
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
68
#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
69
#else
70
#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
71
#endif
72
73
#if HAVE_NATIVE_mpn_sublsh_n
74
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
75
#else
76
static mp_limb_t
77
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
78
0
{
79
#if USE_MUL_1 && 0
80
  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
81
#else
82
0
  mp_limb_t __cy;
83
0
  __cy = mpn_lshift (ws,src,n,s);
84
0
  return __cy + mpn_sub_n (dst,dst,ws,n);
85
0
#endif
86
0
}
87
#endif
88
89
90
#if HAVE_NATIVE_mpn_subrsh
91
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
92
#else
93
/* This is not a correct definition, it assumes no carry */
94
0
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)       \
95
0
do {                 \
96
0
  mp_limb_t __cy;             \
97
0
  MPN_DECR_U (dst, nd, src[0] >> s);          \
98
0
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);  \
99
0
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);       \
100
0
} while (0)
101
#endif
102
103
/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
104
   points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
105
   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
106
   degree 7 (or 6), given the 8 (rsp. 7) values:
107
108
     r1 = limit at infinity of f(x) / x^7,
109
     r2 = f(4),
110
     r3 = f(-4),
111
     r4 = f(2),
112
     r5 = f(-2),
113
     r6 = f(1),
114
     r7 = f(-1),
115
     r8 = f(0).
116
117
   All couples of the form f(n),f(-n) must be already mixed with
118
   toom_couple_handling(f(n),...,f(-n),...)
119
120
   The result is stored in {pp, spt + 7*n (or 6*n)}.
121
   At entry, r8 is stored at {pp, 2n},
122
   r5 is stored at {pp + 3n, 3n + 1}.
123
124
   The other values are 2n+... limbs each (with most significant limbs small).
125
126
   All intermediate results are positive.
127
   Inputs are destroyed.
128
*/
129
130
void
131
mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
132
         mp_ptr r3, mp_ptr r7,
133
         mp_size_t spt, mp_ptr ws)
134
0
{
135
0
  mp_limb_signed_t cy;
136
0
  mp_ptr r5, r1;
137
0
  r5 = (pp + 3 * n);      /* 3n+1 */
138
0
  r1 = (pp + 7 * n);      /* spt */
139
140
  /******************************* interpolation *****************************/
141
142
0
  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
143
0
  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
144
0
  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
145
146
0
  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
147
0
  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
148
0
  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
149
150
0
  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
151
0
  cy = mpn_sub_n (r7, r7, r1, spt);
152
0
  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
153
154
0
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
155
0
  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
156
157
0
  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
158
159
0
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
160
161
0
  mpn_divexact_by45 (r3, r3, 3 * n + 1);
162
163
0
  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
164
165
0
  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
166
167
  /* last interpolation steps... */
168
  /* ... are mixed with recomposition */
169
170
  /***************************** recomposition *******************************/
171
  /*
172
    pp[] prior to operations:
173
     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
174
175
    summation scheme for remaining operations:
176
     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
177
     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
178
    ||_H r3|_M r3|_L*r3|
179
          ||_H_r7|_M_r7|_L_r7|
180
          ||-H r3|-M r3|-L*r3|
181
          ||-H*r5|-M_r5|-L_r5|
182
  */
183
184
0
  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
185
0
  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
186
0
  if (cy > 0) {
187
0
    MPN_INCR_U (r7 + n, 2*n + 1, 1);
188
0
    cy = 0;
189
0
  }
190
191
0
  cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
192
0
  MPN_DECR_U (r7 + 2*n, n + 1, cy);
193
194
0
  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
195
0
  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
196
0
  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
197
0
  if (UNLIKELY(0 > cy))
198
0
    MPN_DECR_U (r5 + n + 1, 2*n, 1);
199
0
  else
200
0
    MPN_INCR_U (r5 + n + 1, 2*n, cy);
201
202
0
  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
203
204
0
  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
205
0
  MPN_INCR_U (r3 + 2*n, n + 1, cy);
206
0
  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
207
0
  if (LIKELY(spt != n))
208
0
    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
209
0
  else
210
0
    ASSERT (r3[3*n] + cy == 0);
211
0
}