Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpn/toom_interpolate_8pts.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2
3
   Contributed to the GNU project by Marco Bodrato.
4
5
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9
Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
#include "gmp-impl.h"
38
39
154
#define BINVERT_3 MODLIMB_INVERSE_3
40
41
#define BINVERT_15 \
42
154
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
43
44
154
#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
45
46
#ifndef mpn_divexact_by3
47
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
48
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
49
#else
50
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
51
#endif
52
#endif
53
54
#ifndef mpn_divexact_by45
55
#if GMP_NUMB_BITS % 12 == 0
56
#define mpn_divexact_by45(dst,src,size) \
57
  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
58
#else
59
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
60
154
#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
61
#else
62
#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
63
#endif
64
#endif
65
#endif
66
67
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
68
#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
69
#else
70
#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
71
#endif
72
73
#if HAVE_NATIVE_mpn_sublsh_n
74
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
75
#else
76
static mp_limb_t
77
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
78
770
{
79
#if USE_MUL_1 && 0
80
  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
81
#else
82
770
  mp_limb_t __cy;
83
770
  __cy = mpn_lshift (ws,src,n,s);
84
770
  return __cy + mpn_sub_n (dst,dst,ws,n);
85
770
#endif
86
770
}
87
#endif
88
89
90
#if HAVE_NATIVE_mpn_subrsh
91
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
92
#else
93
/* This is not a correct definition, it assumes no carry */
94
308
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)       \
95
308
do {                 \
96
308
  mp_limb_t __cy;             \
97
308
  MPN_DECR_U (dst, nd, src[0] >> s);          \
98
308
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);  \
99
308
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);       \
100
308
} while (0)
101
#endif
102
103
/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
104
   points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
105
   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
106
   degree 7 (or 6), given the 8 (rsp. 7) values:
107
108
     r1 = limit at infinity of f(x) / x^7,
109
     r2 = f(4),
110
     r3 = f(-4),
111
     r4 = f(2),
112
     r5 = f(-2),
113
     r6 = f(1),
114
     r7 = f(-1),
115
     r8 = f(0).
116
117
   All couples of the form f(n),f(-n) must be already mixed with
118
   toom_couple_handling(f(n),...,f(-n),...)
119
120
   The result is stored in {pp, spt + 7*n (or 6*n)}.
121
   At entry, r8 is stored at {pp, 2n},
122
   r5 is stored at {pp + 3n, 3n + 1}.
123
124
   The other values are 2n+... limbs each (with most significant limbs small).
125
126
   All intermediate results are positive.
127
   Inputs are destroyed.
128
*/
129
130
void
131
mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
132
         mp_ptr r3, mp_ptr r7,
133
         mp_size_t spt, mp_ptr ws)
134
154
{
135
154
  mp_limb_signed_t cy;
136
154
  mp_ptr r5, r1;
137
154
  r5 = (pp + 3 * n);      /* 3n+1 */
138
154
  r1 = (pp + 7 * n);      /* spt */
139
140
  /******************************* interpolation *****************************/
141
142
154
  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
143
154
  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
144
154
  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
145
146
154
  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
147
154
  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
148
154
  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
149
150
154
  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
151
154
  cy = mpn_sub_n (r7, r7, r1, spt);
152
154
  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
153
154
154
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
155
154
  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
156
157
154
  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
158
159
154
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
160
161
154
  mpn_divexact_by45 (r3, r3, 3 * n + 1);
162
163
154
  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
164
165
154
  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
166
167
  /* last interpolation steps... */
168
  /* ... are mixed with recomposition */
169
170
  /***************************** recomposition *******************************/
171
  /*
172
    pp[] prior to operations:
173
     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
174
175
    summation scheme for remaining operations:
176
     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
177
     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
178
    ||_H r3|_M r3|_L*r3|
179
          ||_H_r7|_M_r7|_L_r7|
180
          ||-H r3|-M r3|-L*r3|
181
          ||-H*r5|-M_r5|-L_r5|
182
  */
183
184
154
  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
185
154
  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
186
154
  if (cy > 0) {
187
11
    MPN_INCR_U (r7 + n, 2*n + 1, 1);
188
11
    cy = 0;
189
11
  }
190
191
154
  cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
192
154
  MPN_DECR_U (r7 + 2*n, n + 1, cy);
193
194
154
  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
195
154
  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
196
154
  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
197
154
  if (UNLIKELY(0 > cy))
198
0
    MPN_DECR_U (r5 + n + 1, 2*n, 1);
199
154
  else
200
154
    MPN_INCR_U (r5 + n + 1, 2*n, cy);
201
202
154
  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
203
204
154
  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
205
154
  MPN_INCR_U (r3 + 2*n, n + 1, cy);
206
154
  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
207
154
  if (LIKELY(spt != n))
208
154
    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
209
0
  else
210
154
    ASSERT (r3[3*n] + cy == 0);
211
154
}