Coverage Report

Created: 2023-02-22 06:39

/src/gmp-6.2.1/mpn/toom_interpolate_5pts.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
2
3
   Contributed to the GNU project by Robert Harley.
4
   Improvements by Paul Zimmermann and Marco Bodrato.
5
6
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
7
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10
Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
11
12
This file is part of the GNU MP Library.
13
14
The GNU MP Library is free software; you can redistribute it and/or modify
15
it under the terms of either:
16
17
  * the GNU Lesser General Public License as published by the Free
18
    Software Foundation; either version 3 of the License, or (at your
19
    option) any later version.
20
21
or
22
23
  * the GNU General Public License as published by the Free Software
24
    Foundation; either version 2 of the License, or (at your option) any
25
    later version.
26
27
or both in parallel, as here.
28
29
The GNU MP Library is distributed in the hope that it will be useful, but
30
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32
for more details.
33
34
You should have received copies of the GNU General Public License and the
35
GNU Lesser General Public License along with the GNU MP Library.  If not,
36
see https://www.gnu.org/licenses/.  */
37
38
#include "gmp-impl.h"
39
40
void
41
mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
42
         mp_size_t k, mp_size_t twor, int sa,
43
         mp_limb_t vinf0)
44
2.29k
{
45
2.29k
  mp_limb_t cy, saved;
46
2.29k
  mp_size_t twok;
47
2.29k
  mp_size_t kk1;
48
2.29k
  mp_ptr c1, v1, c3, vinf;
49
50
2.29k
  twok = k + k;
51
2.29k
  kk1 = twok + 1;
52
53
2.29k
  c1 = c  + k;
54
2.29k
  v1 = c1 + k;
55
2.29k
  c3 = v1 + k;
56
2.29k
  vinf = c3 + k;
57
58
2.29k
#define v0 (c)
59
  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
60
     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
61
  */
62
2.29k
  if (sa)
63
2.29k
    ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
64
1.57k
  else
65
2.29k
    ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
66
67
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
68
       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
69
70
2.29k
  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
71
                  /* (5 3 1 1 0)*/
72
73
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
74
       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
75
76
  /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
77
     tm1 >= 0                                         (0  1 0  1 0)
78
     No carry comes out from {v1, kk1} +/- {vm1, kk1},
79
     and the division by two is exact.
80
     If (sa!=0) the sign of vm1 is negative */
81
2.29k
  if (sa)
82
716
    {
83
716
#ifdef HAVE_NATIVE_mpn_rsh1add_n
84
716
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
85
#else
86
      ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
87
      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
88
#endif
89
716
    }
90
1.57k
  else
91
1.57k
    {
92
1.57k
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
93
1.57k
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
94
#else
95
      ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
96
      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
97
#endif
98
1.57k
    }
99
100
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
101
       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
102
103
  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
104
     t1 >= 0
105
  */
106
2.29k
  vinf[0] -= mpn_sub_n (v1, v1, c, twok);
107
108
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
109
       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
110
111
  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
112
     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
113
  */
114
2.29k
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
115
2.29k
  mpn_rsh1sub_n (v2, v2, v1, kk1);
116
#else
117
  ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
118
  ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
119
#endif
120
121
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
122
       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
123
124
  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
125
     result is v1 >= 0
126
  */
127
2.29k
  ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
128
129
  /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
130
2.29k
  cy = mpn_add_n (c1, c1, vm1, kk1);
131
2.29k
  MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
132
  /* Memory allocated for vm1 is now free, it can be recycled ...*/
133
134
  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
135
     result is v2 >= 0 */
136
2.29k
  saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
137
2.29k
  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
138
2.29k
#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
139
2.29k
  cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
140
#else
141
  /* Overwrite unused vm1 */
142
  cy = mpn_lshift (vm1, vinf, twor, 1);
143
  cy += mpn_sub_n (v2, v2, vm1, twor);
144
#endif
145
2.29k
  MPN_DECR_U (v2 + twor, kk1 - twor, cy);
146
147
  /* Current matrix is
148
     [1 0 0 0 0; vinf
149
      0 1 0 0 0; v2
150
      1 0 1 0 0; v1
151
      0 1 0 1 0; vm1
152
      0 0 0 0 1] v0
153
     Some values already are in-place (we added vm1 in the correct position)
154
     | vinf|  v1 |  v0 |
155
        | vm1 |
156
     One still is in a separated area
157
  | +v2 |
158
     We have to compute v1-=vinf; vm1 -= v2,
159
     |-vinf|
160
        | -v2 |
161
     Carefully reordering operations we can avoid to compute twice the sum
162
     of the high half of v2 plus the low half of vinf.
163
  */
164
165
  /* Add the high half of t2 in {vinf} */
166
2.29k
  if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
167
2.29k
    cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
168
2.29k
    MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
169
2.29k
  } else { /* triggered only by very unbalanced cases like
170
        (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
171
0
    ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
172
0
  }
173
  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
174
     result is >= 0 */
175
  /* Side effect: we also subtracted (high half) vm1 -= v2 */
176
2.29k
  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
177
2.29k
  vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
178
2.29k
  vinf[0] = saved;
179
2.29k
  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
180
181
  /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
182
     Operate only on the low half.
183
  */
184
2.29k
  cy = mpn_sub_n (c1, c1, v2, k);
185
2.29k
  MPN_DECR_U (v1, kk1, cy);
186
187
  /********************* Beginning the final phase **********************/
188
189
  /* Most of the recomposition was done */
190
191
  /* add t2 in {c+3k, ...}, but only the low half */
192
2.29k
  cy = mpn_add_n (c3, c3, v2, k);
193
2.29k
  vinf[0] += cy;
194
2.29k
  ASSERT(vinf[0] >= cy); /* No carry */
195
2.29k
  MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
196
197
2.29k
#undef v0
198
2.29k
}