Coverage Report

Created: 2024-11-25 06:29

/src/gmp/mpn/sqr_basecase.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_sqr_basecase -- Internal routine to square a natural number
2
   of length n.
3
4
   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
5
   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6
7
8
Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
9
Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
#include "gmp-impl.h"
38
#include "longlong.h"
39
40
41
#if HAVE_NATIVE_mpn_sqr_diagonal
42
#define MPN_SQR_DIAGONAL(rp, up, n)         \
43
  mpn_sqr_diagonal (rp, up, n)
44
#else
45
#define MPN_SQR_DIAGONAL(rp, up, n)         \
46
43.8M
  do {                 \
47
43.8M
    mp_size_t _i;             \
48
733M
    for (_i = 0; _i < (n); _i++)         \
49
689M
      {                 \
50
689M
  mp_limb_t ul, lpl;            \
51
689M
  ul = (up)[_i];              \
52
689M
  umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);  \
53
689M
  (rp)[2 * _i] = lpl >> GMP_NAIL_BITS;       \
54
689M
      }                  \
55
43.8M
  } while (0)
56
#endif
57
58
#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
59
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)       \
60
  mpn_sqr_diag_addlsh1 (rp, tp, up, n)
61
#else
62
#if HAVE_NATIVE_mpn_addlsh1_n
63
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)       \
64
  do {                  \
65
    mp_limb_t cy;             \
66
    MPN_SQR_DIAGONAL (rp, up, n);         \
67
    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);     \
68
    rp[2 * n - 1] += cy;            \
69
  } while (0)
70
#else
71
#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)       \
72
43.8M
  do {                 \
73
43.8M
    mp_limb_t cy;             \
74
43.8M
    MPN_SQR_DIAGONAL (rp, up, n);         \
75
43.8M
    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);        \
76
43.8M
    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);     \
77
43.8M
    rp[2 * n - 1] += cy;            \
78
43.8M
  } while (0)
79
#endif
80
#endif
81
82
83
#undef READY_WITH_mpn_sqr_basecase
84
85
86
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
87
void
88
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
89
{
90
  mp_size_t i;
91
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
92
  mp_ptr tp = tarr;
93
  mp_limb_t cy;
94
95
  /* must fit 2*n limbs in tarr */
96
  ASSERT (n <= SQR_TOOM2_THRESHOLD);
97
98
  if ((n & 1) != 0)
99
    {
100
      if (n == 1)
101
  {
102
    mp_limb_t ul, lpl;
103
    ul = up[0];
104
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
105
    rp[0] = lpl >> GMP_NAIL_BITS;
106
    return;
107
  }
108
109
      MPN_ZERO (tp, n);
110
111
      for (i = 0; i <= n - 2; i += 2)
112
  {
113
    cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
114
    tp[n + i] = cy;
115
  }
116
    }
117
  else
118
    {
119
      if (n == 2)
120
  {
121
#if HAVE_NATIVE_mpn_mul_2
122
    rp[3] = mpn_mul_2 (rp, up, 2, up);
123
#else
124
    rp[0] = 0;
125
    rp[1] = 0;
126
    rp[3] = mpn_addmul_2 (rp, up, 2, up);
127
#endif
128
    return;
129
  }
130
131
      MPN_ZERO (tp, n);
132
133
      for (i = 0; i <= n - 4; i += 2)
134
  {
135
    cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
136
    tp[n + i] = cy;
137
  }
138
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
139
      tp[2 * n - 3] = cy;
140
    }
141
142
  MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
143
}
144
#define READY_WITH_mpn_sqr_basecase
145
#endif
146
147
148
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
149
150
/* mpn_sqr_basecase using plain mpn_addmul_2.
151
152
   This is tricky, since we have to let mpn_addmul_2 make some undesirable
153
   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
154
   This forces us to conditionally add or subtract the mpn_sqr_diagonal
155
   results.  Examples of the product we form:
156
157
   n = 4              n = 5   n = 6
158
   u1u0 * u3u2u1      u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1
159
   u2 * u3        u3u2 * u4u3 u3u2 * u5u4u3
160
          u4 * u5
161
   add: u0 u2 u3      add: u0 u2 u4 add: u0 u2 u4 u5
162
   sub: u1        sub: u1 u3  sub: u1 u3
163
*/
164
165
void
166
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
167
{
168
  mp_size_t i;
169
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
170
  mp_ptr tp = tarr;
171
  mp_limb_t cy;
172
173
  /* must fit 2*n limbs in tarr */
174
  ASSERT (n <= SQR_TOOM2_THRESHOLD);
175
176
  if ((n & 1) != 0)
177
    {
178
      mp_limb_t x0, x1;
179
180
      if (n == 1)
181
  {
182
    mp_limb_t ul, lpl;
183
    ul = up[0];
184
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
185
    rp[0] = lpl >> GMP_NAIL_BITS;
186
    return;
187
  }
188
189
      /* The code below doesn't like unnormalized operands.  Since such
190
   operands are unusual, handle them with a dumb recursion.  */
191
      if (up[n - 1] == 0)
192
  {
193
    rp[2 * n - 2] = 0;
194
    rp[2 * n - 1] = 0;
195
    mpn_sqr_basecase (rp, up, n - 1);
196
    return;
197
  }
198
199
      MPN_ZERO (tp, n);
200
201
      for (i = 0; i <= n - 2; i += 2)
202
  {
203
    cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
204
    tp[n + i] = cy;
205
  }
206
207
      MPN_SQR_DIAGONAL (rp, up, n);
208
209
      for (i = 2;; i += 4)
210
  {
211
    x0 = rp[i + 0];
212
    rp[i + 0] = (-x0) & GMP_NUMB_MASK;
213
    x1 = rp[i + 1];
214
    rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
215
    __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
216
    if (i + 4 >= 2 * n)
217
      break;
218
    mpn_incr_u (rp + i + 4, cy);
219
  }
220
    }
221
  else
222
    {
223
      mp_limb_t x0, x1;
224
225
      if (n == 2)
226
  {
227
#if HAVE_NATIVE_mpn_mul_2
228
    rp[3] = mpn_mul_2 (rp, up, 2, up);
229
#else
230
    rp[0] = 0;
231
    rp[1] = 0;
232
    rp[3] = mpn_addmul_2 (rp, up, 2, up);
233
#endif
234
    return;
235
  }
236
237
      /* The code below doesn't like unnormalized operands.  Since such
238
   operands are unusual, handle them with a dumb recursion.  */
239
      if (up[n - 1] == 0)
240
  {
241
    rp[2 * n - 2] = 0;
242
    rp[2 * n - 1] = 0;
243
    mpn_sqr_basecase (rp, up, n - 1);
244
    return;
245
  }
246
247
      MPN_ZERO (tp, n);
248
249
      for (i = 0; i <= n - 4; i += 2)
250
  {
251
    cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
252
    tp[n + i] = cy;
253
  }
254
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
255
      tp[2 * n - 3] = cy;
256
257
      MPN_SQR_DIAGONAL (rp, up, n);
258
259
      for (i = 2;; i += 4)
260
  {
261
    x0 = rp[i + 0];
262
    rp[i + 0] = (-x0) & GMP_NUMB_MASK;
263
    x1 = rp[i + 1];
264
    rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
265
    if (i + 6 >= 2 * n)
266
      break;
267
    __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
268
    mpn_incr_u (rp + i + 4, cy);
269
  }
270
      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
271
    }
272
273
#if HAVE_NATIVE_mpn_addlsh1_n
274
  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
275
#else
276
  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
277
  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
278
#endif
279
  rp[2 * n - 1] += cy;
280
}
281
#define READY_WITH_mpn_sqr_basecase
282
#endif
283
284
285
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1
286
287
/* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack
288
   allocation.  */
289
void
290
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
291
{
292
  if (n == 1)
293
    {
294
      mp_limb_t ul, lpl;
295
      ul = up[0];
296
      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
297
      rp[0] = lpl >> GMP_NAIL_BITS;
298
    }
299
  else
300
    {
301
      mp_size_t i;
302
      mp_ptr xp;
303
304
      rp += 1;
305
      rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]);
306
      for (i = n - 2; i != 0; i--)
307
  {
308
    up += 1;
309
    rp += 2;
310
    rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]);
311
  }
312
313
      xp = rp - 2 * n + 3;
314
      mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n);
315
    }
316
}
317
#define READY_WITH_mpn_sqr_basecase
318
#endif
319
320
321
#if ! defined (READY_WITH_mpn_sqr_basecase)
322
323
/* Default mpn_sqr_basecase using mpn_addmul_1.  */
324
void
325
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
326
43.8M
{
327
43.8M
  mp_size_t i;
328
329
43.8M
  ASSERT (n >= 1);
330
43.8M
  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
331
332
43.8M
  if (n == 1)
333
0
    {
334
0
      mp_limb_t ul, lpl;
335
0
      ul = up[0];
336
0
      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
337
0
      rp[0] = lpl >> GMP_NAIL_BITS;
338
0
    }
339
43.8M
  else
340
43.8M
    {
341
43.8M
      mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
342
43.8M
      mp_ptr tp = tarr;
343
43.8M
      mp_limb_t cy;
344
345
      /* must fit 2*n limbs in tarr */
346
43.8M
      ASSERT (n <= SQR_TOOM2_THRESHOLD);
347
348
43.8M
      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
349
43.8M
      tp[n - 1] = cy;
350
645M
      for (i = 2; i < n; i++)
351
601M
  {
352
601M
    mp_limb_t cy;
353
601M
    cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
354
601M
    tp[n + i - 2] = cy;
355
601M
  }
356
357
43.8M
      MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
358
43.8M
    }
359
43.8M
}
360
#define READY_WITH_mpn_sqr_basecase
361
#endif