Coverage Report

Created: 2025-11-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gmp/mpn/sqrlo_basecase.c
Line
Count
Source
1
/* mpn_sqrlo_basecase -- Internal routine to square a natural number
2
   of length n.
3
4
   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
5
   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6
7
8
Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015,
9
2016 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
#include "gmp-impl.h"
38
#include "longlong.h"
39
40
#ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
41
#if HAVE_NATIVE_mpn_addmul_1
42
#define SQRLO_SHORTCUT_MULTIPLICATIONS 0
43
#else
44
#define SQRLO_SHORTCUT_MULTIPLICATIONS 1
45
#endif
46
#endif
47
48
#if HAVE_NATIVE_mpn_sqr_diagonal
49
#define MPN_SQR_DIAGONAL(rp, up, n)         \
50
  mpn_sqr_diagonal (rp, up, n)
51
#else
52
#define MPN_SQR_DIAGONAL(rp, up, n)         \
53
485k
  do {                 \
54
485k
    mp_size_t _i;             \
55
4.65M
    for (_i = 0; _i < (n); _i++)         \
56
4.16M
      {                 \
57
4.16M
  mp_limb_t ul, lpl;            \
58
4.16M
  ul = (up)[_i];              \
59
4.16M
  umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);  \
60
4.16M
  (rp)[2 * _i] = lpl >> GMP_NAIL_BITS;       \
61
4.16M
      }                  \
62
485k
  } while (0)
63
#endif
64
65
#define MPN_SQRLO_DIAGONAL(rp, up, n)         \
66
485k
  do {                 \
67
485k
    mp_size_t nhalf;              \
68
485k
    nhalf = (n) >> 1;             \
69
485k
    MPN_SQR_DIAGONAL ((rp), (up), nhalf);       \
70
485k
    if (((n) & 1) != 0)             \
71
485k
      {                 \
72
263k
  mp_limb_t op;             \
73
263k
  op = (up)[nhalf];           \
74
263k
  (rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;     \
75
263k
      }                  \
76
485k
  } while (0)
77
78
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
79
#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)       \
80
  do {                  \
81
    MPN_SQRLO_DIAGONAL((rp), (up), (n));        \
82
    mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);      \
83
  } while (0)
84
#else
85
#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)       \
86
485k
  do {                 \
87
485k
    MPN_SQRLO_DIAGONAL((rp), (up), (n));        \
88
485k
    mpn_lshift ((tp), (tp), (n) - 1, 1);        \
89
485k
    mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);     \
90
485k
  } while (0)
91
#endif
92
93
/* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
94
#define SQRLO_BASECASE_ALLOC            \
95
  (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
96
97
/* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
98
#ifndef SQRLO_SPECIAL_CASES
99
1.81M
#define SQRLO_SPECIAL_CASES 2
100
#endif
101
102
#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
103
#define MAYBE_special_cases 1
104
#else
105
#define MAYBE_special_cases \
106
1.81M
  ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0))
107
#endif
108
109
void
110
mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
111
909k
{
112
909k
  mp_limb_t ul;
113
114
909k
  ASSERT (n >= 1);
115
909k
  ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
116
117
909k
  ul = up[0];
118
119
909k
  if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES)
120
424k
    {
121
#if SQRLO_SPECIAL_CASES == 1
122
      rp[0] = (ul * ul) & GMP_NUMB_MASK;
123
#else
124
424k
      if (n == 1)
125
351k
  rp[0] = (ul * ul) & GMP_NUMB_MASK;
126
72.8k
      else
127
72.8k
  {
128
72.8k
    mp_limb_t hi, lo, ul1;
129
72.8k
    umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
130
72.8k
    rp[0] = lo >> GMP_NAIL_BITS;
131
72.8k
    ul1 = up[1];
132
72.8k
#if SQRLO_SPECIAL_CASES == 2
133
72.8k
    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
134
#else
135
    if (n == 2)
136
      rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
137
    else
138
      {
139
        mp_limb_t hi1;
140
#if GMP_NAIL_BITS != 0
141
        ul <<= 1;
142
#endif
143
        umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
144
        hi1 += ul * up[2];
145
#if GMP_NAIL_BITS == 0
146
        hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
147
        add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
148
#else
149
        hi += lo >> GMP_NAIL_BITS;
150
        rp[1] = hi & GMP_NUMB_MASK;
151
        rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
152
#endif
153
      }
154
#endif
155
72.8k
  }
156
424k
#endif
157
424k
    }
158
485k
  else
159
485k
    {
160
485k
      mp_limb_t tp[SQRLO_BASECASE_ALLOC];
161
485k
      mp_size_t i;
162
163
      /* must fit n-1 limbs in tp */
164
485k
      ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
165
166
485k
      --n;
167
485k
#if SQRLO_SHORTCUT_MULTIPLICATIONS
168
485k
      {
169
485k
  mp_limb_t cy;
170
171
485k
  cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
172
3.94M
  for (i = 1; 2 * i + 1 < n; ++i)
173
3.46M
    {
174
3.46M
      ul = up[i];
175
3.46M
      cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
176
3.46M
    }
177
485k
  tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
178
485k
      }
179
#else
180
      mpn_mul_1 (tp, up + 1, n, ul);
181
      for (i = 1; 2 * i < n; ++i)
182
  mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
183
#endif
184
185
485k
      MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
186
485k
    }
187
909k
}
188
#undef SQRLO_SPECIAL_CASES
189
#undef MAYBE_special_cases
190
#undef SQRLO_BASECASE_ALLOC
191
#undef SQRLO_SHORTCUT_MULTIPLICATIONS
192
#undef MPN_SQR_DIAGONAL
193
#undef MPN_SQRLO_DIAGONAL
194
#undef MPN_SQRLO_DIAG_ADDLSH1