Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpn/perfsqr.c
Line
Count
Source
1
/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
2
   zero otherwise.
3
4
Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software
5
Foundation, Inc.
6
7
This file is part of the GNU MP Library.
8
9
The GNU MP Library is free software; you can redistribute it and/or modify
10
it under the terms of either:
11
12
  * the GNU Lesser General Public License as published by the Free
13
    Software Foundation; either version 3 of the License, or (at your
14
    option) any later version.
15
16
or
17
18
  * the GNU General Public License as published by the Free Software
19
    Foundation; either version 2 of the License, or (at your option) any
20
    later version.
21
22
or both in parallel, as here.
23
24
The GNU MP Library is distributed in the hope that it will be useful, but
25
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27
for more details.
28
29
You should have received copies of the GNU General Public License and the
30
GNU Lesser General Public License along with the GNU MP Library.  If not,
31
see https://www.gnu.org/licenses/.  */
32
33
#include <stdio.h> /* for NULL */
34
#include "gmp-impl.h"
35
#include "longlong.h"
36
37
#include "perfsqr.h"
38
39
40
/* change this to "#define TRACE(x) x" for diagnostics */
41
#define TRACE(x)
42
43
44
45
/* PERFSQR_MOD_* detects non-squares using residue tests.
46
47
   A macro PERFSQR_MOD_TEST is setup by gen-psqr.c in perfsqr.h.  It takes
48
   {up,usize} modulo a selected modulus to get a remainder r.  For 32-bit or
49
   64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,
50
   or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP
51
   used.  PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this
52
   case too.
53
54
   PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or
55
   PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table
56
   data indicating residues and non-residues modulo those divisors.  The
57
   table data is in 1 or 2 limbs worth of bits respectively, per the size of
58
   each d.
59
60
   A "modexact" style remainder is taken to reduce r modulo d.
61
   PERFSQR_MOD_IDX implements this, producing an index "idx" for use with
62
   the table data.  Notice there's just one multiplication by a constant
63
   "inv", for each d.
64
65
   The modexact doesn't produce a true r%d remainder, instead idx satisfies
66
   "-(idx<<PERFSQR_MOD_BITS) == r mod d".  Because d is odd, this factor
67
   -2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is
68
   accounted for by having the table data suitably permuted.
69
70
   The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.
71
   In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit
72
   each divisor d meaning the modexact multiply can take place entirely
73
   within one limb, giving the compiler the chance to optimize it, in a way
74
   that say umul_ppmm would not give.
75
76
   There's no need for the divisors d to be prime, in fact gen-psqr.c makes
77
   a deliberate effort to combine factors so as to reduce the number of
78
   separate tests done on r.  But such combining is limited to d <=
79
   2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.
80
81
   Alternatives:
82
83
   It'd be possible to use bigger divisors d, and more than 2 limbs of table
84
   data, but this doesn't look like it would be of much help to the prime
85
   factors in the usual moduli 2^24-1 or 2^48-1.
86
87
   The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're
88
   just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime
89
   factors.  2^32-1 and 2^64-1 would be equally easy to calculate, but have
90
   fewer prime factors.
91
92
   The nails case usually ends up using mpn_mod_1, which is a lot slower
93
   than mpn_mod_34lsub1.  Perhaps other such special moduli could be found
94
   for the nails case.  Two-term things like 2^30-2^15-1 might be
95
   candidates.  Or at worst some on-the-fly de-nailing would allow the plain
96
   2^24-1 to be used.  Currently nails are too preliminary to be worried
97
   about.
98
99
*/
100
101
355
#define PERFSQR_MOD_MASK       ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
102
103
210
#define MOD34_BITS  (GMP_NUMB_BITS / 4 * 3)
104
105
#define MOD34_MASK  ((CNST_LIMB(1) << MOD34_BITS) - 1)
105
106
#define PERFSQR_MOD_34(r, up, usize)        \
107
105
  do {               \
108
105
    (r) = mpn_mod_34lsub1 (up, usize);       \
109
105
    (r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS);    \
110
105
  } while (0)
111
112
/* FIXME: The %= here isn't good, and might destroy any savings from keeping
113
   the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
114
   Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
115
   and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our
116
   normal case, so lets not worry too much about mod_1.  */
117
#define PERFSQR_MOD_PP(r, up, usize)          \
118
  do {                  \
119
    if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD)) \
120
      {                 \
121
  (r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,   \
122
        PERFSQR_PP_INVERTED);     \
123
  (r) %= PERFSQR_PP;            \
124
      }                 \
125
    else                \
126
      {                 \
127
  (r) = mpn_mod_1 (up, usize, PERFSQR_PP);      \
128
      }                 \
129
  } while (0)
130
131
#define PERFSQR_MOD_IDX(idx, r, d, inv)       \
132
355
  do {               \
133
355
    mp_limb_t  q;           \
134
355
    ASSERT ((r) <= PERFSQR_MOD_MASK);       \
135
355
    ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1);   \
136
355
    ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK);   \
137
355
                \
138
355
    q = ((r) * (inv)) & PERFSQR_MOD_MASK;     \
139
355
    ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK));   \
140
355
    (idx) = (q * (d)) >> PERFSQR_MOD_BITS;     \
141
355
  } while (0)
142
143
#define PERFSQR_MOD_1(r, d, inv, mask)        \
144
83
  do {               \
145
83
    unsigned   idx;           \
146
83
    ASSERT ((d) <= GMP_LIMB_BITS);        \
147
83
    PERFSQR_MOD_IDX(idx, r, d, inv);       \
148
83
    TRACE (printf ("  PERFSQR_MOD_1 d=%u r=%lu idx=%u\n", \
149
83
       d, r%d, idx));       \
150
83
    if ((((mask) >> idx) & 1) == 0)       \
151
83
      {               \
152
10
  TRACE (printf ("  non-square\n"));      \
153
10
  return 0;           \
154
10
      }                \
155
83
  } while (0)
156
157
/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
158
   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch.  */
159
#define PERFSQR_MOD_2(r, d, inv, mhi, mlo)      \
160
272
  do {               \
161
272
    mp_limb_t  m;           \
162
272
    unsigned   idx;           \
163
272
    ASSERT ((d) <= 2*GMP_LIMB_BITS);        \
164
272
                \
165
272
    PERFSQR_MOD_IDX (idx, r, d, inv);        \
166
272
    TRACE (printf ("  PERFSQR_MOD_2 d=%u r=%lu idx=%u\n", \
167
272
       d, r%d, idx));       \
168
272
    m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi));  \
169
272
    idx %= GMP_LIMB_BITS;         \
170
272
    if (((m >> idx) & 1) == 0)         \
171
272
      {               \
172
40
  TRACE (printf ("  non-square\n"));      \
173
40
  return 0;           \
174
40
      }                \
175
272
  } while (0)
176
177
178
int
179
mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
180
131
{
181
131
  ASSERT (usize >= 1);
182
183
131
  TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));
184
185
  /* The first test excludes 212/256 (82.8%) of the perfect square candidates
186
     in O(1) time.  */
187
131
  {
188
131
    unsigned  idx = up[0] % 0x100;
189
131
    if (((sq_res_0x100[idx / GMP_LIMB_BITS]
190
131
    >> (idx % GMP_LIMB_BITS)) & 1) == 0)
191
26
      return 0;
192
131
  }
193
194
#if 0
195
  /* Check that we have even multiplicity of 2, and then check that the rest is
196
     a possible perfect square.  Leave disabled until we can determine this
197
     really is an improvement.  It it is, it could completely replace the
198
     simple probe above, since this should throw out more non-squares, but at
199
     the expense of somewhat more cycles.  */
200
  {
201
    mp_limb_t lo;
202
    int cnt;
203
    lo = up[0];
204
    while (lo == 0)
205
      up++, lo = up[0], usize--;
206
    count_trailing_zeros (cnt, lo);
207
    if ((cnt & 1) != 0)
208
      return 0;     /* return of not even multiplicity of 2 */
209
    lo >>= cnt;     /* shift down to align lowest non-zero bit */
210
    lo >>= 1;     /* shift away lowest non-zero bit */
211
    if ((lo & 3) != 0)
212
      return 0;
213
  }
214
#endif
215
216
217
  /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares
218
     according to their residues modulo small primes (or powers of
219
     primes).  See perfsqr.h.  */
220
105
  PERFSQR_MOD_TEST (up, usize);
221
222
223
  /* For the third and last test, we finally compute the square root,
224
     to make sure we've really got a perfect square.  */
225
55
  {
226
55
    mp_ptr root_ptr;
227
55
    int res;
228
55
    TMP_DECL;
229
230
55
    TMP_MARK;
231
55
    root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);
232
233
    /* Iff mpn_sqrtrem returns zero, the square is perfect.  */
234
55
    res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
235
55
    TMP_FREE;
236
237
55
    return res;
238
105
  }
239
105
}