Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpn/rootrem.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
2
   store the truncated integer part at rootp and the remainder at remp.
3
4
   Contributed by Paul Zimmermann (algorithm) and
5
   Paul Zimmermann and Torbjorn Granlund (implementation).
6
   Marco Bodrato wrote logbased_root to seed the loop.
7
8
   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
9
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
10
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
11
12
Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.
13
14
This file is part of the GNU MP Library.
15
16
The GNU MP Library is free software; you can redistribute it and/or modify
17
it under the terms of either:
18
19
  * the GNU Lesser General Public License as published by the Free
20
    Software Foundation; either version 3 of the License, or (at your
21
    option) any later version.
22
23
or
24
25
  * the GNU General Public License as published by the Free Software
26
    Foundation; either version 2 of the License, or (at your option) any
27
    later version.
28
29
or both in parallel, as here.
30
31
The GNU MP Library is distributed in the hope that it will be useful, but
32
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
33
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
34
for more details.
35
36
You should have received copies of the GNU General Public License and the
37
GNU Lesser General Public License along with the GNU MP Library.  If not,
38
see https://www.gnu.org/licenses/.  */
39
40
/* FIXME:
41
     This implementation is not optimal when remp == NULL, since the complexity
42
     is M(n), whereas it should be M(n/k) on average.
43
*/
44
45
#include <stdio.h>    /* for NULL */
46
47
#include "gmp-impl.h"
48
#include "longlong.h"
49
50
static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
51
               mp_limb_t, int);
52
53
/* {rp,un} <- {up,un} >> cnt.  Unlike mpn_rshift, cnt == 0 is allowed
   here and degenerates to an increasing limb copy (safe when rp <= up).  */
#define MPN_RSHIFT(rp,up,un,cnt) \
  do {									\
    if ((cnt) != 0)							\
      mpn_rshift (rp, up, un, cnt);					\
    else								\
      {									\
	MPN_COPY_INCR (rp, up, un);					\
      }									\
  } while (0)
62
63
/* {rp,un} <- {up,un} << cnt, with the shifted-out high limb stored in cy.
   Unlike mpn_lshift, cnt == 0 is allowed here and degenerates to a
   decreasing limb copy (safe when rp >= up) with cy = 0.  */
#define MPN_LSHIFT(cy,rp,up,un,cnt) \
  do {									\
    if ((cnt) != 0)							\
      cy = mpn_lshift (rp, up, un, cnt);				\
    else								\
      {									\
	MPN_COPY_DECR (rp, up, un);					\
	cy = 0;								\
      }									\
  } while (0)
73
74
75
/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
   If remp <> NULL, put in {remp, un} the remainder.
   Return the size (in limbs) of the remainder if remp <> NULL,
	  or a non-zero value iff the remainder is non-zero when remp = NULL.
   Assumes:
   (a) up[un-1] is not zero
   (b) rootp has at least space for ceil(un/k) limbs
   (c) remp has at least space for un limbs (in case remp <> NULL)
   (d) the operands do not overlap.

   The auxiliary memory usage is 3*un+2 if remp = NULL,
   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
*/
mp_size_t
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
	     mp_srcptr up, mp_size_t un, mp_limb_t k)
{
  ASSERT (un > 0);
  ASSERT (up[un - 1] != 0);
  ASSERT (k > 1);

  /* Square root has a dedicated, faster routine.  */
  if (UNLIKELY (k == 2))
    return mpn_sqrtrem (rootp, remp, up, un);
  /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */
  if (remp == NULL && (un + 2) / 3 > k)
    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
       with one more limb, allowing us to compute the exact integral result. */
    {
      mp_ptr sp, wp;
      mp_size_t rn, sn, wn;
      TMP_DECL;
      TMP_MARK;
      wn = un + k;		/* padded input U * 2^(k*GMP_NUMB_BITS) */
      sn = (un - 1) / k + 2;	/* ceil(un/k) + 1 */
      TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
			 sp, sn); /* approximate root of padded input */
      MPN_COPY (wp + k, up, un);
      MPN_FILL (wp, k, 0);
      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
      /* The approximate root S = {sp,sn} is either the correct root of
	 {sp,sn}, or 1 too large.  Thus unless the least significant limb of
	 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
	 limb.  (In case sp[0]=1, we can deduce the root, but not decide
	 whether it is exact or not.) */
      MPN_COPY (rootp, sp + 1, sn - 1);
      TMP_FREE;
      return rn;
    }
  else
    {
      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
    }
}
128
129
3.84k
/* Number of fraction bits of the input actually consulted by
   logbased_root (the implicit leading 1 bit is not counted).  */
#define LOGROOT_USED_BITS 8
/* Non-zero means the log-based seed may need up to two downward
   corrections; callers assert c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS.  */
#define LOGROOT_NEEDS_TWO_CORRECTIONS 1
#define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
/* Puts in *rootp some bits of the k-th root of the number
   2^bitn * 1.op ; where op represents the "fractional" bits.

   The returned value is the number of bits of the root minus one;
   i.e. an approximation of the root will be
   (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).

   Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
   one is not counted).
 */
static unsigned
logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
{
  /* Table of base-2 logarithms, 8 fractional bits:
     vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
  static const
  unsigned char vlog[] = {1,   2,   4,   5,   7,   8,   9,  11,  12,  14,  15,  16,  18,  19,  21,  22,
			 23,  25,  26,  27,  29,  30,  31,  33,  34,  35,  37,  38,  39,  40,  42,  43,
			 44,  46,  47,  48,  49,  51,  52,  53,  54,  56,  57,  58,  59,  61,  62,  63,
			 64,  65,  67,  68,  69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,
			 83,  84,  85,  87,  88,  89,  90,  91,  92,  93,  94,  96,  97,  98,  99, 100,
			101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
			118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
			135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
			150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
			165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
			180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
			194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
			207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
			220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
			232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
			245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};

  /* Table of base-2 exponentials, 8 fractional bits:
     vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
  static const
  unsigned char vexp[] = {0,   1,   2,   2,   3,   4,   4,   5,   6,   7,   7,   8,   9,   9,  10,  11,
			 12,  12,  13,  14,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  22,  23,
			 23,  24,  25,  26,  26,  27,  28,  29,  30,  30,  31,  32,  33,  33,  34,  35,
			 36,  37,  37,  38,  39,  40,  41,  41,  42,  43,  44,  45,  45,  46,  47,  48,
			 49,  50,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  59,  60,  61,  61,
			 62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  73,  74,  75,  75,
			 76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  86,  87,  88,  89,  90,
			 91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
			107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
			123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
			139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
			157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
			175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
			194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
			214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
			235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
  mp_bitcnt_t retval;

  /* log2(U)/k is computed in fixed point with LOGROOT_USED_BITS fraction
     bits; the shifted value (bitn << LOGROOT_USED_BITS) must not overflow
     mp_bitcnt_t, hence the two-step fallback below.  */
  if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
    {
      /* In the unlikely case, we use two divisions and a modulo. */
      retval = bitn / k;
      bitn %= k;
      bitn = (bitn << LOGROOT_USED_BITS |
	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
    }
  else
    {
      bitn = (bitn << LOGROOT_USED_BITS |
	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
      retval = bitn >> LOGROOT_USED_BITS;
      bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
    }
  ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
  /* Map the fractional part of log2(root) back through the exp table,
     restoring the implicit leading 1 bit of the result.  */
  *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
    | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
  return retval;
}
204
205
/* Compute the kth root of {up,un} into rootp, and (when remp <> NULL) the
   remainder into remp; see mpn_rootrem for the full contract.  The root is
   seeded from logbased_root and refined by Newton-style iterations whose
   step sizes were precomputed in sizes[].
   If approx is non-zero, does not compute the final remainder, and the
   returned root may be one too large unless its low limb is 0 or 1 (the
   caller, mpn_rootrem, compensates by truncating one limb).  */
static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
		      mp_limb_t k, int approx)
{
  mp_ptr qp, rp, sp, wp, scratch;
  mp_size_t qn, rn, sn, wn, nl, bn;
  mp_limb_t save, save2, cy, uh;
  mp_bitcnt_t unb; /* number of significant bits of {up,un} */
  mp_bitcnt_t xnb; /* number of significant bits of the result */
  mp_bitcnt_t b, kk;
  mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
  int ni;
  int perf_pow;
  unsigned ulz, snb, c, logk;
  TMP_DECL;

  /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
  uh = up[un - 1];
  count_leading_zeros (ulz, uh);
  ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
  unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
  /* unb is the (truncated) logarithm of the input U in base 2 */

  if (unb < k) /* root is 1 */
    {
      rootp[0] = 1;
      if (remp == NULL)
	un -= (*up == CNST_LIMB (1)); /* Non-zero iff {up,un} > 1 */
      else
	{
	  mpn_sub_1 (remp, up, un, CNST_LIMB (1));
	  un -= (remp [un - 1] == 0); /* There should be at most one zero limb,
					 if we demand u to be normalized  */
	}
      return un;
    }
  /* if (unb - k < k/2 + k/16) // root is 2 */

  /* Gather the top GMP_NUMB_BITS of U into uh, dropping the leading 1 bit
     (spilling into the next limb when the top limb had exactly one bit).  */
  if (ulz == GMP_NUMB_BITS)
    uh = up[un - 2];
  else
    uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
  ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);

  xnb = logbased_root (rootp, uh, unb, k);
  snb = LOGROOT_RETURNED_BITS - 1;
  /* xnb+1 is the number of bits of the root R */
  /* snb+1 is the number of bits of the current approximation S */

  kk = k * xnb;		/* number of truncated bits in the input */

  /* FIXME: Should we skip the next two loops when xnb <= snb ? */
  for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk )
    ;
  /* logk = ceil(log(k)/log(2)) + 1 */

  /* xnb is the number of remaining bits to determine in the kth root */
  for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
    {
      /* invariant: here we want xnb+1 total bits for the kth root */

      /* if c is the new value of xnb, this means that we'll go from a
	 root of c+1 bits (say s') to a root of xnb+1 bits.
	 It is proved in the book "Modern Computer Arithmetic" by Brent
	 and Zimmermann, Chapter 1, that
	 if s' >= k*beta, then at most one correction is necessary.
	 Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
	 c >= ceil((xnb + log2(k))/2). */
      if (xnb > logk)
	xnb = (xnb + logk) / 2;
      else
	--xnb; /* add just one bit at a time */
    }

  *rootp >>= snb - xnb;	/* keep only the xnb+1 seed bits we trust */
  kk -= xnb;

  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
     sizes[i] <= 2 * sizes[i+1].
     Newton iteration will first compute sizes[ni-1] extra bits,
     then sizes[ni-2], ..., then sizes[0] = b. */

  TMP_MARK;
  /* qp and wp need enough space to store S'^k where S' is an approximate
     root. Since S' can be as large as S+2, the worst case is when S=2 and
     S'=4. But then since we know the number of bits of S in advance, S'
     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
     fits in un limbs, the number of extra limbs needed is bounded by
     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
  /* THINK: with the use of logbased_root, maybe the constant is
     258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
  TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
		     qp, un + EXTRA,  /* will contain quotient and remainder
					 of R/(k*S^(k-1)), and S^k */
		     wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
					 and temporary for mpn_pow_1 */

  if (remp == NULL)
    rp = scratch;  /* will contain the remainder */
  else
    rp = remp;
  sp = rootp;

  sn = 1;   /* Initial approximation has one limb */

  for (b = xnb; ni != 0; --ni)
    {
      /* 1: loop invariant:
	 {sp, sn} is the current approximation of the root, which has
		  exactly 1 + sizes[ni] bits.
	 {rp, rn} is the current remainder
	 {wp, wn} = {sp, sn}^(k-1)
	 kk = number of truncated bits of the input
      */

      /* Since each iteration treats b bits from the root and thus k*b bits
	 from the input, and we already considered b bits from the input,
	 we now have to take another (k-1)*b bits from the input. */
      kk -= (k - 1) * b; /* remaining input bits */
      /* {rp, rn} = floor({up, un} / 2^kk) */
      rn = un - kk / GMP_NUMB_BITS;
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
      rn -= rp[rn - 1] == 0;

      /* 9: current buffers: {sp,sn}, {rp,rn} */

      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  /* W <- S^(k-1) for the next iteration,
	     and S^k = W * S. */
	  wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
	  mpn_mul (qp, wp, wn, sp, sn);
	  qn = wn + sn;
	  qn -= qp[qn - 1] == 0;

	  perf_pow = 1;
	  /* if S^k > floor(U/2^kk), the root approximation was too large */
	  if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */

      /* sometimes two corrections are needed with logbased_root */
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
      ASSERT_ALWAYS (rn >= qn);

      b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
					next iteration */
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */

      kk = kk - b;
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
	     - floor(kk / GMP_NUMB_BITS)
	    <= 1 + (kk + b - 1) / GMP_NUMB_BITS
	       - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
	     = 2 + (b - 2) / GMP_NUMB_BITS
	 thus since nl is an integer:
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */

      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* R = R - Q = floor(U/2^kk) - S^k */
      if (perf_pow != 0)
	{
	  mpn_sub (rp, rp, rn, qp, qn);
	  MPN_NORMALIZE_NOT_ZERO (rp, rn);

	  /* first multiply the remainder by 2^b */
	  MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
	  rn = rn + bn;
	  if (cy != 0)
	    {
	      rp[rn] = cy;
	      rn++;
	    }

	  save = rp[bn];
	  /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
	  if (nl - 1 > bn)
	    save2 = rp[bn + 1];
	}
      else
	{
	  /* S^k was exactly floor(U/2^kk): remainder is zero, only the
	     freshly inserted input bits below will matter.  */
	  rn = bn;
	  save2 = save = 0;
	}
      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* Now insert bits [kk,kk+b-1] from the input U */
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
      /* set to zero high bits of rp[bn] */
      rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
      /* restore corresponding bits */
      rp[bn] |= save;
      if (nl - 1 > bn)
	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
			       they start by bit 0 in rp[0], so they use
			       at most ceil(b/GMP_NUMB_BITS) limbs */
      /* FIXME: Should we normalise {rp,rn} here ? */

      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
      cy = mpn_mul_1 (wp, wp, wn, k);
      wp[wn] = cy;
      wn += cy != 0;

      /* 6: current buffers: {sp,sn}, {qp,qn} */

      /* multiply the root approximation by 2^b */
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
      sn = sn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  sp[sn] = cy;
	  sn++;
	}

      save = sp[b / GMP_NUMB_BITS];

      /* Number of limbs used by b bits, when least significant bit is
	 aligned to least limb */
      bn = (b - 1) / GMP_NUMB_BITS + 1;

      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
      if (UNLIKELY (rn < wn))
	{
	  MPN_FILL (sp, bn, 0);
	}
      else
	{
	  qn = rn - wn; /* expected quotient size */
	  if (qn <= bn) { /* Divide only if result is not too big. */
	    mpn_div_q (qp, rp, rn, wp, wn, scratch);
	    qn += qp[qn] != 0;
	  }

	  /* 5: current buffers: {sp,sn}, {qp,qn}.
	     Note: {rp,rn} is not needed any more since we'll compute it from
	     scratch at the end of the loop.
	  */

	  /* the quotient should be smaller than 2^b, since the previous
	     approximation was correctly rounded toward zero */
	  if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
			  qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
	    {
	      /* Clamp an out-of-range quotient to 2^b - 1; the correction
		 loop at the top of the next iteration fixes any excess.  */
	      for (qn = 1; qn < bn; ++qn)
		sp[qn - 1] = GMP_NUMB_MAX;
	      sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
	    }
	  else
	    {
	      /* 7: current buffers: {sp,sn}, {qp,qn} */

	      /* Combine sB and q to form sB + q.  */
	      MPN_COPY (sp, qp, qn);
	      MPN_ZERO (sp + qn, bn - qn);
	    }
	}
      sp[b / GMP_NUMB_BITS] |= save;

      /* 8: current buffer: {sp,sn} */

    }

  /* otherwise we have rn > 0, thus the return value is ok */
  if (!approx || sp[0] <= CNST_LIMB (1))
    {
      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  /* Last iteration: we don't need W anymore. */
	  /* mpn_pow_1 requires that both qp and wp have enough
	     space to store the result {sp,sn}^k + 1 limb */
	  qn = mpn_pow_1 (qp, sp, sn, k, wp);

	  perf_pow = 1;
	  if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	};

      /* sometimes two corrections are needed with logbased_root */
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);

      rn = perf_pow != 0;	/* zero iff U is a perfect kth power */
      if (rn != 0 && remp != NULL)
	{
	  mpn_sub (remp, up, un, qp, qn);
	  rn = un;
	  MPN_NORMALIZE_NOT_ZERO (remp, rn);
	}
    }

  TMP_FREE;
  return rn;
}