Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpn/rootrem.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
2
   store the truncated integer part at rootp and the remainder at remp.
3
4
   Contributed by Paul Zimmermann (algorithm) and
5
   Paul Zimmermann and Torbjorn Granlund (implementation).
6
   Marco Bodrato wrote logbased_root to seed the loop.
7
8
   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
9
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
10
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
11
12
Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.
13
14
This file is part of the GNU MP Library.
15
16
The GNU MP Library is free software; you can redistribute it and/or modify
17
it under the terms of either:
18
19
  * the GNU Lesser General Public License as published by the Free
20
    Software Foundation; either version 3 of the License, or (at your
21
    option) any later version.
22
23
or
24
25
  * the GNU General Public License as published by the Free Software
26
    Foundation; either version 2 of the License, or (at your option) any
27
    later version.
28
29
or both in parallel, as here.
30
31
The GNU MP Library is distributed in the hope that it will be useful, but
32
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
33
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
34
for more details.
35
36
You should have received copies of the GNU General Public License and the
37
GNU Lesser General Public License along with the GNU MP Library.  If not,
38
see https://www.gnu.org/licenses/.  */
39
40
/* FIXME:
41
     This implementation is not optimal when remp == NULL, since the complexity
42
     is M(n), whereas it should be M(n/k) on average.
43
*/
44
45
#include <stdio.h>    /* for NULL */
46
47
#include "gmp-impl.h"
48
#include "longlong.h"
49
50
static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
51
               mp_limb_t, int);
52
53
#define MPN_RSHIFT(rp,up,un,cnt) \
54
2.05k
  do {                 \
55
2.05k
    if ((cnt) != 0)             \
56
2.05k
      mpn_rshift (rp, up, un, cnt);         \
57
2.05k
    else                \
58
2.05k
      {                 \
59
57
  MPN_COPY_INCR (rp, up, un);          \
60
57
      }                  \
61
2.05k
  } while (0)
62
63
#define MPN_LSHIFT(cy,rp,up,un,cnt) \
64
2.05k
  do {                 \
65
2.05k
    if ((cnt) != 0)             \
66
2.05k
      cy = mpn_lshift (rp, up, un, cnt);       \
67
2.05k
    else                \
68
2.05k
      {                 \
69
12
  MPN_COPY_DECR (rp, up, un);          \
70
12
  cy = 0;               \
71
12
      }                  \
72
2.05k
  } while (0)
73
74
75
/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
76
   If remp <> NULL, put in {remp, un} the remainder.
77
   Return the size (in limbs) of the remainder if remp <> NULL,
78
    or a non-zero value iff the remainder is non-zero when remp = NULL.
79
   Assumes:
80
   (a) up[un-1] is not zero
81
   (b) rootp has at least space for ceil(un/k) limbs
82
   (c) remp has at least space for un limbs (in case remp <> NULL)
83
   (d) the operands do not overlap.
84
85
   The auxiliary memory usage is 3*un+2 if remp = NULL,
86
   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
87
*/
88
mp_size_t
89
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
90
       mp_srcptr up, mp_size_t un, mp_limb_t k)
91
174
{
92
174
  ASSERT (un > 0);
93
174
  ASSERT (up[un - 1] != 0);
94
174
  ASSERT (k > 1);
95
96
174
  if (UNLIKELY (k == 2))
97
0
    return mpn_sqrtrem (rootp, remp, up, un);
98
  /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */
99
174
  if (remp == NULL && (un + 2) / 3 > k)
100
    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
101
       with one more limb, allowing us to compute the exact integral result. */
102
26
    {
103
26
      mp_ptr sp, wp;
104
26
      mp_size_t rn, sn, wn;
105
26
      TMP_DECL;
106
26
      TMP_MARK;
107
26
      wn = un + k;
108
26
      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
109
26
      TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
110
26
       sp, sn); /* approximate root of padded input */
111
26
      MPN_COPY (wp + k, up, un);
112
26
      MPN_FILL (wp, k, 0);
113
26
      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
114
      /* The approximate root S = {sp,sn} is either the correct root of
115
   {wp,wn}, or 1 too large.  Thus unless the least significant limb of
116
   S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
117
   limb.  (In case sp[0]=1, we can deduce the root, but not decide
118
   whether it is exact or not.) */
119
26
      MPN_COPY (rootp, sp + 1, sn - 1);
120
26
      TMP_FREE;
121
26
      return rn;
122
26
    }
123
148
  else
124
148
    {
125
148
      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
126
148
    }
127
174
}
128
129
960
#define LOGROOT_USED_BITS 8
130
480
#define LOGROOT_NEEDS_TWO_CORRECTIONS 1
131
160
#define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
132
/* Puts in *rootp some bits of the k^th root of the number
133
   2^bitn * 1.op ; where op represents the "fractional" bits.
134
135
   The returned value is the number of bits of the root minus one;
136
   i.e. an approximation of the root will be
137
   (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).
138
139
   Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
140
   one is not counted).
141
 */
142
static unsigned
143
logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
144
160
{
145
  /* vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
146
160
  static const
147
160
  unsigned char vlog[] = {1,   2,   4,   5,   7,   8,   9,  11,  12,  14,  15,  16,  18,  19,  21,  22,
148
160
       23,  25,  26,  27,  29,  30,  31,  33,  34,  35,  37,  38,  39,  40,  42,  43,
149
160
       44,  46,  47,  48,  49,  51,  52,  53,  54,  56,  57,  58,  59,  61,  62,  63,
150
160
       64,  65,  67,  68,  69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,
151
160
       83,  84,  85,  87,  88,  89,  90,  91,  92,  93,  94,  96,  97,  98,  99, 100,
152
160
      101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
153
160
      118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
154
160
      135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
155
160
      150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
156
160
      165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
157
160
      180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
158
160
      194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
159
160
      207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
160
160
      220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
161
160
      232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
162
160
      245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};
163
164
  /* vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
165
160
  static const
166
160
  unsigned char vexp[] = {0,   1,   2,   2,   3,   4,   4,   5,   6,   7,   7,   8,   9,   9,  10,  11,
167
160
       12,  12,  13,  14,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  22,  23,
168
160
       23,  24,  25,  26,  26,  27,  28,  29,  30,  30,  31,  32,  33,  33,  34,  35,
169
160
       36,  37,  37,  38,  39,  40,  41,  41,  42,  43,  44,  45,  45,  46,  47,  48,
170
160
       49,  50,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  59,  60,  61,  61,
171
160
       62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  73,  74,  75,  75,
172
160
       76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  86,  87,  88,  89,  90,
173
160
       91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
174
160
      107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
175
160
      123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
176
160
      139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
177
160
      157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
178
160
      175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
179
160
      194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
180
160
      214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
181
160
      235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
182
160
  mp_bitcnt_t retval;
183
184
160
  if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
185
0
    {
186
      /* In the unlikely case, we use two divisions and a modulo. */
187
0
      retval = bitn / k;
188
0
      bitn %= k;
189
0
      bitn = (bitn << LOGROOT_USED_BITS |
190
0
        vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
191
0
    }
192
160
  else
193
160
    {
194
160
      bitn = (bitn << LOGROOT_USED_BITS |
195
160
        vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
196
160
      retval = bitn >> LOGROOT_USED_BITS;
197
160
      bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
198
160
    }
199
160
  ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
200
160
  *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
201
160
    | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
202
160
  return retval;
203
160
}
204
205
/* if approx is non-zero, does not compute the final remainder */
206
static mp_size_t
207
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
208
          mp_limb_t k, int approx)
209
174
{
210
174
  mp_ptr qp, rp, sp, wp, scratch;
211
174
  mp_size_t qn, rn, sn, wn, nl, bn;
212
174
  mp_limb_t save, save2, cy, uh;
213
174
  mp_bitcnt_t unb; /* number of significant bits of {up,un} */
214
174
  mp_bitcnt_t xnb; /* number of significant bits of the result */
215
174
  mp_bitcnt_t b, kk;
216
174
  mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
217
174
  int ni;
218
174
  int perf_pow;
219
174
  unsigned ulz, snb, c, logk;
220
174
  TMP_DECL;
221
222
  /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
223
174
  uh = up[un - 1];
224
174
  count_leading_zeros (ulz, uh);
225
174
  ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
226
174
  unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
227
  /* unb is the (truncated) logarithm of the input U in base 2 */
228
229
174
  if (unb < k) /* root is 1 */
230
14
    {
231
14
      rootp[0] = 1;
232
14
      if (remp == NULL)
233
2
  un -= (*up == CNST_LIMB (1)); /* Non-zero iff {up,un} > 1 */
234
12
      else
235
12
  {
236
12
    mpn_sub_1 (remp, up, un, CNST_LIMB (1));
237
12
    un -= (remp [un - 1] == 0); /* There should be at most one zero limb,
238
           if we demand u to be normalized  */
239
12
  }
240
14
      return un;
241
14
    }
242
  /* if (unb - k < k/2 + k/16) // root is 2 */
243
244
160
  if (ulz == GMP_NUMB_BITS)
245
4
    uh = up[un - 2];
246
156
  else
247
156
    uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
248
160
  ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);
249
250
160
  xnb = logbased_root (rootp, uh, unb, k);
251
160
  snb = LOGROOT_RETURNED_BITS - 1;
252
  /* xnb+1 is the number of bits of the root R */
253
  /* snb+1 is the number of bits of the current approximation S */
254
255
160
  kk = k * xnb;   /* number of truncated bits in the input */
256
257
  /* FIXME: Should we skip the next two loops when xnb <= snb ? */
258
160
  for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk )
259
0
    ;
260
  /* logk = ceil(log(k)/log(2)) + 1 */
261
262
  /* xnb is the number of remaining bits to determine in the kth root */
263
1.18k
  for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
264
1.02k
    {
265
      /* invariant: here we want xnb+1 total bits for the kth root */
266
267
      /* if c is the new value of xnb, this means that we'll go from a
268
   root of c+1 bits (say s') to a root of xnb+1 bits.
269
   It is proved in the book "Modern Computer Arithmetic" by Brent
270
   and Zimmermann, Chapter 1, that
271
   if s' >= k*beta, then at most one correction is necessary.
272
   Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
273
   c >= ceil((xnb + log2(k))/2). */
274
1.02k
      if (xnb > logk)
275
1.02k
  xnb = (xnb + logk) / 2;
276
0
      else
277
0
  --xnb; /* add just one bit at a time */
278
1.02k
    }
279
280
160
  *rootp >>= snb - xnb;
281
160
  kk -= xnb;
282
283
160
  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
284
  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
285
     sizes[i] <= 2 * sizes[i+1].
286
     Newton iteration will first compute sizes[ni-1] extra bits,
287
     then sizes[ni-2], ..., then sizes[0] = b. */
288
289
160
  TMP_MARK;
290
  /* qp and wp need enough space to store S'^k where S' is an approximate
291
     root. Since S' can be as large as S+2, the worst case is when S=2 and
292
     S'=4. But then since we know the number of bits of S in advance, S'
293
     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
294
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
295
     fits in un limbs, the number of extra limbs needed is bounded by
296
     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
297
  /* THINK: with the use of logbased_root, maybe the constant is
298
     258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
299
160
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
300
160
  TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
301
160
         qp, un + EXTRA,  /* will contain quotient and remainder
302
           of R/(k*S^(k-1)), and S^k */
303
160
         wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
304
           and temporary for mpn_pow_1 */
305
306
160
  if (remp == NULL)
307
53
    rp = scratch;  /* will contain the remainder */
308
107
  else
309
107
    rp = remp;
310
160
  sp = rootp;
311
312
160
  sn = 1;   /* Initial approximation has one limb */
313
314
1.18k
  for (b = xnb; ni != 0; --ni)
315
1.02k
    {
316
      /* 1: loop invariant:
317
   {sp, sn} is the current approximation of the root, which has
318
      exactly 1 + sizes[ni] bits.
319
   {rp, rn} is the current remainder
320
   {wp, wn} = {sp, sn}^(k-1)
321
   kk = number of truncated bits of the input
322
      */
323
324
      /* Since each iteration treats b bits from the root and thus k*b bits
325
   from the input, and we already considered b bits from the input,
326
   we now have to take another (k-1)*b bits from the input. */
327
1.02k
      kk -= (k - 1) * b; /* remaining input bits */
328
      /* {rp, rn} = floor({up, un} / 2^kk) */
329
1.02k
      rn = un - kk / GMP_NUMB_BITS;
330
1.02k
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
331
1.02k
      rn -= rp[rn - 1] == 0;
332
333
      /* 9: current buffers: {sp,sn}, {rp,rn} */
334
335
1.02k
      for (c = 0;; c++)
336
1.13k
  {
337
    /* Compute S^k in {qp,qn}. */
338
    /* W <- S^(k-1) for the next iteration,
339
       and S^k = W * S. */
340
1.13k
    wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
341
1.13k
    mpn_mul (qp, wp, wn, sp, sn);
342
1.13k
    qn = wn + sn;
343
1.13k
    qn -= qp[qn - 1] == 0;
344
345
1.13k
    perf_pow = 1;
346
    /* if S^k > floor(U/2^kk), the root approximation was too large */
347
1.13k
    if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
348
109
      MPN_DECR_U (sp, sn, 1);
349
1.02k
    else
350
1.02k
      break;
351
1.13k
  }
352
353
      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
354
355
      /* sometimes two corrections are needed with logbased_root */
356
1.02k
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
357
1.02k
      ASSERT_ALWAYS (rn >= qn);
358
359
1.02k
      b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
360
              next iteration */
361
1.02k
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */
362
363
1.02k
      kk = kk - b;
364
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
365
1.02k
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
366
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
367
     - floor(kk / GMP_NUMB_BITS)
368
       <= 1 + (kk + b - 1) / GMP_NUMB_BITS
369
      - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
370
       = 2 + (b - 2) / GMP_NUMB_BITS
371
   thus since nl is an integer:
372
   nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
373
374
      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
375
376
      /* R = R - Q = floor(U/2^kk) - S^k */
377
1.02k
      if (perf_pow != 0)
378
1.02k
  {
379
1.02k
    mpn_sub (rp, rp, rn, qp, qn);
380
1.02k
    MPN_NORMALIZE_NOT_ZERO (rp, rn);
381
382
    /* first multiply the remainder by 2^b */
383
1.02k
    MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
384
1.02k
    rn = rn + bn;
385
1.02k
    if (cy != 0)
386
348
      {
387
348
        rp[rn] = cy;
388
348
        rn++;
389
348
      }
390
391
1.02k
    save = rp[bn];
392
    /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
393
1.02k
    if (nl - 1 > bn)
394
327
      save2 = rp[bn + 1];
395
1.02k
  }
396
0
      else
397
0
  {
398
0
    rn = bn;
399
0
    save2 = save = 0;
400
0
  }
401
      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
402
403
      /* Now insert bits [kk,kk+b-1] from the input U */
404
1.02k
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
405
      /* set to zero high bits of rp[bn] */
406
1.02k
      rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
407
      /* restore corresponding bits */
408
1.02k
      rp[bn] |= save;
409
1.02k
      if (nl - 1 > bn)
410
327
  rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
411
             they start by bit 0 in rp[0], so they use
412
             at most ceil(b/GMP_NUMB_BITS) limbs */
413
      /* FIXME: Should we normalise {rp,rn} here ?*/
414
415
      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
416
417
      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
418
1.02k
      cy = mpn_mul_1 (wp, wp, wn, k);
419
1.02k
      wp[wn] = cy;
420
1.02k
      wn += cy != 0;
421
422
      /* 6: current buffers: {sp,sn}, {qp,qn} */
423
424
      /* multiply the root approximation by 2^b */
425
1.02k
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
426
1.02k
      sn = sn + b / GMP_NUMB_BITS;
427
1.02k
      if (cy != 0)
428
301
  {
429
301
    sp[sn] = cy;
430
301
    sn++;
431
301
  }
432
433
1.02k
      save = sp[b / GMP_NUMB_BITS];
434
435
      /* Number of limbs used by b bits, when least significant bit is
436
   aligned to least limb */
437
1.02k
      bn = (b - 1) / GMP_NUMB_BITS + 1;
438
439
      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
440
441
      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
442
1.02k
      if (UNLIKELY (rn < wn))
443
0
  {
444
0
    MPN_FILL (sp, bn, 0);
445
0
  }
446
1.02k
      else
447
1.02k
  {
448
1.02k
    qn = rn - wn; /* expected quotient size */
449
1.02k
    if (qn <= bn) { /* Divide only if result is not too big. */
450
1.02k
      mpn_div_q (qp, rp, rn, wp, wn, scratch);
451
1.02k
      qn += qp[qn] != 0;
452
1.02k
    }
453
454
      /* 5: current buffers: {sp,sn}, {qp,qn}.
455
   Note: {rp,rn} is not needed any more since we'll compute it from
456
   scratch at the end of the loop.
457
       */
458
459
      /* the quotient should be smaller than 2^b, since the previous
460
   approximation was correctly rounded toward zero */
461
1.02k
    if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
462
1.02k
        qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
463
5
      {
464
5
        for (qn = 1; qn < bn; ++qn)
465
0
    sp[qn - 1] = GMP_NUMB_MAX;
466
5
        sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
467
5
      }
468
1.02k
    else
469
1.02k
      {
470
      /* 7: current buffers: {sp,sn}, {qp,qn} */
471
472
      /* Combine sB and q to form sB + q.  */
473
1.02k
        MPN_COPY (sp, qp, qn);
474
1.02k
        MPN_ZERO (sp + qn, bn - qn);
475
1.02k
      }
476
1.02k
  }
477
1.02k
      sp[b / GMP_NUMB_BITS] |= save;
478
479
      /* 8: current buffer: {sp,sn} */
480
481
1.02k
    }
482
483
  /* otherwise we have rn > 0, thus the return value is ok */
484
160
  if (!approx || sp[0] <= CNST_LIMB (1))
485
135
    {
486
135
      for (c = 0;; c++)
487
151
  {
488
    /* Compute S^k in {qp,qn}. */
489
    /* Last iteration: we don't need W anymore. */
490
    /* mpn_pow_1 requires that both qp and wp have enough
491
       space to store the result {sp,sn}^k + 1 limb */
492
151
    qn = mpn_pow_1 (qp, sp, sn, k, wp);
493
494
151
    perf_pow = 1;
495
151
    if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
496
16
      MPN_DECR_U (sp, sn, 1);
497
135
    else
498
135
      break;
499
151
  };
500
501
      /* sometimes two corrections are needed with logbased_root */
502
135
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
503
504
135
      rn = perf_pow != 0;
505
135
      if (rn != 0 && remp != NULL)
506
107
  {
507
107
    mpn_sub (remp, up, un, qp, qn);
508
107
    rn = un;
509
107
    MPN_NORMALIZE_NOT_ZERO (remp, rn);
510
107
  }
511
135
    }
512
513
160
  TMP_FREE;
514
160
  return rn;
515
160
}