/src/libgmp/mpn/rootrem.c
Line | Count | Source |
1 | | /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and |
2 | | store the truncated integer part at rootp and the remainder at remp. |
3 | | |
4 | | Contributed by Paul Zimmermann (algorithm) and |
5 | | Paul Zimmermann and Torbjorn Granlund (implementation). |
6 | | Marco Bodrato wrote logbased_root to seed the loop. |
7 | | |
8 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES. IT'S |
9 | | ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST |
10 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
11 | | |
12 | | Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc. |
13 | | |
14 | | This file is part of the GNU MP Library. |
15 | | |
16 | | The GNU MP Library is free software; you can redistribute it and/or modify |
17 | | it under the terms of either: |
18 | | |
19 | | * the GNU Lesser General Public License as published by the Free |
20 | | Software Foundation; either version 3 of the License, or (at your |
21 | | option) any later version. |
22 | | |
23 | | or |
24 | | |
25 | | * the GNU General Public License as published by the Free Software |
26 | | Foundation; either version 2 of the License, or (at your option) any |
27 | | later version. |
28 | | |
29 | | or both in parallel, as here. |
30 | | |
31 | | The GNU MP Library is distributed in the hope that it will be useful, but |
32 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
33 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
34 | | for more details. |
35 | | |
36 | | You should have received copies of the GNU General Public License and the |
37 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
38 | | see https://www.gnu.org/licenses/. */ |
39 | | |
40 | | /* FIXME: |
41 | | This implementation is not optimal when remp == NULL, since the complexity |
42 | | is M(n), whereas it should be M(n/k) on average. |
43 | | */ |
44 | | |
45 | | #include <stdio.h> /* for NULL */ |
46 | | |
47 | | #include "gmp-impl.h" |
48 | | #include "longlong.h" |
49 | | |
50 | | static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, |
51 | | mp_limb_t, int); |
52 | | |
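/* Note: the two wrappers below fall back to a plain limb copy when the shift
   count is zero, because mpn_rshift and mpn_lshift require a shift count in
   the range 1 to GMP_NUMB_BITS-1.  */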
53 | | #define MPN_RSHIFT(rp,up,un,cnt) \ |
54 | 2.05k | do { \ |
55 | 2.05k | if ((cnt) != 0) \ |
56 | 2.05k | mpn_rshift (rp, up, un, cnt); \ |
57 | 2.05k | else \ |
58 | 2.05k | { \ |
59 | 57 | MPN_COPY_INCR (rp, up, un); \ |
60 | 57 | } \ |
61 | 2.05k | } while (0) |
62 | | |
63 | | #define MPN_LSHIFT(cy,rp,up,un,cnt) \ |
64 | 2.05k | do { \ |
65 | 2.05k | if ((cnt) != 0) \ |
66 | 2.05k | cy = mpn_lshift (rp, up, un, cnt); \ |
67 | 2.05k | else \ |
68 | 2.05k | { \ |
69 | 12 | MPN_COPY_DECR (rp, up, un); \ |
70 | 12 | cy = 0; \ |
71 | 12 | } \ |
72 | 2.05k | } while (0) |
73 | | |
74 | | |
75 | | /* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero. |
76 | | If remp <> NULL, put in {remp, un} the remainder. |
77 | | Return the size (in limbs) of the remainder if remp <> NULL, |
78 | | or a non-zero value iff the remainder is non-zero when remp = NULL. |
79 | | Assumes: |
80 | | (a) up[un-1] is not zero |
81 | | (b) rootp has at least space for ceil(un/k) limbs |
82 | | (c) remp has at least space for un limbs (in case remp <> NULL) |
83 | | (d) the operands do not overlap. |
84 | | |
85 | | The auxiliary memory usage is 3*un+2 if remp = NULL, |
86 | | and 2*un+2 if remp <> NULL. FIXME: This is an incorrect comment. |
87 | | */ |
88 | | mp_size_t |
89 | | mpn_rootrem (mp_ptr rootp, mp_ptr remp, |
90 | | mp_srcptr up, mp_size_t un, mp_limb_t k) |
91 | 174 | { |
92 | 174 | ASSERT (un > 0); |
93 | 174 | ASSERT (up[un - 1] != 0); |
94 | 174 | ASSERT (k > 1); |
95 | | |
96 | 174 | if (UNLIKELY (k == 2)) |
97 | 0 | return mpn_sqrtrem (rootp, remp, up, un); |
98 | | /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */ |
99 | 174 | if (remp == NULL && (un + 2) / 3 > k) |
100 | | /* Pad {up,un} with k zero limbs. This will produce an approximate root |
101 | | with one more limb, allowing us to compute the exact integral result. */ |
102 | 26 | { |
103 | 26 | mp_ptr sp, wp; |
104 | 26 | mp_size_t rn, sn, wn; |
105 | 26 | TMP_DECL; |
106 | 26 | TMP_MARK; |
107 | 26 | wn = un + k; |
108 | 26 | sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */ |
109 | 26 | TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */ |
110 | 26 | sp, sn); /* approximate root of padded input */ |
111 | 26 | MPN_COPY (wp + k, up, un); |
112 | 26 | MPN_FILL (wp, k, 0); |
113 | 26 | rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1); |
114 | | /* The approximate root S = {sp,sn} is either the correct root of |
115 | | {wp,wn}, or 1 too large. Thus unless the least significant limb of |
116 | | S is 0 or 1, we can deduce the root of {up,un} is S truncated by one |
117 | | limb. (In case sp[0]=1, we can deduce the root, but not decide |
118 | | whether it is exact or not.) */ |
119 | 26 | MPN_COPY (rootp, sp + 1, sn - 1); |
120 | 26 | TMP_FREE; |
121 | 26 | return rn; |
122 | 26 | } |
123 | 148 | else |
124 | 148 | { |
125 | 148 | return mpn_rootrem_internal (rootp, remp, up, un, k, 0); |
126 | 148 | } |
127 | 174 | } |
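/* Usage sketch (standalone example, not part of this file): mpn_rootrem is
   internal, so applications reach it through the documented mpz layer, for
   instance mpz_rootrem.  The snippet below computes the truncated cube root
   of 10^24 together with its remainder.  */

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t u, root, rem;

  mpz_init_set_str (u, "1000000000000000000000000", 10);  /* 10^24 */
  mpz_inits (root, rem, NULL);

  mpz_rootrem (root, rem, u, 3);        /* root = 10^8, rem = 0 */
  gmp_printf ("root = %Zd, rem = %Zd\n", root, rem);

  mpz_clears (u, root, rem, NULL);
  return 0;
}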
128 | | |
129 | 960 | #define LOGROOT_USED_BITS 8 |
130 | 480 | #define LOGROOT_NEEDS_TWO_CORRECTIONS 1 |
131 | 160 | #define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS) |
132 | | /* Puts in *rootp some bits of the k-th root of the number |
133 | | 2^bitn * 1.op, where op represents the "fractional" bits. |
134 | | |
135 | | The returned value is the number of bits of the root minus one; |
136 | | i.e. an approximation of the root will be |
137 | | (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1). |
138 | | |
139 | | Currently, only LOGROOT_USED_BITS bits of op are used (the implicit |
140 | | one is not counted). |
141 | | */ |
142 | | static unsigned |
143 | | logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k) |
144 | 160 | { |
145 | | /* vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */ |
146 | 160 | static const |
147 | 160 | unsigned char vlog[] = {1, 2, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 18, 19, 21, 22, |
148 | 160 | 23, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 42, 43, |
149 | 160 | 44, 46, 47, 48, 49, 51, 52, 53, 54, 56, 57, 58, 59, 61, 62, 63, |
150 | 160 | 64, 65, 67, 68, 69, 70, 71, 73, 74, 75, 76, 77, 78, 80, 81, 82, |
151 | 160 | 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 96, 97, 98, 99, 100, |
152 | 160 | 101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, |
153 | 160 | 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134, |
154 | 160 | 135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, |
155 | 160 | 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164, |
156 | 160 | 165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179, |
157 | 160 | 180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193, |
158 | 160 | 194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206, |
159 | 160 | 207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219, |
160 | 160 | 220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232, |
161 | 160 | 232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244, |
162 | 160 | 245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255}; |
163 | | |
164 | | /* vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */ |
165 | 160 | static const |
166 | 160 | unsigned char vexp[] = {0, 1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 9, 10, 11, |
167 | 160 | 12, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 23, |
168 | 160 | 23, 24, 25, 26, 26, 27, 28, 29, 30, 30, 31, 32, 33, 33, 34, 35, |
169 | 160 | 36, 37, 37, 38, 39, 40, 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, |
170 | 160 | 49, 50, 50, 51, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 61, |
171 | 160 | 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, |
172 | 160 | 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 87, 88, 89, 90, |
173 | 160 | 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, |
174 | 160 | 107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122, |
175 | 160 | 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, |
176 | 160 | 139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156, |
177 | 160 | 157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174, |
178 | 160 | 175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193, |
179 | 160 | 194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213, |
180 | 160 | 214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234, |
181 | 160 | 235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255}; |
182 | 160 | mp_bitcnt_t retval; |
183 | | |
184 | 160 | if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS)) |
185 | 0 | { |
186 | | /* In the unlikely case, we use two divisions and a modulo. */ |
187 | 0 | retval = bitn / k; |
188 | 0 | bitn %= k; |
189 | 0 | bitn = (bitn << LOGROOT_USED_BITS | |
190 | 0 | vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k; |
191 | 0 | } |
192 | 160 | else |
193 | 160 | { |
194 | 160 | bitn = (bitn << LOGROOT_USED_BITS | |
195 | 160 | vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k; |
196 | 160 | retval = bitn >> LOGROOT_USED_BITS; |
197 | 160 | bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1; |
198 | 160 | } |
199 | 160 | ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS); |
200 | 160 | *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS) |
201 | 160 | | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS; |
202 | 160 | return retval; |
203 | 160 | } |
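/* Illustrative sketch (standalone, hypothetical helper, not a GMP routine):
   logbased_root works in fixed point with the vlog/vexp tables, but the
   underlying identity is simply  root = 2^(log2(U)/k).  Splitting log2(U)/k
   into its integer and fractional parts yields the bit length of the root
   and its leading bits, which is what the two table lookups approximate to
   about 8 bits.  */

#include <math.h>
#include <stdio.h>

/* U = 2^bitn * frac with frac in [1,2); returns the leading digits of
   U^(1/k) as a value in [1,2) and stores floor(log2(U^(1/k))) in
   *root_bits (the analogue of logbased_root's return value).  */
double
approx_root_seed (double frac, unsigned long bitn, unsigned long k,
                  unsigned long *root_bits)
{
  double e = ((double) bitn + log2 (frac)) / (double) k;

  *root_bits = (unsigned long) floor (e);
  return exp2 (e - floor (e));
}

int
main (void)
{
  unsigned long rb;
  double m = approx_root_seed (1.5, 20, 3, &rb);  /* cbrt(1.5 * 2^20) */

  printf ("root ~= %.4f * 2^%lu\n", m, rb);       /* about 1.817 * 2^6 */
  return 0;
}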
204 | | |
205 | | /* if approx is non-zero, does not compute the final remainder */ |
206 | | static mp_size_t |
207 | | mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un, |
208 | | mp_limb_t k, int approx) |
209 | 174 | { |
210 | 174 | mp_ptr qp, rp, sp, wp, scratch; |
211 | 174 | mp_size_t qn, rn, sn, wn, nl, bn; |
212 | 174 | mp_limb_t save, save2, cy, uh; |
213 | 174 | mp_bitcnt_t unb; /* number of significant bits of {up,un} */ |
214 | 174 | mp_bitcnt_t xnb; /* number of significant bits of the result */ |
215 | 174 | mp_bitcnt_t b, kk; |
216 | 174 | mp_bitcnt_t sizes[GMP_NUMB_BITS + 1]; |
217 | 174 | int ni; |
218 | 174 | int perf_pow; |
219 | 174 | unsigned ulz, snb, c, logk; |
220 | 174 | TMP_DECL; |
221 | | |
222 | | /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */ |
223 | 174 | uh = up[un - 1]; |
224 | 174 | count_leading_zeros (ulz, uh); |
225 | 174 | ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */ |
226 | 174 | unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz; |
227 | | /* unb is the (truncated) logarithm of the input U in base 2 */
228 | | |
229 | 174 | if (unb < k) /* root is 1 */ |
230 | 14 | { |
231 | 14 | rootp[0] = 1; |
232 | 14 | if (remp == NULL) |
233 | 2 | un -= (*up == CNST_LIMB (1)); /* Non-zero iff {up,un} > 1 */
234 | 12 | else |
235 | 12 | { |
236 | 12 | mpn_sub_1 (remp, up, un, CNST_LIMB (1)); |
237 | 12 | un -= (remp [un - 1] == 0); /* There should be at most one zero limb, |
238 | | if we demand u to be normalized */ |
239 | 12 | } |
240 | 14 | return un; |
241 | 14 | } |
242 | | /* if (unb - k < k/2 + k/16) // root is 2 */ |
243 | | |
244 | 160 | if (ulz == GMP_NUMB_BITS) |
245 | 4 | uh = up[un - 2]; |
246 | 156 | else |
247 | 156 | uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz); |
248 | 160 | ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1); |
249 | | |
250 | 160 | xnb = logbased_root (rootp, uh, unb, k); |
251 | 160 | snb = LOGROOT_RETURNED_BITS - 1; |
252 | | /* xnb+1 is the number of bits of the root R */ |
253 | | /* snb+1 is the number of bits of the current approximation S */ |
254 | | |
255 | 160 | kk = k * xnb; /* number of truncated bits in the input */ |
256 | | |
257 | | /* FIXME: Should we skip the next two loops when xnb <= snb ? */ |
258 | 160 | for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk ) |
259 | 0 | ; |
260 | | /* logk = ceil(log(k)/log(2)) + 1 */ |
261 | | |
262 | | /* xnb is the number of remaining bits to determine in the kth root */ |
263 | 1.18k | for (ni = 0; (sizes[ni] = xnb) > snb; ++ni) |
264 | 1.02k | { |
265 | | /* invariant: here we want xnb+1 total bits for the kth root */ |
266 | | |
267 | | /* if c is the new value of xnb, this means that we'll go from a |
268 | | root of c+1 bits (say s') to a root of xnb+1 bits. |
269 | | It is proved in the book "Modern Computer Arithmetic" by Brent |
270 | | and Zimmermann, Chapter 1, that |
271 | | if s' >= k*beta, then at most one correction is necessary. |
272 | | Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that |
273 | | c >= ceil((xnb + log2(k))/2). */ |
274 | 1.02k | if (xnb > logk) |
275 | 1.02k | xnb = (xnb + logk) / 2; |
276 | 0 | else |
277 | 0 | --xnb; /* add just one bit at a time */ |
278 | 1.02k | } |
279 | | |
280 | 160 | *rootp >>= snb - xnb; |
281 | 160 | kk -= xnb; |
282 | | |
283 | 160 | ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1); |
284 | | /* We have sizes[0] > sizes[1] > ... > sizes[ni] = xnb <= snb, with |
285 | | sizes[i] <= 2 * sizes[i+1]. |
286 | | Newton iteration will first compute sizes[ni-1] - sizes[ni] extra bits, |
287 | | then sizes[ni-2] - sizes[ni-1], ..., and finally sizes[0] - sizes[1]. */ |
288 | | |
289 | 160 | TMP_MARK; |
290 | | /* qp and wp need enough space to store S'^k where S' is an approximate |
291 | | root. Since S' can be as large as S+2, the worst case is when S=2 and |
292 | | S'=4. But then since we know the number of bits of S in advance, S' |
293 | | can only be 3 at most. Similarly for S=4, then S' can be 6 at most. |
294 | | So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k |
295 | | fits in un limbs, the number of extra limbs needed is bounded by |
296 | | ceil(k*log2(3/2)/GMP_NUMB_BITS). */ |
297 | | /* THINK: with the use of logbased_root, maybe the constant is |
298 | | 258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */ |
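/* Note: 0.585 is a slight overestimate of log2(3/2) ~= 0.58496, so EXTRA
   is about 2 + k*log2(3/2)/GMP_NUMB_BITS limbs, matching the bound above.  */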
299 | 160 | #define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS) |
300 | 160 | TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */ |
301 | 160 | qp, un + EXTRA, /* will contain quotient and remainder |
302 | | of R/(k*S^(k-1)), and S^k */ |
303 | 160 | wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1), |
304 | | and temporary for mpn_pow_1 */ |
305 | | |
306 | 160 | if (remp == NULL) |
307 | 53 | rp = scratch; /* will contain the remainder */ |
308 | 107 | else |
309 | 107 | rp = remp; |
310 | 160 | sp = rootp; |
311 | | |
312 | 160 | sn = 1; /* Initial approximation has one limb */ |
313 | | |
314 | 1.18k | for (b = xnb; ni != 0; --ni) |
315 | 1.02k | { |
316 | | /* 1: loop invariant: |
317 | | {sp, sn} is the current approximation of the root, which has |
318 | | exactly 1 + sizes[ni] bits. |
319 | | {rp, rn} is the current remainder |
320 | | {wp, wn} = {sp, sn}^(k-1) |
321 | | kk = number of truncated bits of the input |
322 | | */ |
323 | | |
324 | | /* Since each iteration treats b bits from the root and thus k*b bits |
325 | | from the input, and we already considered b bits from the input, |
326 | | we now have to take another (k-1)*b bits from the input. */ |
327 | 1.02k | kk -= (k - 1) * b; /* remaining input bits */ |
328 | | /* {rp, rn} = floor({up, un} / 2^kk) */ |
329 | 1.02k | rn = un - kk / GMP_NUMB_BITS; |
330 | 1.02k | MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS); |
331 | 1.02k | rn -= rp[rn - 1] == 0; |
332 | | |
333 | | /* 9: current buffers: {sp,sn}, {rp,rn} */ |
334 | | |
335 | 1.02k | for (c = 0;; c++) |
336 | 1.13k | { |
337 | | /* Compute S^k in {qp,qn}. */ |
338 | | /* W <- S^(k-1) for the next iteration, |
339 | | and S^k = W * S. */ |
340 | 1.13k | wn = mpn_pow_1 (wp, sp, sn, k - 1, qp); |
341 | 1.13k | mpn_mul (qp, wp, wn, sp, sn); |
342 | 1.13k | qn = wn + sn; |
343 | 1.13k | qn -= qp[qn - 1] == 0; |
344 | | |
345 | 1.13k | perf_pow = 1; |
346 | | /* if S^k > floor(U/2^kk), the root approximation was too large */ |
347 | 1.13k | if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0)) |
348 | 109 | MPN_DECR_U (sp, sn, 1); |
349 | 1.02k | else |
350 | 1.02k | break; |
351 | 1.13k | } |
352 | | |
353 | | /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */ |
354 | | |
355 | | /* sometimes two corrections are needed with logbased_root */
356 | 1.02k | ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS); |
357 | 1.02k | ASSERT_ALWAYS (rn >= qn); |
358 | | |
359 | 1.02k | b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the |
360 | | next iteration */ |
361 | 1.02k | bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */ |
362 | | |
363 | 1.02k | kk = kk - b; |
364 | | /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */ |
365 | 1.02k | nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS); |
366 | | /* nl = 1 + floor((kk + b - 1) / GMP_NUMB_BITS) |
367 | | - floor(kk / GMP_NUMB_BITS) |
368 | | <= 1 + (kk + b - 1) / GMP_NUMB_BITS |
369 | | - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS |
370 | | = 2 + (b - 2) / GMP_NUMB_BITS |
371 | | thus since nl is an integer: |
372 | | nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */ |
373 | | |
374 | | /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ |
375 | | |
376 | | /* R = R - Q = floor(U/2^kk) - S^k */ |
377 | 1.02k | if (perf_pow != 0) |
378 | 1.02k | { |
379 | 1.02k | mpn_sub (rp, rp, rn, qp, qn); |
380 | 1.02k | MPN_NORMALIZE_NOT_ZERO (rp, rn); |
381 | | |
382 | | /* first multiply the remainder by 2^b */ |
383 | 1.02k | MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS); |
384 | 1.02k | rn = rn + bn; |
385 | 1.02k | if (cy != 0) |
386 | 348 | { |
387 | 348 | rp[rn] = cy; |
388 | 348 | rn++; |
389 | 348 | } |
390 | | |
391 | 1.02k | save = rp[bn]; |
392 | | /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */ |
393 | 1.02k | if (nl - 1 > bn) |
394 | 327 | save2 = rp[bn + 1]; |
395 | 1.02k | } |
396 | 0 | else |
397 | 0 | { |
398 | 0 | rn = bn; |
399 | 0 | save2 = save = 0; |
400 | 0 | } |
401 | | /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ |
402 | | |
403 | | /* Now insert bits [kk,kk+b-1] from the input U */ |
404 | 1.02k | MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS); |
405 | | /* set to zero high bits of rp[bn] */ |
406 | 1.02k | rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1; |
407 | | /* restore corresponding bits */ |
408 | 1.02k | rp[bn] |= save; |
409 | 1.02k | if (nl - 1 > bn) |
410 | 327 | rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since |
411 | | they start at bit 0 in rp[0], so they use |
412 | | at most ceil(b/GMP_NUMB_BITS) limbs */ |
413 | | /* FIXME: Should we normalise {rp,rn} here? */
414 | | |
415 | | /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ |
416 | | |
417 | | /* compute {wp, wn} = k * {sp, sn}^(k-1) */ |
418 | 1.02k | cy = mpn_mul_1 (wp, wp, wn, k); |
419 | 1.02k | wp[wn] = cy; |
420 | 1.02k | wn += cy != 0; |
421 | | |
422 | | /* 6: current buffers: {sp,sn}, {qp,qn} */ |
423 | | |
424 | | /* multiply the root approximation by 2^b */ |
425 | 1.02k | MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS); |
426 | 1.02k | sn = sn + b / GMP_NUMB_BITS; |
427 | 1.02k | if (cy != 0) |
428 | 301 | { |
429 | 301 | sp[sn] = cy; |
430 | 301 | sn++; |
431 | 301 | } |
432 | | |
433 | 1.02k | save = sp[b / GMP_NUMB_BITS]; |
434 | | |
435 | | /* Number of limbs used by b bits, when least significant bit is |
436 | | aligned to least limb */ |
437 | 1.02k | bn = (b - 1) / GMP_NUMB_BITS + 1; |
438 | | |
439 | | /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ |
440 | | |
441 | | /* now divide {rp, rn} by {wp, wn} to get the low part of the root */ |
442 | 1.02k | if (UNLIKELY (rn < wn)) |
443 | 0 | { |
444 | 0 | MPN_FILL (sp, bn, 0); |
445 | 0 | } |
446 | 1.02k | else |
447 | 1.02k | { |
448 | 1.02k | qn = rn - wn; /* expected quotient size */ |
449 | 1.02k | if (qn <= bn) { /* Divide only if result is not too big. */ |
450 | 1.02k | mpn_div_q (qp, rp, rn, wp, wn, scratch); |
451 | 1.02k | qn += qp[qn] != 0; |
452 | 1.02k | } |
453 | | |
454 | | /* 5: current buffers: {sp,sn}, {qp,qn}. |
455 | | Note: {rp,rn} is not needed any more since we'll compute it from |
456 | | scratch at the end of the loop. |
457 | | */ |
458 | | |
459 | | /* the quotient should be smaller than 2^b, since the previous |
460 | | approximation was correctly rounded toward zero */ |
461 | 1.02k | if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) && |
462 | 1.02k | qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)))) |
463 | 5 | { |
464 | 5 | for (qn = 1; qn < bn; ++qn) |
465 | 0 | sp[qn - 1] = GMP_NUMB_MAX; |
466 | 5 | sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS)); |
467 | 5 | } |
468 | 1.02k | else |
469 | 1.02k | { |
470 | | /* 7: current buffers: {sp,sn}, {qp,qn} */ |
471 | | |
472 | | /* Combine sB and q to form sB + q. */ |
473 | 1.02k | MPN_COPY (sp, qp, qn); |
474 | 1.02k | MPN_ZERO (sp + qn, bn - qn); |
475 | 1.02k | } |
476 | 1.02k | } |
477 | 1.02k | sp[b / GMP_NUMB_BITS] |= save; |
478 | | |
479 | | /* 8: current buffer: {sp,sn} */ |
480 | | |
481 | 1.02k | } |
482 | | |
483 | | /* otherwise we have rn > 0, thus the return value is ok */ |
484 | 160 | if (!approx || sp[0] <= CNST_LIMB (1)) |
485 | 135 | { |
486 | 135 | for (c = 0;; c++) |
487 | 151 | { |
488 | | /* Compute S^k in {qp,qn}. */ |
489 | | /* Last iteration: we don't need W anymore. */ |
490 | | /* mpn_pow_1 requires that both qp and wp have enough |
491 | | space to store the result {sp,sn}^k + 1 limb */ |
492 | 151 | qn = mpn_pow_1 (qp, sp, sn, k, wp); |
493 | | |
494 | 151 | perf_pow = 1; |
495 | 151 | if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0)) |
496 | 16 | MPN_DECR_U (sp, sn, 1); |
497 | 135 | else |
498 | 135 | break; |
499 | 151 | }; |
500 | | |
501 | | /* sometimes two corrections are needed with logbased_root */
502 | 135 | ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS); |
503 | | |
504 | 135 | rn = perf_pow != 0; |
505 | 135 | if (rn != 0 && remp != NULL) |
506 | 107 | { |
507 | 107 | mpn_sub (remp, up, un, qp, qn); |
508 | 107 | rn = un; |
509 | 107 | MPN_NORMALIZE_NOT_ZERO (remp, rn); |
510 | 107 | } |
511 | 135 | } |
512 | | |
513 | 160 | TMP_FREE; |
514 | 160 | return rn; |
515 | 160 | } |
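/* Illustrative sketch (standalone, hypothetical helper, not a GMP entry
   point): the loop above refines the root group-of-bits by group-of-bits,
   roughly doubling the known bits per step, via a division by k*S^(k-1).
   A closely related full-precision Newton form of the same correction is
   S <- ((k-1)*S + floor(U/S^(k-1)))/k, which, started from an overestimate,
   decreases monotonically to floor(U^(1/k)).  The mpz-level sketch below
   uses that simpler scheme (for U >= 1 and k >= 1), not the algorithm
   implemented in this file.  */

#include <stdio.h>
#include <gmp.h>

void
kth_root (mpz_t root, const mpz_t u, unsigned long k)
{
  mpz_t x, y, t;

  mpz_inits (x, y, t, NULL);

  /* Initial overestimate: 2^ceil(bits(u)/k) > floor(u^(1/k)).  */
  mpz_set_ui (x, 1);
  mpz_mul_2exp (x, x, (mpz_sizeinbase (u, 2) + k - 1) / k);

  for (;;)
    {
      /* y = ((k-1)*x + floor(u / x^(k-1))) / k */
      mpz_pow_ui (t, x, k - 1);
      mpz_tdiv_q (y, u, t);
      mpz_addmul_ui (y, x, k - 1);
      mpz_tdiv_q_ui (y, y, k);
      if (mpz_cmp (y, x) >= 0)
        break;                  /* converged: x = floor(u^(1/k)) */
      mpz_set (x, y);
    }

  mpz_set (root, x);
  mpz_clears (x, y, t, NULL);
}

int
main (void)
{
  mpz_t u, r;

  mpz_init_set_ui (u, 1000001);
  mpz_init (r);
  kth_root (r, u, 3);
  gmp_printf ("floor(1000001^(1/3)) = %Zd\n", r);  /* 100 */
  mpz_clears (u, r, NULL);
  return 0;
}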