/src/gmp-6.2.1/mpz/n_pow_ui.c
Line | Count | Source
1 | | /* mpz_n_pow_ui -- mpn raised to ulong. |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST |
4 | | CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN |
5 | | FUTURE GNU MP RELEASES. |
6 | | |
7 | | Copyright 2001, 2002, 2005, 2012, 2015, 2020 Free Software Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
34 | | |
35 | | #include <stdlib.h> |
36 | | #include <stdio.h> |
37 | | #include "gmp-impl.h" |
38 | | #include "longlong.h" |
39 | | |
40 | | |
41 | | /* Change this to "#define TRACE(x) x" for some traces. */ |
42 | | #define TRACE(x) |
43 | | |
44 | | |
45 | | /* Use this to test the mul_2 code on a CPU without a native version of that |
46 | | routine. */ |
47 | | #if 0 |
48 | | #define mpn_mul_2 refmpn_mul_2 |
49 | | #define HAVE_NATIVE_mpn_mul_2 1 |
50 | | #endif |
51 | | |
52 | | |
53 | | /* mpz_pow_ui and mpz_ui_pow_ui want to share almost all of this code. |
54 | | ui_pow_ui doesn't need the mpn_mul based powering loop or the tests on |
55 | | bsize==2 or >2, but separating that isn't easy because there's shared |
56 | | code both before and after (the size calculations and the powers of 2 |
57 | | handling). |
58 | | |
59 | | Alternatives: |
60 | | |
61 | | It would work to just use the mpn_mul powering loop for 1 and 2 limb |
62 | | bases, but the current separate loop allows mul_1 and mul_2 to be done |
63 | | in-place, which might help cache locality a bit. If mpn_mul was relaxed |
64 | | to allow source==dest when vn==1 or 2 then some pointer twiddling might |
65 | | let us get the same effect in one loop. |
66 | | |
67 | | The initial powering for bsize==1 into blimb or blimb:blimb_low doesn't |
68 | | form the biggest possible power of b that fits, only the biggest
69 | | power-of-2 exponent, ie. b^(2^n). It'd be possible to choose a bigger power, perhaps
70 | | using mp_bases[b].big_base for small b, and thereby get better value |
71 | | from mpn_mul_1 or mpn_mul_2 in the bignum powering. It's felt that doing |
72 | | so would be more complicated than it's worth, and could well end up being |
73 | | a slowdown for small e. For big e on the other hand the algorithm is |
74 | | dominated by mpn_sqr so there wouldn't be much of a saving. The current
75 | | code can be viewed as simply doing the first few steps of the powering in |
76 | | a single or double limb where possible. |
77 | | |
78 | | If r==b, and btwos==0, and r must be realloc'ed, then the temporary
79 | | copy made of b is unnecessary. We could just use the old alloc'ed block |
80 | | and free it at the end. But arranging this seems like a lot more trouble |
81 | | than it's worth. */ |
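
All of the powering loops below are instances of the classic binary
(square-and-multiply) method. As a stand-alone illustration, here is a
minimal sketch of the right-to-left variant that the single-limb code
uses; pow_u64 is a hypothetical helper, not part of GMP, and it assumes
the final result fits in a uint64_t:

  #include <stdint.h>

  static uint64_t
  pow_u64 (uint64_t b, unsigned long e)
  {
    uint64_t r = 1;
    while (e != 0)
      {
        if (e & 1)
          r *= b;    /* fold in b^(2^n) when bit n of e is set */
        e >>= 1;
        if (e != 0)
          b *= b;    /* step through b, b^2, b^4, ... */
      }
    return r;
  }

The mpn loops later in the file run the same recurrence from the high
bit of e downwards instead, multiplying by the small base itself
whenever a bit is set, which is cheaper than the growing multipliers a
right-to-left loop would need.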
82 | | |
83 | | |
84 | | /* floor(sqrt(GMP_NUMB_MAX)), ie. the biggest value that can be squared in |
85 | | a limb without overflowing. |
86 | | FIXME: This formula is an underestimate when GMP_NUMB_BITS is odd. */ |
87 | | |
88 | 0 | #define GMP_NUMB_HALFMAX (((mp_limb_t) 1 << GMP_NUMB_BITS/2) - 1) |
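
To see the FIXME concretely: with GMP_NUMB_BITS = 64 the formula gives
2^32 - 1, which is exactly floor(sqrt(2^64-1)); with an odd width such
as 63 (possible in a nails build) it gives 2^31 - 1, whereas the true
floor(sqrt(2^63-1)) is 3037000499, roughly 2^31.5. The underestimate is
harmless for correctness, it only makes the single-limb loop below stop
squaring slightly early.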
89 | | |
90 | | |
91 | | /* The following are for convenience, they update the size and check the |
92 | | alloc. */ |
93 | | |
94 | | #define MPN_SQR(dst, alloc, src, size) \ |
95 | 0 | do { \ |
96 | 0 | ASSERT (2*(size) <= (alloc)); \ |
97 | 0 | mpn_sqr (dst, src, size); \ |
98 | 0 | (size) *= 2; \ |
99 | 0 | (size) -= ((dst)[(size)-1] == 0); \ |
100 | 0 | } while (0) |
101 | | |
102 | | #define MPN_MUL(dst, alloc, src, size, src2, size2) \ |
103 | 0 | do { \ |
104 | 0 | mp_limb_t cy; \ |
105 | 0 | ASSERT ((size) + (size2) <= (alloc)); \ |
106 | 0 | cy = mpn_mul (dst, src, size, src2, size2); \ |
107 | 0 | (size) += (size2) - (cy == 0); \ |
108 | 0 | } while (0) |
109 | | |
110 | | #define MPN_MUL_2(ptr, size, alloc, mult) \ |
111 | 0 | do { \ |
112 | 0 | mp_limb_t cy; \ |
113 | 0 | ASSERT ((size)+2 <= (alloc)); \ |
114 | 0 | cy = mpn_mul_2 (ptr, ptr, size, mult); \ |
115 | 0 | (size)++; \ |
116 | 0 | (ptr)[(size)] = cy; \ |
117 | 0 | (size) += (cy != 0); \ |
118 | 0 | } while (0) |
119 | | |
120 | | #define MPN_MUL_1(ptr, size, alloc, limb) \ |
121 | 0 | do { \ |
122 | 0 | mp_limb_t cy; \ |
123 | 0 | ASSERT ((size)+1 <= (alloc)); \ |
124 | 0 | cy = mpn_mul_1 (ptr, ptr, size, limb); \ |
125 | 0 | (ptr)[size] = cy; \ |
126 | 0 | (size) += (cy != 0); \ |
127 | 0 | } while (0) |
128 | | |
129 | | #define MPN_LSHIFT(ptr, size, alloc, shift) \ |
130 | 0 | do { \ |
131 | 0 | mp_limb_t cy; \ |
132 | 0 | ASSERT ((size)+1 <= (alloc)); \ |
133 | 0 | cy = mpn_lshift (ptr, ptr, size, shift); \ |
134 | 0 | (ptr)[size] = cy; \ |
135 | 0 | (size) += (cy != 0); \ |
136 | 0 | } while (0) |
137 | | |
138 | | #define MPN_RSHIFT_OR_COPY(dst, src, size, shift) \ |
139 | 0 | do { \ |
140 | 0 | if ((shift) == 0) \ |
141 | 0 | MPN_COPY (dst, src, size); \ |
142 | 0 | else \ |
143 | 0 | { \ |
144 | 0 | mpn_rshift (dst, src, size, shift); \ |
145 | 0 | (size) -= ((dst)[(size)-1] == 0); \ |
146 | 0 | } \ |
147 | 0 | } while (0) |
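
The pattern these macros share, multiply or shift in place, append the
carry as a new high limb, and count that limb only when it is non-zero,
can be reproduced with GMP's public mpn layer. A minimal sketch
(hypothetical demo code, not from GMP) doing MPN_MUL_1's bookkeeping by
hand:

  #include <stdio.h>
  #include <gmp.h>

  int
  main (void)
  {
    mp_limb_t r[4] = { 1 };   /* the value 1, one significant limb */
    mp_size_t size = 1;
    int i;
    for (i = 0; i < 3; i++)
      {
        /* r *= 1000000 */
        mp_limb_t cy = mpn_mul_1 (r, r, size, 1000000);
        r[size] = cy;          /* store the carry unconditionally... */
        size += (cy != 0);     /* ...but count it only if non-zero */
      }
    printf ("%ld limb(s)\n", (long) size);  /* 10^18: 1 limb on 64 bits */
    return 0;
  }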
148 | | |
149 | | |
150 | | /* ralloc and talloc are only wanted for ASSERTs, after the initial space |
151 | | allocations. Avoid writing values to them in a normal build, to ensure |
152 | | the compiler lets them go dead. gcc already figures this out itself |
153 | | actually. */ |
154 | | |
155 | | #define SWAP_RP_TP \ |
156 | 0 | do { \ |
157 | 0 | MP_PTR_SWAP (rp, tp); \ |
158 | 0 | ASSERT_CODE (MP_SIZE_T_SWAP (ralloc, talloc)); \ |
159 | 0 | } while (0) |
160 | | |
161 | | |
162 | | void |
163 | | mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e) |
164 | 0 | { |
165 | 0 | mp_ptr rp; |
166 | 0 | mp_size_t rtwos_limbs, ralloc, rsize; |
167 | 0 | int rneg, i, cnt, btwos, r_bp_overlap; |
168 | 0 | mp_limb_t blimb, rl; |
169 | 0 | mp_bitcnt_t rtwos_bits; |
170 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
171 | 0 | mp_limb_t blimb_low, rl_high; |
172 | | #else |
173 | | mp_limb_t b_twolimbs[2]; |
174 | | #endif |
175 | 0 | mp_limb_t ovfl; |
176 | 0 | TMP_DECL; |
177 | |
178 | 0 | TRACE (printf ("mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\n", |
179 | 0 | PTR(r), bp, bsize, e, e); |
180 | 0 | mpn_trace ("b", bp, bsize)); |
181 | |
182 | 0 | ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0); |
183 | 0 | ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ALLOC(r), bp, ABS(bsize))); |
184 | | |
185 | | /* b^0 == 1, including 0^0 == 1 */ |
186 | 0 | if (e == 0) |
187 | 0 | { |
188 | 0 | MPZ_NEWALLOC (r, 1)[0] = 1; |
189 | 0 | SIZ(r) = 1; |
190 | 0 | return; |
191 | 0 | } |
192 | | |
193 | | /* 0^e == 0 apart from 0^0 above */ |
194 | 0 | if (bsize == 0) |
195 | 0 | { |
196 | 0 | SIZ(r) = 0; |
197 | 0 | return; |
198 | 0 | } |
199 | | |
200 | | /* Sign of the final result. */ |
201 | 0 | rneg = (bsize < 0 && (e & 1) != 0); |
202 | 0 | bsize = ABS (bsize); |
203 | 0 | TRACE (printf ("rneg %d\n", rneg)); |
204 | |
205 | 0 | r_bp_overlap = (PTR(r) == bp); |
206 | | |
207 | | /* Strip low zero limbs from b. */ |
208 | 0 | rtwos_limbs = 0; |
209 | 0 | for (blimb = *bp; blimb == 0; blimb = *++bp) |
210 | 0 | { |
211 | 0 | rtwos_limbs += e; |
212 | 0 | bsize--; ASSERT (bsize >= 1); |
213 | 0 | } |
214 | 0 | TRACE (printf ("trailing zero rtwos_limbs=%ld\n", rtwos_limbs)); |
215 | | |
216 | | /* Strip low zero bits from b. */ |
217 | 0 | count_trailing_zeros (btwos, blimb); |
218 | 0 | blimb >>= btwos; |
219 | |
220 | 0 | umul_ppmm (ovfl, rtwos_bits, e, btwos); |
221 | 0 | if (ovfl) |
222 | 0 | { |
223 | 0 | fprintf (stderr, "gmp: overflow in mpz type\n"); |
224 | 0 | abort (); |
225 | 0 | } |
226 | | |
227 | 0 | rtwos_limbs += rtwos_bits / GMP_NUMB_BITS; |
228 | 0 | rtwos_bits %= GMP_NUMB_BITS; |
229 | 0 | TRACE (printf ("trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\n", |
230 | 0 | btwos, rtwos_limbs, rtwos_bits)); |
231 | |
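
The stripping above rests on the identity
b^e = 2^(e*btwos) * (b >> btwos)^e, with each stripped low zero limb
likewise contributing e whole zero limbs to the result. For example
b = 12 = 2^2 * 3 with e = 10 gives btwos = 2, so 12^10 = 2^20 * 3^10:
only 3^10 is actually powered, and 2^20 comes back as rtwos_limbs whole
zero limbs plus an rtwos_bits-bit lshift at the end.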
232 | 0 | TMP_MARK; |
233 | |
234 | 0 | rl = 1; |
235 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
236 | 0 | rl_high = 0; |
237 | 0 | #endif |
238 | |
239 | 0 | if (bsize == 1) |
240 | 0 | { |
241 | 0 | bsize_1: |
242 | | /* Power up as far as possible within blimb. We start here with e!=0, |
243 | | but if e is small then we might reach e==0 with the whole b^e in rl.
244 | | Notice this code works when blimb==1 too, reaching e==0. */ |
245 | |
246 | 0 | while (blimb <= GMP_NUMB_HALFMAX) |
247 | 0 | { |
248 | 0 | TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n", |
249 | 0 | e, blimb, rl)); |
250 | 0 | ASSERT (e != 0); |
251 | 0 | if ((e & 1) != 0) |
252 | 0 | rl *= blimb; |
253 | 0 | e >>= 1; |
254 | 0 | if (e == 0) |
255 | 0 | goto got_rl; |
256 | 0 | blimb *= blimb; |
257 | 0 | } |
258 | | |
259 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
260 | 0 | TRACE (printf ("single power, e=0x%lX b=0x%lX rl=0x%lX\n", |
261 | 0 | e, blimb, rl)); |
262 | | |
263 | | /* Can power b once more into blimb:blimb_low */ |
264 | 0 | bsize = 2; |
265 | 0 | ASSERT (e != 0); |
266 | 0 | if ((e & 1) != 0) |
267 | 0 | { |
268 | 0 | umul_ppmm (rl_high, rl, rl, blimb << GMP_NAIL_BITS); |
269 | 0 | rl >>= GMP_NAIL_BITS; |
270 | 0 | } |
271 | 0 | e >>= 1; |
272 | 0 | umul_ppmm (blimb, blimb_low, blimb, blimb << GMP_NAIL_BITS); |
273 | 0 | blimb_low >>= GMP_NAIL_BITS; |
274 | |
275 | 0 | got_rl: |
276 | 0 | TRACE (printf ("double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\n", |
277 | 0 | e, blimb, blimb_low, rl_high, rl)); |
278 | | |
279 | | /* Combine left-over rtwos_bits into rl_high:rl to be handled by the |
280 | | final mul_1 or mul_2 rather than a separate lshift. |
281 | | - rl_high:rl mustn't be 1 (since then there's no final mul) |
282 | | - rl_high mustn't overflow |
283 | | - rl_high mustn't change to non-zero, since mul_1+lshift is |
284 | | probably faster than mul_2 (FIXME: is this true?) */ |
285 | |
286 | 0 | if (rtwos_bits != 0 |
287 | 0 | && ! (rl_high == 0 && rl == 1) |
288 | 0 | && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0) |
289 | 0 | { |
290 | 0 | mp_limb_t new_rl_high = (rl_high << rtwos_bits) |
291 | 0 | | (rl >> (GMP_NUMB_BITS-rtwos_bits)); |
292 | 0 | if (! (rl_high == 0 && new_rl_high != 0)) |
293 | 0 | { |
294 | 0 | rl_high = new_rl_high; |
295 | 0 | rl <<= rtwos_bits; |
296 | 0 | rtwos_bits = 0; |
297 | 0 | TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n", |
298 | 0 | rl_high, rl)); |
299 | 0 | } |
300 | 0 | } |
301 | | #else |
302 | | got_rl: |
303 | | TRACE (printf ("small power e=0x%lX blimb=0x%lX rl=0x%lX\n", |
304 | | e, blimb, rl)); |
305 | | |
306 | | /* Combine left-over rtwos_bits into rl to be handled by the final |
307 | | mul_1 rather than a separate lshift. |
308 | | - rl mustn't be 1 (since then there's no final mul) |
309 | | - rl mustn't overflow */ |
310 | | |
311 | | if (rtwos_bits != 0 |
312 | | && rl != 1 |
313 | | && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0) |
314 | | { |
315 | | rl <<= rtwos_bits; |
316 | | rtwos_bits = 0; |
317 | | TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl)); |
318 | | } |
319 | | #endif |
320 | 0 | } |
321 | 0 | else if (bsize == 2) |
322 | 0 | { |
323 | 0 | mp_limb_t bsecond = bp[1]; |
324 | 0 | if (btwos != 0) |
325 | 0 | blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK; |
326 | 0 | bsecond >>= btwos; |
327 | 0 | if (bsecond == 0) |
328 | 0 | { |
329 | | /* Two limbs became one after rshift. */ |
330 | 0 | bsize = 1; |
331 | 0 | goto bsize_1; |
332 | 0 | } |
333 | | |
334 | 0 | TRACE (printf ("bsize==2 using b=0x%lX:%lX", bsecond, blimb)); |
335 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
336 | 0 | blimb_low = blimb; |
337 | | #else |
338 | | bp = b_twolimbs; |
339 | | b_twolimbs[0] = blimb; |
340 | | b_twolimbs[1] = bsecond; |
341 | | #endif |
342 | 0 | blimb = bsecond; |
343 | 0 | } |
344 | 0 | else |
345 | 0 | { |
346 | 0 | if (r_bp_overlap || btwos != 0) |
347 | 0 | { |
348 | 0 | mp_ptr tp = TMP_ALLOC_LIMBS (bsize); |
349 | 0 | MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos); |
350 | 0 | bp = tp; |
351 | 0 | TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize)); |
352 | 0 | } |
353 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
354 | | /* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */ |
355 | 0 | blimb_low = bp[0]; |
356 | 0 | #endif |
357 | 0 | blimb = bp[bsize-1]; |
358 | |
359 | 0 | TRACE (printf ("big bsize=%ld ", bsize); |
360 | 0 | mpn_trace ("b", bp, bsize)); |
361 | 0 | } |
362 | | |
363 | | /* At this point blimb is the most significant limb of the base to use. |
364 | | |
365 | | Each factor of b takes (bsize*BPML-cnt) bits and there's e of them; +1 |
366 | | limb to round up the division; +1 for multiplies all using an extra |
367 | | limb over the true size; +2 for rl at the end; +1 for lshift at the |
368 | | end. |
369 | | |
370 | | The size calculation here is reasonably accurate. The base is at least |
371 | | half a limb, so in 32 bits the worst case is 2^16+1 treated as 17 bits |
372 | | when it will power up as just over 16, an overestimate of 17/16 = |
373 | | 6.25%. For a 64-bit limb it's half that. |
374 | | |
375 | | If e==0 then blimb won't be anything useful (though it will be |
376 | | non-zero), but that doesn't matter since we just end up with ralloc==5, |
377 | | and that's fine for 2 limbs of rl and 1 of lshift. */ |
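
Plugging numbers into that estimate: with 64-bit limbs, a two-limb base
whose high limb has cnt = 10 leading zero bits, and e = 100, each factor
takes 2*64 - 10 = 118 bits, so ralloc = 118*100/64 + 5 = 189 limbs,
against a true product of at most ceil(11800/64) = 185 limbs; the slack
covers rl, the final lshift, and the high zero limb the multiply macros
may write.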
378 | | |
379 | 0 | ASSERT (blimb != 0); |
380 | 0 | count_leading_zeros (cnt, blimb); |
381 | | |
382 | 0 | umul_ppmm (ovfl, ralloc, (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS), e); |
383 | 0 | if (ovfl) |
384 | 0 | { |
385 | 0 | fprintf (stderr, "gmp: overflow in mpz type\n"); |
386 | 0 | abort (); |
387 | 0 | } |
388 | 0 | ralloc = ralloc / GMP_NUMB_BITS + 5; |
389 | |
390 | 0 | TRACE (printf ("ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\n", |
391 | 0 | ralloc, bsize, blimb, cnt)); |
392 | 0 | rp = MPZ_NEWALLOC (r, ralloc + rtwos_limbs); |
393 | | |
394 | | /* Low zero limbs resulting from powers of 2. */ |
395 | 0 | MPN_ZERO (rp, rtwos_limbs); |
396 | 0 | rp += rtwos_limbs; |
397 | |
398 | 0 | if (e == 0) |
399 | 0 | { |
400 | | /* Any e==0 other than via bsize==1 or bsize==2 is covered at the |
401 | | start. */ |
402 | 0 | rp[0] = rl; |
403 | 0 | rsize = 1; |
404 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
405 | 0 | rp[1] = rl_high; |
406 | 0 | rsize += (rl_high != 0); |
407 | 0 | #endif |
408 | 0 | ASSERT (rp[rsize-1] != 0); |
409 | 0 | } |
410 | 0 | else |
411 | 0 | { |
412 | 0 | mp_ptr tp; |
413 | 0 | mp_size_t talloc; |
414 | | |
415 | | /* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the |
416 | | low bit of e is zero, tp only has to hold the second last power |
417 | | step, which is half the size of the final result. There's no need |
418 | | to round up the divide by 2, since ralloc includes a +2 for rl |
419 | | which is not needed by tp. In the mpn_mul loop when the low bit of e
420 | | is 1, tp must hold nearly the full result, so just size it the same |
421 | | as rp. */ |
422 | |
423 | 0 | talloc = ralloc; |
424 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
425 | 0 | if (bsize <= 2 || (e & 1) == 0) |
426 | 0 | talloc /= 2; |
427 | | #else |
428 | | if (bsize <= 1 || (e & 1) == 0) |
429 | | talloc /= 2; |
430 | | #endif |
431 | 0 | TRACE (printf ("talloc %ld\n", talloc)); |
432 | 0 | tp = TMP_ALLOC_LIMBS (talloc); |
433 | | |
434 | | /* Go from high to low over the bits of e, starting with i pointing at |
435 | | the bit below the highest 1 (which will mean i==-1 if e==1). */ |
436 | 0 | count_leading_zeros (cnt, (mp_limb_t) e); |
437 | 0 | i = GMP_LIMB_BITS - cnt - 2; |
438 | |
439 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
440 | 0 | if (bsize <= 2) |
441 | 0 | { |
442 | 0 | mp_limb_t mult[2]; |
443 | | |
444 | | /* Any bsize==1 will have been powered above to be two limbs. */ |
445 | 0 | ASSERT (bsize == 2); |
446 | 0 | ASSERT (blimb != 0); |
447 | | |
448 | | /* Arrange that the final result ends up in r, not in the temp space */
449 | 0 | if ((i & 1) == 0) |
450 | 0 | SWAP_RP_TP; |
451 | |
452 | 0 | rp[0] = blimb_low; |
453 | 0 | rp[1] = blimb; |
454 | 0 | rsize = 2; |
455 | |
456 | 0 | mult[0] = blimb_low; |
457 | 0 | mult[1] = blimb; |
458 | |
459 | 0 | for ( ; i >= 0; i--) |
460 | 0 | { |
461 | 0 | TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n", |
462 | 0 | i, e, rsize, ralloc, talloc); |
463 | 0 | mpn_trace ("r", rp, rsize)); |
464 | |
465 | 0 | MPN_SQR (tp, talloc, rp, rsize); |
466 | 0 | SWAP_RP_TP; |
467 | 0 | if ((e & (1L << i)) != 0) |
468 | 0 | MPN_MUL_2 (rp, rsize, ralloc, mult); |
469 | 0 | } |
470 | | |
471 | 0 | TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize)); |
472 | 0 | if (rl_high != 0) |
473 | 0 | { |
474 | 0 | mult[0] = rl; |
475 | 0 | mult[1] = rl_high; |
476 | 0 | MPN_MUL_2 (rp, rsize, ralloc, mult); |
477 | 0 | } |
478 | 0 | else if (rl != 1) |
479 | 0 | MPN_MUL_1 (rp, rsize, ralloc, rl); |
480 | 0 | } |
481 | | #else |
482 | | if (bsize == 1) |
483 | | { |
484 | | /* Arrange that the final result ends up in r, not in the temp space */
485 | | if ((i & 1) == 0) |
486 | | SWAP_RP_TP; |
487 | | |
488 | | rp[0] = blimb; |
489 | | rsize = 1; |
490 | | |
491 | | for ( ; i >= 0; i--) |
492 | | { |
493 | | TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n", |
494 | | i, e, rsize, ralloc, talloc); |
495 | | mpn_trace ("r", rp, rsize)); |
496 | | |
497 | | MPN_SQR (tp, talloc, rp, rsize); |
498 | | SWAP_RP_TP; |
499 | | if ((e & (1L << i)) != 0) |
500 | | MPN_MUL_1 (rp, rsize, ralloc, blimb); |
501 | | } |
502 | | |
503 | | TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize)); |
504 | | if (rl != 1) |
505 | | MPN_MUL_1 (rp, rsize, ralloc, rl); |
506 | | } |
507 | | #endif |
508 | 0 | else |
509 | 0 | { |
510 | 0 | int parity; |
511 | | |
512 | | /* Arrange that the final result ends up in r, not in the temp space */
513 | 0 | ULONG_PARITY (parity, e); |
514 | 0 | if (((parity ^ i) & 1) != 0) |
515 | 0 | SWAP_RP_TP; |
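
The parity test can be checked by counting swaps: each pass of the loop
below swaps once after MPN_SQR, plus once more after MPN_MUL when the
current bit of e is set, so with i+1 passes and popcount(e)-1 set bits
left below the top bit the total is i + popcount(e) swaps, and rp ends
up back at r's own block exactly when that total is even. The mul_1 and
mul_2 branches above multiply in place, so only the i+1 squarings swap,
giving their simpler (i & 1) == 0 test.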
516 | |
517 | 0 | MPN_COPY (rp, bp, bsize); |
518 | 0 | rsize = bsize; |
519 | |
520 | 0 | for ( ; i >= 0; i--) |
521 | 0 | { |
522 | 0 | TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n", |
523 | 0 | i, e, rsize, ralloc, talloc); |
524 | 0 | mpn_trace ("r", rp, rsize)); |
525 | |
526 | 0 | MPN_SQR (tp, talloc, rp, rsize); |
527 | 0 | SWAP_RP_TP; |
528 | 0 | if ((e & (1L << i)) != 0) |
529 | 0 | { |
530 | 0 | MPN_MUL (tp, talloc, rp, rsize, bp, bsize); |
531 | 0 | SWAP_RP_TP; |
532 | 0 | } |
533 | 0 | } |
534 | 0 | } |
535 | 0 | } |
536 | | |
537 | 0 | ASSERT (rp == PTR(r) + rtwos_limbs); |
538 | 0 | TRACE (mpn_trace ("end loop r", rp, rsize)); |
539 | 0 | TMP_FREE; |
540 | | |
541 | | /* Apply any partial limb factors of 2. */ |
542 | 0 | if (rtwos_bits != 0) |
543 | 0 | { |
544 | 0 | MPN_LSHIFT (rp, rsize, ralloc, (unsigned) rtwos_bits); |
545 | 0 | TRACE (mpn_trace ("lshift r", rp, rsize)); |
546 | 0 | } |
547 | | |
548 | 0 | rsize += rtwos_limbs; |
549 | 0 | SIZ(r) = (rneg ? -rsize : rsize); |
550 | 0 | } |
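
Being internal, mpz_n_pow_ui is reached through the documented entry
points such as mpz_pow_ui. A minimal usage sketch (demo code, not from
GMP) that exercises two of the cases handled above, b^0 == 1 including
0^0, and a base carrying factors of 2:

  #include <stdio.h>
  #include <gmp.h>

  int
  main (void)
  {
    mpz_t r, b;
    mpz_init (r);
    mpz_init_set_ui (b, 0);
    mpz_pow_ui (r, b, 0);             /* 0^0 == 1 */
    gmp_printf ("0^0   = %Zd\n", r);

    mpz_set_ui (b, 12);               /* 12 = 2^2 * 3, so btwos = 2 */
    mpz_pow_ui (r, b, 10);
    gmp_printf ("12^10 = %Zd\n", r);  /* 61917364224 = 2^20 * 3^10 */

    mpz_clear (r);
    mpz_clear (b);
    return 0;
  }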