/src/libgmp/mpz/n_pow_ui.c
Line | Count | Source
1 | | /* mpz_n_pow_ui -- mpn raised to ulong. |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST |
4 | | CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN |
5 | | FUTURE GNU MP RELEASES. |
6 | | |
7 | | Copyright 2001, 2002, 2005, 2012, 2015 Free Software Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
34 | | |
35 | | #include "gmp-impl.h" |
36 | | #include "longlong.h" |
37 | | |
38 | | |
39 | | /* Change this to "#define TRACE(x) x" for some traces. */ |
40 | | #define TRACE(x) |
41 | | |
42 | | |
43 | | /* Use this to test the mul_2 code on a CPU without a native version of that |
44 | | routine. */ |
45 | | #if 0 |
46 | | #define mpn_mul_2 refmpn_mul_2 |
47 | | #define HAVE_NATIVE_mpn_mul_2 1 |
48 | | #endif |
49 | | |
50 | | |
51 | | /* mpz_pow_ui and mpz_ui_pow_ui want to share almost all of this code. |
52 | | ui_pow_ui doesn't need the mpn_mul based powering loop or the tests on |
53 | | bsize==2 or >2, but separating that isn't easy because there's shared |
54 | | code both before and after (the size calculations and the powers of 2 |
55 | | handling). |
56 | | |
57 | | Alternatives: |
58 | | |
59 | | It would work to just use the mpn_mul powering loop for 1 and 2 limb |
60 | | bases, but the current separate loop allows mul_1 and mul_2 to be done |
61 | | in-place, which might help cache locality a bit. If mpn_mul was relaxed |
62 | | to allow source==dest when vn==1 or 2 then some pointer twiddling might |
63 | | let us get the same effect in one loop. |
64 | | |
65 | | The initial powering for bsize==1 into blimb or blimb:blimb_low doesn't |
66 | | form the biggest possible power of b that fits, only the biggest
67 | | power-of-2 power, ie. b^(2^n). It'd be possible to choose a bigger power, perhaps
68 | | using mp_bases[b].big_base for small b, and thereby get better value |
69 | | from mpn_mul_1 or mpn_mul_2 in the bignum powering. It's felt that doing |
70 | | so would be more complicated than it's worth, and could well end up being |
71 | | a slowdown for small e. For big e on the other hand the algorithm is |
72 | | dominated by mpn_sqr so there wouldn't be much of a saving. The current
73 | | code can be viewed as simply doing the first few steps of the powering in |
74 | | a single or double limb where possible. |
75 | | |
76 | | If r==b, and btwos==0, and r must be realloc'ed, then the temporary
77 | | copy made of b is unnecessary. We could just use the old alloc'ed block |
78 | | and free it at the end. But arranging this seems like a lot more trouble |
79 | | than it's worth. */ |
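
/* A minimal sketch of the "first few steps in a single limb" powering,
   assuming 64-bit limbs with no nail bits; b and e are the stripped base
   and exponent, and "halfmax" is an illustrative stand-in for
   GMP_NUMB_HALFMAX (none of these names are GMP identifiers):

     uint64_t blimb = b;                      // b odd, b >= 1
     uint64_t rl = 1;                         // collects the odd-bit factors
     const uint64_t halfmax = ((uint64_t) 1 << 32) - 1;

     while (blimb <= halfmax)
       {
         if (e & 1)
           rl *= blimb;    // rl = b^(e0 mod 2^n) <= blimb <= halfmax,
                           // so this product stays below 2^64
         e >>= 1;
         if (e == 0)
           break;          // the whole b^e fitted into rl
         blimb *= blimb;   // blimb = b^(2^n), may now exceed halfmax
       }
*/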
80 | | |
81 | | |
82 | | /* floor(sqrt(GMP_NUMB_MAX)), ie. the biggest value that can be squared in |
83 | | a limb without overflowing. |
84 | | FIXME: This formula is an underestimate when GMP_NUMB_BITS is odd. */ |
85 | | |
86 | 3.75k | #define GMP_NUMB_HALFMAX (((mp_limb_t) 1 << GMP_NUMB_BITS/2) - 1) |
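
/* With GMP_NUMB_BITS == 64 and no nails this is 2^32 - 1, which is exactly
   floor(sqrt(2^64 - 1)).  For an odd size the truncating division in
   GMP_NUMB_BITS/2 loses half a bit: with a hypothetical GMP_NUMB_BITS == 7
   the formula gives 2^3 - 1 = 7 while floor(sqrt(127)) = 11.  The value is
   still safe to square, just conservative, so the powering loops below may
   stop one step earlier than strictly necessary. */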
87 | | |
88 | | |
89 | | /* The following are for convenience, they update the size and check the |
90 | | alloc. */ |
91 | | |
92 | | #define MPN_SQR(dst, alloc, src, size) \ |
93 | 0 | do { \ |
94 | 0 | ASSERT (2*(size) <= (alloc)); \ |
95 | 0 | mpn_sqr (dst, src, size); \ |
96 | 0 | (size) *= 2; \ |
97 | 0 | (size) -= ((dst)[(size)-1] == 0); \ |
98 | 0 | } while (0) |
99 | | |
100 | | #define MPN_MUL(dst, alloc, src, size, src2, size2) \ |
101 | 0 | do { \ |
102 | 0 | mp_limb_t cy; \ |
103 | 0 | ASSERT ((size) + (size2) <= (alloc)); \ |
104 | 0 | cy = mpn_mul (dst, src, size, src2, size2); \ |
105 | 0 | (size) += (size2) - (cy == 0); \ |
106 | 0 | } while (0) |
107 | | |
108 | | #define MPN_MUL_2(ptr, size, alloc, mult) \ |
109 | 0 | do { \ |
110 | 0 | mp_limb_t cy; \ |
111 | 0 | ASSERT ((size)+2 <= (alloc)); \ |
112 | 0 | cy = mpn_mul_2 (ptr, ptr, size, mult); \ |
113 | 0 | (size)++; \ |
114 | 0 | (ptr)[(size)] = cy; \ |
115 | 0 | (size) += (cy != 0); \ |
116 | 0 | } while (0) |
117 | | |
118 | | #define MPN_MUL_1(ptr, size, alloc, limb) \ |
119 | 0 | do { \ |
120 | 0 | mp_limb_t cy; \ |
121 | 0 | ASSERT ((size)+1 <= (alloc)); \ |
122 | 0 | cy = mpn_mul_1 (ptr, ptr, size, limb); \ |
123 | 0 | (ptr)[size] = cy; \ |
124 | 0 | (size) += (cy != 0); \ |
125 | 0 | } while (0) |
126 | | |
127 | | #define MPN_LSHIFT(ptr, size, alloc, shift) \ |
128 | 9 | do { \ |
129 | 9 | mp_limb_t cy; \ |
130 | 9 | ASSERT ((size)+1 <= (alloc)); \ |
131 | 9 | cy = mpn_lshift (ptr, ptr, size, shift); \ |
132 | 9 | (ptr)[size] = cy; \ |
133 | 9 | (size) += (cy != 0); \ |
134 | 9 | } while (0) |
135 | | |
136 | | #define MPN_RSHIFT_OR_COPY(dst, src, size, shift) \ |
137 | 0 | do { \ |
138 | 0 | if ((shift) == 0) \ |
139 | 0 | MPN_COPY (dst, src, size); \ |
140 | 0 | else \ |
141 | 0 | { \ |
142 | 0 | mpn_rshift (dst, src, size, shift); \ |
143 | 0 | (size) -= ((dst)[(size)-1] == 0); \ |
144 | 0 | } \ |
145 | 0 | } while (0) |
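
/* As a usage sketch (not code from this file), one step of the powering
   loops below strings these macros together; rsize is an lvalue updated in
   place, and e_bit_set is a hypothetical stand-in for the (e & (1L << i))
   test:

     MPN_SQR (tp, talloc, rp, rsize);   // tp = {rp,rsize}^2; rsize doubles,
                                        // minus one if the top limb is zero
     SWAP_RP_TP;                        // make tp the current result rp
     if (e_bit_set)
       MPN_MUL_1 (rp, rsize, ralloc, blimb);  // fold in one factor of b
*/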
146 | | |
147 | | |
148 | | /* ralloc and talloc are only wanted for ASSERTs, after the initial space |
149 | | allocations. Avoid writing values to them in a normal build, to ensure |
150 | | the compiler lets them go dead. gcc already figures this out itself |
151 | | actually. */ |
152 | | |
153 | | #define SWAP_RP_TP \ |
154 | 0 | do { \ |
155 | 0 | MP_PTR_SWAP (rp, tp); \ |
156 | 0 | ASSERT_CODE (MP_SIZE_T_SWAP (ralloc, talloc)); \ |
157 | 0 | } while (0) |
158 | | |
159 | | |
160 | | void |
161 | | mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e) |
162 | 1.25k | { |
163 | 1.25k | mp_ptr rp; |
164 | 1.25k | mp_size_t rtwos_limbs, ralloc, rsize; |
165 | 1.25k | int rneg, i, cnt, btwos, r_bp_overlap; |
166 | 1.25k | mp_limb_t blimb, rl; |
167 | 1.25k | mp_bitcnt_t rtwos_bits; |
168 | 1.25k | #if HAVE_NATIVE_mpn_mul_2 |
169 | 1.25k | mp_limb_t blimb_low, rl_high; |
170 | | #else |
171 | | mp_limb_t b_twolimbs[2]; |
172 | | #endif |
173 | 1.25k | TMP_DECL; |
174 | | |
175 | 1.25k | TRACE (printf ("mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\n", |
176 | 1.25k | PTR(r), bp, bsize, e, e); |
177 | 1.25k | mpn_trace ("b", bp, bsize)); |
178 | | |
179 | 1.25k | ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0); |
180 | 1.25k | ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ALLOC(r), bp, ABS(bsize))); |
181 | | |
182 | | /* b^0 == 1, including 0^0 == 1 */ |
183 | 1.25k | if (e == 0) |
184 | 1 | { |
185 | 1 | MPZ_NEWALLOC (r, 1)[0] = 1; |
186 | 1 | SIZ(r) = 1; |
187 | 1 | return; |
188 | 1 | } |
189 | | |
190 | | /* 0^e == 0 apart from 0^0 above */ |
191 | 1.24k | if (bsize == 0) |
192 | 0 | { |
193 | 0 | SIZ(r) = 0; |
194 | 0 | return; |
195 | 0 | } |
196 | | |
197 | | /* Sign of the final result. */ |
198 | 1.24k | rneg = (bsize < 0 && (e & 1) != 0); |
199 | 1.24k | bsize = ABS (bsize); |
200 | 1.24k | TRACE (printf ("rneg %d\n", rneg)); |
201 | | |
202 | 1.24k | r_bp_overlap = (PTR(r) == bp); |
203 | | |
204 | | /* Strip low zero limbs from b. */ |
205 | 1.24k | rtwos_limbs = 0; |
206 | 1.24k | for (blimb = *bp; blimb == 0; blimb = *++bp) |
207 | 0 | { |
208 | 0 | rtwos_limbs += e; |
209 | 0 | bsize--; ASSERT (bsize >= 1); |
210 | 0 | } |
211 | 1.24k | TRACE (printf ("trailing zero rtwos_limbs=%ld\n", rtwos_limbs)); |
212 | | |
213 | | /* Strip low zero bits from b. */ |
214 | 1.24k | count_trailing_zeros (btwos, blimb); |
215 | 1.24k | blimb >>= btwos; |
216 | 1.24k | rtwos_bits = e * btwos; |
217 | 1.24k | rtwos_limbs += rtwos_bits / GMP_NUMB_BITS; |
218 | 1.24k | rtwos_bits %= GMP_NUMB_BITS; |
219 | 1.24k | TRACE (printf ("trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\n", |
220 | 1.24k | btwos, rtwos_limbs, rtwos_bits)); |
221 | | |
222 | 1.24k | TMP_MARK; |
223 | | |
224 | 1.24k | rl = 1; |
225 | 1.24k | #if HAVE_NATIVE_mpn_mul_2 |
226 | 1.24k | rl_high = 0; |
227 | 1.24k | #endif |
228 | | |
229 | 1.24k | if (bsize == 1) |
230 | 1.24k | { |
231 | 1.24k | bsize_1: |
232 | | /* Power up as far as possible within blimb. We start here with e!=0, |
233 | | but if e is small then we might reach e==0 with the whole b^e in rl.
234 | | Notice this code works when blimb==1 too, reaching e==0. */ |
235 | | |
236 | 3.75k | while (blimb <= GMP_NUMB_HALFMAX) |
237 | 3.18k | { |
238 | 3.18k | TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n", |
239 | 3.18k | e, blimb, rl)); |
240 | 3.18k | ASSERT (e != 0); |
241 | 3.18k | if ((e & 1) != 0) |
242 | 1.93k | rl *= blimb; |
243 | 3.18k | e >>= 1; |
244 | 3.18k | if (e == 0) |
245 | 680 | goto got_rl; |
246 | 2.50k | blimb *= blimb; |
247 | 2.50k | } |
248 | | |
249 | 569 | #if HAVE_NATIVE_mpn_mul_2 |
250 | 569 | TRACE (printf ("single power, e=0x%lX b=0x%lX rl=0x%lX\n", |
251 | 569 | e, blimb, rl)); |
252 | | |
253 | | /* Can power b once more into blimb:blimb_low */ |
254 | 569 | bsize = 2; |
255 | 569 | ASSERT (e != 0); |
256 | 569 | if ((e & 1) != 0) |
257 | 569 | { |
258 | 569 | umul_ppmm (rl_high, rl, rl, blimb << GMP_NAIL_BITS); |
259 | 569 | rl >>= GMP_NAIL_BITS; |
260 | 569 | } |
261 | 569 | e >>= 1; |
262 | 569 | umul_ppmm (blimb, blimb_low, blimb, blimb << GMP_NAIL_BITS); |
263 | 569 | blimb_low >>= GMP_NAIL_BITS; |
264 | | |
265 | 1.24k | got_rl: |
266 | 1.24k | TRACE (printf ("double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\n", |
267 | 1.24k | e, blimb, blimb_low, rl_high, rl)); |
268 | | |
269 | | /* Combine left-over rtwos_bits into rl_high:rl to be handled by the |
270 | | final mul_1 or mul_2 rather than a separate lshift. |
271 | | - rl_high:rl mustn't be 1 (since then there's no final mul) |
272 | | - rl_high mustn't overflow |
273 | | - rl_high mustn't change to non-zero, since mul_1+lshift is |
274 | | probably faster than mul_2 (FIXME: is this true?) */ |
275 | | |
276 | 1.24k | if (rtwos_bits != 0 |
277 | 1.24k | && ! (rl_high == 0 && rl == 1) |
278 | 1.24k | && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0) |
279 | 617 | { |
280 | 617 | mp_limb_t new_rl_high = (rl_high << rtwos_bits) |
281 | 617 | | (rl >> (GMP_NUMB_BITS-rtwos_bits)); |
282 | 617 | if (! (rl_high == 0 && new_rl_high != 0)) |
283 | 617 | { |
284 | 617 | rl_high = new_rl_high; |
285 | 617 | rl <<= rtwos_bits; |
286 | 617 | rtwos_bits = 0; |
287 | 617 | TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n", |
288 | 617 | rl_high, rl)); |
289 | 617 | } |
290 | 617 | } |
291 | | #else |
292 | | got_rl: |
293 | | TRACE (printf ("small power e=0x%lX blimb=0x%lX rl=0x%lX\n", |
294 | | e, blimb, rl)); |
295 | | |
296 | | /* Combine left-over rtwos_bits into rl to be handled by the final |
297 | | mul_1 rather than a separate lshift. |
298 | | - rl mustn't be 1 (since then there's no final mul) |
299 | | - rl mustn't overflow */ |
300 | | |
301 | | if (rtwos_bits != 0 |
302 | | && rl != 1 |
303 | | && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0) |
304 | | { |
305 | | rl <<= rtwos_bits; |
306 | | rtwos_bits = 0; |
307 | | TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl)); |
308 | | } |
309 | | #endif |
310 | 1.24k | } |
311 | 0 | else if (bsize == 2) |
312 | 0 | { |
313 | 0 | mp_limb_t bsecond = bp[1]; |
314 | 0 | if (btwos != 0) |
315 | 0 | blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK; |
316 | 0 | bsecond >>= btwos; |
317 | 0 | if (bsecond == 0) |
318 | 0 | { |
319 | | /* Two limbs became one after rshift. */ |
320 | 0 | bsize = 1; |
321 | 0 | goto bsize_1; |
322 | 0 | } |
323 | | |
324 | 0 | TRACE (printf ("bsize==2 using b=0x%lX:%lX", bsecond, blimb)); |
325 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
326 | 0 | blimb_low = blimb; |
327 | | #else |
328 | | bp = b_twolimbs; |
329 | | b_twolimbs[0] = blimb; |
330 | | b_twolimbs[1] = bsecond; |
331 | | #endif |
332 | 0 | blimb = bsecond; |
333 | 0 | } |
334 | 0 | else |
335 | 0 | { |
336 | 0 | if (r_bp_overlap || btwos != 0) |
337 | 0 | { |
338 | 0 | mp_ptr tp = TMP_ALLOC_LIMBS (bsize); |
339 | 0 | MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos); |
340 | 0 | bp = tp; |
341 | 0 | TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize)); |
342 | 0 | } |
343 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
344 | | /* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */ |
345 | 0 | blimb_low = bp[0]; |
346 | 0 | #endif |
347 | 0 | blimb = bp[bsize-1]; |
348 | |
349 | 0 | TRACE (printf ("big bsize=%ld ", bsize); |
350 | 0 | mpn_trace ("b", bp, bsize)); |
351 | 0 | } |
352 | | |
353 | | /* At this point blimb is the most significant limb of the base to use. |
354 | | |
355 | | Each factor of b takes (bsize*GMP_NUMB_BITS-cnt) bits and there's e of them; +1
356 | | limb to round up the division; +1 for multiplies all using an extra |
357 | | limb over the true size; +2 for rl at the end; +1 for lshift at the |
358 | | end. |
359 | | |
360 | | The size calculation here is reasonably accurate. The base is at least |
361 | | half a limb, so in 32 bits the worst case is 2^16+1 treated as 17 bits |
362 | | when it will power up as just over 16, an overestimate of 17/16 = |
363 | | 6.25%. For a 64-bit limb it's half that. |
364 | | |
365 | | If e==0 then blimb won't be anything useful (though it will be |
366 | | non-zero), but that doesn't matter since we just end up with ralloc==5, |
367 | | and that's fine for 2 limbs of rl and 1 of lshift. */ |
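
/* A worked instance, assuming 64-bit limbs and no nails: bsize==2 with
   cnt==21 leading zeros on blimb means each factor of b takes
   2*64 - 21 = 107 bits, so e==3 gives ralloc = 107*3/64 + 5 = 10 limbs,
   comfortably above the at most ceil(321/64) = 6 limbs of the product
   plus the rl and lshift slack. */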
368 | | |
369 | 1.24k | ASSERT (blimb != 0); |
370 | 1.24k | count_leading_zeros (cnt, blimb); |
371 | 1.24k | ralloc = (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5; |
372 | 1.24k | TRACE (printf ("ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\n", |
373 | 1.24k | ralloc, bsize, blimb, cnt)); |
374 | 1.24k | rp = MPZ_NEWALLOC (r, ralloc + rtwos_limbs); |
375 | | |
376 | | /* Low zero limbs resulting from powers of 2. */ |
377 | 1.24k | MPN_ZERO (rp, rtwos_limbs); |
378 | 1.24k | rp += rtwos_limbs; |
379 | | |
380 | 1.24k | if (e == 0) |
381 | 1.24k | { |
382 | | /* Any e==0 other than via bsize==1 or bsize==2 is covered at the |
383 | | start. */ |
384 | 1.24k | rp[0] = rl; |
385 | 1.24k | rsize = 1; |
386 | 1.24k | #if HAVE_NATIVE_mpn_mul_2 |
387 | 1.24k | rp[1] = rl_high; |
388 | 1.24k | rsize += (rl_high != 0); |
389 | 1.24k | #endif |
390 | 1.24k | ASSERT (rp[rsize-1] != 0); |
391 | 1.24k | } |
392 | 0 | else |
393 | 0 | { |
394 | 0 | mp_ptr tp; |
395 | 0 | mp_size_t talloc; |
396 | | |
397 | | /* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the |
398 | | low bit of e is zero, tp only has to hold the second last power |
399 | | step, which is half the size of the final result. There's no need |
400 | | to round up the divide by 2, since ralloc includes a +2 for rl |
401 | | which is not needed by tp. In the mpn_mul loop when the low bit of e
402 | | is 1, tp must hold nearly the full result, so just size it the same |
403 | | as rp. */ |
404 | |
405 | 0 | talloc = ralloc; |
406 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
407 | 0 | if (bsize <= 2 || (e & 1) == 0) |
408 | 0 | talloc /= 2; |
409 | | #else |
410 | | if (bsize <= 1 || (e & 1) == 0) |
411 | | talloc /= 2; |
412 | | #endif |
413 | 0 | TRACE (printf ("talloc %ld\n", talloc)); |
414 | 0 | tp = TMP_ALLOC_LIMBS (talloc); |
415 | | |
416 | | /* Go from high to low over the bits of e, starting with i pointing at |
417 | | the bit below the highest 1 (which will mean i==-1 if e==1). */ |
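
/* For example e == 13 == 0b1101 gives i == 2: rp starts as b (the top
   bit), then i == 2,1,0 performs square+multiply (bit 2 set, b^3),
   square (bit 1 clear, b^6), square+multiply (bit 0 set, b^13). */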
418 | 0 | count_leading_zeros (cnt, (mp_limb_t) e); |
419 | 0 | i = GMP_LIMB_BITS - cnt - 2; |
420 | |
421 | 0 | #if HAVE_NATIVE_mpn_mul_2 |
422 | 0 | if (bsize <= 2) |
423 | 0 | { |
424 | 0 | mp_limb_t mult[2]; |
425 | | |
426 | | /* Any bsize==1 will have been powered above to be two limbs. */ |
427 | 0 | ASSERT (bsize == 2); |
428 | 0 | ASSERT (blimb != 0); |
429 | | |
430 | | /* Arrange that the final result ends up in r, not in the temp space */
431 | 0 | if ((i & 1) == 0) |
432 | 0 | SWAP_RP_TP; |
433 | |
434 | 0 | rp[0] = blimb_low; |
435 | 0 | rp[1] = blimb; |
436 | 0 | rsize = 2; |
437 | |
438 | 0 | mult[0] = blimb_low; |
439 | 0 | mult[1] = blimb; |
440 | |
441 | 0 | for ( ; i >= 0; i--) |
442 | 0 | { |
443 | 0 | TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n", |
444 | 0 | i, e, rsize, ralloc, talloc); |
445 | 0 | mpn_trace ("r", rp, rsize)); |
446 | |
447 | 0 | MPN_SQR (tp, talloc, rp, rsize); |
448 | 0 | SWAP_RP_TP; |
449 | 0 | if ((e & (1L << i)) != 0) |
450 | 0 | MPN_MUL_2 (rp, rsize, ralloc, mult); |
451 | 0 | } |
452 | | |
453 | 0 | TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize)); |
454 | 0 | if (rl_high != 0) |
455 | 0 | { |
456 | 0 | mult[0] = rl; |
457 | 0 | mult[1] = rl_high; |
458 | 0 | MPN_MUL_2 (rp, rsize, ralloc, mult); |
459 | 0 | } |
460 | 0 | else if (rl != 1) |
461 | 0 | MPN_MUL_1 (rp, rsize, ralloc, rl); |
462 | 0 | } |
463 | | #else |
464 | | if (bsize == 1) |
465 | | { |
466 | | /* Arrange that the final result ends up in r, not in the temp space */
467 | | if ((i & 1) == 0) |
468 | | SWAP_RP_TP; |
469 | | |
470 | | rp[0] = blimb; |
471 | | rsize = 1; |
472 | | |
473 | | for ( ; i >= 0; i--) |
474 | | { |
475 | | TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n", |
476 | | i, e, rsize, ralloc, talloc); |
477 | | mpn_trace ("r", rp, rsize)); |
478 | | |
479 | | MPN_SQR (tp, talloc, rp, rsize); |
480 | | SWAP_RP_TP; |
481 | | if ((e & (1L << i)) != 0) |
482 | | MPN_MUL_1 (rp, rsize, ralloc, blimb); |
483 | | } |
484 | | |
485 | | TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize)); |
486 | | if (rl != 1) |
487 | | MPN_MUL_1 (rp, rsize, ralloc, rl); |
488 | | } |
489 | | #endif |
490 | 0 | else |
491 | 0 | { |
492 | 0 | int parity; |
493 | | |
494 | | /* Arrange that the final result ends up in r, not in the temp space */
495 | 0 | ULONG_PARITY (parity, e); |
496 | 0 | if (((parity ^ i) & 1) != 0) |
497 | 0 | SWAP_RP_TP; |
498 | |
499 | 0 | MPN_COPY (rp, bp, bsize); |
500 | 0 | rsize = bsize; |
501 | |
502 | 0 | for ( ; i >= 0; i--) |
503 | 0 | { |
504 | 0 | TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n", |
505 | 0 | i, e, rsize, ralloc, talloc); |
506 | 0 | mpn_trace ("r", rp, rsize)); |
507 | |
508 | 0 | MPN_SQR (tp, talloc, rp, rsize); |
509 | 0 | SWAP_RP_TP; |
510 | 0 | if ((e & (1L << i)) != 0) |
511 | 0 | { |
512 | 0 | MPN_MUL (tp, talloc, rp, rsize, bp, bsize); |
513 | 0 | SWAP_RP_TP; |
514 | 0 | } |
515 | 0 | } |
516 | 0 | } |
517 | 0 | } |
518 | | |
519 | 1.24k | ASSERT (rp == PTR(r) + rtwos_limbs); |
520 | 1.24k | TRACE (mpn_trace ("end loop r", rp, rsize)); |
521 | 1.24k | TMP_FREE; |
522 | | |
523 | | /* Apply any partial limb factors of 2. */ |
524 | 1.24k | if (rtwos_bits != 0) |
525 | 9 | { |
526 | 9 | MPN_LSHIFT (rp, rsize, ralloc, (unsigned) rtwos_bits); |
527 | 9 | TRACE (mpn_trace ("lshift r", rp, rsize)); |
528 | 9 | } |
529 | | |
530 | 1.24k | rsize += rtwos_limbs; |
531 | 1.24k | SIZ(r) = (rneg ? -rsize : rsize); |
532 | 1.24k | } |
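
/* A hedged caller sketch: the public entry points (mpz/pow_ui.c,
   mpz/ui_pow_ui.c) hand the base over in mpn form, with the sign carried
   in bsize so that the (e & 1) test above can set the result sign.
   Roughly, with my_pow_ui an illustrative name only:

     void
     my_pow_ui (mpz_ptr r, mpz_srcptr b, unsigned long int e)
     {
       mpz_n_pow_ui (r, PTR (b), (mp_size_t) SIZ (b), e);
     }
*/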