/rust/registry/src/github.com-1ecc6299db9ec823/num-bigint-0.2.6/src/algorithms.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use std::borrow::Cow; |
2 | | use std::cmp; |
3 | | use std::cmp::Ordering::{self, Equal, Greater, Less}; |
4 | | use std::iter::repeat; |
5 | | use std::mem; |
6 | | use traits; |
7 | | use traits::{One, Zero}; |
8 | | |
9 | | use biguint::BigUint; |
10 | | |
11 | | use bigint::BigInt; |
12 | | use bigint::Sign; |
13 | | use bigint::Sign::{Minus, NoSign, Plus}; |
14 | | |
15 | | use big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit}; |
16 | | |
17 | | // Generic functions for add/subtract/multiply with carry/borrow: |
18 | | |
19 | | // Add with carry: |
20 | | #[inline] |
21 | 0 | fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { |
22 | 0 | *acc += DoubleBigDigit::from(a); |
23 | 0 | *acc += DoubleBigDigit::from(b); |
24 | 0 | let lo = *acc as BigDigit; |
25 | 0 | *acc >>= big_digit::BITS; |
26 | 0 | lo |
27 | 0 | } |
28 | | |
29 | | // Subtract with borrow: |
30 | | #[inline] |
31 | 0 | fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit { |
32 | 0 | *acc += SignedDoubleBigDigit::from(a); |
33 | 0 | *acc -= SignedDoubleBigDigit::from(b); |
34 | 0 | let lo = *acc as BigDigit; |
35 | 0 | *acc >>= big_digit::BITS; |
36 | 0 | lo |
37 | 0 | } |
38 | | |
39 | | #[inline] |
40 | 0 | pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { |
41 | 0 | *acc += DoubleBigDigit::from(a); |
42 | 0 | *acc += DoubleBigDigit::from(b) * DoubleBigDigit::from(c); |
43 | 0 | let lo = *acc as BigDigit; |
44 | 0 | *acc >>= big_digit::BITS; |
45 | 0 | lo |
46 | 0 | } |
47 | | |
48 | | #[inline] |
49 | 0 | pub fn mul_with_carry(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { |
50 | 0 | *acc += DoubleBigDigit::from(a) * DoubleBigDigit::from(b); |
51 | 0 | let lo = *acc as BigDigit; |
52 | 0 | *acc >>= big_digit::BITS; |
53 | 0 | lo |
54 | 0 | } |
55 | | |
56 | | /// Divide a two digit numerator by a one digit divisor, returns quotient and remainder: |
57 | | /// |
58 | | /// Note: the caller must ensure that both the quotient and remainder will fit into a single digit. |
59 | | /// This is _not_ true for an arbitrary numerator/denominator. |
60 | | /// |
61 | | /// (This function also matches what the x86 divide instruction does). |
62 | | #[inline] |
63 | 0 | fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) { |
64 | 0 | debug_assert!(hi < divisor); |
65 | | |
66 | 0 | let lhs = big_digit::to_doublebigdigit(hi, lo); |
67 | 0 | let rhs = DoubleBigDigit::from(divisor); |
68 | 0 | ((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit) |
69 | 0 | } |
70 | | |
71 | 0 | pub fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit) { |
72 | 0 | let mut rem = 0; |
73 | | |
74 | 0 | for d in a.data.iter_mut().rev() { |
75 | 0 | let (q, r) = div_wide(rem, *d, b); |
76 | 0 | *d = q; |
77 | 0 | rem = r; |
78 | 0 | } |
79 | | |
80 | 0 | (a.normalized(), rem) |
81 | 0 | } |
82 | | |
83 | 0 | pub fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit { |
84 | 0 | let mut rem: DoubleBigDigit = 0; |
85 | 0 | for &digit in a.data.iter().rev() { |
86 | 0 | rem = (rem << big_digit::BITS) + DoubleBigDigit::from(digit); |
87 | 0 | rem %= DoubleBigDigit::from(b); |
88 | 0 | } |
89 | | |
90 | 0 | rem as BigDigit |
91 | 0 | } |
92 | | |
93 | | // Only for the Add impl: |
94 | | #[inline] |
95 | 0 | pub fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { |
96 | 0 | debug_assert!(a.len() >= b.len()); |
97 | | |
98 | 0 | let mut carry = 0; |
99 | 0 | let (a_lo, a_hi) = a.split_at_mut(b.len()); |
100 | | |
101 | 0 | for (a, b) in a_lo.iter_mut().zip(b) { |
102 | 0 | *a = adc(*a, *b, &mut carry); |
103 | 0 | } |
104 | | |
105 | 0 | if carry != 0 { |
106 | 0 | for a in a_hi { |
107 | 0 | *a = adc(*a, 0, &mut carry); |
108 | 0 | if carry == 0 { |
109 | 0 | break; |
110 | 0 | } |
111 | | } |
112 | 0 | } |
113 | | |
114 | 0 | carry as BigDigit |
115 | 0 | } |
116 | | |
117 | | /// Two argument addition of raw slices: |
118 | | /// a += b |
119 | | /// |
120 | | /// The caller _must_ ensure that a is big enough to store the result - typically this means |
121 | | /// resizing a to max(a.len(), b.len()) + 1, to fit a possible carry. |
122 | 0 | pub fn add2(a: &mut [BigDigit], b: &[BigDigit]) { |
123 | 0 | let carry = __add2(a, b); |
124 | | |
125 | 0 | debug_assert!(carry == 0); |
126 | 0 | } |
127 | | |
128 | 0 | pub fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { |
129 | 0 | let mut borrow = 0; |
130 | 0 |
|
131 | 0 | let len = cmp::min(a.len(), b.len()); |
132 | 0 | let (a_lo, a_hi) = a.split_at_mut(len); |
133 | 0 | let (b_lo, b_hi) = b.split_at(len); |
134 | | |
135 | 0 | for (a, b) in a_lo.iter_mut().zip(b_lo) { |
136 | 0 | *a = sbb(*a, *b, &mut borrow); |
137 | 0 | } |
138 | | |
139 | 0 | if borrow != 0 { |
140 | 0 | for a in a_hi { |
141 | 0 | *a = sbb(*a, 0, &mut borrow); |
142 | 0 | if borrow == 0 { |
143 | 0 | break; |
144 | 0 | } |
145 | | } |
146 | 0 | } |
147 | | |
148 | | // note: we're _required_ to fail on underflow |
149 | 0 | assert!( |
150 | 0 | borrow == 0 && b_hi.iter().all(|x| *x == 0), |
151 | | "Cannot subtract b from a because b is larger than a." |
152 | | ); |
153 | 0 | } |
154 | | |
155 | | // Only for the Sub impl. `a` and `b` must have same length. |
156 | | #[inline] |
157 | 0 | pub fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { |
158 | 0 | debug_assert!(b.len() == a.len()); |
159 | | |
160 | 0 | let mut borrow = 0; |
161 | | |
162 | 0 | for (ai, bi) in a.iter().zip(b) { |
163 | 0 | *bi = sbb(*ai, *bi, &mut borrow); |
164 | 0 | } |
165 | | |
166 | 0 | borrow as BigDigit |
167 | 0 | } |
168 | | |
169 | 0 | pub fn sub2rev(a: &[BigDigit], b: &mut [BigDigit]) { |
170 | 0 | debug_assert!(b.len() >= a.len()); |
171 | | |
172 | 0 | let len = cmp::min(a.len(), b.len()); |
173 | 0 | let (a_lo, a_hi) = a.split_at(len); |
174 | 0 | let (b_lo, b_hi) = b.split_at_mut(len); |
175 | 0 |
|
176 | 0 | let borrow = __sub2rev(a_lo, b_lo); |
177 | 0 |
|
178 | 0 | assert!(a_hi.is_empty()); |
179 | | |
180 | | // note: we're _required_ to fail on underflow |
181 | 0 | assert!( |
182 | 0 | borrow == 0 && b_hi.iter().all(|x| *x == 0), |
183 | | "Cannot subtract b from a because b is larger than a." |
184 | | ); |
185 | 0 | } |
186 | | |
187 | 0 | pub fn sub_sign(a: &[BigDigit], b: &[BigDigit]) -> (Sign, BigUint) { |
188 | 0 | // Normalize: |
189 | 0 | let a = &a[..a.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)]; |
190 | 0 | let b = &b[..b.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)]; |
191 | 0 |
|
192 | 0 | match cmp_slice(a, b) { |
193 | | Greater => { |
194 | 0 | let mut a = a.to_vec(); |
195 | 0 | sub2(&mut a, b); |
196 | 0 | (Plus, BigUint::new(a)) |
197 | | } |
198 | | Less => { |
199 | 0 | let mut b = b.to_vec(); |
200 | 0 | sub2(&mut b, a); |
201 | 0 | (Minus, BigUint::new(b)) |
202 | | } |
203 | 0 | _ => (NoSign, Zero::zero()), |
204 | | } |
205 | 0 | } |
206 | | |
207 | | /// Three argument multiply accumulate: |
208 | | /// acc += b * c |
209 | 0 | pub fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) { |
210 | 0 | if c == 0 { |
211 | 0 | return; |
212 | 0 | } |
213 | 0 |
|
214 | 0 | let mut carry = 0; |
215 | 0 | let (a_lo, a_hi) = acc.split_at_mut(b.len()); |
216 | | |
217 | 0 | for (a, &b) in a_lo.iter_mut().zip(b) { |
218 | 0 | *a = mac_with_carry(*a, b, c, &mut carry); |
219 | 0 | } |
220 | | |
221 | 0 | let mut a = a_hi.iter_mut(); |
222 | 0 | while carry != 0 { |
223 | 0 | let a = a.next().expect("carry overflow during multiplication!"); |
224 | 0 | *a = adc(*a, 0, &mut carry); |
225 | 0 | } |
226 | 0 | } |
227 | | |
228 | | /// Three argument multiply accumulate: |
229 | | /// acc += b * c |
230 | 0 | fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) { |
231 | 0 | let (x, y) = if b.len() < c.len() { (b, c) } else { (c, b) }; |
232 | | |
233 | | // We use three algorithms for different input sizes. |
234 | | // |
235 | | // - For small inputs, long multiplication is fastest. |
236 | | // - Next we use Karatsuba multiplication (Toom-2), which we have optimized |
237 | | // to avoid unnecessary allocations for intermediate values. |
238 | | // - For the largest inputs we use Toom-3, which better optimizes the |
239 | | // number of operations, but uses more temporary allocations. |
240 | | // |
241 | | // The thresholds are somewhat arbitrary, chosen by evaluating the results |
242 | | // of `cargo bench --bench bigint multiply`. |
243 | | |
244 | 0 | if x.len() <= 32 { |
245 | | // Long multiplication: |
246 | 0 | for (i, xi) in x.iter().enumerate() { |
247 | 0 | mac_digit(&mut acc[i..], y, *xi); |
248 | 0 | } |
249 | 0 | } else if x.len() <= 256 { |
250 | | /* |
251 | | * Karatsuba multiplication: |
252 | | * |
253 | | * The idea is that we break x and y up into two smaller numbers that each have about half |
254 | | * as many digits, like so (note that multiplying by b is just a shift): |
255 | | * |
256 | | * x = x0 + x1 * b |
257 | | * y = y0 + y1 * b |
258 | | * |
259 | | * With some algebra, we can compute x * y with three smaller products, where the inputs to |
260 | | * each of the smaller products have only about half as many digits as x and y: |
261 | | * |
262 | | * x * y = (x0 + x1 * b) * (y0 + y1 * b) |
263 | | * |
264 | | * x * y = x0 * y0 |
265 | | * + x0 * y1 * b |
266 | | * + x1 * y0 * b |
267 | | * + x1 * y1 * b^2 |
268 | | * |
269 | | * Let p0 = x0 * y0 and p2 = x1 * y1: |
270 | | * |
271 | | * x * y = p0 |
272 | | * + (x0 * y1 + x1 * y0) * b |
273 | | * + p2 * b^2 |
274 | | * |
275 | | * The real trick is that middle term: |
276 | | * |
277 | | * x0 * y1 + x1 * y0 |
278 | | * |
279 | | * = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2 |
280 | | * |
281 | | * = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2 |
282 | | * |
283 | | * Now we complete the square: |
284 | | * |
285 | | * = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2 |
286 | | * |
287 | | * = -((x1 - x0) * (y1 - y0)) + p0 + p2 |
288 | | * |
289 | | * Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula: |
290 | | * |
291 | | * x * y = p0 |
292 | | * + (p0 + p2 - p1) * b |
293 | | * + p2 * b^2 |
294 | | * |
295 | | * Where the three intermediate products are: |
296 | | * |
297 | | * p0 = x0 * y0 |
298 | | * p1 = (x1 - x0) * (y1 - y0) |
299 | | * p2 = x1 * y1 |
300 | | * |
301 | | * In doing the computation, we take great care to avoid unnecessary temporary variables |
302 | | * (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a |
303 | | * bit so we can use the same temporary variable for all the intermediate products: |
304 | | * |
305 | | * x * y = p2 * b^2 + p2 * b |
306 | | * + p0 * b + p0 |
307 | | * - p1 * b |
308 | | * |
309 | | * The other trick we use is instead of doing explicit shifts, we slice acc at the |
310 | | * appropriate offset when doing the add. |
311 | | */ |
312 | | |
313 | | /* |
314 | | * When x is smaller than y, it's significantly faster to pick b such that x is split in |
315 | | * half, not y: |
316 | | */ |
317 | 0 | let b = x.len() / 2; |
318 | 0 | let (x0, x1) = x.split_at(b); |
319 | 0 | let (y0, y1) = y.split_at(b); |
320 | 0 |
|
321 | 0 | /* |
322 | 0 | * We reuse the same BigUint for all the intermediate multiplies and have to size p |
323 | 0 | * appropriately here: x1.len() >= x0.len and y1.len() >= y0.len(): |
324 | 0 | */ |
325 | 0 | let len = x1.len() + y1.len() + 1; |
326 | 0 | let mut p = BigUint { data: vec![0; len] }; |
327 | 0 |
|
328 | 0 | // p2 = x1 * y1 |
329 | 0 | mac3(&mut p.data[..], x1, y1); |
330 | 0 |
|
331 | 0 | // Not required, but the adds go faster if we drop any unneeded 0s from the end: |
332 | 0 | p.normalize(); |
333 | 0 |
|
334 | 0 | add2(&mut acc[b..], &p.data[..]); |
335 | 0 | add2(&mut acc[b * 2..], &p.data[..]); |
336 | 0 |
|
337 | 0 | // Zero out p before the next multiply: |
338 | 0 | p.data.truncate(0); |
339 | 0 | p.data.extend(repeat(0).take(len)); |
340 | 0 |
|
341 | 0 | // p0 = x0 * y0 |
342 | 0 | mac3(&mut p.data[..], x0, y0); |
343 | 0 | p.normalize(); |
344 | 0 |
|
345 | 0 | add2(&mut acc[..], &p.data[..]); |
346 | 0 | add2(&mut acc[b..], &p.data[..]); |
347 | 0 |
|
348 | 0 | // p1 = (x1 - x0) * (y1 - y0) |
349 | 0 | // We do this one last, since it may be negative and acc can't ever be negative: |
350 | 0 | let (j0_sign, j0) = sub_sign(x1, x0); |
351 | 0 | let (j1_sign, j1) = sub_sign(y1, y0); |
352 | 0 |
|
353 | 0 | match j0_sign * j1_sign { |
354 | 0 | Plus => { |
355 | 0 | p.data.truncate(0); |
356 | 0 | p.data.extend(repeat(0).take(len)); |
357 | 0 |
|
358 | 0 | mac3(&mut p.data[..], &j0.data[..], &j1.data[..]); |
359 | 0 | p.normalize(); |
360 | 0 |
|
361 | 0 | sub2(&mut acc[b..], &p.data[..]); |
362 | 0 | } |
363 | 0 | Minus => { |
364 | 0 | mac3(&mut acc[b..], &j0.data[..], &j1.data[..]); |
365 | 0 | } |
366 | 0 | NoSign => (), |
367 | | } |
368 | 0 | } else { |
369 | 0 | // Toom-3 multiplication: |
370 | 0 | // |
371 | 0 | // Toom-3 is like Karatsuba above, but dividing the inputs into three parts. |
372 | 0 | // Both are instances of Toom-Cook, using `k=3` and `k=2` respectively. |
373 | 0 | // |
374 | 0 | // The general idea is to treat the large integers digits as |
375 | 0 | // polynomials of a certain degree and determine the coefficients/digits |
376 | 0 | // of the product of the two via interpolation of the polynomial product. |
377 | 0 | let i = y.len() / 3 + 1; |
378 | 0 |
|
379 | 0 | let x0_len = cmp::min(x.len(), i); |
380 | 0 | let x1_len = cmp::min(x.len() - x0_len, i); |
381 | 0 |
|
382 | 0 | let y0_len = i; |
383 | 0 | let y1_len = cmp::min(y.len() - y0_len, i); |
384 | 0 |
|
385 | 0 | // Break x and y into three parts, representating an order two polynomial. |
386 | 0 | // t is chosen to be the size of a digit so we can use faster shifts |
387 | 0 | // in place of multiplications. |
388 | 0 | // |
389 | 0 | // x(t) = x2*t^2 + x1*t + x0 |
390 | 0 | let x0 = BigInt::from_slice(Plus, &x[..x0_len]); |
391 | 0 | let x1 = BigInt::from_slice(Plus, &x[x0_len..x0_len + x1_len]); |
392 | 0 | let x2 = BigInt::from_slice(Plus, &x[x0_len + x1_len..]); |
393 | 0 |
|
394 | 0 | // y(t) = y2*t^2 + y1*t + y0 |
395 | 0 | let y0 = BigInt::from_slice(Plus, &y[..y0_len]); |
396 | 0 | let y1 = BigInt::from_slice(Plus, &y[y0_len..y0_len + y1_len]); |
397 | 0 | let y2 = BigInt::from_slice(Plus, &y[y0_len + y1_len..]); |
398 | 0 |
|
399 | 0 | // Let w(t) = x(t) * y(t) |
400 | 0 | // |
401 | 0 | // This gives us the following order-4 polynomial. |
402 | 0 | // |
403 | 0 | // w(t) = w4*t^4 + w3*t^3 + w2*t^2 + w1*t + w0 |
404 | 0 | // |
405 | 0 | // We need to find the coefficients w4, w3, w2, w1 and w0. Instead |
406 | 0 | // of simply multiplying the x and y in total, we can evaluate w |
407 | 0 | // at 5 points. An n-degree polynomial is uniquely identified by (n + 1) |
408 | 0 | // points. |
409 | 0 | // |
410 | 0 | // It is arbitrary as to what points we evaluate w at but we use the |
411 | 0 | // following. |
412 | 0 | // |
413 | 0 | // w(t) at t = 0, 1, -1, -2 and inf |
414 | 0 | // |
415 | 0 | // The values for w(t) in terms of x(t)*y(t) at these points are: |
416 | 0 | // |
417 | 0 | // let a = w(0) = x0 * y0 |
418 | 0 | // let b = w(1) = (x2 + x1 + x0) * (y2 + y1 + y0) |
419 | 0 | // let c = w(-1) = (x2 - x1 + x0) * (y2 - y1 + y0) |
420 | 0 | // let d = w(-2) = (4*x2 - 2*x1 + x0) * (4*y2 - 2*y1 + y0) |
421 | 0 | // let e = w(inf) = x2 * y2 as t -> inf |
422 | 0 |
|
423 | 0 | // x0 + x2, avoiding temporaries |
424 | 0 | let p = &x0 + &x2; |
425 | 0 |
|
426 | 0 | // y0 + y2, avoiding temporaries |
427 | 0 | let q = &y0 + &y2; |
428 | 0 |
|
429 | 0 | // x2 - x1 + x0, avoiding temporaries |
430 | 0 | let p2 = &p - &x1; |
431 | 0 |
|
432 | 0 | // y2 - y1 + y0, avoiding temporaries |
433 | 0 | let q2 = &q - &y1; |
434 | 0 |
|
435 | 0 | // w(0) |
436 | 0 | let r0 = &x0 * &y0; |
437 | 0 |
|
438 | 0 | // w(inf) |
439 | 0 | let r4 = &x2 * &y2; |
440 | 0 |
|
441 | 0 | // w(1) |
442 | 0 | let r1 = (p + x1) * (q + y1); |
443 | 0 |
|
444 | 0 | // w(-1) |
445 | 0 | let r2 = &p2 * &q2; |
446 | 0 |
|
447 | 0 | // w(-2) |
448 | 0 | let r3 = ((p2 + x2) * 2 - x0) * ((q2 + y2) * 2 - y0); |
449 | 0 |
|
450 | 0 | // Evaluating these points gives us the following system of linear equations. |
451 | 0 | // |
452 | 0 | // 0 0 0 0 1 | a |
453 | 0 | // 1 1 1 1 1 | b |
454 | 0 | // 1 -1 1 -1 1 | c |
455 | 0 | // 16 -8 4 -2 1 | d |
456 | 0 | // 1 0 0 0 0 | e |
457 | 0 | // |
458 | 0 | // The solved equation (after gaussian elimination or similar) |
459 | 0 | // in terms of its coefficients: |
460 | 0 | // |
461 | 0 | // w0 = w(0) |
462 | 0 | // w1 = w(0)/2 + w(1)/3 - w(-1) + w(2)/6 - 2*w(inf) |
463 | 0 | // w2 = -w(0) + w(1)/2 + w(-1)/2 - w(inf) |
464 | 0 | // w3 = -w(0)/2 + w(1)/6 + w(-1)/2 - w(1)/6 |
465 | 0 | // w4 = w(inf) |
466 | 0 | // |
467 | 0 | // This particular sequence is given by Bodrato and is an interpolation |
468 | 0 | // of the above equations. |
469 | 0 | let mut comp3: BigInt = (r3 - &r1) / 3; |
470 | 0 | let mut comp1: BigInt = (r1 - &r2) / 2; |
471 | 0 | let mut comp2: BigInt = r2 - &r0; |
472 | 0 | comp3 = (&comp2 - comp3) / 2 + &r4 * 2; |
473 | 0 | comp2 += &comp1 - &r4; |
474 | 0 | comp1 -= &comp3; |
475 | 0 |
|
476 | 0 | // Recomposition. The coefficients of the polynomial are now known. |
477 | 0 | // |
478 | 0 | // Evaluate at w(t) where t is our given base to get the result. |
479 | 0 | let result = r0 |
480 | 0 | + (comp1 << (32 * i)) |
481 | 0 | + (comp2 << (2 * 32 * i)) |
482 | 0 | + (comp3 << (3 * 32 * i)) |
483 | 0 | + (r4 << (4 * 32 * i)); |
484 | 0 | let result_pos = result.to_biguint().unwrap(); |
485 | 0 | add2(&mut acc[..], &result_pos.data); |
486 | 0 | } |
487 | 0 | } |
488 | | |
489 | 0 | pub fn mul3(x: &[BigDigit], y: &[BigDigit]) -> BigUint { |
490 | 0 | let len = x.len() + y.len() + 1; |
491 | 0 | let mut prod = BigUint { data: vec![0; len] }; |
492 | 0 |
|
493 | 0 | mac3(&mut prod.data[..], x, y); |
494 | 0 | prod.normalized() |
495 | 0 | } |
496 | | |
497 | 0 | pub fn scalar_mul(a: &mut [BigDigit], b: BigDigit) -> BigDigit { |
498 | 0 | let mut carry = 0; |
499 | 0 | for a in a.iter_mut() { |
500 | 0 | *a = mul_with_carry(*a, b, &mut carry); |
501 | 0 | } |
502 | 0 | carry as BigDigit |
503 | 0 | } |
504 | | |
505 | 0 | pub fn div_rem(mut u: BigUint, mut d: BigUint) -> (BigUint, BigUint) { |
506 | 0 | if d.is_zero() { |
507 | 0 | panic!() |
508 | 0 | } |
509 | 0 | if u.is_zero() { |
510 | 0 | return (Zero::zero(), Zero::zero()); |
511 | 0 | } |
512 | 0 |
|
513 | 0 | if d.data.len() == 1 { |
514 | 0 | if d.data == [1] { |
515 | 0 | return (u, Zero::zero()); |
516 | 0 | } |
517 | 0 | let (div, rem) = div_rem_digit(u, d.data[0]); |
518 | 0 | // reuse d |
519 | 0 | d.data.clear(); |
520 | 0 | d += rem; |
521 | 0 | return (div, d); |
522 | 0 | } |
523 | 0 |
|
524 | 0 | // Required or the q_len calculation below can underflow: |
525 | 0 | match u.cmp(&d) { |
526 | 0 | Less => return (Zero::zero(), u), |
527 | | Equal => { |
528 | 0 | u.set_one(); |
529 | 0 | return (u, Zero::zero()); |
530 | | } |
531 | 0 | Greater => {} // Do nothing |
532 | 0 | } |
533 | 0 |
|
534 | 0 | // This algorithm is from Knuth, TAOCP vol 2 section 4.3, algorithm D: |
535 | 0 | // |
536 | 0 | // First, normalize the arguments so the highest bit in the highest digit of the divisor is |
537 | 0 | // set: the main loop uses the highest digit of the divisor for generating guesses, so we |
538 | 0 | // want it to be the largest number we can efficiently divide by. |
539 | 0 | // |
540 | 0 | let shift = d.data.last().unwrap().leading_zeros() as usize; |
541 | 0 | let (q, r) = if shift == 0 { |
542 | | // no need to clone d |
543 | 0 | div_rem_core(u, &d) |
544 | | } else { |
545 | 0 | div_rem_core(u << shift, &(d << shift)) |
546 | | }; |
547 | | // renormalize the remainder |
548 | 0 | (q, r >> shift) |
549 | 0 | } |
550 | | |
551 | 0 | pub fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) { |
552 | 0 | if d.is_zero() { |
553 | 0 | panic!() |
554 | 0 | } |
555 | 0 | if u.is_zero() { |
556 | 0 | return (Zero::zero(), Zero::zero()); |
557 | 0 | } |
558 | 0 |
|
559 | 0 | if d.data.len() == 1 { |
560 | 0 | if d.data == [1] { |
561 | 0 | return (u.clone(), Zero::zero()); |
562 | 0 | } |
563 | 0 |
|
564 | 0 | let (div, rem) = div_rem_digit(u.clone(), d.data[0]); |
565 | 0 | return (div, rem.into()); |
566 | 0 | } |
567 | 0 |
|
568 | 0 | // Required or the q_len calculation below can underflow: |
569 | 0 | match u.cmp(d) { |
570 | 0 | Less => return (Zero::zero(), u.clone()), |
571 | 0 | Equal => return (One::one(), Zero::zero()), |
572 | 0 | Greater => {} // Do nothing |
573 | 0 | } |
574 | 0 |
|
575 | 0 | // This algorithm is from Knuth, TAOCP vol 2 section 4.3, algorithm D: |
576 | 0 | // |
577 | 0 | // First, normalize the arguments so the highest bit in the highest digit of the divisor is |
578 | 0 | // set: the main loop uses the highest digit of the divisor for generating guesses, so we |
579 | 0 | // want it to be the largest number we can efficiently divide by. |
580 | 0 | // |
581 | 0 | let shift = d.data.last().unwrap().leading_zeros() as usize; |
582 | | |
583 | 0 | let (q, r) = if shift == 0 { |
584 | | // no need to clone d |
585 | 0 | div_rem_core(u.clone(), d) |
586 | | } else { |
587 | 0 | div_rem_core(u << shift, &(d << shift)) |
588 | | }; |
589 | | // renormalize the remainder |
590 | 0 | (q, r >> shift) |
591 | 0 | } |
592 | | |
593 | | /// an implementation of Knuth, TAOCP vol 2 section 4.3, algorithm D |
594 | | /// |
595 | | /// # Correctness |
596 | | /// |
597 | | /// This function requires the following conditions to run correctly and/or effectively |
598 | | /// |
599 | | /// - `a > b` |
600 | | /// - `d.data.len() > 1` |
601 | | /// - `d.data.last().unwrap().leading_zeros() == 0` |
602 | 0 | fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) { |
603 | 0 | // The algorithm works by incrementally calculating "guesses", q0, for part of the |
604 | 0 | // remainder. Once we have any number q0 such that q0 * b <= a, we can set |
605 | 0 | // |
606 | 0 | // q += q0 |
607 | 0 | // a -= q0 * b |
608 | 0 | // |
609 | 0 | // and then iterate until a < b. Then, (q, a) will be our desired quotient and remainder. |
610 | 0 | // |
611 | 0 | // q0, our guess, is calculated by dividing the last few digits of a by the last digit of b |
612 | 0 | // - this should give us a guess that is "close" to the actual quotient, but is possibly |
613 | 0 | // greater than the actual quotient. If q0 * b > a, we simply use iterated subtraction |
614 | 0 | // until we have a guess such that q0 * b <= a. |
615 | 0 | // |
616 | 0 |
|
617 | 0 | let bn = *b.data.last().unwrap(); |
618 | 0 | let q_len = a.data.len() - b.data.len() + 1; |
619 | 0 | let mut q = BigUint { |
620 | 0 | data: vec![0; q_len], |
621 | 0 | }; |
622 | 0 |
|
623 | 0 | // We reuse the same temporary to avoid hitting the allocator in our inner loop - this is |
624 | 0 | // sized to hold a0 (in the common case; if a particular digit of the quotient is zero a0 |
625 | 0 | // can be bigger). |
626 | 0 | // |
627 | 0 | let mut tmp = BigUint { |
628 | 0 | data: Vec::with_capacity(2), |
629 | 0 | }; |
630 | | |
631 | 0 | for j in (0..q_len).rev() { |
632 | | /* |
633 | | * When calculating our next guess q0, we don't need to consider the digits below j |
634 | | * + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from |
635 | | * digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those |
636 | | * two numbers will be zero in all digits up to (j + b.data.len() - 1). |
637 | | */ |
638 | 0 | let offset = j + b.data.len() - 1; |
639 | 0 | if offset >= a.data.len() { |
640 | 0 | continue; |
641 | 0 | } |
642 | 0 |
|
643 | 0 | /* just avoiding a heap allocation: */ |
644 | 0 | let mut a0 = tmp; |
645 | 0 | a0.data.truncate(0); |
646 | 0 | a0.data.extend(a.data[offset..].iter().cloned()); |
647 | 0 |
|
648 | 0 | /* |
649 | 0 | * q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts |
650 | 0 | * implicitly at the end, when adding and subtracting to a and q. Not only do we |
651 | 0 | * save the cost of the shifts, the rest of the arithmetic gets to work with |
652 | 0 | * smaller numbers. |
653 | 0 | */ |
654 | 0 | let (mut q0, _) = div_rem_digit(a0, bn); |
655 | 0 | let mut prod = b * &q0; |
656 | | |
657 | 0 | while cmp_slice(&prod.data[..], &a.data[j..]) == Greater { |
658 | 0 | let one: BigUint = One::one(); |
659 | 0 | q0 -= one; |
660 | 0 | prod -= b; |
661 | 0 | } |
662 | | |
663 | 0 | add2(&mut q.data[j..], &q0.data[..]); |
664 | 0 | sub2(&mut a.data[j..], &prod.data[..]); |
665 | 0 | a.normalize(); |
666 | 0 |
|
667 | 0 | tmp = q0; |
668 | | } |
669 | | |
670 | 0 | debug_assert!(a < *b); |
671 | | |
672 | 0 | (q.normalized(), a) |
673 | 0 | } |
674 | | |
675 | | /// Find last set bit |
676 | | /// fls(0) == 0, fls(u32::MAX) == 32 |
677 | 0 | pub fn fls<T: traits::PrimInt>(v: T) -> usize { |
678 | 0 | mem::size_of::<T>() * 8 - v.leading_zeros() as usize |
679 | 0 | } Unexecuted instantiation: num_bigint::biguint::algorithms::fls::<u64> Unexecuted instantiation: num_bigint::biguint::algorithms::fls::<u32> |
680 | | |
681 | 0 | pub fn ilog2<T: traits::PrimInt>(v: T) -> usize { |
682 | 0 | fls(v) - 1 |
683 | 0 | } |
684 | | |
685 | | #[inline] |
686 | 0 | pub fn biguint_shl(n: Cow<BigUint>, bits: usize) -> BigUint { |
687 | 0 | let n_unit = bits / big_digit::BITS; |
688 | 0 | let mut data = match n_unit { |
689 | 0 | 0 => n.into_owned().data, |
690 | | _ => { |
691 | 0 | let len = n_unit + n.data.len() + 1; |
692 | 0 | let mut data = Vec::with_capacity(len); |
693 | 0 | data.extend(repeat(0).take(n_unit)); |
694 | 0 | data.extend(n.data.iter().cloned()); |
695 | 0 | data |
696 | | } |
697 | | }; |
698 | | |
699 | 0 | let n_bits = bits % big_digit::BITS; |
700 | 0 | if n_bits > 0 { |
701 | 0 | let mut carry = 0; |
702 | 0 | for elem in data[n_unit..].iter_mut() { |
703 | 0 | let new_carry = *elem >> (big_digit::BITS - n_bits); |
704 | 0 | *elem = (*elem << n_bits) | carry; |
705 | 0 | carry = new_carry; |
706 | 0 | } |
707 | 0 | if carry != 0 { |
708 | 0 | data.push(carry); |
709 | 0 | } |
710 | 0 | } |
711 | | |
712 | 0 | BigUint::new(data) |
713 | 0 | } |
714 | | |
715 | | #[inline] |
716 | 0 | pub fn biguint_shr(n: Cow<BigUint>, bits: usize) -> BigUint { |
717 | 0 | let n_unit = bits / big_digit::BITS; |
718 | 0 | if n_unit >= n.data.len() { |
719 | 0 | return Zero::zero(); |
720 | 0 | } |
721 | 0 | let mut data = match n { |
722 | 0 | Cow::Borrowed(n) => n.data[n_unit..].to_vec(), |
723 | 0 | Cow::Owned(mut n) => { |
724 | 0 | n.data.drain(..n_unit); |
725 | 0 | n.data |
726 | | } |
727 | | }; |
728 | | |
729 | 0 | let n_bits = bits % big_digit::BITS; |
730 | 0 | if n_bits > 0 { |
731 | 0 | let mut borrow = 0; |
732 | 0 | for elem in data.iter_mut().rev() { |
733 | 0 | let new_borrow = *elem << (big_digit::BITS - n_bits); |
734 | 0 | *elem = (*elem >> n_bits) | borrow; |
735 | 0 | borrow = new_borrow; |
736 | 0 | } |
737 | 0 | } |
738 | | |
739 | 0 | BigUint::new(data) |
740 | 0 | } |
741 | | |
742 | 0 | pub fn cmp_slice(a: &[BigDigit], b: &[BigDigit]) -> Ordering { |
743 | 0 | debug_assert!(a.last() != Some(&0)); |
744 | 0 | debug_assert!(b.last() != Some(&0)); |
745 | | |
746 | 0 | let (a_len, b_len) = (a.len(), b.len()); |
747 | 0 | if a_len < b_len { |
748 | 0 | return Less; |
749 | 0 | } |
750 | 0 | if a_len > b_len { |
751 | 0 | return Greater; |
752 | 0 | } |
753 | | |
754 | 0 | for (&ai, &bi) in a.iter().rev().zip(b.iter().rev()) { |
755 | 0 | if ai < bi { |
756 | 0 | return Less; |
757 | 0 | } |
758 | 0 | if ai > bi { |
759 | 0 | return Greater; |
760 | 0 | } |
761 | | } |
762 | 0 | Equal |
763 | 0 | } |
764 | | |
765 | | #[cfg(test)] |
766 | | mod algorithm_tests { |
767 | | use big_digit::BigDigit; |
768 | | use traits::Num; |
769 | | use Sign::Plus; |
770 | | use {BigInt, BigUint}; |
771 | | |
772 | | #[test] |
773 | | fn test_sub_sign() { |
774 | | use super::sub_sign; |
775 | | |
776 | | fn sub_sign_i(a: &[BigDigit], b: &[BigDigit]) -> BigInt { |
777 | | let (sign, val) = sub_sign(a, b); |
778 | | BigInt::from_biguint(sign, val) |
779 | | } |
780 | | |
781 | | let a = BigUint::from_str_radix("265252859812191058636308480000000", 10).unwrap(); |
782 | | let b = BigUint::from_str_radix("26525285981219105863630848000000", 10).unwrap(); |
783 | | let a_i = BigInt::from_biguint(Plus, a.clone()); |
784 | | let b_i = BigInt::from_biguint(Plus, b.clone()); |
785 | | |
786 | | assert_eq!(sub_sign_i(&a.data[..], &b.data[..]), &a_i - &b_i); |
787 | | assert_eq!(sub_sign_i(&b.data[..], &a.data[..]), &b_i - &a_i); |
788 | | } |
789 | | } |