Coverage Report

Created: 2021-11-03 07:11

/rust/registry/src/github.com-1ecc6299db9ec823/num-bigint-0.2.6/src/algorithms.rs
Line
Count
Source (jump to first uncovered line)
1
use std::borrow::Cow;
2
use std::cmp;
3
use std::cmp::Ordering::{self, Equal, Greater, Less};
4
use std::iter::repeat;
5
use std::mem;
6
use traits;
7
use traits::{One, Zero};
8
9
use biguint::BigUint;
10
11
use bigint::BigInt;
12
use bigint::Sign;
13
use bigint::Sign::{Minus, NoSign, Plus};
14
15
use big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit};
16
17
// Generic functions for add/subtract/multiply with carry/borrow:
18
19
// Add with carry:
20
#[inline]
21
0
fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit {
22
0
    *acc += DoubleBigDigit::from(a);
23
0
    *acc += DoubleBigDigit::from(b);
24
0
    let lo = *acc as BigDigit;
25
0
    *acc >>= big_digit::BITS;
26
0
    lo
27
0
}
28
29
// Subtract with borrow:
30
#[inline]
31
0
fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit {
32
0
    *acc += SignedDoubleBigDigit::from(a);
33
0
    *acc -= SignedDoubleBigDigit::from(b);
34
0
    let lo = *acc as BigDigit;
35
0
    *acc >>= big_digit::BITS;
36
0
    lo
37
0
}
38
39
#[inline]
40
0
pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit {
41
0
    *acc += DoubleBigDigit::from(a);
42
0
    *acc += DoubleBigDigit::from(b) * DoubleBigDigit::from(c);
43
0
    let lo = *acc as BigDigit;
44
0
    *acc >>= big_digit::BITS;
45
0
    lo
46
0
}
47
48
#[inline]
49
0
pub fn mul_with_carry(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit {
50
0
    *acc += DoubleBigDigit::from(a) * DoubleBigDigit::from(b);
51
0
    let lo = *acc as BigDigit;
52
0
    *acc >>= big_digit::BITS;
53
0
    lo
54
0
}
55
56
/// Divide a two digit numerator by a one digit divisor, returns quotient and remainder:
57
///
58
/// Note: the caller must ensure that both the quotient and remainder will fit into a single digit.
59
/// This is _not_ true for an arbitrary numerator/denominator.
60
///
61
/// (This function also matches what the x86 divide instruction does).
62
#[inline]
63
0
fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
64
0
    debug_assert!(hi < divisor);
65
66
0
    let lhs = big_digit::to_doublebigdigit(hi, lo);
67
0
    let rhs = DoubleBigDigit::from(divisor);
68
0
    ((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit)
69
0
}
70
71
0
pub fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit) {
72
0
    let mut rem = 0;
73
74
0
    for d in a.data.iter_mut().rev() {
75
0
        let (q, r) = div_wide(rem, *d, b);
76
0
        *d = q;
77
0
        rem = r;
78
0
    }
79
80
0
    (a.normalized(), rem)
81
0
}
82
83
0
pub fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit {
84
0
    let mut rem: DoubleBigDigit = 0;
85
0
    for &digit in a.data.iter().rev() {
86
0
        rem = (rem << big_digit::BITS) + DoubleBigDigit::from(digit);
87
0
        rem %= DoubleBigDigit::from(b);
88
0
    }
89
90
0
    rem as BigDigit
91
0
}
92
93
// Only for the Add impl:
94
#[inline]
95
0
pub fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
96
0
    debug_assert!(a.len() >= b.len());
97
98
0
    let mut carry = 0;
99
0
    let (a_lo, a_hi) = a.split_at_mut(b.len());
100
101
0
    for (a, b) in a_lo.iter_mut().zip(b) {
102
0
        *a = adc(*a, *b, &mut carry);
103
0
    }
104
105
0
    if carry != 0 {
106
0
        for a in a_hi {
107
0
            *a = adc(*a, 0, &mut carry);
108
0
            if carry == 0 {
109
0
                break;
110
0
            }
111
        }
112
0
    }
113
114
0
    carry as BigDigit
115
0
}
116
117
/// Two argument addition of raw slices:
118
/// a += b
119
///
120
/// The caller _must_ ensure that a is big enough to store the result - typically this means
121
/// resizing a to max(a.len(), b.len()) + 1, to fit a possible carry.
122
0
pub fn add2(a: &mut [BigDigit], b: &[BigDigit]) {
123
0
    let carry = __add2(a, b);
124
125
0
    debug_assert!(carry == 0);
126
0
}
127
128
0
pub fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
129
0
    let mut borrow = 0;
130
0
131
0
    let len = cmp::min(a.len(), b.len());
132
0
    let (a_lo, a_hi) = a.split_at_mut(len);
133
0
    let (b_lo, b_hi) = b.split_at(len);
134
135
0
    for (a, b) in a_lo.iter_mut().zip(b_lo) {
136
0
        *a = sbb(*a, *b, &mut borrow);
137
0
    }
138
139
0
    if borrow != 0 {
140
0
        for a in a_hi {
141
0
            *a = sbb(*a, 0, &mut borrow);
142
0
            if borrow == 0 {
143
0
                break;
144
0
            }
145
        }
146
0
    }
147
148
    // note: we're _required_ to fail on underflow
149
0
    assert!(
150
0
        borrow == 0 && b_hi.iter().all(|x| *x == 0),
151
        "Cannot subtract b from a because b is larger than a."
152
    );
153
0
}
154
155
// Only for the Sub impl. `a` and `b` must have same length.
156
#[inline]
157
0
pub fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit {
158
0
    debug_assert!(b.len() == a.len());
159
160
0
    let mut borrow = 0;
161
162
0
    for (ai, bi) in a.iter().zip(b) {
163
0
        *bi = sbb(*ai, *bi, &mut borrow);
164
0
    }
165
166
0
    borrow as BigDigit
167
0
}
168
169
0
pub fn sub2rev(a: &[BigDigit], b: &mut [BigDigit]) {
170
0
    debug_assert!(b.len() >= a.len());
171
172
0
    let len = cmp::min(a.len(), b.len());
173
0
    let (a_lo, a_hi) = a.split_at(len);
174
0
    let (b_lo, b_hi) = b.split_at_mut(len);
175
0
176
0
    let borrow = __sub2rev(a_lo, b_lo);
177
0
178
0
    assert!(a_hi.is_empty());
179
180
    // note: we're _required_ to fail on underflow
181
0
    assert!(
182
0
        borrow == 0 && b_hi.iter().all(|x| *x == 0),
183
        "Cannot subtract b from a because b is larger than a."
184
    );
185
0
}
186
187
0
pub fn sub_sign(a: &[BigDigit], b: &[BigDigit]) -> (Sign, BigUint) {
188
0
    // Normalize:
189
0
    let a = &a[..a.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
190
0
    let b = &b[..b.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
191
0
192
0
    match cmp_slice(a, b) {
193
        Greater => {
194
0
            let mut a = a.to_vec();
195
0
            sub2(&mut a, b);
196
0
            (Plus, BigUint::new(a))
197
        }
198
        Less => {
199
0
            let mut b = b.to_vec();
200
0
            sub2(&mut b, a);
201
0
            (Minus, BigUint::new(b))
202
        }
203
0
        _ => (NoSign, Zero::zero()),
204
    }
205
0
}
206
207
/// Three argument multiply accumulate:
208
/// acc += b * c
209
0
pub fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) {
210
0
    if c == 0 {
211
0
        return;
212
0
    }
213
0
214
0
    let mut carry = 0;
215
0
    let (a_lo, a_hi) = acc.split_at_mut(b.len());
216
217
0
    for (a, &b) in a_lo.iter_mut().zip(b) {
218
0
        *a = mac_with_carry(*a, b, c, &mut carry);
219
0
    }
220
221
0
    let mut a = a_hi.iter_mut();
222
0
    while carry != 0 {
223
0
        let a = a.next().expect("carry overflow during multiplication!");
224
0
        *a = adc(*a, 0, &mut carry);
225
0
    }
226
0
}
227
228
/// Three argument multiply accumulate:
229
/// acc += b * c
230
0
fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) {
231
0
    let (x, y) = if b.len() < c.len() { (b, c) } else { (c, b) };
232
233
    // We use three algorithms for different input sizes.
234
    //
235
    // - For small inputs, long multiplication is fastest.
236
    // - Next we use Karatsuba multiplication (Toom-2), which we have optimized
237
    //   to avoid unnecessary allocations for intermediate values.
238
    // - For the largest inputs we use Toom-3, which better optimizes the
239
    //   number of operations, but uses more temporary allocations.
240
    //
241
    // The thresholds are somewhat arbitrary, chosen by evaluating the results
242
    // of `cargo bench --bench bigint multiply`.
243
244
0
    if x.len() <= 32 {
245
        // Long multiplication:
246
0
        for (i, xi) in x.iter().enumerate() {
247
0
            mac_digit(&mut acc[i..], y, *xi);
248
0
        }
249
0
    } else if x.len() <= 256 {
250
        /*
251
         * Karatsuba multiplication:
252
         *
253
         * The idea is that we break x and y up into two smaller numbers that each have about half
254
         * as many digits, like so (note that multiplying by b is just a shift):
255
         *
256
         * x = x0 + x1 * b
257
         * y = y0 + y1 * b
258
         *
259
         * With some algebra, we can compute x * y with three smaller products, where the inputs to
260
         * each of the smaller products have only about half as many digits as x and y:
261
         *
262
         * x * y = (x0 + x1 * b) * (y0 + y1 * b)
263
         *
264
         * x * y = x0 * y0
265
         *       + x0 * y1 * b
266
         *       + x1 * y0 * b
267
         *       + x1 * y1 * b^2
268
         *
269
         * Let p0 = x0 * y0 and p2 = x1 * y1:
270
         *
271
         * x * y = p0
272
         *       + (x0 * y1 + x1 * y0) * b
273
         *       + p2 * b^2
274
         *
275
         * The real trick is that middle term:
276
         *
277
         *         x0 * y1 + x1 * y0
278
         *
279
         *       = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2
280
         *
281
         *       = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2
282
         *
283
         * Now we complete the square:
284
         *
285
         *       = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2
286
         *
287
         *       = -((x1 - x0) * (y1 - y0)) + p0 + p2
288
         *
289
         * Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula:
290
         *
291
         * x * y = p0
292
         *       + (p0 + p2 - p1) * b
293
         *       + p2 * b^2
294
         *
295
         * Where the three intermediate products are:
296
         *
297
         * p0 = x0 * y0
298
         * p1 = (x1 - x0) * (y1 - y0)
299
         * p2 = x1 * y1
300
         *
301
         * In doing the computation, we take great care to avoid unnecessary temporary variables
302
         * (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a
303
         * bit so we can use the same temporary variable for all the intermediate products:
304
         *
305
         * x * y = p2 * b^2 + p2 * b
306
         *       + p0 * b + p0
307
         *       - p1 * b
308
         *
309
         * The other trick we use is instead of doing explicit shifts, we slice acc at the
310
         * appropriate offset when doing the add.
311
         */
312
313
        /*
314
         * When x is smaller than y, it's significantly faster to pick b such that x is split in
315
         * half, not y:
316
         */
317
0
        let b = x.len() / 2;
318
0
        let (x0, x1) = x.split_at(b);
319
0
        let (y0, y1) = y.split_at(b);
320
0
321
0
        /*
322
0
         * We reuse the same BigUint for all the intermediate multiplies and have to size p
323
0
         * appropriately here: x1.len() >= x0.len and y1.len() >= y0.len():
324
0
         */
325
0
        let len = x1.len() + y1.len() + 1;
326
0
        let mut p = BigUint { data: vec![0; len] };
327
0
328
0
        // p2 = x1 * y1
329
0
        mac3(&mut p.data[..], x1, y1);
330
0
331
0
        // Not required, but the adds go faster if we drop any unneeded 0s from the end:
332
0
        p.normalize();
333
0
334
0
        add2(&mut acc[b..], &p.data[..]);
335
0
        add2(&mut acc[b * 2..], &p.data[..]);
336
0
337
0
        // Zero out p before the next multiply:
338
0
        p.data.truncate(0);
339
0
        p.data.extend(repeat(0).take(len));
340
0
341
0
        // p0 = x0 * y0
342
0
        mac3(&mut p.data[..], x0, y0);
343
0
        p.normalize();
344
0
345
0
        add2(&mut acc[..], &p.data[..]);
346
0
        add2(&mut acc[b..], &p.data[..]);
347
0
348
0
        // p1 = (x1 - x0) * (y1 - y0)
349
0
        // We do this one last, since it may be negative and acc can't ever be negative:
350
0
        let (j0_sign, j0) = sub_sign(x1, x0);
351
0
        let (j1_sign, j1) = sub_sign(y1, y0);
352
0
353
0
        match j0_sign * j1_sign {
354
0
            Plus => {
355
0
                p.data.truncate(0);
356
0
                p.data.extend(repeat(0).take(len));
357
0
358
0
                mac3(&mut p.data[..], &j0.data[..], &j1.data[..]);
359
0
                p.normalize();
360
0
361
0
                sub2(&mut acc[b..], &p.data[..]);
362
0
            }
363
0
            Minus => {
364
0
                mac3(&mut acc[b..], &j0.data[..], &j1.data[..]);
365
0
            }
366
0
            NoSign => (),
367
        }
368
0
    } else {
369
0
        // Toom-3 multiplication:
370
0
        //
371
0
        // Toom-3 is like Karatsuba above, but dividing the inputs into three parts.
372
0
        // Both are instances of Toom-Cook, using `k=3` and `k=2` respectively.
373
0
        //
374
0
        // The general idea is to treat the large integers digits as
375
0
        // polynomials of a certain degree and determine the coefficients/digits
376
0
        // of the product of the two via interpolation of the polynomial product.
377
0
        let i = y.len() / 3 + 1;
378
0
379
0
        let x0_len = cmp::min(x.len(), i);
380
0
        let x1_len = cmp::min(x.len() - x0_len, i);
381
0
382
0
        let y0_len = i;
383
0
        let y1_len = cmp::min(y.len() - y0_len, i);
384
0
385
0
        // Break x and y into three parts, representating an order two polynomial.
386
0
        // t is chosen to be the size of a digit so we can use faster shifts
387
0
        // in place of multiplications.
388
0
        //
389
0
        // x(t) = x2*t^2 + x1*t + x0
390
0
        let x0 = BigInt::from_slice(Plus, &x[..x0_len]);
391
0
        let x1 = BigInt::from_slice(Plus, &x[x0_len..x0_len + x1_len]);
392
0
        let x2 = BigInt::from_slice(Plus, &x[x0_len + x1_len..]);
393
0
394
0
        // y(t) = y2*t^2 + y1*t + y0
395
0
        let y0 = BigInt::from_slice(Plus, &y[..y0_len]);
396
0
        let y1 = BigInt::from_slice(Plus, &y[y0_len..y0_len + y1_len]);
397
0
        let y2 = BigInt::from_slice(Plus, &y[y0_len + y1_len..]);
398
0
399
0
        // Let w(t) = x(t) * y(t)
400
0
        //
401
0
        // This gives us the following order-4 polynomial.
402
0
        //
403
0
        // w(t) = w4*t^4 + w3*t^3 + w2*t^2 + w1*t + w0
404
0
        //
405
0
        // We need to find the coefficients w4, w3, w2, w1 and w0. Instead
406
0
        // of simply multiplying the x and y in total, we can evaluate w
407
0
        // at 5 points. An n-degree polynomial is uniquely identified by (n + 1)
408
0
        // points.
409
0
        //
410
0
        // It is arbitrary as to what points we evaluate w at but we use the
411
0
        // following.
412
0
        //
413
0
        // w(t) at t = 0, 1, -1, -2 and inf
414
0
        //
415
0
        // The values for w(t) in terms of x(t)*y(t) at these points are:
416
0
        //
417
0
        // let a = w(0)   = x0 * y0
418
0
        // let b = w(1)   = (x2 + x1 + x0) * (y2 + y1 + y0)
419
0
        // let c = w(-1)  = (x2 - x1 + x0) * (y2 - y1 + y0)
420
0
        // let d = w(-2)  = (4*x2 - 2*x1 + x0) * (4*y2 - 2*y1 + y0)
421
0
        // let e = w(inf) = x2 * y2 as t -> inf
422
0
423
0
        // x0 + x2, avoiding temporaries
424
0
        let p = &x0 + &x2;
425
0
426
0
        // y0 + y2, avoiding temporaries
427
0
        let q = &y0 + &y2;
428
0
429
0
        // x2 - x1 + x0, avoiding temporaries
430
0
        let p2 = &p - &x1;
431
0
432
0
        // y2 - y1 + y0, avoiding temporaries
433
0
        let q2 = &q - &y1;
434
0
435
0
        // w(0)
436
0
        let r0 = &x0 * &y0;
437
0
438
0
        // w(inf)
439
0
        let r4 = &x2 * &y2;
440
0
441
0
        // w(1)
442
0
        let r1 = (p + x1) * (q + y1);
443
0
444
0
        // w(-1)
445
0
        let r2 = &p2 * &q2;
446
0
447
0
        // w(-2)
448
0
        let r3 = ((p2 + x2) * 2 - x0) * ((q2 + y2) * 2 - y0);
449
0
450
0
        // Evaluating these points gives us the following system of linear equations.
451
0
        //
452
0
        //  0  0  0  0  1 | a
453
0
        //  1  1  1  1  1 | b
454
0
        //  1 -1  1 -1  1 | c
455
0
        // 16 -8  4 -2  1 | d
456
0
        //  1  0  0  0  0 | e
457
0
        //
458
0
        // The solved equation (after gaussian elimination or similar)
459
0
        // in terms of its coefficients:
460
0
        //
461
0
        // w0 = w(0)
462
0
        // w1 = w(0)/2 + w(1)/3 - w(-1) + w(2)/6 - 2*w(inf)
463
0
        // w2 = -w(0) + w(1)/2 + w(-1)/2 - w(inf)
464
0
        // w3 = -w(0)/2 + w(1)/6 + w(-1)/2 - w(1)/6
465
0
        // w4 = w(inf)
466
0
        //
467
0
        // This particular sequence is given by Bodrato and is an interpolation
468
0
        // of the above equations.
469
0
        let mut comp3: BigInt = (r3 - &r1) / 3;
470
0
        let mut comp1: BigInt = (r1 - &r2) / 2;
471
0
        let mut comp2: BigInt = r2 - &r0;
472
0
        comp3 = (&comp2 - comp3) / 2 + &r4 * 2;
473
0
        comp2 += &comp1 - &r4;
474
0
        comp1 -= &comp3;
475
0
476
0
        // Recomposition. The coefficients of the polynomial are now known.
477
0
        //
478
0
        // Evaluate at w(t) where t is our given base to get the result.
479
0
        let result = r0
480
0
            + (comp1 << (32 * i))
481
0
            + (comp2 << (2 * 32 * i))
482
0
            + (comp3 << (3 * 32 * i))
483
0
            + (r4 << (4 * 32 * i));
484
0
        let result_pos = result.to_biguint().unwrap();
485
0
        add2(&mut acc[..], &result_pos.data);
486
0
    }
487
0
}
488
489
0
pub fn mul3(x: &[BigDigit], y: &[BigDigit]) -> BigUint {
490
0
    let len = x.len() + y.len() + 1;
491
0
    let mut prod = BigUint { data: vec![0; len] };
492
0
493
0
    mac3(&mut prod.data[..], x, y);
494
0
    prod.normalized()
495
0
}
496
497
0
pub fn scalar_mul(a: &mut [BigDigit], b: BigDigit) -> BigDigit {
498
0
    let mut carry = 0;
499
0
    for a in a.iter_mut() {
500
0
        *a = mul_with_carry(*a, b, &mut carry);
501
0
    }
502
0
    carry as BigDigit
503
0
}
504
505
0
pub fn div_rem(mut u: BigUint, mut d: BigUint) -> (BigUint, BigUint) {
506
0
    if d.is_zero() {
507
0
        panic!()
508
0
    }
509
0
    if u.is_zero() {
510
0
        return (Zero::zero(), Zero::zero());
511
0
    }
512
0
513
0
    if d.data.len() == 1 {
514
0
        if d.data == [1] {
515
0
            return (u, Zero::zero());
516
0
        }
517
0
        let (div, rem) = div_rem_digit(u, d.data[0]);
518
0
        // reuse d
519
0
        d.data.clear();
520
0
        d += rem;
521
0
        return (div, d);
522
0
    }
523
0
524
0
    // Required or the q_len calculation below can underflow:
525
0
    match u.cmp(&d) {
526
0
        Less => return (Zero::zero(), u),
527
        Equal => {
528
0
            u.set_one();
529
0
            return (u, Zero::zero());
530
        }
531
0
        Greater => {} // Do nothing
532
0
    }
533
0
534
0
    // This algorithm is from Knuth, TAOCP vol 2 section 4.3, algorithm D:
535
0
    //
536
0
    // First, normalize the arguments so the highest bit in the highest digit of the divisor is
537
0
    // set: the main loop uses the highest digit of the divisor for generating guesses, so we
538
0
    // want it to be the largest number we can efficiently divide by.
539
0
    //
540
0
    let shift = d.data.last().unwrap().leading_zeros() as usize;
541
0
    let (q, r) = if shift == 0 {
542
        // no need to clone d
543
0
        div_rem_core(u, &d)
544
    } else {
545
0
        div_rem_core(u << shift, &(d << shift))
546
    };
547
    // renormalize the remainder
548
0
    (q, r >> shift)
549
0
}
550
551
0
pub fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) {
552
0
    if d.is_zero() {
553
0
        panic!()
554
0
    }
555
0
    if u.is_zero() {
556
0
        return (Zero::zero(), Zero::zero());
557
0
    }
558
0
559
0
    if d.data.len() == 1 {
560
0
        if d.data == [1] {
561
0
            return (u.clone(), Zero::zero());
562
0
        }
563
0
564
0
        let (div, rem) = div_rem_digit(u.clone(), d.data[0]);
565
0
        return (div, rem.into());
566
0
    }
567
0
568
0
    // Required or the q_len calculation below can underflow:
569
0
    match u.cmp(d) {
570
0
        Less => return (Zero::zero(), u.clone()),
571
0
        Equal => return (One::one(), Zero::zero()),
572
0
        Greater => {} // Do nothing
573
0
    }
574
0
575
0
    // This algorithm is from Knuth, TAOCP vol 2 section 4.3, algorithm D:
576
0
    //
577
0
    // First, normalize the arguments so the highest bit in the highest digit of the divisor is
578
0
    // set: the main loop uses the highest digit of the divisor for generating guesses, so we
579
0
    // want it to be the largest number we can efficiently divide by.
580
0
    //
581
0
    let shift = d.data.last().unwrap().leading_zeros() as usize;
582
583
0
    let (q, r) = if shift == 0 {
584
        // no need to clone d
585
0
        div_rem_core(u.clone(), d)
586
    } else {
587
0
        div_rem_core(u << shift, &(d << shift))
588
    };
589
    // renormalize the remainder
590
0
    (q, r >> shift)
591
0
}
592
593
/// an implementation of Knuth, TAOCP vol 2 section 4.3, algorithm D
594
///
595
/// # Correctness
596
///
597
/// This function requires the following conditions to run correctly and/or effectively
598
///
599
/// - `a > b`
600
/// - `d.data.len() > 1`
601
/// - `d.data.last().unwrap().leading_zeros() == 0`
602
0
fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
603
0
    // The algorithm works by incrementally calculating "guesses", q0, for part of the
604
0
    // remainder. Once we have any number q0 such that q0 * b <= a, we can set
605
0
    //
606
0
    //     q += q0
607
0
    //     a -= q0 * b
608
0
    //
609
0
    // and then iterate until a < b. Then, (q, a) will be our desired quotient and remainder.
610
0
    //
611
0
    // q0, our guess, is calculated by dividing the last few digits of a by the last digit of b
612
0
    // - this should give us a guess that is "close" to the actual quotient, but is possibly
613
0
    // greater than the actual quotient. If q0 * b > a, we simply use iterated subtraction
614
0
    // until we have a guess such that q0 * b <= a.
615
0
    //
616
0
617
0
    let bn = *b.data.last().unwrap();
618
0
    let q_len = a.data.len() - b.data.len() + 1;
619
0
    let mut q = BigUint {
620
0
        data: vec![0; q_len],
621
0
    };
622
0
623
0
    // We reuse the same temporary to avoid hitting the allocator in our inner loop - this is
624
0
    // sized to hold a0 (in the common case; if a particular digit of the quotient is zero a0
625
0
    // can be bigger).
626
0
    //
627
0
    let mut tmp = BigUint {
628
0
        data: Vec::with_capacity(2),
629
0
    };
630
631
0
    for j in (0..q_len).rev() {
632
        /*
633
         * When calculating our next guess q0, we don't need to consider the digits below j
634
         * + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
635
         * digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
636
         * two numbers will be zero in all digits up to (j + b.data.len() - 1).
637
         */
638
0
        let offset = j + b.data.len() - 1;
639
0
        if offset >= a.data.len() {
640
0
            continue;
641
0
        }
642
0
643
0
        /* just avoiding a heap allocation: */
644
0
        let mut a0 = tmp;
645
0
        a0.data.truncate(0);
646
0
        a0.data.extend(a.data[offset..].iter().cloned());
647
0
648
0
        /*
649
0
         * q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
650
0
         * implicitly at the end, when adding and subtracting to a and q. Not only do we
651
0
         * save the cost of the shifts, the rest of the arithmetic gets to work with
652
0
         * smaller numbers.
653
0
         */
654
0
        let (mut q0, _) = div_rem_digit(a0, bn);
655
0
        let mut prod = b * &q0;
656
657
0
        while cmp_slice(&prod.data[..], &a.data[j..]) == Greater {
658
0
            let one: BigUint = One::one();
659
0
            q0 -= one;
660
0
            prod -= b;
661
0
        }
662
663
0
        add2(&mut q.data[j..], &q0.data[..]);
664
0
        sub2(&mut a.data[j..], &prod.data[..]);
665
0
        a.normalize();
666
0
667
0
        tmp = q0;
668
    }
669
670
0
    debug_assert!(a < *b);
671
672
0
    (q.normalized(), a)
673
0
}
674
675
/// Find last set bit
676
/// fls(0) == 0, fls(u32::MAX) == 32
677
0
pub fn fls<T: traits::PrimInt>(v: T) -> usize {
678
0
    mem::size_of::<T>() * 8 - v.leading_zeros() as usize
679
0
}
Unexecuted instantiation: num_bigint::biguint::algorithms::fls::<u64>
Unexecuted instantiation: num_bigint::biguint::algorithms::fls::<u32>
680
681
0
pub fn ilog2<T: traits::PrimInt>(v: T) -> usize {
682
0
    fls(v) - 1
683
0
}
684
685
#[inline]
686
0
pub fn biguint_shl(n: Cow<BigUint>, bits: usize) -> BigUint {
687
0
    let n_unit = bits / big_digit::BITS;
688
0
    let mut data = match n_unit {
689
0
        0 => n.into_owned().data,
690
        _ => {
691
0
            let len = n_unit + n.data.len() + 1;
692
0
            let mut data = Vec::with_capacity(len);
693
0
            data.extend(repeat(0).take(n_unit));
694
0
            data.extend(n.data.iter().cloned());
695
0
            data
696
        }
697
    };
698
699
0
    let n_bits = bits % big_digit::BITS;
700
0
    if n_bits > 0 {
701
0
        let mut carry = 0;
702
0
        for elem in data[n_unit..].iter_mut() {
703
0
            let new_carry = *elem >> (big_digit::BITS - n_bits);
704
0
            *elem = (*elem << n_bits) | carry;
705
0
            carry = new_carry;
706
0
        }
707
0
        if carry != 0 {
708
0
            data.push(carry);
709
0
        }
710
0
    }
711
712
0
    BigUint::new(data)
713
0
}
714
715
#[inline]
716
0
pub fn biguint_shr(n: Cow<BigUint>, bits: usize) -> BigUint {
717
0
    let n_unit = bits / big_digit::BITS;
718
0
    if n_unit >= n.data.len() {
719
0
        return Zero::zero();
720
0
    }
721
0
    let mut data = match n {
722
0
        Cow::Borrowed(n) => n.data[n_unit..].to_vec(),
723
0
        Cow::Owned(mut n) => {
724
0
            n.data.drain(..n_unit);
725
0
            n.data
726
        }
727
    };
728
729
0
    let n_bits = bits % big_digit::BITS;
730
0
    if n_bits > 0 {
731
0
        let mut borrow = 0;
732
0
        for elem in data.iter_mut().rev() {
733
0
            let new_borrow = *elem << (big_digit::BITS - n_bits);
734
0
            *elem = (*elem >> n_bits) | borrow;
735
0
            borrow = new_borrow;
736
0
        }
737
0
    }
738
739
0
    BigUint::new(data)
740
0
}
741
742
0
pub fn cmp_slice(a: &[BigDigit], b: &[BigDigit]) -> Ordering {
743
0
    debug_assert!(a.last() != Some(&0));
744
0
    debug_assert!(b.last() != Some(&0));
745
746
0
    let (a_len, b_len) = (a.len(), b.len());
747
0
    if a_len < b_len {
748
0
        return Less;
749
0
    }
750
0
    if a_len > b_len {
751
0
        return Greater;
752
0
    }
753
754
0
    for (&ai, &bi) in a.iter().rev().zip(b.iter().rev()) {
755
0
        if ai < bi {
756
0
            return Less;
757
0
        }
758
0
        if ai > bi {
759
0
            return Greater;
760
0
        }
761
    }
762
0
    Equal
763
0
}
764
765
#[cfg(test)]
766
mod algorithm_tests {
767
    use big_digit::BigDigit;
768
    use traits::Num;
769
    use Sign::Plus;
770
    use {BigInt, BigUint};
771
772
    #[test]
773
    fn test_sub_sign() {
774
        use super::sub_sign;
775
776
        fn sub_sign_i(a: &[BigDigit], b: &[BigDigit]) -> BigInt {
777
            let (sign, val) = sub_sign(a, b);
778
            BigInt::from_biguint(sign, val)
779
        }
780
781
        let a = BigUint::from_str_radix("265252859812191058636308480000000", 10).unwrap();
782
        let b = BigUint::from_str_radix("26525285981219105863630848000000", 10).unwrap();
783
        let a_i = BigInt::from_biguint(Plus, a.clone());
784
        let b_i = BigInt::from_biguint(Plus, b.clone());
785
786
        assert_eq!(sub_sign_i(&a.data[..], &b.data[..]), &a_i - &b_i);
787
        assert_eq!(sub_sign_i(&b.data[..], &a.data[..]), &b_i - &a_i);
788
    }
789
}