/rust/registry/src/index.crates.io-1949cf8c6b5b557f/num-bigint-0.4.4/src/biguint/monty.rs

Source
use crate::std_alloc::Vec;
use core::mem;
use core::ops::Shl;
use num_traits::{One, Zero};

use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit};
use crate::biguint::BigUint;

struct MontyReducer {
    n0inv: BigDigit,
}

// k0 = -m**-1 mod 2**BITS. Algorithm from: Dumas, J.G. "On Newton–Raphson
// Iteration for Multiplicative Inverses Modulo Prime Powers".
fn inv_mod_alt(b: BigDigit) -> BigDigit {
    assert_ne!(b & 1, 0);

    let mut k0 = 2 - b as SignedDoubleBigDigit;
    let mut t = (b - 1) as SignedDoubleBigDigit;
    let mut i = 1;
    while i < big_digit::BITS {
        t = t.wrapping_mul(t);
        k0 = k0.wrapping_mul(t + 1);

        i <<= 1;
    }
    -k0 as BigDigit
}

impl MontyReducer {
    fn new(n: &BigUint) -> Self {
        let n0inv = inv_mod_alt(n.data[0]);
        MontyReducer { n0inv }
    }
}

/// Computes z mod m = x * y * 2 ** (-n*_W) mod m
/// assuming k = -1/m mod 2**_W
/// See Gueron, "Efficient Software Implementations of Modular Exponentiation".
/// <https://eprint.iacr.org/2011/239.pdf>
/// In the terminology of that paper, this is an "Almost Montgomery Multiplication":
/// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result
/// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m.
#[allow(clippy::many_single_char_names)]
fn montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint {
    // This code assumes x, y, m are all the same length, n.
    // (required by addMulVVW and the for loop).
    // It also assumes that x, y are already reduced mod m,
    // or else the result will not be properly reduced.
    assert!(
        x.data.len() == n && y.data.len() == n && m.data.len() == n,
        "{:?} {:?} {:?} {}",
        x,
        y,
        m,
        n
    );

    let mut z = BigUint::zero();
    z.data.resize(n * 2, 0);

    let mut c: BigDigit = 0;
    for i in 0..n {
        let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]);
        let t = z.data[i].wrapping_mul(k);
        let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t);
        let cx = c.wrapping_add(c2);
        let cy = cx.wrapping_add(c3);
        z.data[n + i] = cy;
        if cx < c2 || cy < c3 {
            c = 1;
        } else {
            c = 0;
        }
    }

    if c == 0 {
        z.data = z.data[n..].to_vec();
    } else {
        {
            let (first, second) = z.data.split_at_mut(n);
            sub_vv(first, second, &m.data);
        }
        z.data = z.data[..n].to_vec();
    }

    z
}

#[inline(always)]
fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit {
    let mut c = 0;
    for (zi, xi) in z.iter_mut().zip(x.iter()) {
        let (z1, z0) = mul_add_www(*xi, y, *zi);
        let (c_, zi_) = add_ww(z0, c, 0);
        *zi = zi_;
        c = c_ + z1;
    }

    c
}

/// The resulting carry c is either 0 or 1.
#[inline(always)]
fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit {
    let mut c = 0;
    for (i, (xi, yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) {
        let zi = xi.wrapping_sub(*yi).wrapping_sub(c);
        z[i] = zi;
        // see "Hacker's Delight", section 2-12 (overflow detection)
        c = ((yi & !xi) | ((yi | !xi) & zi)) >> (big_digit::BITS - 1)
    }

    c
}

/// z1<<_W + z0 = x+y+c, with c == 0 or 1
#[inline(always)]
fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
    let yc = y.wrapping_add(c);
    let z0 = x.wrapping_add(yc);
    let z1 = if z0 < x || yc < y { 1 } else { 0 };

    (z1, z0)
}

/// z1 << _W + z0 = x * y + c
#[inline(always)]
fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
    let z = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit;
    ((z >> big_digit::BITS) as BigDigit, z as BigDigit)
}

/// Calculates x ** y mod m using a fixed, 4-bit window.
#[allow(clippy::many_single_char_names)]
pub(super) fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint {
    assert!(m.data[0] & 1 == 1);
    let mr = MontyReducer::new(m);
    let num_words = m.data.len();

    let mut x = x.clone();

    // We want the lengths of x and m to be equal.
    // It is OK if x >= m as long as len(x) == len(m).
    if x.data.len() > num_words {
        x %= m;
        // Note: now len(x) <= numWords, not guaranteed ==.
    }
    if x.data.len() < num_words {
        x.data.resize(num_words, 0);
    }

    // rr = 2**(2*_W*len(m)) mod m
    let mut rr = BigUint::one();
    rr = (rr.shl(2 * num_words as u64 * u64::from(big_digit::BITS))) % m;
    if rr.data.len() < num_words {
        rr.data.resize(num_words, 0);
    }
    // one = 1, with equal length to that of m
    let mut one = BigUint::one();
    one.data.resize(num_words, 0);

    let n = 4;
    // powers[i] contains x^i
    let mut powers = Vec::with_capacity(1 << n);
    powers.push(montgomery(&one, &rr, m, mr.n0inv, num_words));
    powers.push(montgomery(&x, &rr, m, mr.n0inv, num_words));
    for i in 2..1 << n {
        let r = montgomery(&powers[i - 1], &powers[1], m, mr.n0inv, num_words);
        powers.push(r);
    }

    // initialize z = 1 (Montgomery 1)
    let mut z = powers[0].clone();
    z.data.resize(num_words, 0);
    let mut zz = BigUint::zero();
    zz.data.resize(num_words, 0);

    // same windowed exponent, but with Montgomery multiplications
    for i in (0..y.data.len()).rev() {
        let mut yi = y.data[i];
        let mut j = 0;
        while j < big_digit::BITS {
            if i != y.data.len() - 1 || j != 0 {
                zz = montgomery(&z, &z, m, mr.n0inv, num_words);
                z = montgomery(&zz, &zz, m, mr.n0inv, num_words);
                zz = montgomery(&z, &z, m, mr.n0inv, num_words);
                z = montgomery(&zz, &zz, m, mr.n0inv, num_words);
            }
            zz = montgomery(
                &z,
                &powers[(yi >> (big_digit::BITS - n)) as usize],
                m,
                mr.n0inv,
                num_words,
            );
            mem::swap(&mut z, &mut zz);
            yi <<= n;
            j += n;
        }
    }

    // convert to regular number
    zz = montgomery(&z, &one, m, mr.n0inv, num_words);

    zz.normalize();
    // One last reduction, just in case.
    // See golang.org/issue/13907.
    if zz >= *m {
        // Common case is m has high bit set; in that case,
        // since zz is the same length as m, there can be just
        // one multiple of m to remove. Just subtract.
        // We think that the subtract should be sufficient in general,
        // so do that unconditionally, but double-check,
        // in case our beliefs are wrong.
        // The div is not expected to be reached.
        zz -= m;
        if zz >= *m {
            zz %= m;
        }
    }

    zz.normalize();
    zz
}

Coverage Report

Created: 2025-11-16 07:09

Line	Count	Source
1		use crate::std_alloc::Vec;
2		use core::mem;
3		use core::ops::Shl;
4		use num_traits::{One, Zero};
5
6		use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit};
7		use crate::biguint::BigUint;
8
9		struct MontyReducer {
10		n0inv: BigDigit,
11		}
12
13		// k0 = -m-1 mod 2BITS. Algorithm from: Dumas, J.G. "On Newton–Raphson
14		// Iteration for Multiplicative Inverses Modulo Prime Powers".
15	0	fn inv_mod_alt(b: BigDigit) -> BigDigit {
16	0	assert_ne!(b & 1, 0);
17
18	0	let mut k0 = 2 - b as SignedDoubleBigDigit;
19	0	let mut t = (b - 1) as SignedDoubleBigDigit;
20	0	let mut i = 1;
21	0	while i < big_digit::BITS {
22	0	t = t.wrapping_mul(t);
23	0	k0 = k0.wrapping_mul(t + 1);
24	0
25	0	i <<= 1;
26	0	}
27	0	-k0 as BigDigit
28	0	}
29
30		impl MontyReducer {
31	0	fn new(n: &BigUint) -> Self {
32	0	let n0inv = inv_mod_alt(n.data[0]);
33	0	MontyReducer { n0inv }
34	0	}
35		}
36
37		/// Computes z mod m = x * y * 2 ** (-n*_W) mod m
38		/// assuming k = -1/m mod 2**_W
39		/// See Gueron, "Efficient Software Implementations of Modular Exponentiation".
40		/// <https://eprint.iacr.org/2011/239.pdf>
41		/// In the terminology of that paper, this is an "Almost Montgomery Multiplication":
42		/// x and y are required to satisfy 0 <= z < 2*(n_W) and then the result
43		/// z is guaranteed to satisfy 0 <= z < 2*(n_W), but it may not be < m.
44		#[allow(clippy::many_single_char_names)]
45	0	fn montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint {
46		// This code assumes x, y, m are all the same length, n.
47		// (required by addMulVVW and the for loop).
48		// It also assumes that x, y are already reduced mod m,
49		// or else the result will not be properly reduced.
50	0	assert!(
51	0	x.data.len() == n && y.data.len() == n && m.data.len() == n,
52	0	"{:?} {:?} {:?} {}",
53		x,
54		y,
55		m,
56		n
57		);
58
59	0	let mut z = BigUint::zero();
60	0	z.data.resize(n * 2, 0);
61
62	0	let mut c: BigDigit = 0;
63	0	for i in 0..n {
64	0	let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]);
65	0	let t = z.data[i].wrapping_mul(k);
66	0	let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t);
67	0	let cx = c.wrapping_add(c2);
68	0	let cy = cx.wrapping_add(c3);
69	0	z.data[n + i] = cy;
70	0	if cx < c2 \|\| cy < c3 {
71	0	c = 1;
72	0	} else {
73	0	c = 0;
74	0	}
75		}
76
77	0	if c == 0 {
78	0	z.data = z.data[n..].to_vec();
79	0	} else {
80	0	{
81	0	let (first, second) = z.data.split_at_mut(n);
82	0	sub_vv(first, second, &m.data);
83	0	}
84	0	z.data = z.data[..n].to_vec();
85	0	}
86
87	0	z
88	0	}
89
90		#[inline(always)]
91	0	fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit {
92	0	let mut c = 0;
93	0	for (zi, xi) in z.iter_mut().zip(x.iter()) {
94	0	let (z1, z0) = mul_add_www(xi, y, zi);
95	0	let (c_, zi_) = add_ww(z0, c, 0);
96	0	*zi = zi_;
97	0	c = c_ + z1;
98	0	}
99
100	0	c
101	0	}
102
103		/// The resulting carry c is either 0 or 1.
104		#[inline(always)]
105	0	fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit {
106	0	let mut c = 0;
107	0	for (i, (xi, yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) {
108	0	let zi = xi.wrapping_sub(*yi).wrapping_sub(c);
109	0	z[i] = zi;
110		// see "Hacker's Delight", section 2-12 (overflow detection)
111	0	c = ((yi & !xi) \| ((yi \| !xi) & zi)) >> (big_digit::BITS - 1)
112		}
113
114	0	c
115	0	}
116
117		/// z1<<_W + z0 = x+y+c, with c == 0 or 1
118		#[inline(always)]
119	0	fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
120	0	let yc = y.wrapping_add(c);
121	0	let z0 = x.wrapping_add(yc);
122	0	let z1 = if z0 < x \|\| yc < y { 1 } else { 0 };
123
124	0	(z1, z0)
125	0	}
126
127		/// z1 << _W + z0 = x * y + c
128		#[inline(always)]
129	0	fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
130	0	let z = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit;
131	0	((z >> big_digit::BITS) as BigDigit, z as BigDigit)
132	0	}
133
134		/// Calculates x ** y mod m using a fixed, 4-bit window.
135		#[allow(clippy::many_single_char_names)]
136	0	pub(super) fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint {
137	0	assert!(m.data[0] & 1 == 1);
138	0	let mr = MontyReducer::new(m);
139	0	let num_words = m.data.len();
140
141	0	let mut x = x.clone();
142
143		// We want the lengths of x and m to be equal.
144		// It is OK if x >= m as long as len(x) == len(m).
145	0	if x.data.len() > num_words {
146	0	x %= m;
147	0	// Note: now len(x) <= numWords, not guaranteed ==.
148	0	}
149	0	if x.data.len() < num_words {
150	0	x.data.resize(num_words, 0);
151	0	}
152
153		// rr = 2*(2_W*len(m)) mod m
154	0	let mut rr = BigUint::one();
155	0	rr = (rr.shl(2 * num_words as u64 * u64::from(big_digit::BITS))) % m;
156	0	if rr.data.len() < num_words {
157	0	rr.data.resize(num_words, 0);
158	0	}
159		// one = 1, with equal length to that of m
160	0	let mut one = BigUint::one();
161	0	one.data.resize(num_words, 0);
162
163	0	let n = 4;
164		// powers[i] contains x^i
165	0	let mut powers = Vec::with_capacity(1 << n);
166	0	powers.push(montgomery(&one, &rr, m, mr.n0inv, num_words));
167	0	powers.push(montgomery(&x, &rr, m, mr.n0inv, num_words));
168	0	for i in 2..1 << n {
169	0	let r = montgomery(&powers[i - 1], &powers[1], m, mr.n0inv, num_words);
170	0	powers.push(r);
171	0	}
172
173		// initialize z = 1 (Montgomery 1)
174	0	let mut z = powers[0].clone();
175	0	z.data.resize(num_words, 0);
176	0	let mut zz = BigUint::zero();
177	0	zz.data.resize(num_words, 0);
178
179		// same windowed exponent, but with Montgomery multiplications
180	0	for i in (0..y.data.len()).rev() {
181	0	let mut yi = y.data[i];
182	0	let mut j = 0;
183	0	while j < big_digit::BITS {
184	0	if i != y.data.len() - 1 \|\| j != 0 {
185	0	zz = montgomery(&z, &z, m, mr.n0inv, num_words);
186	0	z = montgomery(&zz, &zz, m, mr.n0inv, num_words);
187	0	zz = montgomery(&z, &z, m, mr.n0inv, num_words);
188	0	z = montgomery(&zz, &zz, m, mr.n0inv, num_words);
189	0	}
190	0	zz = montgomery(
191	0	&z,
192	0	&powers[(yi >> (big_digit::BITS - n)) as usize],
193	0	m,
194	0	mr.n0inv,
195	0	num_words,
196		);
197	0	mem::swap(&mut z, &mut zz);
198	0	yi <<= n;
199	0	j += n;
200		}
201		}
202
203		// convert to regular number
204	0	zz = montgomery(&z, &one, m, mr.n0inv, num_words);
205
206	0	zz.normalize();
207		// One last reduction, just in case.
208		// See golang.org/issue/13907.
209	0	if zz >= *m {
210		// Common case is m has high bit set; in that case,
211		// since zz is the same length as m, there can be just
212		// one multiple of m to remove. Just subtract.
213		// We think that the subtract should be sufficient in general,
214		// so do that unconditionally, but double-check,
215		// in case our beliefs are wrong.
216		// The div is not expected to be reached.
217	0	zz -= m;
218	0	if zz >= *m {
219	0	zz %= m;
220	0	}
221	0	}
222
223	0	zz.normalize();
224	0	zz
225	0	}