/rust/registry/src/index.crates.io-1949cf8c6b5b557f/num-bigint-0.4.4/src/biguint/monty.rs
Line | Count | Source |
1 | | use crate::std_alloc::Vec; |
2 | | use core::mem; |
3 | | use core::ops::Shl; |
4 | | use num_traits::{One, Zero}; |
5 | | |
6 | | use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit}; |
7 | | use crate::biguint::BigUint; |
8 | | |
9 | | struct MontyReducer { |
10 | | n0inv: BigDigit, |
11 | | } |
12 | | |
13 | | // k0 = -m**-1 mod 2**BITS. Algorithm from: Dumas, J.G. "On Newton–Raphson |
14 | | // Iteration for Multiplicative Inverses Modulo Prime Powers". |
15 | 0 | fn inv_mod_alt(b: BigDigit) -> BigDigit { |
16 | 0 | assert_ne!(b & 1, 0); |
17 | | |
18 | 0 | let mut k0 = 2 - b as SignedDoubleBigDigit; |
19 | 0 | let mut t = (b - 1) as SignedDoubleBigDigit; |
20 | 0 | let mut i = 1; |
21 | 0 | while i < big_digit::BITS { |
22 | 0 | t = t.wrapping_mul(t); |
23 | 0 | k0 = k0.wrapping_mul(t + 1); |
24 | 0 |
|
25 | 0 | i <<= 1; |
26 | 0 | } |
27 | 0 | -k0 as BigDigit |
28 | 0 | } |
29 | | |
30 | | impl MontyReducer { |
31 | 0 | fn new(n: &BigUint) -> Self { |
32 | 0 | let n0inv = inv_mod_alt(n.data[0]); |
33 | 0 | MontyReducer { n0inv } |
34 | 0 | } |
35 | | } |
36 | | |
37 | | /// Computes z mod m = x * y * 2 ** (-n*_W) mod m |
38 | | /// assuming k = -1/m mod 2**_W |
39 | | /// See Gueron, "Efficient Software Implementations of Modular Exponentiation". |
40 | | /// <https://eprint.iacr.org/2011/239.pdf> |
41 | | /// In the terminology of that paper, this is an "Almost Montgomery Multiplication": |
42 | | /// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result |
43 | | /// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m. |
44 | | #[allow(clippy::many_single_char_names)] |
45 | 0 | fn montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint { |
46 | | // This code assumes x, y, m are all the same length, n. |
47 | | // (required by addMulVVW and the for loop). |
48 | | // It also assumes that x, y are already reduced mod m, |
49 | | // or else the result will not be properly reduced. |
50 | 0 | assert!( |
51 | 0 | x.data.len() == n && y.data.len() == n && m.data.len() == n, |
52 | 0 | "{:?} {:?} {:?} {}", |
53 | | x, |
54 | | y, |
55 | | m, |
56 | | n |
57 | | ); |
58 | | |
59 | 0 | let mut z = BigUint::zero(); |
60 | 0 | z.data.resize(n * 2, 0); |
61 | | |
62 | 0 | let mut c: BigDigit = 0; |
63 | 0 | for i in 0..n { |
64 | 0 | let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]); |
65 | 0 | let t = z.data[i].wrapping_mul(k); |
66 | 0 | let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t); |
67 | 0 | let cx = c.wrapping_add(c2); |
68 | 0 | let cy = cx.wrapping_add(c3); |
69 | 0 | z.data[n + i] = cy; |
70 | 0 | if cx < c2 || cy < c3 { |
71 | 0 | c = 1; |
72 | 0 | } else { |
73 | 0 | c = 0; |
74 | 0 | } |
75 | | } |
76 | | |
77 | 0 | if c == 0 { |
78 | 0 | z.data = z.data[n..].to_vec(); |
79 | 0 | } else { |
80 | 0 | { |
81 | 0 | let (first, second) = z.data.split_at_mut(n); |
82 | 0 | sub_vv(first, second, &m.data); |
83 | 0 | } |
84 | 0 | z.data = z.data[..n].to_vec(); |
85 | 0 | } |
86 | | |
87 | 0 | z |
88 | 0 | } |
89 | | |
90 | | #[inline(always)] |
91 | 0 | fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit { |
92 | 0 | let mut c = 0; |
93 | 0 | for (zi, xi) in z.iter_mut().zip(x.iter()) { |
94 | 0 | let (z1, z0) = mul_add_www(*xi, y, *zi); |
95 | 0 | let (c_, zi_) = add_ww(z0, c, 0); |
96 | 0 | *zi = zi_; |
97 | 0 | c = c_ + z1; |
98 | 0 | } |
99 | | |
100 | 0 | c |
101 | 0 | } |
102 | | |
103 | | /// The resulting carry c is either 0 or 1. |
104 | | #[inline(always)] |
105 | 0 | fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit { |
106 | 0 | let mut c = 0; |
107 | 0 | for (i, (xi, yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) { |
108 | 0 | let zi = xi.wrapping_sub(*yi).wrapping_sub(c); |
109 | 0 | z[i] = zi; |
110 | | // see "Hacker's Delight", section 2-12 (overflow detection) |
111 | 0 | c = ((yi & !xi) | ((yi | !xi) & zi)) >> (big_digit::BITS - 1) |
112 | | } |
113 | | |
114 | 0 | c |
115 | 0 | } |
116 | | |
117 | | /// z1<<_W + z0 = x+y+c, with c == 0 or 1 |
118 | | #[inline(always)] |
119 | 0 | fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) { |
120 | 0 | let yc = y.wrapping_add(c); |
121 | 0 | let z0 = x.wrapping_add(yc); |
122 | 0 | let z1 = if z0 < x || yc < y { 1 } else { 0 }; |
123 | | |
124 | 0 | (z1, z0) |
125 | 0 | } |
126 | | |
127 | | /// z1 << _W + z0 = x * y + c |
128 | | #[inline(always)] |
129 | 0 | fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) { |
130 | 0 | let z = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit; |
131 | 0 | ((z >> big_digit::BITS) as BigDigit, z as BigDigit) |
132 | 0 | } |
133 | | |
134 | | /// Calculates x ** y mod m using a fixed, 4-bit window. |
135 | | #[allow(clippy::many_single_char_names)] |
136 | 0 | pub(super) fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint { |
137 | 0 | assert!(m.data[0] & 1 == 1); |
138 | 0 | let mr = MontyReducer::new(m); |
139 | 0 | let num_words = m.data.len(); |
140 | | |
141 | 0 | let mut x = x.clone(); |
142 | | |
143 | | // We want the lengths of x and m to be equal. |
144 | | // It is OK if x >= m as long as len(x) == len(m). |
145 | 0 | if x.data.len() > num_words { |
146 | 0 | x %= m; |
147 | 0 | // Note: now len(x) <= numWords, not guaranteed ==. |
148 | 0 | } |
149 | 0 | if x.data.len() < num_words { |
150 | 0 | x.data.resize(num_words, 0); |
151 | 0 | } |
152 | | |
153 | | // rr = 2**(2*_W*len(m)) mod m |
154 | 0 | let mut rr = BigUint::one(); |
155 | 0 | rr = (rr.shl(2 * num_words as u64 * u64::from(big_digit::BITS))) % m; |
156 | 0 | if rr.data.len() < num_words { |
157 | 0 | rr.data.resize(num_words, 0); |
158 | 0 | } |
159 | | // one = 1, with equal length to that of m |
160 | 0 | let mut one = BigUint::one(); |
161 | 0 | one.data.resize(num_words, 0); |
162 | | |
163 | 0 | let n = 4; |
164 | | // powers[i] contains x^i |
165 | 0 | let mut powers = Vec::with_capacity(1 << n); |
166 | 0 | powers.push(montgomery(&one, &rr, m, mr.n0inv, num_words)); |
167 | 0 | powers.push(montgomery(&x, &rr, m, mr.n0inv, num_words)); |
168 | 0 | for i in 2..1 << n { |
169 | 0 | let r = montgomery(&powers[i - 1], &powers[1], m, mr.n0inv, num_words); |
170 | 0 | powers.push(r); |
171 | 0 | } |
172 | | |
173 | | // initialize z = 1 (Montgomery 1) |
174 | 0 | let mut z = powers[0].clone(); |
175 | 0 | z.data.resize(num_words, 0); |
176 | 0 | let mut zz = BigUint::zero(); |
177 | 0 | zz.data.resize(num_words, 0); |
178 | | |
179 | | // same windowed exponent, but with Montgomery multiplications |
180 | 0 | for i in (0..y.data.len()).rev() { |
181 | 0 | let mut yi = y.data[i]; |
182 | 0 | let mut j = 0; |
183 | 0 | while j < big_digit::BITS { |
184 | 0 | if i != y.data.len() - 1 || j != 0 { |
185 | 0 | zz = montgomery(&z, &z, m, mr.n0inv, num_words); |
186 | 0 | z = montgomery(&zz, &zz, m, mr.n0inv, num_words); |
187 | 0 | zz = montgomery(&z, &z, m, mr.n0inv, num_words); |
188 | 0 | z = montgomery(&zz, &zz, m, mr.n0inv, num_words); |
189 | 0 | } |
190 | 0 | zz = montgomery( |
191 | 0 | &z, |
192 | 0 | &powers[(yi >> (big_digit::BITS - n)) as usize], |
193 | 0 | m, |
194 | 0 | mr.n0inv, |
195 | 0 | num_words, |
196 | | ); |
197 | 0 | mem::swap(&mut z, &mut zz); |
198 | 0 | yi <<= n; |
199 | 0 | j += n; |
200 | | } |
201 | | } |
202 | | |
203 | | // convert to regular number |
204 | 0 | zz = montgomery(&z, &one, m, mr.n0inv, num_words); |
205 | | |
206 | 0 | zz.normalize(); |
207 | | // One last reduction, just in case. |
208 | | // See golang.org/issue/13907. |
209 | 0 | if zz >= *m { |
210 | | // Common case is m has high bit set; in that case, |
211 | | // since zz is the same length as m, there can be just |
212 | | // one multiple of m to remove. Just subtract. |
213 | | // We think that the subtract should be sufficient in general, |
214 | | // so do that unconditionally, but double-check, |
215 | | // in case our beliefs are wrong. |
216 | | // The div is not expected to be reached. |
217 | 0 | zz -= m; |
218 | 0 | if zz >= *m { |
219 | 0 | zz %= m; |
220 | 0 | } |
221 | 0 | } |
222 | | |
223 | 0 | zz.normalize(); |
224 | 0 | zz |
225 | 0 | } |