/src/botan/src/lib/math/mp/mp_monty.cpp
Line | Count | Source |
1 | | /* |
2 | | * Montgomery Reduction |
3 | | * (C) 1999-2011,2025 Jack Lloyd |
4 | | * 2006 Luca Piccarreta |
5 | | * 2016 Matthias Gierlings |
6 | | * |
7 | | * Botan is released under the Simplified BSD License (see license.txt) |
8 | | */ |
9 | | |
10 | | #include <botan/internal/mp_core.h> |
11 | | |
12 | | #include <botan/assert.h> |
13 | | |
14 | | namespace Botan { |
15 | | |
16 | | namespace { |
17 | | |
18 | 2.05G | BOTAN_FORCE_INLINE void mul_rev_range(word3<word>& accum, const word ws[], const word p[], size_t bound) { |
19 | | /* |
20 | | Unrolled version of: |
21 | | |
22 | | for(size_t i = 0; i < bound; ++i) { |
23 | | accum.mul(ws[i], p[bound - i]); |
24 | | } |
25 | | */ |
26 | | |
27 | 2.05G | size_t lower = 0; |
28 | 7.74G | while(lower < bound) { |
29 | 5.69G | const size_t upper = bound - lower; |
30 | | |
31 | 5.69G | if(upper >= 16) { |
32 | 1.88G | accum.mul(ws[lower], p[upper]); |
33 | 1.88G | accum.mul(ws[lower + 1], p[upper - 1]); |
34 | 1.88G | accum.mul(ws[lower + 2], p[upper - 2]); |
35 | 1.88G | accum.mul(ws[lower + 3], p[upper - 3]); |
36 | 1.88G | accum.mul(ws[lower + 4], p[upper - 4]); |
37 | 1.88G | accum.mul(ws[lower + 5], p[upper - 5]); |
38 | 1.88G | accum.mul(ws[lower + 6], p[upper - 6]); |
39 | 1.88G | accum.mul(ws[lower + 7], p[upper - 7]); |
40 | 1.88G | accum.mul(ws[lower + 8], p[upper - 8]); |
41 | 1.88G | accum.mul(ws[lower + 9], p[upper - 9]); |
42 | 1.88G | accum.mul(ws[lower + 10], p[upper - 10]); |
43 | 1.88G | accum.mul(ws[lower + 11], p[upper - 11]); |
44 | 1.88G | accum.mul(ws[lower + 12], p[upper - 12]); |
45 | 1.88G | accum.mul(ws[lower + 13], p[upper - 13]); |
46 | 1.88G | accum.mul(ws[lower + 14], p[upper - 14]); |
47 | 1.88G | accum.mul(ws[lower + 15], p[upper - 15]); |
48 | 1.88G | lower += 16; |
49 | 3.80G | } else if(upper >= 8) { |
50 | 847M | accum.mul(ws[lower], p[upper]); |
51 | 847M | accum.mul(ws[lower + 1], p[upper - 1]); |
52 | 847M | accum.mul(ws[lower + 2], p[upper - 2]); |
53 | 847M | accum.mul(ws[lower + 3], p[upper - 3]); |
54 | 847M | accum.mul(ws[lower + 4], p[upper - 4]); |
55 | 847M | accum.mul(ws[lower + 5], p[upper - 5]); |
56 | 847M | accum.mul(ws[lower + 6], p[upper - 6]); |
57 | 847M | accum.mul(ws[lower + 7], p[upper - 7]); |
58 | 847M | lower += 8; |
59 | 2.96G | } else if(upper >= 4) { |
60 | 897M | accum.mul(ws[lower], p[upper]); |
61 | 897M | accum.mul(ws[lower + 1], p[upper - 1]); |
62 | 897M | accum.mul(ws[lower + 2], p[upper - 2]); |
63 | 897M | accum.mul(ws[lower + 3], p[upper - 3]); |
64 | 897M | lower += 4; |
65 | 2.06G | } else if(upper >= 2) { |
66 | 1.02G | accum.mul(ws[lower], p[upper]); |
67 | 1.02G | accum.mul(ws[lower + 1], p[upper - 1]); |
68 | 1.02G | lower += 2; |
69 | 1.03G | } else { |
70 | 1.03G | accum.mul(ws[lower], p[upper]); |
71 | 1.03G | lower += 1; |
72 | 1.03G | } |
73 | 5.69G | } |
74 | 2.05G | } |
75 | | |
76 | | } // namespace |
77 | | |
78 | | /* |
79 | | * Montgomery reduction - product scanning form |
80 | | * |
81 | | * Algorithm 5 from "Energy-Efficient Software Implementation of Long |
82 | | * Integer Modular Arithmetic" |
83 | | * (https://www.iacr.org/archive/ches2005/006.pdf) |
84 | | * |
85 | | * See also |
86 | | * |
87 | | * https://eprint.iacr.org/2013/882.pdf |
88 | | * https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf |
89 | | */ |
90 | | void bigint_monty_redc_generic( |
91 | 76.0M | word r[], const word z[], size_t z_size, const word p[], size_t p_size, word p_dash, word ws[]) { |
92 | 76.0M | BOTAN_ARG_CHECK(z_size >= 2 * p_size && p_size > 0, "Invalid sizes for bigint_monty_redc_generic"); |
93 | | |
94 | 76.0M | word3<word> accum; |
95 | | |
96 | 76.0M | accum.add(z[0]); |
97 | | |
98 | 76.0M | ws[0] = accum.monty_step(p[0], p_dash); |
99 | | |
100 | 1.10G | for(size_t i = 1; i != p_size; ++i) { |
101 | 1.02G | mul_rev_range(accum, ws, p, i); |
102 | 1.02G | accum.add(z[i]); |
103 | 1.02G | ws[i] = accum.monty_step(p[0], p_dash); |
104 | 1.02G | } |
105 | | |
106 | 1.10G | for(size_t i = 0; i != p_size - 1; ++i) { |
107 | 1.02G | mul_rev_range(accum, &ws[i + 1], &p[i], p_size - (i + 1)); |
108 | 1.02G | accum.add(z[p_size + i]); |
109 | 1.02G | ws[i] = accum.extract(); |
110 | 1.02G | } |
111 | | |
112 | 76.0M | accum.add(z[2 * p_size - 1]); |
113 | | |
114 | 76.0M | ws[p_size - 1] = accum.extract(); |
115 | | // w1 is the final part, which is not stored in the workspace |
116 | 76.0M | const word w1 = accum.extract(); |
117 | | |
118 | | /* |
119 | | * The result might need to be reduced mod p. To avoid a timing |
120 | | * channel, always perform the subtraction. If in the compution |
121 | | * of x - p a borrow is required then x was already < p. |
122 | | * |
123 | | * x starts at ws[0] and is p_size bytes long plus a possible high |
124 | | * digit left over in w1. |
125 | | * |
126 | | * x - p starts at z[0] and is also p_size bytes long |
127 | | * |
128 | | * If borrow was set after the subtraction, then x was already less |
129 | | * than p and the subtraction was not needed. In that case overwrite |
130 | | * z[0:p_size] with the original x in ws[0:p_size]. |
131 | | * |
132 | | * We only copy out p_size in the final step because we know |
133 | | * the Montgomery result is < P |
134 | | */ |
135 | | |
136 | 76.0M | bigint_monty_maybe_sub(p_size, r, w1, ws, p); |
137 | 76.0M | } |
138 | | |
139 | | } // namespace Botan |