/src/Botan-3.4.0/src/lib/math/mp/mp_karat.cpp
Line | Count | Source |
1 | | /* |
2 | | * Multiplication and Squaring |
3 | | * (C) 1999-2010,2018 Jack Lloyd |
4 | | * 2016 Matthias Gierlings |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #include <botan/internal/mp_core.h> |
10 | | |
11 | | #include <botan/exceptn.h> |
12 | | #include <botan/mem_ops.h> |
13 | | #include <botan/internal/ct_utils.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | /* |
18 | | * Simple O(N^2) Multiplication |
19 | | */ |
20 | 751k | void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) { |
21 | 751k | if(z_size < x_size + y_size) { |
22 | 0 | throw Invalid_Argument("basecase_mul z_size too small"); |
23 | 0 | } |
24 | | |
25 | 751k | const size_t x_size_8 = x_size - (x_size % 8); |
26 | | |
27 | 751k | clear_mem(z, z_size); |
28 | | |
29 | 10.5M | for(size_t i = 0; i != y_size; ++i) { |
30 | 9.80M | const word y_i = y[i]; |
31 | | |
32 | 9.80M | word carry = 0; |
33 | | |
34 | 28.3M | for(size_t j = 0; j != x_size_8; j += 8) { |
35 | 18.5M | carry = word8_madd3(z + i + j, x + j, y_i, carry); |
36 | 18.5M | } |
37 | | |
38 | 33.2M | for(size_t j = x_size_8; j != x_size; ++j) { |
39 | 23.4M | z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry); |
40 | 23.4M | } |
41 | | |
42 | 9.80M | z[x_size + i] = carry; |
43 | 9.80M | } |
44 | 751k | } |
45 | | |
46 | 1.11M | void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) { |
47 | 1.11M | if(z_size < 2 * x_size) { |
48 | 0 | throw Invalid_Argument("basecase_sqr z_size too small"); |
49 | 0 | } |
50 | | |
51 | 1.11M | const size_t x_size_8 = x_size - (x_size % 8); |
52 | | |
53 | 1.11M | clear_mem(z, z_size); |
54 | | |
55 | 16.4M | for(size_t i = 0; i != x_size; ++i) { |
56 | 15.2M | const word x_i = x[i]; |
57 | | |
58 | 15.2M | word carry = 0; |
59 | | |
60 | 51.0M | for(size_t j = 0; j != x_size_8; j += 8) { |
61 | 35.7M | carry = word8_madd3(z + i + j, x + j, x_i, carry); |
62 | 35.7M | } |
63 | | |
64 | 53.1M | for(size_t j = x_size_8; j != x_size; ++j) { |
65 | 37.8M | z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry); |
66 | 37.8M | } |
67 | | |
68 | 15.2M | z[x_size + i] = carry; |
69 | 15.2M | } |
70 | 1.11M | } |
71 | | |
72 | | namespace { |
73 | | |
74 | | const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32; |
75 | | const size_t KARATSUBA_SQUARE_THRESHOLD = 32; |
76 | | |
77 | | /* |
78 | | * Karatsuba Multiplication Operation |
79 | | */ |
80 | 774k | void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) { |
81 | 774k | if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) { |
82 | 579k | switch(N) { |
83 | 0 | case 6: |
84 | 0 | return bigint_comba_mul6(z, x, y); |
85 | 0 | case 8: |
86 | 0 | return bigint_comba_mul8(z, x, y); |
87 | 0 | case 9: |
88 | 0 | return bigint_comba_mul9(z, x, y); |
89 | 459k | case 16: |
90 | 459k | return bigint_comba_mul16(z, x, y); |
91 | 114k | case 24: |
92 | 114k | return bigint_comba_mul24(z, x, y); |
93 | 5.15k | default: |
94 | 5.15k | return basecase_mul(z, 2 * N, x, N, y, N); |
95 | 579k | } |
96 | 579k | } |
97 | | |
98 | 194k | const size_t N2 = N / 2; |
99 | | |
100 | 194k | const word* x0 = x; |
101 | 194k | const word* x1 = x + N2; |
102 | 194k | const word* y0 = y; |
103 | 194k | const word* y1 = y + N2; |
104 | 194k | word* z0 = z; |
105 | 194k | word* z1 = z + N; |
106 | | |
107 | 194k | word* ws0 = workspace; |
108 | 194k | word* ws1 = workspace + N; |
109 | | |
110 | 194k | clear_mem(workspace, 2 * N); |
111 | | |
112 | | /* |
113 | | * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here, |
114 | | * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do |
115 | | * anything, clear_mem above already set the correct result. |
116 | | * |
117 | | * However we ignore the result of the comparisons and always perform the |
118 | | * subtractions and recursively multiply to avoid the timing channel. |
119 | | */ |
120 | | |
121 | | // First compute (X_lo - X_hi)*(Y_hi - Y_lo) |
122 | 194k | const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace); |
123 | 194k | const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace); |
124 | 194k | const auto neg_mask = ~(cmp0 ^ cmp1); |
125 | | |
126 | 194k | karatsuba_mul(ws0, z0, z1, N2, ws1); |
127 | | |
128 | | // Compute X_lo * Y_lo |
129 | 194k | karatsuba_mul(z0, x0, y0, N2, ws1); |
130 | | |
131 | | // Compute X_hi * Y_hi |
132 | 194k | karatsuba_mul(z1, x1, y1, N2, ws1); |
133 | | |
134 | 194k | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
135 | 194k | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
136 | | |
137 | 194k | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
138 | 194k | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
139 | | |
140 | 194k | clear_mem(workspace + N, N2); |
141 | | |
142 | 194k | bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2 * N - N2); |
143 | 194k | } |
144 | | |
145 | | /* |
146 | | * Karatsuba Squaring Operation |
147 | | */ |
148 | 1.29M | void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) { |
149 | 1.29M | if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) { |
150 | 991k | switch(N) { |
151 | 0 | case 6: |
152 | 0 | return bigint_comba_sqr6(z, x); |
153 | 0 | case 8: |
154 | 0 | return bigint_comba_sqr8(z, x); |
155 | 0 | case 9: |
156 | 0 | return bigint_comba_sqr9(z, x); |
157 | 723k | case 16: |
158 | 723k | return bigint_comba_sqr16(z, x); |
159 | 180k | case 24: |
160 | 180k | return bigint_comba_sqr24(z, x); |
161 | 88.0k | default: |
162 | 88.0k | return basecase_sqr(z, 2 * N, x, N); |
163 | 991k | } |
164 | 991k | } |
165 | | |
166 | 303k | const size_t N2 = N / 2; |
167 | | |
168 | 303k | const word* x0 = x; |
169 | 303k | const word* x1 = x + N2; |
170 | 303k | word* z0 = z; |
171 | 303k | word* z1 = z + N; |
172 | | |
173 | 303k | word* ws0 = workspace; |
174 | 303k | word* ws1 = workspace + N; |
175 | | |
176 | 303k | clear_mem(workspace, 2 * N); |
177 | | |
178 | | // See comment in karatsuba_mul |
179 | 303k | bigint_sub_abs(z0, x0, x1, N2, workspace); |
180 | 303k | karatsuba_sqr(ws0, z0, N2, ws1); |
181 | | |
182 | 303k | karatsuba_sqr(z0, x0, N2, ws1); |
183 | 303k | karatsuba_sqr(z1, x1, N2, ws1); |
184 | | |
185 | 303k | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
186 | 303k | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
187 | | |
188 | 303k | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
189 | 303k | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
190 | | |
191 | | /* |
192 | | * This is only actually required if cmp (result of bigint_sub_abs) is != 0, |
193 | | * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a |
194 | | * timing channel. |
195 | | */ |
196 | 303k | bigint_sub2(z + N2, 2 * N - N2, ws0, N); |
197 | 303k | } |
198 | | |
199 | | /* |
200 | | * Pick a good size for the Karatsuba multiply |
201 | | */ |
202 | 259k | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) { |
203 | 259k | if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) { |
204 | 344 | return 0; |
205 | 344 | } |
206 | | |
207 | 258k | if(((x_size == x_sw) && (x_size % 2)) || ((y_size == y_sw) && (y_size % 2))) { |
208 | 0 | return 0; |
209 | 0 | } |
210 | | |
211 | 258k | const size_t start = (x_sw > y_sw) ? x_sw : y_sw; |
212 | 258k | const size_t end = (x_size < y_size) ? x_size : y_size; |
213 | | |
214 | 258k | if(start == end) { |
215 | 7.81k | if(start % 2) { |
216 | 0 | return 0; |
217 | 0 | } |
218 | 7.81k | return start; |
219 | 7.81k | } |
220 | | |
221 | 318k | for(size_t j = start; j <= end; ++j) { |
222 | 318k | if(j % 2) { |
223 | 67.9k | continue; |
224 | 67.9k | } |
225 | | |
226 | 250k | if(2 * j > z_size) { |
227 | 61.2k | return 0; |
228 | 61.2k | } |
229 | | |
230 | 189k | if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) { |
231 | 189k | if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) { |
232 | 6.73k | return j + 2; |
233 | 6.73k | } |
234 | 182k | return j; |
235 | 189k | } |
236 | 189k | } |
237 | | |
238 | 0 | return 0; |
239 | 250k | } |
240 | | |
241 | | /* |
242 | | * Pick a good size for the Karatsuba squaring |
243 | | */ |
244 | 1.16M | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) { |
245 | 1.16M | if(x_sw == x_size) { |
246 | 0 | if(x_sw % 2) { |
247 | 0 | return 0; |
248 | 0 | } |
249 | 0 | return x_sw; |
250 | 0 | } |
251 | | |
252 | 1.95M | for(size_t j = x_sw; j <= x_size; ++j) { |
253 | 1.95M | if(j % 2) { |
254 | 784k | continue; |
255 | 784k | } |
256 | | |
257 | 1.16M | if(2 * j > z_size) { |
258 | 784k | return 0; |
259 | 784k | } |
260 | | |
261 | 385k | if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) { |
262 | 422 | return j + 2; |
263 | 422 | } |
264 | 384k | return j; |
265 | 385k | } |
266 | | |
267 | 0 | return 0; |
268 | 1.16M | } |
269 | | |
270 | | template <size_t SZ> |
271 | 161M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { |
272 | 161M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); |
273 | 161M | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 64.6M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 64.6M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 64.6M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 34.1M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 34.1M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 34.1M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 31.1M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 31.1M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 31.1M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 29.1M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 29.1M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 29.1M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 1.70M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 1.70M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 1.70M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 941k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 941k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 941k | } |
|
274 | | |
275 | | template <size_t SZ> |
276 | 150M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { |
277 | 150M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); |
278 | 150M | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 56.8M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 56.8M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 56.8M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 32.3M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 32.3M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 32.3M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 29.7M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 29.7M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 29.7M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 27.7M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 27.7M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 27.7M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 2.45M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 2.45M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 2.45M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 1.41M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 1.41M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 1.41M | } |
|
279 | | |
280 | | } // namespace |
281 | | |
282 | | void bigint_mul(word z[], |
283 | | size_t z_size, |
284 | | const word x[], |
285 | | size_t x_size, |
286 | | size_t x_sw, |
287 | | const word y[], |
288 | | size_t y_size, |
289 | | size_t y_sw, |
290 | | word workspace[], |
291 | 64.7M | size_t ws_size) { |
292 | 64.7M | clear_mem(z, z_size); |
293 | | |
294 | 64.7M | if(x_sw == 1) { |
295 | 7.49k | bigint_linmul3(z, y, y_sw, x[0]); |
296 | 64.6M | } else if(y_sw == 1) { |
297 | 0 | bigint_linmul3(z, x, x_sw, y[0]); |
298 | 64.6M | } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) { |
299 | 30.5M | bigint_comba_mul4(z, x, y); |
300 | 34.1M | } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) { |
301 | 3.00M | bigint_comba_mul6(z, x, y); |
302 | 31.1M | } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) { |
303 | 2.07M | bigint_comba_mul8(z, x, y); |
304 | 29.1M | } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) { |
305 | 27.4M | bigint_comba_mul9(z, x, y); |
306 | 27.4M | } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) { |
307 | 767k | bigint_comba_mul16(z, x, y); |
308 | 941k | } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) { |
309 | 4.49k | bigint_comba_mul24(z, x, y); |
310 | 937k | } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || !workspace) { |
311 | 678k | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
312 | 678k | } else { |
313 | 259k | const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); |
314 | | |
315 | 259k | if(N && z_size >= 2 * N && ws_size >= 2 * N) { |
316 | 191k | karatsuba_mul(z, x, y, N, workspace); |
317 | 191k | } else { |
318 | 67.7k | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
319 | 67.7k | } |
320 | 259k | } |
321 | 64.7M | } |
322 | | |
323 | | /* |
324 | | * Squaring Algorithm Dispatcher |
325 | | */ |
326 | 56.8M | void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) { |
327 | 56.8M | clear_mem(z, z_size); |
328 | | |
329 | 56.8M | BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient"); |
330 | | |
331 | 56.8M | if(x_sw == 1) { |
332 | 12.6k | bigint_linmul3(z, x, x_sw, x[0]); |
333 | 56.8M | } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) { |
334 | 24.5M | bigint_comba_sqr4(z, x); |
335 | 32.3M | } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) { |
336 | 2.67M | bigint_comba_sqr6(z, x); |
337 | 29.7M | } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) { |
338 | 1.96M | bigint_comba_sqr8(z, x); |
339 | 27.7M | } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) { |
340 | 25.2M | bigint_comba_sqr9(z, x); |
341 | 25.2M | } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) { |
342 | 1.04M | bigint_comba_sqr16(z, x); |
343 | 1.41M | } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) { |
344 | 304 | bigint_comba_sqr24(z, x); |
345 | 1.41M | } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) { |
346 | 245k | basecase_sqr(z, z_size, x, x_sw); |
347 | 1.16M | } else { |
348 | 1.16M | const size_t N = karatsuba_size(z_size, x_size, x_sw); |
349 | | |
350 | 1.16M | if(N && z_size >= 2 * N && ws_size >= 2 * N) { |
351 | 384k | karatsuba_sqr(z, x, N, workspace); |
352 | 784k | } else { |
353 | 784k | basecase_sqr(z, z_size, x, x_sw); |
354 | 784k | } |
355 | 1.16M | } |
356 | 56.8M | } |
357 | | |
358 | | } // namespace Botan |