/src/botan/src/lib/math/mp/mp_karat.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Multiplication and Squaring |
3 | | * (C) 1999-2010,2018 Jack Lloyd |
4 | | * 2016 Matthias Gierlings |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #include <botan/internal/mp_core.h> |
10 | | |
11 | | #include <botan/exceptn.h> |
12 | | #include <botan/mem_ops.h> |
13 | | #include <botan/internal/ct_utils.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | /* |
18 | | * Simple O(N^2) Multiplication |
19 | | */ |
20 | 1.68k | void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) { |
21 | 1.68k | if(z_size < x_size + y_size) { |
22 | 0 | throw Invalid_Argument("basecase_mul z_size too small"); |
23 | 0 | } |
24 | | |
25 | 1.68k | const size_t x_size_8 = x_size - (x_size % 8); |
26 | | |
27 | 1.68k | clear_mem(z, z_size); |
28 | | |
29 | 13.5k | for(size_t i = 0; i != y_size; ++i) { |
30 | 11.8k | const word y_i = y[i]; |
31 | | |
32 | 11.8k | word carry = 0; |
33 | | |
34 | 32.5k | for(size_t j = 0; j != x_size_8; j += 8) { |
35 | 20.6k | carry = word8_madd3(z + i + j, x + j, y_i, carry); |
36 | 20.6k | } |
37 | | |
38 | 50.8k | for(size_t j = x_size_8; j != x_size; ++j) { |
39 | 38.9k | z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry); |
40 | 38.9k | } |
41 | | |
42 | 11.8k | z[x_size + i] = carry; |
43 | 11.8k | } |
44 | 1.68k | } |
45 | | |
46 | 0 | void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) { |
47 | 0 | if(z_size < 2 * x_size) { |
48 | 0 | throw Invalid_Argument("basecase_sqr z_size too small"); |
49 | 0 | } |
50 | | |
51 | 0 | const size_t x_size_8 = x_size - (x_size % 8); |
52 | |
|
53 | 0 | clear_mem(z, z_size); |
54 | |
|
55 | 0 | for(size_t i = 0; i != x_size; ++i) { |
56 | 0 | const word x_i = x[i]; |
57 | |
|
58 | 0 | word carry = 0; |
59 | |
|
60 | 0 | for(size_t j = 0; j != x_size_8; j += 8) { |
61 | 0 | carry = word8_madd3(z + i + j, x + j, x_i, carry); |
62 | 0 | } |
63 | |
|
64 | 0 | for(size_t j = x_size_8; j != x_size; ++j) { |
65 | 0 | z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry); |
66 | 0 | } |
67 | |
|
68 | 0 | z[x_size + i] = carry; |
69 | 0 | } |
70 | 0 | } |
71 | | |
72 | | namespace { |
73 | | |
74 | | const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32; |
75 | | const size_t KARATSUBA_SQUARE_THRESHOLD = 32; |
76 | | |
77 | | /* |
78 | | * Karatsuba Multiplication Operation |
79 | | */ |
80 | 168 | void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) { |
81 | 168 | if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) { |
82 | 126 | switch(N) { |
83 | 0 | case 6: |
84 | 0 | return bigint_comba_mul6(z, x, y); |
85 | 0 | case 8: |
86 | 0 | return bigint_comba_mul8(z, x, y); |
87 | 0 | case 9: |
88 | 0 | return bigint_comba_mul9(z, x, y); |
89 | 27 | case 16: |
90 | 27 | return bigint_comba_mul16(z, x, y); |
91 | 0 | case 24: |
92 | 0 | return bigint_comba_mul24(z, x, y); |
93 | 99 | default: |
94 | 99 | return basecase_mul(z, 2 * N, x, N, y, N); |
95 | 126 | } |
96 | 126 | } |
97 | | |
98 | 42 | const size_t N2 = N / 2; |
99 | | |
100 | 42 | const word* x0 = x; |
101 | 42 | const word* x1 = x + N2; |
102 | 42 | const word* y0 = y; |
103 | 42 | const word* y1 = y + N2; |
104 | 42 | word* z0 = z; |
105 | 42 | word* z1 = z + N; |
106 | | |
107 | 42 | word* ws0 = workspace; |
108 | 42 | word* ws1 = workspace + N; |
109 | | |
110 | 42 | clear_mem(workspace, 2 * N); |
111 | | |
112 | | /* |
113 | | * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here, |
114 | | * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do |
115 | | * anything, clear_mem above already set the correct result. |
116 | | * |
117 | | * However we ignore the result of the comparisons and always perform the |
118 | | * subtractions and recursively multiply to avoid the timing channel. |
119 | | */ |
120 | | |
121 | | // First compute (X_lo - X_hi)*(Y_hi - Y_lo) |
122 | 42 | const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace); |
123 | 42 | const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace); |
124 | 42 | const auto neg_mask = ~(cmp0 ^ cmp1); |
125 | | |
126 | 42 | karatsuba_mul(ws0, z0, z1, N2, ws1); |
127 | | |
128 | | // Compute X_lo * Y_lo |
129 | 42 | karatsuba_mul(z0, x0, y0, N2, ws1); |
130 | | |
131 | | // Compute X_hi * Y_hi |
132 | 42 | karatsuba_mul(z1, x1, y1, N2, ws1); |
133 | | |
134 | 42 | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
135 | 42 | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
136 | | |
137 | 42 | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
138 | 42 | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
139 | | |
140 | 42 | clear_mem(workspace + N, N2); |
141 | | |
142 | 42 | bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2 * N - N2); |
143 | 42 | } |
144 | | |
145 | | /* |
146 | | * Karatsuba Squaring Operation |
147 | | */ |
148 | 0 | void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) { |
149 | 0 | if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) { |
150 | 0 | switch(N) { |
151 | 0 | case 6: |
152 | 0 | return bigint_comba_sqr6(z, x); |
153 | 0 | case 8: |
154 | 0 | return bigint_comba_sqr8(z, x); |
155 | 0 | case 9: |
156 | 0 | return bigint_comba_sqr9(z, x); |
157 | 0 | case 16: |
158 | 0 | return bigint_comba_sqr16(z, x); |
159 | 0 | case 24: |
160 | 0 | return bigint_comba_sqr24(z, x); |
161 | 0 | default: |
162 | 0 | return basecase_sqr(z, 2 * N, x, N); |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | 0 | const size_t N2 = N / 2; |
167 | |
|
168 | 0 | const word* x0 = x; |
169 | 0 | const word* x1 = x + N2; |
170 | 0 | word* z0 = z; |
171 | 0 | word* z1 = z + N; |
172 | |
|
173 | 0 | word* ws0 = workspace; |
174 | 0 | word* ws1 = workspace + N; |
175 | |
|
176 | 0 | clear_mem(workspace, 2 * N); |
177 | | |
178 | | // See comment in karatsuba_mul |
179 | 0 | bigint_sub_abs(z0, x0, x1, N2, workspace); |
180 | 0 | karatsuba_sqr(ws0, z0, N2, ws1); |
181 | |
|
182 | 0 | karatsuba_sqr(z0, x0, N2, ws1); |
183 | 0 | karatsuba_sqr(z1, x1, N2, ws1); |
184 | |
|
185 | 0 | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
186 | 0 | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
187 | |
|
188 | 0 | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
189 | 0 | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
190 | | |
191 | | /* |
192 | | * This is only actually required if cmp (result of bigint_sub_abs) is != 0, |
193 | | * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a |
194 | | * timing channel. |
195 | | */ |
196 | 0 | bigint_sub2(z + N2, 2 * N - N2, ws0, N); |
197 | 0 | } |
198 | | |
199 | | /* |
200 | | * Pick a good size for the Karatsuba multiply |
201 | | */ |
202 | 104 | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) { |
203 | 104 | if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) { |
204 | 4 | return 0; |
205 | 4 | } |
206 | | |
207 | 100 | if(((x_size == x_sw) && (x_size % 2)) || ((y_size == y_sw) && (y_size % 2))) { |
208 | 0 | return 0; |
209 | 0 | } |
210 | | |
211 | 100 | const size_t start = (x_sw > y_sw) ? x_sw : y_sw; |
212 | 100 | const size_t end = (x_size < y_size) ? x_size : y_size; |
213 | | |
214 | 100 | if(start == end) { |
215 | 10 | if(start % 2) { |
216 | 0 | return 0; |
217 | 0 | } |
218 | 10 | return start; |
219 | 10 | } |
220 | | |
221 | 135 | for(size_t j = start; j <= end; ++j) { |
222 | 135 | if(j % 2) { |
223 | 45 | continue; |
224 | 45 | } |
225 | | |
226 | 90 | if(2 * j > z_size) { |
227 | 56 | return 0; |
228 | 56 | } |
229 | | |
230 | 34 | if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) { |
231 | 34 | if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) { |
232 | 0 | return j + 2; |
233 | 0 | } |
234 | 34 | return j; |
235 | 34 | } |
236 | 34 | } |
237 | | |
238 | 0 | return 0; |
239 | 90 | } |
240 | | |
241 | | /* |
242 | | * Pick a good size for the Karatsuba squaring |
243 | | */ |
244 | 0 | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) { |
245 | 0 | if(x_sw == x_size) { |
246 | 0 | if(x_sw % 2) { |
247 | 0 | return 0; |
248 | 0 | } |
249 | 0 | return x_sw; |
250 | 0 | } |
251 | | |
252 | 0 | for(size_t j = x_sw; j <= x_size; ++j) { |
253 | 0 | if(j % 2) { |
254 | 0 | continue; |
255 | 0 | } |
256 | | |
257 | 0 | if(2 * j > z_size) { |
258 | 0 | return 0; |
259 | 0 | } |
260 | | |
261 | 0 | if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) { |
262 | 0 | return j + 2; |
263 | 0 | } |
264 | 0 | return j; |
265 | 0 | } |
266 | | |
267 | 0 | return 0; |
268 | 0 | } |
269 | | |
270 | | template <size_t SZ> |
271 | 10.0k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { |
272 | 10.0k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); |
273 | 10.0k | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 1.72k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 1.72k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 1.72k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 1.69k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 1.69k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 1.69k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 1.67k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 1.67k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 1.67k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 1.66k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 1.66k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 1.66k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 1.64k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 1.64k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 1.64k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 1.63k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 1.63k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 1.63k | } |
|
274 | | |
275 | | template <size_t SZ> |
276 | 0 | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { |
277 | 0 | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); |
278 | 0 | } Unexecuted instantiation: mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long) Unexecuted instantiation: mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long) Unexecuted instantiation: mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long) Unexecuted instantiation: mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long) Unexecuted instantiation: mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long) Unexecuted instantiation: mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long) |
279 | | |
280 | | } // namespace |
281 | | |
282 | | void bigint_mul(word z[], |
283 | | size_t z_size, |
284 | | const word x[], |
285 | | size_t x_size, |
286 | | size_t x_sw, |
287 | | const word y[], |
288 | | size_t y_size, |
289 | | size_t y_sw, |
290 | | word workspace[], |
291 | 1.72k | size_t ws_size) { |
292 | 1.72k | clear_mem(z, z_size); |
293 | | |
294 | 1.72k | if(x_sw == 1) { |
295 | 0 | bigint_linmul3(z, y, y_sw, x[0]); |
296 | 1.72k | } else if(y_sw == 1) { |
297 | 0 | bigint_linmul3(z, x, x_sw, y[0]); |
298 | 1.72k | } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) { |
299 | 32 | bigint_comba_mul4(z, x, y); |
300 | 1.69k | } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) { |
301 | 15 | bigint_comba_mul6(z, x, y); |
302 | 1.67k | } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) { |
303 | 18 | bigint_comba_mul8(z, x, y); |
304 | 1.66k | } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) { |
305 | 16 | bigint_comba_mul9(z, x, y); |
306 | 1.64k | } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) { |
307 | 12 | bigint_comba_mul16(z, x, y); |
308 | 1.63k | } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) { |
309 | 8 | bigint_comba_mul24(z, x, y); |
310 | 1.62k | } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || !workspace) { |
311 | 1.52k | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
312 | 1.52k | } else { |
313 | 104 | const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); |
314 | | |
315 | 104 | if(N && z_size >= 2 * N && ws_size >= 2 * N) { |
316 | 42 | karatsuba_mul(z, x, y, N, workspace); |
317 | 62 | } else { |
318 | 62 | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
319 | 62 | } |
320 | 104 | } |
321 | 1.72k | } |
322 | | |
323 | | /* |
324 | | * Squaring Algorithm Dispatcher |
325 | | */ |
326 | 0 | void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) { |
327 | 0 | clear_mem(z, z_size); |
328 | |
|
329 | 0 | BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient"); |
330 | |
|
331 | 0 | if(x_sw == 1) { |
332 | 0 | bigint_linmul3(z, x, x_sw, x[0]); |
333 | 0 | } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) { |
334 | 0 | bigint_comba_sqr4(z, x); |
335 | 0 | } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) { |
336 | 0 | bigint_comba_sqr6(z, x); |
337 | 0 | } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) { |
338 | 0 | bigint_comba_sqr8(z, x); |
339 | 0 | } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) { |
340 | 0 | bigint_comba_sqr9(z, x); |
341 | 0 | } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) { |
342 | 0 | bigint_comba_sqr16(z, x); |
343 | 0 | } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) { |
344 | 0 | bigint_comba_sqr24(z, x); |
345 | 0 | } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) { |
346 | 0 | basecase_sqr(z, z_size, x, x_sw); |
347 | 0 | } else { |
348 | 0 | const size_t N = karatsuba_size(z_size, x_size, x_sw); |
349 | |
|
350 | 0 | if(N && z_size >= 2 * N && ws_size >= 2 * N) { |
351 | 0 | karatsuba_sqr(z, x, N, workspace); |
352 | 0 | } else { |
353 | 0 | basecase_sqr(z, z_size, x, x_sw); |
354 | 0 | } |
355 | 0 | } |
356 | 0 | } |
357 | | |
358 | | } // namespace Botan |