/src/botan/src/lib/math/mp/mp_karat.cpp
Line | Count | Source |
1 | | /* |
2 | | * Multiplication and Squaring |
3 | | * (C) 1999-2010,2018 Jack Lloyd |
4 | | * 2016 Matthias Gierlings |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #include <botan/internal/mp_core.h> |
10 | | |
11 | | #include <botan/exceptn.h> |
12 | | #include <botan/mem_ops.h> |
13 | | #include <botan/internal/ct_utils.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | /* |
18 | | * Simple O(N^2) Multiplication |
19 | | */ |
20 | 51.2k | void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) { |
21 | 51.2k | if(z_size < x_size + y_size) { |
22 | 0 | throw Invalid_Argument("basecase_mul z_size too small"); |
23 | 0 | } |
24 | | |
25 | 51.2k | const size_t x_size_8 = x_size - (x_size % 8); |
26 | | |
27 | 51.2k | clear_mem(z, z_size); |
28 | | |
29 | 160k | for(size_t i = 0; i != y_size; ++i) { |
30 | 109k | const word y_i = y[i]; |
31 | | |
32 | 109k | word carry = 0; |
33 | | |
34 | 133k | for(size_t j = 0; j != x_size_8; j += 8) { |
35 | 24.2k | carry = word8_madd3(z + i + j, x + j, y_i, carry); |
36 | 24.2k | } |
37 | | |
38 | 341k | for(size_t j = x_size_8; j != x_size; ++j) { |
39 | 232k | z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry); |
40 | 232k | } |
41 | | |
42 | 109k | z[x_size + i] = carry; |
43 | 109k | } |
44 | 51.2k | } |
45 | | |
46 | 21.9k | void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) { |
47 | 21.9k | if(z_size < 2 * x_size) { |
48 | 0 | throw Invalid_Argument("basecase_sqr z_size too small"); |
49 | 0 | } |
50 | | |
51 | 21.9k | const size_t x_size_8 = x_size - (x_size % 8); |
52 | | |
53 | 21.9k | clear_mem(z, z_size); |
54 | | |
55 | 72.8k | for(size_t i = 0; i != x_size; ++i) { |
56 | 50.8k | const word x_i = x[i]; |
57 | | |
58 | 50.8k | word carry = 0; |
59 | | |
60 | 96.1k | for(size_t j = 0; j != x_size_8; j += 8) { |
61 | 45.2k | carry = word8_madd3(z + i + j, x + j, x_i, carry); |
62 | 45.2k | } |
63 | | |
64 | 166k | for(size_t j = x_size_8; j != x_size; ++j) { |
65 | 116k | z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry); |
66 | 116k | } |
67 | | |
68 | 50.8k | z[x_size + i] = carry; |
69 | 50.8k | } |
70 | 21.9k | } |
71 | | |
72 | | namespace { |
73 | | |
74 | | const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32; |
75 | | const size_t KARATSUBA_SQUARE_THRESHOLD = 32; |
76 | | |
77 | | /* |
78 | | * Karatsuba Multiplication Operation |
79 | | */ |
80 | 301 | void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) { |
81 | 301 | if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2 != 0) { |
82 | 210 | switch(N) { |
83 | 0 | case 6: |
84 | 0 | return bigint_comba_mul6(z, x, y); |
85 | 0 | case 8: |
86 | 0 | return bigint_comba_mul8(z, x, y); |
87 | 0 | case 9: |
88 | 0 | return bigint_comba_mul9(z, x, y); |
89 | 66 | case 16: |
90 | 66 | return bigint_comba_mul16(z, x, y); |
91 | 6 | case 24: |
92 | 6 | return bigint_comba_mul24(z, x, y); |
93 | 138 | default: |
94 | 138 | return basecase_mul(z, 2 * N, x, N, y, N); |
95 | 210 | } |
96 | 210 | } |
97 | | |
98 | 91 | const size_t N2 = N / 2; |
99 | | |
100 | 91 | const word* x0 = x; |
101 | 91 | const word* x1 = x + N2; |
102 | 91 | const word* y0 = y; |
103 | 91 | const word* y1 = y + N2; |
104 | 91 | word* z0 = z; |
105 | 91 | word* z1 = z + N; |
106 | | |
107 | 91 | word* ws0 = workspace; |
108 | 91 | word* ws1 = workspace + N; |
109 | | |
110 | 91 | clear_mem(workspace, 2 * N); |
111 | | |
112 | | /* |
113 | | * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here, |
114 | | * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do |
115 | | * anything, clear_mem above already set the correct result. |
116 | | * |
117 | | * However we ignore the result of the comparisons and always perform the |
118 | | * subtractions and recursively multiply to avoid the timing channel. |
119 | | */ |
120 | | |
121 | | // First compute (X_lo - X_hi)*(Y_hi - Y_lo) |
122 | 91 | const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace); |
123 | 91 | const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace); |
124 | 91 | const auto neg_mask = ~(cmp0 ^ cmp1); |
125 | | |
126 | 91 | karatsuba_mul(ws0, z0, z1, N2, ws1); |
127 | | |
128 | | // Compute X_lo * Y_lo |
129 | 91 | karatsuba_mul(z0, x0, y0, N2, ws1); |
130 | | |
131 | | // Compute X_hi * Y_hi |
132 | 91 | karatsuba_mul(z1, x1, y1, N2, ws1); |
133 | | |
134 | 91 | const word ws_carry = bigint_add3(ws1, z0, N, z1, N); |
135 | 91 | word z_carry = bigint_add2(z + N2, N, ws1, N); |
136 | | |
137 | 91 | z_carry += bigint_add2(z + N + N2, N2, &ws_carry, 1); |
138 | 91 | bigint_add2(z + N + N2, N2, &z_carry, 1); |
139 | | |
140 | 91 | clear_mem(workspace + N, N2); |
141 | | |
142 | 91 | bigint_cnd_add(neg_mask.value(), z + N2, workspace, 2 * N - N2); |
143 | 91 | bigint_cnd_sub((~neg_mask).value(), z + N2, workspace, 2 * N - N2); |
144 | 91 | } |
145 | | |
146 | | /* |
147 | | * Karatsuba Squaring Operation |
148 | | */ |
149 | 394 | void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) { |
150 | 394 | if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2 != 0) { |
151 | 276 | switch(N) { |
152 | 0 | case 6: |
153 | 0 | return bigint_comba_sqr6(z, x); |
154 | 0 | case 8: |
155 | 0 | return bigint_comba_sqr8(z, x); |
156 | 0 | case 9: |
157 | 0 | return bigint_comba_sqr9(z, x); |
158 | 42 | case 16: |
159 | 42 | return bigint_comba_sqr16(z, x); |
160 | 42 | case 24: |
161 | 42 | return bigint_comba_sqr24(z, x); |
162 | 192 | default: |
163 | 192 | return basecase_sqr(z, 2 * N, x, N); |
164 | 276 | } |
165 | 276 | } |
166 | | |
167 | 118 | const size_t N2 = N / 2; |
168 | | |
169 | 118 | const word* x0 = x; |
170 | 118 | const word* x1 = x + N2; |
171 | 118 | word* z0 = z; |
172 | 118 | word* z1 = z + N; |
173 | | |
174 | 118 | word* ws0 = workspace; |
175 | 118 | word* ws1 = workspace + N; |
176 | | |
177 | 118 | clear_mem(workspace, 2 * N); |
178 | | |
179 | | // See comment in karatsuba_mul |
180 | 118 | bigint_sub_abs(z0, x0, x1, N2, workspace); |
181 | 118 | karatsuba_sqr(ws0, z0, N2, ws1); |
182 | | |
183 | 118 | karatsuba_sqr(z0, x0, N2, ws1); |
184 | 118 | karatsuba_sqr(z1, x1, N2, ws1); |
185 | | |
186 | 118 | const word ws_carry = bigint_add3(ws1, z0, N, z1, N); |
187 | 118 | word z_carry = bigint_add2(z + N2, N, ws1, N); |
188 | | |
189 | 118 | z_carry += bigint_add2(z + N + N2, N2, &ws_carry, 1); |
190 | 118 | bigint_add2(z + N + N2, N2, &z_carry, 1); |
191 | | |
192 | | /* |
193 | | * This is only actually required if cmp (result of bigint_sub_abs) is != 0, |
194 | | * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a |
195 | | * timing channel. |
196 | | */ |
197 | 118 | bigint_sub2(z + N2, 2 * N - N2, ws0, N); |
198 | 118 | } |
199 | | |
200 | | /* |
201 | | * Pick a good size for the Karatsuba multiply |
202 | | */ |
203 | 39 | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) { |
204 | 39 | if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) { |
205 | 7 | return 0; |
206 | 7 | } |
207 | | |
208 | 32 | if(((x_size == x_sw) && (x_size % 2 != 0)) || ((y_size == y_sw) && (y_size % 2 != 0))) { |
209 | 0 | return 0; |
210 | 0 | } |
211 | | |
212 | 32 | const size_t start = (x_sw > y_sw) ? x_sw : y_sw; |
213 | 32 | const size_t end = (x_size < y_size) ? x_size : y_size; |
214 | | |
215 | 32 | if(start == end) { |
216 | 13 | if(start % 2 != 0) { |
217 | 4 | return 0; |
218 | 4 | } |
219 | 9 | return start; |
220 | 13 | } |
221 | | |
222 | 27 | for(size_t j = start; j <= end; ++j) { |
223 | 27 | if(j % 2 != 0) { |
224 | 8 | continue; |
225 | 8 | } |
226 | | |
227 | 19 | if(2 * j > z_size) { |
228 | 0 | return 0; |
229 | 0 | } |
230 | | |
231 | 19 | if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) { |
232 | 19 | if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) { |
233 | 4 | return j + 2; |
234 | 4 | } |
235 | 15 | return j; |
236 | 19 | } |
237 | 19 | } |
238 | | |
239 | 0 | return 0; |
240 | 19 | } |
241 | | |
242 | | /* |
243 | | * Pick a good size for the Karatsuba squaring |
244 | | */ |
245 | 60 | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) { |
246 | 60 | if(x_sw == x_size) { |
247 | 0 | if(x_sw % 2 != 0) { |
248 | 0 | return 0; |
249 | 0 | } |
250 | 0 | return x_sw; |
251 | 0 | } |
252 | | |
253 | 80 | for(size_t j = x_sw; j <= x_size; ++j) { |
254 | 80 | if(j % 2 != 0) { |
255 | 20 | continue; |
256 | 20 | } |
257 | | |
258 | 60 | if(2 * j > z_size) { |
259 | 20 | return 0; |
260 | 20 | } |
261 | | |
262 | 40 | if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) { |
263 | 0 | return j + 2; |
264 | 0 | } |
265 | 40 | return j; |
266 | 40 | } |
267 | | |
268 | 0 | return 0; |
269 | 60 | } |
270 | | |
271 | | template <size_t SZ> |
272 | 1.05M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { |
273 | 1.05M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); |
274 | 1.05M | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 272 | 521k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 273 | 521k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 274 | 521k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 272 | 330k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 273 | 330k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 274 | 330k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 272 | 52.2k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 273 | 52.2k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 274 | 52.2k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 272 | 51.1k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 273 | 51.1k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 274 | 51.1k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 272 | 51.1k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 273 | 51.1k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 274 | 51.1k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 272 | 51.1k | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 273 | 51.1k | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 274 | 51.1k | } |
|
275 | | |
276 | | template <size_t SZ> |
277 | 572k | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { |
278 | 572k | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); |
279 | 572k | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 277 | 398k | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 278 | 398k | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 279 | 398k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 277 | 86.8k | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 278 | 86.8k | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 279 | 86.8k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 277 | 21.8k | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 278 | 21.8k | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 279 | 21.8k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 277 | 21.8k | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 278 | 21.8k | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 279 | 21.8k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 277 | 21.8k | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 278 | 21.8k | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 279 | 21.8k | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 277 | 21.8k | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 278 | 21.8k | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 279 | 21.8k | } |
|
280 | | |
281 | | } // namespace |
282 | | |
283 | | void bigint_mul(word z[], |
284 | | size_t z_size, |
285 | | const word x[], |
286 | | size_t x_size, |
287 | | size_t x_sw, |
288 | | const word y[], |
289 | | size_t y_size, |
290 | | size_t y_sw, |
291 | | word workspace[], |
292 | 521k | size_t ws_size) { |
293 | 521k | clear_mem(z, z_size); |
294 | | |
295 | 521k | if(x_sw == 1) { |
296 | 0 | bigint_linmul3(z, y, y_sw, x[0]); |
297 | 521k | } else if(y_sw == 1) { |
298 | 0 | bigint_linmul3(z, x, x_sw, y[0]); |
299 | 521k | } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) { |
300 | 190k | bigint_comba_mul4(z, x, y); |
301 | 330k | } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) { |
302 | 278k | bigint_comba_mul6(z, x, y); |
303 | 278k | } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) { |
304 | 1.10k | bigint_comba_mul8(z, x, y); |
305 | 51.1k | } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) { |
306 | 1 | bigint_comba_mul9(z, x, y); |
307 | 51.1k | } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) { |
308 | 3 | bigint_comba_mul16(z, x, y); |
309 | 51.1k | } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) { |
310 | 3 | bigint_comba_mul24(z, x, y); |
311 | 51.1k | } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || workspace == nullptr) { |
312 | 51.0k | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
313 | 51.0k | } else { |
314 | 39 | const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); |
315 | | |
316 | 39 | if(N > 0 && z_size >= 2 * N && ws_size >= 2 * N) { |
317 | 28 | karatsuba_mul(z, x, y, N, workspace); |
318 | 28 | } else { |
319 | 11 | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
320 | 11 | } |
321 | 39 | } |
322 | 521k | } |
323 | | |
324 | | /* |
325 | | * Squaring Algorithm Dispatcher |
326 | | */ |
327 | 398k | void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) { |
328 | 398k | clear_mem(z, z_size); |
329 | | |
330 | 398k | BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient"); |
331 | | |
332 | 398k | if(x_sw == 1) { |
333 | 71 | bigint_linmul3(z, x, x_sw, x[0]); |
334 | 398k | } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) { |
335 | 311k | bigint_comba_sqr4(z, x); |
336 | 311k | } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) { |
337 | 65.0k | bigint_comba_sqr6(z, x); |
338 | 65.0k | } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) { |
339 | 3 | bigint_comba_sqr8(z, x); |
340 | 21.8k | } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) { |
341 | 3 | bigint_comba_sqr9(z, x); |
342 | 21.8k | } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) { |
343 | 1 | bigint_comba_sqr16(z, x); |
344 | 21.8k | } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) { |
345 | 1 | bigint_comba_sqr24(z, x); |
346 | 21.8k | } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || workspace == nullptr) { |
347 | 21.7k | basecase_sqr(z, z_size, x, x_sw); |
348 | 21.7k | } else { |
349 | 60 | const size_t N = karatsuba_size(z_size, x_size, x_sw); |
350 | | |
351 | 60 | if(N > 0 && z_size >= 2 * N && ws_size >= 2 * N) { |
352 | 40 | karatsuba_sqr(z, x, N, workspace); |
353 | 40 | } else { |
354 | 20 | basecase_sqr(z, z_size, x, x_sw); |
355 | 20 | } |
356 | 60 | } |
357 | 398k | } |
358 | | |
359 | | } // namespace Botan |