/src/botan/src/lib/math/mp/mp_karat.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Multiplication and Squaring |
3 | | * (C) 1999-2010,2018 Jack Lloyd |
4 | | * 2016 Matthias Gierlings |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #include <botan/internal/mp_core.h> |
10 | | #include <botan/internal/mp_asmi.h> |
11 | | #include <botan/internal/ct_utils.h> |
12 | | #include <botan/mem_ops.h> |
13 | | #include <botan/exceptn.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | namespace { |
18 | | |
/*
* Word counts below which the Karatsuba routines in this file stop
* recursing and hand off to the comba/basecase code instead.
*/
constexpr size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
constexpr size_t KARATSUBA_SQUARE_THRESHOLD = 32;
21 | | |
22 | | /* |
23 | | * Simple O(N^2) Multiplication |
24 | | */ |
25 | | void basecase_mul(word z[], size_t z_size, |
26 | | const word x[], size_t x_size, |
27 | | const word y[], size_t y_size) |
28 | 9.82M | { |
29 | 9.82M | if(z_size < x_size + y_size) |
30 | 0 | throw Invalid_Argument("basecase_mul z_size too small"); |
31 | 9.82M | |
32 | 9.82M | const size_t x_size_8 = x_size - (x_size % 8); |
33 | 9.82M | |
34 | 9.82M | clear_mem(z, z_size); |
35 | 9.82M | |
36 | 168M | for(size_t i = 0; i != y_size; ++i) |
37 | 159M | { |
38 | 159M | const word y_i = y[i]; |
39 | 159M | |
40 | 159M | word carry = 0; |
41 | 159M | |
42 | 574M | for(size_t j = 0; j != x_size_8; j += 8) |
43 | 414M | carry = word8_madd3(z + i + j, x + j, y_i, carry); |
44 | 159M | |
45 | 806M | for(size_t j = x_size_8; j != x_size; ++j) |
46 | 647M | z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry); |
47 | 159M | |
48 | 159M | z[x_size+i] = carry; |
49 | 159M | } |
50 | 9.82M | } |
51 | | |
52 | | void basecase_sqr(word z[], size_t z_size, |
53 | | const word x[], size_t x_size) |
54 | 6.17M | { |
55 | 6.17M | if(z_size < 2*x_size) |
56 | 0 | throw Invalid_Argument("basecase_sqr z_size too small"); |
57 | 6.17M | |
58 | 6.17M | const size_t x_size_8 = x_size - (x_size % 8); |
59 | 6.17M | |
60 | 6.17M | clear_mem(z, z_size); |
61 | 6.17M | |
62 | 156M | for(size_t i = 0; i != x_size; ++i) |
63 | 150M | { |
64 | 150M | const word x_i = x[i]; |
65 | 150M | |
66 | 150M | word carry = 0; |
67 | 150M | |
68 | 564M | for(size_t j = 0; j != x_size_8; j += 8) |
69 | 414M | carry = word8_madd3(z + i + j, x + j, x_i, carry); |
70 | 150M | |
71 | 893M | for(size_t j = x_size_8; j != x_size; ++j) |
72 | 743M | z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry); |
73 | 150M | |
74 | 150M | z[x_size+i] = carry; |
75 | 150M | } |
76 | 6.17M | } |
77 | | |
78 | | /* |
79 | | * Karatsuba Multiplication Operation |
80 | | */ |
81 | | void karatsuba_mul(word z[], const word x[], const word y[], size_t N, |
82 | | word workspace[]) |
83 | 15.7M | { |
84 | 15.7M | if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) |
85 | 11.5M | { |
86 | 11.5M | switch(N) |
87 | 11.5M | { |
88 | 0 | case 6: |
89 | 0 | return bigint_comba_mul6(z, x, y); |
90 | 0 | case 8: |
91 | 0 | return bigint_comba_mul8(z, x, y); |
92 | 0 | case 9: |
93 | 0 | return bigint_comba_mul9(z, x, y); |
94 | 11.2M | case 16: |
95 | 11.2M | return bigint_comba_mul16(z, x, y); |
96 | 345k | case 24: |
97 | 345k | return bigint_comba_mul24(z, x, y); |
98 | 11.2k | default: |
99 | 11.2k | return basecase_mul(z, 2*N, x, N, y, N); |
100 | 4.16M | } |
101 | 4.16M | } |
102 | 4.16M | |
103 | 4.16M | const size_t N2 = N / 2; |
104 | 4.16M | |
105 | 4.16M | const word* x0 = x; |
106 | 4.16M | const word* x1 = x + N2; |
107 | 4.16M | const word* y0 = y; |
108 | 4.16M | const word* y1 = y + N2; |
109 | 4.16M | word* z0 = z; |
110 | 4.16M | word* z1 = z + N; |
111 | 4.16M | |
112 | 4.16M | word* ws0 = workspace; |
113 | 4.16M | word* ws1 = workspace + N; |
114 | 4.16M | |
115 | 4.16M | clear_mem(workspace, 2*N); |
116 | 4.16M | |
117 | | /* |
118 | | * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here, |
119 | | * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do |
120 | | * anything, clear_mem above already set the correct result. |
121 | | * |
122 | | * However we ignore the result of the comparisons and always perform the |
123 | | * subtractions and recursively multiply to avoid the timing channel. |
124 | | */ |
125 | 4.16M | |
126 | | // First compute (X_lo - X_hi)*(Y_hi - Y_lo) |
127 | 4.16M | const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace); |
128 | 4.16M | const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace); |
129 | 4.16M | const auto neg_mask = ~(cmp0 ^ cmp1); |
130 | 4.16M | |
131 | 4.16M | karatsuba_mul(ws0, z0, z1, N2, ws1); |
132 | 4.16M | |
133 | | // Compute X_lo * Y_lo |
134 | 4.16M | karatsuba_mul(z0, x0, y0, N2, ws1); |
135 | 4.16M | |
136 | | // Compute X_hi * Y_hi |
137 | 4.16M | karatsuba_mul(z1, x1, y1, N2, ws1); |
138 | 4.16M | |
139 | 4.16M | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
140 | 4.16M | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
141 | 4.16M | |
142 | 4.16M | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
143 | 4.16M | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
144 | 4.16M | |
145 | 4.16M | clear_mem(workspace + N, N2); |
146 | 4.16M | |
147 | 4.16M | bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2); |
148 | 4.16M | } |
149 | | |
150 | | /* |
151 | | * Karatsuba Squaring Operation |
152 | | */ |
153 | | void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) |
154 | 56.4M | { |
155 | 56.4M | if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) |
156 | 42.8M | { |
157 | 42.8M | switch(N) |
158 | 42.8M | { |
159 | 0 | case 6: |
160 | 0 | return bigint_comba_sqr6(z, x); |
161 | 0 | case 8: |
162 | 0 | return bigint_comba_sqr8(z, x); |
163 | 0 | case 9: |
164 | 0 | return bigint_comba_sqr9(z, x); |
165 | 36.6M | case 16: |
166 | 36.6M | return bigint_comba_sqr16(z, x); |
167 | 1.14M | case 24: |
168 | 1.14M | return bigint_comba_sqr24(z, x); |
169 | 4.99M | default: |
170 | 4.99M | return basecase_sqr(z, 2*N, x, N); |
171 | 13.6M | } |
172 | 13.6M | } |
173 | 13.6M | |
174 | 13.6M | const size_t N2 = N / 2; |
175 | 13.6M | |
176 | 13.6M | const word* x0 = x; |
177 | 13.6M | const word* x1 = x + N2; |
178 | 13.6M | word* z0 = z; |
179 | 13.6M | word* z1 = z + N; |
180 | 13.6M | |
181 | 13.6M | word* ws0 = workspace; |
182 | 13.6M | word* ws1 = workspace + N; |
183 | 13.6M | |
184 | 13.6M | clear_mem(workspace, 2*N); |
185 | 13.6M | |
186 | | // See comment in karatsuba_mul |
187 | 13.6M | bigint_sub_abs(z0, x0, x1, N2, workspace); |
188 | 13.6M | karatsuba_sqr(ws0, z0, N2, ws1); |
189 | 13.6M | |
190 | 13.6M | karatsuba_sqr(z0, x0, N2, ws1); |
191 | 13.6M | karatsuba_sqr(z1, x1, N2, ws1); |
192 | 13.6M | |
193 | 13.6M | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
194 | 13.6M | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
195 | 13.6M | |
196 | 13.6M | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
197 | 13.6M | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
198 | 13.6M | |
199 | | /* |
200 | | * This is only actually required if cmp (result of bigint_sub_abs) is != 0, |
201 | | * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a |
202 | | * timing channel. |
203 | | */ |
204 | 13.6M | bigint_sub2(z + N2, 2*N-N2, ws0, N); |
205 | 13.6M | } |
206 | | |
/*
* Choose the operand size N to use for a Karatsuba multiply.
*
* z_size         - words available in the output buffer
* x_size, y_size - allocated sizes of the operands
* x_sw, y_sw     - sizes of the operands as given by the caller, each of
*                  which must fit within both allocations
*
* Returns an even N that is >= both x_sw and y_sw and <= both x_size
* and y_size, or 0 when no acceptable size exists. When the candidate is
* 2 mod 4 and there is room, the next multiple of 4 is returned instead.
*/
size_t karatsuba_size(size_t z_size,
                      size_t x_size, size_t x_sw,
                      size_t y_size, size_t y_sw)
   {
   const bool sw_fits = (x_sw <= x_size) && (x_sw <= y_size) &&
                        (y_sw <= x_size) && (y_sw <= y_size);
   if(!sw_fits)
      return 0;

   // An operand that fills its odd-sized allocation cannot be padded to even
   const bool x_full_and_odd = (x_sw == x_size) && (x_size % 2 != 0);
   const bool y_full_and_odd = (y_sw == y_size) && (y_size % 2 != 0);
   if(x_full_and_odd || y_full_and_odd)
      return 0;

   const size_t lowest = (y_sw < x_sw) ? x_sw : y_sw;
   const size_t highest = (y_size > x_size) ? x_size : y_size;

   if(lowest == highest)
      return (lowest % 2 == 0) ? lowest : 0;

   // lowest < highest here, so rounding up to even cannot pass highest
   for(size_t n = lowest + (lowest % 2); n <= highest; n += 2)
      {
      if(2*n > z_size)
         return 0;

      const bool in_range = (x_sw <= n) && (n <= x_size) &&
                            (y_sw <= n) && (n <= y_size);
      if(!in_range)
         continue;

      const bool can_round_to_4 = (n % 4 == 2) &&
                                  (n + 2 <= x_size) &&
                                  (n + 2 <= y_size) &&
                                  (2*(n + 2) <= z_size);

      return can_round_to_4 ? (n + 2) : n;
      }

   return 0;
   }
250 | | |
/*
* Choose the operand size N to use for a Karatsuba squaring.
*
* z_size - words available in the output buffer
* x_size - allocated size of the operand
* x_sw   - size of the operand as given by the caller
*
* Returns an even N with x_sw <= N <= x_size, or 0 when no acceptable
* size exists. When the candidate is 2 mod 4 and there is room, the next
* multiple of 4 is returned instead.
*/
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
   {
   if(x_sw == x_size)
      return (x_sw % 2 == 0) ? x_sw : 0;

   // No candidate sizes at all
   if(x_sw > x_size)
      return 0;

   // x_sw < x_size here, so rounding up to even cannot pass x_size
   const size_t n = x_sw + (x_sw % 2);

   if(2*n > z_size)
      return 0;

   const bool can_round_to_4 = (n % 4 == 2) &&
                               (n + 2 <= x_size) &&
                               (2*(n + 2) <= z_size);

   return can_round_to_4 ? (n + 2) : n;
   }
278 | | |
/*
* True when both operands can be treated as exactly SZ words (each fits
* in SZ words and has at least SZ words allocated) and the output has
* room for the 2*SZ word result.
*/
template<size_t SZ>
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
                                size_t y_sw, size_t y_size,
                                size_t z_size)
   {
   if(x_sw > SZ || x_size < SZ)
      return false;

   if(y_sw > SZ || y_size < SZ)
      return false;

   return z_size >= 2*SZ;
   }
|
288 | | |
/*
* True when the operand can be treated as exactly SZ words (it fits in
* SZ words and has at least SZ words allocated) and the output has room
* for the 2*SZ word result.
*/
template<size_t SZ>
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
                                size_t z_size)
   {
   if(x_sw > SZ || x_size < SZ)
      return false;

   return z_size >= 2*SZ;
   }
|
295 | | |
296 | | } |
297 | | |
298 | | void bigint_mul(word z[], size_t z_size, |
299 | | const word x[], size_t x_size, size_t x_sw, |
300 | | const word y[], size_t y_size, size_t y_sw, |
301 | | word workspace[], size_t ws_size) |
302 | 182M | { |
303 | 182M | clear_mem(z, z_size); |
304 | 182M | |
305 | 182M | if(x_sw == 1) |
306 | 94.2k | { |
307 | 94.2k | bigint_linmul3(z, y, y_sw, x[0]); |
308 | 94.2k | } |
309 | 182M | else if(y_sw == 1) |
310 | 1.57k | { |
311 | 1.57k | bigint_linmul3(z, x, x_sw, y[0]); |
312 | 1.57k | } |
313 | 182M | else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) |
314 | 44.6M | { |
315 | 44.6M | bigint_comba_mul4(z, x, y); |
316 | 44.6M | } |
317 | 138M | else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) |
318 | 33.0M | { |
319 | 33.0M | bigint_comba_mul6(z, x, y); |
320 | 33.0M | } |
321 | 105M | else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) |
322 | 9.45M | { |
323 | 9.45M | bigint_comba_mul8(z, x, y); |
324 | 9.45M | } |
325 | 95.6M | else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) |
326 | 81.8M | { |
327 | 81.8M | bigint_comba_mul9(z, x, y); |
328 | 81.8M | } |
329 | 13.8M | else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) |
330 | 287k | { |
331 | 287k | bigint_comba_mul16(z, x, y); |
332 | 287k | } |
333 | 13.5M | else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) |
334 | 517k | { |
335 | 517k | bigint_comba_mul24(z, x, y); |
336 | 517k | } |
337 | 13.0M | else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || |
338 | 3.92M | y_sw < KARATSUBA_MULTIPLY_THRESHOLD || |
339 | 3.92M | !workspace) |
340 | 9.15M | { |
341 | 9.15M | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
342 | 9.15M | } |
343 | 3.92M | else |
344 | 3.92M | { |
345 | 3.92M | const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); |
346 | 3.92M | |
347 | 3.92M | if(N && z_size >= 2*N && ws_size >= 2*N) |
348 | 3.26M | karatsuba_mul(z, x, y, N, workspace); |
349 | 657k | else |
350 | 657k | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
351 | 3.92M | } |
352 | 182M | } |
353 | | |
354 | | /* |
355 | | * Squaring Algorithm Dispatcher |
356 | | */ |
357 | | void bigint_sqr(word z[], size_t z_size, |
358 | | const word x[], size_t x_size, size_t x_sw, |
359 | | word workspace[], size_t ws_size) |
360 | 174M | { |
361 | 174M | clear_mem(z, z_size); |
362 | 174M | |
363 | 174M | BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient"); |
364 | 174M | |
365 | 174M | if(x_sw == 1) |
366 | 129k | { |
367 | 129k | bigint_linmul3(z, x, x_sw, x[0]); |
368 | 129k | } |
369 | 174M | else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) |
370 | 36.4M | { |
371 | 36.4M | bigint_comba_sqr4(z, x); |
372 | 36.4M | } |
373 | 137M | else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) |
374 | 33.7M | { |
375 | 33.7M | bigint_comba_sqr6(z, x); |
376 | 33.7M | } |
377 | 104M | else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) |
378 | 8.05M | { |
379 | 8.05M | bigint_comba_sqr8(z, x); |
380 | 8.05M | } |
381 | 96.1M | else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) |
382 | 77.6M | { |
383 | 77.6M | bigint_comba_sqr9(z, x); |
384 | 77.6M | } |
385 | 18.5M | else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) |
386 | 408k | { |
387 | 408k | bigint_comba_sqr16(z, x); |
388 | 408k | } |
389 | 18.1M | else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) |
390 | 1.38M | { |
391 | 1.38M | bigint_comba_sqr24(z, x); |
392 | 1.38M | } |
393 | 16.7M | else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) |
394 | 598k | { |
395 | 598k | basecase_sqr(z, z_size, x, x_sw); |
396 | 598k | } |
397 | 16.1M | else |
398 | 16.1M | { |
399 | 16.1M | const size_t N = karatsuba_size(z_size, x_size, x_sw); |
400 | 16.1M | |
401 | 16.1M | if(N && z_size >= 2*N && ws_size >= 2*N) |
402 | 15.5M | karatsuba_sqr(z, x, N, workspace); |
403 | 573k | else |
404 | 573k | basecase_sqr(z, z_size, x, x_sw); |
405 | 16.1M | } |
406 | 174M | } |
407 | | |
408 | | } |