/src/botan/src/lib/math/mp/mp_karat.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Multiplication and Squaring |
3 | | * (C) 1999-2010,2018 Jack Lloyd |
4 | | * 2016 Matthias Gierlings |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #include <botan/internal/mp_core.h> |
10 | | |
11 | | #include <botan/exceptn.h> |
12 | | #include <botan/mem_ops.h> |
13 | | #include <botan/internal/ct_utils.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | /* |
18 | | * Simple O(N^2) Multiplication |
19 | | */ |
20 | 4.68M | void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) { |
21 | 4.68M | if(z_size < x_size + y_size) { |
22 | 0 | throw Invalid_Argument("basecase_mul z_size too small"); |
23 | 0 | } |
24 | | |
25 | 4.68M | const size_t x_size_8 = x_size - (x_size % 8); |
26 | | |
27 | 4.68M | clear_mem(z, z_size); |
28 | | |
29 | 44.8M | for(size_t i = 0; i != y_size; ++i) { |
30 | 40.1M | const word y_i = y[i]; |
31 | | |
32 | 40.1M | word carry = 0; |
33 | | |
34 | 138M | for(size_t j = 0; j != x_size_8; j += 8) { |
35 | 98.2M | carry = word8_madd3(z + i + j, x + j, y_i, carry); |
36 | 98.2M | } |
37 | | |
38 | 196M | for(size_t j = x_size_8; j != x_size; ++j) { |
39 | 156M | z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry); |
40 | 156M | } |
41 | | |
42 | 40.1M | z[x_size + i] = carry; |
43 | 40.1M | } |
44 | 4.68M | } |
45 | | |
46 | 7.50M | void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) { |
47 | 7.50M | if(z_size < 2 * x_size) { |
48 | 0 | throw Invalid_Argument("basecase_sqr z_size too small"); |
49 | 0 | } |
50 | | |
51 | 7.50M | const size_t x_size_8 = x_size - (x_size % 8); |
52 | | |
53 | 7.50M | clear_mem(z, z_size); |
54 | | |
55 | 64.7M | for(size_t i = 0; i != x_size; ++i) { |
56 | 57.2M | const word x_i = x[i]; |
57 | | |
58 | 57.2M | word carry = 0; |
59 | | |
60 | 197M | for(size_t j = 0; j != x_size_8; j += 8) { |
61 | 140M | carry = word8_madd3(z + i + j, x + j, x_i, carry); |
62 | 140M | } |
63 | | |
64 | 317M | for(size_t j = x_size_8; j != x_size; ++j) { |
65 | 260M | z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry); |
66 | 260M | } |
67 | | |
68 | 57.2M | z[x_size + i] = carry; |
69 | 57.2M | } |
70 | 7.50M | } |
71 | | |
72 | | namespace { |
73 | | |
74 | | const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32; |
75 | | const size_t KARATSUBA_SQUARE_THRESHOLD = 32; |
76 | | |
77 | | /* |
78 | | * Karatsuba Multiplication Operation |
79 | | */ |
80 | 909k | void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) { |
81 | 909k | if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) { |
82 | 661k | switch(N) { |
83 | 0 | case 6: |
84 | 0 | return bigint_comba_mul6(z, x, y); |
85 | 0 | case 8: |
86 | 0 | return bigint_comba_mul8(z, x, y); |
87 | 0 | case 9: |
88 | 0 | return bigint_comba_mul9(z, x, y); |
89 | 242k | case 16: |
90 | 242k | return bigint_comba_mul16(z, x, y); |
91 | 62.5k | case 24: |
92 | 62.5k | return bigint_comba_mul24(z, x, y); |
93 | 356k | default: |
94 | 356k | return basecase_mul(z, 2 * N, x, N, y, N); |
95 | 661k | } |
96 | 661k | } |
97 | | |
98 | 248k | const size_t N2 = N / 2; |
99 | | |
100 | 248k | const word* x0 = x; |
101 | 248k | const word* x1 = x + N2; |
102 | 248k | const word* y0 = y; |
103 | 248k | const word* y1 = y + N2; |
104 | 248k | word* z0 = z; |
105 | 248k | word* z1 = z + N; |
106 | | |
107 | 248k | word* ws0 = workspace; |
108 | 248k | word* ws1 = workspace + N; |
109 | | |
110 | 248k | clear_mem(workspace, 2 * N); |
111 | | |
112 | | /* |
113 | | * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here, |
114 | | * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do |
115 | | * anything, clear_mem above already set the correct result. |
116 | | * |
117 | | * However we ignore the result of the comparisons and always perform the |
118 | | * subtractions and recursively multiply to avoid the timing channel. |
119 | | */ |
120 | | |
121 | | // First compute (X_lo - X_hi)*(Y_hi - Y_lo) |
122 | 248k | const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace); |
123 | 248k | const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace); |
124 | 248k | const auto neg_mask = ~(cmp0 ^ cmp1); |
125 | | |
126 | 248k | karatsuba_mul(ws0, z0, z1, N2, ws1); |
127 | | |
128 | | // Compute X_lo * Y_lo |
129 | 248k | karatsuba_mul(z0, x0, y0, N2, ws1); |
130 | | |
131 | | // Compute X_hi * Y_hi |
132 | 248k | karatsuba_mul(z1, x1, y1, N2, ws1); |
133 | | |
134 | 248k | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
135 | 248k | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
136 | | |
137 | 248k | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
138 | 248k | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
139 | | |
140 | 248k | clear_mem(workspace + N, N2); |
141 | | |
142 | 248k | bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2 * N - N2); |
143 | 248k | } |
144 | | |
145 | | /* |
146 | | * Karatsuba Squaring Operation |
147 | | */ |
148 | 971k | void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) { |
149 | 971k | if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) { |
150 | 721k | switch(N) { |
151 | 0 | case 6: |
152 | 0 | return bigint_comba_sqr6(z, x); |
153 | 0 | case 8: |
154 | 0 | return bigint_comba_sqr8(z, x); |
155 | 0 | case 9: |
156 | 0 | return bigint_comba_sqr9(z, x); |
157 | 105k | case 16: |
158 | 105k | return bigint_comba_sqr16(z, x); |
159 | 77.8k | case 24: |
160 | 77.8k | return bigint_comba_sqr24(z, x); |
161 | 538k | default: |
162 | 538k | return basecase_sqr(z, 2 * N, x, N); |
163 | 721k | } |
164 | 721k | } |
165 | | |
166 | 249k | const size_t N2 = N / 2; |
167 | | |
168 | 249k | const word* x0 = x; |
169 | 249k | const word* x1 = x + N2; |
170 | 249k | word* z0 = z; |
171 | 249k | word* z1 = z + N; |
172 | | |
173 | 249k | word* ws0 = workspace; |
174 | 249k | word* ws1 = workspace + N; |
175 | | |
176 | 249k | clear_mem(workspace, 2 * N); |
177 | | |
178 | | // See comment in karatsuba_mul |
179 | 249k | bigint_sub_abs(z0, x0, x1, N2, workspace); |
180 | 249k | karatsuba_sqr(ws0, z0, N2, ws1); |
181 | | |
182 | 249k | karatsuba_sqr(z0, x0, N2, ws1); |
183 | 249k | karatsuba_sqr(z1, x1, N2, ws1); |
184 | | |
185 | 249k | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
186 | 249k | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
187 | | |
188 | 249k | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
189 | 249k | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
190 | | |
191 | | /* |
192 | | * This is only actually required if cmp (result of bigint_sub_abs) is != 0, |
193 | | * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a |
194 | | * timing channel. |
195 | | */ |
196 | 249k | bigint_sub2(z + N2, 2 * N - N2, ws0, N); |
197 | 249k | } |
198 | | |
199 | | /* |
200 | | * Pick a good size for the Karatsuba multiply |
201 | | */ |
202 | 313k | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) { |
203 | 313k | if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) { |
204 | 496 | return 0; |
205 | 496 | } |
206 | | |
207 | 313k | if(((x_size == x_sw) && (x_size % 2)) || ((y_size == y_sw) && (y_size % 2))) { |
208 | 12.7k | return 0; |
209 | 12.7k | } |
210 | | |
211 | 300k | const size_t start = (x_sw > y_sw) ? x_sw : y_sw; |
212 | 300k | const size_t end = (x_size < y_size) ? x_size : y_size; |
213 | | |
214 | 300k | if(start == end) { |
215 | 45.5k | if(start % 2) { |
216 | 431 | return 0; |
217 | 431 | } |
218 | 45.1k | return start; |
219 | 45.5k | } |
220 | | |
221 | 379k | for(size_t j = start; j <= end; ++j) { |
222 | 379k | if(j % 2) { |
223 | 124k | continue; |
224 | 124k | } |
225 | | |
226 | 254k | if(2 * j > z_size) { |
227 | 133k | return 0; |
228 | 133k | } |
229 | | |
230 | 121k | if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) { |
231 | 121k | if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) { |
232 | 15.0k | return j + 2; |
233 | 15.0k | } |
234 | 106k | return j; |
235 | 121k | } |
236 | 121k | } |
237 | | |
238 | 0 | return 0; |
239 | 254k | } |
240 | | |
241 | | /* |
242 | | * Pick a good size for the Karatsuba squaring |
243 | | */ |
244 | 375k | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) { |
245 | 375k | if(x_sw == x_size) { |
246 | 93 | if(x_sw % 2) { |
247 | 28 | return 0; |
248 | 28 | } |
249 | 65 | return x_sw; |
250 | 93 | } |
251 | | |
252 | 528k | for(size_t j = x_sw; j <= x_size; ++j) { |
253 | 528k | if(j % 2) { |
254 | 153k | continue; |
255 | 153k | } |
256 | | |
257 | 375k | if(2 * j > z_size) { |
258 | 153k | return 0; |
259 | 153k | } |
260 | | |
261 | 221k | if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) { |
262 | 50 | return j + 2; |
263 | 50 | } |
264 | 221k | return j; |
265 | 221k | } |
266 | | |
267 | 0 | return 0; |
268 | 375k | } |
269 | | |
270 | | template <size_t SZ> |
271 | 59.6M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { |
272 | 59.6M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); |
273 | 59.6M | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 18.0M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 18.0M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 18.0M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 13.8M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 13.8M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 13.8M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 10.8M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 10.8M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 10.8M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 7.54M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 7.54M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 7.54M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 4.66M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 4.66M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 4.66M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 4.65M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 4.65M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 4.65M | } |
|
274 | | |
275 | | template <size_t SZ> |
276 | 72.1M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { |
277 | 72.1M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); |
278 | 72.1M | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 19.3M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 19.3M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 19.3M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 16.0M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 16.0M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 16.0M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 13.0M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 13.0M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 13.0M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 9.05M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 9.05M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 9.05M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 7.33M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 7.33M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 7.33M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 7.31M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 7.31M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 7.31M | } |
|
279 | | |
280 | | } // namespace |
281 | | |
282 | | void bigint_mul(word z[], |
283 | | size_t z_size, |
284 | | const word x[], |
285 | | size_t x_size, |
286 | | size_t x_sw, |
287 | | const word y[], |
288 | | size_t y_size, |
289 | | size_t y_sw, |
290 | | word workspace[], |
291 | 18.2M | size_t ws_size) { |
292 | 18.2M | clear_mem(z, z_size); |
293 | | |
294 | 18.2M | if(x_sw == 1) { |
295 | 159k | bigint_linmul3(z, y, y_sw, x[0]); |
296 | 18.0M | } else if(y_sw == 1) { |
297 | 0 | bigint_linmul3(z, x, x_sw, y[0]); |
298 | 18.0M | } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) { |
299 | 4.25M | bigint_comba_mul4(z, x, y); |
300 | 13.8M | } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) { |
301 | 3.00M | bigint_comba_mul6(z, x, y); |
302 | 10.8M | } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) { |
303 | 3.28M | bigint_comba_mul8(z, x, y); |
304 | 7.54M | } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) { |
305 | 2.87M | bigint_comba_mul9(z, x, y); |
306 | 4.66M | } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) { |
307 | 13.0k | bigint_comba_mul16(z, x, y); |
308 | 4.65M | } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) { |
309 | 160k | bigint_comba_mul24(z, x, y); |
310 | 4.49M | } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || !workspace) { |
311 | 4.17M | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
312 | 4.17M | } else { |
313 | 313k | const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); |
314 | | |
315 | 313k | if(N && z_size >= 2 * N && ws_size >= 2 * N) { |
316 | 164k | karatsuba_mul(z, x, y, N, workspace); |
317 | 164k | } else { |
318 | 148k | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
319 | 148k | } |
320 | 313k | } |
321 | 18.2M | } |
322 | | |
323 | | /* |
324 | | * Squaring Algorithm Dispatcher |
325 | | */ |
326 | 19.6M | void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) { |
327 | 19.6M | clear_mem(z, z_size); |
328 | | |
329 | 19.6M | BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient"); |
330 | | |
331 | 19.6M | if(x_sw == 1) { |
332 | 276k | bigint_linmul3(z, x, x_sw, x[0]); |
333 | 19.3M | } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) { |
334 | 3.35M | bigint_comba_sqr4(z, x); |
335 | 16.0M | } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) { |
336 | 2.93M | bigint_comba_sqr6(z, x); |
337 | 13.0M | } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) { |
338 | 4.03M | bigint_comba_sqr8(z, x); |
339 | 9.05M | } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) { |
340 | 1.72M | bigint_comba_sqr9(z, x); |
341 | 7.33M | } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) { |
342 | 17.4k | bigint_comba_sqr16(z, x); |
343 | 7.31M | } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) { |
344 | 123k | bigint_comba_sqr24(z, x); |
345 | 7.18M | } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) { |
346 | 6.81M | basecase_sqr(z, z_size, x, x_sw); |
347 | 6.81M | } else { |
348 | 375k | const size_t N = karatsuba_size(z_size, x_size, x_sw); |
349 | | |
350 | 375k | if(N && z_size >= 2 * N && ws_size >= 2 * N) { |
351 | 221k | karatsuba_sqr(z, x, N, workspace); |
352 | 221k | } else { |
353 | 153k | basecase_sqr(z, z_size, x, x_sw); |
354 | 153k | } |
355 | 375k | } |
356 | 19.6M | } |
357 | | |
358 | | } // namespace Botan |