/src/botan/src/lib/math/mp/mp_karat.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Multiplication and Squaring |
3 | | * (C) 1999-2010,2018 Jack Lloyd |
4 | | * 2016 Matthias Gierlings |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #include <botan/internal/mp_core.h> |
10 | | |
11 | | #include <botan/exceptn.h> |
12 | | #include <botan/mem_ops.h> |
13 | | #include <botan/internal/ct_utils.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | /* |
18 | | * Simple O(N^2) Multiplication |
19 | | */ |
20 | 9.26M | void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) { |
21 | 9.26M | if(z_size < x_size + y_size) { |
22 | 0 | throw Invalid_Argument("basecase_mul z_size too small"); |
23 | 0 | } |
24 | | |
25 | 9.26M | const size_t x_size_8 = x_size - (x_size % 8); |
26 | | |
27 | 9.26M | clear_mem(z, z_size); |
28 | | |
29 | 78.7M | for(size_t i = 0; i != y_size; ++i) { |
30 | 69.5M | const word y_i = y[i]; |
31 | | |
32 | 69.5M | word carry = 0; |
33 | | |
34 | 180M | for(size_t j = 0; j != x_size_8; j += 8) { |
35 | 111M | carry = word8_madd3(z + i + j, x + j, y_i, carry); |
36 | 111M | } |
37 | | |
38 | 359M | for(size_t j = x_size_8; j != x_size; ++j) { |
39 | 290M | z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry); |
40 | 290M | } |
41 | | |
42 | 69.5M | z[x_size + i] = carry; |
43 | 69.5M | } |
44 | 9.26M | } |
45 | | |
46 | 4.67M | void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) { |
47 | 4.67M | if(z_size < 2 * x_size) { |
48 | 0 | throw Invalid_Argument("basecase_sqr z_size too small"); |
49 | 0 | } |
50 | | |
51 | 4.67M | const size_t x_size_8 = x_size - (x_size % 8); |
52 | | |
53 | 4.67M | clear_mem(z, z_size); |
54 | | |
55 | 29.7M | for(size_t i = 0; i != x_size; ++i) { |
56 | 25.0M | const word x_i = x[i]; |
57 | | |
58 | 25.0M | word carry = 0; |
59 | | |
60 | 58.9M | for(size_t j = 0; j != x_size_8; j += 8) { |
61 | 33.8M | carry = word8_madd3(z + i + j, x + j, x_i, carry); |
62 | 33.8M | } |
63 | | |
64 | 134M | for(size_t j = x_size_8; j != x_size; ++j) { |
65 | 109M | z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry); |
66 | 109M | } |
67 | | |
68 | 25.0M | z[x_size + i] = carry; |
69 | 25.0M | } |
70 | 4.67M | } |
71 | | |
72 | | namespace { |
73 | | |
74 | | const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32; |
75 | | const size_t KARATSUBA_SQUARE_THRESHOLD = 32; |
76 | | |
77 | | /* |
78 | | * Karatsuba Multiplication Operation |
79 | | */ |
80 | 1.22M | void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) { |
81 | 1.22M | if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) { |
82 | 914k | switch(N) { |
83 | 0 | case 6: |
84 | 0 | return bigint_comba_mul6(z, x, y); |
85 | 0 | case 8: |
86 | 0 | return bigint_comba_mul8(z, x, y); |
87 | 0 | case 9: |
88 | 0 | return bigint_comba_mul9(z, x, y); |
89 | 889k | case 16: |
90 | 889k | return bigint_comba_mul16(z, x, y); |
91 | 3.09k | case 24: |
92 | 3.09k | return bigint_comba_mul24(z, x, y); |
93 | 22.0k | default: |
94 | 22.0k | return basecase_mul(z, 2 * N, x, N, y, N); |
95 | 914k | } |
96 | 914k | } |
97 | | |
98 | 310k | const size_t N2 = N / 2; |
99 | | |
100 | 310k | const word* x0 = x; |
101 | 310k | const word* x1 = x + N2; |
102 | 310k | const word* y0 = y; |
103 | 310k | const word* y1 = y + N2; |
104 | 310k | word* z0 = z; |
105 | 310k | word* z1 = z + N; |
106 | | |
107 | 310k | word* ws0 = workspace; |
108 | 310k | word* ws1 = workspace + N; |
109 | | |
110 | 310k | clear_mem(workspace, 2 * N); |
111 | | |
112 | | /* |
113 | | * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here, |
114 | | * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do |
115 | | * anything, clear_mem above already set the correct result. |
116 | | * |
117 | | * However we ignore the result of the comparisons and always perform the |
118 | | * subtractions and recursively multiply to avoid the timing channel. |
119 | | */ |
120 | | |
121 | | // First compute (X_lo - X_hi)*(Y_hi - Y_lo) |
122 | 310k | const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace); |
123 | 310k | const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace); |
124 | 310k | const auto neg_mask = ~(cmp0 ^ cmp1); |
125 | | |
126 | 310k | karatsuba_mul(ws0, z0, z1, N2, ws1); |
127 | | |
128 | | // Compute X_lo * Y_lo |
129 | 310k | karatsuba_mul(z0, x0, y0, N2, ws1); |
130 | | |
131 | | // Compute X_hi * Y_hi |
132 | 310k | karatsuba_mul(z1, x1, y1, N2, ws1); |
133 | | |
134 | 310k | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
135 | 310k | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
136 | | |
137 | 310k | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
138 | 310k | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
139 | | |
140 | 310k | clear_mem(workspace + N, N2); |
141 | | |
142 | 310k | bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2 * N - N2); |
143 | 310k | } |
144 | | |
145 | | /* |
146 | | * Karatsuba Squaring Operation |
147 | | */ |
148 | 700k | void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) { |
149 | 700k | if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) { |
150 | 547k | switch(N) { |
151 | 0 | case 6: |
152 | 0 | return bigint_comba_sqr6(z, x); |
153 | 0 | case 8: |
154 | 0 | return bigint_comba_sqr8(z, x); |
155 | 0 | case 9: |
156 | 0 | return bigint_comba_sqr9(z, x); |
157 | 439k | case 16: |
158 | 439k | return bigint_comba_sqr16(z, x); |
159 | 2.26k | case 24: |
160 | 2.26k | return bigint_comba_sqr24(z, x); |
161 | 105k | default: |
162 | 105k | return basecase_sqr(z, 2 * N, x, N); |
163 | 547k | } |
164 | 547k | } |
165 | | |
166 | 152k | const size_t N2 = N / 2; |
167 | | |
168 | 152k | const word* x0 = x; |
169 | 152k | const word* x1 = x + N2; |
170 | 152k | word* z0 = z; |
171 | 152k | word* z1 = z + N; |
172 | | |
173 | 152k | word* ws0 = workspace; |
174 | 152k | word* ws1 = workspace + N; |
175 | | |
176 | 152k | clear_mem(workspace, 2 * N); |
177 | | |
178 | | // See comment in karatsuba_mul |
179 | 152k | bigint_sub_abs(z0, x0, x1, N2, workspace); |
180 | 152k | karatsuba_sqr(ws0, z0, N2, ws1); |
181 | | |
182 | 152k | karatsuba_sqr(z0, x0, N2, ws1); |
183 | 152k | karatsuba_sqr(z1, x1, N2, ws1); |
184 | | |
185 | 152k | const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); |
186 | 152k | word z_carry = bigint_add2_nc(z + N2, N, ws1, N); |
187 | | |
188 | 152k | z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); |
189 | 152k | bigint_add2_nc(z + N + N2, N2, &z_carry, 1); |
190 | | |
191 | | /* |
192 | | * This is only actually required if cmp (result of bigint_sub_abs) is != 0, |
193 | | * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a |
194 | | * timing channel. |
195 | | */ |
196 | 152k | bigint_sub2(z + N2, 2 * N - N2, ws0, N); |
197 | 152k | } |
198 | | |
199 | | /* |
200 | | * Pick a good size for the Karatsuba multiply |
201 | | */ |
202 | 393k | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) { |
203 | 393k | if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) { |
204 | 158 | return 0; |
205 | 158 | } |
206 | | |
207 | 393k | if(((x_size == x_sw) && (x_size % 2)) || ((y_size == y_sw) && (y_size % 2))) { |
208 | 39 | return 0; |
209 | 39 | } |
210 | | |
211 | 393k | const size_t start = (x_sw > y_sw) ? x_sw : y_sw; |
212 | 393k | const size_t end = (x_size < y_size) ? x_size : y_size; |
213 | | |
214 | 393k | if(start == end) { |
215 | 108k | if(start % 2) { |
216 | 0 | return 0; |
217 | 0 | } |
218 | 108k | return start; |
219 | 108k | } |
220 | | |
221 | 385k | for(size_t j = start; j <= end; ++j) { |
222 | 385k | if(j % 2) { |
223 | 100k | continue; |
224 | 100k | } |
225 | | |
226 | 285k | if(2 * j > z_size) { |
227 | 99.7k | return 0; |
228 | 99.7k | } |
229 | | |
230 | 185k | if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) { |
231 | 185k | if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) { |
232 | 613 | return j + 2; |
233 | 613 | } |
234 | 184k | return j; |
235 | 185k | } |
236 | 185k | } |
237 | | |
238 | 0 | return 0; |
239 | 285k | } |
240 | | |
241 | | /* |
242 | | * Pick a good size for the Karatsuba squaring |
243 | | */ |
244 | 477k | size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) { |
245 | 477k | if(x_sw == x_size) { |
246 | 115 | if(x_sw % 2) { |
247 | 7 | return 0; |
248 | 7 | } |
249 | 108 | return x_sw; |
250 | 115 | } |
251 | | |
252 | 712k | for(size_t j = x_sw; j <= x_size; ++j) { |
253 | 712k | if(j % 2) { |
254 | 235k | continue; |
255 | 235k | } |
256 | | |
257 | 477k | if(2 * j > z_size) { |
258 | 235k | return 0; |
259 | 235k | } |
260 | | |
261 | 242k | if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) { |
262 | 0 | return j + 2; |
263 | 0 | } |
264 | 242k | return j; |
265 | 242k | } |
266 | | |
267 | 0 | return 0; |
268 | 477k | } |
269 | | |
270 | | template <size_t SZ> |
271 | 77.6M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { |
272 | 77.6M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); |
273 | 77.6M | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 21.4M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 21.4M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 21.4M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 15.3M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 15.3M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 15.3M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 10.7M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 10.7M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 10.7M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 10.4M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 10.4M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 10.4M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 9.90M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 9.90M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 9.90M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Line | Count | Source | 271 | 9.67M | inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) { | 272 | 9.67M | return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ); | 273 | 9.67M | } |
|
274 | | |
275 | | template <size_t SZ> |
276 | 38.7M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { |
277 | 38.7M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); |
278 | 38.7M | } mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 9.84M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 9.84M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 9.84M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 8.13M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 8.13M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 8.13M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 5.60M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 5.60M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 5.60M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 5.30M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 5.30M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 5.30M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 4.97M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 4.97M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 4.97M | } |
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long) Line | Count | Source | 276 | 4.88M | inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) { | 277 | 4.88M | return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ); | 278 | 4.88M | } |
|
279 | | |
280 | | } // namespace |
281 | | |
282 | | void bigint_mul(word z[], |
283 | | size_t z_size, |
284 | | const word x[], |
285 | | size_t x_size, |
286 | | size_t x_sw, |
287 | | const word y[], |
288 | | size_t y_size, |
289 | | size_t y_sw, |
290 | | word workspace[], |
291 | 21.4M | size_t ws_size) { |
292 | 21.4M | clear_mem(z, z_size); |
293 | | |
294 | 21.4M | if(x_sw == 1) { |
295 | 45.9k | bigint_linmul3(z, y, y_sw, x[0]); |
296 | 21.4M | } else if(y_sw == 1) { |
297 | 473 | bigint_linmul3(z, x, x_sw, y[0]); |
298 | 21.4M | } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) { |
299 | 6.09M | bigint_comba_mul4(z, x, y); |
300 | 15.3M | } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) { |
301 | 4.61M | bigint_comba_mul6(z, x, y); |
302 | 10.7M | } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) { |
303 | 275k | bigint_comba_mul8(z, x, y); |
304 | 10.4M | } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) { |
305 | 564k | bigint_comba_mul9(z, x, y); |
306 | 9.90M | } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) { |
307 | 223k | bigint_comba_mul16(z, x, y); |
308 | 9.67M | } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) { |
309 | 139k | bigint_comba_mul24(z, x, y); |
310 | 9.54M | } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || !workspace) { |
311 | 9.14M | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
312 | 9.14M | } else { |
313 | 393k | const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); |
314 | | |
315 | 393k | if(N && z_size >= 2 * N && ws_size >= 2 * N) { |
316 | 293k | karatsuba_mul(z, x, y, N, workspace); |
317 | 293k | } else { |
318 | 100k | basecase_mul(z, z_size, x, x_sw, y, y_sw); |
319 | 100k | } |
320 | 393k | } |
321 | 21.4M | } |
322 | | |
323 | | /* |
324 | | * Squaring Algorithm Dispatcher |
325 | | */ |
326 | 10.0M | void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) { |
327 | 10.0M | clear_mem(z, z_size); |
328 | | |
329 | 10.0M | BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient"); |
330 | | |
331 | 10.0M | if(x_sw == 1) { |
332 | 181k | bigint_linmul3(z, x, x_sw, x[0]); |
333 | 9.84M | } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) { |
334 | 1.71M | bigint_comba_sqr4(z, x); |
335 | 8.13M | } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) { |
336 | 2.52M | bigint_comba_sqr6(z, x); |
337 | 5.60M | } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) { |
338 | 299k | bigint_comba_sqr8(z, x); |
339 | 5.30M | } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) { |
340 | 336k | bigint_comba_sqr9(z, x); |
341 | 4.97M | } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) { |
342 | 87.9k | bigint_comba_sqr16(z, x); |
343 | 4.88M | } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) { |
344 | 73.9k | bigint_comba_sqr24(z, x); |
345 | 4.80M | } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) { |
346 | 4.33M | basecase_sqr(z, z_size, x, x_sw); |
347 | 4.33M | } else { |
348 | 477k | const size_t N = karatsuba_size(z_size, x_size, x_sw); |
349 | | |
350 | 477k | if(N && z_size >= 2 * N && ws_size >= 2 * N) { |
351 | 242k | karatsuba_sqr(z, x, N, workspace); |
352 | 242k | } else { |
353 | 235k | basecase_sqr(z, z_size, x, x_sw); |
354 | 235k | } |
355 | 477k | } |
356 | 10.0M | } |
357 | | |
358 | | } // namespace Botan |