Coverage Report

Created: 2020-02-14 15:38

/src/botan/src/lib/math/mp/mp_karat.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#include <botan/internal/mp_core.h>
10
#include <botan/internal/mp_asmi.h>
11
#include <botan/internal/ct_utils.h>
12
#include <botan/mem_ops.h>
13
#include <botan/exceptn.h>
14
15
namespace Botan {
16
17
namespace {
18
19
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
20
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
21
22
/*
23
* Simple O(N^2) Multiplication
24
*/
25
void basecase_mul(word z[], size_t z_size,
26
                  const word x[], size_t x_size,
27
                  const word y[], size_t y_size)
28
7.08M
   {
29
7.08M
   if(z_size < x_size + y_size)
30
0
      throw Invalid_Argument("basecase_mul z_size too small");
31
7.08M
32
7.08M
   const size_t x_size_8 = x_size - (x_size % 8);
33
7.08M
34
7.08M
   clear_mem(z, z_size);
35
7.08M
36
92.2M
   for(size_t i = 0; i != y_size; ++i)
37
85.1M
      {
38
85.1M
      const word y_i = y[i];
39
85.1M
40
85.1M
      word carry = 0;
41
85.1M
42
278M
      for(size_t j = 0; j != x_size_8; j += 8)
43
193M
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
44
85.1M
45
408M
      for(size_t j = x_size_8; j != x_size; ++j)
46
323M
         z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
47
85.1M
48
85.1M
      z[x_size+i] = carry;
49
85.1M
      }
50
7.08M
   }
51
52
void basecase_sqr(word z[], size_t z_size,
53
                  const word x[], size_t x_size)
54
1.43M
   {
55
1.43M
   if(z_size < 2*x_size)
56
0
      throw Invalid_Argument("basecase_sqr z_size too small");
57
1.43M
58
1.43M
   const size_t x_size_8 = x_size - (x_size % 8);
59
1.43M
60
1.43M
   clear_mem(z, z_size);
61
1.43M
62
26.6M
   for(size_t i = 0; i != x_size; ++i)
63
25.2M
      {
64
25.2M
      const word x_i = x[i];
65
25.2M
66
25.2M
      word carry = 0;
67
25.2M
68
90.4M
      for(size_t j = 0; j != x_size_8; j += 8)
69
65.1M
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
70
25.2M
71
145M
      for(size_t j = x_size_8; j != x_size; ++j)
72
120M
         z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
73
25.2M
74
25.2M
      z[x_size+i] = carry;
75
25.2M
      }
76
1.43M
   }
77
78
/*
79
* Karatsuba Multiplication Operation
80
*/
81
void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
82
                   word workspace[])
83
7.85M
   {
84
7.85M
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
85
5.74M
      {
86
5.74M
      switch(N)
87
5.74M
         {
88
5.74M
         case 6:
89
0
            return bigint_comba_mul6(z, x, y);
90
5.74M
         case 8:
91
0
            return bigint_comba_mul8(z, x, y);
92
5.74M
         case 9:
93
0
            return bigint_comba_mul9(z, x, y);
94
5.74M
         case 16:
95
5.35M
            return bigint_comba_mul16(z, x, y);
96
5.74M
         case 24:
97
383k
            return bigint_comba_mul24(z, x, y);
98
5.74M
         default:
99
8.00k
            return basecase_mul(z, 2*N, x, N, y, N);
100
2.11M
         }
101
2.11M
      }
102
2.11M
103
2.11M
   const size_t N2 = N / 2;
104
2.11M
105
2.11M
   const word* x0 = x;
106
2.11M
   const word* x1 = x + N2;
107
2.11M
   const word* y0 = y;
108
2.11M
   const word* y1 = y + N2;
109
2.11M
   word* z0 = z;
110
2.11M
   word* z1 = z + N;
111
2.11M
112
2.11M
   word* ws0 = workspace;
113
2.11M
   word* ws1 = workspace + N;
114
2.11M
115
2.11M
   clear_mem(workspace, 2*N);
116
2.11M
117
2.11M
   /*
118
2.11M
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
119
2.11M
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
120
2.11M
   * anything, clear_mem above already set the correct result.
121
2.11M
   *
122
2.11M
   * However we ignore the result of the comparisons and always perform the
123
2.11M
   * subtractions and recursively multiply to avoid the timing channel.
124
2.11M
   */
125
2.11M
126
2.11M
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
127
2.11M
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
128
2.11M
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
129
2.11M
   const auto neg_mask = ~(cmp0 ^ cmp1);
130
2.11M
131
2.11M
   karatsuba_mul(ws0, z0, z1, N2, ws1);
132
2.11M
133
2.11M
   // Compute X_lo * Y_lo
134
2.11M
   karatsuba_mul(z0, x0, y0, N2, ws1);
135
2.11M
136
2.11M
   // Compute X_hi * Y_hi
137
2.11M
   karatsuba_mul(z1, x1, y1, N2, ws1);
138
2.11M
139
2.11M
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
140
2.11M
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
141
2.11M
142
2.11M
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
143
2.11M
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
144
2.11M
145
2.11M
   clear_mem(workspace + N, N2);
146
2.11M
147
2.11M
   bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2);
148
2.11M
   }
149
150
/*
151
* Karatsuba Squaring Operation
152
*/
153
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
154
21.7M
   {
155
21.7M
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
156
15.9M
      {
157
15.9M
      switch(N)
158
15.9M
         {
159
15.9M
         case 6:
160
0
            return bigint_comba_sqr6(z, x);
161
15.9M
         case 8:
162
0
            return bigint_comba_sqr8(z, x);
163
15.9M
         case 9:
164
0
            return bigint_comba_sqr9(z, x);
165
15.9M
         case 16:
166
14.1M
            return bigint_comba_sqr16(z, x);
167
15.9M
         case 24:
168
1.26M
            return bigint_comba_sqr24(z, x);
169
15.9M
         default:
170
531k
            return basecase_sqr(z, 2*N, x, N);
171
5.81M
         }
172
5.81M
      }
173
5.81M
174
5.81M
   const size_t N2 = N / 2;
175
5.81M
176
5.81M
   const word* x0 = x;
177
5.81M
   const word* x1 = x + N2;
178
5.81M
   word* z0 = z;
179
5.81M
   word* z1 = z + N;
180
5.81M
181
5.81M
   word* ws0 = workspace;
182
5.81M
   word* ws1 = workspace + N;
183
5.81M
184
5.81M
   clear_mem(workspace, 2*N);
185
5.81M
186
5.81M
   // See comment in karatsuba_mul
187
5.81M
   bigint_sub_abs(z0, x0, x1, N2, workspace);
188
5.81M
   karatsuba_sqr(ws0, z0, N2, ws1);
189
5.81M
190
5.81M
   karatsuba_sqr(z0, x0, N2, ws1);
191
5.81M
   karatsuba_sqr(z1, x1, N2, ws1);
192
5.81M
193
5.81M
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
194
5.81M
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
195
5.81M
196
5.81M
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
197
5.81M
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
198
5.81M
199
5.81M
   /*
200
5.81M
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
201
5.81M
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
202
5.81M
   * timing channel.
203
5.81M
   */
204
5.81M
   bigint_sub2(z + N2, 2*N-N2, ws0, N);
205
5.81M
   }
206
207
/*
208
* Pick a good size for the Karatsuba multiply
209
*/
210
size_t karatsuba_size(size_t z_size,
211
                      size_t x_size, size_t x_sw,
212
                      size_t y_size, size_t y_sw)
213
1.83M
   {
214
1.83M
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
215
462
      return 0;
216
1.83M
217
1.83M
   if(((x_size == x_sw) && (x_size % 2)) ||
218
1.83M
      ((y_size == y_sw) && (y_size % 2)))
219
1.14k
      return 0;
220
1.83M
221
1.83M
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
222
1.83M
   const size_t end = (x_size < y_size) ? x_size : y_size;
223
1.83M
224
1.83M
   if(start == end)
225
1.11M
      {
226
1.11M
      if(start % 2)
227
0
         return 0;
228
1.11M
      return start;
229
1.11M
      }
230
725k
231
1.04M
   for(size_t j = start; j <= end; ++j)
232
1.04M
      {
233
1.04M
      if(j % 2)
234
316k
         continue;
235
725k
236
725k
      if(2*j > z_size)
237
316k
         return 0;
238
409k
239
409k
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
240
409k
         {
241
409k
         if(j % 4 == 2 &&
242
409k
            (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
243
378
            return j+2;
244
408k
         return j;
245
408k
         }
246
409k
      }
247
725k
248
725k
   return 0;
249
725k
   }
250
251
/*
252
* Pick a good size for the Karatsuba squaring
253
*/
254
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
255
4.65M
   {
256
4.65M
   if(x_sw == x_size)
257
19.4k
      {
258
19.4k
      if(x_sw % 2)
259
172
         return 0;
260
19.2k
      return x_sw;
261
19.2k
      }
262
4.63M
263
5.51M
   for(size_t j = x_sw; j <= x_size; ++j)
264
5.51M
      {
265
5.51M
      if(j % 2)
266
875k
         continue;
267
4.63M
268
4.63M
      if(2*j > z_size)
269
320k
         return 0;
270
4.31M
271
4.31M
      if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
272
142
         return j+2;
273
4.31M
      return j;
274
4.31M
      }
275
4.63M
276
4.63M
   return 0;
277
4.63M
   }
278
279
template<size_t SZ>
280
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
281
                                size_t y_sw, size_t y_size,
282
                                size_t z_size)
283
398M
   {
284
398M
   return (x_sw <= SZ && x_size >= SZ &&
285
398M
           y_sw <= SZ && y_size >= SZ &&
286
398M
           z_size >= 2*SZ);
287
398M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
283
143M
   {
284
143M
   return (x_sw <= SZ && x_size >= SZ &&
285
143M
           y_sw <= SZ && y_size >= SZ &&
286
143M
           z_size >= 2*SZ);
287
143M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
283
100M
   {
284
100M
   return (x_sw <= SZ && x_size >= SZ &&
285
100M
           y_sw <= SZ && y_size >= SZ &&
286
100M
           z_size >= 2*SZ);
287
100M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
283
71.8M
   {
284
71.8M
   return (x_sw <= SZ && x_size >= SZ &&
285
71.8M
           y_sw <= SZ && y_size >= SZ &&
286
71.8M
           z_size >= 2*SZ);
287
71.8M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
283
64.7M
   {
284
64.7M
   return (x_sw <= SZ && x_size >= SZ &&
285
64.7M
           y_sw <= SZ && y_size >= SZ &&
286
64.7M
           z_size >= 2*SZ);
287
64.7M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
283
9.08M
   {
284
9.08M
   return (x_sw <= SZ && x_size >= SZ &&
285
9.08M
           y_sw <= SZ && y_size >= SZ &&
286
9.08M
           z_size >= 2*SZ);
287
9.08M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
283
8.78M
   {
284
8.78M
   return (x_sw <= SZ && x_size >= SZ &&
285
8.78M
           y_sw <= SZ && y_size >= SZ &&
286
8.78M
           z_size >= 2*SZ);
287
8.78M
   }
288
289
template<size_t SZ>
290
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
291
                                size_t z_size)
292
387M
   {
293
387M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
294
387M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
292
138M
   {
293
138M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
294
138M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
292
102M
   {
293
102M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
294
102M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
292
71.0M
   {
293
71.0M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
294
71.0M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
292
64.9M
   {
293
64.9M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
294
64.9M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
292
5.69M
   {
293
5.69M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
294
5.69M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
292
5.41M
   {
293
5.41M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
294
5.41M
   }
295
296
}
297
298
void bigint_mul(word z[], size_t z_size,
299
                const word x[], size_t x_size, size_t x_sw,
300
                const word y[], size_t y_size, size_t y_sw,
301
                word workspace[], size_t ws_size)
302
143M
   {
303
143M
   clear_mem(z, z_size);
304
143M
305
143M
   if(x_sw == 1)
306
160k
      {
307
160k
      bigint_linmul3(z, y, y_sw, x[0]);
308
160k
      }
309
143M
   else if(y_sw == 1)
310
983
      {
311
983
      bigint_linmul3(z, x, x_sw, y[0]);
312
983
      }
313
143M
   else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size))
314
42.4M
      {
315
42.4M
      bigint_comba_mul4(z, x, y);
316
42.4M
      }
317
100M
   else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size))
318
29.0M
      {
319
29.0M
      bigint_comba_mul6(z, x, y);
320
29.0M
      }
321
71.8M
   else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size))
322
7.17M
      {
323
7.17M
      bigint_comba_mul8(z, x, y);
324
7.17M
      }
325
64.7M
   else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size))
326
55.6M
      {
327
55.6M
      bigint_comba_mul9(z, x, y);
328
55.6M
      }
329
9.08M
   else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size))
330
294k
      {
331
294k
      bigint_comba_mul16(z, x, y);
332
294k
      }
333
8.78M
   else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size))
334
193k
      {
335
193k
      bigint_comba_mul24(z, x, y);
336
193k
      }
337
8.59M
   else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
338
8.59M
           y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
339
8.59M
           !workspace)
340
6.75M
      {
341
6.75M
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
342
6.75M
      }
343
1.83M
   else
344
1.83M
      {
345
1.83M
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
346
1.83M
347
1.83M
      if(N && z_size >= 2*N && ws_size >= 2*N)
348
1.51M
         karatsuba_mul(z, x, y, N, workspace);
349
320k
      else
350
320k
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
351
1.83M
      }
352
143M
   }
353
354
/*
355
* Squaring Algorithm Dispatcher
356
*/
357
void bigint_sqr(word z[], size_t z_size,
358
                const word x[], size_t x_size, size_t x_sw,
359
                word workspace[], size_t ws_size)
360
138M
   {
361
138M
   clear_mem(z, z_size);
362
138M
363
138M
   BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient");
364
138M
365
138M
   if(x_sw == 1)
366
196k
      {
367
196k
      bigint_linmul3(z, x, x_sw, x[0]);
368
196k
      }
369
138M
   else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size))
370
35.6M
      {
371
35.6M
      bigint_comba_sqr4(z, x);
372
35.6M
      }
373
102M
   else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size))
374
31.4M
      {
375
31.4M
      bigint_comba_sqr6(z, x);
376
31.4M
      }
377
71.0M
   else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size))
378
6.12M
      {
379
6.12M
      bigint_comba_sqr8(z, x);
380
6.12M
      }
381
64.9M
   else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size))
382
59.2M
      {
383
59.2M
      bigint_comba_sqr9(z, x);
384
59.2M
      }
385
5.69M
   else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size))
386
279k
      {
387
279k
      bigint_comba_sqr16(z, x);
388
279k
      }
389
5.41M
   else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size))
390
177k
      {
391
177k
      bigint_comba_sqr24(z, x);
392
177k
      }
393
5.23M
   else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
394
578k
      {
395
578k
      basecase_sqr(z, z_size, x, x_sw);
396
578k
      }
397
4.65M
   else
398
4.65M
      {
399
4.65M
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
400
4.65M
401
4.65M
      if(N && z_size >= 2*N && ws_size >= 2*N)
402
4.33M
         karatsuba_sqr(z, x, N, workspace);
403
320k
      else
404
320k
         basecase_sqr(z, z_size, x, x_sw);
405
4.65M
      }
406
138M
   }
407
408
}