Coverage Report

Created: 2021-06-10 10:30

/src/botan/src/lib/math/mp/mp_karat.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#include <botan/internal/mp_core.h>
10
#include <botan/internal/ct_utils.h>
11
#include <botan/mem_ops.h>
12
#include <botan/exceptn.h>
13
14
namespace Botan {
15
16
namespace {
17
18
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
19
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
20
21
/*
22
* Simple O(N^2) Multiplication
23
*/
24
void basecase_mul(word z[], size_t z_size,
25
                  const word x[], size_t x_size,
26
                  const word y[], size_t y_size)
27
5.98M
   {
28
5.98M
   if(z_size < x_size + y_size)
29
0
      throw Invalid_Argument("basecase_mul z_size too small");
30
31
5.98M
   const size_t x_size_8 = x_size - (x_size % 8);
32
33
5.98M
   clear_mem(z, z_size);
34
35
61.8M
   for(size_t i = 0; i != y_size; ++i)
36
55.8M
      {
37
55.8M
      const word y_i = y[i];
38
39
55.8M
      word carry = 0;
40
41
153M
      for(size_t j = 0; j != x_size_8; j += 8)
42
98.0M
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
43
44
281M
      for(size_t j = x_size_8; j != x_size; ++j)
45
225M
         z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
46
47
55.8M
      z[x_size+i] = carry;
48
55.8M
      }
49
5.98M
   }
50
51
void basecase_sqr(word z[], size_t z_size,
52
                  const word x[], size_t x_size)
53
1.19M
   {
54
1.19M
   if(z_size < 2*x_size)
55
0
      throw Invalid_Argument("basecase_sqr z_size too small");
56
57
1.19M
   const size_t x_size_8 = x_size - (x_size % 8);
58
59
1.19M
   clear_mem(z, z_size);
60
61
15.4M
   for(size_t i = 0; i != x_size; ++i)
62
14.2M
      {
63
14.2M
      const word x_i = x[i];
64
65
14.2M
      word carry = 0;
66
67
42.9M
      for(size_t j = 0; j != x_size_8; j += 8)
68
28.7M
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
69
70
81.3M
      for(size_t j = x_size_8; j != x_size; ++j)
71
67.1M
         z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
72
73
14.2M
      z[x_size+i] = carry;
74
14.2M
      }
75
1.19M
   }
76
77
/*
78
* Karatsuba Multiplication Operation
79
*/
80
void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
81
                   word workspace[])
82
1.05M
   {
83
1.05M
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
84
790k
      {
85
790k
      switch(N)
86
790k
         {
87
0
         case 6:
88
0
            return bigint_comba_mul6(z, x, y);
89
0
         case 8:
90
0
            return bigint_comba_mul8(z, x, y);
91
0
         case 9:
92
0
            return bigint_comba_mul9(z, x, y);
93
769k
         case 16:
94
769k
            return bigint_comba_mul16(z, x, y);
95
1.16k
         case 24:
96
1.16k
            return bigint_comba_mul24(z, x, y);
97
19.4k
         default:
98
19.4k
            return basecase_mul(z, 2*N, x, N, y, N);
99
265k
         }
100
265k
      }
101
102
265k
   const size_t N2 = N / 2;
103
104
265k
   const word* x0 = x;
105
265k
   const word* x1 = x + N2;
106
265k
   const word* y0 = y;
107
265k
   const word* y1 = y + N2;
108
265k
   word* z0 = z;
109
265k
   word* z1 = z + N;
110
111
265k
   word* ws0 = workspace;
112
265k
   word* ws1 = workspace + N;
113
114
265k
   clear_mem(workspace, 2*N);
115
116
   /*
117
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
118
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
119
   * anything, clear_mem above already set the correct result.
120
   *
121
   * However we ignore the result of the comparisons and always perform the
122
   * subtractions and recursively multiply to avoid the timing channel.
123
   */
124
125
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
126
265k
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
127
265k
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
128
265k
   const auto neg_mask = ~(cmp0 ^ cmp1);
129
130
265k
   karatsuba_mul(ws0, z0, z1, N2, ws1);
131
132
   // Compute X_lo * Y_lo
133
265k
   karatsuba_mul(z0, x0, y0, N2, ws1);
134
135
   // Compute X_hi * Y_hi
136
265k
   karatsuba_mul(z1, x1, y1, N2, ws1);
137
138
265k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
139
265k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
140
141
265k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
142
265k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
143
144
265k
   clear_mem(workspace + N, N2);
145
146
265k
   bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2);
147
265k
   }
148
149
/*
150
* Karatsuba Squaring Operation
151
*/
152
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
153
1.27M
   {
154
1.27M
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
155
1.01M
      {
156
1.01M
      switch(N)
157
1.01M
         {
158
0
         case 6:
159
0
            return bigint_comba_sqr6(z, x);
160
0
         case 8:
161
0
            return bigint_comba_sqr8(z, x);
162
0
         case 9:
163
0
            return bigint_comba_sqr9(z, x);
164
747k
         case 16:
165
747k
            return bigint_comba_sqr16(z, x);
166
600
         case 24:
167
600
            return bigint_comba_sqr24(z, x);
168
268k
         default:
169
268k
            return basecase_sqr(z, 2*N, x, N);
170
258k
         }
171
258k
      }
172
173
258k
   const size_t N2 = N / 2;
174
175
258k
   const word* x0 = x;
176
258k
   const word* x1 = x + N2;
177
258k
   word* z0 = z;
178
258k
   word* z1 = z + N;
179
180
258k
   word* ws0 = workspace;
181
258k
   word* ws1 = workspace + N;
182
183
258k
   clear_mem(workspace, 2*N);
184
185
   // See comment in karatsuba_mul
186
258k
   bigint_sub_abs(z0, x0, x1, N2, workspace);
187
258k
   karatsuba_sqr(ws0, z0, N2, ws1);
188
189
258k
   karatsuba_sqr(z0, x0, N2, ws1);
190
258k
   karatsuba_sqr(z1, x1, N2, ws1);
191
192
258k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
193
258k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
194
195
258k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
196
258k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
197
198
   /*
199
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
200
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
201
   * timing channel.
202
   */
203
258k
   bigint_sub2(z + N2, 2*N-N2, ws0, N);
204
258k
   }
205
206
/*
207
* Pick a good size for the Karatsuba multiply
208
*/
209
size_t karatsuba_size(size_t z_size,
210
                      size_t x_size, size_t x_sw,
211
                      size_t y_size, size_t y_sw)
212
348k
   {
213
348k
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
214
663
      return 0;
215
216
348k
   if(((x_size == x_sw) && (x_size % 2)) ||
217
348k
      ((y_size == y_sw) && (y_size % 2)))
218
0
      return 0;
219
220
348k
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
221
210k
   const size_t end = (x_size < y_size) ? x_size : y_size;
222
223
348k
   if(start == end)
224
93.0k
      {
225
93.0k
      if(start % 2)
226
0
         return 0;
227
93.0k
      return start;
228
93.0k
      }
229
230
347k
   for(size_t j = start; j <= end; ++j)
231
347k
      {
232
347k
      if(j % 2)
233
91.8k
         continue;
234
235
255k
      if(2*j > z_size)
236
88.0k
         return 0;
237
238
167k
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
239
167k
         {
240
167k
         if(j % 4 == 2 &&
241
4.69k
            (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
242
587
            return j+2;
243
166k
         return j;
244
166k
         }
245
167k
      }
246
247
0
   return 0;
248
255k
   }
249
250
/*
251
* Pick a good size for the Karatsuba squaring
252
*/
253
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
254
668k
   {
255
668k
   if(x_sw == x_size)
256
83
      {
257
83
      if(x_sw % 2)
258
0
         return 0;
259
83
      return x_sw;
260
83
      }
261
262
1.02M
   for(size_t j = x_sw; j <= x_size; ++j)
263
1.02M
      {
264
1.02M
      if(j % 2)
265
356k
         continue;
266
267
668k
      if(2*j > z_size)
268
167k
         return 0;
269
270
501k
      if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
271
90
         return j+2;
272
500k
      return j;
273
500k
      }
274
275
0
   return 0;
276
668k
   }
277
278
template<size_t SZ>
279
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
280
                                size_t y_sw, size_t y_size,
281
                                size_t z_size)
282
504M
   {
283
504M
   return (x_sw <= SZ && x_size >= SZ &&
284
181M
           y_sw <= SZ && y_size >= SZ &&
285
177M
           z_size >= 2*SZ);
286
504M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
174M
   {
283
174M
   return (x_sw <= SZ && x_size >= SZ &&
284
47.0M
           y_sw <= SZ && y_size >= SZ &&
285
44.4M
           z_size >= 2*SZ);
286
174M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
130M
   {
283
130M
   return (x_sw <= SZ && x_size >= SZ &&
284
32.4M
           y_sw <= SZ && y_size >= SZ &&
285
32.2M
           z_size >= 2*SZ);
286
130M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
101M
   {
283
101M
   return (x_sw <= SZ && x_size >= SZ &&
284
21.3M
           y_sw <= SZ && y_size >= SZ &&
285
21.0M
           z_size >= 2*SZ);
286
101M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
84.1M
   {
283
84.1M
   return (x_sw <= SZ && x_size >= SZ &&
284
78.4M
           y_sw <= SZ && y_size >= SZ &&
285
77.9M
           z_size >= 2*SZ);
286
84.1M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
6.57M
   {
283
6.57M
   return (x_sw <= SZ && x_size >= SZ &&
284
1.33M
           y_sw <= SZ && y_size >= SZ &&
285
1.11M
           z_size >= 2*SZ);
286
6.57M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
6.36M
   {
283
6.36M
   return (x_sw <= SZ && x_size >= SZ &&
284
1.18M
           y_sw <= SZ && y_size >= SZ &&
285
673k
           z_size >= 2*SZ);
286
6.36M
   }
287
288
template<size_t SZ>
289
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
290
                                size_t z_size)
291
455M
   {
292
455M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
455M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
158M
   {
292
158M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
158M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
123M
   {
292
123M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
123M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
92.1M
   {
292
92.1M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
92.1M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
78.2M
   {
292
78.2M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
78.2M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
1.63M
   {
292
1.63M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
1.63M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
1.49M
   {
292
1.49M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
1.49M
   }
294
295
}
296
297
void bigint_mul(word z[], size_t z_size,
298
                const word x[], size_t x_size, size_t x_sw,
299
                const word y[], size_t y_size, size_t y_sw,
300
                word workspace[], size_t ws_size)
301
175M
   {
302
175M
   clear_mem(z, z_size);
303
304
175M
   if(x_sw == 1)
305
113k
      {
306
113k
      bigint_linmul3(z, y, y_sw, x[0]);
307
113k
      }
308
174M
   else if(y_sw == 1)
309
37
      {
310
37
      bigint_linmul3(z, x, x_sw, y[0]);
311
37
      }
312
174M
   else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size))
313
44.0M
      {
314
44.0M
      bigint_comba_mul4(z, x, y);
315
44.0M
      }
316
130M
   else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size))
317
28.9M
      {
318
28.9M
      bigint_comba_mul6(z, x, y);
319
28.9M
      }
320
101M
   else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size))
321
17.6M
      {
322
17.6M
      bigint_comba_mul8(z, x, y);
323
17.6M
      }
324
84.1M
   else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size))
325
77.6M
      {
326
77.6M
      bigint_comba_mul9(z, x, y);
327
77.6M
      }
328
6.57M
   else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size))
329
208k
      {
330
208k
      bigint_comba_mul16(z, x, y);
331
208k
      }
332
6.36M
   else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size))
333
137k
      {
334
137k
      bigint_comba_mul24(z, x, y);
335
137k
      }
336
6.22M
   else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
337
350k
           y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
338
348k
           !workspace)
339
5.88M
      {
340
5.88M
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
341
5.88M
      }
342
348k
   else
343
348k
      {
344
348k
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
345
346
348k
      if(N && z_size >= 2*N && ws_size >= 2*N)
347
260k
         karatsuba_mul(z, x, y, N, workspace);
348
88.7k
      else
349
88.7k
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
350
348k
      }
351
175M
   }
352
353
/*
354
* Squaring Algorithm Dispatcher
355
*/
356
void bigint_sqr(word z[], size_t z_size,
357
                const word x[], size_t x_size, size_t x_sw,
358
                word workspace[], size_t ws_size)
359
158M
   {
360
158M
   clear_mem(z, z_size);
361
362
158M
   BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient");
363
364
158M
   if(x_sw == 1)
365
146k
      {
366
146k
      bigint_linmul3(z, x, x_sw, x[0]);
367
146k
      }
368
158M
   else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size))
369
35.4M
      {
370
35.4M
      bigint_comba_sqr4(z, x);
371
35.4M
      }
372
123M
   else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size))
373
30.8M
      {
374
30.8M
      bigint_comba_sqr6(z, x);
375
30.8M
      }
376
92.1M
   else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size))
377
13.9M
      {
378
13.9M
      bigint_comba_sqr8(z, x);
379
13.9M
      }
380
78.2M
   else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size))
381
76.5M
      {
382
76.5M
      bigint_comba_sqr9(z, x);
383
76.5M
      }
384
1.63M
   else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size))
385
144k
      {
386
144k
      bigint_comba_sqr16(z, x);
387
144k
      }
388
1.49M
   else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size))
389
64.7k
      {
390
64.7k
      bigint_comba_sqr24(z, x);
391
64.7k
      }
392
1.43M
   else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
393
761k
      {
394
761k
      basecase_sqr(z, z_size, x, x_sw);
395
761k
      }
396
668k
   else
397
668k
      {
398
668k
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
399
400
668k
      if(N && z_size >= 2*N && ws_size >= 2*N)
401
501k
         karatsuba_sqr(z, x, N, workspace);
402
167k
      else
403
167k
         basecase_sqr(z, z_size, x, x_sw);
404
668k
      }
405
158M
   }
406
407
}