Coverage Report

Created: 2021-11-25 09:31

/src/botan/src/lib/math/mp/mp_karat.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#include <botan/internal/mp_core.h>
10
#include <botan/internal/ct_utils.h>
11
#include <botan/mem_ops.h>
12
#include <botan/exceptn.h>
13
14
namespace Botan {
15
16
namespace {
17
18
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
19
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
20
21
/*
22
* Simple O(N^2) Multiplication
23
*/
24
void basecase_mul(word z[], size_t z_size,
25
                  const word x[], size_t x_size,
26
                  const word y[], size_t y_size)
27
9.03M
   {
28
9.03M
   if(z_size < x_size + y_size)
29
0
      throw Invalid_Argument("basecase_mul z_size too small");
30
31
9.03M
   const size_t x_size_8 = x_size - (x_size % 8);
32
33
9.03M
   clear_mem(z, z_size);
34
35
93.1M
   for(size_t i = 0; i != y_size; ++i)
36
84.1M
      {
37
84.1M
      const word y_i = y[i];
38
39
84.1M
      word carry = 0;
40
41
212M
      for(size_t j = 0; j != x_size_8; j += 8)
42
127M
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
43
44
440M
      for(size_t j = x_size_8; j != x_size; ++j)
45
355M
         z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
46
47
84.1M
      z[x_size+i] = carry;
48
84.1M
      }
49
9.03M
   }
50
51
void basecase_sqr(word z[], size_t z_size,
52
                  const word x[], size_t x_size)
53
2.03M
   {
54
2.03M
   if(z_size < 2*x_size)
55
0
      throw Invalid_Argument("basecase_sqr z_size too small");
56
57
2.03M
   const size_t x_size_8 = x_size - (x_size % 8);
58
59
2.03M
   clear_mem(z, z_size);
60
61
26.1M
   for(size_t i = 0; i != x_size; ++i)
62
24.0M
      {
63
24.0M
      const word x_i = x[i];
64
65
24.0M
      word carry = 0;
66
67
64.0M
      for(size_t j = 0; j != x_size_8; j += 8)
68
39.9M
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
69
70
144M
      for(size_t j = x_size_8; j != x_size; ++j)
71
120M
         z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
72
73
24.0M
      z[x_size+i] = carry;
74
24.0M
      }
75
2.03M
   }
76
77
/*
78
* Karatsuba Multiplication Operation
79
*/
80
void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
81
                   word workspace[])
82
1.04M
   {
83
1.04M
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
84
780k
      {
85
780k
      switch(N)
86
780k
         {
87
0
         case 6:
88
0
            return bigint_comba_mul6(z, x, y);
89
0
         case 8:
90
0
            return bigint_comba_mul8(z, x, y);
91
0
         case 9:
92
0
            return bigint_comba_mul9(z, x, y);
93
760k
         case 16:
94
760k
            return bigint_comba_mul16(z, x, y);
95
1.26k
         case 24:
96
1.26k
            return bigint_comba_mul24(z, x, y);
97
18.3k
         default:
98
18.3k
            return basecase_mul(z, 2*N, x, N, y, N);
99
780k
         }
100
780k
      }
101
102
262k
   const size_t N2 = N / 2;
103
104
262k
   const word* x0 = x;
105
262k
   const word* x1 = x + N2;
106
262k
   const word* y0 = y;
107
262k
   const word* y1 = y + N2;
108
262k
   word* z0 = z;
109
262k
   word* z1 = z + N;
110
111
262k
   word* ws0 = workspace;
112
262k
   word* ws1 = workspace + N;
113
114
262k
   clear_mem(workspace, 2*N);
115
116
   /*
117
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
118
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
119
   * anything, clear_mem above already set the correct result.
120
   *
121
   * However we ignore the result of the comparisons and always perform the
122
   * subtractions and recursively multiply to avoid the timing channel.
123
   */
124
125
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
126
262k
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
127
262k
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
128
262k
   const auto neg_mask = ~(cmp0 ^ cmp1);
129
130
262k
   karatsuba_mul(ws0, z0, z1, N2, ws1);
131
132
   // Compute X_lo * Y_lo
133
262k
   karatsuba_mul(z0, x0, y0, N2, ws1);
134
135
   // Compute X_hi * Y_hi
136
262k
   karatsuba_mul(z1, x1, y1, N2, ws1);
137
138
262k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
139
262k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
140
141
262k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
142
262k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
143
144
262k
   clear_mem(workspace + N, N2);
145
146
262k
   bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2);
147
262k
   }
148
149
/*
150
* Karatsuba Squaring Operation
151
*/
152
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
153
1.62M
   {
154
1.62M
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
155
1.33M
      {
156
1.33M
      switch(N)
157
1.33M
         {
158
0
         case 6:
159
0
            return bigint_comba_sqr6(z, x);
160
0
         case 8:
161
0
            return bigint_comba_sqr8(z, x);
162
0
         case 9:
163
0
            return bigint_comba_sqr9(z, x);
164
847k
         case 16:
165
847k
            return bigint_comba_sqr16(z, x);
166
807
         case 24:
167
807
            return bigint_comba_sqr24(z, x);
168
484k
         default:
169
484k
            return basecase_sqr(z, 2*N, x, N);
170
1.33M
         }
171
1.33M
      }
172
173
289k
   const size_t N2 = N / 2;
174
175
289k
   const word* x0 = x;
176
289k
   const word* x1 = x + N2;
177
289k
   word* z0 = z;
178
289k
   word* z1 = z + N;
179
180
289k
   word* ws0 = workspace;
181
289k
   word* ws1 = workspace + N;
182
183
289k
   clear_mem(workspace, 2*N);
184
185
   // See comment in karatsuba_mul
186
289k
   bigint_sub_abs(z0, x0, x1, N2, workspace);
187
289k
   karatsuba_sqr(ws0, z0, N2, ws1);
188
189
289k
   karatsuba_sqr(z0, x0, N2, ws1);
190
289k
   karatsuba_sqr(z1, x1, N2, ws1);
191
192
289k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
193
289k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
194
195
289k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
196
289k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
197
198
   /*
199
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
200
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
201
   * timing channel.
202
   */
203
289k
   bigint_sub2(z + N2, 2*N-N2, ws0, N);
204
289k
   }
205
206
/*
207
* Pick a good size for the Karatsuba multiply
208
*/
209
size_t karatsuba_size(size_t z_size,
210
                      size_t x_size, size_t x_sw,
211
                      size_t y_size, size_t y_sw)
212
341k
   {
213
341k
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
214
554
      return 0;
215
216
341k
   if(((x_size == x_sw) && (x_size % 2)) ||
217
341k
      ((y_size == y_sw) && (y_size % 2)))
218
0
      return 0;
219
220
341k
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
221
341k
   const size_t end = (x_size < y_size) ? x_size : y_size;
222
223
341k
   if(start == end)
224
90.9k
      {
225
90.9k
      if(start % 2)
226
0
         return 0;
227
90.9k
      return start;
228
90.9k
      }
229
230
338k
   for(size_t j = start; j <= end; ++j)
231
338k
      {
232
338k
      if(j % 2)
233
88.6k
         continue;
234
235
250k
      if(2*j > z_size)
236
85.6k
         return 0;
237
238
164k
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
239
164k
         {
240
164k
         if(j % 4 == 2 &&
241
164k
            (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
242
523
            return j+2;
243
164k
         return j;
244
164k
         }
245
164k
      }
246
247
0
   return 0;
248
250k
   }
249
250
/*
251
* Pick a good size for the Karatsuba squaring
252
*/
253
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
254
951k
   {
255
951k
   if(x_sw == x_size)
256
108
      {
257
108
      if(x_sw % 2)
258
0
         return 0;
259
108
      return x_sw;
260
108
      }
261
262
1.43M
   for(size_t j = x_sw; j <= x_size; ++j)
263
1.43M
      {
264
1.43M
      if(j % 2)
265
479k
         continue;
266
267
951k
      if(2*j > z_size)
268
198k
         return 0;
269
270
753k
      if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
271
32
         return j+2;
272
753k
      return j;
273
753k
      }
274
275
0
   return 0;
276
951k
   }
277
278
template<size_t SZ>
279
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
280
                                size_t y_sw, size_t y_size,
281
                                size_t z_size)
282
613M
   {
283
613M
   return (x_sw <= SZ && x_size >= SZ &&
284
613M
           y_sw <= SZ && y_size >= SZ &&
285
613M
           z_size >= 2*SZ);
286
613M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
233M
   {
283
233M
   return (x_sw <= SZ && x_size >= SZ &&
284
233M
           y_sw <= SZ && y_size >= SZ &&
285
233M
           z_size >= 2*SZ);
286
233M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
158M
   {
283
158M
   return (x_sw <= SZ && x_size >= SZ &&
284
158M
           y_sw <= SZ && y_size >= SZ &&
285
158M
           z_size >= 2*SZ);
286
158M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
105M
   {
283
105M
   return (x_sw <= SZ && x_size >= SZ &&
284
105M
           y_sw <= SZ && y_size >= SZ &&
285
105M
           z_size >= 2*SZ);
286
105M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
96.5M
   {
283
96.5M
   return (x_sw <= SZ && x_size >= SZ &&
284
96.5M
           y_sw <= SZ && y_size >= SZ &&
285
96.5M
           z_size >= 2*SZ);
286
96.5M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
9.99M
   {
283
9.99M
   return (x_sw <= SZ && x_size >= SZ &&
284
9.99M
           y_sw <= SZ && y_size >= SZ &&
285
9.99M
           z_size >= 2*SZ);
286
9.99M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
9.42M
   {
283
9.42M
   return (x_sw <= SZ && x_size >= SZ &&
284
9.42M
           y_sw <= SZ && y_size >= SZ &&
285
9.42M
           z_size >= 2*SZ);
286
9.42M
   }
287
288
template<size_t SZ>
289
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
290
                                size_t z_size)
291
554M
   {
292
554M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
554M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
210M
   {
292
210M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
210M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
150M
   {
292
150M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
150M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
97.6M
   {
292
97.6M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
97.6M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
90.8M
   {
292
90.8M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
90.8M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
2.80M
   {
292
2.80M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
2.80M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
2.38M
   {
292
2.38M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
2.38M
   }
294
295
}
296
297
void bigint_mul(word z[], size_t z_size,
298
                const word x[], size_t x_size, size_t x_sw,
299
                const word y[], size_t y_size, size_t y_sw,
300
                word workspace[], size_t ws_size)
301
234M
   {
302
234M
   clear_mem(z, z_size);
303
304
234M
   if(x_sw == 1)
305
368k
      {
306
368k
      bigint_linmul3(z, y, y_sw, x[0]);
307
368k
      }
308
233M
   else if(y_sw == 1)
309
240
      {
310
240
      bigint_linmul3(z, x, x_sw, y[0]);
311
240
      }
312
233M
   else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size))
313
75.2M
      {
314
75.2M
      bigint_comba_mul4(z, x, y);
315
75.2M
      }
316
158M
   else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size))
317
53.4M
      {
318
53.4M
      bigint_comba_mul6(z, x, y);
319
53.4M
      }
320
105M
   else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size))
321
8.48M
      {
322
8.48M
      bigint_comba_mul8(z, x, y);
323
8.48M
      }
324
96.5M
   else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size))
325
86.5M
      {
326
86.5M
      bigint_comba_mul9(z, x, y);
327
86.5M
      }
328
9.99M
   else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size))
329
571k
      {
330
571k
      bigint_comba_mul16(z, x, y);
331
571k
      }
332
9.42M
   else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size))
333
149k
      {
334
149k
      bigint_comba_mul24(z, x, y);
335
149k
      }
336
9.27M
   else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
337
9.27M
           y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
338
9.27M
           !workspace)
339
8.93M
      {
340
8.93M
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
341
8.93M
      }
342
341k
   else
343
341k
      {
344
341k
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
345
346
341k
      if(N && z_size >= 2*N && ws_size >= 2*N)
347
255k
         karatsuba_mul(z, x, y, N, workspace);
348
86.2k
      else
349
86.2k
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
350
341k
      }
351
234M
   }
352
353
/*
354
* Squaring Algorithm Dispatcher
355
*/
356
void bigint_sqr(word z[], size_t z_size,
357
                const word x[], size_t x_size, size_t x_sw,
358
                word workspace[], size_t ws_size)
359
210M
   {
360
210M
   clear_mem(z, z_size);
361
362
210M
   BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient");
363
364
210M
   if(x_sw == 1)
365
536k
      {
366
536k
      bigint_linmul3(z, x, x_sw, x[0]);
367
536k
      }
368
210M
   else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size))
369
59.9M
      {
370
59.9M
      bigint_comba_sqr4(z, x);
371
59.9M
      }
372
150M
   else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size))
373
52.6M
      {
374
52.6M
      bigint_comba_sqr6(z, x);
375
52.6M
      }
376
97.6M
   else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size))
377
6.76M
      {
378
6.76M
      bigint_comba_sqr8(z, x);
379
6.76M
      }
380
90.8M
   else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size))
381
88.0M
      {
382
88.0M
      bigint_comba_sqr9(z, x);
383
88.0M
      }
384
2.80M
   else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size))
385
420k
      {
386
420k
      bigint_comba_sqr16(z, x);
387
420k
      }
388
2.38M
   else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size))
389
79.4k
      {
390
79.4k
      bigint_comba_sqr24(z, x);
391
79.4k
      }
392
2.30M
   else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
393
1.35M
      {
394
1.35M
      basecase_sqr(z, z_size, x, x_sw);
395
1.35M
      }
396
951k
   else
397
951k
      {
398
951k
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
399
400
951k
      if(N && z_size >= 2*N && ws_size >= 2*N)
401
753k
         karatsuba_sqr(z, x, N, workspace);
402
198k
      else
403
198k
         basecase_sqr(z, z_size, x, x_sw);
404
951k
      }
405
210M
   }
406
407
}