Coverage Report

Created: 2021-05-04 09:02

/src/botan/src/lib/math/mp/mp_karat.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#include <botan/internal/mp_core.h>
10
#include <botan/internal/ct_utils.h>
11
#include <botan/mem_ops.h>
12
#include <botan/exceptn.h>
13
14
namespace Botan {
15
16
namespace {
17
18
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
19
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
20
21
/*
22
* Simple O(N^2) Multiplication
23
*/
24
void basecase_mul(word z[], size_t z_size,
25
                  const word x[], size_t x_size,
26
                  const word y[], size_t y_size)
27
5.92M
   {
28
5.92M
   if(z_size < x_size + y_size)
29
0
      throw Invalid_Argument("basecase_mul z_size too small");
30
31
5.92M
   const size_t x_size_8 = x_size - (x_size % 8);
32
33
5.92M
   clear_mem(z, z_size);
34
35
63.5M
   for(size_t i = 0; i != y_size; ++i)
36
57.6M
      {
37
57.6M
      const word y_i = y[i];
38
39
57.6M
      word carry = 0;
40
41
163M
      for(size_t j = 0; j != x_size_8; j += 8)
42
105M
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
43
44
293M
      for(size_t j = x_size_8; j != x_size; ++j)
45
235M
         z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
46
47
57.6M
      z[x_size+i] = carry;
48
57.6M
      }
49
5.92M
   }
50
51
void basecase_sqr(word z[], size_t z_size,
52
                  const word x[], size_t x_size)
53
1.15M
   {
54
1.15M
   if(z_size < 2*x_size)
55
0
      throw Invalid_Argument("basecase_sqr z_size too small");
56
57
1.15M
   const size_t x_size_8 = x_size - (x_size % 8);
58
59
1.15M
   clear_mem(z, z_size);
60
61
15.4M
   for(size_t i = 0; i != x_size; ++i)
62
14.2M
      {
63
14.2M
      const word x_i = x[i];
64
65
14.2M
      word carry = 0;
66
67
43.4M
      for(size_t j = 0; j != x_size_8; j += 8)
68
29.1M
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
69
70
84.5M
      for(size_t j = x_size_8; j != x_size; ++j)
71
70.2M
         z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
72
73
14.2M
      z[x_size+i] = carry;
74
14.2M
      }
75
1.15M
   }
76
77
/*
78
* Karatsuba Multiplication Operation
79
*/
80
void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
81
                   word workspace[])
82
1.10M
   {
83
1.10M
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
84
825k
      {
85
825k
      switch(N)
86
825k
         {
87
0
         case 6:
88
0
            return bigint_comba_mul6(z, x, y);
89
0
         case 8:
90
0
            return bigint_comba_mul8(z, x, y);
91
0
         case 9:
92
0
            return bigint_comba_mul9(z, x, y);
93
812k
         case 16:
94
812k
            return bigint_comba_mul16(z, x, y);
95
1.05k
         case 24:
96
1.05k
            return bigint_comba_mul24(z, x, y);
97
12.2k
         default:
98
12.2k
            return basecase_mul(z, 2*N, x, N, y, N);
99
276k
         }
100
276k
      }
101
102
276k
   const size_t N2 = N / 2;
103
104
276k
   const word* x0 = x;
105
276k
   const word* x1 = x + N2;
106
276k
   const word* y0 = y;
107
276k
   const word* y1 = y + N2;
108
276k
   word* z0 = z;
109
276k
   word* z1 = z + N;
110
111
276k
   word* ws0 = workspace;
112
276k
   word* ws1 = workspace + N;
113
114
276k
   clear_mem(workspace, 2*N);
115
116
   /*
117
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
118
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
119
   * anything, clear_mem above already set the correct result.
120
   *
121
   * However we ignore the result of the comparisons and always perform the
122
   * subtractions and recursively multiply to avoid the timing channel.
123
   */
124
125
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
126
276k
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
127
276k
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
128
276k
   const auto neg_mask = ~(cmp0 ^ cmp1);
129
130
276k
   karatsuba_mul(ws0, z0, z1, N2, ws1);
131
132
   // Compute X_lo * Y_lo
133
276k
   karatsuba_mul(z0, x0, y0, N2, ws1);
134
135
   // Compute X_hi * Y_hi
136
276k
   karatsuba_mul(z1, x1, y1, N2, ws1);
137
138
276k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
139
276k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
140
141
276k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
142
276k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
143
144
276k
   clear_mem(workspace + N, N2);
145
146
276k
   bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2);
147
276k
   }
148
149
/*
150
* Karatsuba Squaring Operation
151
*/
152
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
153
1.31M
   {
154
1.31M
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
155
1.05M
      {
156
1.05M
      switch(N)
157
1.05M
         {
158
0
         case 6:
159
0
            return bigint_comba_sqr6(z, x);
160
0
         case 8:
161
0
            return bigint_comba_sqr8(z, x);
162
0
         case 9:
163
0
            return bigint_comba_sqr9(z, x);
164
763k
         case 16:
165
763k
            return bigint_comba_sqr16(z, x);
166
627
         case 24:
167
627
            return bigint_comba_sqr24(z, x);
168
287k
         default:
169
287k
            return basecase_sqr(z, 2*N, x, N);
170
259k
         }
171
259k
      }
172
173
259k
   const size_t N2 = N / 2;
174
175
259k
   const word* x0 = x;
176
259k
   const word* x1 = x + N2;
177
259k
   word* z0 = z;
178
259k
   word* z1 = z + N;
179
180
259k
   word* ws0 = workspace;
181
259k
   word* ws1 = workspace + N;
182
183
259k
   clear_mem(workspace, 2*N);
184
185
   // See comment in karatsuba_mul
186
259k
   bigint_sub_abs(z0, x0, x1, N2, workspace);
187
259k
   karatsuba_sqr(ws0, z0, N2, ws1);
188
189
259k
   karatsuba_sqr(z0, x0, N2, ws1);
190
259k
   karatsuba_sqr(z1, x1, N2, ws1);
191
192
259k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
193
259k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
194
195
259k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
196
259k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
197
198
   /*
199
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
200
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
201
   * timing channel.
202
   */
203
259k
   bigint_sub2(z + N2, 2*N-N2, ws0, N);
204
259k
   }
205
206
/*
207
* Pick a good size for the Karatsuba multiply
208
*/
209
size_t karatsuba_size(size_t z_size,
210
                      size_t x_size, size_t x_sw,
211
                      size_t y_size, size_t y_sw)
212
373k
   {
213
373k
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
214
454
      return 0;
215
216
373k
   if(((x_size == x_sw) && (x_size % 2)) ||
217
373k
      ((y_size == y_sw) && (y_size % 2)))
218
0
      return 0;
219
220
373k
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
221
234k
   const size_t end = (x_size < y_size) ? x_size : y_size;
222
223
373k
   if(start == end)
224
112k
      {
225
112k
      if(start % 2)
226
0
         return 0;
227
112k
      return start;
228
112k
      }
229
230
363k
   for(size_t j = start; j <= end; ++j)
231
363k
      {
232
363k
      if(j % 2)
233
103k
         continue;
234
235
260k
      if(2*j > z_size)
236
101k
         return 0;
237
238
158k
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
239
158k
         {
240
158k
         if(j % 4 == 2 &&
241
2.39k
            (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
242
420
            return j+2;
243
158k
         return j;
244
158k
         }
245
158k
      }
246
247
0
   return 0;
248
260k
   }
249
250
/*
251
* Pick a good size for the Karatsuba squaring
252
*/
253
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
254
707k
   {
255
707k
   if(x_sw == x_size)
256
85
      {
257
85
      if(x_sw % 2)
258
0
         return 0;
259
85
      return x_sw;
260
85
      }
261
262
1.07M
   for(size_t j = x_sw; j <= x_size; ++j)
263
1.07M
      {
264
1.07M
      if(j % 2)
265
372k
         continue;
266
267
707k
      if(2*j > z_size)
268
174k
         return 0;
269
270
532k
      if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
271
114
         return j+2;
272
532k
      return j;
273
532k
      }
274
275
0
   return 0;
276
707k
   }
277
278
template<size_t SZ>
279
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
280
                                size_t y_sw, size_t y_size,
281
                                size_t z_size)
282
473M
   {
283
473M
   return (x_sw <= SZ && x_size >= SZ &&
284
175M
           y_sw <= SZ && y_size >= SZ &&
285
171M
           z_size >= 2*SZ);
286
473M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
169M
   {
283
169M
   return (x_sw <= SZ && x_size >= SZ &&
284
46.2M
           y_sw <= SZ && y_size >= SZ &&
285
43.7M
           z_size >= 2*SZ);
286
169M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
126M
   {
283
126M
   return (x_sw <= SZ && x_size >= SZ &&
284
33.1M
           y_sw <= SZ && y_size >= SZ &&
285
33.0M
           z_size >= 2*SZ);
286
126M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
96.2M
   {
283
96.2M
   return (x_sw <= SZ && x_size >= SZ &&
284
30.4M
           y_sw <= SZ && y_size >= SZ &&
285
30.2M
           z_size >= 2*SZ);
286
96.2M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
69.2M
   {
283
69.2M
   return (x_sw <= SZ && x_size >= SZ &&
284
63.5M
           y_sw <= SZ && y_size >= SZ &&
285
63.1M
           z_size >= 2*SZ);
286
69.2M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
6.52M
   {
283
6.52M
   return (x_sw <= SZ && x_size >= SZ &&
284
1.28M
           y_sw <= SZ && y_size >= SZ &&
285
1.06M
           z_size >= 2*SZ);
286
6.52M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
6.33M
   {
283
6.33M
   return (x_sw <= SZ && x_size >= SZ &&
284
1.17M
           y_sw <= SZ && y_size >= SZ &&
285
672k
           z_size >= 2*SZ);
286
6.33M
   }
287
288
template<size_t SZ>
289
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
290
                                size_t z_size)
291
428M
   {
292
428M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
428M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
154M
   {
292
154M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
154M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
118M
   {
292
118M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
118M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
87.0M
   {
292
87.0M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
87.0M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
64.7M
   {
292
64.7M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
64.7M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
1.66M
   {
292
1.66M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
1.66M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
1.47M
   {
292
1.47M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
1.47M
   }
294
295
}
296
297
void bigint_mul(word z[], size_t z_size,
298
                const word x[], size_t x_size, size_t x_sw,
299
                const word y[], size_t y_size, size_t y_sw,
300
                word workspace[], size_t ws_size)
301
169M
   {
302
169M
   clear_mem(z, z_size);
303
304
169M
   if(x_sw == 1)
305
109k
      {
306
109k
      bigint_linmul3(z, y, y_sw, x[0]);
307
109k
      }
308
169M
   else if(y_sw == 1)
309
34
      {
310
34
      bigint_linmul3(z, x, x_sw, y[0]);
311
34
      }
312
169M
   else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size))
313
43.4M
      {
314
43.4M
      bigint_comba_mul4(z, x, y);
315
43.4M
      }
316
126M
   else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size))
317
29.8M
      {
318
29.8M
      bigint_comba_mul6(z, x, y);
319
29.8M
      }
320
96.2M
   else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size))
321
26.9M
      {
322
26.9M
      bigint_comba_mul8(z, x, y);
323
26.9M
      }
324
69.2M
   else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size))
325
62.7M
      {
326
62.7M
      bigint_comba_mul9(z, x, y);
327
62.7M
      }
328
6.52M
   else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size))
329
193k
      {
330
193k
      bigint_comba_mul16(z, x, y);
331
193k
      }
332
6.33M
   else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size))
333
143k
      {
334
143k
      bigint_comba_mul24(z, x, y);
335
143k
      }
336
6.18M
   else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
337
376k
           y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
338
373k
           !workspace)
339
5.81M
      {
340
5.81M
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
341
5.81M
      }
342
373k
   else
343
373k
      {
344
373k
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
345
346
373k
      if(N && z_size >= 2*N && ws_size >= 2*N)
347
271k
         karatsuba_mul(z, x, y, N, workspace);
348
101k
      else
349
101k
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
350
373k
      }
351
169M
   }
352
353
/*
354
* Squaring Algorithm Dispatcher
355
*/
356
void bigint_sqr(word z[], size_t z_size,
357
                const word x[], size_t x_size, size_t x_sw,
358
                word workspace[], size_t ws_size)
359
154M
   {
360
154M
   clear_mem(z, z_size);
361
362
154M
   BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient");
363
364
154M
   if(x_sw == 1)
365
137k
      {
366
137k
      bigint_linmul3(z, x, x_sw, x[0]);
367
137k
      }
368
154M
   else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size))
369
35.6M
      {
370
35.6M
      bigint_comba_sqr4(z, x);
371
35.6M
      }
372
118M
   else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size))
373
31.6M
      {
374
31.6M
      bigint_comba_sqr6(z, x);
375
31.6M
      }
376
87.0M
   else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size))
377
22.2M
      {
378
22.2M
      bigint_comba_sqr8(z, x);
379
22.2M
      }
380
64.7M
   else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size))
381
63.1M
      {
382
63.1M
      bigint_comba_sqr9(z, x);
383
63.1M
      }
384
1.66M
   else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size))
385
187k
      {
386
187k
      bigint_comba_sqr16(z, x);
387
187k
      }
388
1.47M
   else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size))
389
70.4k
      {
390
70.4k
      bigint_comba_sqr24(z, x);
391
70.4k
      }
392
1.40M
   else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
393
695k
      {
394
695k
      basecase_sqr(z, z_size, x, x_sw);
395
695k
      }
396
707k
   else
397
707k
      {
398
707k
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
399
400
707k
      if(N && z_size >= 2*N && ws_size >= 2*N)
401
532k
         karatsuba_sqr(z, x, N, workspace);
402
174k
      else
403
174k
         basecase_sqr(z, z_size, x, x_sw);
404
707k
      }
405
154M
   }
406
407
}