Coverage Report

Created: 2023-01-25 06:35

/src/botan/src/lib/math/mp/mp_karat.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#include <botan/internal/mp_core.h>
10
#include <botan/internal/ct_utils.h>
11
#include <botan/mem_ops.h>
12
#include <botan/exceptn.h>
13
14
namespace Botan {
15
16
/*
17
* Simple O(N^2) Multiplication
18
*/
19
void basecase_mul(word z[], size_t z_size,
20
                  const word x[], size_t x_size,
21
                  const word y[], size_t y_size)
22
12.9M
   {
23
12.9M
   if(z_size < x_size + y_size)
24
0
      throw Invalid_Argument("basecase_mul z_size too small");
25
26
12.9M
   const size_t x_size_8 = x_size - (x_size % 8);
27
28
12.9M
   clear_mem(z, z_size);
29
30
115M
   for(size_t i = 0; i != y_size; ++i)
31
102M
      {
32
102M
      const word y_i = y[i];
33
34
102M
      word carry = 0;
35
36
231M
      for(size_t j = 0; j != x_size_8; j += 8)
37
129M
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
38
39
518M
      for(size_t j = x_size_8; j != x_size; ++j)
40
416M
         z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
41
42
102M
      z[x_size+i] = carry;
43
102M
      }
44
12.9M
   }
45
46
void basecase_sqr(word z[], size_t z_size,
47
                  const word x[], size_t x_size)
48
4.47M
   {
49
4.47M
   if(z_size < 2*x_size)
50
0
      throw Invalid_Argument("basecase_sqr z_size too small");
51
52
4.47M
   const size_t x_size_8 = x_size - (x_size % 8);
53
54
4.47M
   clear_mem(z, z_size);
55
56
40.8M
   for(size_t i = 0; i != x_size; ++i)
57
36.3M
      {
58
36.3M
      const word x_i = x[i];
59
60
36.3M
      word carry = 0;
61
62
83.5M
      for(size_t j = 0; j != x_size_8; j += 8)
63
47.2M
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
64
65
212M
      for(size_t j = x_size_8; j != x_size; ++j)
66
175M
         z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
67
68
36.3M
      z[x_size+i] = carry;
69
36.3M
      }
70
4.47M
   }
71
72
namespace {
73
74
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
75
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
76
77
/*
78
* Karatsuba Multiplication Operation
79
*/
80
void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
81
                   word workspace[])
82
1.10M
   {
83
1.10M
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
84
826k
      {
85
826k
      switch(N)
86
826k
         {
87
0
         case 6:
88
0
            return bigint_comba_mul6(z, x, y);
89
0
         case 8:
90
0
            return bigint_comba_mul8(z, x, y);
91
0
         case 9:
92
0
            return bigint_comba_mul9(z, x, y);
93
812k
         case 16:
94
812k
            return bigint_comba_mul16(z, x, y);
95
1.38k
         case 24:
96
1.38k
            return bigint_comba_mul24(z, x, y);
97
12.1k
         default:
98
12.1k
            return basecase_mul(z, 2*N, x, N, y, N);
99
826k
         }
100
826k
      }
101
102
278k
   const size_t N2 = N / 2;
103
104
278k
   const word* x0 = x;
105
278k
   const word* x1 = x + N2;
106
278k
   const word* y0 = y;
107
278k
   const word* y1 = y + N2;
108
278k
   word* z0 = z;
109
278k
   word* z1 = z + N;
110
111
278k
   word* ws0 = workspace;
112
278k
   word* ws1 = workspace + N;
113
114
278k
   clear_mem(workspace, 2*N);
115
116
   /*
117
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
118
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
119
   * anything, clear_mem above already set the correct result.
120
   *
121
   * However we ignore the result of the comparisons and always perform the
122
   * subtractions and recursively multiply to avoid the timing channel.
123
   */
124
125
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
126
278k
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
127
278k
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
128
278k
   const auto neg_mask = ~(cmp0 ^ cmp1);
129
130
278k
   karatsuba_mul(ws0, z0, z1, N2, ws1);
131
132
   // Compute X_lo * Y_lo
133
278k
   karatsuba_mul(z0, x0, y0, N2, ws1);
134
135
   // Compute X_hi * Y_hi
136
278k
   karatsuba_mul(z1, x1, y1, N2, ws1);
137
138
278k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
139
278k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
140
141
278k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
142
278k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
143
144
278k
   clear_mem(workspace + N, N2);
145
146
278k
   bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2);
147
278k
   }
148
149
/*
150
* Karatsuba Squaring Operation
151
*/
152
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
153
1.41M
   {
154
1.41M
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
155
1.11M
      {
156
1.11M
      switch(N)
157
1.11M
         {
158
0
         case 6:
159
0
            return bigint_comba_sqr6(z, x);
160
0
         case 8:
161
0
            return bigint_comba_sqr8(z, x);
162
0
         case 9:
163
0
            return bigint_comba_sqr9(z, x);
164
874k
         case 16:
165
874k
            return bigint_comba_sqr16(z, x);
166
888
         case 24:
167
888
            return bigint_comba_sqr24(z, x);
168
239k
         default:
169
239k
            return basecase_sqr(z, 2*N, x, N);
170
1.11M
         }
171
1.11M
      }
172
173
295k
   const size_t N2 = N / 2;
174
175
295k
   const word* x0 = x;
176
295k
   const word* x1 = x + N2;
177
295k
   word* z0 = z;
178
295k
   word* z1 = z + N;
179
180
295k
   word* ws0 = workspace;
181
295k
   word* ws1 = workspace + N;
182
183
295k
   clear_mem(workspace, 2*N);
184
185
   // See comment in karatsuba_mul
186
295k
   bigint_sub_abs(z0, x0, x1, N2, workspace);
187
295k
   karatsuba_sqr(ws0, z0, N2, ws1);
188
189
295k
   karatsuba_sqr(z0, x0, N2, ws1);
190
295k
   karatsuba_sqr(z1, x1, N2, ws1);
191
192
295k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
193
295k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
194
195
295k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
196
295k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
197
198
   /*
199
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
200
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
201
   * timing channel.
202
   */
203
295k
   bigint_sub2(z + N2, 2*N-N2, ws0, N);
204
295k
   }
205
206
/*
207
* Pick a good size for the Karatsuba multiply
208
*/
209
size_t karatsuba_size(size_t z_size,
210
                      size_t x_size, size_t x_sw,
211
                      size_t y_size, size_t y_sw)
212
380k
   {
213
380k
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
214
531
      return 0;
215
216
379k
   if(((x_size == x_sw) && (x_size % 2)) ||
217
379k
      ((y_size == y_sw) && (y_size % 2)))
218
0
      return 0;
219
220
379k
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
221
379k
   const size_t end = (x_size < y_size) ? x_size : y_size;
222
223
379k
   if(start == end)
224
119k
      {
225
119k
      if(start % 2)
226
0
         return 0;
227
119k
      return start;
228
119k
      }
229
230
371k
   for(size_t j = start; j <= end; ++j)
231
371k
      {
232
371k
      if(j % 2)
233
110k
         continue;
234
235
260k
      if(2*j > z_size)
236
110k
         return 0;
237
238
149k
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
239
149k
         {
240
149k
         if(j % 4 == 2 &&
241
149k
            (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
242
575
            return j+2;
243
149k
         return j;
244
149k
         }
245
149k
      }
246
247
0
   return 0;
248
260k
   }
249
250
/*
251
* Pick a good size for the Karatsuba squaring
252
*/
253
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
254
1.09M
   {
255
1.09M
   if(x_sw == x_size)
256
152
      {
257
152
      if(x_sw % 2)
258
0
         return 0;
259
152
      return x_sw;
260
152
      }
261
262
1.66M
   for(size_t j = x_sw; j <= x_size; ++j)
263
1.66M
      {
264
1.66M
      if(j % 2)
265
569k
         continue;
266
267
1.09M
      if(2*j > z_size)
268
569k
         return 0;
269
270
522k
      if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
271
20
         return j+2;
272
522k
      return j;
273
522k
      }
274
275
0
   return 0;
276
1.09M
   }
277
278
template<size_t SZ>
279
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
280
                                size_t y_sw, size_t y_size,
281
                                size_t z_size)
282
642M
   {
283
642M
   return (x_sw <= SZ && x_size >= SZ &&
284
642M
           y_sw <= SZ && y_size >= SZ &&
285
642M
           z_size >= 2*SZ);
286
642M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
245M
   {
283
245M
   return (x_sw <= SZ && x_size >= SZ &&
284
245M
           y_sw <= SZ && y_size >= SZ &&
285
245M
           z_size >= 2*SZ);
286
245M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
166M
   {
283
166M
   return (x_sw <= SZ && x_size >= SZ &&
284
166M
           y_sw <= SZ && y_size >= SZ &&
285
166M
           z_size >= 2*SZ);
286
166M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
112M
   {
283
112M
   return (x_sw <= SZ && x_size >= SZ &&
284
112M
           y_sw <= SZ && y_size >= SZ &&
285
112M
           z_size >= 2*SZ);
286
112M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
90.5M
   {
283
90.5M
   return (x_sw <= SZ && x_size >= SZ &&
284
90.5M
           y_sw <= SZ && y_size >= SZ &&
285
90.5M
           z_size >= 2*SZ);
286
90.5M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
14.2M
   {
283
14.2M
   return (x_sw <= SZ && x_size >= SZ &&
284
14.2M
           y_sw <= SZ && y_size >= SZ &&
285
14.2M
           z_size >= 2*SZ);
286
14.2M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
13.3M
   {
283
13.3M
   return (x_sw <= SZ && x_size >= SZ &&
284
13.3M
           y_sw <= SZ && y_size >= SZ &&
285
13.3M
           z_size >= 2*SZ);
286
13.3M
   }
287
288
template<size_t SZ>
289
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
290
                                size_t z_size)
291
557M
   {
292
557M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
557M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
212M
   {
292
212M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
212M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
152M
   {
292
152M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
152M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
99.2M
   {
292
99.2M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
99.2M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
82.9M
   {
292
82.9M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
82.9M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
5.12M
   {
292
5.12M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
5.12M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
4.84M
   {
292
4.84M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
4.84M
   }
294
295
}
296
297
void bigint_mul(word z[], size_t z_size,
298
                const word x[], size_t x_size, size_t x_sw,
299
                const word y[], size_t y_size, size_t y_sw,
300
                word workspace[], size_t ws_size)
301
245M
   {
302
245M
   clear_mem(z, z_size);
303
304
245M
   if(x_sw == 1)
305
496k
      {
306
496k
      bigint_linmul3(z, y, y_sw, x[0]);
307
496k
      }
308
245M
   else if(y_sw == 1)
309
180
      {
310
180
      bigint_linmul3(z, x, x_sw, y[0]);
311
180
      }
312
245M
   else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size))
313
78.4M
      {
314
78.4M
      bigint_comba_mul4(z, x, y);
315
78.4M
      }
316
166M
   else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size))
317
54.2M
      {
318
54.2M
      bigint_comba_mul6(z, x, y);
319
54.2M
      }
320
112M
   else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size))
321
21.8M
      {
322
21.8M
      bigint_comba_mul8(z, x, y);
323
21.8M
      }
324
90.5M
   else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size))
325
76.2M
      {
326
76.2M
      bigint_comba_mul9(z, x, y);
327
76.2M
      }
328
14.2M
   else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size))
329
939k
      {
330
939k
      bigint_comba_mul16(z, x, y);
331
939k
      }
332
13.3M
   else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size))
333
150k
      {
334
150k
      bigint_comba_mul24(z, x, y);
335
150k
      }
336
13.1M
   else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
337
13.1M
           y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
338
13.1M
           !workspace)
339
12.8M
      {
340
12.8M
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
341
12.8M
      }
342
380k
   else
343
380k
      {
344
380k
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
345
346
380k
      if(N && z_size >= 2*N && ws_size >= 2*N)
347
268k
         karatsuba_mul(z, x, y, N, workspace);
348
111k
      else
349
111k
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
350
380k
      }
351
245M
   }
352
353
/*
354
* Squaring Algorithm Dispatcher
355
*/
356
void bigint_sqr(word z[], size_t z_size,
357
                const word x[], size_t x_size, size_t x_sw,
358
                word workspace[], size_t ws_size)
359
213M
   {
360
213M
   clear_mem(z, z_size);
361
362
213M
   BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient");
363
364
213M
   if(x_sw == 1)
365
541k
      {
366
541k
      bigint_linmul3(z, x, x_sw, x[0]);
367
541k
      }
368
212M
   else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size))
369
60.6M
      {
370
60.6M
      bigint_comba_sqr4(z, x);
371
60.6M
      }
372
152M
   else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size))
373
52.8M
      {
374
52.8M
      bigint_comba_sqr6(z, x);
375
52.8M
      }
376
99.2M
   else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size))
377
16.3M
      {
378
16.3M
      bigint_comba_sqr8(z, x);
379
16.3M
      }
380
82.9M
   else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size))
381
77.8M
      {
382
77.8M
      bigint_comba_sqr9(z, x);
383
77.8M
      }
384
5.12M
   else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size))
385
285k
      {
386
285k
      bigint_comba_sqr16(z, x);
387
285k
      }
388
4.84M
   else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size))
389
82.3k
      {
390
82.3k
      bigint_comba_sqr24(z, x);
391
82.3k
      }
392
4.75M
   else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
393
3.66M
      {
394
3.66M
      basecase_sqr(z, z_size, x, x_sw);
395
3.66M
      }
396
1.09M
   else
397
1.09M
      {
398
1.09M
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
399
400
1.09M
      if(N && z_size >= 2*N && ws_size >= 2*N)
401
523k
         karatsuba_sqr(z, x, N, workspace);
402
569k
      else
403
569k
         basecase_sqr(z, z_size, x, x_sw);
404
1.09M
      }
405
213M
   }
406
407
}