Coverage Report

Created: 2022-08-24 06:37

/src/botan/src/lib/math/mp/mp_karat.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#include <botan/internal/mp_core.h>
10
#include <botan/internal/ct_utils.h>
11
#include <botan/mem_ops.h>
12
#include <botan/exceptn.h>
13
14
namespace Botan {
15
16
namespace {
17
18
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
19
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
20
21
/*
22
* Simple O(N^2) Multiplication
23
*/
24
void basecase_mul(word z[], size_t z_size,
25
                  const word x[], size_t x_size,
26
                  const word y[], size_t y_size)
27
12.7M
   {
28
12.7M
   if(z_size < x_size + y_size)
29
0
      throw Invalid_Argument("basecase_mul z_size too small");
30
31
12.7M
   const size_t x_size_8 = x_size - (x_size % 8);
32
33
12.7M
   clear_mem(z, z_size);
34
35
357M
   for(size_t i = 0; i != y_size; ++i)
36
344M
      {
37
344M
      const word y_i = y[i];
38
39
344M
      word carry = 0;
40
41
2.44G
      for(size_t j = 0; j != x_size_8; j += 8)
42
2.09G
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
43
44
1.50G
      for(size_t j = x_size_8; j != x_size; ++j)
45
1.15G
         z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
46
47
344M
      z[x_size+i] = carry;
48
344M
      }
49
12.7M
   }
50
51
void basecase_sqr(word z[], size_t z_size,
52
                  const word x[], size_t x_size)
53
10.1M
   {
54
10.1M
   if(z_size < 2*x_size)
55
0
      throw Invalid_Argument("basecase_sqr z_size too small");
56
57
10.1M
   const size_t x_size_8 = x_size - (x_size % 8);
58
59
10.1M
   clear_mem(z, z_size);
60
61
280M
   for(size_t i = 0; i != x_size; ++i)
62
270M
      {
63
270M
      const word x_i = x[i];
64
65
270M
      word carry = 0;
66
67
1.61G
      for(size_t j = 0; j != x_size_8; j += 8)
68
1.34G
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
69
70
1.42G
      for(size_t j = x_size_8; j != x_size; ++j)
71
1.15G
         z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
72
73
270M
      z[x_size+i] = carry;
74
270M
      }
75
10.1M
   }
76
77
/*
78
* Karatsuba Multiplication Operation
79
*/
80
void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
81
                   word workspace[])
82
26.9M
   {
83
26.9M
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
84
18.7M
      {
85
18.7M
      switch(N)
86
18.7M
         {
87
0
         case 6:
88
0
            return bigint_comba_mul6(z, x, y);
89
0
         case 8:
90
0
            return bigint_comba_mul8(z, x, y);
91
0
         case 9:
92
0
            return bigint_comba_mul9(z, x, y);
93
12.8M
         case 16:
94
12.8M
            return bigint_comba_mul16(z, x, y);
95
599k
         case 24:
96
599k
            return bigint_comba_mul24(z, x, y);
97
5.32M
         default:
98
5.32M
            return basecase_mul(z, 2*N, x, N, y, N);
99
18.7M
         }
100
18.7M
      }
101
102
8.19M
   const size_t N2 = N / 2;
103
104
8.19M
   const word* x0 = x;
105
8.19M
   const word* x1 = x + N2;
106
8.19M
   const word* y0 = y;
107
8.19M
   const word* y1 = y + N2;
108
8.19M
   word* z0 = z;
109
8.19M
   word* z1 = z + N;
110
111
8.19M
   word* ws0 = workspace;
112
8.19M
   word* ws1 = workspace + N;
113
114
8.19M
   clear_mem(workspace, 2*N);
115
116
   /*
117
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
118
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
119
   * anything, clear_mem above already set the correct result.
120
   *
121
   * However we ignore the result of the comparisons and always perform the
122
   * subtractions and recursively multiply to avoid the timing channel.
123
   */
124
125
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
126
8.19M
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
127
8.19M
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
128
8.19M
   const auto neg_mask = ~(cmp0 ^ cmp1);
129
130
8.19M
   karatsuba_mul(ws0, z0, z1, N2, ws1);
131
132
   // Compute X_lo * Y_lo
133
8.19M
   karatsuba_mul(z0, x0, y0, N2, ws1);
134
135
   // Compute X_hi * Y_hi
136
8.19M
   karatsuba_mul(z1, x1, y1, N2, ws1);
137
138
8.19M
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
139
8.19M
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
140
141
8.19M
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
142
8.19M
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
143
144
8.19M
   clear_mem(workspace + N, N2);
145
146
8.19M
   bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2);
147
8.19M
   }
148
149
/*
150
* Karatsuba Squaring Operation
151
*/
152
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
153
25.3M
   {
154
25.3M
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
155
17.8M
      {
156
17.8M
      switch(N)
157
17.8M
         {
158
0
         case 6:
159
0
            return bigint_comba_sqr6(z, x);
160
0
         case 8:
161
0
            return bigint_comba_sqr8(z, x);
162
0
         case 9:
163
0
            return bigint_comba_sqr9(z, x);
164
10.0M
         case 16:
165
10.0M
            return bigint_comba_sqr16(z, x);
166
568k
         case 24:
167
568k
            return bigint_comba_sqr24(z, x);
168
7.28M
         default:
169
7.28M
            return basecase_sqr(z, 2*N, x, N);
170
17.8M
         }
171
17.8M
      }
172
173
7.47M
   const size_t N2 = N / 2;
174
175
7.47M
   const word* x0 = x;
176
7.47M
   const word* x1 = x + N2;
177
7.47M
   word* z0 = z;
178
7.47M
   word* z1 = z + N;
179
180
7.47M
   word* ws0 = workspace;
181
7.47M
   word* ws1 = workspace + N;
182
183
7.47M
   clear_mem(workspace, 2*N);
184
185
   // See comment in karatsuba_mul
186
7.47M
   bigint_sub_abs(z0, x0, x1, N2, workspace);
187
7.47M
   karatsuba_sqr(ws0, z0, N2, ws1);
188
189
7.47M
   karatsuba_sqr(z0, x0, N2, ws1);
190
7.47M
   karatsuba_sqr(z1, x1, N2, ws1);
191
192
7.47M
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
193
7.47M
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
194
195
7.47M
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
196
7.47M
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
197
198
   /*
199
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
200
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
201
   * timing channel.
202
   */
203
7.47M
   bigint_sub2(z + N2, 2*N-N2, ws0, N);
204
7.47M
   }
205
206
/*
207
* Pick a good size for the Karatsuba multiply
208
*/
209
size_t karatsuba_size(size_t z_size,
210
                      size_t x_size, size_t x_sw,
211
                      size_t y_size, size_t y_sw)
212
4.59M
   {
213
4.59M
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
214
1.84k
      return 0;
215
216
4.59M
   if(((x_size == x_sw) && (x_size % 2)) ||
217
4.59M
      ((y_size == y_sw) && (y_size % 2)))
218
241k
      return 0;
219
220
4.35M
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
221
4.35M
   const size_t end = (x_size < y_size) ? x_size : y_size;
222
223
4.35M
   if(start == end)
224
383k
      {
225
383k
      if(start % 2)
226
1.90k
         return 0;
227
381k
      return start;
228
383k
      }
229
230
5.90M
   for(size_t j = start; j <= end; ++j)
231
5.90M
      {
232
5.90M
      if(j % 2)
233
1.93M
         continue;
234
235
3.96M
      if(2*j > z_size)
236
1.95M
         return 0;
237
238
2.01M
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
239
2.01M
         {
240
2.01M
         if(j % 4 == 2 &&
241
2.01M
            (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
242
232k
            return j+2;
243
1.78M
         return j;
244
2.01M
         }
245
2.01M
      }
246
247
0
   return 0;
248
3.96M
   }
249
250
/*
251
* Pick a good size for the Karatsuba squaring
252
*/
253
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
254
3.54M
   {
255
3.54M
   if(x_sw == x_size)
256
738
      {
257
738
      if(x_sw % 2)
258
281
         return 0;
259
457
      return x_sw;
260
738
      }
261
262
5.21M
   for(size_t j = x_sw; j <= x_size; ++j)
263
5.21M
      {
264
5.21M
      if(j % 2)
265
1.67M
         continue;
266
267
3.53M
      if(2*j > z_size)
268
627k
         return 0;
269
270
2.91M
      if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
271
0
         return j+2;
272
2.91M
      return j;
273
2.91M
      }
274
275
0
   return 0;
276
3.53M
   }
277
278
template<size_t SZ>
279
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
280
                                size_t y_sw, size_t y_size,
281
                                size_t z_size)
282
145M
   {
283
145M
   return (x_sw <= SZ && x_size >= SZ &&
284
145M
           y_sw <= SZ && y_size >= SZ &&
285
145M
           z_size >= 2*SZ);
286
145M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
48.1M
   {
283
48.1M
   return (x_sw <= SZ && x_size >= SZ &&
284
48.1M
           y_sw <= SZ && y_size >= SZ &&
285
48.1M
           z_size >= 2*SZ);
286
48.1M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
34.0M
   {
283
34.0M
   return (x_sw <= SZ && x_size >= SZ &&
284
34.0M
           y_sw <= SZ && y_size >= SZ &&
285
34.0M
           z_size >= 2*SZ);
286
34.0M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
24.1M
   {
283
24.1M
   return (x_sw <= SZ && x_size >= SZ &&
284
24.1M
           y_sw <= SZ && y_size >= SZ &&
285
24.1M
           z_size >= 2*SZ);
286
24.1M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
18.8M
   {
283
18.8M
   return (x_sw <= SZ && x_size >= SZ &&
284
18.8M
           y_sw <= SZ && y_size >= SZ &&
285
18.8M
           z_size >= 2*SZ);
286
18.8M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
10.1M
   {
283
10.1M
   return (x_sw <= SZ && x_size >= SZ &&
284
10.1M
           y_sw <= SZ && y_size >= SZ &&
285
10.1M
           z_size >= 2*SZ);
286
10.1M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
282
10.0M
   {
283
10.0M
   return (x_sw <= SZ && x_size >= SZ &&
284
10.0M
           y_sw <= SZ && y_size >= SZ &&
285
10.0M
           z_size >= 2*SZ);
286
10.0M
   }
287
288
template<size_t SZ>
289
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
290
                                size_t z_size)
291
121M
   {
292
121M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
121M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
44.7M
   {
292
44.7M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
44.7M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
32.1M
   {
292
32.1M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
32.1M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
21.4M
   {
292
21.4M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
21.4M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
11.2M
   {
292
11.2M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
11.2M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
5.96M
   {
292
5.96M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
5.96M
   }
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
291
5.81M
   {
292
5.81M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293
5.81M
   }
294
295
}
296
297
void bigint_mul(word z[], size_t z_size,
298
                const word x[], size_t x_size, size_t x_sw,
299
                const word y[], size_t y_size, size_t y_sw,
300
                word workspace[], size_t ws_size)
301
48.4M
   {
302
48.4M
   clear_mem(z, z_size);
303
304
48.4M
   if(x_sw == 1)
305
382k
      {
306
382k
      bigint_linmul3(z, y, y_sw, x[0]);
307
382k
      }
308
48.1M
   else if(y_sw == 1)
309
36
      {
310
36
      bigint_linmul3(z, x, x_sw, y[0]);
311
36
      }
312
48.1M
   else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size))
313
14.0M
      {
314
14.0M
      bigint_comba_mul4(z, x, y);
315
14.0M
      }
316
34.0M
   else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size))
317
9.84M
      {
318
9.84M
      bigint_comba_mul6(z, x, y);
319
9.84M
      }
320
24.1M
   else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size))
321
5.27M
      {
322
5.27M
      bigint_comba_mul8(z, x, y);
323
5.27M
      }
324
18.8M
   else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size))
325
8.72M
      {
326
8.72M
      bigint_comba_mul9(z, x, y);
327
8.72M
      }
328
10.1M
   else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size))
329
167k
      {
330
167k
      bigint_comba_mul16(z, x, y);
331
167k
      }
332
10.0M
   else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size))
333
163k
      {
334
163k
      bigint_comba_mul24(z, x, y);
335
163k
      }
336
9.84M
   else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
337
9.84M
           y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
338
9.84M
           !workspace)
339
5.24M
      {
340
5.24M
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
341
5.24M
      }
342
4.59M
   else
343
4.59M
      {
344
4.59M
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
345
346
4.59M
      if(N && z_size >= 2*N && ws_size >= 2*N)
347
2.39M
         karatsuba_mul(z, x, y, N, workspace);
348
2.20M
      else
349
2.20M
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
350
4.59M
      }
351
48.4M
   }
352
353
/*
354
* Squaring Algorithm Dispatcher
355
*/
356
void bigint_sqr(word z[], size_t z_size,
357
                const word x[], size_t x_size, size_t x_sw,
358
                word workspace[], size_t ws_size)
359
45.4M
   {
360
45.4M
   clear_mem(z, z_size);
361
362
45.4M
   BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient");
363
364
45.4M
   if(x_sw == 1)
365
742k
      {
366
742k
      bigint_linmul3(z, x, x_sw, x[0]);
367
742k
      }
368
44.7M
   else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size))
369
12.5M
      {
370
12.5M
      bigint_comba_sqr4(z, x);
371
12.5M
      }
372
32.1M
   else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size))
373
10.6M
      {
374
10.6M
      bigint_comba_sqr6(z, x);
375
10.6M
      }
376
21.4M
   else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size))
377
10.2M
      {
378
10.2M
      bigint_comba_sqr8(z, x);
379
10.2M
      }
380
11.2M
   else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size))
381
5.25M
      {
382
5.25M
      bigint_comba_sqr9(z, x);
383
5.25M
      }
384
5.96M
   else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size))
385
149k
      {
386
149k
      bigint_comba_sqr16(z, x);
387
149k
      }
388
5.81M
   else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size))
389
76.3k
      {
390
76.3k
      bigint_comba_sqr24(z, x);
391
76.3k
      }
392
5.74M
   else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
393
2.20M
      {
394
2.20M
      basecase_sqr(z, z_size, x, x_sw);
395
2.20M
      }
396
3.54M
   else
397
3.54M
      {
398
3.54M
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
399
400
3.54M
      if(N && z_size >= 2*N && ws_size >= 2*N)
401
2.91M
         karatsuba_sqr(z, x, N, workspace);
402
628k
      else
403
628k
         basecase_sqr(z, z_size, x, x_sw);
404
3.54M
      }
405
45.4M
   }
406
407
}