Coverage Report

Created: 2025-11-16 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/botan/src/lib/math/mp/mp_karat.cpp
Line
Count
Source
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#include <botan/internal/mp_core.h>
10
11
#include <botan/exceptn.h>
12
#include <botan/mem_ops.h>
13
#include <botan/internal/ct_utils.h>
14
15
namespace Botan {
16
17
/*
18
* Simple O(N^2) Multiplication
19
*/
20
51.2k
void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) {
21
51.2k
   if(z_size < x_size + y_size) {
22
0
      throw Invalid_Argument("basecase_mul z_size too small");
23
0
   }
24
25
51.2k
   const size_t x_size_8 = x_size - (x_size % 8);
26
27
51.2k
   clear_mem(z, z_size);
28
29
160k
   for(size_t i = 0; i != y_size; ++i) {
30
109k
      const word y_i = y[i];
31
32
109k
      word carry = 0;
33
34
133k
      for(size_t j = 0; j != x_size_8; j += 8) {
35
24.2k
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
36
24.2k
      }
37
38
341k
      for(size_t j = x_size_8; j != x_size; ++j) {
39
232k
         z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry);
40
232k
      }
41
42
109k
      z[x_size + i] = carry;
43
109k
   }
44
51.2k
}
45
46
21.9k
void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) {
47
21.9k
   if(z_size < 2 * x_size) {
48
0
      throw Invalid_Argument("basecase_sqr z_size too small");
49
0
   }
50
51
21.9k
   const size_t x_size_8 = x_size - (x_size % 8);
52
53
21.9k
   clear_mem(z, z_size);
54
55
72.8k
   for(size_t i = 0; i != x_size; ++i) {
56
50.8k
      const word x_i = x[i];
57
58
50.8k
      word carry = 0;
59
60
96.1k
      for(size_t j = 0; j != x_size_8; j += 8) {
61
45.2k
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
62
45.2k
      }
63
64
166k
      for(size_t j = x_size_8; j != x_size; ++j) {
65
116k
         z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry);
66
116k
      }
67
68
50.8k
      z[x_size + i] = carry;
69
50.8k
   }
70
21.9k
}
71
72
namespace {
73
74
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
75
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
76
77
/*
78
* Karatsuba Multiplication Operation
79
*/
80
301
void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) {
81
301
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2 != 0) {
82
210
      switch(N) {
83
0
         case 6:
84
0
            return bigint_comba_mul6(z, x, y);
85
0
         case 8:
86
0
            return bigint_comba_mul8(z, x, y);
87
0
         case 9:
88
0
            return bigint_comba_mul9(z, x, y);
89
66
         case 16:
90
66
            return bigint_comba_mul16(z, x, y);
91
6
         case 24:
92
6
            return bigint_comba_mul24(z, x, y);
93
138
         default:
94
138
            return basecase_mul(z, 2 * N, x, N, y, N);
95
210
      }
96
210
   }
97
98
91
   const size_t N2 = N / 2;
99
100
91
   const word* x0 = x;
101
91
   const word* x1 = x + N2;
102
91
   const word* y0 = y;
103
91
   const word* y1 = y + N2;
104
91
   word* z0 = z;
105
91
   word* z1 = z + N;
106
107
91
   word* ws0 = workspace;
108
91
   word* ws1 = workspace + N;
109
110
91
   clear_mem(workspace, 2 * N);
111
112
   /*
113
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
114
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
115
   * anything, clear_mem above already set the correct result.
116
   *
117
   * However we ignore the result of the comparisons and always perform the
118
   * subtractions and recursively multiply to avoid the timing channel.
119
   */
120
121
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
122
91
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
123
91
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
124
91
   const auto neg_mask = ~(cmp0 ^ cmp1);
125
126
91
   karatsuba_mul(ws0, z0, z1, N2, ws1);
127
128
   // Compute X_lo * Y_lo
129
91
   karatsuba_mul(z0, x0, y0, N2, ws1);
130
131
   // Compute X_hi * Y_hi
132
91
   karatsuba_mul(z1, x1, y1, N2, ws1);
133
134
91
   const word ws_carry = bigint_add3(ws1, z0, N, z1, N);
135
91
   word z_carry = bigint_add2(z + N2, N, ws1, N);
136
137
91
   z_carry += bigint_add2(z + N + N2, N2, &ws_carry, 1);
138
91
   bigint_add2(z + N + N2, N2, &z_carry, 1);
139
140
91
   clear_mem(workspace + N, N2);
141
142
91
   bigint_cnd_add(neg_mask.value(), z + N2, workspace, 2 * N - N2);
143
91
   bigint_cnd_sub((~neg_mask).value(), z + N2, workspace, 2 * N - N2);
144
91
}
145
146
/*
147
* Karatsuba Squaring Operation
148
*/
149
394
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) {
150
394
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2 != 0) {
151
276
      switch(N) {
152
0
         case 6:
153
0
            return bigint_comba_sqr6(z, x);
154
0
         case 8:
155
0
            return bigint_comba_sqr8(z, x);
156
0
         case 9:
157
0
            return bigint_comba_sqr9(z, x);
158
42
         case 16:
159
42
            return bigint_comba_sqr16(z, x);
160
42
         case 24:
161
42
            return bigint_comba_sqr24(z, x);
162
192
         default:
163
192
            return basecase_sqr(z, 2 * N, x, N);
164
276
      }
165
276
   }
166
167
118
   const size_t N2 = N / 2;
168
169
118
   const word* x0 = x;
170
118
   const word* x1 = x + N2;
171
118
   word* z0 = z;
172
118
   word* z1 = z + N;
173
174
118
   word* ws0 = workspace;
175
118
   word* ws1 = workspace + N;
176
177
118
   clear_mem(workspace, 2 * N);
178
179
   // See comment in karatsuba_mul
180
118
   bigint_sub_abs(z0, x0, x1, N2, workspace);
181
118
   karatsuba_sqr(ws0, z0, N2, ws1);
182
183
118
   karatsuba_sqr(z0, x0, N2, ws1);
184
118
   karatsuba_sqr(z1, x1, N2, ws1);
185
186
118
   const word ws_carry = bigint_add3(ws1, z0, N, z1, N);
187
118
   word z_carry = bigint_add2(z + N2, N, ws1, N);
188
189
118
   z_carry += bigint_add2(z + N + N2, N2, &ws_carry, 1);
190
118
   bigint_add2(z + N + N2, N2, &z_carry, 1);
191
192
   /*
193
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
194
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
195
   * timing channel.
196
   */
197
118
   bigint_sub2(z + N2, 2 * N - N2, ws0, N);
198
118
}
199
200
/*
201
* Pick a good size for the Karatsuba multiply
202
*/
203
39
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) {
204
39
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) {
205
7
      return 0;
206
7
   }
207
208
32
   if(((x_size == x_sw) && (x_size % 2 != 0)) || ((y_size == y_sw) && (y_size % 2 != 0))) {
209
0
      return 0;
210
0
   }
211
212
32
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
213
32
   const size_t end = (x_size < y_size) ? x_size : y_size;
214
215
32
   if(start == end) {
216
13
      if(start % 2 != 0) {
217
4
         return 0;
218
4
      }
219
9
      return start;
220
13
   }
221
222
27
   for(size_t j = start; j <= end; ++j) {
223
27
      if(j % 2 != 0) {
224
8
         continue;
225
8
      }
226
227
19
      if(2 * j > z_size) {
228
0
         return 0;
229
0
      }
230
231
19
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) {
232
19
         if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) {
233
4
            return j + 2;
234
4
         }
235
15
         return j;
236
19
      }
237
19
   }
238
239
0
   return 0;
240
19
}
241
242
/*
243
* Pick a good size for the Karatsuba squaring
244
*/
245
60
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) {
246
60
   if(x_sw == x_size) {
247
0
      if(x_sw % 2 != 0) {
248
0
         return 0;
249
0
      }
250
0
      return x_sw;
251
0
   }
252
253
80
   for(size_t j = x_sw; j <= x_size; ++j) {
254
80
      if(j % 2 != 0) {
255
20
         continue;
256
20
      }
257
258
60
      if(2 * j > z_size) {
259
20
         return 0;
260
20
      }
261
262
40
      if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) {
263
0
         return j + 2;
264
0
      }
265
40
      return j;
266
40
   }
267
268
0
   return 0;
269
60
}
270
271
template <size_t SZ>
272
1.05M
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
273
1.05M
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
274
1.05M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
272
521k
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
273
521k
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
274
521k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
272
330k
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
273
330k
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
274
330k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
272
52.2k
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
273
52.2k
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
274
52.2k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
272
51.1k
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
273
51.1k
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
274
51.1k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
272
51.1k
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
273
51.1k
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
274
51.1k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
272
51.1k
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
273
51.1k
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
274
51.1k
}
275
276
template <size_t SZ>
277
572k
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
278
572k
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
279
572k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
277
398k
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
278
398k
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
279
398k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
277
86.8k
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
278
86.8k
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
279
86.8k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
277
21.8k
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
278
21.8k
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
279
21.8k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
277
21.8k
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
278
21.8k
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
279
21.8k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
277
21.8k
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
278
21.8k
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
279
21.8k
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
277
21.8k
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
278
21.8k
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
279
21.8k
}
280
281
}  // namespace
282
283
void bigint_mul(word z[],
284
                size_t z_size,
285
                const word x[],
286
                size_t x_size,
287
                size_t x_sw,
288
                const word y[],
289
                size_t y_size,
290
                size_t y_sw,
291
                word workspace[],
292
521k
                size_t ws_size) {
293
521k
   clear_mem(z, z_size);
294
295
521k
   if(x_sw == 1) {
296
0
      bigint_linmul3(z, y, y_sw, x[0]);
297
521k
   } else if(y_sw == 1) {
298
0
      bigint_linmul3(z, x, x_sw, y[0]);
299
521k
   } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) {
300
190k
      bigint_comba_mul4(z, x, y);
301
330k
   } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) {
302
278k
      bigint_comba_mul6(z, x, y);
303
278k
   } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) {
304
1.10k
      bigint_comba_mul8(z, x, y);
305
51.1k
   } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) {
306
1
      bigint_comba_mul9(z, x, y);
307
51.1k
   } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) {
308
3
      bigint_comba_mul16(z, x, y);
309
51.1k
   } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) {
310
3
      bigint_comba_mul24(z, x, y);
311
51.1k
   } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || workspace == nullptr) {
312
51.0k
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
313
51.0k
   } else {
314
39
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
315
316
39
      if(N > 0 && z_size >= 2 * N && ws_size >= 2 * N) {
317
28
         karatsuba_mul(z, x, y, N, workspace);
318
28
      } else {
319
11
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
320
11
      }
321
39
   }
322
521k
}
323
324
/*
325
* Squaring Algorithm Dispatcher
326
*/
327
398k
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) {
328
398k
   clear_mem(z, z_size);
329
330
398k
   BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient");
331
332
398k
   if(x_sw == 1) {
333
71
      bigint_linmul3(z, x, x_sw, x[0]);
334
398k
   } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) {
335
311k
      bigint_comba_sqr4(z, x);
336
311k
   } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) {
337
65.0k
      bigint_comba_sqr6(z, x);
338
65.0k
   } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) {
339
3
      bigint_comba_sqr8(z, x);
340
21.8k
   } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) {
341
3
      bigint_comba_sqr9(z, x);
342
21.8k
   } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) {
343
1
      bigint_comba_sqr16(z, x);
344
21.8k
   } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) {
345
1
      bigint_comba_sqr24(z, x);
346
21.8k
   } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || workspace == nullptr) {
347
21.7k
      basecase_sqr(z, z_size, x, x_sw);
348
21.7k
   } else {
349
60
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
350
351
60
      if(N > 0 && z_size >= 2 * N && ws_size >= 2 * N) {
352
40
         karatsuba_sqr(z, x, N, workspace);
353
40
      } else {
354
20
         basecase_sqr(z, z_size, x, x_sw);
355
20
      }
356
60
   }
357
398k
}
358
359
}  // namespace Botan