Coverage Report

Created: 2026-06-09 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Botan-3.4.0/src/lib/math/mp/mp_karat.cpp
Line
Count
Source
1
/*
2
* Multiplication and Squaring
3
* (C) 1999-2010,2018 Jack Lloyd
4
*     2016 Matthias Gierlings
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#include <botan/internal/mp_core.h>
10
11
#include <botan/exceptn.h>
12
#include <botan/mem_ops.h>
13
#include <botan/internal/ct_utils.h>
14
15
namespace Botan {
16
17
/*
18
* Simple O(N^2) Multiplication
19
*/
20
751k
void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) {
21
751k
   if(z_size < x_size + y_size) {
22
0
      throw Invalid_Argument("basecase_mul z_size too small");
23
0
   }
24
25
751k
   const size_t x_size_8 = x_size - (x_size % 8);
26
27
751k
   clear_mem(z, z_size);
28
29
10.5M
   for(size_t i = 0; i != y_size; ++i) {
30
9.80M
      const word y_i = y[i];
31
32
9.80M
      word carry = 0;
33
34
28.3M
      for(size_t j = 0; j != x_size_8; j += 8) {
35
18.5M
         carry = word8_madd3(z + i + j, x + j, y_i, carry);
36
18.5M
      }
37
38
33.2M
      for(size_t j = x_size_8; j != x_size; ++j) {
39
23.4M
         z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry);
40
23.4M
      }
41
42
9.80M
      z[x_size + i] = carry;
43
9.80M
   }
44
751k
}
45
46
1.11M
void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) {
47
1.11M
   if(z_size < 2 * x_size) {
48
0
      throw Invalid_Argument("basecase_sqr z_size too small");
49
0
   }
50
51
1.11M
   const size_t x_size_8 = x_size - (x_size % 8);
52
53
1.11M
   clear_mem(z, z_size);
54
55
16.4M
   for(size_t i = 0; i != x_size; ++i) {
56
15.2M
      const word x_i = x[i];
57
58
15.2M
      word carry = 0;
59
60
51.0M
      for(size_t j = 0; j != x_size_8; j += 8) {
61
35.7M
         carry = word8_madd3(z + i + j, x + j, x_i, carry);
62
35.7M
      }
63
64
53.1M
      for(size_t j = x_size_8; j != x_size; ++j) {
65
37.8M
         z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry);
66
37.8M
      }
67
68
15.2M
      z[x_size + i] = carry;
69
15.2M
   }
70
1.11M
}
71
72
namespace {
73
74
const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
75
const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
76
77
/*
78
* Karatsuba Multiplication Operation
79
*/
80
774k
void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) {
81
774k
   if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) {
82
579k
      switch(N) {
83
0
         case 6:
84
0
            return bigint_comba_mul6(z, x, y);
85
0
         case 8:
86
0
            return bigint_comba_mul8(z, x, y);
87
0
         case 9:
88
0
            return bigint_comba_mul9(z, x, y);
89
459k
         case 16:
90
459k
            return bigint_comba_mul16(z, x, y);
91
114k
         case 24:
92
114k
            return bigint_comba_mul24(z, x, y);
93
5.15k
         default:
94
5.15k
            return basecase_mul(z, 2 * N, x, N, y, N);
95
579k
      }
96
579k
   }
97
98
194k
   const size_t N2 = N / 2;
99
100
194k
   const word* x0 = x;
101
194k
   const word* x1 = x + N2;
102
194k
   const word* y0 = y;
103
194k
   const word* y1 = y + N2;
104
194k
   word* z0 = z;
105
194k
   word* z1 = z + N;
106
107
194k
   word* ws0 = workspace;
108
194k
   word* ws1 = workspace + N;
109
110
194k
   clear_mem(workspace, 2 * N);
111
112
   /*
113
   * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
114
   * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
115
   * anything, clear_mem above already set the correct result.
116
   *
117
   * However we ignore the result of the comparisons and always perform the
118
   * subtractions and recursively multiply to avoid the timing channel.
119
   */
120
121
   // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
122
194k
   const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
123
194k
   const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
124
194k
   const auto neg_mask = ~(cmp0 ^ cmp1);
125
126
194k
   karatsuba_mul(ws0, z0, z1, N2, ws1);
127
128
   // Compute X_lo * Y_lo
129
194k
   karatsuba_mul(z0, x0, y0, N2, ws1);
130
131
   // Compute X_hi * Y_hi
132
194k
   karatsuba_mul(z1, x1, y1, N2, ws1);
133
134
194k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
135
194k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
136
137
194k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
138
194k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
139
140
194k
   clear_mem(workspace + N, N2);
141
142
194k
   bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2 * N - N2);
143
194k
}
144
145
/*
146
* Karatsuba Squaring Operation
147
*/
148
1.29M
void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) {
149
1.29M
   if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) {
150
991k
      switch(N) {
151
0
         case 6:
152
0
            return bigint_comba_sqr6(z, x);
153
0
         case 8:
154
0
            return bigint_comba_sqr8(z, x);
155
0
         case 9:
156
0
            return bigint_comba_sqr9(z, x);
157
723k
         case 16:
158
723k
            return bigint_comba_sqr16(z, x);
159
180k
         case 24:
160
180k
            return bigint_comba_sqr24(z, x);
161
88.0k
         default:
162
88.0k
            return basecase_sqr(z, 2 * N, x, N);
163
991k
      }
164
991k
   }
165
166
303k
   const size_t N2 = N / 2;
167
168
303k
   const word* x0 = x;
169
303k
   const word* x1 = x + N2;
170
303k
   word* z0 = z;
171
303k
   word* z1 = z + N;
172
173
303k
   word* ws0 = workspace;
174
303k
   word* ws1 = workspace + N;
175
176
303k
   clear_mem(workspace, 2 * N);
177
178
   // See comment in karatsuba_mul
179
303k
   bigint_sub_abs(z0, x0, x1, N2, workspace);
180
303k
   karatsuba_sqr(ws0, z0, N2, ws1);
181
182
303k
   karatsuba_sqr(z0, x0, N2, ws1);
183
303k
   karatsuba_sqr(z1, x1, N2, ws1);
184
185
303k
   const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
186
303k
   word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
187
188
303k
   z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
189
303k
   bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
190
191
   /*
192
   * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
193
   * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
194
   * timing channel.
195
   */
196
303k
   bigint_sub2(z + N2, 2 * N - N2, ws0, N);
197
303k
}
198
199
/*
200
* Pick a good size for the Karatsuba multiply
201
*/
202
259k
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) {
203
259k
   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) {
204
344
      return 0;
205
344
   }
206
207
258k
   if(((x_size == x_sw) && (x_size % 2)) || ((y_size == y_sw) && (y_size % 2))) {
208
0
      return 0;
209
0
   }
210
211
258k
   const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
212
258k
   const size_t end = (x_size < y_size) ? x_size : y_size;
213
214
258k
   if(start == end) {
215
7.81k
      if(start % 2) {
216
0
         return 0;
217
0
      }
218
7.81k
      return start;
219
7.81k
   }
220
221
318k
   for(size_t j = start; j <= end; ++j) {
222
318k
      if(j % 2) {
223
67.9k
         continue;
224
67.9k
      }
225
226
250k
      if(2 * j > z_size) {
227
61.2k
         return 0;
228
61.2k
      }
229
230
189k
      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) {
231
189k
         if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) {
232
6.73k
            return j + 2;
233
6.73k
         }
234
182k
         return j;
235
189k
      }
236
189k
   }
237
238
0
   return 0;
239
250k
}
240
241
/*
242
* Pick a good size for the Karatsuba squaring
243
*/
244
1.16M
size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) {
245
1.16M
   if(x_sw == x_size) {
246
0
      if(x_sw % 2) {
247
0
         return 0;
248
0
      }
249
0
      return x_sw;
250
0
   }
251
252
1.95M
   for(size_t j = x_sw; j <= x_size; ++j) {
253
1.95M
      if(j % 2) {
254
784k
         continue;
255
784k
      }
256
257
1.16M
      if(2 * j > z_size) {
258
784k
         return 0;
259
784k
      }
260
261
385k
      if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) {
262
422
         return j + 2;
263
422
      }
264
384k
      return j;
265
385k
   }
266
267
0
   return 0;
268
1.16M
}
269
270
template <size_t SZ>
271
161M
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
272
161M
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
273
161M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<4ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
271
64.6M
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
272
64.6M
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
273
64.6M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<6ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
271
34.1M
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
272
34.1M
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
273
34.1M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<8ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
271
31.1M
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
272
31.1M
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
273
31.1M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<9ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
271
29.1M
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
272
29.1M
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
273
29.1M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<16ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
271
1.70M
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
272
1.70M
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
273
1.70M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_mul<24ul>(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Line
Count
Source
271
941k
inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
272
941k
   return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
273
941k
}
274
275
template <size_t SZ>
276
150M
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
277
150M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
278
150M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<4ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
276
56.8M
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
277
56.8M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
278
56.8M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<6ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
276
32.3M
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
277
32.3M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
278
32.3M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<8ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
276
29.7M
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
277
29.7M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
278
29.7M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<9ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
276
27.7M
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
277
27.7M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
278
27.7M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<16ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
276
2.45M
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
277
2.45M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
278
2.45M
}
mp_karat.cpp:bool Botan::(anonymous namespace)::sized_for_comba_sqr<24ul>(unsigned long, unsigned long, unsigned long)
Line
Count
Source
276
1.41M
inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
277
1.41M
   return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
278
1.41M
}
279
280
}  // namespace
281
282
void bigint_mul(word z[],
283
                size_t z_size,
284
                const word x[],
285
                size_t x_size,
286
                size_t x_sw,
287
                const word y[],
288
                size_t y_size,
289
                size_t y_sw,
290
                word workspace[],
291
64.7M
                size_t ws_size) {
292
64.7M
   clear_mem(z, z_size);
293
294
64.7M
   if(x_sw == 1) {
295
7.49k
      bigint_linmul3(z, y, y_sw, x[0]);
296
64.6M
   } else if(y_sw == 1) {
297
0
      bigint_linmul3(z, x, x_sw, y[0]);
298
64.6M
   } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) {
299
30.5M
      bigint_comba_mul4(z, x, y);
300
34.1M
   } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) {
301
3.00M
      bigint_comba_mul6(z, x, y);
302
31.1M
   } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) {
303
2.07M
      bigint_comba_mul8(z, x, y);
304
29.1M
   } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) {
305
27.4M
      bigint_comba_mul9(z, x, y);
306
27.4M
   } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) {
307
767k
      bigint_comba_mul16(z, x, y);
308
941k
   } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) {
309
4.49k
      bigint_comba_mul24(z, x, y);
310
937k
   } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || !workspace) {
311
678k
      basecase_mul(z, z_size, x, x_sw, y, y_sw);
312
678k
   } else {
313
259k
      const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
314
315
259k
      if(N && z_size >= 2 * N && ws_size >= 2 * N) {
316
191k
         karatsuba_mul(z, x, y, N, workspace);
317
191k
      } else {
318
67.7k
         basecase_mul(z, z_size, x, x_sw, y, y_sw);
319
67.7k
      }
320
259k
   }
321
64.7M
}
322
323
/*
324
* Squaring Algorithm Dispatcher
325
*/
326
56.8M
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) {
327
56.8M
   clear_mem(z, z_size);
328
329
56.8M
   BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient");
330
331
56.8M
   if(x_sw == 1) {
332
12.6k
      bigint_linmul3(z, x, x_sw, x[0]);
333
56.8M
   } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) {
334
24.5M
      bigint_comba_sqr4(z, x);
335
32.3M
   } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) {
336
2.67M
      bigint_comba_sqr6(z, x);
337
29.7M
   } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) {
338
1.96M
      bigint_comba_sqr8(z, x);
339
27.7M
   } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) {
340
25.2M
      bigint_comba_sqr9(z, x);
341
25.2M
   } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) {
342
1.04M
      bigint_comba_sqr16(z, x);
343
1.41M
   } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) {
344
304
      bigint_comba_sqr24(z, x);
345
1.41M
   } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) {
346
245k
      basecase_sqr(z, z_size, x, x_sw);
347
1.16M
   } else {
348
1.16M
      const size_t N = karatsuba_size(z_size, x_size, x_sw);
349
350
1.16M
      if(N && z_size >= 2 * N && ws_size >= 2 * N) {
351
384k
         karatsuba_sqr(z, x, N, workspace);
352
784k
      } else {
353
784k
         basecase_sqr(z, z_size, x, x_sw);
354
784k
      }
355
1.16M
   }
356
56.8M
}
357
358
}  // namespace Botan