Coverage Report

Created: 2024-06-28 06:08

/src/botan/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line
Count
Source
1
/*
2
* Ed25519 field element
3
* (C) 2017 Ribose Inc
4
*
5
* Based on the public domain code from SUPERCOP ref10 by
6
* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10
11
#include <botan/internal/ed25519_fe.h>
12
13
#include <botan/internal/ed25519_internal.h>
14
15
namespace Botan {
16
17
//static
18
267
FE_25519 FE_25519::invert(const FE_25519& z) {
19
267
   FE_25519 t0;
20
267
   FE_25519 t1;
21
267
   FE_25519 t2;
22
267
   FE_25519 t3;
23
24
267
   fe_sq(t0, z);
25
267
   fe_sq_iter(t1, t0, 2);
26
267
   fe_mul(t1, z, t1);
27
267
   fe_mul(t0, t0, t1);
28
267
   fe_sq(t2, t0);
29
267
   fe_mul(t1, t1, t2);
30
267
   fe_sq_iter(t2, t1, 5);
31
267
   fe_mul(t1, t2, t1);
32
267
   fe_sq_iter(t2, t1, 10);
33
267
   fe_mul(t2, t2, t1);
34
267
   fe_sq_iter(t3, t2, 20);
35
267
   fe_mul(t2, t3, t2);
36
267
   fe_sq_iter(t2, t2, 10);
37
267
   fe_mul(t1, t2, t1);
38
267
   fe_sq_iter(t2, t1, 50);
39
267
   fe_mul(t2, t2, t1);
40
267
   fe_sq_iter(t3, t2, 100);
41
267
   fe_mul(t2, t3, t2);
42
267
   fe_sq_iter(t2, t2, 50);
43
267
   fe_mul(t1, t2, t1);
44
267
   fe_sq_iter(t1, t1, 5);
45
46
267
   fe_mul(t0, t1, t0);
47
267
   return t0;
48
267
}
49
50
138
FE_25519 FE_25519::pow_22523(const fe& z) {
51
138
   FE_25519 t0;
52
138
   FE_25519 t1;
53
138
   FE_25519 t2;
54
55
138
   fe_sq(t0, z);
56
138
   fe_sq_iter(t1, t0, 2);
57
138
   fe_mul(t1, z, t1);
58
138
   fe_mul(t0, t0, t1);
59
138
   fe_sq(t0, t0);
60
138
   fe_mul(t0, t1, t0);
61
138
   fe_sq_iter(t1, t0, 5);
62
138
   fe_mul(t0, t1, t0);
63
138
   fe_sq_iter(t1, t0, 10);
64
138
   fe_mul(t1, t1, t0);
65
138
   fe_sq_iter(t2, t1, 20);
66
138
   fe_mul(t1, t2, t1);
67
138
   fe_sq_iter(t1, t1, 10);
68
138
   fe_mul(t0, t1, t0);
69
138
   fe_sq_iter(t1, t0, 50);
70
138
   fe_mul(t1, t1, t0);
71
138
   fe_sq_iter(t2, t1, 100);
72
138
   fe_mul(t1, t2, t1);
73
138
   fe_sq_iter(t1, t1, 50);
74
138
   fe_mul(t0, t1, t0);
75
138
   fe_sq_iter(t0, t0, 2);
76
77
138
   fe_mul(t0, t0, z);
78
138
   return t0;
79
138
}
80
81
/*
82
h = f * g
83
Can overlap h with f or g.
84
85
Preconditions:
86
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
87
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
89
Postconditions:
90
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
91
*/
92
93
/*
94
Notes on implementation strategy:
95
96
Using schoolbook multiplication.
97
Karatsuba would save a little in some cost models.
98
99
Most multiplications by 2 and 19 are 32-bit precomputations;
100
cheaper than 64-bit postcomputations.
101
102
There is one remaining multiplication by 19 in the carry chain;
103
one *19 precomputation can be merged into this,
104
but the resulting data flow is considerably less clean.
105
106
There are 12 carries below.
107
10 of them are 2-way parallelizable and vectorizable.
108
Can get away with 11 carries, but then data flow is much deeper.
109
110
With tighter constraints on inputs can squeeze carries into int32.
111
*/
112
113
//static
114
250k
FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g) {
115
250k
   const int32_t f0 = f[0];
116
250k
   const int32_t f1 = f[1];
117
250k
   const int32_t f2 = f[2];
118
250k
   const int32_t f3 = f[3];
119
250k
   const int32_t f4 = f[4];
120
250k
   const int32_t f5 = f[5];
121
250k
   const int32_t f6 = f[6];
122
250k
   const int32_t f7 = f[7];
123
250k
   const int32_t f8 = f[8];
124
250k
   const int32_t f9 = f[9];
125
126
250k
   const int32_t g0 = g[0];
127
250k
   const int32_t g1 = g[1];
128
250k
   const int32_t g2 = g[2];
129
250k
   const int32_t g3 = g[3];
130
250k
   const int32_t g4 = g[4];
131
250k
   const int32_t g5 = g[5];
132
250k
   const int32_t g6 = g[6];
133
250k
   const int32_t g7 = g[7];
134
250k
   const int32_t g8 = g[8];
135
250k
   const int32_t g9 = g[9];
136
137
250k
   const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
138
250k
   const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
139
250k
   const int32_t g3_19 = 19 * g3;
140
250k
   const int32_t g4_19 = 19 * g4;
141
250k
   const int32_t g5_19 = 19 * g5;
142
250k
   const int32_t g6_19 = 19 * g6;
143
250k
   const int32_t g7_19 = 19 * g7;
144
250k
   const int32_t g8_19 = 19 * g8;
145
250k
   const int32_t g9_19 = 19 * g9;
146
250k
   const int32_t f1_2 = 2 * f1;
147
250k
   const int32_t f3_2 = 2 * f3;
148
250k
   const int32_t f5_2 = 2 * f5;
149
250k
   const int32_t f7_2 = 2 * f7;
150
250k
   const int32_t f9_2 = 2 * f9;
151
152
250k
   const int64_t f0g0 = f0 * static_cast<int64_t>(g0);
153
250k
   const int64_t f0g1 = f0 * static_cast<int64_t>(g1);
154
250k
   const int64_t f0g2 = f0 * static_cast<int64_t>(g2);
155
250k
   const int64_t f0g3 = f0 * static_cast<int64_t>(g3);
156
250k
   const int64_t f0g4 = f0 * static_cast<int64_t>(g4);
157
250k
   const int64_t f0g5 = f0 * static_cast<int64_t>(g5);
158
250k
   const int64_t f0g6 = f0 * static_cast<int64_t>(g6);
159
250k
   const int64_t f0g7 = f0 * static_cast<int64_t>(g7);
160
250k
   const int64_t f0g8 = f0 * static_cast<int64_t>(g8);
161
250k
   const int64_t f0g9 = f0 * static_cast<int64_t>(g9);
162
250k
   const int64_t f1g0 = f1 * static_cast<int64_t>(g0);
163
250k
   const int64_t f1g1_2 = f1_2 * static_cast<int64_t>(g1);
164
250k
   const int64_t f1g2 = f1 * static_cast<int64_t>(g2);
165
250k
   const int64_t f1g3_2 = f1_2 * static_cast<int64_t>(g3);
166
250k
   const int64_t f1g4 = f1 * static_cast<int64_t>(g4);
167
250k
   const int64_t f1g5_2 = f1_2 * static_cast<int64_t>(g5);
168
250k
   const int64_t f1g6 = f1 * static_cast<int64_t>(g6);
169
250k
   const int64_t f1g7_2 = f1_2 * static_cast<int64_t>(g7);
170
250k
   const int64_t f1g8 = f1 * static_cast<int64_t>(g8);
171
250k
   const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
172
250k
   const int64_t f2g0 = f2 * static_cast<int64_t>(g0);
173
250k
   const int64_t f2g1 = f2 * static_cast<int64_t>(g1);
174
250k
   const int64_t f2g2 = f2 * static_cast<int64_t>(g2);
175
250k
   const int64_t f2g3 = f2 * static_cast<int64_t>(g3);
176
250k
   const int64_t f2g4 = f2 * static_cast<int64_t>(g4);
177
250k
   const int64_t f2g5 = f2 * static_cast<int64_t>(g5);
178
250k
   const int64_t f2g6 = f2 * static_cast<int64_t>(g6);
179
250k
   const int64_t f2g7 = f2 * static_cast<int64_t>(g7);
180
250k
   const int64_t f2g8_19 = f2 * static_cast<int64_t>(g8_19);
181
250k
   const int64_t f2g9_19 = f2 * static_cast<int64_t>(g9_19);
182
250k
   const int64_t f3g0 = f3 * static_cast<int64_t>(g0);
183
250k
   const int64_t f3g1_2 = f3_2 * static_cast<int64_t>(g1);
184
250k
   const int64_t f3g2 = f3 * static_cast<int64_t>(g2);
185
250k
   const int64_t f3g3_2 = f3_2 * static_cast<int64_t>(g3);
186
250k
   const int64_t f3g4 = f3 * static_cast<int64_t>(g4);
187
250k
   const int64_t f3g5_2 = f3_2 * static_cast<int64_t>(g5);
188
250k
   const int64_t f3g6 = f3 * static_cast<int64_t>(g6);
189
250k
   const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
190
250k
   const int64_t f3g8_19 = f3 * static_cast<int64_t>(g8_19);
191
250k
   const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
192
250k
   const int64_t f4g0 = f4 * static_cast<int64_t>(g0);
193
250k
   const int64_t f4g1 = f4 * static_cast<int64_t>(g1);
194
250k
   const int64_t f4g2 = f4 * static_cast<int64_t>(g2);
195
250k
   const int64_t f4g3 = f4 * static_cast<int64_t>(g3);
196
250k
   const int64_t f4g4 = f4 * static_cast<int64_t>(g4);
197
250k
   const int64_t f4g5 = f4 * static_cast<int64_t>(g5);
198
250k
   const int64_t f4g6_19 = f4 * static_cast<int64_t>(g6_19);
199
250k
   const int64_t f4g7_19 = f4 * static_cast<int64_t>(g7_19);
200
250k
   const int64_t f4g8_19 = f4 * static_cast<int64_t>(g8_19);
201
250k
   const int64_t f4g9_19 = f4 * static_cast<int64_t>(g9_19);
202
250k
   const int64_t f5g0 = f5 * static_cast<int64_t>(g0);
203
250k
   const int64_t f5g1_2 = f5_2 * static_cast<int64_t>(g1);
204
250k
   const int64_t f5g2 = f5 * static_cast<int64_t>(g2);
205
250k
   const int64_t f5g3_2 = f5_2 * static_cast<int64_t>(g3);
206
250k
   const int64_t f5g4 = f5 * static_cast<int64_t>(g4);
207
250k
   const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
208
250k
   const int64_t f5g6_19 = f5 * static_cast<int64_t>(g6_19);
209
250k
   const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
210
250k
   const int64_t f5g8_19 = f5 * static_cast<int64_t>(g8_19);
211
250k
   const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
212
250k
   const int64_t f6g0 = f6 * static_cast<int64_t>(g0);
213
250k
   const int64_t f6g1 = f6 * static_cast<int64_t>(g1);
214
250k
   const int64_t f6g2 = f6 * static_cast<int64_t>(g2);
215
250k
   const int64_t f6g3 = f6 * static_cast<int64_t>(g3);
216
250k
   const int64_t f6g4_19 = f6 * static_cast<int64_t>(g4_19);
217
250k
   const int64_t f6g5_19 = f6 * static_cast<int64_t>(g5_19);
218
250k
   const int64_t f6g6_19 = f6 * static_cast<int64_t>(g6_19);
219
250k
   const int64_t f6g7_19 = f6 * static_cast<int64_t>(g7_19);
220
250k
   const int64_t f6g8_19 = f6 * static_cast<int64_t>(g8_19);
221
250k
   const int64_t f6g9_19 = f6 * static_cast<int64_t>(g9_19);
222
250k
   const int64_t f7g0 = f7 * static_cast<int64_t>(g0);
223
250k
   const int64_t f7g1_2 = f7_2 * static_cast<int64_t>(g1);
224
250k
   const int64_t f7g2 = f7 * static_cast<int64_t>(g2);
225
250k
   const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
226
250k
   const int64_t f7g4_19 = f7 * static_cast<int64_t>(g4_19);
227
250k
   const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
228
250k
   const int64_t f7g6_19 = f7 * static_cast<int64_t>(g6_19);
229
250k
   const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
230
250k
   const int64_t f7g8_19 = f7 * static_cast<int64_t>(g8_19);
231
250k
   const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
232
250k
   const int64_t f8g0 = f8 * static_cast<int64_t>(g0);
233
250k
   const int64_t f8g1 = f8 * static_cast<int64_t>(g1);
234
250k
   const int64_t f8g2_19 = f8 * static_cast<int64_t>(g2_19);
235
250k
   const int64_t f8g3_19 = f8 * static_cast<int64_t>(g3_19);
236
250k
   const int64_t f8g4_19 = f8 * static_cast<int64_t>(g4_19);
237
250k
   const int64_t f8g5_19 = f8 * static_cast<int64_t>(g5_19);
238
250k
   const int64_t f8g6_19 = f8 * static_cast<int64_t>(g6_19);
239
250k
   const int64_t f8g7_19 = f8 * static_cast<int64_t>(g7_19);
240
250k
   const int64_t f8g8_19 = f8 * static_cast<int64_t>(g8_19);
241
250k
   const int64_t f8g9_19 = f8 * static_cast<int64_t>(g9_19);
242
250k
   const int64_t f9g0 = f9 * static_cast<int64_t>(g0);
243
250k
   const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
244
250k
   const int64_t f9g2_19 = f9 * static_cast<int64_t>(g2_19);
245
250k
   const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
246
250k
   const int64_t f9g4_19 = f9 * static_cast<int64_t>(g4_19);
247
250k
   const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
248
250k
   const int64_t f9g6_19 = f9 * static_cast<int64_t>(g6_19);
249
250k
   const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
250
250k
   const int64_t f9g8_19 = f9 * static_cast<int64_t>(g8_19);
251
250k
   const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
252
253
250k
   int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
254
250k
   int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
255
250k
   int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
256
250k
   int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
257
250k
   int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
258
250k
   int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
259
250k
   int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
260
250k
   int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
261
250k
   int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
262
250k
   int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
263
264
   /*
265
   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
266
   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
267
   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
268
   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
269
   */
270
250k
   carry<26>(h0, h1);
271
250k
   carry<26>(h4, h5);
272
273
   /* |h0| <= 2^25 */
274
   /* |h4| <= 2^25 */
275
   /* |h1| <= 1.71*2^59 */
276
   /* |h5| <= 1.71*2^59 */
277
278
250k
   carry<25>(h1, h2);
279
250k
   carry<25>(h5, h6);
280
281
   /* |h1| <= 2^24; from now on fits into int32 */
282
   /* |h5| <= 2^24; from now on fits into int32 */
283
   /* |h2| <= 1.41*2^60 */
284
   /* |h6| <= 1.41*2^60 */
285
286
250k
   carry<26>(h2, h3);
287
250k
   carry<26>(h6, h7);
288
   /* |h2| <= 2^25; from now on fits into int32 unchanged */
289
   /* |h6| <= 2^25; from now on fits into int32 unchanged */
290
   /* |h3| <= 1.71*2^59 */
291
   /* |h7| <= 1.71*2^59 */
292
293
250k
   carry<25>(h3, h4);
294
250k
   carry<25>(h7, h8);
295
   /* |h3| <= 2^24; from now on fits into int32 unchanged */
296
   /* |h7| <= 2^24; from now on fits into int32 unchanged */
297
   /* |h4| <= 1.72*2^34 */
298
   /* |h8| <= 1.41*2^60 */
299
300
250k
   carry<26>(h4, h5);
301
250k
   carry<26>(h8, h9);
302
   /* |h4| <= 2^25; from now on fits into int32 unchanged */
303
   /* |h8| <= 2^25; from now on fits into int32 unchanged */
304
   /* |h5| <= 1.01*2^24 */
305
   /* |h9| <= 1.71*2^59 */
306
307
250k
   carry<25, 19>(h9, h0);
308
309
   /* |h9| <= 2^24; from now on fits into int32 unchanged */
310
   /* |h0| <= 1.1*2^39 */
311
312
250k
   carry<26>(h0, h1);
313
   /* |h0| <= 2^25; from now on fits into int32 unchanged */
314
   /* |h1| <= 1.01*2^24 */
315
316
250k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
317
250k
}
318
319
/*
320
h = f * f
321
Can overlap h with f.
322
323
Preconditions:
324
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
325
326
Postconditions:
327
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
328
*/
329
330
/*
331
See fe_mul.c for discussion of implementation strategy.
332
*/
333
334
//static
335
107k
FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter) {
336
107k
   int32_t f0 = f[0];
337
107k
   int32_t f1 = f[1];
338
107k
   int32_t f2 = f[2];
339
107k
   int32_t f3 = f[3];
340
107k
   int32_t f4 = f[4];
341
107k
   int32_t f5 = f[5];
342
107k
   int32_t f6 = f[6];
343
107k
   int32_t f7 = f[7];
344
107k
   int32_t f8 = f[8];
345
107k
   int32_t f9 = f[9];
346
347
312k
   for(size_t i = 0; i != iter; ++i) {
348
205k
      const int32_t f0_2 = 2 * f0;
349
205k
      const int32_t f1_2 = 2 * f1;
350
205k
      const int32_t f2_2 = 2 * f2;
351
205k
      const int32_t f3_2 = 2 * f3;
352
205k
      const int32_t f4_2 = 2 * f4;
353
205k
      const int32_t f5_2 = 2 * f5;
354
205k
      const int32_t f6_2 = 2 * f6;
355
205k
      const int32_t f7_2 = 2 * f7;
356
205k
      const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
357
205k
      const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
358
205k
      const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
359
205k
      const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
360
205k
      const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
361
362
205k
      const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
363
205k
      const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
364
205k
      const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
365
205k
      const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
366
205k
      const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
367
205k
      const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
368
205k
      const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
369
205k
      const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
370
205k
      const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
371
205k
      const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
372
205k
      const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
373
205k
      const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
374
205k
      const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
375
205k
      const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
376
205k
      const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
377
205k
      const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
378
205k
      const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
379
205k
      const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
380
205k
      const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
381
205k
      const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
382
205k
      const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
383
205k
      const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
384
205k
      const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
385
205k
      const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
386
205k
      const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
387
205k
      const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
388
205k
      const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
389
205k
      const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
390
205k
      const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
391
205k
      const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
392
205k
      const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
393
205k
      const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
394
205k
      const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
395
205k
      const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
396
205k
      const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
397
205k
      const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
398
205k
      const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
399
205k
      const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
400
205k
      const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
401
205k
      const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
402
205k
      const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
403
205k
      const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
404
205k
      const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
405
205k
      const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
406
205k
      const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
407
205k
      const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
408
205k
      const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
409
205k
      const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
410
205k
      const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
411
205k
      const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
412
205k
      const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
413
205k
      const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
414
205k
      const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
415
205k
      const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
416
205k
      const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
417
418
205k
      int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
419
205k
      int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
420
205k
      int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
421
205k
      int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
422
205k
      int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
423
205k
      int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
424
205k
      int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
425
205k
      int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
426
205k
      int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
427
205k
      int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
428
429
205k
      carry<26>(h0, h1);
430
205k
      carry<26>(h4, h5);
431
205k
      carry<25>(h1, h2);
432
205k
      carry<25>(h5, h6);
433
205k
      carry<26>(h2, h3);
434
205k
      carry<26>(h6, h7);
435
436
205k
      carry<25>(h3, h4);
437
205k
      carry<25>(h7, h8);
438
439
205k
      carry<26>(h4, h5);
440
205k
      carry<26>(h8, h9);
441
205k
      carry<25, 19>(h9, h0);
442
205k
      carry<26>(h0, h1);
443
444
205k
      f0 = static_cast<int32_t>(h0);
445
205k
      f1 = static_cast<int32_t>(h1);
446
205k
      f2 = static_cast<int32_t>(h2);
447
205k
      f3 = static_cast<int32_t>(h3);
448
205k
      f4 = static_cast<int32_t>(h4);
449
205k
      f5 = static_cast<int32_t>(h5);
450
205k
      f6 = static_cast<int32_t>(h6);
451
205k
      f7 = static_cast<int32_t>(h7);
452
205k
      f8 = static_cast<int32_t>(h8);
453
205k
      f9 = static_cast<int32_t>(h9);
454
205k
   }
455
456
107k
   return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
457
107k
}
458
459
/*
460
h = 2 * f * f
461
Can overlap h with f.
462
463
Preconditions:
464
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
465
466
Postconditions:
467
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
468
*/
469
470
/*
471
See fe_mul.c for discussion of implementation strategy.
472
*/
473
474
//static
475
34.1k
FE_25519 FE_25519::sqr2(const FE_25519& f) {
476
34.1k
   const int32_t f0 = f[0];
477
34.1k
   const int32_t f1 = f[1];
478
34.1k
   const int32_t f2 = f[2];
479
34.1k
   const int32_t f3 = f[3];
480
34.1k
   const int32_t f4 = f[4];
481
34.1k
   const int32_t f5 = f[5];
482
34.1k
   const int32_t f6 = f[6];
483
34.1k
   const int32_t f7 = f[7];
484
34.1k
   const int32_t f8 = f[8];
485
34.1k
   const int32_t f9 = f[9];
486
34.1k
   const int32_t f0_2 = 2 * f0;
487
34.1k
   const int32_t f1_2 = 2 * f1;
488
34.1k
   const int32_t f2_2 = 2 * f2;
489
34.1k
   const int32_t f3_2 = 2 * f3;
490
34.1k
   const int32_t f4_2 = 2 * f4;
491
34.1k
   const int32_t f5_2 = 2 * f5;
492
34.1k
   const int32_t f6_2 = 2 * f6;
493
34.1k
   const int32_t f7_2 = 2 * f7;
494
34.1k
   const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
495
34.1k
   const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
496
34.1k
   const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
497
34.1k
   const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
498
34.1k
   const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
499
34.1k
   const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
500
34.1k
   const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
501
34.1k
   const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
502
34.1k
   const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
503
34.1k
   const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
504
34.1k
   const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
505
34.1k
   const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
506
34.1k
   const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
507
34.1k
   const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
508
34.1k
   const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
509
34.1k
   const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
510
34.1k
   const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
511
34.1k
   const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
512
34.1k
   const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
513
34.1k
   const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
514
34.1k
   const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
515
34.1k
   const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
516
34.1k
   const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
517
34.1k
   const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
518
34.1k
   const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
519
34.1k
   const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
520
34.1k
   const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
521
34.1k
   const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
522
34.1k
   const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
523
34.1k
   const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
524
34.1k
   const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
525
34.1k
   const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
526
34.1k
   const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
527
34.1k
   const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
528
34.1k
   const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
529
34.1k
   const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
530
34.1k
   const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
531
34.1k
   const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
532
34.1k
   const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
533
34.1k
   const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
534
34.1k
   const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
535
34.1k
   const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
536
34.1k
   const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
537
34.1k
   const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
538
34.1k
   const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
539
34.1k
   const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
540
34.1k
   const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
541
34.1k
   const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
542
34.1k
   const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
543
34.1k
   const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
544
34.1k
   const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
545
34.1k
   const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
546
34.1k
   const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
547
34.1k
   const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
548
34.1k
   const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
549
34.1k
   const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
550
34.1k
   const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
551
34.1k
   const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
552
34.1k
   const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
553
34.1k
   const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
554
555
34.1k
   int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
556
34.1k
   int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
557
34.1k
   int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
558
34.1k
   int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
559
34.1k
   int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
560
34.1k
   int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
561
34.1k
   int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
562
34.1k
   int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
563
34.1k
   int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
564
34.1k
   int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
565
566
34.1k
   h0 += h0;
567
34.1k
   h1 += h1;
568
34.1k
   h2 += h2;
569
34.1k
   h3 += h3;
570
34.1k
   h4 += h4;
571
34.1k
   h5 += h5;
572
34.1k
   h6 += h6;
573
34.1k
   h7 += h7;
574
34.1k
   h8 += h8;
575
34.1k
   h9 += h9;
576
577
34.1k
   carry<26>(h0, h1);
578
34.1k
   carry<26>(h4, h5);
579
580
34.1k
   carry<25>(h1, h2);
581
34.1k
   carry<25>(h5, h6);
582
583
34.1k
   carry<26>(h2, h3);
584
34.1k
   carry<26>(h6, h7);
585
586
34.1k
   carry<25>(h3, h4);
587
34.1k
   carry<25>(h7, h8);
588
34.1k
   carry<26>(h4, h5);
589
34.1k
   carry<26>(h8, h9);
590
34.1k
   carry<25, 19>(h9, h0);
591
34.1k
   carry<26>(h0, h1);
592
593
34.1k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
594
34.1k
}
595
596
/*
597
Ignores top bit of h.
598
*/
599
600
138
void FE_25519::from_bytes(const uint8_t s[32]) {
601
138
   int64_t h0 = load_4(s);
602
138
   int64_t h1 = load_3(s + 4) << 6;
603
138
   int64_t h2 = load_3(s + 7) << 5;
604
138
   int64_t h3 = load_3(s + 10) << 3;
605
138
   int64_t h4 = load_3(s + 13) << 2;
606
138
   int64_t h5 = load_4(s + 16);
607
138
   int64_t h6 = load_3(s + 20) << 7;
608
138
   int64_t h7 = load_3(s + 23) << 5;
609
138
   int64_t h8 = load_3(s + 26) << 4;
610
138
   int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
611
612
138
   carry<25, 19>(h9, h0);
613
138
   carry<25>(h1, h2);
614
138
   carry<25>(h3, h4);
615
138
   carry<25>(h5, h6);
616
138
   carry<25>(h7, h8);
617
618
138
   carry<26>(h0, h1);
619
138
   carry<26>(h2, h3);
620
138
   carry<26>(h4, h5);
621
138
   carry<26>(h6, h7);
622
138
   carry<26>(h8, h9);
623
624
138
   m_fe[0] = static_cast<int32_t>(h0);
625
138
   m_fe[1] = static_cast<int32_t>(h1);
626
138
   m_fe[2] = static_cast<int32_t>(h2);
627
138
   m_fe[3] = static_cast<int32_t>(h3);
628
138
   m_fe[4] = static_cast<int32_t>(h4);
629
138
   m_fe[5] = static_cast<int32_t>(h5);
630
138
   m_fe[6] = static_cast<int32_t>(h6);
631
138
   m_fe[7] = static_cast<int32_t>(h7);
632
138
   m_fe[8] = static_cast<int32_t>(h8);
633
138
   m_fe[9] = static_cast<int32_t>(h9);
634
138
}
635
636
/*
637
Preconditions:
638
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
639
640
Write p=2^255-19; q=floor(h/p).
641
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
642
643
Proof:
644
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
645
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
646
647
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
648
Then 0<y<1.
649
650
Write r=h-pq.
651
Have 0<=r<=p-1=2^255-20.
652
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
653
654
Write x=r+19(2^-255)r+y.
655
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
656
657
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
658
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
659
*/
660
661
865
void FE_25519::to_bytes(uint8_t s[32]) const {
662
865
   const int64_t X25 = (1 << 25);
663
664
865
   int32_t h0 = m_fe[0];
665
865
   int32_t h1 = m_fe[1];
666
865
   int32_t h2 = m_fe[2];
667
865
   int32_t h3 = m_fe[3];
668
865
   int32_t h4 = m_fe[4];
669
865
   int32_t h5 = m_fe[5];
670
865
   int32_t h6 = m_fe[6];
671
865
   int32_t h7 = m_fe[7];
672
865
   int32_t h8 = m_fe[8];
673
865
   int32_t h9 = m_fe[9];
674
865
   int32_t q;
675
676
865
   q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
677
865
   q = (h0 + q) >> 26;
678
865
   q = (h1 + q) >> 25;
679
865
   q = (h2 + q) >> 26;
680
865
   q = (h3 + q) >> 25;
681
865
   q = (h4 + q) >> 26;
682
865
   q = (h5 + q) >> 25;
683
865
   q = (h6 + q) >> 26;
684
865
   q = (h7 + q) >> 25;
685
865
   q = (h8 + q) >> 26;
686
865
   q = (h9 + q) >> 25;
687
688
   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
689
865
   h0 += 19 * q;
690
   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
691
692
865
   carry0<26>(h0, h1);
693
865
   carry0<25>(h1, h2);
694
865
   carry0<26>(h2, h3);
695
865
   carry0<25>(h3, h4);
696
865
   carry0<26>(h4, h5);
697
865
   carry0<25>(h5, h6);
698
865
   carry0<26>(h6, h7);
699
865
   carry0<25>(h7, h8);
700
865
   carry0<26>(h8, h9);
701
702
865
   int32_t carry9 = h9 >> 25;
703
865
   h9 -= carry9 * X25;
704
   /* h10 = carry9 */
705
706
   /*
707
   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
708
   Have h0+...+2^230 h9 between 0 and 2^255-1;
709
   evidently 2^255 h10-2^255 q = 0.
710
   Goal: Output h0+...+2^230 h9.
711
   */
712
713
865
   s[0] = static_cast<uint8_t>(h0 >> 0);
714
865
   s[1] = static_cast<uint8_t>(h0 >> 8);
715
865
   s[2] = static_cast<uint8_t>(h0 >> 16);
716
865
   s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2));
717
865
   s[4] = static_cast<uint8_t>(h1 >> 6);
718
865
   s[5] = static_cast<uint8_t>(h1 >> 14);
719
865
   s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3));
720
865
   s[7] = static_cast<uint8_t>(h2 >> 5);
721
865
   s[8] = static_cast<uint8_t>(h2 >> 13);
722
865
   s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5));
723
865
   s[10] = static_cast<uint8_t>(h3 >> 3);
724
865
   s[11] = static_cast<uint8_t>(h3 >> 11);
725
865
   s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6));
726
865
   s[13] = static_cast<uint8_t>(h4 >> 2);
727
865
   s[14] = static_cast<uint8_t>(h4 >> 10);
728
865
   s[15] = static_cast<uint8_t>(h4 >> 18);
729
865
   s[16] = static_cast<uint8_t>(h5 >> 0);
730
865
   s[17] = static_cast<uint8_t>(h5 >> 8);
731
865
   s[18] = static_cast<uint8_t>(h5 >> 16);
732
865
   s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1));
733
865
   s[20] = static_cast<uint8_t>(h6 >> 7);
734
865
   s[21] = static_cast<uint8_t>(h6 >> 15);
735
865
   s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3));
736
865
   s[23] = static_cast<uint8_t>(h7 >> 5);
737
865
   s[24] = static_cast<uint8_t>(h7 >> 13);
738
865
   s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4));
739
865
   s[26] = static_cast<uint8_t>(h8 >> 4);
740
865
   s[27] = static_cast<uint8_t>(h8 >> 12);
741
865
   s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6));
742
865
   s[29] = static_cast<uint8_t>(h9 >> 2);
743
865
   s[30] = static_cast<uint8_t>(h9 >> 10);
744
865
   s[31] = static_cast<uint8_t>(h9 >> 18);
745
865
}
746
747
}  // namespace Botan