Coverage Report

Created: 2020-06-30 13:58

/src/botan/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line
Count
Source
1
/*
2
* Ed25519 field element
3
* (C) 2017 Ribose Inc
4
*
5
* Based on the public domain code from SUPERCOP ref10 by
6
* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10
11
#include <botan/internal/ed25519_fe.h>
12
#include <botan/internal/ed25519_internal.h>
13
14
namespace Botan {
15
16
//static
17
FE_25519 FE_25519::invert(const FE_25519& z)
   {
   /*
   * Fixed chain of squarings and multiplications applied to z. Every
   * intermediate gets its own variable instead of recycling temporaries.
   *
   * Naming assumes fe_sq squares once, fe_sq_iter(.., n) squares n times
   * and fe_mul multiplies (helpers are declared elsewhere -- confirm).
   * Under that reading zN holds z^N, z_A_B holds z^(2^A - 2^B), and the
   * returned value is z^(2^255 - 21).
   */
   fe z2;
   fe_sq(z2, z);

   fe z8;
   fe_sq_iter(z8, z2, 2);

   fe z9;
   fe_mul(z9, z, z8);

   fe z11;
   fe_mul(z11, z2, z9);

   fe z22;
   fe_sq(z22, z11);

   fe z_5_0;
   fe_mul(z_5_0, z9, z22);

   fe z_10_5;
   fe_sq_iter(z_10_5, z_5_0, 5);
   fe z_10_0;
   fe_mul(z_10_0, z_10_5, z_5_0);

   fe z_20_10;
   fe_sq_iter(z_20_10, z_10_0, 10);
   fe z_20_0;
   fe_mul(z_20_0, z_20_10, z_10_0);

   fe z_40_20;
   fe_sq_iter(z_40_20, z_20_0, 20);
   fe z_40_0;
   fe_mul(z_40_0, z_40_20, z_20_0);

   fe z_50_10;
   fe_sq_iter(z_50_10, z_40_0, 10);
   fe z_50_0;
   fe_mul(z_50_0, z_50_10, z_10_0);

   fe z_100_50;
   fe_sq_iter(z_100_50, z_50_0, 50);
   fe z_100_0;
   fe_mul(z_100_0, z_100_50, z_50_0);

   fe z_200_100;
   fe_sq_iter(z_200_100, z_100_0, 100);
   fe z_200_0;
   fe_mul(z_200_0, z_200_100, z_100_0);

   fe z_250_50;
   fe_sq_iter(z_250_50, z_200_0, 50);
   fe z_250_0;
   fe_mul(z_250_0, z_250_50, z_50_0);

   fe z_255_5;
   fe_sq_iter(z_255_5, z_250_0, 5);

   // Final step folds in z^11
   fe result;
   fe_mul(result, z_255_5, z11);
   return result;
   }
49
50
FE_25519 FE_25519::pow_22523(const fe& z)
   {
   /*
   * Fixed chain of squarings and multiplications applied to z, with one
   * variable per intermediate value.
   *
   * Naming assumes fe_sq squares once, fe_sq_iter(.., n) squares n times
   * and fe_mul multiplies (helpers are declared elsewhere -- confirm).
   * Under that reading zN holds z^N, z_A_B holds z^(2^A - 2^B), and the
   * returned value is z^(2^252 - 3).
   */
   fe z2;
   fe_sq(z2, z);

   fe z8;
   fe_sq_iter(z8, z2, 2);

   fe z9;
   fe_mul(z9, z, z8);

   fe z11;
   fe_mul(z11, z2, z9);

   fe z22;
   fe_sq(z22, z11);

   fe z_5_0;
   fe_mul(z_5_0, z9, z22);

   fe z_10_5;
   fe_sq_iter(z_10_5, z_5_0, 5);
   fe z_10_0;
   fe_mul(z_10_0, z_10_5, z_5_0);

   fe z_20_10;
   fe_sq_iter(z_20_10, z_10_0, 10);
   fe z_20_0;
   fe_mul(z_20_0, z_20_10, z_10_0);

   fe z_40_20;
   fe_sq_iter(z_40_20, z_20_0, 20);
   fe z_40_0;
   fe_mul(z_40_0, z_40_20, z_20_0);

   fe z_50_10;
   fe_sq_iter(z_50_10, z_40_0, 10);
   fe z_50_0;
   fe_mul(z_50_0, z_50_10, z_10_0);

   fe z_100_50;
   fe_sq_iter(z_100_50, z_50_0, 50);
   fe z_100_0;
   fe_mul(z_100_0, z_100_50, z_50_0);

   fe z_200_100;
   fe_sq_iter(z_200_100, z_100_0, 100);
   fe z_200_0;
   fe_mul(z_200_0, z_200_100, z_100_0);

   fe z_250_50;
   fe_sq_iter(z_250_50, z_200_0, 50);
   fe z_250_0;
   fe_mul(z_250_0, z_250_50, z_50_0);

   fe z_252_2;
   fe_sq_iter(z_252_2, z_250_0, 2);

   // Final step folds in one more factor of z
   fe result;
   fe_mul(result, z_252_2, z);
   return result;
   }
81
82
/*
83
h = f * g
84
Can overlap h with f or g.
85
86
Preconditions:
87
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
89
90
Postconditions:
91
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
92
*/
93
94
/*
95
Notes on implementation strategy:
96
97
Using schoolbook multiplication.
98
Karatsuba would save a little in some cost models.
99
100
Most multiplications by 2 and 19 are 32-bit precomputations;
101
cheaper than 64-bit postcomputations.
102
103
There is one remaining multiplication by 19 in the carry chain;
104
one *19 precomputation can be merged into this,
105
but the resulting data flow is considerably less clean.
106
107
There are 12 carries below.
108
10 of them are 2-way parallelizable and vectorizable.
109
Can get away with 11 carries, but then data flow is much deeper.
110
111
With tighter constraints on inputs can squeeze carries into int32.
112
*/
113
114
//static
115
/*
* Multiply two field elements given as ten signed 32-bit limbs each.
*
* @param f first factor, read as f[0]..f[9]
* @param g second factor, read as g[0]..g[9]
* @return FE_25519 constructed from the ten limbs h0..h9 left after the
*         carry chain at the end of the function
*
* Layout of the computation: 32-bit precomputations (19*g_j, 2*f_i), then
* all 100 limb products in 64 bits, then ten column sums, then carries.
* carry<> is defined elsewhere; from its use here it presumably moves the
* bits above the given position from its first argument into its second,
* with carry<25,19> scaling by 19 when wrapping h9 back into h0 -- confirm
* against its definition.
*/
FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g)
116
102k
   {
117
102k
   const int32_t f0 = f[0];
118
102k
   const int32_t f1 = f[1];
119
102k
   const int32_t f2 = f[2];
120
102k
   const int32_t f3 = f[3];
121
102k
   const int32_t f4 = f[4];
122
102k
   const int32_t f5 = f[5];
123
102k
   const int32_t f6 = f[6];
124
102k
   const int32_t f7 = f[7];
125
102k
   const int32_t f8 = f[8];
126
102k
   const int32_t f9 = f[9];
127
102k
128
102k
   const int32_t g0 = g[0];
129
102k
   const int32_t g1 = g[1];
130
102k
   const int32_t g2 = g[2];
131
102k
   const int32_t g3 = g[3];
132
102k
   const int32_t g4 = g[4];
133
102k
   const int32_t g5 = g[5];
134
102k
   const int32_t g6 = g[6];
135
102k
   const int32_t g7 = g[7];
136
102k
   const int32_t g8 = g[8];
137
102k
   const int32_t g9 = g[9];
138
102k
139
102k
   /* 32-bit precomputations: 19*g_j feeds the products whose limb indices */
   /* sum to 10 or more; 2*f_i (odd i) feeds products of two odd limbs */
   const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
140
102k
   const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
141
102k
   const int32_t g3_19 = 19 * g3;
142
102k
   const int32_t g4_19 = 19 * g4;
143
102k
   const int32_t g5_19 = 19 * g5;
144
102k
   const int32_t g6_19 = 19 * g6;
145
102k
   const int32_t g7_19 = 19 * g7;
146
102k
   const int32_t g8_19 = 19 * g8;
147
102k
   const int32_t g9_19 = 19 * g9;
148
102k
   const int32_t f1_2 = 2 * f1;
149
102k
   const int32_t f3_2 = 2 * f3;
150
102k
   const int32_t f5_2 = 2 * f5;
151
102k
   const int32_t f7_2 = 2 * f7;
152
102k
   const int32_t f9_2 = 2 * f9;
153
102k
154
102k
   /* All 100 limb products; the cast on the right operand makes each */
   /* multiplication 64-bit. Name suffix = extra factor folded in. */
   const int64_t f0g0    = f0   * static_cast<int64_t>(g0);
155
102k
   const int64_t f0g1    = f0   * static_cast<int64_t>(g1);
156
102k
   const int64_t f0g2    = f0   * static_cast<int64_t>(g2);
157
102k
   const int64_t f0g3    = f0   * static_cast<int64_t>(g3);
158
102k
   const int64_t f0g4    = f0   * static_cast<int64_t>(g4);
159
102k
   const int64_t f0g5    = f0   * static_cast<int64_t>(g5);
160
102k
   const int64_t f0g6    = f0   * static_cast<int64_t>(g6);
161
102k
   const int64_t f0g7    = f0   * static_cast<int64_t>(g7);
162
102k
   const int64_t f0g8    = f0   * static_cast<int64_t>(g8);
163
102k
   const int64_t f0g9    = f0   * static_cast<int64_t>(g9);
164
102k
   const int64_t f1g0    = f1   * static_cast<int64_t>(g0);
165
102k
   const int64_t f1g1_2  = f1_2 * static_cast<int64_t>(g1);
166
102k
   const int64_t f1g2    = f1   * static_cast<int64_t>(g2);
167
102k
   const int64_t f1g3_2  = f1_2 * static_cast<int64_t>(g3);
168
102k
   const int64_t f1g4    = f1   * static_cast<int64_t>(g4);
169
102k
   const int64_t f1g5_2  = f1_2 * static_cast<int64_t>(g5);
170
102k
   const int64_t f1g6    = f1   * static_cast<int64_t>(g6);
171
102k
   const int64_t f1g7_2  = f1_2 * static_cast<int64_t>(g7);
172
102k
   const int64_t f1g8    = f1   * static_cast<int64_t>(g8);
173
102k
   const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
174
102k
   const int64_t f2g0    = f2   * static_cast<int64_t>(g0);
175
102k
   const int64_t f2g1    = f2   * static_cast<int64_t>(g1);
176
102k
   const int64_t f2g2    = f2   * static_cast<int64_t>(g2);
177
102k
   const int64_t f2g3    = f2   * static_cast<int64_t>(g3);
178
102k
   const int64_t f2g4    = f2   * static_cast<int64_t>(g4);
179
102k
   const int64_t f2g5    = f2   * static_cast<int64_t>(g5);
180
102k
   const int64_t f2g6    = f2   * static_cast<int64_t>(g6);
181
102k
   const int64_t f2g7    = f2   * static_cast<int64_t>(g7);
182
102k
   const int64_t f2g8_19 = f2   * static_cast<int64_t>(g8_19);
183
102k
   const int64_t f2g9_19 = f2   * static_cast<int64_t>(g9_19);
184
102k
   const int64_t f3g0    = f3   * static_cast<int64_t>(g0);
185
102k
   const int64_t f3g1_2  = f3_2 * static_cast<int64_t>(g1);
186
102k
   const int64_t f3g2    = f3   * static_cast<int64_t>(g2);
187
102k
   const int64_t f3g3_2  = f3_2 * static_cast<int64_t>(g3);
188
102k
   const int64_t f3g4    = f3   * static_cast<int64_t>(g4);
189
102k
   const int64_t f3g5_2  = f3_2 * static_cast<int64_t>(g5);
190
102k
   const int64_t f3g6    = f3   * static_cast<int64_t>(g6);
191
102k
   const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
192
102k
   const int64_t f3g8_19 = f3   * static_cast<int64_t>(g8_19);
193
102k
   const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
194
102k
   const int64_t f4g0    = f4   * static_cast<int64_t>(g0);
195
102k
   const int64_t f4g1    = f4   * static_cast<int64_t>(g1);
196
102k
   const int64_t f4g2    = f4   * static_cast<int64_t>(g2);
197
102k
   const int64_t f4g3    = f4   * static_cast<int64_t>(g3);
198
102k
   const int64_t f4g4    = f4   * static_cast<int64_t>(g4);
199
102k
   const int64_t f4g5    = f4   * static_cast<int64_t>(g5);
200
102k
   const int64_t f4g6_19 = f4   * static_cast<int64_t>(g6_19);
201
102k
   const int64_t f4g7_19 = f4   * static_cast<int64_t>(g7_19);
202
102k
   const int64_t f4g8_19 = f4   * static_cast<int64_t>(g8_19);
203
102k
   const int64_t f4g9_19 = f4   * static_cast<int64_t>(g9_19);
204
102k
   const int64_t f5g0    = f5   * static_cast<int64_t>(g0);
205
102k
   const int64_t f5g1_2  = f5_2 * static_cast<int64_t>(g1);
206
102k
   const int64_t f5g2    = f5   * static_cast<int64_t>(g2);
207
102k
   const int64_t f5g3_2  = f5_2 * static_cast<int64_t>(g3);
208
102k
   const int64_t f5g4    = f5   * static_cast<int64_t>(g4);
209
102k
   const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
210
102k
   const int64_t f5g6_19 = f5   * static_cast<int64_t>(g6_19);
211
102k
   const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
212
102k
   const int64_t f5g8_19 = f5   * static_cast<int64_t>(g8_19);
213
102k
   const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
214
102k
   const int64_t f6g0    = f6   * static_cast<int64_t>(g0);
215
102k
   const int64_t f6g1    = f6   * static_cast<int64_t>(g1);
216
102k
   const int64_t f6g2    = f6   * static_cast<int64_t>(g2);
217
102k
   const int64_t f6g3    = f6   * static_cast<int64_t>(g3);
218
102k
   const int64_t f6g4_19 = f6   * static_cast<int64_t>(g4_19);
219
102k
   const int64_t f6g5_19 = f6   * static_cast<int64_t>(g5_19);
220
102k
   const int64_t f6g6_19 = f6   * static_cast<int64_t>(g6_19);
221
102k
   const int64_t f6g7_19 = f6   * static_cast<int64_t>(g7_19);
222
102k
   const int64_t f6g8_19 = f6   * static_cast<int64_t>(g8_19);
223
102k
   const int64_t f6g9_19 = f6   * static_cast<int64_t>(g9_19);
224
102k
   const int64_t f7g0    = f7   * static_cast<int64_t>(g0);
225
102k
   const int64_t f7g1_2  = f7_2 * static_cast<int64_t>(g1);
226
102k
   const int64_t f7g2    = f7   * static_cast<int64_t>(g2);
227
102k
   const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
228
102k
   const int64_t f7g4_19 = f7   * static_cast<int64_t>(g4_19);
229
102k
   const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
230
102k
   const int64_t f7g6_19 = f7   * static_cast<int64_t>(g6_19);
231
102k
   const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
232
102k
   const int64_t f7g8_19 = f7   * static_cast<int64_t>(g8_19);
233
102k
   const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
234
102k
   const int64_t f8g0    = f8   * static_cast<int64_t>(g0);
235
102k
   const int64_t f8g1    = f8   * static_cast<int64_t>(g1);
236
102k
   const int64_t f8g2_19 = f8   * static_cast<int64_t>(g2_19);
237
102k
   const int64_t f8g3_19 = f8   * static_cast<int64_t>(g3_19);
238
102k
   const int64_t f8g4_19 = f8   * static_cast<int64_t>(g4_19);
239
102k
   const int64_t f8g5_19 = f8   * static_cast<int64_t>(g5_19);
240
102k
   const int64_t f8g6_19 = f8   * static_cast<int64_t>(g6_19);
241
102k
   const int64_t f8g7_19 = f8   * static_cast<int64_t>(g7_19);
242
102k
   const int64_t f8g8_19 = f8   * static_cast<int64_t>(g8_19);
243
102k
   const int64_t f8g9_19 = f8   * static_cast<int64_t>(g9_19);
244
102k
   const int64_t f9g0    = f9   * static_cast<int64_t>(g0);
245
102k
   const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
246
102k
   const int64_t f9g2_19 = f9   * static_cast<int64_t>(g2_19);
247
102k
   const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
248
102k
   const int64_t f9g4_19 = f9   * static_cast<int64_t>(g4_19);
249
102k
   const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
250
102k
   const int64_t f9g6_19 = f9   * static_cast<int64_t>(g6_19);
251
102k
   const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
252
102k
   const int64_t f9g8_19 = f9   * static_cast<int64_t>(g8_19);
253
102k
   const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
254
102k
255
102k
   /* Column sums: h_k adds every product f_i*g_j with (i + j) mod 10 == k */
   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
256
102k
   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
257
102k
   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
258
102k
   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
259
102k
   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
260
102k
   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
261
102k
   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
262
102k
   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
263
102k
   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
264
102k
   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
265
102k
266
102k
   /*
267
102k
   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
268
102k
   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
269
102k
   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
270
102k
   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
271
102k
   */
   /* Carry chain: two interleaved chains (starting at h0 and h4), then the */
   /* wrap from h9 into h0 and one last carry out of h0 */
272
102k
   carry<26>(h0, h1);
273
102k
   carry<26>(h4, h5);
274
102k
275
102k
   /* |h0| <= 2^25 */
276
102k
   /* |h4| <= 2^25 */
277
102k
   /* |h1| <= 1.71*2^59 */
278
102k
   /* |h5| <= 1.71*2^59 */
279
102k
280
102k
   carry<25>(h1, h2);
281
102k
   carry<25>(h5, h6);
282
102k
283
102k
   /* |h1| <= 2^24; from now on fits into int32 */
284
102k
   /* |h5| <= 2^24; from now on fits into int32 */
285
102k
   /* |h2| <= 1.41*2^60 */
286
102k
   /* |h6| <= 1.41*2^60 */
287
102k
288
102k
   carry<26>(h2, h3);
289
102k
   carry<26>(h6, h7);
290
102k
   /* |h2| <= 2^25; from now on fits into int32 unchanged */
291
102k
   /* |h6| <= 2^25; from now on fits into int32 unchanged */
292
102k
   /* |h3| <= 1.71*2^59 */
293
102k
   /* |h7| <= 1.71*2^59 */
294
102k
295
102k
   carry<25>(h3, h4);
296
102k
   carry<25>(h7, h8);
297
102k
   /* |h3| <= 2^24; from now on fits into int32 unchanged */
298
102k
   /* |h7| <= 2^24; from now on fits into int32 unchanged */
299
102k
   /* |h4| <= 1.72*2^34 */
300
102k
   /* |h8| <= 1.41*2^60 */
301
102k
302
102k
   carry<26>(h4, h5);
303
102k
   carry<26>(h8, h9);
304
102k
   /* |h4| <= 2^25; from now on fits into int32 unchanged */
305
102k
   /* |h8| <= 2^25; from now on fits into int32 unchanged */
306
102k
   /* |h5| <= 1.01*2^24 */
307
102k
   /* |h9| <= 1.71*2^59 */
308
102k
309
102k
   carry<25, 19>(h9, h0);
310
102k
311
102k
   /* |h9| <= 2^24; from now on fits into int32 unchanged */
312
102k
   /* |h0| <= 1.1*2^39 */
313
102k
314
102k
   carry<26>(h0, h1);
315
102k
   /* |h0| <= 2^25; from now on fits into int32 unchanged */
316
102k
   /* |h1| <= 1.01*2^24 */
317
102k
318
102k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
319
102k
   }
320
321
/*
322
h = f * f
323
Can overlap h with f.
324
325
Preconditions:
326
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
327
328
Postconditions:
329
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
330
*/
331
332
/*
333
See the implementation notes above FE_25519::mul for discussion of implementation strategy.
334
*/
335
336
//static
337
/*
* Square a field element repeatedly.
*
* @param f input element, read as ten signed 32-bit limbs f[0]..f[9]
* @param iter number of loop passes; each pass replaces the working limbs
*        with the carried limbs of their square
* @return FE_25519 constructed from the working limbs after the last pass;
*         with iter == 0 the loop body never runs and the limbs read from f
*         are passed through as they are
*
* carry<> is defined elsewhere; the carry order is the same as in
* FE_25519::mul.
*/
FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter)
338
58.6k
   {
339
58.6k
   int32_t f0 = f[0];
340
58.6k
   int32_t f1 = f[1];
341
58.6k
   int32_t f2 = f[2];
342
58.6k
   int32_t f3 = f[3];
343
58.6k
   int32_t f4 = f[4];
344
58.6k
   int32_t f5 = f[5];
345
58.6k
   int32_t f6 = f[6];
346
58.6k
   int32_t f7 = f[7];
347
58.6k
   int32_t f8 = f[8];
348
58.6k
   int32_t f9 = f[9];
349
58.6k
350
154k
   for(size_t i = 0; i != iter; ++i)
351
96.3k
      {
      /* 32-bit precomputations: doubled limbs, and limbs pre-multiplied */
      /* by 19 or 38 */
352
96.3k
      const int32_t f0_2 = 2 * f0;
353
96.3k
      const int32_t f1_2 = 2 * f1;
354
96.3k
      const int32_t f2_2 = 2 * f2;
355
96.3k
      const int32_t f3_2 = 2 * f3;
356
96.3k
      const int32_t f4_2 = 2 * f4;
357
96.3k
      const int32_t f5_2 = 2 * f5;
358
96.3k
      const int32_t f6_2 = 2 * f6;
359
96.3k
      const int32_t f7_2 = 2 * f7;
360
96.3k
      const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
361
96.3k
      const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
362
96.3k
      const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
363
96.3k
      const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
364
96.3k
      const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
365
96.3k
366
96.3k
      /* One 64-bit product per unordered limb pair (55 in total); the */
      /* name suffix is the overall factor folded into the product */
      const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
367
96.3k
      const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
368
96.3k
      const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
369
96.3k
      const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
370
96.3k
      const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
371
96.3k
      const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
372
96.3k
      const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
373
96.3k
      const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
374
96.3k
      const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
375
96.3k
      const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
376
96.3k
      const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
377
96.3k
      const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
378
96.3k
      const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
379
96.3k
      const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
380
96.3k
      const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
381
96.3k
      const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
382
96.3k
      const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
383
96.3k
      const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
384
96.3k
      const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
385
96.3k
      const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
386
96.3k
      const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
387
96.3k
      const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
388
96.3k
      const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
389
96.3k
      const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
390
96.3k
      const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
391
96.3k
      const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
392
96.3k
      const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
393
96.3k
      const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
394
96.3k
      const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
395
96.3k
      const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
396
96.3k
      const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
397
96.3k
      const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
398
96.3k
      const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
399
96.3k
      const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
400
96.3k
      const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
401
96.3k
      const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
402
96.3k
      const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
403
96.3k
      const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
404
96.3k
      const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
405
96.3k
      const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
406
96.3k
      const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
407
96.3k
      const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
408
96.3k
      const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
409
96.3k
      const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
410
96.3k
      const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
411
96.3k
      const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
412
96.3k
      const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
413
96.3k
      const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
414
96.3k
      const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
415
96.3k
      const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
416
96.3k
      const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
417
96.3k
      const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
418
96.3k
      const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
419
96.3k
      const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
420
96.3k
      const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
421
96.3k
422
96.3k
      /* Column sums: h_k adds the products whose limb indices sum to k */
      /* or to k + 10 */
      int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
423
96.3k
      int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
424
96.3k
      int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
425
96.3k
      int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
426
96.3k
      int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
427
96.3k
      int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
428
96.3k
      int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
429
96.3k
      int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
430
96.3k
      int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
431
96.3k
      int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
432
96.3k
433
96.3k
      carry<26>(h0, h1);
434
96.3k
      carry<26>(h4, h5);
435
96.3k
      carry<25>(h1, h2);
436
96.3k
      carry<25>(h5, h6);
437
96.3k
      carry<26>(h2, h3);
438
96.3k
      carry<26>(h6, h7);
439
96.3k
440
96.3k
      carry<25>(h3, h4);
441
96.3k
      carry<25>(h7, h8);
442
96.3k
443
96.3k
      carry<26>(h4, h5);
444
96.3k
      carry<26>(h8, h9);
445
96.3k
      carry<25,19>(h9, h0);
446
96.3k
      carry<26>(h0, h1);
447
96.3k
448
96.3k
      /* Narrow the carried limbs back to 32 bits so the next pass */
      /* squares this result */
      f0 = static_cast<int32_t>(h0);
449
96.3k
      f1 = static_cast<int32_t>(h1);
450
96.3k
      f2 = static_cast<int32_t>(h2);
451
96.3k
      f3 = static_cast<int32_t>(h3);
452
96.3k
      f4 = static_cast<int32_t>(h4);
453
96.3k
      f5 = static_cast<int32_t>(h5);
454
96.3k
      f6 = static_cast<int32_t>(h6);
455
96.3k
      f7 = static_cast<int32_t>(h7);
456
96.3k
      f8 = static_cast<int32_t>(h8);
457
96.3k
      f9 = static_cast<int32_t>(h9);
458
96.3k
      }
459
58.6k
460
58.6k
   return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
461
58.6k
   }
462
463
/*
464
h = 2 * f * f
465
Can overlap h with f.
466
467
Preconditions:
468
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
469
470
Postconditions:
471
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
472
*/
473
474
/*
475
See the implementation notes above FE_25519::mul for discussion of implementation strategy.
476
*/
477
478
//static
479
/*
* Compute twice the square of a field element.
*
* @param f input element, read as ten signed 32-bit limbs f[0]..f[9]
* @return FE_25519 constructed from the ten limbs h0..h9 after the carry
*         chain
*
* The products and column sums are the same ones FE_25519::sqr_iter forms
* in a single pass; here every column sum is doubled (h += h) before the
* carry chain runs. carry<> is defined elsewhere.
*/
FE_25519 FE_25519::sqr2(const FE_25519& f)
480
18.8k
   {
481
18.8k
   const int32_t f0 = f[0];
482
18.8k
   const int32_t f1 = f[1];
483
18.8k
   const int32_t f2 = f[2];
484
18.8k
   const int32_t f3 = f[3];
485
18.8k
   const int32_t f4 = f[4];
486
18.8k
   const int32_t f5 = f[5];
487
18.8k
   const int32_t f6 = f[6];
488
18.8k
   const int32_t f7 = f[7];
489
18.8k
   const int32_t f8 = f[8];
490
18.8k
   const int32_t f9 = f[9];
   /* 32-bit precomputations: doubled limbs, and limbs pre-multiplied */
   /* by 19 or 38 */
491
18.8k
   const int32_t f0_2 = 2 * f0;
492
18.8k
   const int32_t f1_2 = 2 * f1;
493
18.8k
   const int32_t f2_2 = 2 * f2;
494
18.8k
   const int32_t f3_2 = 2 * f3;
495
18.8k
   const int32_t f4_2 = 2 * f4;
496
18.8k
   const int32_t f5_2 = 2 * f5;
497
18.8k
   const int32_t f6_2 = 2 * f6;
498
18.8k
   const int32_t f7_2 = 2 * f7;
499
18.8k
   const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
500
18.8k
   const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
501
18.8k
   const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
502
18.8k
   const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
503
18.8k
   const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
   /* One 64-bit product per unordered limb pair (55 in total); the name */
   /* suffix is the overall factor folded into the product */
504
18.8k
   const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
505
18.8k
   const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
506
18.8k
   const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
507
18.8k
   const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
508
18.8k
   const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
509
18.8k
   const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
510
18.8k
   const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
511
18.8k
   const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
512
18.8k
   const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
513
18.8k
   const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
514
18.8k
   const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
515
18.8k
   const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
516
18.8k
   const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
517
18.8k
   const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
518
18.8k
   const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
519
18.8k
   const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
520
18.8k
   const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
521
18.8k
   const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
522
18.8k
   const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
523
18.8k
   const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
524
18.8k
   const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
525
18.8k
   const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
526
18.8k
   const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
527
18.8k
   const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
528
18.8k
   const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
529
18.8k
   const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
530
18.8k
   const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
531
18.8k
   const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
532
18.8k
   const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
533
18.8k
   const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
534
18.8k
   const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
535
18.8k
   const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
536
18.8k
   const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
537
18.8k
   const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
538
18.8k
   const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
539
18.8k
   const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
540
18.8k
   const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
541
18.8k
   const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
542
18.8k
   const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
543
18.8k
   const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
544
18.8k
   const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
545
18.8k
   const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
546
18.8k
   const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
547
18.8k
   const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
548
18.8k
   const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
549
18.8k
   const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
550
18.8k
   const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
551
18.8k
   const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
552
18.8k
   const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
553
18.8k
   const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
554
18.8k
   const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
555
18.8k
   const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
556
18.8k
   const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
557
18.8k
   const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
558
18.8k
   const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
559
18.8k
560
18.8k
   /* Column sums: h_k adds the products whose limb indices sum to k */
   /* or to k + 10 */
   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
561
18.8k
   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
562
18.8k
   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
563
18.8k
   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
564
18.8k
   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
565
18.8k
   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
566
18.8k
   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
567
18.8k
   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
568
18.8k
   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
569
18.8k
   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
570
18.8k
571
18.8k
   /* Double every column sum: this is the "2 *" in 2*f*f, applied */
   /* before any carry */
   h0 += h0;
572
18.8k
   h1 += h1;
573
18.8k
   h2 += h2;
574
18.8k
   h3 += h3;
575
18.8k
   h4 += h4;
576
18.8k
   h5 += h5;
577
18.8k
   h6 += h6;
578
18.8k
   h7 += h7;
579
18.8k
   h8 += h8;
580
18.8k
   h9 += h9;
581
18.8k
582
18.8k
   carry<26>(h0, h1);
583
18.8k
   carry<26>(h4, h5);
584
18.8k
585
18.8k
   carry<25>(h1, h2);
586
18.8k
   carry<25>(h5, h6);
587
18.8k
588
18.8k
   carry<26>(h2, h3);
589
18.8k
   carry<26>(h6, h7);
590
18.8k
591
18.8k
   carry<25>(h3, h4);
592
18.8k
   carry<25>(h7, h8);
593
18.8k
   carry<26>(h4, h5);
594
18.8k
   carry<26>(h8, h9);
595
18.8k
   carry<25,19>(h9, h0);
596
18.8k
   carry<26>(h0, h1);
597
18.8k
598
18.8k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
599
18.8k
   }
600
601
/*
602
Ignores top bit of h.
603
*/
604
605
void FE_25519::from_bytes(const uint8_t s[32])
606
78
   {
607
78
   int64_t h0 = load_4(s);
608
78
   int64_t h1 = load_3(s + 4) << 6;
609
78
   int64_t h2 = load_3(s + 7) << 5;
610
78
   int64_t h3 = load_3(s + 10) << 3;
611
78
   int64_t h4 = load_3(s + 13) << 2;
612
78
   int64_t h5 = load_4(s + 16);
613
78
   int64_t h6 = load_3(s + 20) << 7;
614
78
   int64_t h7 = load_3(s + 23) << 5;
615
78
   int64_t h8 = load_3(s + 26) << 4;
616
78
   int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
617
78
618
78
   carry<25,19>(h9, h0);
619
78
   carry<25>(h1, h2);
620
78
   carry<25>(h3, h4);
621
78
   carry<25>(h5, h6);
622
78
   carry<25>(h7, h8);
623
78
624
78
   carry<26>(h0, h1);
625
78
   carry<26>(h2, h3);
626
78
   carry<26>(h4, h5);
627
78
   carry<26>(h6, h7);
628
78
   carry<26>(h8, h9);
629
78
630
78
   m_fe[0] = static_cast<int32_t>(h0);
631
78
   m_fe[1] = static_cast<int32_t>(h1);
632
78
   m_fe[2] = static_cast<int32_t>(h2);
633
78
   m_fe[3] = static_cast<int32_t>(h3);
634
78
   m_fe[4] = static_cast<int32_t>(h4);
635
78
   m_fe[5] = static_cast<int32_t>(h5);
636
78
   m_fe[6] = static_cast<int32_t>(h6);
637
78
   m_fe[7] = static_cast<int32_t>(h7);
638
78
   m_fe[8] = static_cast<int32_t>(h8);
639
78
   m_fe[9] = static_cast<int32_t>(h9);
640
78
   }
641
642
/*
643
Preconditions:
644
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
645
646
Write p=2^255-19; q=floor(h/p).
647
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
648
649
Proof:
650
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
651
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
652
653
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
654
Then 0<y<1.
655
656
Write r=h-pq.
657
Have 0<=r<=p-1=2^255-20.
658
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
659
660
Write x=r+19(2^-255)r+y.
661
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
662
663
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
664
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
665
*/
666
667
void FE_25519::to_bytes(uint8_t s[32]) const
668
338
   {
669
338
   const int64_t X25 = (1 << 25);
670
338
671
338
   int32_t h0 = m_fe[0];
672
338
   int32_t h1 = m_fe[1];
673
338
   int32_t h2 = m_fe[2];
674
338
   int32_t h3 = m_fe[3];
675
338
   int32_t h4 = m_fe[4];
676
338
   int32_t h5 = m_fe[5];
677
338
   int32_t h6 = m_fe[6];
678
338
   int32_t h7 = m_fe[7];
679
338
   int32_t h8 = m_fe[8];
680
338
   int32_t h9 = m_fe[9];
681
338
   int32_t q;
682
338
683
338
   q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
684
338
   q = (h0 + q) >> 26;
685
338
   q = (h1 + q) >> 25;
686
338
   q = (h2 + q) >> 26;
687
338
   q = (h3 + q) >> 25;
688
338
   q = (h4 + q) >> 26;
689
338
   q = (h5 + q) >> 25;
690
338
   q = (h6 + q) >> 26;
691
338
   q = (h7 + q) >> 25;
692
338
   q = (h8 + q) >> 26;
693
338
   q = (h9 + q) >> 25;
694
338
695
338
   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
696
338
   h0 += 19 * q;
697
338
   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
698
338
699
338
   carry0<26>(h0, h1);
700
338
   carry0<25>(h1, h2);
701
338
   carry0<26>(h2, h3);
702
338
   carry0<25>(h3, h4);
703
338
   carry0<26>(h4, h5);
704
338
   carry0<25>(h5, h6);
705
338
   carry0<26>(h6, h7);
706
338
   carry0<25>(h7, h8);
707
338
   carry0<26>(h8, h9);
708
338
709
338
   int32_t carry9 = h9 >> 25;
710
338
   h9 -= carry9 * X25;
711
338
   /* h10 = carry9 */
712
338
713
338
   /*
714
338
   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
715
338
   Have h0+...+2^230 h9 between 0 and 2^255-1;
716
338
   evidently 2^255 h10-2^255 q = 0.
717
338
   Goal: Output h0+...+2^230 h9.
718
338
   */
719
338
720
338
   s[0] = static_cast<uint8_t>(h0 >> 0);
721
338
   s[1] = static_cast<uint8_t>(h0 >> 8);
722
338
   s[2] = static_cast<uint8_t>(h0 >> 16);
723
338
   s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2));
724
338
   s[4] = static_cast<uint8_t>(h1 >> 6);
725
338
   s[5] = static_cast<uint8_t>(h1 >> 14);
726
338
   s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3));
727
338
   s[7] = static_cast<uint8_t>(h2 >> 5);
728
338
   s[8] = static_cast<uint8_t>(h2 >> 13);
729
338
   s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5));
730
338
   s[10] = static_cast<uint8_t>(h3 >> 3);
731
338
   s[11] = static_cast<uint8_t>(h3 >> 11);
732
338
   s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6));
733
338
   s[13] = static_cast<uint8_t>(h4 >> 2);
734
338
   s[14] = static_cast<uint8_t>(h4 >> 10);
735
338
   s[15] = static_cast<uint8_t>(h4 >> 18);
736
338
   s[16] = static_cast<uint8_t>(h5 >> 0);
737
338
   s[17] = static_cast<uint8_t>(h5 >> 8);
738
338
   s[18] = static_cast<uint8_t>(h5 >> 16);
739
338
   s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1));
740
338
   s[20] = static_cast<uint8_t>(h6 >> 7);
741
338
   s[21] = static_cast<uint8_t>(h6 >> 15);
742
338
   s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3));
743
338
   s[23] = static_cast<uint8_t>(h7 >> 5);
744
338
   s[24] = static_cast<uint8_t>(h7 >> 13);
745
338
   s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4));
746
338
   s[26] = static_cast<uint8_t>(h8 >> 4);
747
338
   s[27] = static_cast<uint8_t>(h8 >> 12);
748
338
   s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6));
749
338
   s[29] = static_cast<uint8_t>(h9 >> 2);
750
338
   s[30] = static_cast<uint8_t>(h9 >> 10);
751
338
   s[31] = static_cast<uint8_t>(h9 >> 18);
752
338
   }
753
754
}