Coverage Report

Created: 2020-05-23 13:54

/src/botan/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line
Count
Source
1
/*
2
* Ed25519 field element
3
* (C) 2017 Ribose Inc
4
*
5
* Based on the public domain code from SUPERCOP ref10 by
6
* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10
11
#include <botan/internal/ed25519_fe.h>
12
#include <botan/internal/ed25519_internal.h>
13
14
namespace Botan {
15
16
//static
17
FE_25519 FE_25519::invert(const FE_25519& z)
   {
   /*
   * Fixed square-and-multiply chain that raises z to the power 2^255 - 21,
   * i.e. p - 2 for p = 2^255 - 19. The trailing comment on each step gives
   * the exponent of z held in the destination afterwards, assuming
   * fe_sq / fe_sq_iter / fe_mul store the square, the n-fold square and the
   * product of their inputs in their first argument.
   */
   fe low;   // z^2, later z^11
   fe run;   // grows through z^(2^k - 1) for k = 5, 10, 50, 250
   fe blk;   // scratch for the shifted copies of run
   fe top;   // scratch for the 20- and 100-fold squaring runs

   fe_sq(low, z);               // 2
   fe_sq_iter(run, low, 2);     // 8
   fe_mul(run, z, run);         // 9
   fe_mul(low, low, run);       // 11
   fe_sq(blk, low);             // 22
   fe_mul(run, run, blk);       // 31 = 2^5 - 1
   fe_sq_iter(blk, run, 5);     // 2^10 - 2^5
   fe_mul(run, blk, run);       // 2^10 - 1
   fe_sq_iter(blk, run, 10);    // 2^20 - 2^10
   fe_mul(blk, blk, run);       // 2^20 - 1
   fe_sq_iter(top, blk, 20);    // 2^40 - 2^20
   fe_mul(blk, top, blk);       // 2^40 - 1
   fe_sq_iter(blk, blk, 10);    // 2^50 - 2^10
   fe_mul(run, blk, run);       // 2^50 - 1
   fe_sq_iter(blk, run, 50);    // 2^100 - 2^50
   fe_mul(blk, blk, run);       // 2^100 - 1
   fe_sq_iter(top, blk, 100);   // 2^200 - 2^100
   fe_mul(blk, top, blk);       // 2^200 - 1
   fe_sq_iter(blk, blk, 50);    // 2^250 - 2^50
   fe_mul(run, blk, run);       // 2^250 - 1
   fe_sq_iter(run, run, 5);     // 2^255 - 2^5
   fe_mul(low, run, low);       // 2^255 - 21

   return low;
   }
49
50
FE_25519 FE_25519::pow_22523(const fe& z)
   {
   /*
   * Fixed square-and-multiply chain that raises z to the power 2^252 - 3.
   * The trailing comment on each step gives the exponent of z held in the
   * destination afterwards, assuming fe_sq / fe_sq_iter / fe_mul store the
   * square, the n-fold square and the product of their inputs in their
   * first argument.
   */
   fe acc;   // running result
   fe run;   // shifted copies and intermediate all-ones exponents
   fe top;   // scratch for the 20- and 100-fold squaring runs

   fe_sq(acc, z);               // 2
   fe_sq_iter(run, acc, 2);     // 8
   fe_mul(run, z, run);         // 9
   fe_mul(acc, acc, run);       // 11
   fe_sq(acc, acc);             // 22
   fe_mul(acc, run, acc);       // 31 = 2^5 - 1
   fe_sq_iter(run, acc, 5);     // 2^10 - 2^5
   fe_mul(acc, run, acc);       // 2^10 - 1
   fe_sq_iter(run, acc, 10);    // 2^20 - 2^10
   fe_mul(run, run, acc);       // 2^20 - 1
   fe_sq_iter(top, run, 20);    // 2^40 - 2^20
   fe_mul(run, top, run);       // 2^40 - 1
   fe_sq_iter(run, run, 10);    // 2^50 - 2^10
   fe_mul(acc, run, acc);       // 2^50 - 1
   fe_sq_iter(run, acc, 50);    // 2^100 - 2^50
   fe_mul(run, run, acc);       // 2^100 - 1
   fe_sq_iter(top, run, 100);   // 2^200 - 2^100
   fe_mul(run, top, run);       // 2^200 - 1
   fe_sq_iter(run, run, 50);    // 2^250 - 2^50
   fe_mul(acc, run, acc);       // 2^250 - 1
   fe_sq_iter(acc, acc, 2);     // 2^252 - 4
   fe_mul(acc, acc, z);         // 2^252 - 3

   return acc;
   }
81
82
/*
83
h = f * g
84
Can overlap h with f or g.
85
86
Preconditions:
87
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
89
90
Postconditions:
91
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
92
*/
93
94
/*
95
Notes on implementation strategy:
96
97
Using schoolbook multiplication.
98
Karatsuba would save a little in some cost models.
99
100
Most multiplications by 2 and 19 are 32-bit precomputations;
101
cheaper than 64-bit postcomputations.
102
103
There is one remaining multiplication by 19 in the carry chain;
104
one *19 precomputation can be merged into this,
105
but the resulting data flow is considerably less clean.
106
107
There are 12 carries below.
108
10 of them are 2-way parallelizable and vectorizable.
109
Can get away with 11 carries, but then data flow is much deeper.
110
111
With tighter constraints on inputs can squeeze carries into int32.
112
*/
113
114
//static
115
/*
* Field multiplication: returns h = f * g as a new FE_25519.
*
* Both operands are read as ten signed 32-bit limbs (f[0]..f[9] and
* g[0]..g[9]). The 100 limb products are formed in 64-bit arithmetic,
* summed into ten column totals h0..h9, and then narrowed by the carry
* chain at the end of the function before being handed to the FE_25519
* constructor.
*
* The limb bounds required on entry and obtained on exit are those listed
* in the Preconditions/Postconditions comment preceding this function.
*/
FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g)
116
73.5k
   {
117
73.5k
   const int32_t f0 = f[0];
118
73.5k
   const int32_t f1 = f[1];
119
73.5k
   const int32_t f2 = f[2];
120
73.5k
   const int32_t f3 = f[3];
121
73.5k
   const int32_t f4 = f[4];
122
73.5k
   const int32_t f5 = f[5];
123
73.5k
   const int32_t f6 = f[6];
124
73.5k
   const int32_t f7 = f[7];
125
73.5k
   const int32_t f8 = f[8];
126
73.5k
   const int32_t f9 = f[9];
127
73.5k
128
73.5k
   const int32_t g0 = g[0];
129
73.5k
   const int32_t g1 = g[1];
130
73.5k
   const int32_t g2 = g[2];
131
73.5k
   const int32_t g3 = g[3];
132
73.5k
   const int32_t g4 = g[4];
133
73.5k
   const int32_t g5 = g[5];
134
73.5k
   const int32_t g6 = g[6];
135
73.5k
   const int32_t g7 = g[7];
136
73.5k
   const int32_t g8 = g[8];
137
73.5k
   const int32_t g9 = g[9];
138
73.5k
139
73.5k
   // 32-bit pre-scaling: 19*g_j is used in the products below whose limb
   // indices sum to 10 or more, and 2*f_i in products of two odd-indexed limbs.
   const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
140
73.5k
   const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
141
73.5k
   const int32_t g3_19 = 19 * g3;
142
73.5k
   const int32_t g4_19 = 19 * g4;
143
73.5k
   const int32_t g5_19 = 19 * g5;
144
73.5k
   const int32_t g6_19 = 19 * g6;
145
73.5k
   const int32_t g7_19 = 19 * g7;
146
73.5k
   const int32_t g8_19 = 19 * g8;
147
73.5k
   const int32_t g9_19 = 19 * g9;
148
73.5k
   const int32_t f1_2 = 2 * f1;
149
73.5k
   const int32_t f3_2 = 2 * f3;
150
73.5k
   const int32_t f5_2 = 2 * f5;
151
73.5k
   const int32_t f7_2 = 2 * f7;
152
73.5k
   const int32_t f9_2 = 2 * f9;
153
73.5k
154
73.5k
   // All 100 partial products; one operand is widened to int64_t first so
   // each multiplication is carried out in 64 bits.
   const int64_t f0g0    = f0   * static_cast<int64_t>(g0);
155
73.5k
   const int64_t f0g1    = f0   * static_cast<int64_t>(g1);
156
73.5k
   const int64_t f0g2    = f0   * static_cast<int64_t>(g2);
157
73.5k
   const int64_t f0g3    = f0   * static_cast<int64_t>(g3);
158
73.5k
   const int64_t f0g4    = f0   * static_cast<int64_t>(g4);
159
73.5k
   const int64_t f0g5    = f0   * static_cast<int64_t>(g5);
160
73.5k
   const int64_t f0g6    = f0   * static_cast<int64_t>(g6);
161
73.5k
   const int64_t f0g7    = f0   * static_cast<int64_t>(g7);
162
73.5k
   const int64_t f0g8    = f0   * static_cast<int64_t>(g8);
163
73.5k
   const int64_t f0g9    = f0   * static_cast<int64_t>(g9);
164
73.5k
   const int64_t f1g0    = f1   * static_cast<int64_t>(g0);
165
73.5k
   const int64_t f1g1_2  = f1_2 * static_cast<int64_t>(g1);
166
73.5k
   const int64_t f1g2    = f1   * static_cast<int64_t>(g2);
167
73.5k
   const int64_t f1g3_2  = f1_2 * static_cast<int64_t>(g3);
168
73.5k
   const int64_t f1g4    = f1   * static_cast<int64_t>(g4);
169
73.5k
   const int64_t f1g5_2  = f1_2 * static_cast<int64_t>(g5);
170
73.5k
   const int64_t f1g6    = f1   * static_cast<int64_t>(g6);
171
73.5k
   const int64_t f1g7_2  = f1_2 * static_cast<int64_t>(g7);
172
73.5k
   const int64_t f1g8    = f1   * static_cast<int64_t>(g8);
173
73.5k
   const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
174
73.5k
   const int64_t f2g0    = f2   * static_cast<int64_t>(g0);
175
73.5k
   const int64_t f2g1    = f2   * static_cast<int64_t>(g1);
176
73.5k
   const int64_t f2g2    = f2   * static_cast<int64_t>(g2);
177
73.5k
   const int64_t f2g3    = f2   * static_cast<int64_t>(g3);
178
73.5k
   const int64_t f2g4    = f2   * static_cast<int64_t>(g4);
179
73.5k
   const int64_t f2g5    = f2   * static_cast<int64_t>(g5);
180
73.5k
   const int64_t f2g6    = f2   * static_cast<int64_t>(g6);
181
73.5k
   const int64_t f2g7    = f2   * static_cast<int64_t>(g7);
182
73.5k
   const int64_t f2g8_19 = f2   * static_cast<int64_t>(g8_19);
183
73.5k
   const int64_t f2g9_19 = f2   * static_cast<int64_t>(g9_19);
184
73.5k
   const int64_t f3g0    = f3   * static_cast<int64_t>(g0);
185
73.5k
   const int64_t f3g1_2  = f3_2 * static_cast<int64_t>(g1);
186
73.5k
   const int64_t f3g2    = f3   * static_cast<int64_t>(g2);
187
73.5k
   const int64_t f3g3_2  = f3_2 * static_cast<int64_t>(g3);
188
73.5k
   const int64_t f3g4    = f3   * static_cast<int64_t>(g4);
189
73.5k
   const int64_t f3g5_2  = f3_2 * static_cast<int64_t>(g5);
190
73.5k
   const int64_t f3g6    = f3   * static_cast<int64_t>(g6);
191
73.5k
   const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
192
73.5k
   const int64_t f3g8_19 = f3   * static_cast<int64_t>(g8_19);
193
73.5k
   const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
194
73.5k
   const int64_t f4g0    = f4   * static_cast<int64_t>(g0);
195
73.5k
   const int64_t f4g1    = f4   * static_cast<int64_t>(g1);
196
73.5k
   const int64_t f4g2    = f4   * static_cast<int64_t>(g2);
197
73.5k
   const int64_t f4g3    = f4   * static_cast<int64_t>(g3);
198
73.5k
   const int64_t f4g4    = f4   * static_cast<int64_t>(g4);
199
73.5k
   const int64_t f4g5    = f4   * static_cast<int64_t>(g5);
200
73.5k
   const int64_t f4g6_19 = f4   * static_cast<int64_t>(g6_19);
201
73.5k
   const int64_t f4g7_19 = f4   * static_cast<int64_t>(g7_19);
202
73.5k
   const int64_t f4g8_19 = f4   * static_cast<int64_t>(g8_19);
203
73.5k
   const int64_t f4g9_19 = f4   * static_cast<int64_t>(g9_19);
204
73.5k
   const int64_t f5g0    = f5   * static_cast<int64_t>(g0);
205
73.5k
   const int64_t f5g1_2  = f5_2 * static_cast<int64_t>(g1);
206
73.5k
   const int64_t f5g2    = f5   * static_cast<int64_t>(g2);
207
73.5k
   const int64_t f5g3_2  = f5_2 * static_cast<int64_t>(g3);
208
73.5k
   const int64_t f5g4    = f5   * static_cast<int64_t>(g4);
209
73.5k
   const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
210
73.5k
   const int64_t f5g6_19 = f5   * static_cast<int64_t>(g6_19);
211
73.5k
   const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
212
73.5k
   const int64_t f5g8_19 = f5   * static_cast<int64_t>(g8_19);
213
73.5k
   const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
214
73.5k
   const int64_t f6g0    = f6   * static_cast<int64_t>(g0);
215
73.5k
   const int64_t f6g1    = f6   * static_cast<int64_t>(g1);
216
73.5k
   const int64_t f6g2    = f6   * static_cast<int64_t>(g2);
217
73.5k
   const int64_t f6g3    = f6   * static_cast<int64_t>(g3);
218
73.5k
   const int64_t f6g4_19 = f6   * static_cast<int64_t>(g4_19);
219
73.5k
   const int64_t f6g5_19 = f6   * static_cast<int64_t>(g5_19);
220
73.5k
   const int64_t f6g6_19 = f6   * static_cast<int64_t>(g6_19);
221
73.5k
   const int64_t f6g7_19 = f6   * static_cast<int64_t>(g7_19);
222
73.5k
   const int64_t f6g8_19 = f6   * static_cast<int64_t>(g8_19);
223
73.5k
   const int64_t f6g9_19 = f6   * static_cast<int64_t>(g9_19);
224
73.5k
   const int64_t f7g0    = f7   * static_cast<int64_t>(g0);
225
73.5k
   const int64_t f7g1_2  = f7_2 * static_cast<int64_t>(g1);
226
73.5k
   const int64_t f7g2    = f7   * static_cast<int64_t>(g2);
227
73.5k
   const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
228
73.5k
   const int64_t f7g4_19 = f7   * static_cast<int64_t>(g4_19);
229
73.5k
   const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
230
73.5k
   const int64_t f7g6_19 = f7   * static_cast<int64_t>(g6_19);
231
73.5k
   const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
232
73.5k
   const int64_t f7g8_19 = f7   * static_cast<int64_t>(g8_19);
233
73.5k
   const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
234
73.5k
   const int64_t f8g0    = f8   * static_cast<int64_t>(g0);
235
73.5k
   const int64_t f8g1    = f8   * static_cast<int64_t>(g1);
236
73.5k
   const int64_t f8g2_19 = f8   * static_cast<int64_t>(g2_19);
237
73.5k
   const int64_t f8g3_19 = f8   * static_cast<int64_t>(g3_19);
238
73.5k
   const int64_t f8g4_19 = f8   * static_cast<int64_t>(g4_19);
239
73.5k
   const int64_t f8g5_19 = f8   * static_cast<int64_t>(g5_19);
240
73.5k
   const int64_t f8g6_19 = f8   * static_cast<int64_t>(g6_19);
241
73.5k
   const int64_t f8g7_19 = f8   * static_cast<int64_t>(g7_19);
242
73.5k
   const int64_t f8g8_19 = f8   * static_cast<int64_t>(g8_19);
243
73.5k
   const int64_t f8g9_19 = f8   * static_cast<int64_t>(g9_19);
244
73.5k
   const int64_t f9g0    = f9   * static_cast<int64_t>(g0);
245
73.5k
   const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
246
73.5k
   const int64_t f9g2_19 = f9   * static_cast<int64_t>(g2_19);
247
73.5k
   const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
248
73.5k
   const int64_t f9g4_19 = f9   * static_cast<int64_t>(g4_19);
249
73.5k
   const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
250
73.5k
   const int64_t f9g6_19 = f9   * static_cast<int64_t>(g6_19);
251
73.5k
   const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
252
73.5k
   const int64_t f9g8_19 = f9   * static_cast<int64_t>(g8_19);
253
73.5k
   const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
254
73.5k
255
73.5k
   // Column sums: h_k gathers every product f_i*g_j with i+j == k or
   // i+j == k+10 (the latter are the ones carrying the factor 19).
   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
256
73.5k
   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
257
73.5k
   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
258
73.5k
   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
259
73.5k
   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
260
73.5k
   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
261
73.5k
   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
262
73.5k
   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
263
73.5k
   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
264
73.5k
   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
265
73.5k
266
73.5k
   /*
267
73.5k
   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
268
73.5k
   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
269
73.5k
   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
270
73.5k
   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
271
73.5k
   */
272
73.5k
   // Carry chain: two interleaved chains (starting at h0 and at h4) are
   // advanced in lock-step; the bound comments track each limb after a step.
   carry<26>(h0, h1);
273
73.5k
   carry<26>(h4, h5);
274
73.5k
275
73.5k
   /* |h0| <= 2^25 */
276
73.5k
   /* |h4| <= 2^25 */
277
73.5k
   /* |h1| <= 1.71*2^59 */
278
73.5k
   /* |h5| <= 1.71*2^59 */
279
73.5k
280
73.5k
   carry<25>(h1, h2);
281
73.5k
   carry<25>(h5, h6);
282
73.5k
283
73.5k
   /* |h1| <= 2^24; from now on fits into int32 */
284
73.5k
   /* |h5| <= 2^24; from now on fits into int32 */
285
73.5k
   /* |h2| <= 1.41*2^60 */
286
73.5k
   /* |h6| <= 1.41*2^60 */
287
73.5k
288
73.5k
   carry<26>(h2, h3);
289
73.5k
   carry<26>(h6, h7);
290
73.5k
   /* |h2| <= 2^25; from now on fits into int32 unchanged */
291
73.5k
   /* |h6| <= 2^25; from now on fits into int32 unchanged */
292
73.5k
   /* |h3| <= 1.71*2^59 */
293
73.5k
   /* |h7| <= 1.71*2^59 */
294
73.5k
295
73.5k
   carry<25>(h3, h4);
296
73.5k
   carry<25>(h7, h8);
297
73.5k
   /* |h3| <= 2^24; from now on fits into int32 unchanged */
298
73.5k
   /* |h7| <= 2^24; from now on fits into int32 unchanged */
299
73.5k
   /* |h4| <= 1.72*2^34 */
300
73.5k
   /* |h8| <= 1.41*2^60 */
301
73.5k
302
73.5k
   carry<26>(h4, h5);
303
73.5k
   carry<26>(h8, h9);
304
73.5k
   /* |h4| <= 2^25; from now on fits into int32 unchanged */
305
73.5k
   /* |h8| <= 2^25; from now on fits into int32 unchanged */
306
73.5k
   /* |h5| <= 1.01*2^24 */
307
73.5k
   /* |h9| <= 1.71*2^59 */
308
73.5k
309
73.5k
   // NOTE(review): carry<> is defined outside this file; the second template
   // argument is presumably the factor 19 applied when the carry out of the
   // top limb wraps around into h0 - confirm in ed25519_internal.h.
   carry<25, 19>(h9, h0);
310
73.5k
311
73.5k
   /* |h9| <= 2^24; from now on fits into int32 unchanged */
312
73.5k
   /* |h0| <= 1.1*2^39 */
313
73.5k
314
73.5k
   carry<26>(h0, h1);
315
73.5k
   /* |h0| <= 2^25; from now on fits into int32 unchanged */
316
73.5k
   /* |h1| <= 1.01*2^24 */
317
73.5k
318
73.5k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
319
73.5k
   }
320
321
/*
322
h = f * f
323
Can overlap h with f.
324
325
Preconditions:
326
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
327
328
Postconditions:
329
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
330
*/
331
332
/*
333
See the implementation notes above FE_25519::mul (fe_mul.c in SUPERCOP ref10) for discussion of implementation strategy.
334
*/
335
336
//static
337
/*
* Repeated squaring: starting from the limbs of f, performs `iter`
* successive squarings and returns the result, i.e. f^(2^iter).
* With iter == 0 the loop body never runs and the limbs of f are returned
* exactly as read.
*/
FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter)
338
42.2k
   {
339
42.2k
   int32_t f0 = f[0];
340
42.2k
   int32_t f1 = f[1];
341
42.2k
   int32_t f2 = f[2];
342
42.2k
   int32_t f3 = f[3];
343
42.2k
   int32_t f4 = f[4];
344
42.2k
   int32_t f5 = f[5];
345
42.2k
   int32_t f6 = f[6];
346
42.2k
   int32_t f7 = f[7];
347
42.2k
   int32_t f8 = f[8];
348
42.2k
   int32_t f9 = f[9];
349
42.2k
350
111k
   for(size_t i = 0; i != iter; ++i)
351
69.2k
      {
352
69.2k
      // Doubled and 19x/38x-scaled limbs, precomputed in 32 bits.
      const int32_t f0_2 = 2 * f0;
353
69.2k
      const int32_t f1_2 = 2 * f1;
354
69.2k
      const int32_t f2_2 = 2 * f2;
355
69.2k
      const int32_t f3_2 = 2 * f3;
356
69.2k
      const int32_t f4_2 = 2 * f4;
357
69.2k
      const int32_t f5_2 = 2 * f5;
358
69.2k
      const int32_t f6_2 = 2 * f6;
359
69.2k
      const int32_t f7_2 = 2 * f7;
360
69.2k
      const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
361
69.2k
      const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
362
69.2k
      const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
363
69.2k
      const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
364
69.2k
      const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
365
69.2k
366
69.2k
      // Each cross product f_i*f_j (i < j) is formed once with a doubled
      // operand instead of twice; products are computed in 64 bits.
      const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
367
69.2k
      const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
368
69.2k
      const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
369
69.2k
      const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
370
69.2k
      const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
371
69.2k
      const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
372
69.2k
      const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
373
69.2k
      const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
374
69.2k
      const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
375
69.2k
      const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
376
69.2k
      const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
377
69.2k
      const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
378
69.2k
      const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
379
69.2k
      const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
380
69.2k
      const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
381
69.2k
      const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
382
69.2k
      const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
383
69.2k
      const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
384
69.2k
      const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
385
69.2k
      const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
386
69.2k
      const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
387
69.2k
      const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
388
69.2k
      const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
389
69.2k
      const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
390
69.2k
      const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
391
69.2k
      const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
392
69.2k
      const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
393
69.2k
      const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
394
69.2k
      const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
395
69.2k
      const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
396
69.2k
      const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
397
69.2k
      const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
398
69.2k
      const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
399
69.2k
      const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
400
69.2k
      const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
401
69.2k
      const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
402
69.2k
      const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
403
69.2k
      const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
404
69.2k
      const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
405
69.2k
      const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
406
69.2k
      const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
407
69.2k
      const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
408
69.2k
      const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
409
69.2k
      const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
410
69.2k
      const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
411
69.2k
      const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
412
69.2k
      const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
413
69.2k
      const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
414
69.2k
      const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
415
69.2k
      const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
416
69.2k
      const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
417
69.2k
      const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
418
69.2k
      const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
419
69.2k
      const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
420
69.2k
      const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
421
69.2k
422
69.2k
      // Column sums of the square, one per output limb.
      int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
423
69.2k
      int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
424
69.2k
      int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
425
69.2k
      int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
426
69.2k
      int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
427
69.2k
      int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
428
69.2k
      int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
429
69.2k
      int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
430
69.2k
      int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
431
69.2k
      int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
432
69.2k
433
69.2k
      // Same carry sequence as in FE_25519::mul.
      carry<26>(h0, h1);
434
69.2k
      carry<26>(h4, h5);
435
69.2k
      carry<25>(h1, h2);
436
69.2k
      carry<25>(h5, h6);
437
69.2k
      carry<26>(h2, h3);
438
69.2k
      carry<26>(h6, h7);
439
69.2k
440
69.2k
      carry<25>(h3, h4);
441
69.2k
      carry<25>(h7, h8);
442
69.2k
443
69.2k
      carry<26>(h4, h5);
444
69.2k
      carry<26>(h8, h9);
445
69.2k
      carry<25,19>(h9, h0);
446
69.2k
      carry<26>(h0, h1);
447
69.2k
448
69.2k
      // Narrow the carried limbs back to 32 bits; they are the input of the
      // next iteration (or the final result).
      f0 = static_cast<int32_t>(h0);
449
69.2k
      f1 = static_cast<int32_t>(h1);
450
69.2k
      f2 = static_cast<int32_t>(h2);
451
69.2k
      f3 = static_cast<int32_t>(h3);
452
69.2k
      f4 = static_cast<int32_t>(h4);
453
69.2k
      f5 = static_cast<int32_t>(h5);
454
69.2k
      f6 = static_cast<int32_t>(h6);
455
69.2k
      f7 = static_cast<int32_t>(h7);
456
69.2k
      f8 = static_cast<int32_t>(h8);
457
69.2k
      f9 = static_cast<int32_t>(h9);
458
69.2k
      }
459
42.2k
460
42.2k
   return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
461
42.2k
   }
462
463
/*
464
h = 2 * f * f
465
Can overlap h with f.
466
467
Preconditions:
468
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
469
470
Postconditions:
471
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
472
*/
473
474
/*
475
See the implementation notes above FE_25519::mul (fe_mul.c in SUPERCOP ref10) for discussion of implementation strategy.
476
*/
477
478
//static
479
/*
* Doubled squaring: returns h = 2 * f * f as a new FE_25519.
*
* The limb products and column sums are the same as for one squaring in
* FE_25519::sqr_iter; every column sum is then doubled before the carry
* chain is run.
*/
FE_25519 FE_25519::sqr2(const FE_25519& f)
480
13.5k
   {
481
13.5k
   const int32_t f0 = f[0];
482
13.5k
   const int32_t f1 = f[1];
483
13.5k
   const int32_t f2 = f[2];
484
13.5k
   const int32_t f3 = f[3];
485
13.5k
   const int32_t f4 = f[4];
486
13.5k
   const int32_t f5 = f[5];
487
13.5k
   const int32_t f6 = f[6];
488
13.5k
   const int32_t f7 = f[7];
489
13.5k
   const int32_t f8 = f[8];
490
13.5k
   const int32_t f9 = f[9];
491
13.5k
   // Doubled and 19x/38x-scaled limbs, precomputed in 32 bits.
   const int32_t f0_2 = 2 * f0;
492
13.5k
   const int32_t f1_2 = 2 * f1;
493
13.5k
   const int32_t f2_2 = 2 * f2;
494
13.5k
   const int32_t f3_2 = 2 * f3;
495
13.5k
   const int32_t f4_2 = 2 * f4;
496
13.5k
   const int32_t f5_2 = 2 * f5;
497
13.5k
   const int32_t f6_2 = 2 * f6;
498
13.5k
   const int32_t f7_2 = 2 * f7;
499
13.5k
   const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
500
13.5k
   const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
501
13.5k
   const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
502
13.5k
   const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
503
13.5k
   const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
504
13.5k
   // Each cross product f_i*f_j (i < j) is formed once with a doubled
   // operand instead of twice; products are computed in 64 bits.
   const int64_t f0f0    = f0   * static_cast<int64_t>(f0);
505
13.5k
   const int64_t f0f1_2  = f0_2 * static_cast<int64_t>(f1);
506
13.5k
   const int64_t f0f2_2  = f0_2 * static_cast<int64_t>(f2);
507
13.5k
   const int64_t f0f3_2  = f0_2 * static_cast<int64_t>(f3);
508
13.5k
   const int64_t f0f4_2  = f0_2 * static_cast<int64_t>(f4);
509
13.5k
   const int64_t f0f5_2  = f0_2 * static_cast<int64_t>(f5);
510
13.5k
   const int64_t f0f6_2  = f0_2 * static_cast<int64_t>(f6);
511
13.5k
   const int64_t f0f7_2  = f0_2 * static_cast<int64_t>(f7);
512
13.5k
   const int64_t f0f8_2  = f0_2 * static_cast<int64_t>(f8);
513
13.5k
   const int64_t f0f9_2  = f0_2 * static_cast<int64_t>(f9);
514
13.5k
   const int64_t f1f1_2  = f1_2 * static_cast<int64_t>(f1);
515
13.5k
   const int64_t f1f2_2  = f1_2 * static_cast<int64_t>(f2);
516
13.5k
   const int64_t f1f3_4  = f1_2 * static_cast<int64_t>(f3_2);
517
13.5k
   const int64_t f1f4_2  = f1_2 * static_cast<int64_t>(f4);
518
13.5k
   const int64_t f1f5_4  = f1_2 * static_cast<int64_t>(f5_2);
519
13.5k
   const int64_t f1f6_2  = f1_2 * static_cast<int64_t>(f6);
520
13.5k
   const int64_t f1f7_4  = f1_2 * static_cast<int64_t>(f7_2);
521
13.5k
   const int64_t f1f8_2  = f1_2 * static_cast<int64_t>(f8);
522
13.5k
   const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
523
13.5k
   const int64_t f2f2    = f2   * static_cast<int64_t>(f2);
524
13.5k
   const int64_t f2f3_2  = f2_2 * static_cast<int64_t>(f3);
525
13.5k
   const int64_t f2f4_2  = f2_2 * static_cast<int64_t>(f4);
526
13.5k
   const int64_t f2f5_2  = f2_2 * static_cast<int64_t>(f5);
527
13.5k
   const int64_t f2f6_2  = f2_2 * static_cast<int64_t>(f6);
528
13.5k
   const int64_t f2f7_2  = f2_2 * static_cast<int64_t>(f7);
529
13.5k
   const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
530
13.5k
   const int64_t f2f9_38 = f2   * static_cast<int64_t>(f9_38);
531
13.5k
   const int64_t f3f3_2  = f3_2 * static_cast<int64_t>(f3);
532
13.5k
   const int64_t f3f4_2  = f3_2 * static_cast<int64_t>(f4);
533
13.5k
   const int64_t f3f5_4  = f3_2 * static_cast<int64_t>(f5_2);
534
13.5k
   const int64_t f3f6_2  = f3_2 * static_cast<int64_t>(f6);
535
13.5k
   const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
536
13.5k
   const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
537
13.5k
   const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
538
13.5k
   const int64_t f4f4    = f4   * static_cast<int64_t>(f4);
539
13.5k
   const int64_t f4f5_2  = f4_2 * static_cast<int64_t>(f5);
540
13.5k
   const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
541
13.5k
   const int64_t f4f7_38 = f4   * static_cast<int64_t>(f7_38);
542
13.5k
   const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
543
13.5k
   const int64_t f4f9_38 = f4   * static_cast<int64_t>(f9_38);
544
13.5k
   const int64_t f5f5_38 = f5   * static_cast<int64_t>(f5_38);
545
13.5k
   const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
546
13.5k
   const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
547
13.5k
   const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
548
13.5k
   const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
549
13.5k
   const int64_t f6f6_19 = f6   * static_cast<int64_t>(f6_19);
550
13.5k
   const int64_t f6f7_38 = f6   * static_cast<int64_t>(f7_38);
551
13.5k
   const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
552
13.5k
   const int64_t f6f9_38 = f6   * static_cast<int64_t>(f9_38);
553
13.5k
   const int64_t f7f7_38 = f7   * static_cast<int64_t>(f7_38);
554
13.5k
   const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
555
13.5k
   const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
556
13.5k
   const int64_t f8f8_19 = f8   * static_cast<int64_t>(f8_19);
557
13.5k
   const int64_t f8f9_38 = f8   * static_cast<int64_t>(f9_38);
558
13.5k
   const int64_t f9f9_38 = f9   * static_cast<int64_t>(f9_38);
559
13.5k
560
13.5k
   // Column sums of f*f, one per output limb.
   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
561
13.5k
   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
562
13.5k
   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
563
13.5k
   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
564
13.5k
   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
565
13.5k
   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
566
13.5k
   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
567
13.5k
   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
568
13.5k
   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
569
13.5k
   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
570
13.5k
571
13.5k
   // Double every column sum before carrying: this is what turns f*f
   // into 2*f*f.
   h0 += h0;
572
13.5k
   h1 += h1;
573
13.5k
   h2 += h2;
574
13.5k
   h3 += h3;
575
13.5k
   h4 += h4;
576
13.5k
   h5 += h5;
577
13.5k
   h6 += h6;
578
13.5k
   h7 += h7;
579
13.5k
   h8 += h8;
580
13.5k
   h9 += h9;
581
13.5k
582
13.5k
   // Same carry sequence as in FE_25519::mul.
   carry<26>(h0, h1);
583
13.5k
   carry<26>(h4, h5);
584
13.5k
585
13.5k
   carry<25>(h1, h2);
586
13.5k
   carry<25>(h5, h6);
587
13.5k
588
13.5k
   carry<26>(h2, h3);
589
13.5k
   carry<26>(h6, h7);
590
13.5k
591
13.5k
   carry<25>(h3, h4);
592
13.5k
   carry<25>(h7, h8);
593
13.5k
   carry<26>(h4, h5);
594
13.5k
   carry<26>(h8, h9);
595
13.5k
   carry<25,19>(h9, h0);
596
13.5k
   carry<26>(h0, h1);
597
13.5k
598
13.5k
   return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
599
13.5k
   }
600
601
/*
602
Ignores the top bit of the input: the final 3-byte load (s[29..31]) is masked to its low 23 bits.
603
*/
604
605
void FE_25519::from_bytes(const uint8_t s[32])
606
55
   {
607
55
   int64_t h0 = load_4(s);
608
55
   int64_t h1 = load_3(s + 4) << 6;
609
55
   int64_t h2 = load_3(s + 7) << 5;
610
55
   int64_t h3 = load_3(s + 10) << 3;
611
55
   int64_t h4 = load_3(s + 13) << 2;
612
55
   int64_t h5 = load_4(s + 16);
613
55
   int64_t h6 = load_3(s + 20) << 7;
614
55
   int64_t h7 = load_3(s + 23) << 5;
615
55
   int64_t h8 = load_3(s + 26) << 4;
616
55
   int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
617
55
618
55
   carry<25,19>(h9, h0);
619
55
   carry<25>(h1, h2);
620
55
   carry<25>(h3, h4);
621
55
   carry<25>(h5, h6);
622
55
   carry<25>(h7, h8);
623
55
624
55
   carry<26>(h0, h1);
625
55
   carry<26>(h2, h3);
626
55
   carry<26>(h4, h5);
627
55
   carry<26>(h6, h7);
628
55
   carry<26>(h8, h9);
629
55
630
55
   m_fe[0] = static_cast<int32_t>(h0);
631
55
   m_fe[1] = static_cast<int32_t>(h1);
632
55
   m_fe[2] = static_cast<int32_t>(h2);
633
55
   m_fe[3] = static_cast<int32_t>(h3);
634
55
   m_fe[4] = static_cast<int32_t>(h4);
635
55
   m_fe[5] = static_cast<int32_t>(h5);
636
55
   m_fe[6] = static_cast<int32_t>(h6);
637
55
   m_fe[7] = static_cast<int32_t>(h7);
638
55
   m_fe[8] = static_cast<int32_t>(h8);
639
55
   m_fe[9] = static_cast<int32_t>(h9);
640
55
   }
641
642
/*
643
Preconditions:
644
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
645
646
Write p=2^255-19; q=floor(h/p).
647
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
648
649
Proof:
650
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
651
Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
652
653
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
654
Then 0<y<1.
655
656
Write r=h-pq.
657
Have 0<=r<=p-1=2^255-20.
658
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
659
660
Write x=r+19(2^-255)r+y.
661
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
662
663
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
664
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
665
*/
666
667
/*
* Writes the 32-byte encoding of this field element into s.
*
* Steps, as carried out below:
*  1. compute q via the chain of shifts (q = floor(h/p) by the claim proved
*     in the comment preceding this function);
*  2. add 19*q to the lowest limb and carry through all ten limbs;
*  3. drop the carry out of the top limb;
*  4. pack the ten limbs into s, least-significant byte first.
*
* The precondition on the limb sizes is the one stated in the comment
* preceding this function.
*/
void FE_25519::to_bytes(uint8_t s[32]) const
668
249
   {
669
249
   const int64_t X25 = (1 << 25);
670
249
671
249
   int32_t h0 = m_fe[0];
672
249
   int32_t h1 = m_fe[1];
673
249
   int32_t h2 = m_fe[2];
674
249
   int32_t h3 = m_fe[3];
675
249
   int32_t h4 = m_fe[4];
676
249
   int32_t h5 = m_fe[5];
677
249
   int32_t h6 = m_fe[6];
678
249
   int32_t h7 = m_fe[7];
679
249
   int32_t h8 = m_fe[8];
680
249
   int32_t h9 = m_fe[9];
681
249
   int32_t q;
682
249
683
249
   // Start from 19*h9 plus the rounding term 2^24, then ripple the running
   // quotient through every limb using that limb's width (26 or 25 bits).
   q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
684
249
   q = (h0 + q) >> 26;
685
249
   q = (h1 + q) >> 25;
686
249
   q = (h2 + q) >> 26;
687
249
   q = (h3 + q) >> 25;
688
249
   q = (h4 + q) >> 26;
689
249
   q = (h5 + q) >> 25;
690
249
   q = (h6 + q) >> 26;
691
249
   q = (h7 + q) >> 25;
692
249
   q = (h8 + q) >> 26;
693
249
   q = (h9 + q) >> 25;
694
249
695
249
   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
696
249
   h0 += 19 * q;
697
249
   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
698
249
699
249
   // NOTE(review): carry0<> is defined outside this file; presumably it moves
   // the bits at position S and above of the first argument into the second
   // - confirm in ed25519_internal.h.
   carry0<26>(h0, h1);
700
249
   carry0<25>(h1, h2);
701
249
   carry0<26>(h2, h3);
702
249
   carry0<25>(h3, h4);
703
249
   carry0<26>(h4, h5);
704
249
   carry0<25>(h5, h6);
705
249
   carry0<26>(h6, h7);
706
249
   carry0<25>(h7, h8);
707
249
   carry0<26>(h8, h9);
708
249
709
249
   // Remove everything at bit 25 and above from h9; the removed carry is
   // not stored anywhere (see the goal comment below for why it cancels).
   int32_t carry9 = h9 >> 25;
710
249
   h9 -= carry9 * X25;
711
249
   /* h10 = carry9 */
712
249
713
249
   /*
714
249
   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
715
249
   Have h0+...+2^230 h9 between 0 and 2^255-1;
716
249
   evidently 2^255 h10-2^255 q = 0.
717
249
   Goal: Output h0+...+2^230 h9.
718
249
   */
719
249
720
249
   // Pack the limbs (26, 25, 26, 25, ... bits) into 32 bytes, lowest byte
   // first; bytes that straddle two limbs OR the pieces together.
   s[0] = static_cast<uint8_t>(h0 >> 0);
721
249
   s[1] = static_cast<uint8_t>(h0 >> 8);
722
249
   s[2] = static_cast<uint8_t>(h0 >> 16);
723
249
   s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2));
724
249
   s[4] = static_cast<uint8_t>(h1 >> 6);
725
249
   s[5] = static_cast<uint8_t>(h1 >> 14);
726
249
   s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3));
727
249
   s[7] = static_cast<uint8_t>(h2 >> 5);
728
249
   s[8] = static_cast<uint8_t>(h2 >> 13);
729
249
   s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5));
730
249
   s[10] = static_cast<uint8_t>(h3 >> 3);
731
249
   s[11] = static_cast<uint8_t>(h3 >> 11);
732
249
   s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6));
733
249
   s[13] = static_cast<uint8_t>(h4 >> 2);
734
249
   s[14] = static_cast<uint8_t>(h4 >> 10);
735
249
   s[15] = static_cast<uint8_t>(h4 >> 18);
736
249
   s[16] = static_cast<uint8_t>(h5 >> 0);
737
249
   s[17] = static_cast<uint8_t>(h5 >> 8);
738
249
   s[18] = static_cast<uint8_t>(h5 >> 16);
739
249
   s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1));
740
249
   s[20] = static_cast<uint8_t>(h6 >> 7);
741
249
   s[21] = static_cast<uint8_t>(h6 >> 15);
742
249
   s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3));
743
249
   s[23] = static_cast<uint8_t>(h7 >> 5);
744
249
   s[24] = static_cast<uint8_t>(h7 >> 13);
745
249
   s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4));
746
249
   s[26] = static_cast<uint8_t>(h8 >> 4);
747
249
   s[27] = static_cast<uint8_t>(h8 >> 12);
748
249
   s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6));
749
249
   s[29] = static_cast<uint8_t>(h9 >> 2);
750
249
   s[30] = static_cast<uint8_t>(h9 >> 10);
751
249
   s[31] = static_cast<uint8_t>(h9 >> 18);
752
249
   }
753
754
}