/src/botan/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line | Count | Source |
1 | | /* |
2 | | * Ed25519 field element |
3 | | * (C) 2017 Ribose Inc |
4 | | * |
5 | | * Based on the public domain code from SUPERCOP ref10 by |
6 | | * Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang |
7 | | * |
8 | | * Botan is released under the Simplified BSD License (see license.txt) |
9 | | */ |
10 | | |
11 | | #include <botan/internal/ed25519_fe.h> |
12 | | #include <botan/internal/ed25519_internal.h> |
13 | | |
14 | | namespace Botan { |
15 | | |
16 | | //static |
17 | | FE_25519 FE_25519::invert(const FE_25519& z) |
18 | 78 | { |
19 | 78 | fe t0; |
20 | 78 | fe t1; |
21 | 78 | fe t2; |
22 | 78 | fe t3; |
23 | 78 | |
24 | 78 | fe_sq(t0, z); |
25 | 78 | fe_sq_iter(t1, t0, 2); |
26 | 78 | fe_mul(t1, z, t1); |
27 | 78 | fe_mul(t0, t0, t1); |
28 | 78 | fe_sq(t2, t0); |
29 | 78 | fe_mul(t1, t1, t2); |
30 | 78 | fe_sq_iter(t2, t1, 5); |
31 | 78 | fe_mul(t1, t2, t1); |
32 | 78 | fe_sq_iter(t2, t1, 10); |
33 | 78 | fe_mul(t2, t2, t1); |
34 | 78 | fe_sq_iter(t3, t2, 20); |
35 | 78 | fe_mul(t2, t3, t2); |
36 | 78 | fe_sq_iter(t2, t2, 10); |
37 | 78 | fe_mul(t1, t2, t1); |
38 | 78 | fe_sq_iter(t2, t1, 50); |
39 | 78 | fe_mul(t2, t2, t1); |
40 | 78 | fe_sq_iter(t3, t2, 100); |
41 | 78 | fe_mul(t2, t3, t2); |
42 | 78 | fe_sq_iter(t2, t2, 50); |
43 | 78 | fe_mul(t1, t2, t1); |
44 | 78 | fe_sq_iter(t1, t1, 5); |
45 | 78 | |
46 | 78 | fe_mul(t0, t1, t0); |
47 | 78 | return t0; |
48 | 78 | } |
49 | | |
50 | | FE_25519 FE_25519::pow_22523(const fe& z) |
51 | 78 | { |
52 | 78 | fe t0; |
53 | 78 | fe t1; |
54 | 78 | fe t2; |
55 | 78 | |
56 | 78 | fe_sq(t0, z); |
57 | 78 | fe_sq_iter(t1, t0, 2); |
58 | 78 | fe_mul(t1, z, t1); |
59 | 78 | fe_mul(t0, t0, t1); |
60 | 78 | fe_sq(t0, t0); |
61 | 78 | fe_mul(t0, t1, t0); |
62 | 78 | fe_sq_iter(t1, t0, 5); |
63 | 78 | fe_mul(t0, t1, t0); |
64 | 78 | fe_sq_iter(t1, t0, 10); |
65 | 78 | fe_mul(t1, t1, t0); |
66 | 78 | fe_sq_iter(t2, t1, 20); |
67 | 78 | fe_mul(t1, t2, t1); |
68 | 78 | fe_sq_iter(t1, t1, 10); |
69 | 78 | fe_mul(t0, t1, t0); |
70 | 78 | fe_sq_iter(t1, t0, 50); |
71 | 78 | fe_mul(t1, t1, t0); |
72 | 78 | fe_sq_iter(t2, t1, 100); |
73 | 78 | fe_mul(t1, t2, t1); |
74 | 78 | fe_sq_iter(t1, t1, 50); |
75 | 78 | fe_mul(t0, t1, t0); |
76 | 78 | fe_sq_iter(t0, t0, 2); |
77 | 78 | |
78 | 78 | fe_mul(t0, t0, z); |
79 | 78 | return t0; |
80 | 78 | } |
81 | | |
82 | | /* |
83 | | h = f * g |
84 | | Can overlap h with f or g. |
85 | | |
86 | | Preconditions: |
87 | | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
88 | | |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
89 | | |
90 | | Postconditions: |
91 | | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
92 | | */ |
93 | | |
94 | | /* |
95 | | Notes on implementation strategy: |
96 | | |
97 | | Using schoolbook multiplication. |
98 | | Karatsuba would save a little in some cost models. |
99 | | |
100 | | Most multiplications by 2 and 19 are 32-bit precomputations; |
101 | | cheaper than 64-bit postcomputations. |
102 | | |
103 | | There is one remaining multiplication by 19 in the carry chain; |
104 | | one *19 precomputation can be merged into this, |
105 | | but the resulting data flow is considerably less clean. |
106 | | |
107 | | There are 12 carries below. |
108 | | 10 of them are 2-way parallelizable and vectorizable. |
109 | | Can get away with 11 carries, but then data flow is much deeper. |
110 | | |
111 | | With tighter constraints on inputs can squeeze carries into int32. |
112 | | */ |
113 | | |
114 | | //static |
/*
FE_25519::mul -- field multiplication, h = f * g.

Parameters:
f, g - operands; each is read as ten int32_t limbs through operator[].

Returns:
An FE_25519 constructed from the ten accumulators h0..h9 after the
carry sequence at the end of the function.

Outline:
1. Copy the limbs into locals and precompute 19*g[1..9] and 2*f[odd]
as 32-bit values.
2. Form all 100 limb products as 64-bit values. A product whose limb
indices sum to 10 or more uses the 19*g value (consistent with
reduction modulo p = 2^255 - 19); a product of two odd-indexed limbs
uses the 2*f value.
3. Add the products into h0..h9, grouped by (i + j) mod 10.
4. Run the interleaved carry<26> / carry<25> sequence.

NOTE(review): carry<> is declared outside this section of the file. The
bound comments between the calls describe its intended effect;
carry<25, 19>(h9, h0) appears to fold the top carry back into h0 scaled
by 19 -- confirm against the helper's definition.
*/
115 | | FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g) |
116 | 102k | { |
// Local copies of the ten limbs of f.
117 | 102k | const int32_t f0 = f[0]; |
118 | 102k | const int32_t f1 = f[1]; |
119 | 102k | const int32_t f2 = f[2]; |
120 | 102k | const int32_t f3 = f[3]; |
121 | 102k | const int32_t f4 = f[4]; |
122 | 102k | const int32_t f5 = f[5]; |
123 | 102k | const int32_t f6 = f[6]; |
124 | 102k | const int32_t f7 = f[7]; |
125 | 102k | const int32_t f8 = f[8]; |
126 | 102k | const int32_t f9 = f[9]; |
127 | 102k | |
// Local copies of the ten limbs of g.
128 | 102k | const int32_t g0 = g[0]; |
129 | 102k | const int32_t g1 = g[1]; |
130 | 102k | const int32_t g2 = g[2]; |
131 | 102k | const int32_t g3 = g[3]; |
132 | 102k | const int32_t g4 = g[4]; |
133 | 102k | const int32_t g5 = g[5]; |
134 | 102k | const int32_t g6 = g[6]; |
135 | 102k | const int32_t g7 = g[7]; |
136 | 102k | const int32_t g8 = g[8]; |
137 | 102k | const int32_t g9 = g[9]; |
138 | 102k | |
// 32-bit precomputations of 19*g[j] and 2*f[odd]. The magnitude notes on
// the first two lines show these still fit in int32_t under the input
// bounds stated above the function; larger limbs would overflow here.
139 | 102k | const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ |
140 | 102k | const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ |
141 | 102k | const int32_t g3_19 = 19 * g3; |
142 | 102k | const int32_t g4_19 = 19 * g4; |
143 | 102k | const int32_t g5_19 = 19 * g5; |
144 | 102k | const int32_t g6_19 = 19 * g6; |
145 | 102k | const int32_t g7_19 = 19 * g7; |
146 | 102k | const int32_t g8_19 = 19 * g8; |
147 | 102k | const int32_t g9_19 = 19 * g9; |
148 | 102k | const int32_t f1_2 = 2 * f1; |
149 | 102k | const int32_t f3_2 = 2 * f3; |
150 | 102k | const int32_t f5_2 = 2 * f5; |
151 | 102k | const int32_t f7_2 = 2 * f7; |
152 | 102k | const int32_t f9_2 = 2 * f9; |
153 | 102k | |
// All 100 limb products, widened to 64 bits by casting one operand.
// Naming: fIgJ is f[I]*g[J]; a suffix _2, _19 or _38 records the extra
// constant factor carried in through the precomputed operands.
154 | 102k | const int64_t f0g0 = f0 * static_cast<int64_t>(g0); |
155 | 102k | const int64_t f0g1 = f0 * static_cast<int64_t>(g1); |
156 | 102k | const int64_t f0g2 = f0 * static_cast<int64_t>(g2); |
157 | 102k | const int64_t f0g3 = f0 * static_cast<int64_t>(g3); |
158 | 102k | const int64_t f0g4 = f0 * static_cast<int64_t>(g4); |
159 | 102k | const int64_t f0g5 = f0 * static_cast<int64_t>(g5); |
160 | 102k | const int64_t f0g6 = f0 * static_cast<int64_t>(g6); |
161 | 102k | const int64_t f0g7 = f0 * static_cast<int64_t>(g7); |
162 | 102k | const int64_t f0g8 = f0 * static_cast<int64_t>(g8); |
163 | 102k | const int64_t f0g9 = f0 * static_cast<int64_t>(g9); |
164 | 102k | const int64_t f1g0 = f1 * static_cast<int64_t>(g0); |
165 | 102k | const int64_t f1g1_2 = f1_2 * static_cast<int64_t>(g1); |
166 | 102k | const int64_t f1g2 = f1 * static_cast<int64_t>(g2); |
167 | 102k | const int64_t f1g3_2 = f1_2 * static_cast<int64_t>(g3); |
168 | 102k | const int64_t f1g4 = f1 * static_cast<int64_t>(g4); |
169 | 102k | const int64_t f1g5_2 = f1_2 * static_cast<int64_t>(g5); |
170 | 102k | const int64_t f1g6 = f1 * static_cast<int64_t>(g6); |
171 | 102k | const int64_t f1g7_2 = f1_2 * static_cast<int64_t>(g7); |
172 | 102k | const int64_t f1g8 = f1 * static_cast<int64_t>(g8); |
173 | 102k | const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19); |
174 | 102k | const int64_t f2g0 = f2 * static_cast<int64_t>(g0); |
175 | 102k | const int64_t f2g1 = f2 * static_cast<int64_t>(g1); |
176 | 102k | const int64_t f2g2 = f2 * static_cast<int64_t>(g2); |
177 | 102k | const int64_t f2g3 = f2 * static_cast<int64_t>(g3); |
178 | 102k | const int64_t f2g4 = f2 * static_cast<int64_t>(g4); |
179 | 102k | const int64_t f2g5 = f2 * static_cast<int64_t>(g5); |
180 | 102k | const int64_t f2g6 = f2 * static_cast<int64_t>(g6); |
181 | 102k | const int64_t f2g7 = f2 * static_cast<int64_t>(g7); |
182 | 102k | const int64_t f2g8_19 = f2 * static_cast<int64_t>(g8_19); |
183 | 102k | const int64_t f2g9_19 = f2 * static_cast<int64_t>(g9_19); |
184 | 102k | const int64_t f3g0 = f3 * static_cast<int64_t>(g0); |
185 | 102k | const int64_t f3g1_2 = f3_2 * static_cast<int64_t>(g1); |
186 | 102k | const int64_t f3g2 = f3 * static_cast<int64_t>(g2); |
187 | 102k | const int64_t f3g3_2 = f3_2 * static_cast<int64_t>(g3); |
188 | 102k | const int64_t f3g4 = f3 * static_cast<int64_t>(g4); |
189 | 102k | const int64_t f3g5_2 = f3_2 * static_cast<int64_t>(g5); |
190 | 102k | const int64_t f3g6 = f3 * static_cast<int64_t>(g6); |
191 | 102k | const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19); |
192 | 102k | const int64_t f3g8_19 = f3 * static_cast<int64_t>(g8_19); |
193 | 102k | const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19); |
194 | 102k | const int64_t f4g0 = f4 * static_cast<int64_t>(g0); |
195 | 102k | const int64_t f4g1 = f4 * static_cast<int64_t>(g1); |
196 | 102k | const int64_t f4g2 = f4 * static_cast<int64_t>(g2); |
197 | 102k | const int64_t f4g3 = f4 * static_cast<int64_t>(g3); |
198 | 102k | const int64_t f4g4 = f4 * static_cast<int64_t>(g4); |
199 | 102k | const int64_t f4g5 = f4 * static_cast<int64_t>(g5); |
200 | 102k | const int64_t f4g6_19 = f4 * static_cast<int64_t>(g6_19); |
201 | 102k | const int64_t f4g7_19 = f4 * static_cast<int64_t>(g7_19); |
202 | 102k | const int64_t f4g8_19 = f4 * static_cast<int64_t>(g8_19); |
203 | 102k | const int64_t f4g9_19 = f4 * static_cast<int64_t>(g9_19); |
204 | 102k | const int64_t f5g0 = f5 * static_cast<int64_t>(g0); |
205 | 102k | const int64_t f5g1_2 = f5_2 * static_cast<int64_t>(g1); |
206 | 102k | const int64_t f5g2 = f5 * static_cast<int64_t>(g2); |
207 | 102k | const int64_t f5g3_2 = f5_2 * static_cast<int64_t>(g3); |
208 | 102k | const int64_t f5g4 = f5 * static_cast<int64_t>(g4); |
209 | 102k | const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19); |
210 | 102k | const int64_t f5g6_19 = f5 * static_cast<int64_t>(g6_19); |
211 | 102k | const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19); |
212 | 102k | const int64_t f5g8_19 = f5 * static_cast<int64_t>(g8_19); |
213 | 102k | const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19); |
214 | 102k | const int64_t f6g0 = f6 * static_cast<int64_t>(g0); |
215 | 102k | const int64_t f6g1 = f6 * static_cast<int64_t>(g1); |
216 | 102k | const int64_t f6g2 = f6 * static_cast<int64_t>(g2); |
217 | 102k | const int64_t f6g3 = f6 * static_cast<int64_t>(g3); |
218 | 102k | const int64_t f6g4_19 = f6 * static_cast<int64_t>(g4_19); |
219 | 102k | const int64_t f6g5_19 = f6 * static_cast<int64_t>(g5_19); |
220 | 102k | const int64_t f6g6_19 = f6 * static_cast<int64_t>(g6_19); |
221 | 102k | const int64_t f6g7_19 = f6 * static_cast<int64_t>(g7_19); |
222 | 102k | const int64_t f6g8_19 = f6 * static_cast<int64_t>(g8_19); |
223 | 102k | const int64_t f6g9_19 = f6 * static_cast<int64_t>(g9_19); |
224 | 102k | const int64_t f7g0 = f7 * static_cast<int64_t>(g0); |
225 | 102k | const int64_t f7g1_2 = f7_2 * static_cast<int64_t>(g1); |
226 | 102k | const int64_t f7g2 = f7 * static_cast<int64_t>(g2); |
227 | 102k | const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19); |
228 | 102k | const int64_t f7g4_19 = f7 * static_cast<int64_t>(g4_19); |
229 | 102k | const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19); |
230 | 102k | const int64_t f7g6_19 = f7 * static_cast<int64_t>(g6_19); |
231 | 102k | const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19); |
232 | 102k | const int64_t f7g8_19 = f7 * static_cast<int64_t>(g8_19); |
233 | 102k | const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19); |
234 | 102k | const int64_t f8g0 = f8 * static_cast<int64_t>(g0); |
235 | 102k | const int64_t f8g1 = f8 * static_cast<int64_t>(g1); |
236 | 102k | const int64_t f8g2_19 = f8 * static_cast<int64_t>(g2_19); |
237 | 102k | const int64_t f8g3_19 = f8 * static_cast<int64_t>(g3_19); |
238 | 102k | const int64_t f8g4_19 = f8 * static_cast<int64_t>(g4_19); |
239 | 102k | const int64_t f8g5_19 = f8 * static_cast<int64_t>(g5_19); |
240 | 102k | const int64_t f8g6_19 = f8 * static_cast<int64_t>(g6_19); |
241 | 102k | const int64_t f8g7_19 = f8 * static_cast<int64_t>(g7_19); |
242 | 102k | const int64_t f8g8_19 = f8 * static_cast<int64_t>(g8_19); |
243 | 102k | const int64_t f8g9_19 = f8 * static_cast<int64_t>(g9_19); |
244 | 102k | const int64_t f9g0 = f9 * static_cast<int64_t>(g0); |
245 | 102k | const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19); |
246 | 102k | const int64_t f9g2_19 = f9 * static_cast<int64_t>(g2_19); |
247 | 102k | const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19); |
248 | 102k | const int64_t f9g4_19 = f9 * static_cast<int64_t>(g4_19); |
249 | 102k | const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19); |
250 | 102k | const int64_t f9g6_19 = f9 * static_cast<int64_t>(g6_19); |
251 | 102k | const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19); |
252 | 102k | const int64_t f9g8_19 = f9 * static_cast<int64_t>(g8_19); |
253 | 102k | const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19); |
254 | 102k | |
// Column sums: hK collects every product fIgJ with (I + J) mod 10 == K.
255 | 102k | int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; |
256 | 102k | int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; |
257 | 102k | int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; |
258 | 102k | int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; |
259 | 102k | int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; |
260 | 102k | int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; |
261 | 102k | int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; |
262 | 102k | int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; |
263 | 102k | int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; |
264 | 102k | int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; |
265 | 102k | |
266 | 102k | /* |
267 | 102k | |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) |
268 | 102k | i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 |
269 | 102k | |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) |
270 | 102k | i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 |
271 | 102k | */ |
// Carry sequence: two chains (starting at h0 and at h4) advance in step,
// one pair of calls per stage; h4 and later h0 are carried a second time
// after receiving a carry from the limb below.
272 | 102k | carry<26>(h0, h1); |
273 | 102k | carry<26>(h4, h5); |
274 | 102k | |
275 | 102k | /* |h0| <= 2^25 */ |
276 | 102k | /* |h4| <= 2^25 */ |
277 | 102k | /* |h1| <= 1.71*2^59 */ |
278 | 102k | /* |h5| <= 1.71*2^59 */ |
279 | 102k | |
280 | 102k | carry<25>(h1, h2); |
281 | 102k | carry<25>(h5, h6); |
282 | 102k | |
283 | 102k | /* |h1| <= 2^24; from now on fits into int32 */ |
284 | 102k | /* |h5| <= 2^24; from now on fits into int32 */ |
285 | 102k | /* |h2| <= 1.41*2^60 */ |
286 | 102k | /* |h6| <= 1.41*2^60 */ |
287 | 102k | |
288 | 102k | carry<26>(h2, h3); |
289 | 102k | carry<26>(h6, h7); |
290 | 102k | /* |h2| <= 2^25; from now on fits into int32 unchanged */ |
291 | 102k | /* |h6| <= 2^25; from now on fits into int32 unchanged */ |
292 | 102k | /* |h3| <= 1.71*2^59 */ |
293 | 102k | /* |h7| <= 1.71*2^59 */ |
294 | 102k | |
295 | 102k | carry<25>(h3, h4); |
296 | 102k | carry<25>(h7, h8); |
297 | 102k | /* |h3| <= 2^24; from now on fits into int32 unchanged */ |
298 | 102k | /* |h7| <= 2^24; from now on fits into int32 unchanged */ |
299 | 102k | /* |h4| <= 1.72*2^34 */ |
300 | 102k | /* |h8| <= 1.41*2^60 */ |
301 | 102k | |
302 | 102k | carry<26>(h4, h5); |
303 | 102k | carry<26>(h8, h9); |
304 | 102k | /* |h4| <= 2^25; from now on fits into int32 unchanged */ |
305 | 102k | /* |h8| <= 2^25; from now on fits into int32 unchanged */ |
306 | 102k | /* |h5| <= 1.01*2^24 */ |
307 | 102k | /* |h9| <= 1.71*2^59 */ |
308 | 102k | |
// The only carry that wraps around from the top limb to the bottom one.
309 | 102k | carry<25, 19>(h9, h0); |
310 | 102k | |
311 | 102k | /* |h9| <= 2^24; from now on fits into int32 unchanged */ |
312 | 102k | /* |h0| <= 1.1*2^39 */ |
313 | 102k | |
314 | 102k | carry<26>(h0, h1); |
315 | 102k | /* |h0| <= 2^25; from now on fits into int32 unchanged */ |
316 | 102k | /* |h1| <= 1.01*2^24 */ |
317 | 102k | |
// The result is constructed directly from the ten 64-bit accumulators.
318 | 102k | return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9); |
319 | 102k | } |
320 | | |
321 | | /* |
322 | | h = f * f |
323 | | Can overlap h with f. |
324 | | |
325 | | Preconditions: |
326 | | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
327 | | |
328 | | Postconditions: |
329 | | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
330 | | */ |
331 | | |
332 | | /* |
333 | | See the notes above FE_25519::mul for discussion of implementation strategy. |
334 | | */ |
335 | | |
336 | | //static |
/*
FE_25519::sqr_iter -- repeated squaring.

Parameters:
f    - input element, read as ten int32_t limbs through operator[].
iter - number of passes of the loop body. With iter == 0 the loop is
skipped and the limbs of f are handed to the result unchanged.

Returns:
An FE_25519 constructed from the limbs left after the last pass. Each
pass replaces the working limbs by the carried limbs of their square, so
the result is f squared iter times in succession.

NOTE(review): carry<> is declared outside this section of the file;
carry<25,19>(h9, h0) appears to fold the top carry back into h0 scaled
by 19 -- confirm against the helper's definition.
*/
337 | | FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter) |
338 | 58.6k | { |
// Working limbs; non-const because every pass overwrites them.
339 | 58.6k | int32_t f0 = f[0]; |
340 | 58.6k | int32_t f1 = f[1]; |
341 | 58.6k | int32_t f2 = f[2]; |
342 | 58.6k | int32_t f3 = f[3]; |
343 | 58.6k | int32_t f4 = f[4]; |
344 | 58.6k | int32_t f5 = f[5]; |
345 | 58.6k | int32_t f6 = f[6]; |
346 | 58.6k | int32_t f7 = f[7]; |
347 | 58.6k | int32_t f8 = f[8]; |
348 | 58.6k | int32_t f9 = f[9]; |
349 | 58.6k | |
350 | 154k | for(size_t i = 0; i != iter; ++i) |
351 | 96.3k | { |
// 32-bit precomputations: doubled limbs and limbs scaled by 19 or 38.
352 | 96.3k | const int32_t f0_2 = 2 * f0; |
353 | 96.3k | const int32_t f1_2 = 2 * f1; |
354 | 96.3k | const int32_t f2_2 = 2 * f2; |
355 | 96.3k | const int32_t f3_2 = 2 * f3; |
356 | 96.3k | const int32_t f4_2 = 2 * f4; |
357 | 96.3k | const int32_t f5_2 = 2 * f5; |
358 | 96.3k | const int32_t f6_2 = 2 * f6; |
359 | 96.3k | const int32_t f7_2 = 2 * f7; |
360 | 96.3k | const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ |
361 | 96.3k | const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ |
362 | 96.3k | const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ |
363 | 96.3k | const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ |
364 | 96.3k | const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ |
365 | 96.3k | |
// 55 products instead of 100: each cross term fI*fJ (I < J) is formed
// once, with its factor of 2 carried by one of the precomputed operands.
// The name suffix records the total constant factor of the product.
366 | 96.3k | const int64_t f0f0 = f0 * static_cast<int64_t>(f0); |
367 | 96.3k | const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1); |
368 | 96.3k | const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2); |
369 | 96.3k | const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3); |
370 | 96.3k | const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4); |
371 | 96.3k | const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5); |
372 | 96.3k | const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6); |
373 | 96.3k | const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7); |
374 | 96.3k | const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8); |
375 | 96.3k | const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9); |
376 | 96.3k | const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1); |
377 | 96.3k | const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2); |
378 | 96.3k | const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2); |
379 | 96.3k | const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4); |
380 | 96.3k | const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2); |
381 | 96.3k | const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6); |
382 | 96.3k | const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2); |
383 | 96.3k | const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8); |
384 | 96.3k | const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38); |
385 | 96.3k | const int64_t f2f2 = f2 * static_cast<int64_t>(f2); |
386 | 96.3k | const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3); |
387 | 96.3k | const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4); |
388 | 96.3k | const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5); |
389 | 96.3k | const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6); |
390 | 96.3k | const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7); |
391 | 96.3k | const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19); |
392 | 96.3k | const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38); |
393 | 96.3k | const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3); |
394 | 96.3k | const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4); |
395 | 96.3k | const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2); |
396 | 96.3k | const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6); |
397 | 96.3k | const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38); |
398 | 96.3k | const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19); |
399 | 96.3k | const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38); |
400 | 96.3k | const int64_t f4f4 = f4 * static_cast<int64_t>(f4); |
401 | 96.3k | const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5); |
402 | 96.3k | const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19); |
403 | 96.3k | const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38); |
404 | 96.3k | const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19); |
405 | 96.3k | const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38); |
406 | 96.3k | const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38); |
407 | 96.3k | const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19); |
408 | 96.3k | const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38); |
409 | 96.3k | const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19); |
410 | 96.3k | const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38); |
411 | 96.3k | const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19); |
412 | 96.3k | const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38); |
413 | 96.3k | const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19); |
414 | 96.3k | const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38); |
415 | 96.3k | const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38); |
416 | 96.3k | const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19); |
417 | 96.3k | const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38); |
418 | 96.3k | const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19); |
419 | 96.3k | const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38); |
420 | 96.3k | const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38); |
421 | 96.3k | |
// Column sums: hK collects the products fIfJ with (I + J) mod 10 == K.
422 | 96.3k | int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; |
423 | 96.3k | int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; |
424 | 96.3k | int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; |
425 | 96.3k | int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; |
426 | 96.3k | int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; |
427 | 96.3k | int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; |
428 | 96.3k | int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; |
429 | 96.3k | int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; |
430 | 96.3k | int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; |
431 | 96.3k | int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; |
432 | 96.3k | |
// Same twelve-step carry order as FE_25519::mul earlier in this file.
433 | 96.3k | carry<26>(h0, h1); |
434 | 96.3k | carry<26>(h4, h5); |
435 | 96.3k | carry<25>(h1, h2); |
436 | 96.3k | carry<25>(h5, h6); |
437 | 96.3k | carry<26>(h2, h3); |
438 | 96.3k | carry<26>(h6, h7); |
439 | 96.3k | |
440 | 96.3k | carry<25>(h3, h4); |
441 | 96.3k | carry<25>(h7, h8); |
442 | 96.3k | |
443 | 96.3k | carry<26>(h4, h5); |
444 | 96.3k | carry<26>(h8, h9); |
445 | 96.3k | carry<25,19>(h9, h0); |
446 | 96.3k | carry<26>(h0, h1); |
447 | 96.3k | |
// Feed the carried accumulators back as the limbs for the next pass.
// NOTE(review): the narrowing casts presumably rely on the post-carry
// bounds stated in the comment above this function -- confirm.
448 | 96.3k | f0 = static_cast<int32_t>(h0); |
449 | 96.3k | f1 = static_cast<int32_t>(h1); |
450 | 96.3k | f2 = static_cast<int32_t>(h2); |
451 | 96.3k | f3 = static_cast<int32_t>(h3); |
452 | 96.3k | f4 = static_cast<int32_t>(h4); |
453 | 96.3k | f5 = static_cast<int32_t>(h5); |
454 | 96.3k | f6 = static_cast<int32_t>(h6); |
455 | 96.3k | f7 = static_cast<int32_t>(h7); |
456 | 96.3k | f8 = static_cast<int32_t>(h8); |
457 | 96.3k | f9 = static_cast<int32_t>(h9); |
458 | 96.3k | } |
459 | 58.6k | |
460 | 58.6k | return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9); |
461 | 58.6k | } |
462 | | |
463 | | /* |
464 | | h = 2 * f * f |
465 | | Can overlap h with f. |
466 | | |
467 | | Preconditions: |
468 | | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
469 | | |
470 | | Postconditions: |
471 | | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
472 | | */ |
473 | | |
474 | | /* |
475 | | See the notes above FE_25519::mul for discussion of implementation strategy. |
476 | | */ |
477 | | |
478 | | //static |
/*
FE_25519::sqr2 -- doubled square, h = 2 * f * f.

Parameters:
f - input element, read as ten int32_t limbs through operator[].

Returns:
An FE_25519 constructed from the ten accumulators h0..h9 after they have
been doubled and carried.

The products and column sums are the same as in a single squaring pass;
the only extra step is doubling every accumulator (hK += hK) before the
carry sequence runs.

NOTE(review): carry<> is declared outside this section of the file;
carry<25,19>(h9, h0) appears to fold the top carry back into h0 scaled
by 19 -- confirm against the helper's definition.
*/
479 | | FE_25519 FE_25519::sqr2(const FE_25519& f) |
480 | 18.8k | { |
// Local copies of the ten limbs of f.
481 | 18.8k | const int32_t f0 = f[0]; |
482 | 18.8k | const int32_t f1 = f[1]; |
483 | 18.8k | const int32_t f2 = f[2]; |
484 | 18.8k | const int32_t f3 = f[3]; |
485 | 18.8k | const int32_t f4 = f[4]; |
486 | 18.8k | const int32_t f5 = f[5]; |
487 | 18.8k | const int32_t f6 = f[6]; |
488 | 18.8k | const int32_t f7 = f[7]; |
489 | 18.8k | const int32_t f8 = f[8]; |
490 | 18.8k | const int32_t f9 = f[9]; |
// 32-bit precomputations: doubled limbs and limbs scaled by 19 or 38.
491 | 18.8k | const int32_t f0_2 = 2 * f0; |
492 | 18.8k | const int32_t f1_2 = 2 * f1; |
493 | 18.8k | const int32_t f2_2 = 2 * f2; |
494 | 18.8k | const int32_t f3_2 = 2 * f3; |
495 | 18.8k | const int32_t f4_2 = 2 * f4; |
496 | 18.8k | const int32_t f5_2 = 2 * f5; |
497 | 18.8k | const int32_t f6_2 = 2 * f6; |
498 | 18.8k | const int32_t f7_2 = 2 * f7; |
499 | 18.8k | const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ |
500 | 18.8k | const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ |
501 | 18.8k | const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ |
502 | 18.8k | const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ |
503 | 18.8k | const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ |
// 55 products: each cross term fI*fJ (I < J) is formed once, with its
// factor of 2 carried by one of the precomputed operands. The name suffix
// records the total constant factor of the product.
504 | 18.8k | const int64_t f0f0 = f0 * static_cast<int64_t>(f0); |
505 | 18.8k | const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1); |
506 | 18.8k | const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2); |
507 | 18.8k | const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3); |
508 | 18.8k | const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4); |
509 | 18.8k | const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5); |
510 | 18.8k | const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6); |
511 | 18.8k | const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7); |
512 | 18.8k | const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8); |
513 | 18.8k | const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9); |
514 | 18.8k | const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1); |
515 | 18.8k | const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2); |
516 | 18.8k | const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2); |
517 | 18.8k | const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4); |
518 | 18.8k | const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2); |
519 | 18.8k | const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6); |
520 | 18.8k | const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2); |
521 | 18.8k | const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8); |
522 | 18.8k | const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38); |
523 | 18.8k | const int64_t f2f2 = f2 * static_cast<int64_t>(f2); |
524 | 18.8k | const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3); |
525 | 18.8k | const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4); |
526 | 18.8k | const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5); |
527 | 18.8k | const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6); |
528 | 18.8k | const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7); |
529 | 18.8k | const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19); |
530 | 18.8k | const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38); |
531 | 18.8k | const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3); |
532 | 18.8k | const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4); |
533 | 18.8k | const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2); |
534 | 18.8k | const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6); |
535 | 18.8k | const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38); |
536 | 18.8k | const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19); |
537 | 18.8k | const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38); |
538 | 18.8k | const int64_t f4f4 = f4 * static_cast<int64_t>(f4); |
539 | 18.8k | const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5); |
540 | 18.8k | const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19); |
541 | 18.8k | const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38); |
542 | 18.8k | const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19); |
543 | 18.8k | const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38); |
544 | 18.8k | const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38); |
545 | 18.8k | const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19); |
546 | 18.8k | const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38); |
547 | 18.8k | const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19); |
548 | 18.8k | const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38); |
549 | 18.8k | const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19); |
550 | 18.8k | const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38); |
551 | 18.8k | const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19); |
552 | 18.8k | const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38); |
553 | 18.8k | const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38); |
554 | 18.8k | const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19); |
555 | 18.8k | const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38); |
556 | 18.8k | const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19); |
557 | 18.8k | const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38); |
558 | 18.8k | const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38); |
559 | 18.8k | |
// Column sums: hK collects the products fIfJ with (I + J) mod 10 == K.
560 | 18.8k | int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; |
561 | 18.8k | int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; |
562 | 18.8k | int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; |
563 | 18.8k | int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; |
564 | 18.8k | int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; |
565 | 18.8k | int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; |
566 | 18.8k | int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; |
567 | 18.8k | int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; |
568 | 18.8k | int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; |
569 | 18.8k | int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; |
570 | 18.8k | |
// Double every accumulator: this is what turns f*f into 2*f*f.
571 | 18.8k | h0 += h0; |
572 | 18.8k | h1 += h1; |
573 | 18.8k | h2 += h2; |
574 | 18.8k | h3 += h3; |
575 | 18.8k | h4 += h4; |
576 | 18.8k | h5 += h5; |
577 | 18.8k | h6 += h6; |
578 | 18.8k | h7 += h7; |
579 | 18.8k | h8 += h8; |
580 | 18.8k | h9 += h9; |
581 | 18.8k | |
// Same twelve-step carry order as FE_25519::mul earlier in this file.
582 | 18.8k | carry<26>(h0, h1); |
583 | 18.8k | carry<26>(h4, h5); |
584 | 18.8k | |
585 | 18.8k | carry<25>(h1, h2); |
586 | 18.8k | carry<25>(h5, h6); |
587 | 18.8k | |
588 | 18.8k | carry<26>(h2, h3); |
589 | 18.8k | carry<26>(h6, h7); |
590 | 18.8k | |
591 | 18.8k | carry<25>(h3, h4); |
592 | 18.8k | carry<25>(h7, h8); |
593 | 18.8k | carry<26>(h4, h5); |
594 | 18.8k | carry<26>(h8, h9); |
595 | 18.8k | carry<25,19>(h9, h0); |
596 | 18.8k | carry<26>(h0, h1); |
597 | 18.8k | |
// The result is constructed directly from the ten 64-bit accumulators.
598 | 18.8k | return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9); |
599 | 18.8k | } |
600 | | |
601 | | /* |
602 | | Ignores the top bit of the input: the value loaded from s + 29 is masked with 0x7fffff. |
603 | | */ |
604 | | |
605 | | void FE_25519::from_bytes(const uint8_t s[32]) |
606 | 78 | { |
607 | 78 | int64_t h0 = load_4(s); |
608 | 78 | int64_t h1 = load_3(s + 4) << 6; |
609 | 78 | int64_t h2 = load_3(s + 7) << 5; |
610 | 78 | int64_t h3 = load_3(s + 10) << 3; |
611 | 78 | int64_t h4 = load_3(s + 13) << 2; |
612 | 78 | int64_t h5 = load_4(s + 16); |
613 | 78 | int64_t h6 = load_3(s + 20) << 7; |
614 | 78 | int64_t h7 = load_3(s + 23) << 5; |
615 | 78 | int64_t h8 = load_3(s + 26) << 4; |
616 | 78 | int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2; |
617 | 78 | |
618 | 78 | carry<25,19>(h9, h0); |
619 | 78 | carry<25>(h1, h2); |
620 | 78 | carry<25>(h3, h4); |
621 | 78 | carry<25>(h5, h6); |
622 | 78 | carry<25>(h7, h8); |
623 | 78 | |
624 | 78 | carry<26>(h0, h1); |
625 | 78 | carry<26>(h2, h3); |
626 | 78 | carry<26>(h4, h5); |
627 | 78 | carry<26>(h6, h7); |
628 | 78 | carry<26>(h8, h9); |
629 | 78 | |
630 | 78 | m_fe[0] = static_cast<int32_t>(h0); |
631 | 78 | m_fe[1] = static_cast<int32_t>(h1); |
632 | 78 | m_fe[2] = static_cast<int32_t>(h2); |
633 | 78 | m_fe[3] = static_cast<int32_t>(h3); |
634 | 78 | m_fe[4] = static_cast<int32_t>(h4); |
635 | 78 | m_fe[5] = static_cast<int32_t>(h5); |
636 | 78 | m_fe[6] = static_cast<int32_t>(h6); |
637 | 78 | m_fe[7] = static_cast<int32_t>(h7); |
638 | 78 | m_fe[8] = static_cast<int32_t>(h8); |
639 | 78 | m_fe[9] = static_cast<int32_t>(h9); |
640 | 78 | } |
641 | | |
642 | | /* |
643 | | Preconditions: |
644 | | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
645 | | |
646 | | Write p=2^255-19; q=floor(h/p). |
647 | | Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). |
648 | | |
649 | | Proof: |
650 | | Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. |
651 | | Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. |
652 | | |
653 | | Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). |
654 | | Then 0<y<1. |
655 | | |
656 | | Write r=h-pq. |
657 | | Have 0<=r<=p-1=2^255-20. |
658 | | Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. |
659 | | |
660 | | Write x=r+19(2^-255)r+y. |
661 | | Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. |
662 | | |
663 | | Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) |
664 | | so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. |
665 | | */ |
666 | | |
667 | | void FE_25519::to_bytes(uint8_t s[32]) const |
668 | 338 | { |
669 | 338 | const int64_t X25 = (1 << 25); |
670 | 338 | |
671 | 338 | int32_t h0 = m_fe[0]; |
672 | 338 | int32_t h1 = m_fe[1]; |
673 | 338 | int32_t h2 = m_fe[2]; |
674 | 338 | int32_t h3 = m_fe[3]; |
675 | 338 | int32_t h4 = m_fe[4]; |
676 | 338 | int32_t h5 = m_fe[5]; |
677 | 338 | int32_t h6 = m_fe[6]; |
678 | 338 | int32_t h7 = m_fe[7]; |
679 | 338 | int32_t h8 = m_fe[8]; |
680 | 338 | int32_t h9 = m_fe[9]; |
681 | 338 | int32_t q; |
682 | 338 | |
683 | 338 | q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25; |
684 | 338 | q = (h0 + q) >> 26; |
685 | 338 | q = (h1 + q) >> 25; |
686 | 338 | q = (h2 + q) >> 26; |
687 | 338 | q = (h3 + q) >> 25; |
688 | 338 | q = (h4 + q) >> 26; |
689 | 338 | q = (h5 + q) >> 25; |
690 | 338 | q = (h6 + q) >> 26; |
691 | 338 | q = (h7 + q) >> 25; |
692 | 338 | q = (h8 + q) >> 26; |
693 | 338 | q = (h9 + q) >> 25; |
694 | 338 | |
695 | 338 | /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ |
696 | 338 | h0 += 19 * q; |
697 | 338 | /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ |
698 | 338 | |
699 | 338 | carry0<26>(h0, h1); |
700 | 338 | carry0<25>(h1, h2); |
701 | 338 | carry0<26>(h2, h3); |
702 | 338 | carry0<25>(h3, h4); |
703 | 338 | carry0<26>(h4, h5); |
704 | 338 | carry0<25>(h5, h6); |
705 | 338 | carry0<26>(h6, h7); |
706 | 338 | carry0<25>(h7, h8); |
707 | 338 | carry0<26>(h8, h9); |
708 | 338 | |
709 | 338 | int32_t carry9 = h9 >> 25; |
710 | 338 | h9 -= carry9 * X25; |
711 | 338 | /* h10 = carry9 */ |
712 | 338 | |
713 | 338 | /* |
714 | 338 | Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. |
715 | 338 | Have h0+...+2^230 h9 between 0 and 2^255-1; |
716 | 338 | evidently 2^255 h10-2^255 q = 0. |
717 | 338 | Goal: Output h0+...+2^230 h9. |
718 | 338 | */ |
719 | 338 | |
720 | 338 | s[0] = static_cast<uint8_t>(h0 >> 0); |
721 | 338 | s[1] = static_cast<uint8_t>(h0 >> 8); |
722 | 338 | s[2] = static_cast<uint8_t>(h0 >> 16); |
723 | 338 | s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2)); |
724 | 338 | s[4] = static_cast<uint8_t>(h1 >> 6); |
725 | 338 | s[5] = static_cast<uint8_t>(h1 >> 14); |
726 | 338 | s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3)); |
727 | 338 | s[7] = static_cast<uint8_t>(h2 >> 5); |
728 | 338 | s[8] = static_cast<uint8_t>(h2 >> 13); |
729 | 338 | s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5)); |
730 | 338 | s[10] = static_cast<uint8_t>(h3 >> 3); |
731 | 338 | s[11] = static_cast<uint8_t>(h3 >> 11); |
732 | 338 | s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6)); |
733 | 338 | s[13] = static_cast<uint8_t>(h4 >> 2); |
734 | 338 | s[14] = static_cast<uint8_t>(h4 >> 10); |
735 | 338 | s[15] = static_cast<uint8_t>(h4 >> 18); |
736 | 338 | s[16] = static_cast<uint8_t>(h5 >> 0); |
737 | 338 | s[17] = static_cast<uint8_t>(h5 >> 8); |
738 | 338 | s[18] = static_cast<uint8_t>(h5 >> 16); |
739 | 338 | s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1)); |
740 | 338 | s[20] = static_cast<uint8_t>(h6 >> 7); |
741 | 338 | s[21] = static_cast<uint8_t>(h6 >> 15); |
742 | 338 | s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3)); |
743 | 338 | s[23] = static_cast<uint8_t>(h7 >> 5); |
744 | 338 | s[24] = static_cast<uint8_t>(h7 >> 13); |
745 | 338 | s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4)); |
746 | 338 | s[26] = static_cast<uint8_t>(h8 >> 4); |
747 | 338 | s[27] = static_cast<uint8_t>(h8 >> 12); |
748 | 338 | s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6)); |
749 | 338 | s[29] = static_cast<uint8_t>(h9 >> 2); |
750 | 338 | s[30] = static_cast<uint8_t>(h9 >> 10); |
751 | 338 | s[31] = static_cast<uint8_t>(h9 >> 18); |
752 | 338 | } |
753 | | |
754 | | } |