/src/Botan-3.4.0/src/lib/pubkey/ed25519/ed25519_fe.cpp
Line | Count | Source |
1 | | /* |
2 | | * Ed25519 field element |
3 | | * (C) 2017 Ribose Inc |
4 | | * |
5 | | * Based on the public domain code from SUPERCOP ref10 by |
6 | | * Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang |
7 | | * |
8 | | * Botan is released under the Simplified BSD License (see license.txt) |
9 | | */ |
10 | | |
11 | | #include <botan/internal/ed25519_fe.h> |
12 | | |
13 | | #include <botan/internal/ed25519_internal.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | //static |
18 | 63.1k | FE_25519 FE_25519::invert(const FE_25519& z) { |
19 | 63.1k | FE_25519 t0; |
20 | 63.1k | FE_25519 t1; |
21 | 63.1k | FE_25519 t2; |
22 | 63.1k | FE_25519 t3; |
23 | | |
24 | 63.1k | fe_sq(t0, z); |
25 | 63.1k | fe_sq_iter(t1, t0, 2); |
26 | 63.1k | fe_mul(t1, z, t1); |
27 | 63.1k | fe_mul(t0, t0, t1); |
28 | 63.1k | fe_sq(t2, t0); |
29 | 63.1k | fe_mul(t1, t1, t2); |
30 | 63.1k | fe_sq_iter(t2, t1, 5); |
31 | 63.1k | fe_mul(t1, t2, t1); |
32 | 63.1k | fe_sq_iter(t2, t1, 10); |
33 | 63.1k | fe_mul(t2, t2, t1); |
34 | 63.1k | fe_sq_iter(t3, t2, 20); |
35 | 63.1k | fe_mul(t2, t3, t2); |
36 | 63.1k | fe_sq_iter(t2, t2, 10); |
37 | 63.1k | fe_mul(t1, t2, t1); |
38 | 63.1k | fe_sq_iter(t2, t1, 50); |
39 | 63.1k | fe_mul(t2, t2, t1); |
40 | 63.1k | fe_sq_iter(t3, t2, 100); |
41 | 63.1k | fe_mul(t2, t3, t2); |
42 | 63.1k | fe_sq_iter(t2, t2, 50); |
43 | 63.1k | fe_mul(t1, t2, t1); |
44 | 63.1k | fe_sq_iter(t1, t1, 5); |
45 | | |
46 | 63.1k | fe_mul(t0, t1, t0); |
47 | 63.1k | return t0; |
48 | 63.1k | } |
49 | | |
50 | 64.3k | FE_25519 FE_25519::pow_22523(const fe& z) { |
51 | 64.3k | FE_25519 t0; |
52 | 64.3k | FE_25519 t1; |
53 | 64.3k | FE_25519 t2; |
54 | | |
55 | 64.3k | fe_sq(t0, z); |
56 | 64.3k | fe_sq_iter(t1, t0, 2); |
57 | 64.3k | fe_mul(t1, z, t1); |
58 | 64.3k | fe_mul(t0, t0, t1); |
59 | 64.3k | fe_sq(t0, t0); |
60 | 64.3k | fe_mul(t0, t1, t0); |
61 | 64.3k | fe_sq_iter(t1, t0, 5); |
62 | 64.3k | fe_mul(t0, t1, t0); |
63 | 64.3k | fe_sq_iter(t1, t0, 10); |
64 | 64.3k | fe_mul(t1, t1, t0); |
65 | 64.3k | fe_sq_iter(t2, t1, 20); |
66 | 64.3k | fe_mul(t1, t2, t1); |
67 | 64.3k | fe_sq_iter(t1, t1, 10); |
68 | 64.3k | fe_mul(t0, t1, t0); |
69 | 64.3k | fe_sq_iter(t1, t0, 50); |
70 | 64.3k | fe_mul(t1, t1, t0); |
71 | 64.3k | fe_sq_iter(t2, t1, 100); |
72 | 64.3k | fe_mul(t1, t2, t1); |
73 | 64.3k | fe_sq_iter(t1, t1, 50); |
74 | 64.3k | fe_mul(t0, t1, t0); |
75 | 64.3k | fe_sq_iter(t0, t0, 2); |
76 | | |
77 | 64.3k | fe_mul(t0, t0, z); |
78 | 64.3k | return t0; |
79 | 64.3k | } |
80 | | |
81 | | /* |
82 | | h = f * g |
83 | | Can overlap h with f or g. |
84 | | |
85 | | Preconditions: |
86 | | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
87 | | |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
88 | | |
89 | | Postconditions: |
90 | | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
91 | | */ |
92 | | |
93 | | /* |
94 | | Notes on implementation strategy: |
95 | | |
96 | | Using schoolbook multiplication. |
97 | | Karatsuba would save a little in some cost models. |
98 | | |
99 | | Most multiplications by 2 and 19 are 32-bit precomputations; |
100 | | cheaper than 64-bit postcomputations. |
101 | | |
102 | | There is one remaining multiplication by 19 in the carry chain; |
103 | | one *19 precomputation can be merged into this, |
104 | | but the resulting data flow is considerably less clean. |
105 | | |
106 | | There are 12 carries below. |
107 | | 10 of them are 2-way parallelizable and vectorizable. |
108 | | Can get away with 11 carries, but then data flow is much deeper. |
109 | | |
110 | | With tighter constraints on inputs can squeeze carries into int32. |
111 | | */ |
112 | | |
113 | | //static |
/*
Multiply two field elements, each held as ten signed 32-bit limbs.

The limbs of f and g are read through operator[]. One operand of every
partial product is widened to int64_t before multiplying, the products are
summed into ten accumulators h0..h9, and the accumulators go through
twelve carry<> steps before being handed to the FE_25519 constructor.

A product of limbs i and j lands in accumulator (i + j) mod 10. When
i + j >= 10 the g limb is taken from the 19*g precomputation; when both i
and j are odd the f limb is taken from the 2*f precomputation.

carry<> is defined elsewhere. The bound comments between the carry steps
are retained from the existing source and rely on the preconditions given
in the comment block preceding this function.

f, g: the two factors.
Returns: FE_25519 built from h0..h9 after the carry steps.
*/
114 | 93.2M | FE_25519 FE_25519::mul(const FE_25519& f, const FE_25519& g) { |
115 | 93.2M | const int32_t f0 = f[0]; |
116 | 93.2M | const int32_t f1 = f[1]; |
117 | 93.2M | const int32_t f2 = f[2]; |
118 | 93.2M | const int32_t f3 = f[3]; |
119 | 93.2M | const int32_t f4 = f[4]; |
120 | 93.2M | const int32_t f5 = f[5]; |
121 | 93.2M | const int32_t f6 = f[6]; |
122 | 93.2M | const int32_t f7 = f[7]; |
123 | 93.2M | const int32_t f8 = f[8]; |
124 | 93.2M | const int32_t f9 = f[9]; |
125 | |
126 | 93.2M | const int32_t g0 = g[0]; |
127 | 93.2M | const int32_t g1 = g[1]; |
128 | 93.2M | const int32_t g2 = g[2]; |
129 | 93.2M | const int32_t g3 = g[3]; |
130 | 93.2M | const int32_t g4 = g[4]; |
131 | 93.2M | const int32_t g5 = g[5]; |
132 | 93.2M | const int32_t g6 = g[6]; |
133 | 93.2M | const int32_t g7 = g[7]; |
134 | 93.2M | const int32_t g8 = g[8]; |
135 | 93.2M | const int32_t g9 = g[9]; |
136 | |
// 32-bit precomputations of 19*g[i]; used for every product whose limb indices sum to 10 or more.
137 | 93.2M | const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ |
138 | 93.2M | const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ |
139 | 93.2M | const int32_t g3_19 = 19 * g3; |
140 | 93.2M | const int32_t g4_19 = 19 * g4; |
141 | 93.2M | const int32_t g5_19 = 19 * g5; |
142 | 93.2M | const int32_t g6_19 = 19 * g6; |
143 | 93.2M | const int32_t g7_19 = 19 * g7; |
144 | 93.2M | const int32_t g8_19 = 19 * g8; |
145 | 93.2M | const int32_t g9_19 = 19 * g9; |
// Doubled odd-indexed limbs of f; used when both limb indices of a product are odd.
146 | 93.2M | const int32_t f1_2 = 2 * f1; |
147 | 93.2M | const int32_t f3_2 = 2 * f3; |
148 | 93.2M | const int32_t f5_2 = 2 * f5; |
149 | 93.2M | const int32_t f7_2 = 2 * f7; |
150 | 93.2M | const int32_t f9_2 = 2 * f9; |
151 | |
// The 100 partial products; the right-hand operand is widened so each multiply is done in 64 bits.
152 | 93.2M | const int64_t f0g0 = f0 * static_cast<int64_t>(g0); |
153 | 93.2M | const int64_t f0g1 = f0 * static_cast<int64_t>(g1); |
154 | 93.2M | const int64_t f0g2 = f0 * static_cast<int64_t>(g2); |
155 | 93.2M | const int64_t f0g3 = f0 * static_cast<int64_t>(g3); |
156 | 93.2M | const int64_t f0g4 = f0 * static_cast<int64_t>(g4); |
157 | 93.2M | const int64_t f0g5 = f0 * static_cast<int64_t>(g5); |
158 | 93.2M | const int64_t f0g6 = f0 * static_cast<int64_t>(g6); |
159 | 93.2M | const int64_t f0g7 = f0 * static_cast<int64_t>(g7); |
160 | 93.2M | const int64_t f0g8 = f0 * static_cast<int64_t>(g8); |
161 | 93.2M | const int64_t f0g9 = f0 * static_cast<int64_t>(g9); |
162 | 93.2M | const int64_t f1g0 = f1 * static_cast<int64_t>(g0); |
163 | 93.2M | const int64_t f1g1_2 = f1_2 * static_cast<int64_t>(g1); |
164 | 93.2M | const int64_t f1g2 = f1 * static_cast<int64_t>(g2); |
165 | 93.2M | const int64_t f1g3_2 = f1_2 * static_cast<int64_t>(g3); |
166 | 93.2M | const int64_t f1g4 = f1 * static_cast<int64_t>(g4); |
167 | 93.2M | const int64_t f1g5_2 = f1_2 * static_cast<int64_t>(g5); |
168 | 93.2M | const int64_t f1g6 = f1 * static_cast<int64_t>(g6); |
169 | 93.2M | const int64_t f1g7_2 = f1_2 * static_cast<int64_t>(g7); |
170 | 93.2M | const int64_t f1g8 = f1 * static_cast<int64_t>(g8); |
171 | 93.2M | const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19); |
172 | 93.2M | const int64_t f2g0 = f2 * static_cast<int64_t>(g0); |
173 | 93.2M | const int64_t f2g1 = f2 * static_cast<int64_t>(g1); |
174 | 93.2M | const int64_t f2g2 = f2 * static_cast<int64_t>(g2); |
175 | 93.2M | const int64_t f2g3 = f2 * static_cast<int64_t>(g3); |
176 | 93.2M | const int64_t f2g4 = f2 * static_cast<int64_t>(g4); |
177 | 93.2M | const int64_t f2g5 = f2 * static_cast<int64_t>(g5); |
178 | 93.2M | const int64_t f2g6 = f2 * static_cast<int64_t>(g6); |
179 | 93.2M | const int64_t f2g7 = f2 * static_cast<int64_t>(g7); |
180 | 93.2M | const int64_t f2g8_19 = f2 * static_cast<int64_t>(g8_19); |
181 | 93.2M | const int64_t f2g9_19 = f2 * static_cast<int64_t>(g9_19); |
182 | 93.2M | const int64_t f3g0 = f3 * static_cast<int64_t>(g0); |
183 | 93.2M | const int64_t f3g1_2 = f3_2 * static_cast<int64_t>(g1); |
184 | 93.2M | const int64_t f3g2 = f3 * static_cast<int64_t>(g2); |
185 | 93.2M | const int64_t f3g3_2 = f3_2 * static_cast<int64_t>(g3); |
186 | 93.2M | const int64_t f3g4 = f3 * static_cast<int64_t>(g4); |
187 | 93.2M | const int64_t f3g5_2 = f3_2 * static_cast<int64_t>(g5); |
188 | 93.2M | const int64_t f3g6 = f3 * static_cast<int64_t>(g6); |
189 | 93.2M | const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19); |
190 | 93.2M | const int64_t f3g8_19 = f3 * static_cast<int64_t>(g8_19); |
191 | 93.2M | const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19); |
192 | 93.2M | const int64_t f4g0 = f4 * static_cast<int64_t>(g0); |
193 | 93.2M | const int64_t f4g1 = f4 * static_cast<int64_t>(g1); |
194 | 93.2M | const int64_t f4g2 = f4 * static_cast<int64_t>(g2); |
195 | 93.2M | const int64_t f4g3 = f4 * static_cast<int64_t>(g3); |
196 | 93.2M | const int64_t f4g4 = f4 * static_cast<int64_t>(g4); |
197 | 93.2M | const int64_t f4g5 = f4 * static_cast<int64_t>(g5); |
198 | 93.2M | const int64_t f4g6_19 = f4 * static_cast<int64_t>(g6_19); |
199 | 93.2M | const int64_t f4g7_19 = f4 * static_cast<int64_t>(g7_19); |
200 | 93.2M | const int64_t f4g8_19 = f4 * static_cast<int64_t>(g8_19); |
201 | 93.2M | const int64_t f4g9_19 = f4 * static_cast<int64_t>(g9_19); |
202 | 93.2M | const int64_t f5g0 = f5 * static_cast<int64_t>(g0); |
203 | 93.2M | const int64_t f5g1_2 = f5_2 * static_cast<int64_t>(g1); |
204 | 93.2M | const int64_t f5g2 = f5 * static_cast<int64_t>(g2); |
205 | 93.2M | const int64_t f5g3_2 = f5_2 * static_cast<int64_t>(g3); |
206 | 93.2M | const int64_t f5g4 = f5 * static_cast<int64_t>(g4); |
207 | 93.2M | const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19); |
208 | 93.2M | const int64_t f5g6_19 = f5 * static_cast<int64_t>(g6_19); |
209 | 93.2M | const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19); |
210 | 93.2M | const int64_t f5g8_19 = f5 * static_cast<int64_t>(g8_19); |
211 | 93.2M | const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19); |
212 | 93.2M | const int64_t f6g0 = f6 * static_cast<int64_t>(g0); |
213 | 93.2M | const int64_t f6g1 = f6 * static_cast<int64_t>(g1); |
214 | 93.2M | const int64_t f6g2 = f6 * static_cast<int64_t>(g2); |
215 | 93.2M | const int64_t f6g3 = f6 * static_cast<int64_t>(g3); |
216 | 93.2M | const int64_t f6g4_19 = f6 * static_cast<int64_t>(g4_19); |
217 | 93.2M | const int64_t f6g5_19 = f6 * static_cast<int64_t>(g5_19); |
218 | 93.2M | const int64_t f6g6_19 = f6 * static_cast<int64_t>(g6_19); |
219 | 93.2M | const int64_t f6g7_19 = f6 * static_cast<int64_t>(g7_19); |
220 | 93.2M | const int64_t f6g8_19 = f6 * static_cast<int64_t>(g8_19); |
221 | 93.2M | const int64_t f6g9_19 = f6 * static_cast<int64_t>(g9_19); |
222 | 93.2M | const int64_t f7g0 = f7 * static_cast<int64_t>(g0); |
223 | 93.2M | const int64_t f7g1_2 = f7_2 * static_cast<int64_t>(g1); |
224 | 93.2M | const int64_t f7g2 = f7 * static_cast<int64_t>(g2); |
225 | 93.2M | const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19); |
226 | 93.2M | const int64_t f7g4_19 = f7 * static_cast<int64_t>(g4_19); |
227 | 93.2M | const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19); |
228 | 93.2M | const int64_t f7g6_19 = f7 * static_cast<int64_t>(g6_19); |
229 | 93.2M | const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19); |
230 | 93.2M | const int64_t f7g8_19 = f7 * static_cast<int64_t>(g8_19); |
231 | 93.2M | const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19); |
232 | 93.2M | const int64_t f8g0 = f8 * static_cast<int64_t>(g0); |
233 | 93.2M | const int64_t f8g1 = f8 * static_cast<int64_t>(g1); |
234 | 93.2M | const int64_t f8g2_19 = f8 * static_cast<int64_t>(g2_19); |
235 | 93.2M | const int64_t f8g3_19 = f8 * static_cast<int64_t>(g3_19); |
236 | 93.2M | const int64_t f8g4_19 = f8 * static_cast<int64_t>(g4_19); |
237 | 93.2M | const int64_t f8g5_19 = f8 * static_cast<int64_t>(g5_19); |
238 | 93.2M | const int64_t f8g6_19 = f8 * static_cast<int64_t>(g6_19); |
239 | 93.2M | const int64_t f8g7_19 = f8 * static_cast<int64_t>(g7_19); |
240 | 93.2M | const int64_t f8g8_19 = f8 * static_cast<int64_t>(g8_19); |
241 | 93.2M | const int64_t f8g9_19 = f8 * static_cast<int64_t>(g9_19); |
242 | 93.2M | const int64_t f9g0 = f9 * static_cast<int64_t>(g0); |
243 | 93.2M | const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19); |
244 | 93.2M | const int64_t f9g2_19 = f9 * static_cast<int64_t>(g2_19); |
245 | 93.2M | const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19); |
246 | 93.2M | const int64_t f9g4_19 = f9 * static_cast<int64_t>(g4_19); |
247 | 93.2M | const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19); |
248 | 93.2M | const int64_t f9g6_19 = f9 * static_cast<int64_t>(g6_19); |
249 | 93.2M | const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19); |
250 | 93.2M | const int64_t f9g8_19 = f9 * static_cast<int64_t>(g8_19); |
251 | 93.2M | const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19); |
252 | |
// Column sums: h_k collects the ten products whose limb indices sum to k or to k + 10.
253 | 93.2M | int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38; |
254 | 93.2M | int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19; |
255 | 93.2M | int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38; |
256 | 93.2M | int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19; |
257 | 93.2M | int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38; |
258 | 93.2M | int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19; |
259 | 93.2M | int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38; |
260 | 93.2M | int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19; |
261 | 93.2M | int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38; |
262 | 93.2M | int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0; |
263 | |
264 | | /* |
265 | | |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) |
266 | | i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 |
267 | | |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) |
268 | | i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 |
269 | | */ |
// Carry chain: two interleaved chains (starting at h0 and at h4) are advanced in lockstep.
270 | 93.2M | carry<26>(h0, h1); |
271 | 93.2M | carry<26>(h4, h5); |
272 | |
273 | | /* |h0| <= 2^25 */ |
274 | | /* |h4| <= 2^25 */ |
275 | | /* |h1| <= 1.71*2^59 */ |
276 | | /* |h5| <= 1.71*2^59 */ |
277 | |
278 | 93.2M | carry<25>(h1, h2); |
279 | 93.2M | carry<25>(h5, h6); |
280 | |
281 | | /* |h1| <= 2^24; from now on fits into int32 */ |
282 | | /* |h5| <= 2^24; from now on fits into int32 */ |
283 | | /* |h2| <= 1.41*2^60 */ |
284 | | /* |h6| <= 1.41*2^60 */ |
285 | |
286 | 93.2M | carry<26>(h2, h3); |
287 | 93.2M | carry<26>(h6, h7); |
288 | | /* |h2| <= 2^25; from now on fits into int32 unchanged */ |
289 | | /* |h6| <= 2^25; from now on fits into int32 unchanged */ |
290 | | /* |h3| <= 1.71*2^59 */ |
291 | | /* |h7| <= 1.71*2^59 */ |
292 | |
293 | 93.2M | carry<25>(h3, h4); |
294 | 93.2M | carry<25>(h7, h8); |
295 | | /* |h3| <= 2^24; from now on fits into int32 unchanged */ |
296 | | /* |h7| <= 2^24; from now on fits into int32 unchanged */ |
297 | | /* |h4| <= 1.72*2^34 */ |
298 | | /* |h8| <= 1.41*2^60 */ |
299 | |
300 | 93.2M | carry<26>(h4, h5); |
301 | 93.2M | carry<26>(h8, h9); |
302 | | /* |h4| <= 2^25; from now on fits into int32 unchanged */ |
303 | | /* |h8| <= 2^25; from now on fits into int32 unchanged */ |
304 | | /* |h5| <= 1.01*2^24 */ |
305 | | /* |h9| <= 1.71*2^59 */ |
306 | |
// The only carry that wraps from the top limb back to h0; it is the one carry<> call with a second template argument (19).
307 | 93.2M | carry<25, 19>(h9, h0); |
308 | |
309 | | /* |h9| <= 2^24; from now on fits into int32 unchanged */ |
310 | | /* |h0| <= 1.1*2^39 */ |
311 | |
312 | 93.2M | carry<26>(h0, h1); |
313 | | /* |h0| <= 2^25; from now on fits into int32 unchanged */ |
314 | | /* |h1| <= 1.01*2^24 */ |
315 | |
316 | 93.2M | return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9); |
317 | 93.2M | } |
318 | | |
319 | | /* |
320 | | h = f^(2^iter), i.e. f squared iter times in succession (h = f * f when iter == 1) |
321 | | Can overlap h with f. |
322 | | |
323 | | Preconditions: |
324 | | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
325 | | |
326 | | Postconditions: |
327 | | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
328 | | */ |
329 | | |
330 | | /* |
331 | | See the notes above FE_25519::mul for discussion of implementation strategy. |
332 | | */ |
333 | | |
334 | | //static |
/*
Run the squaring pass below `iter` times in succession, starting from f.

Each pass recomputes the ten 64-bit accumulators h0..h9 from the current
limbs f0..f9, applies the same twelve carry<> steps (same order and
template arguments) as FE_25519::mul, and narrows the accumulators back
into f0..f9 as the input of the next pass. When iter == 0 the loop body
never executes and the limbs of f are returned unchanged.

carry<> is defined elsewhere.

f: starting value.
iter: number of squaring passes.
Returns: FE_25519 built from the limbs after the last pass.
*/
335 | 49.2M | FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter) { |
336 | 49.2M | int32_t f0 = f[0]; |
337 | 49.2M | int32_t f1 = f[1]; |
338 | 49.2M | int32_t f2 = f[2]; |
339 | 49.2M | int32_t f3 = f[3]; |
340 | 49.2M | int32_t f4 = f[4]; |
341 | 49.2M | int32_t f5 = f[5]; |
342 | 49.2M | int32_t f6 = f[6]; |
343 | 49.2M | int32_t f7 = f[7]; |
344 | 49.2M | int32_t f8 = f[8]; |
345 | 49.2M | int32_t f9 = f[9]; |
346 | |
347 | 129M | for(size_t i = 0; i != iter; ++i) { |
// Doubled limbs: a cross product f_i*f_j with i != j is computed once and carries the factor 2.
348 | 80.0M | const int32_t f0_2 = 2 * f0; |
349 | 80.0M | const int32_t f1_2 = 2 * f1; |
350 | 80.0M | const int32_t f2_2 = 2 * f2; |
351 | 80.0M | const int32_t f3_2 = 2 * f3; |
352 | 80.0M | const int32_t f4_2 = 2 * f4; |
353 | 80.0M | const int32_t f5_2 = 2 * f5; |
354 | 80.0M | const int32_t f6_2 = 2 * f6; |
355 | 80.0M | const int32_t f7_2 = 2 * f7; |
// 19x / 38x limbs: used for products whose limb indices sum to 10 or more.
356 | 80.0M | const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ |
357 | 80.0M | const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ |
358 | 80.0M | const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ |
359 | 80.0M | const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ |
360 | 80.0M | const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ |
361 | |
// The 55 distinct partial products; the numeric suffix of each name is the constant factor it already includes.
362 | 80.0M | const int64_t f0f0 = f0 * static_cast<int64_t>(f0); |
363 | 80.0M | const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1); |
364 | 80.0M | const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2); |
365 | 80.0M | const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3); |
366 | 80.0M | const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4); |
367 | 80.0M | const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5); |
368 | 80.0M | const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6); |
369 | 80.0M | const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7); |
370 | 80.0M | const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8); |
371 | 80.0M | const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9); |
372 | 80.0M | const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1); |
373 | 80.0M | const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2); |
374 | 80.0M | const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2); |
375 | 80.0M | const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4); |
376 | 80.0M | const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2); |
377 | 80.0M | const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6); |
378 | 80.0M | const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2); |
379 | 80.0M | const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8); |
380 | 80.0M | const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38); |
381 | 80.0M | const int64_t f2f2 = f2 * static_cast<int64_t>(f2); |
382 | 80.0M | const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3); |
383 | 80.0M | const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4); |
384 | 80.0M | const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5); |
385 | 80.0M | const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6); |
386 | 80.0M | const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7); |
387 | 80.0M | const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19); |
388 | 80.0M | const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38); |
389 | 80.0M | const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3); |
390 | 80.0M | const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4); |
391 | 80.0M | const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2); |
392 | 80.0M | const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6); |
393 | 80.0M | const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38); |
394 | 80.0M | const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19); |
395 | 80.0M | const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38); |
396 | 80.0M | const int64_t f4f4 = f4 * static_cast<int64_t>(f4); |
397 | 80.0M | const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5); |
398 | 80.0M | const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19); |
399 | 80.0M | const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38); |
400 | 80.0M | const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19); |
401 | 80.0M | const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38); |
402 | 80.0M | const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38); |
403 | 80.0M | const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19); |
404 | 80.0M | const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38); |
405 | 80.0M | const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19); |
406 | 80.0M | const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38); |
407 | 80.0M | const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19); |
408 | 80.0M | const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38); |
409 | 80.0M | const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19); |
410 | 80.0M | const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38); |
411 | 80.0M | const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38); |
412 | 80.0M | const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19); |
413 | 80.0M | const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38); |
414 | 80.0M | const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19); |
415 | 80.0M | const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38); |
416 | 80.0M | const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38); |
417 | |
// Column sums: h_k collects the products whose limb indices sum to k or to k + 10.
418 | 80.0M | int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; |
419 | 80.0M | int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; |
420 | 80.0M | int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; |
421 | 80.0M | int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; |
422 | 80.0M | int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; |
423 | 80.0M | int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; |
424 | 80.0M | int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; |
425 | 80.0M | int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; |
426 | 80.0M | int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; |
427 | 80.0M | int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; |
428 | |
// Carry chain, in the same order as in FE_25519::mul.
429 | 80.0M | carry<26>(h0, h1); |
430 | 80.0M | carry<26>(h4, h5); |
431 | 80.0M | carry<25>(h1, h2); |
432 | 80.0M | carry<25>(h5, h6); |
433 | 80.0M | carry<26>(h2, h3); |
434 | 80.0M | carry<26>(h6, h7); |
435 | |
436 | 80.0M | carry<25>(h3, h4); |
437 | 80.0M | carry<25>(h7, h8); |
438 | |
439 | 80.0M | carry<26>(h4, h5); |
440 | 80.0M | carry<26>(h8, h9); |
441 | 80.0M | carry<25, 19>(h9, h0); |
442 | 80.0M | carry<26>(h0, h1); |
443 | |
// Narrow the carried accumulators back to 32-bit limbs; they are the input of the next pass.
444 | 80.0M | f0 = static_cast<int32_t>(h0); |
445 | 80.0M | f1 = static_cast<int32_t>(h1); |
446 | 80.0M | f2 = static_cast<int32_t>(h2); |
447 | 80.0M | f3 = static_cast<int32_t>(h3); |
448 | 80.0M | f4 = static_cast<int32_t>(h4); |
449 | 80.0M | f5 = static_cast<int32_t>(h5); |
450 | 80.0M | f6 = static_cast<int32_t>(h6); |
451 | 80.0M | f7 = static_cast<int32_t>(h7); |
452 | 80.0M | f8 = static_cast<int32_t>(h8); |
453 | 80.0M | f9 = static_cast<int32_t>(h9); |
454 | 80.0M | } |
455 | |
456 | 49.2M | return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9); |
457 | 49.2M | } |
458 | | |
459 | | /* |
460 | | h = 2 * f * f |
461 | | Can overlap h with f. |
462 | | |
463 | | Preconditions: |
464 | | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |
465 | | |
466 | | Postconditions: |
467 | | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. |
468 | | */ |
469 | | |
470 | | /* |
471 | | See the notes above FE_25519::mul for discussion of implementation strategy. |
472 | | */ |
473 | | |
474 | | //static |
/*
Single squaring pass with every accumulator doubled before the carries.

The partial products and column sums h0..h9 are formed exactly as in one
pass of FE_25519::sqr_iter. Each accumulator is then added to itself
(h += h), and the same twelve carry<> steps as in FE_25519::mul are
applied before the result is handed to the FE_25519 constructor.

carry<> is defined elsewhere.

f: value to square.
Returns: FE_25519 built from the doubled, carried accumulators.
*/
475 | 15.8M | FE_25519 FE_25519::sqr2(const FE_25519& f) { |
476 | 15.8M | const int32_t f0 = f[0]; |
477 | 15.8M | const int32_t f1 = f[1]; |
478 | 15.8M | const int32_t f2 = f[2]; |
479 | 15.8M | const int32_t f3 = f[3]; |
480 | 15.8M | const int32_t f4 = f[4]; |
481 | 15.8M | const int32_t f5 = f[5]; |
482 | 15.8M | const int32_t f6 = f[6]; |
483 | 15.8M | const int32_t f7 = f[7]; |
484 | 15.8M | const int32_t f8 = f[8]; |
485 | 15.8M | const int32_t f9 = f[9]; |
// Doubled limbs: a cross product f_i*f_j with i != j is computed once and carries the factor 2.
486 | 15.8M | const int32_t f0_2 = 2 * f0; |
487 | 15.8M | const int32_t f1_2 = 2 * f1; |
488 | 15.8M | const int32_t f2_2 = 2 * f2; |
489 | 15.8M | const int32_t f3_2 = 2 * f3; |
490 | 15.8M | const int32_t f4_2 = 2 * f4; |
491 | 15.8M | const int32_t f5_2 = 2 * f5; |
492 | 15.8M | const int32_t f6_2 = 2 * f6; |
493 | 15.8M | const int32_t f7_2 = 2 * f7; |
// 19x / 38x limbs: used for products whose limb indices sum to 10 or more.
494 | 15.8M | const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ |
495 | 15.8M | const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ |
496 | 15.8M | const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ |
497 | 15.8M | const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ |
498 | 15.8M | const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ |
// The 55 distinct partial products; the numeric suffix of each name is the constant factor it already includes.
499 | 15.8M | const int64_t f0f0 = f0 * static_cast<int64_t>(f0); |
500 | 15.8M | const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1); |
501 | 15.8M | const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2); |
502 | 15.8M | const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3); |
503 | 15.8M | const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4); |
504 | 15.8M | const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5); |
505 | 15.8M | const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6); |
506 | 15.8M | const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7); |
507 | 15.8M | const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8); |
508 | 15.8M | const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9); |
509 | 15.8M | const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1); |
510 | 15.8M | const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2); |
511 | 15.8M | const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2); |
512 | 15.8M | const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4); |
513 | 15.8M | const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2); |
514 | 15.8M | const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6); |
515 | 15.8M | const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2); |
516 | 15.8M | const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8); |
517 | 15.8M | const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38); |
518 | 15.8M | const int64_t f2f2 = f2 * static_cast<int64_t>(f2); |
519 | 15.8M | const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3); |
520 | 15.8M | const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4); |
521 | 15.8M | const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5); |
522 | 15.8M | const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6); |
523 | 15.8M | const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7); |
524 | 15.8M | const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19); |
525 | 15.8M | const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38); |
526 | 15.8M | const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3); |
527 | 15.8M | const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4); |
528 | 15.8M | const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2); |
529 | 15.8M | const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6); |
530 | 15.8M | const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38); |
531 | 15.8M | const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19); |
532 | 15.8M | const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38); |
533 | 15.8M | const int64_t f4f4 = f4 * static_cast<int64_t>(f4); |
534 | 15.8M | const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5); |
535 | 15.8M | const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19); |
536 | 15.8M | const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38); |
537 | 15.8M | const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19); |
538 | 15.8M | const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38); |
539 | 15.8M | const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38); |
540 | 15.8M | const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19); |
541 | 15.8M | const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38); |
542 | 15.8M | const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19); |
543 | 15.8M | const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38); |
544 | 15.8M | const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19); |
545 | 15.8M | const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38); |
546 | 15.8M | const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19); |
547 | 15.8M | const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38); |
548 | 15.8M | const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38); |
549 | 15.8M | const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19); |
550 | 15.8M | const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38); |
551 | 15.8M | const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19); |
552 | 15.8M | const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38); |
553 | 15.8M | const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38); |
554 | |
// Column sums: h_k collects the products whose limb indices sum to k or to k + 10.
555 | 15.8M | int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; |
556 | 15.8M | int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; |
557 | 15.8M | int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; |
558 | 15.8M | int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; |
559 | 15.8M | int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; |
560 | 15.8M | int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; |
561 | 15.8M | int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; |
562 | 15.8M | int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; |
563 | 15.8M | int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; |
564 | 15.8M | int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; |
565 | |
// The doubling that distinguishes sqr2 from a plain squaring pass: done on the 64-bit sums, before any carry.
566 | 15.8M | h0 += h0; |
567 | 15.8M | h1 += h1; |
568 | 15.8M | h2 += h2; |
569 | 15.8M | h3 += h3; |
570 | 15.8M | h4 += h4; |
571 | 15.8M | h5 += h5; |
572 | 15.8M | h6 += h6; |
573 | 15.8M | h7 += h7; |
574 | 15.8M | h8 += h8; |
575 | 15.8M | h9 += h9; |
576 | |
// Carry chain, in the same order as in FE_25519::mul.
577 | 15.8M | carry<26>(h0, h1); |
578 | 15.8M | carry<26>(h4, h5); |
579 | |
580 | 15.8M | carry<25>(h1, h2); |
581 | 15.8M | carry<25>(h5, h6); |
582 | |
583 | 15.8M | carry<26>(h2, h3); |
584 | 15.8M | carry<26>(h6, h7); |
585 | |
586 | 15.8M | carry<25>(h3, h4); |
587 | 15.8M | carry<25>(h7, h8); |
588 | 15.8M | carry<26>(h4, h5); |
589 | 15.8M | carry<26>(h8, h9); |
590 | 15.8M | carry<25, 19>(h9, h0); |
591 | 15.8M | carry<26>(h0, h1); |
592 | |
593 | 15.8M | return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9); |
594 | 15.8M | } |
595 | | |
596 | | /* |
597 | | Ignores the top bit of the input: the final 3-byte group (loaded from s + 29) is masked with 0x7fffff. |
598 | | */ |
599 | | |
600 | 64.3k | void FE_25519::from_bytes(const uint8_t s[32]) { |
601 | 64.3k | int64_t h0 = load_4(s); |
602 | 64.3k | int64_t h1 = load_3(s + 4) << 6; |
603 | 64.3k | int64_t h2 = load_3(s + 7) << 5; |
604 | 64.3k | int64_t h3 = load_3(s + 10) << 3; |
605 | 64.3k | int64_t h4 = load_3(s + 13) << 2; |
606 | 64.3k | int64_t h5 = load_4(s + 16); |
607 | 64.3k | int64_t h6 = load_3(s + 20) << 7; |
608 | 64.3k | int64_t h7 = load_3(s + 23) << 5; |
609 | 64.3k | int64_t h8 = load_3(s + 26) << 4; |
610 | 64.3k | int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2; |
611 | | |
612 | 64.3k | carry<25, 19>(h9, h0); |
613 | 64.3k | carry<25>(h1, h2); |
614 | 64.3k | carry<25>(h3, h4); |
615 | 64.3k | carry<25>(h5, h6); |
616 | 64.3k | carry<25>(h7, h8); |
617 | | |
618 | 64.3k | carry<26>(h0, h1); |
619 | 64.3k | carry<26>(h2, h3); |
620 | 64.3k | carry<26>(h4, h5); |
621 | 64.3k | carry<26>(h6, h7); |
622 | 64.3k | carry<26>(h8, h9); |
623 | | |
624 | 64.3k | m_fe[0] = static_cast<int32_t>(h0); |
625 | 64.3k | m_fe[1] = static_cast<int32_t>(h1); |
626 | 64.3k | m_fe[2] = static_cast<int32_t>(h2); |
627 | 64.3k | m_fe[3] = static_cast<int32_t>(h3); |
628 | 64.3k | m_fe[4] = static_cast<int32_t>(h4); |
629 | 64.3k | m_fe[5] = static_cast<int32_t>(h5); |
630 | 64.3k | m_fe[6] = static_cast<int32_t>(h6); |
631 | 64.3k | m_fe[7] = static_cast<int32_t>(h7); |
632 | 64.3k | m_fe[8] = static_cast<int32_t>(h8); |
633 | 64.3k | m_fe[9] = static_cast<int32_t>(h9); |
634 | 64.3k | } |
635 | | |
636 | | /* |
637 | | Preconditions: |
638 | | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
639 | | |
640 | | Write p=2^255-19; q=floor(h/p). |
641 | | Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). |
642 | | |
643 | | Proof: |
644 | | Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. |
645 | | Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. |
646 | | |
647 | | Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). |
648 | | Then 0<y<1. |
649 | | |
650 | | Write r=h-pq. |
651 | | Have 0<=r<=p-1=2^255-20. |
652 | | Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. |
653 | | |
654 | | Write x=r+19(2^-255)r+y. |
655 | | Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. |
656 | | |
657 | | Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) |
658 | | so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. |
659 | | */ |
660 | | |
661 | 296k | void FE_25519::to_bytes(uint8_t s[32]) const { |
662 | 296k | const int64_t X25 = (1 << 25); |
663 | | |
664 | 296k | int32_t h0 = m_fe[0]; |
665 | 296k | int32_t h1 = m_fe[1]; |
666 | 296k | int32_t h2 = m_fe[2]; |
667 | 296k | int32_t h3 = m_fe[3]; |
668 | 296k | int32_t h4 = m_fe[4]; |
669 | 296k | int32_t h5 = m_fe[5]; |
670 | 296k | int32_t h6 = m_fe[6]; |
671 | 296k | int32_t h7 = m_fe[7]; |
672 | 296k | int32_t h8 = m_fe[8]; |
673 | 296k | int32_t h9 = m_fe[9]; |
674 | 296k | int32_t q; |
675 | | |
676 | 296k | q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25; |
677 | 296k | q = (h0 + q) >> 26; |
678 | 296k | q = (h1 + q) >> 25; |
679 | 296k | q = (h2 + q) >> 26; |
680 | 296k | q = (h3 + q) >> 25; |
681 | 296k | q = (h4 + q) >> 26; |
682 | 296k | q = (h5 + q) >> 25; |
683 | 296k | q = (h6 + q) >> 26; |
684 | 296k | q = (h7 + q) >> 25; |
685 | 296k | q = (h8 + q) >> 26; |
686 | 296k | q = (h9 + q) >> 25; |
687 | | |
688 | | /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ |
689 | 296k | h0 += 19 * q; |
690 | | /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ |
691 | | |
692 | 296k | carry0<26>(h0, h1); |
693 | 296k | carry0<25>(h1, h2); |
694 | 296k | carry0<26>(h2, h3); |
695 | 296k | carry0<25>(h3, h4); |
696 | 296k | carry0<26>(h4, h5); |
697 | 296k | carry0<25>(h5, h6); |
698 | 296k | carry0<26>(h6, h7); |
699 | 296k | carry0<25>(h7, h8); |
700 | 296k | carry0<26>(h8, h9); |
701 | | |
702 | 296k | int32_t carry9 = h9 >> 25; |
703 | 296k | h9 -= carry9 * X25; |
704 | | /* h10 = carry9 */ |
705 | | |
706 | | /* |
707 | | Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. |
708 | | Have h0+...+2^230 h9 between 0 and 2^255-1; |
709 | | evidently 2^255 h10-2^255 q = 0. |
710 | | Goal: Output h0+...+2^230 h9. |
711 | | */ |
712 | | |
713 | 296k | s[0] = static_cast<uint8_t>(h0 >> 0); |
714 | 296k | s[1] = static_cast<uint8_t>(h0 >> 8); |
715 | 296k | s[2] = static_cast<uint8_t>(h0 >> 16); |
716 | 296k | s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2)); |
717 | 296k | s[4] = static_cast<uint8_t>(h1 >> 6); |
718 | 296k | s[5] = static_cast<uint8_t>(h1 >> 14); |
719 | 296k | s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3)); |
720 | 296k | s[7] = static_cast<uint8_t>(h2 >> 5); |
721 | 296k | s[8] = static_cast<uint8_t>(h2 >> 13); |
722 | 296k | s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5)); |
723 | 296k | s[10] = static_cast<uint8_t>(h3 >> 3); |
724 | 296k | s[11] = static_cast<uint8_t>(h3 >> 11); |
725 | 296k | s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6)); |
726 | 296k | s[13] = static_cast<uint8_t>(h4 >> 2); |
727 | 296k | s[14] = static_cast<uint8_t>(h4 >> 10); |
728 | 296k | s[15] = static_cast<uint8_t>(h4 >> 18); |
729 | 296k | s[16] = static_cast<uint8_t>(h5 >> 0); |
730 | 296k | s[17] = static_cast<uint8_t>(h5 >> 8); |
731 | 296k | s[18] = static_cast<uint8_t>(h5 >> 16); |
732 | 296k | s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1)); |
733 | 296k | s[20] = static_cast<uint8_t>(h6 >> 7); |
734 | 296k | s[21] = static_cast<uint8_t>(h6 >> 15); |
735 | 296k | s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3)); |
736 | 296k | s[23] = static_cast<uint8_t>(h7 >> 5); |
737 | 296k | s[24] = static_cast<uint8_t>(h7 >> 13); |
738 | 296k | s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4)); |
739 | 296k | s[26] = static_cast<uint8_t>(h8 >> 4); |
740 | 296k | s[27] = static_cast<uint8_t>(h8 >> 12); |
741 | 296k | s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6)); |
742 | 296k | s[29] = static_cast<uint8_t>(h9 >> 2); |
743 | 296k | s[30] = static_cast<uint8_t>(h9 >> 10); |
744 | 296k | s[31] = static_cast<uint8_t>(h9 >> 18); |
745 | 296k | } |
746 | | |
747 | | } // namespace Botan |