/src/cryptopp/donna_64.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // donna_64.cpp - written and placed in public domain by Jeffrey Walton |
2 | | // Crypto++ specific implementation wrapped around Andrew |
3 | | // Moon's public domain curve25519-donna and ed25519-donna, |
4 | | // https://github.com/floodyberry/curve25519-donna and |
5 | | // https://github.com/floodyberry/ed25519-donna. |
6 | | |
7 | | // The curve25519 and ed25519 source files multiplex different repos and |
8 | | // architectures using namespaces. The repos are Andrew Moon's |
9 | | // curve25519-donna and ed25519-donna. The architectures are 32-bit, 64-bit |
10 | | // and SSE. For example, 32-bit x25519 uses symbols from Donna::X25519 and |
11 | | // Donna::Arch32. |
12 | | |
13 | | // A fair amount of duplication happens below, but we could not directly |
14 | | // use curve25519 for both x25519 and ed25519. A close examination reveals |
15 | | // slight differences in the implementation. For example, look at the |
16 | | // two curve25519_sub functions. |
17 | | |
18 | | // If needed, see Moon's commit "Go back to ignoring 256th bit [sic]", |
19 | | // https://github.com/floodyberry/curve25519-donna/commit/57a683d18721a658 |
20 | | |
21 | | #include "pch.h" |
22 | | |
23 | | #include "config.h" |
24 | | #include "donna.h" |
25 | | #include "secblock.h" |
26 | | #include "sha.h" |
27 | | #include "misc.h" |
28 | | #include "cpu.h" |
29 | | |
30 | | #include <istream> |
31 | | #include <sstream> |
32 | | |
33 | | #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE |
34 | | # pragma GCC diagnostic ignored "-Wunused-function" |
35 | | #endif |
36 | | |
37 | | #if CRYPTOPP_MSC_VERSION |
38 | | # pragma warning(disable: 4244) |
39 | | #endif |
40 | | |
41 | | // Squash MS LNK4221 and libtool warnings |
42 | | extern const char DONNA64_FNAME[] = __FILE__; |
43 | | |
44 | | ANONYMOUS_NAMESPACE_BEGIN |
45 | | |
46 | | // Can't use GetAlignmentOf<word64>() because of C++11 and constexpr |
47 | | // Can't use 'const unsigned int' because of MSVC 2013, so a macro is used instead |
48 | | #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) |
49 | | # define ALIGN_SPEC 16 |
50 | | #else |
51 | | # define ALIGN_SPEC 8 |
52 | | #endif |
53 | | |
54 | | ANONYMOUS_NAMESPACE_END |
55 | | |
56 | | #if defined(CRYPTOPP_CURVE25519_64BIT) |
57 | | |
58 | | #include "donna_64.h" |
59 | | |
60 | | ANONYMOUS_NAMESPACE_BEGIN |
61 | | |
62 | | using CryptoPP::byte; |
63 | | using CryptoPP::word64; |
64 | | using CryptoPP::GetWord; |
65 | | using CryptoPP::PutWord; |
66 | | using CryptoPP::LITTLE_ENDIAN_ORDER; |
67 | | |
68 | | inline word64 U8TO64_LE(const byte* p) /* returns the word64 read from the bytes at p in little-endian order */ |
69 | 0 | { |
70 | 0 | return GetWord<word64>(false, LITTLE_ENDIAN_ORDER, p); /* NOTE(review): the leading 'false' is presumably GetWord's assume-aligned flag - confirm in misc.h */ |
71 | 0 | } |
72 | | |
73 | | inline void U64TO8_LE(byte* p, word64 w) /* stores w to the bytes at p in little-endian order */ |
74 | 0 | { |
75 | 0 | PutWord(false, LITTLE_ENDIAN_ORDER, p, w); /* NOTE(review): the leading 'false' is presumably PutWord's assume-aligned flag - confirm in misc.h */ |
76 | 0 | } |
77 | | |
78 | | ANONYMOUS_NAMESPACE_END |
79 | | |
80 | | NAMESPACE_BEGIN(CryptoPP) |
81 | | NAMESPACE_BEGIN(Donna) |
82 | | NAMESPACE_BEGIN(X25519) |
83 | | ANONYMOUS_NAMESPACE_BEGIN |
84 | | |
85 | | using CryptoPP::byte; |
86 | | using CryptoPP::word32; |
87 | | using CryptoPP::sword32; |
88 | | using CryptoPP::word64; |
89 | | using CryptoPP::sword64; |
90 | | |
91 | | using CryptoPP::GetBlock; |
92 | | using CryptoPP::LittleEndian; |
93 | | |
94 | | // Bring in all the symbols from the 64-bit header |
95 | | using namespace CryptoPP::Donna::Arch64; |
96 | | |
97 | | /* out = in: copies the five limbs in[0..4] to out[0..4] unchanged */ |
98 | | inline void |
99 | 28 | curve25519_copy(bignum25519 out, const bignum25519 in) { |
100 | 28 | out[0] = in[0]; out[1] = in[1]; |
101 | 28 | out[2] = in[2]; out[3] = in[3]; |
102 | 28 | out[4] = in[4]; |
103 | 28 | } |
104 | | |
105 | | /* out = a + b: limb-wise sum of the five limbs; no carry is propagated and no mask is applied here */ |
106 | | inline void |
107 | 28.3k | curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) { |
108 | 28.3k | out[0] = a[0] + b[0]; |
109 | 28.3k | out[1] = a[1] + b[1]; |
110 | 28.3k | out[2] = a[2] + b[2]; |
111 | 28.3k | out[3] = a[3] + b[3]; |
112 | 28.3k | out[4] = a[4] + b[4]; |
113 | 28.3k | } |
114 | | |
115 | | /* out = a - b: limb-wise; two54m152/two54m8 are added to each limb before subtracting (presumably a multiple of p in limb form so no limb underflows - confirm in donna_64.h). No carry pass is run. */ |
116 | | inline void |
117 | 28.3k | curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) { |
118 | 28.3k | out[0] = a[0] + two54m152 - b[0]; |
119 | 28.3k | out[1] = a[1] + two54m8 - b[1]; |
120 | 28.3k | out[2] = a[2] + two54m8 - b[2]; |
121 | 28.3k | out[3] = a[3] + two54m8 - b[3]; |
122 | 28.3k | out[4] = a[4] + two54m8 - b[4]; |
123 | 28.3k | } |
124 | | |
125 | | /* out = (in * scalar): each limb is multiplied by scalar as a 128-bit product, masked with reduce_mask_51, and the bits above bit 51 are carried into the next limb */ |
126 | | inline void |
127 | 7.14k | curve25519_scalar_product(bignum25519 out, const bignum25519 in, const word64 scalar) { |
128 | 7.14k | word128 a; |
129 | 7.14k | word64 c; |
130 | | |
131 | 7.14k | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
132 | 7.14k | a = ((word128) in[0]) * scalar; out[0] = (word64)a & reduce_mask_51; c = (word64)(a >> 51); |
133 | 7.14k | a = ((word128) in[1]) * scalar + c; out[1] = (word64)a & reduce_mask_51; c = (word64)(a >> 51); |
134 | 7.14k | a = ((word128) in[2]) * scalar + c; out[2] = (word64)a & reduce_mask_51; c = (word64)(a >> 51); |
135 | 7.14k | a = ((word128) in[3]) * scalar + c; out[3] = (word64)a & reduce_mask_51; c = (word64)(a >> 51); |
136 | 7.14k | a = ((word128) in[4]) * scalar + c; out[4] = (word64)a & reduce_mask_51; c = (word64)(a >> 51); |
137 | 7.14k | out[0] += c * 19; /* the carry out of the top limb wraps to limb 0 scaled by 19; out[0] is not re-masked afterwards */ |
138 | | #else |
139 | | mul64x64_128(a, in[0], scalar) out[0] = lo128(a) & reduce_mask_51; shr128(c, a, 51); |
140 | | mul64x64_128(a, in[1], scalar) add128_64(a, c) out[1] = lo128(a) & reduce_mask_51; shr128(c, a, 51); |
141 | | mul64x64_128(a, in[2], scalar) add128_64(a, c) out[2] = lo128(a) & reduce_mask_51; shr128(c, a, 51); |
142 | | mul64x64_128(a, in[3], scalar) add128_64(a, c) out[3] = lo128(a) & reduce_mask_51; shr128(c, a, 51); |
143 | | mul64x64_128(a, in[4], scalar) add128_64(a, c) out[4] = lo128(a) & reduce_mask_51; shr128(c, a, 51); |
144 | | out[0] += c * 19; |
145 | | #endif |
146 | 7.14k | } |
147 | | |
148 | | /* out = a * b: accumulates the 5x5 limb products into five 128-bit columns t[0..4], then runs one carry pass. out may alias a or b because both inputs are copied to locals first. */ |
149 | | inline void |
150 | 35.7k | curve25519_mul(bignum25519 out, const bignum25519 a, const bignum25519 b) { |
151 | | #if !defined(CRYPTOPP_WORD128_AVAILABLE) |
152 | | word128 mul; |
153 | | #endif |
154 | 35.7k | word128 t[5]; |
155 | 35.7k | word64 r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c; |
156 | | |
157 | 35.7k | r0 = b[0]; r1 = b[1]; r2 = b[2]; r3 = b[3]; r4 = b[4]; |
158 | 35.7k | s0 = a[0]; s1 = a[1]; s2 = a[2]; s3 = a[3]; s4 = a[4]; |
159 | | |
160 | 35.7k | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
161 | 35.7k | t[0] = ((word128) r0) * s0; |
162 | 35.7k | t[1] = ((word128) r0) * s1 + ((word128) r1) * s0; |
163 | 35.7k | t[2] = ((word128) r0) * s2 + ((word128) r2) * s0 + ((word128) r1) * s1; |
164 | 35.7k | t[3] = ((word128) r0) * s3 + ((word128) r3) * s0 + ((word128) r1) * s2 + ((word128) r2) * s1; |
165 | 35.7k | t[4] = ((word128) r0) * s4 + ((word128) r4) * s0 + ((word128) r3) * s1 + ((word128) r1) * s3 + ((word128) r2) * s2; |
166 | | #else |
167 | | mul64x64_128(t[0], r0, s0) |
168 | | mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul) |
169 | | mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul) |
170 | | mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul) |
171 | | mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul) |
172 | | #endif |
173 | | |
174 | 35.7k | r1 *= 19; r2 *= 19; r3 *= 19; r4 *= 19; /* products whose limb index sum exceeds 4 wrap to a lower column scaled by 19 */ |
175 | | |
176 | 35.7k | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
177 | 35.7k | t[0] += ((word128) r4) * s1 + ((word128) r1) * s4 + ((word128) r2) * s3 + ((word128) r3) * s2; |
178 | 35.7k | t[1] += ((word128) r4) * s2 + ((word128) r2) * s4 + ((word128) r3) * s3; |
179 | 35.7k | t[2] += ((word128) r4) * s3 + ((word128) r3) * s4; |
180 | 35.7k | t[3] += ((word128) r4) * s4; |
181 | | #else |
182 | | mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul) |
183 | | mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul) |
184 | | mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul) |
185 | | mul64x64_128(mul, r4, s4) add128(t[3], mul) |
186 | | #endif |
187 | | |
188 | 35.7k | r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51); |
189 | 35.7k | add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51); |
190 | 35.7k | add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51); |
191 | 35.7k | add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51); |
192 | 35.7k | add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51); |
193 | 35.7k | r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51; |
194 | 35.7k | r1 += c; /* r1 is not re-masked after this add, so out[1] may hold a small excess above the mask */ |
195 | | |
196 | 35.7k | out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4; |
197 | 35.7k | } |
198 | | |
199 | | /* out = in^(2^count): squares the value count times in a row. count must be >= 1. */ |
200 | | inline void |
201 | 252 | curve25519_square_times(bignum25519 out, const bignum25519 in, word64 count) { |
202 | | #if !defined(CRYPTOPP_WORD128_AVAILABLE) |
203 | | word128 mul; |
204 | | #endif |
205 | 252 | word128 t[5]; |
206 | 252 | word64 r0,r1,r2,r3,r4,c; |
207 | 252 | word64 d0,d1,d2,d4,d419; |
208 | | |
209 | 252 | r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4]; |
210 | | |
211 | 7.05k | do { |
212 | 7.05k | d0 = r0 * 2; d1 = r1 * 2; |
213 | 7.05k | d2 = r2 * 2 * 19; |
214 | 7.05k | d419 = r4 * 19; d4 = d419 * 2; |
215 | | |
216 | 7.05k | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
217 | 7.05k | t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 )); |
218 | 7.05k | t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19)); |
219 | 7.05k | t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 )); |
220 | 7.05k | t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 )); |
221 | 7.05k | t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 )); |
222 | | #else |
223 | | mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul) |
224 | | mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul) |
225 | | mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul) |
226 | | mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul) |
227 | | mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul) |
228 | | #endif |
229 | | |
230 | 7.05k | r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51); |
231 | 7.05k | add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51); |
232 | 7.05k | add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51); |
233 | 7.05k | add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51); |
234 | 7.05k | add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51); |
235 | 7.05k | r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51; |
236 | 7.05k | r1 += c; |
237 | 7.05k | } while(--count); /* the body runs before the test, so count == 0 would wrap around instead of doing nothing */ |
238 | | |
239 | 252 | out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4; |
240 | 252 | } |
241 | | |
242 | | inline void |
243 | 28.4k | curve25519_square(bignum25519 out, const bignum25519 in) { /* out = in * in: a single squaring, same column sums and carry pass as one iteration of curve25519_square_times */ |
244 | | #if !defined(CRYPTOPP_WORD128_AVAILABLE) |
245 | | word128 mul; |
246 | | #endif |
247 | 28.4k | word128 t[5]; |
248 | 28.4k | word64 r0,r1,r2,r3,r4,c; |
249 | 28.4k | word64 d0,d1,d2,d4,d419; |
250 | | |
251 | 28.4k | r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4]; |
252 | | |
253 | 28.4k | d0 = r0 * 2; d1 = r1 * 2; |
254 | 28.4k | d2 = r2 * 2 * 19; |
255 | 28.4k | d419 = r4 * 19; d4 = d419 * 2; |
256 | | |
257 | 28.4k | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
258 | 28.4k | t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 )); |
259 | 28.4k | t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19)); |
260 | 28.4k | t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 )); |
261 | 28.4k | t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 )); |
262 | 28.4k | t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 )); |
263 | | #else |
264 | | mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul) |
265 | | mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul) |
266 | | mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul) |
267 | | mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul) |
268 | | mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul) |
269 | | #endif |
270 | | |
271 | 28.4k | r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51); |
272 | 28.4k | add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51); |
273 | 28.4k | add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51); |
274 | 28.4k | add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51); |
275 | 28.4k | add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51); |
276 | 28.4k | r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51; |
277 | 28.4k | r1 += c; |
278 | | |
279 | 28.4k | out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4; |
280 | 28.4k | } |
281 | | |
282 | | /* Take a little-endian, 32-byte number and expand it into polynomial form: four word64 values are read from in and repacked into five limbs, each masked with reduce_mask_51 */ |
283 | | inline void |
284 | 28 | curve25519_expand(bignum25519 out, const byte *in) { |
285 | 28 | word64 x0,x1,x2,x3; |
286 | 28 | GetBlock<word64, LittleEndian> block(in); |
287 | 28 | block(x0)(x1)(x2)(x3); |
288 | | |
289 | 28 | out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13); |
290 | 28 | out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26); |
291 | 28 | out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39); |
292 | 28 | out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12); |
293 | 28 | out[4] = x3 & reduce_mask_51; /* ignore the top bit */ |
294 | 28 | } |
295 | | |
296 | | /* Take a fully reduced polynomial form number and contract it into a |
297 | | * little-endian, 32-byte array. Works on a local copy of input and writes exactly 32 bytes to out (four 8-byte words). |
298 | | */ |
299 | | inline void |
300 | 28 | curve25519_contract(byte *out, const bignum25519 input) { |
301 | 28 | word64 t[5]; |
302 | 28 | word64 f, i; |
303 | | |
304 | 28 | t[0] = input[0]; |
305 | 28 | t[1] = input[1]; |
306 | 28 | t[2] = input[2]; |
307 | 28 | t[3] = input[3]; |
308 | 28 | t[4] = input[4]; |
309 | | |
310 | 28 | #define curve25519_contract_carry() \ |
311 | 112 | t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \ |
312 | 112 | t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \ |
313 | 112 | t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \ |
314 | 112 | t[4] += t[3] >> 51; t[3] &= reduce_mask_51; |
315 | | |
316 | 84 | #define curve25519_contract_carry_full() curve25519_contract_carry() \ |
317 | 84 | t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51; |
318 | | |
319 | 28 | #define curve25519_contract_carry_final() curve25519_contract_carry() \ |
320 | 28 | t[4] &= reduce_mask_51; |
321 | | |
322 | 28 | curve25519_contract_carry_full() |
323 | 28 | curve25519_contract_carry_full() |
324 | | |
325 | | /* now t is between 0 and 2^255-1, properly carried. */ |
326 | | /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ |
327 | 28 | t[0] += 19; |
328 | 28 | curve25519_contract_carry_full() |
329 | | |
330 | | /* now between 19 and 2^255-1 in both cases, and offset by 19. */ |
331 | 28 | t[0] += 0x8000000000000 - 19; |
332 | 28 | t[1] += 0x8000000000000 - 1; |
333 | 28 | t[2] += 0x8000000000000 - 1; |
334 | 28 | t[3] += 0x8000000000000 - 1; |
335 | 28 | t[4] += 0x8000000000000 - 1; |
336 | | |
337 | | /* now between 2^255 and 2^256-20, and offset by 2^255. The final carry below masks t[4], which drops that offset. */ |
338 | 28 | curve25519_contract_carry_final() |
339 | | |
340 | 28 | #define write51full(n,shift) \ |
341 | 112 | f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \ |
342 | 1.00k | for (i = 0; i < 8; i++, f >>= 8) *out++ = (byte)f; |
343 | 112 | #define write51(n) write51full(n,13*n) |
344 | | |
345 | 28 | write51(0) |
346 | 28 | write51(1) |
347 | 28 | write51(2) |
348 | 28 | write51(3) |
349 | | |
350 | 28 | #undef curve25519_contract_carry |
351 | 28 | #undef curve25519_contract_carry_full |
352 | 28 | #undef curve25519_contract_carry_final |
353 | 28 | #undef write51full |
354 | 28 | #undef write51 |
355 | 28 | } |
356 | | |
357 | | /* |
358 | | * Swap the contents of [x] and [qpx] when @iswap is 1; leave both unchanged when it is 0. The mask is -(iswap), so any other value gives a partial mask; callers must pass only 0 or 1. Uses XOR masking with no branch on iswap. |
359 | | */ |
360 | | inline void |
361 | 14.1k | curve25519_swap_conditional(bignum25519 x, bignum25519 qpx, word64 iswap) { |
362 | 14.1k | const word64 swap = (word64)(-(sword64)iswap); |
363 | 14.1k | word64 x0,x1,x2,x3,x4; |
364 | | |
365 | 14.1k | x0 = swap & (x[0] ^ qpx[0]); x[0] ^= x0; qpx[0] ^= x0; |
366 | 14.1k | x1 = swap & (x[1] ^ qpx[1]); x[1] ^= x1; qpx[1] ^= x1; |
367 | 14.1k | x2 = swap & (x[2] ^ qpx[2]); x[2] ^= x2; qpx[2] ^= x2; |
368 | 14.1k | x3 = swap & (x[3] ^ qpx[3]); x[3] ^= x3; qpx[3] ^= x3; |
369 | 14.1k | x4 = swap & (x[4] ^ qpx[4]); x[4] ^= x4; qpx[4] ^= x4; |
370 | 14.1k | } |
371 | | |
372 | | /* |
373 | | * In: b = z^(2^5 - 2^0) |
374 | | * Out: b = z^(2^250 - 2^0) (b is updated in place; the per-line comments below give the exponent of z after each step) |
375 | | */ |
376 | | void |
377 | 28 | curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) { |
378 | 28 | ALIGN(ALIGN_SPEC) bignum25519 t0,c; |
379 | | |
380 | | /* 2^5 - 2^0 */ /* b */ |
381 | 28 | /* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5); |
382 | 28 | /* 2^10 - 2^0 */ curve25519_mul(b, t0, b); |
383 | 28 | /* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10); |
384 | 28 | /* 2^20 - 2^0 */ curve25519_mul(c, t0, b); |
385 | 28 | /* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20); |
386 | 28 | /* 2^40 - 2^0 */ curve25519_mul(t0, t0, c); |
387 | 28 | /* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10); |
388 | 28 | /* 2^50 - 2^0 */ curve25519_mul(b, t0, b); |
389 | 28 | /* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50); |
390 | 28 | /* 2^100 - 2^0 */ curve25519_mul(c, t0, b); |
391 | 28 | /* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100); |
392 | 28 | /* 2^200 - 2^0 */ curve25519_mul(t0, t0, c); |
393 | 28 | /* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50); |
394 | 28 | /* 2^250 - 2^0 */ curve25519_mul(b, t0, b); |
395 | 28 | } |
396 | | |
397 | | /* |
398 | | * out = z^(p - 2) = z^(2^255 - 21); the per-line comments below give the exponent of z after each step |
399 | | */ |
400 | | void |
401 | 28 | curve25519_recip(bignum25519 out, const bignum25519 z) { |
402 | 28 | ALIGN(ALIGN_SPEC) bignum25519 a, t0, b; |
403 | | |
404 | 28 | /* 2 */ curve25519_square(a, z); /* a = 2 */ |
405 | 28 | /* 8 */ curve25519_square_times(t0, a, 2); |
406 | 28 | /* 9 */ curve25519_mul(b, t0, z); /* b = 9 */ |
407 | 28 | /* 11 */ curve25519_mul(a, b, a); /* a = 11 */ |
408 | 28 | /* 22 */ curve25519_square(t0, a); |
409 | 28 | /* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b); |
410 | 28 | /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b); |
411 | 28 | /* 2^255 - 2^5 */ curve25519_square_times(b, b, 5); |
412 | 28 | /* 2^255 - 21 */ curve25519_mul(out, b, a); |
413 | 28 | } |
414 | | |
415 | | ANONYMOUS_NAMESPACE_END |
416 | | NAMESPACE_END // X25519 |
417 | | NAMESPACE_END // Donna |
418 | | NAMESPACE_END // CryptoPP |
419 | | |
420 | | //******************************* ed25519 *******************************// |
421 | | |
422 | | NAMESPACE_BEGIN(CryptoPP) |
423 | | NAMESPACE_BEGIN(Donna) |
424 | | NAMESPACE_BEGIN(Ed25519) |
425 | | ANONYMOUS_NAMESPACE_BEGIN |
426 | | |
427 | | using CryptoPP::byte; |
428 | | using CryptoPP::word32; |
429 | | using CryptoPP::sword32; |
430 | | using CryptoPP::word64; |
431 | | using CryptoPP::sword64; |
432 | | |
433 | | using CryptoPP::GetBlock; |
434 | | using CryptoPP::LittleEndian; |
435 | | |
436 | | using CryptoPP::SHA512; |
437 | | |
438 | | // Bring in all the symbols from the 64-bit header |
439 | | using namespace CryptoPP::Donna::Arch64; |
440 | | |
441 | | /* out = in: copies the five limbs in[0..4] to out[0..4] unchanged */ |
442 | | inline void |
443 | 0 | curve25519_copy(bignum25519 out, const bignum25519 in) { |
444 | 0 | out[0] = in[0]; out[1] = in[1]; |
445 | 0 | out[2] = in[2]; out[3] = in[3]; |
446 | 0 | out[4] = in[4]; |
447 | 0 | } |
448 | | |
449 | | /* out = a + b: limb-wise sum of the five limbs; no carry is propagated and no mask is applied here */ |
450 | | inline void |
451 | 0 | curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) { |
452 | 0 | out[0] = a[0] + b[0]; out[1] = a[1] + b[1]; |
453 | 0 | out[2] = a[2] + b[2]; out[3] = a[3] + b[3]; |
454 | 0 | out[4] = a[4] + b[4]; |
455 | 0 | } |
456 | | |
457 | | /* out = a + b, where a and/or b are the result of a basic op (add,sub). In this 64-bit file the body is the same plain limb-wise sum as curve25519_add. */ |
458 | | inline void |
459 | 0 | curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) { |
460 | 0 | out[0] = a[0] + b[0]; out[1] = a[1] + b[1]; |
461 | 0 | out[2] = a[2] + b[2]; out[3] = a[3] + b[3]; |
462 | 0 | out[4] = a[4] + b[4]; |
463 | 0 | } |
464 | | |
465 | | inline void |
466 | 0 | curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) { /* out = a + b with one carry pass: each limb is masked with reduce_mask_51 and its overflow is added to the next limb */ |
467 | 0 | word64 c; |
468 | 0 | out[0] = a[0] + b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51; |
469 | 0 | out[1] = a[1] + b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51; |
470 | 0 | out[2] = a[2] + b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51; |
471 | 0 | out[3] = a[3] + b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51; |
472 | 0 | out[4] = a[4] + b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51; |
473 | 0 | out[0] += c * 19; /* the carry out of the top limb wraps to limb 0 scaled by 19; out[0] is not re-masked afterwards */ |
474 | 0 | } |
475 | | |
476 | | /* out = a - b: limb-wise; twoP0/twoP1234 are added to each limb before subtracting (presumably 2*p in limb form so no limb underflows - confirm in donna_64.h). No carry pass is run. */ |
477 | | inline void |
478 | 0 | curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) { |
479 | 0 | out[0] = a[0] + twoP0 - b[0]; |
480 | 0 | out[1] = a[1] + twoP1234 - b[1]; |
481 | 0 | out[2] = a[2] + twoP1234 - b[2]; |
482 | 0 | out[3] = a[3] + twoP1234 - b[3]; |
483 | 0 | out[4] = a[4] + twoP1234 - b[4]; |
484 | 0 | } |
485 | | |
486 | | /* out = a - b, where a and/or b are the result of a basic op (add,sub). Uses the fourP0/fourP1234 offsets instead of the twoP ones (presumably 4*p in limb form - confirm in donna_64.h). No carry pass is run. */ |
487 | | inline void |
488 | 0 | curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) { |
489 | 0 | out[0] = a[0] + fourP0 - b[0]; |
490 | 0 | out[1] = a[1] + fourP1234 - b[1]; |
491 | 0 | out[2] = a[2] + fourP1234 - b[2]; |
492 | 0 | out[3] = a[3] + fourP1234 - b[3]; |
493 | 0 | out[4] = a[4] + fourP1234 - b[4]; |
494 | 0 | } |
495 | | |
496 | | inline void |
497 | 0 | curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) { /* out = a - b using the fourP0/fourP1234 offsets, followed by one carry pass that masks each limb with reduce_mask_51 */ |
498 | 0 | word64 c; |
499 | 0 | out[0] = a[0] + fourP0 - b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51; |
500 | 0 | out[1] = a[1] + fourP1234 - b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51; |
501 | 0 | out[2] = a[2] + fourP1234 - b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51; |
502 | 0 | out[3] = a[3] + fourP1234 - b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51; |
503 | 0 | out[4] = a[4] + fourP1234 - b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51; |
504 | 0 | out[0] += c * 19; /* the carry out of the top limb wraps to limb 0 scaled by 19; out[0] is not re-masked afterwards */ |
505 | 0 | } |
506 | | |
507 | | /* out = -a: computed limb-wise as (twoP0/twoP1234 - a), followed by one carry pass that masks each limb with reduce_mask_51 */ |
508 | | inline void |
509 | 0 | curve25519_neg(bignum25519 out, const bignum25519 a) { |
510 | 0 | word64 c; |
511 | 0 | out[0] = twoP0 - a[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51; |
512 | 0 | out[1] = twoP1234 - a[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51; |
513 | 0 | out[2] = twoP1234 - a[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51; |
514 | 0 | out[3] = twoP1234 - a[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51; |
515 | 0 | out[4] = twoP1234 - a[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51; |
516 | 0 | out[0] += c * 19; /* the carry out of the top limb wraps to limb 0 scaled by 19; out[0] is not re-masked afterwards */ |
517 | 0 | } |
518 | | |
519 | | /* out = in2 * in: accumulates the 5x5 limb products into five 128-bit columns t[0..4], then runs one carry pass. out may alias either input because both are copied to locals first. */ |
520 | | inline void |
521 | 0 | curve25519_mul(bignum25519 out, const bignum25519 in2, const bignum25519 in) { |
522 | | #if !defined(CRYPTOPP_WORD128_AVAILABLE) |
523 | | word128 mul; |
524 | | #endif |
525 | 0 | word128 t[5]; |
526 | 0 | word64 r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c; |
527 | |

528 | 0 | r0 = in[0]; r1 = in[1]; |
529 | 0 | r2 = in[2]; r3 = in[3]; |
530 | 0 | r4 = in[4]; |
531 | |

532 | 0 | s0 = in2[0]; s1 = in2[1]; |
533 | 0 | s2 = in2[2]; s3 = in2[3]; |
534 | 0 | s4 = in2[4]; |
535 | |

536 | 0 | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
537 | 0 | t[0] = ((word128) r0) * s0; |
538 | 0 | t[1] = ((word128) r0) * s1 + ((word128) r1) * s0; |
539 | 0 | t[2] = ((word128) r0) * s2 + ((word128) r2) * s0 + ((word128) r1) * s1; |
540 | 0 | t[3] = ((word128) r0) * s3 + ((word128) r3) * s0 + ((word128) r1) * s2 + ((word128) r2) * s1; |
541 | 0 | t[4] = ((word128) r0) * s4 + ((word128) r4) * s0 + ((word128) r3) * s1 + ((word128) r1) * s3 + ((word128) r2) * s2; |
542 | | #else |
543 | | mul64x64_128(t[0], r0, s0) |
544 | | mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul) |
545 | | mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul) |
546 | | mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul) |
547 | | mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul) |
548 | | #endif |
549 | |

550 | 0 | r1 *= 19; r2 *= 19; /* products whose limb index sum exceeds 4 wrap to a lower column scaled by 19 */ |
551 | 0 | r3 *= 19; r4 *= 19; |
552 | |

553 | 0 | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
554 | 0 | t[0] += ((word128) r4) * s1 + ((word128) r1) * s4 + ((word128) r2) * s3 + ((word128) r3) * s2; |
555 | 0 | t[1] += ((word128) r4) * s2 + ((word128) r2) * s4 + ((word128) r3) * s3; |
556 | 0 | t[2] += ((word128) r4) * s3 + ((word128) r3) * s4; |
557 | 0 | t[3] += ((word128) r4) * s4; |
558 | | #else |
559 | | mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul) |
560 | | mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul) |
561 | | mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul) |
562 | | mul64x64_128(mul, r4, s4) add128(t[3], mul) |
563 | | #endif |
564 | |

565 | 0 | r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51); |
566 | 0 | add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51); |
567 | 0 | add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51); |
568 | 0 | add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51); |
569 | 0 | add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51); |
570 | 0 | r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51; |
571 | 0 | r1 += c; /* r1 is not re-masked after this add, so out[1] may hold a small excess above the mask */ |
572 | |

573 | 0 | out[0] = r0; out[1] = r1; |
574 | 0 | out[2] = r2; out[3] = r3; |
575 | 0 | out[4] = r4; |
576 | 0 | } |
577 | | |
578 | | void |
579 | 0 | curve25519_mul_noinline(bignum25519 out, const bignum25519 in2, const bignum25519 in) { /* out = in2 * in: wrapper not declared inline that forwards to curve25519_mul */ |
580 | 0 | curve25519_mul(out, in2, in); |
581 | 0 | } |
582 | | |
583 | | /* out = in^(2^count): squares the value count times in a row. count must be >= 1. */ |
584 | | void |
585 | 0 | curve25519_square_times(bignum25519 out, const bignum25519 in, word64 count) { |
586 | | #if !defined(CRYPTOPP_WORD128_AVAILABLE) |
587 | | word128 mul; |
588 | | #endif |
589 | 0 | word128 t[5]; |
590 | 0 | word64 r0,r1,r2,r3,r4,c; |
591 | 0 | word64 d0,d1,d2,d4,d419; |
592 | |

593 | 0 | r0 = in[0]; r1 = in[1]; |
594 | 0 | r2 = in[2]; r3 = in[3]; |
595 | 0 | r4 = in[4]; |
596 | |

597 | 0 | do { |
598 | 0 | d0 = r0 * 2; |
599 | 0 | d1 = r1 * 2; |
600 | 0 | d2 = r2 * 2 * 19; |
601 | 0 | d419 = r4 * 19; |
602 | 0 | d4 = d419 * 2; |
603 | |

604 | 0 | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
605 | 0 | t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 )); |
606 | 0 | t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19)); |
607 | 0 | t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 )); |
608 | 0 | t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 )); |
609 | 0 | t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 )); |
610 | | #else |
611 | | mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul) |
612 | | mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul) |
613 | | mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul) |
614 | | mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul) |
615 | | mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul) |
616 | | #endif |
617 | |

618 | 0 | r0 = lo128(t[0]) & reduce_mask_51; |
619 | 0 | r1 = lo128(t[1]) & reduce_mask_51; shl128(c, t[0], 13); r1 += c; /* NOTE(review): shl128(c, t, 13) presumably yields the bits of t above bit 51 - confirm in donna_64.h */ |
620 | 0 | r2 = lo128(t[2]) & reduce_mask_51; shl128(c, t[1], 13); r2 += c; |
621 | 0 | r3 = lo128(t[3]) & reduce_mask_51; shl128(c, t[2], 13); r3 += c; |
622 | 0 | r4 = lo128(t[4]) & reduce_mask_51; shl128(c, t[3], 13); r4 += c; |
623 | 0 | shl128(c, t[4], 13); r0 += c * 19; |
624 | 0 | c = r0 >> 51; r0 &= reduce_mask_51; |
625 | 0 | r1 += c ; c = r1 >> 51; r1 &= reduce_mask_51; |
626 | 0 | r2 += c ; c = r2 >> 51; r2 &= reduce_mask_51; |
627 | 0 | r3 += c ; c = r3 >> 51; r3 &= reduce_mask_51; |
628 | 0 | r4 += c ; c = r4 >> 51; r4 &= reduce_mask_51; |
629 | 0 | r0 += c * 19; |
630 | 0 | } while(--count); /* the body runs before the test, so count == 0 would wrap around instead of doing nothing */ |
631 | |

632 | 0 | out[0] = r0; out[1] = r1; |
633 | 0 | out[2] = r2; out[3] = r3; |
634 | 0 | out[4] = r4; |
635 | 0 | } |
636 | | |
637 | | inline void |
638 | 0 | curve25519_square(bignum25519 out, const bignum25519 in) { /* out = in * in: a single squaring; column sums t[0..4] are reduced with one carry pass */ |
639 | | #if !defined(CRYPTOPP_WORD128_AVAILABLE) |
640 | | word128 mul; |
641 | | #endif |
642 | 0 | word128 t[5]; |
643 | 0 | word64 r0,r1,r2,r3,r4,c; |
644 | 0 | word64 d0,d1,d2,d4,d419; |
645 | |

646 | 0 | r0 = in[0]; r1 = in[1]; |
647 | 0 | r2 = in[2]; r3 = in[3]; |
648 | 0 | r4 = in[4]; |
649 | |

650 | 0 | d0 = r0 * 2; d1 = r1 * 2; |
651 | 0 | d2 = r2 * 2 * 19; |
652 | 0 | d419 = r4 * 19; |
653 | 0 | d4 = d419 * 2; |
654 | |

655 | 0 | #if defined(CRYPTOPP_WORD128_AVAILABLE) |
656 | 0 | t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 )); |
657 | 0 | t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19)); |
658 | 0 | t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 )); |
659 | 0 | t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 )); |
660 | 0 | t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 )); |
661 | | #else |
662 | | mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul) |
663 | | mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul) |
664 | | mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul) |
665 | | mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul) |
666 | | mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul) |
667 | | #endif |
668 | |

669 | 0 | r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51); |
670 | 0 | add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51); |
671 | 0 | add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51); |
672 | 0 | add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51); |
673 | 0 | add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51); |
674 | 0 | r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51; |
675 | 0 | r1 += c; /* r1 is not re-masked after this add, so out[1] may hold a small excess above the mask */ |
676 | |

677 | 0 | out[0] = r0; out[1] = r1; |
678 | 0 | out[2] = r2; out[3] = r3; |
679 | 0 | out[4] = r4; |
680 | 0 | } |
681 | | |
682 | | /* Take a little-endian, 32-byte number and expand it into polynomial form */ |
683 | | inline void |
684 | 0 | curve25519_expand(bignum25519 out, const byte *in) { |
685 | 0 | word64 x0,x1,x2,x3; |
686 | 0 | GetBlock<word64, LittleEndian> block(in); |
687 | 0 | block(x0)(x1)(x2)(x3); |
688 | |
|
689 | 0 | out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13); |
690 | 0 | out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26); |
691 | 0 | out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39); |
692 | 0 | out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12); |
693 | 0 | out[4] = x3 & reduce_mask_51; |
694 | 0 | } |
695 | | |
696 | | /* Take a fully reduced polynomial form number and contract it into a |
697 | | * little-endian, 32-byte array |
698 | | */ |
699 | | inline void |
700 | 0 | curve25519_contract(byte *out, const bignum25519 input) { |
701 | 0 | word64 t[5]; |
702 | 0 | word64 f, i; |
703 | |
|
704 | 0 | t[0] = input[0]; |
705 | 0 | t[1] = input[1]; |
706 | 0 | t[2] = input[2]; |
707 | 0 | t[3] = input[3]; |
708 | 0 | t[4] = input[4]; |
709 | |
|
710 | 0 | #define curve25519_contract_carry() \ |
711 | 0 | t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \ |
712 | 0 | t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \ |
713 | 0 | t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \ |
714 | 0 | t[4] += t[3] >> 51; t[3] &= reduce_mask_51; |
715 | |
|
716 | 0 | #define curve25519_contract_carry_full() curve25519_contract_carry() \ |
717 | 0 | t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51; |
718 | |
|
719 | 0 | #define curve25519_contract_carry_final() curve25519_contract_carry() \ |
720 | 0 | t[4] &= reduce_mask_51; |
721 | |
|
722 | 0 | curve25519_contract_carry_full() |
723 | 0 | curve25519_contract_carry_full() |
724 | | |
725 | | /* now t is between 0 and 2^255-1, properly carried. */ |
726 | | /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ |
727 | 0 | t[0] += 19; |
728 | 0 | curve25519_contract_carry_full() |
729 | | |
730 | | /* now between 19 and 2^255-1 in both cases, and offset by 19. */ |
731 | 0 | t[0] += (reduce_mask_51 + 1) - 19; |
732 | 0 | t[1] += (reduce_mask_51 + 1) - 1; |
733 | 0 | t[2] += (reduce_mask_51 + 1) - 1; |
734 | 0 | t[3] += (reduce_mask_51 + 1) - 1; |
735 | 0 | t[4] += (reduce_mask_51 + 1) - 1; |
736 | | |
737 | | /* now between 2^255 and 2^256-20, and offset by 2^255. */ |
738 | 0 | curve25519_contract_carry_final() |
739 | |
|
740 | 0 | #define write51full(n,shift) \ |
741 | 0 | f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \ |
742 | 0 | for (i = 0; i < 8; i++, f >>= 8) *out++ = (byte)f; |
743 | 0 | #define write51(n) write51full(n,13*n) |
744 | 0 | write51(0) |
745 | 0 | write51(1) |
746 | 0 | write51(2) |
747 | 0 | write51(3) |
748 | 0 | } |
749 | | |
750 | | #if !defined(ED25519_GCC_64BIT_CHOOSE) |
751 | | |
752 | | /* out = (flag) ? in : out */ |
753 | | inline void |
754 | | curve25519_move_conditional_bytes(byte out[96], const byte in[96], word64 flag) |
755 | 0 | { |
756 | | // TODO: enable this code path once we can test and benchmark it. |
757 | | // It is about 24 insns shorter, it avoids punning which may be UB, |
758 | | // and it is guaranteed constant time. |
759 | | #if defined(__GNUC__) && defined(__x86_64__) && 0 |
760 | | const word32 iter = 96/sizeof(word64); |
761 | | word64* outq = reinterpret_cast<word64*>(out); |
762 | | const word64* inq = reinterpret_cast<const word64*>(in); |
763 | | word64 idx=0, val; |
764 | | |
765 | | __asm__ __volatile__ ( |
766 | | ".att_syntax ;\n" |
767 | | "cmpq $0, %[flag] ;\n" // compare, set ZERO flag |
768 | | "movq %[iter], %%rcx ;\n" // load iteration count |
769 | | "1: ;\n" |
770 | | " movq (%[idx],%[out]), %[val] ;\n" // val = out[idx] |
771 | | " cmovnzq (%[idx],%[in]), %[val] ;\n" // copy in[idx] to val if NZ |
772 | | " movq %[val], (%[idx],%[out]) ;\n" // out[idx] = val |
773 | | " leaq 8(%[idx]), %[idx] ;\n" // increment index |
774 | | " loopnz 1b ;\n" // does not affect flags |
775 | | : [out] "+S" (outq), [in] "+D" (inq), |
776 | | [idx] "+b" (idx), [val] "=r" (val) |
777 | | : [flag] "g" (flag), [iter] "I" (iter) |
778 | | : "rcx", "memory", "cc" |
779 | | ); |
780 | | #else |
781 | 0 | const word64 nb = flag - 1, b = ~nb; |
782 | 0 | const word64 *inq = (const word64 *)(const void*)in; |
783 | 0 | word64 *outq = (word64 *)(void *)out; |
784 | 0 | outq[0] = (outq[0] & nb) | (inq[0] & b); |
785 | 0 | outq[1] = (outq[1] & nb) | (inq[1] & b); |
786 | 0 | outq[2] = (outq[2] & nb) | (inq[2] & b); |
787 | 0 | outq[3] = (outq[3] & nb) | (inq[3] & b); |
788 | 0 | outq[4] = (outq[4] & nb) | (inq[4] & b); |
789 | 0 | outq[5] = (outq[5] & nb) | (inq[5] & b); |
790 | 0 | outq[6] = (outq[6] & nb) | (inq[6] & b); |
791 | 0 | outq[7] = (outq[7] & nb) | (inq[7] & b); |
792 | 0 | outq[8] = (outq[8] & nb) | (inq[8] & b); |
793 | 0 | outq[9] = (outq[9] & nb) | (inq[9] & b); |
794 | 0 | outq[10] = (outq[10] & nb) | (inq[10] & b); |
795 | 0 | outq[11] = (outq[11] & nb) | (inq[11] & b); |
796 | 0 | #endif |
797 | 0 | } |
798 | | |
799 | | /* if (iswap) swap(a, b) */ |
800 | | inline void |
801 | 0 | curve25519_swap_conditional(bignum25519 a, bignum25519 b, word64 iswap) { |
802 | 0 | const word64 swap = (word64)(-(sword64)iswap); |
803 | 0 | word64 x0,x1,x2,x3,x4; |
804 | |
|
805 | 0 | x0 = swap & (a[0] ^ b[0]); a[0] ^= x0; b[0] ^= x0; |
806 | 0 | x1 = swap & (a[1] ^ b[1]); a[1] ^= x1; b[1] ^= x1; |
807 | 0 | x2 = swap & (a[2] ^ b[2]); a[2] ^= x2; b[2] ^= x2; |
808 | 0 | x3 = swap & (a[3] ^ b[3]); a[3] ^= x3; b[3] ^= x3; |
809 | 0 | x4 = swap & (a[4] ^ b[4]); a[4] ^= x4; b[4] ^= x4; |
810 | 0 | } |
811 | | |
812 | | #endif /* ED25519_GCC_64BIT_CHOOSE */ |
813 | | |
814 | | // ************************************************************************************ |
815 | | |
816 | | inline void |
817 | 0 | ed25519_hash(byte *hash, const byte *in, size_t inlen) { |
818 | 0 | SHA512().CalculateDigest(hash, in, inlen); |
819 | 0 | } |
820 | | |
821 | | inline void |
822 | 0 | ed25519_extsk(hash_512bits extsk, const byte sk[32]) { |
823 | 0 | ed25519_hash(extsk, sk, 32); |
824 | 0 | extsk[0] &= 248; |
825 | 0 | extsk[31] &= 127; |
826 | 0 | extsk[31] |= 64; |
827 | 0 | } |
828 | | |
829 | | void |
830 | | UpdateFromStream(HashTransformation& hash, std::istream& stream) |
831 | 0 | { |
832 | 0 | SecByteBlock block(4096); |
833 | 0 | while (stream.read((char*)block.begin(), block.size())) |
834 | 0 | hash.Update(block, block.size()); |
835 | |
|
836 | 0 | std::streamsize rem = stream.gcount(); |
837 | 0 | if (rem) |
838 | 0 | hash.Update(block, rem); |
839 | |
|
840 | 0 | block.SetMark(0); |
841 | 0 | } |
842 | | |
843 | | void |
844 | 0 | ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], const byte *m, size_t mlen) { |
845 | 0 | SHA512 hash; |
846 | 0 | hash.Update(RS, 32); |
847 | 0 | hash.Update(pk, 32); |
848 | 0 | hash.Update(m, mlen); |
849 | 0 | hash.Final(hram); |
850 | 0 | } |
851 | | |
852 | | void |
853 | 0 | ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], std::istream& stream) { |
854 | 0 | SHA512 hash; |
855 | 0 | hash.Update(RS, 32); |
856 | 0 | hash.Update(pk, 32); |
857 | 0 | UpdateFromStream(hash, stream); |
858 | 0 | hash.Final(hram); |
859 | 0 | } |
860 | | |
861 | | bignum256modm_element_t |
862 | 0 | lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) { |
863 | 0 | return (a - b) >> 63; |
864 | 0 | } |
865 | | |
866 | | void |
867 | 0 | reduce256_modm(bignum256modm r) { |
868 | 0 | bignum256modm t; |
869 | 0 | bignum256modm_element_t b = 0, pb, mask; |
870 | | |
871 | | /* t = r - m */ |
872 | 0 | pb = 0; |
873 | 0 | pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 56)); pb = b; |
874 | 0 | pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 56)); pb = b; |
875 | 0 | pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 56)); pb = b; |
876 | 0 | pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 56)); pb = b; |
877 | 0 | pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 32)); |
878 | | |
879 | | /* keep r if r was smaller than m */ |
880 | 0 | mask = b - 1; |
881 | |
|
882 | 0 | r[0] ^= mask & (r[0] ^ t[0]); |
883 | 0 | r[1] ^= mask & (r[1] ^ t[1]); |
884 | 0 | r[2] ^= mask & (r[2] ^ t[2]); |
885 | 0 | r[3] ^= mask & (r[3] ^ t[3]); |
886 | 0 | r[4] ^= mask & (r[4] ^ t[4]); |
887 | 0 | } |
888 | | |
889 | | void |
890 | 0 | barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) { |
891 | 0 | bignum256modm q3, r2; |
892 | 0 | word128 c, mul; |
893 | 0 | bignum256modm_element_t f, b, pb; |
894 | | |
895 | | /* q1 = x >> 248 = 264 bits = 5 56 bit elements |
896 | | q2 = mu * q1 |
897 | | q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */ |
898 | 0 | mul64x64_128(c, modm_mu[0], q1[3]) mul64x64_128(mul, modm_mu[3], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[2]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[1]) add128(c, mul) shr128(f, c, 56); |
899 | 0 | mul64x64_128(c, modm_mu[0], q1[4]) add128_64(c, f) mul64x64_128(mul, modm_mu[4], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[1]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[2]) add128(c, mul) |
900 | 0 | f = lo128(c); q3[0] = (f >> 40) & 0xffff; shr128(f, c, 56); |
901 | 0 | mul64x64_128(c, modm_mu[4], q1[1]) add128_64(c, f) mul64x64_128(mul, modm_mu[1], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[2]) add128(c, mul) |
902 | 0 | f = lo128(c); q3[0] |= (f << 16) & 0xffffffffffffff; q3[1] = (f >> 40) & 0xffff; shr128(f, c, 56); |
903 | 0 | mul64x64_128(c, modm_mu[4], q1[2]) add128_64(c, f) mul64x64_128(mul, modm_mu[2], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[3]) add128(c, mul) |
904 | 0 | f = lo128(c); q3[1] |= (f << 16) & 0xffffffffffffff; q3[2] = (f >> 40) & 0xffff; shr128(f, c, 56); |
905 | 0 | mul64x64_128(c, modm_mu[4], q1[3]) add128_64(c, f) mul64x64_128(mul, modm_mu[3], q1[4]) add128(c, mul) |
906 | 0 | f = lo128(c); q3[2] |= (f << 16) & 0xffffffffffffff; q3[3] = (f >> 40) & 0xffff; shr128(f, c, 56); |
907 | 0 | mul64x64_128(c, modm_mu[4], q1[4]) add128_64(c, f) |
908 | 0 | f = lo128(c); q3[3] |= (f << 16) & 0xffffffffffffff; q3[4] = (f >> 40) & 0xffff; shr128(f, c, 56); |
909 | 0 | q3[4] |= (f << 16); |
910 | |
|
911 | 0 | mul64x64_128(c, modm_m[0], q3[0]) |
912 | 0 | r2[0] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56); |
913 | 0 | mul64x64_128(c, modm_m[0], q3[1]) add128_64(c, f) mul64x64_128(mul, modm_m[1], q3[0]) add128(c, mul) |
914 | 0 | r2[1] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56); |
915 | 0 | mul64x64_128(c, modm_m[0], q3[2]) add128_64(c, f) mul64x64_128(mul, modm_m[2], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[1]) add128(c, mul) |
916 | 0 | r2[2] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56); |
917 | 0 | mul64x64_128(c, modm_m[0], q3[3]) add128_64(c, f) mul64x64_128(mul, modm_m[3], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[2]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[1]) add128(c, mul) |
918 | 0 | r2[3] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56); |
919 | 0 | mul64x64_128(c, modm_m[0], q3[4]) add128_64(c, f) mul64x64_128(mul, modm_m[4], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[3], q3[1]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[3]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[2]) add128(c, mul) |
920 | 0 | r2[4] = lo128(c) & 0x0000ffffffffff; |
921 | |
|
922 | 0 | pb = 0; |
923 | 0 | pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 56)); pb = b; |
924 | 0 | pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 56)); pb = b; |
925 | 0 | pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 56)); pb = b; |
926 | 0 | pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 56)); pb = b; |
927 | 0 | pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 40)); |
928 | |
|
929 | 0 | reduce256_modm(r); |
930 | 0 | reduce256_modm(r); |
931 | 0 | } |
932 | | |
933 | | void |
934 | 0 | add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) { |
935 | 0 | bignum256modm_element_t c; |
936 | |
|
937 | 0 | c = x[0] + y[0]; r[0] = c & 0xffffffffffffff; c >>= 56; |
938 | 0 | c += x[1] + y[1]; r[1] = c & 0xffffffffffffff; c >>= 56; |
939 | 0 | c += x[2] + y[2]; r[2] = c & 0xffffffffffffff; c >>= 56; |
940 | 0 | c += x[3] + y[3]; r[3] = c & 0xffffffffffffff; c >>= 56; |
941 | 0 | c += x[4] + y[4]; r[4] = c; |
942 | |
|
943 | 0 | reduce256_modm(r); |
944 | 0 | } |
945 | | |
946 | | void |
947 | 0 | mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) { |
948 | 0 | bignum256modm q1, r1; |
949 | 0 | word128 c, mul; |
950 | 0 | bignum256modm_element_t f; |
951 | |
|
952 | 0 | mul64x64_128(c, x[0], y[0]) |
953 | 0 | f = lo128(c); r1[0] = f & 0xffffffffffffff; shr128(f, c, 56); |
954 | 0 | mul64x64_128(c, x[0], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[0]) add128(c, mul) |
955 | 0 | f = lo128(c); r1[1] = f & 0xffffffffffffff; shr128(f, c, 56); |
956 | 0 | mul64x64_128(c, x[0], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[1]) add128(c, mul) |
957 | 0 | f = lo128(c); r1[2] = f & 0xffffffffffffff; shr128(f, c, 56); |
958 | 0 | mul64x64_128(c, x[0], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[2]) add128(c, mul) mul64x64_128(mul, x[2], y[1]) add128(c, mul) |
959 | 0 | f = lo128(c); r1[3] = f & 0xffffffffffffff; shr128(f, c, 56); |
960 | 0 | mul64x64_128(c, x[0], y[4]) add128_64(c, f) mul64x64_128(mul, x[4], y[0]) add128(c, mul) mul64x64_128(mul, x[3], y[1]) add128(c, mul) mul64x64_128(mul, x[1], y[3]) add128(c, mul) mul64x64_128(mul, x[2], y[2]) add128(c, mul) |
961 | 0 | f = lo128(c); r1[4] = f & 0x0000ffffffffff; q1[0] = (f >> 24) & 0xffffffff; shr128(f, c, 56); |
962 | 0 | mul64x64_128(c, x[4], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[4]) add128(c, mul) mul64x64_128(mul, x[2], y[3]) add128(c, mul) mul64x64_128(mul, x[3], y[2]) add128(c, mul) |
963 | 0 | f = lo128(c); q1[0] |= (f << 32) & 0xffffffffffffff; q1[1] = (f >> 24) & 0xffffffff; shr128(f, c, 56); |
964 | 0 | mul64x64_128(c, x[4], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[4]) add128(c, mul) mul64x64_128(mul, x[3], y[3]) add128(c, mul) |
965 | 0 | f = lo128(c); q1[1] |= (f << 32) & 0xffffffffffffff; q1[2] = (f >> 24) & 0xffffffff; shr128(f, c, 56); |
966 | 0 | mul64x64_128(c, x[4], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[4]) add128(c, mul) |
967 | 0 | f = lo128(c); q1[2] |= (f << 32) & 0xffffffffffffff; q1[3] = (f >> 24) & 0xffffffff; shr128(f, c, 56); |
968 | 0 | mul64x64_128(c, x[4], y[4]) add128_64(c, f) |
969 | 0 | f = lo128(c); q1[3] |= (f << 32) & 0xffffffffffffff; q1[4] = (f >> 24) & 0xffffffff; shr128(f, c, 56); |
970 | 0 | q1[4] |= (f << 32); |
971 | |
|
972 | 0 | barrett_reduce256_modm(r, q1, r1); |
973 | 0 | } |
974 | | |
975 | | void |
976 | 0 | expand256_modm(bignum256modm out, const byte *in, size_t len) { |
977 | 0 | byte work[64] = {0}; |
978 | 0 | bignum256modm_element_t x[16]; |
979 | 0 | bignum256modm q1; |
980 | |
|
981 | 0 | std::memcpy(work, in, len); |
982 | 0 | x[0] = U8TO64_LE(work + 0); |
983 | 0 | x[1] = U8TO64_LE(work + 8); |
984 | 0 | x[2] = U8TO64_LE(work + 16); |
985 | 0 | x[3] = U8TO64_LE(work + 24); |
986 | 0 | x[4] = U8TO64_LE(work + 32); |
987 | 0 | x[5] = U8TO64_LE(work + 40); |
988 | 0 | x[6] = U8TO64_LE(work + 48); |
989 | 0 | x[7] = U8TO64_LE(work + 56); |
990 | | |
991 | | /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */ |
992 | 0 | out[0] = ( x[0]) & 0xffffffffffffff; |
993 | 0 | out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff; |
994 | 0 | out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff; |
995 | 0 | out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff; |
996 | 0 | out[4] = ((x[ 3] >> 32) | (x[ 4] << 32)) & 0x0000ffffffffff; |
997 | | |
998 | | /* under 252 bits, no need to reduce */ |
999 | 0 | if (len < 32) |
1000 | 0 | return; |
1001 | | |
1002 | | /* q1 = x >> 248 = 264 bits */ |
1003 | 0 | q1[0] = ((x[ 3] >> 56) | (x[ 4] << 8)) & 0xffffffffffffff; |
1004 | 0 | q1[1] = ((x[ 4] >> 48) | (x[ 5] << 16)) & 0xffffffffffffff; |
1005 | 0 | q1[2] = ((x[ 5] >> 40) | (x[ 6] << 24)) & 0xffffffffffffff; |
1006 | 0 | q1[3] = ((x[ 6] >> 32) | (x[ 7] << 32)) & 0xffffffffffffff; |
1007 | 0 | q1[4] = ((x[ 7] >> 24) ); |
1008 | |
|
1009 | 0 | barrett_reduce256_modm(out, q1, out); |
1010 | 0 | } |
1011 | | |
1012 | | void |
1013 | 0 | expand_raw256_modm(bignum256modm out, const byte in[32]) { |
1014 | 0 | bignum256modm_element_t x[4]; |
1015 | 0 |
|
1016 | 0 | x[0] = U8TO64_LE(in + 0); |
1017 | 0 | x[1] = U8TO64_LE(in + 8); |
1018 | 0 | x[2] = U8TO64_LE(in + 16); |
1019 | 0 | x[3] = U8TO64_LE(in + 24); |
1020 | 0 |
|
1021 | 0 | out[0] = ( x[0]) & 0xffffffffffffff; |
1022 | 0 | out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff; |
1023 | 0 | out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff; |
1024 | 0 | out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff; |
1025 | 0 | out[4] = ((x[ 3] >> 32) ) & 0x000000ffffffff; |
1026 | 0 | } |
1027 | | |
1028 | | void |
1029 | 0 | contract256_modm(byte out[32], const bignum256modm in) { |
1030 | 0 | U64TO8_LE(out + 0, (in[0] ) | (in[1] << 56)); |
1031 | 0 | U64TO8_LE(out + 8, (in[1] >> 8) | (in[2] << 48)); |
1032 | 0 | U64TO8_LE(out + 16, (in[2] >> 16) | (in[3] << 40)); |
1033 | 0 | U64TO8_LE(out + 24, (in[3] >> 24) | (in[4] << 32)); |
1034 | 0 | } |
1035 | | |
1036 | | void |
1037 | 0 | contract256_window4_modm(signed char r[64], const bignum256modm in) { |
1038 | 0 | char carry; |
1039 | 0 | signed char *quads = r; |
1040 | 0 | bignum256modm_element_t i, j, v, m; |
1041 | |
|
1042 | 0 | for (i = 0; i < 5; i++) { |
1043 | 0 | v = in[i]; |
1044 | 0 | m = (i == 4) ? 8 : 14; |
1045 | 0 | for (j = 0; j < m; j++) { |
1046 | 0 | *quads++ = (v & 15); |
1047 | 0 | v >>= 4; |
1048 | 0 | } |
1049 | 0 | } |
1050 | | |
1051 | | /* making it signed */ |
1052 | 0 | carry = 0; |
1053 | 0 | for(i = 0; i < 63; i++) { |
1054 | 0 | r[i] += carry; |
1055 | 0 | r[i+1] += (r[i] >> 4); |
1056 | 0 | r[i] &= 15; |
1057 | 0 | carry = (r[i] >> 3); |
1058 | 0 | r[i] -= (carry << 4); |
1059 | 0 | } |
1060 | 0 | r[63] += carry; |
1061 | 0 | } |
1062 | | |
1063 | | void |
1064 | 0 | contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) { |
1065 | 0 | int i,j,k,b; |
1066 | 0 | int m = (1 << (windowsize - 1)) - 1, soplen = 256; |
1067 | 0 | signed char *bits = r; |
1068 | 0 | bignum256modm_element_t v; |
1069 | | |
1070 | | /* first put the binary expansion into r */ |
1071 | 0 | for (i = 0; i < 4; i++) { |
1072 | 0 | v = s[i]; |
1073 | 0 | for (j = 0; j < 56; j++, v >>= 1) |
1074 | 0 | *bits++ = (v & 1); |
1075 | 0 | } |
1076 | 0 | v = s[4]; |
1077 | 0 | for (j = 0; j < 32; j++, v >>= 1) |
1078 | 0 | *bits++ = (v & 1); |
1079 | | |
1080 | | /* Making it sliding window */ |
1081 | 0 | for (j = 0; j < soplen; j++) { |
1082 | 0 | if (!r[j]) |
1083 | 0 | continue; |
1084 | | |
1085 | 0 | for (b = 1; (b < (soplen - j)) && (b <= 6); b++) { |
1086 | 0 | if ((r[j] + (r[j + b] << b)) <= m) { |
1087 | 0 | r[j] += r[j + b] << b; |
1088 | 0 | r[j + b] = 0; |
1089 | 0 | } else if ((r[j] - (r[j + b] << b)) >= -m) { |
1090 | 0 | r[j] -= r[j + b] << b; |
1091 | 0 | for (k = j + b; k < soplen; k++) { |
1092 | 0 | if (!r[k]) { |
1093 | 0 | r[k] = 1; |
1094 | 0 | break; |
1095 | 0 | } |
1096 | 0 | r[k] = 0; |
1097 | 0 | } |
1098 | 0 | } else if (r[j + b]) { |
1099 | 0 | break; |
1100 | 0 | } |
1101 | 0 | } |
1102 | 0 | } |
1103 | 0 | } |
1104 | | |
1105 | | /* |
1106 | | * In: b = 2^5 - 2^0 |
1107 | | * Out: b = 2^250 - 2^0 |
1108 | | */ |
1109 | | void |
1110 | 0 | curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) { |
1111 | 0 | ALIGN(ALIGN_SPEC) bignum25519 t0,c; |
1112 | | |
1113 | | /* 2^5 - 2^0 */ /* b */ |
1114 | 0 | /* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5); |
1115 | 0 | /* 2^10 - 2^0 */ curve25519_mul_noinline(b, t0, b); |
1116 | 0 | /* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10); |
1117 | 0 | /* 2^20 - 2^0 */ curve25519_mul_noinline(c, t0, b); |
1118 | 0 | /* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20); |
1119 | 0 | /* 2^40 - 2^0 */ curve25519_mul_noinline(t0, t0, c); |
1120 | 0 | /* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10); |
1121 | 0 | /* 2^50 - 2^0 */ curve25519_mul_noinline(b, t0, b); |
1122 | 0 | /* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50); |
1123 | 0 | /* 2^100 - 2^0 */ curve25519_mul_noinline(c, t0, b); |
1124 | 0 | /* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100); |
1125 | 0 | /* 2^200 - 2^0 */ curve25519_mul_noinline(t0, t0, c); |
1126 | 0 | /* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50); |
1127 | 0 | /* 2^250 - 2^0 */ curve25519_mul_noinline(b, t0, b); |
1128 | 0 | } |
1129 | | |
1130 | | /* |
1131 | | * z^(p - 2) = z(2^255 - 21) |
1132 | | */ |
1133 | | void |
1134 | 0 | curve25519_recip(bignum25519 out, const bignum25519 z) { |
1135 | 0 | ALIGN(ALIGN_SPEC) bignum25519 a,t0,b; |
1136 | |
|
1137 | 0 | /* 2 */ curve25519_square_times(a, z, 1); /* a = 2 */ |
1138 | 0 | /* 8 */ curve25519_square_times(t0, a, 2); |
1139 | 0 | /* 9 */ curve25519_mul_noinline(b, t0, z); /* b = 9 */ |
1140 | 0 | /* 11 */ curve25519_mul_noinline(a, b, a); /* a = 11 */ |
1141 | 0 | /* 22 */ curve25519_square_times(t0, a, 1); |
1142 | 0 | /* 2^5 - 2^0 = 31 */ curve25519_mul_noinline(b, t0, b); |
1143 | 0 | /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b); |
1144 | 0 | /* 2^255 - 2^5 */ curve25519_square_times(b, b, 5); |
1145 | 0 | /* 2^255 - 21 */ curve25519_mul_noinline(out, b, a); |
1146 | 0 | } |
1147 | | |
1148 | | /* |
1149 | | * z^((p-5)/8) = z^(2^252 - 3) |
1150 | | */ |
1151 | | void |
1152 | 0 | curve25519_pow_two252m3(bignum25519 two252m3, const bignum25519 z) { |
1153 | 0 | ALIGN(ALIGN_SPEC) bignum25519 b,c,t0; |
1154 | |
|
1155 | 0 | /* 2 */ curve25519_square_times(c, z, 1); /* c = 2 */ |
1156 | 0 | /* 8 */ curve25519_square_times(t0, c, 2); /* t0 = 8 */ |
1157 | 0 | /* 9 */ curve25519_mul_noinline(b, t0, z); /* b = 9 */ |
1158 | 0 | /* 11 */ curve25519_mul_noinline(c, b, c); /* c = 11 */ |
1159 | 0 | /* 22 */ curve25519_square_times(t0, c, 1); |
1160 | 0 | /* 2^5 - 2^0 = 31 */ curve25519_mul_noinline(b, t0, b); |
1161 | 0 | /* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b); |
1162 | 0 | /* 2^252 - 2^2 */ curve25519_square_times(b, b, 2); |
1163 | 0 | /* 2^252 - 3 */ curve25519_mul_noinline(two252m3, b, z); |
1164 | 0 | } |
1165 | | |
1166 | | inline void |
1167 | 0 | ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) { |
1168 | 0 | curve25519_mul(r->x, p->x, p->t); |
1169 | 0 | curve25519_mul(r->y, p->y, p->z); |
1170 | 0 | curve25519_mul(r->z, p->z, p->t); |
1171 | 0 | } |
1172 | | |
1173 | | inline void |
1174 | 0 | ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) { |
1175 | 0 | curve25519_mul(r->x, p->x, p->t); |
1176 | 0 | curve25519_mul(r->y, p->y, p->z); |
1177 | 0 | curve25519_mul(r->z, p->z, p->t); |
1178 | 0 | curve25519_mul(r->t, p->x, p->y); |
1179 | 0 | } |
1180 | | |
1181 | | void |
1182 | 0 | ge25519_full_to_pniels(ge25519_pniels *p, const ge25519 *r) { |
1183 | 0 | curve25519_sub(p->ysubx, r->y, r->x); |
1184 | 0 | curve25519_add(p->xaddy, r->y, r->x); |
1185 | 0 | curve25519_copy(p->z, r->z); |
1186 | 0 | curve25519_mul(p->t2d, r->t, ge25519_ec2d); |
1187 | 0 | } |
1188 | | |
1189 | | void |
1190 | 0 | ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519 *q) { |
1191 | 0 | bignum25519 a,b,c,d,t,u; |
1192 | 0 |
|
1193 | 0 | curve25519_sub(a, p->y, p->x); |
1194 | 0 | curve25519_add(b, p->y, p->x); |
1195 | 0 | curve25519_sub(t, q->y, q->x); |
1196 | 0 | curve25519_add(u, q->y, q->x); |
1197 | 0 | curve25519_mul(a, a, t); |
1198 | 0 | curve25519_mul(b, b, u); |
1199 | 0 | curve25519_mul(c, p->t, q->t); |
1200 | 0 | curve25519_mul(c, c, ge25519_ec2d); |
1201 | 0 | curve25519_mul(d, p->z, q->z); |
1202 | 0 | curve25519_add(d, d, d); |
1203 | 0 | curve25519_sub(r->x, b, a); |
1204 | 0 | curve25519_add(r->y, b, a); |
1205 | 0 | curve25519_add_after_basic(r->z, d, c); |
1206 | 0 | curve25519_sub_after_basic(r->t, d, c); |
1207 | 0 | } |
1208 | | |
1209 | | void |
1210 | 0 | ge25519_double_p1p1(ge25519_p1p1 *r, const ge25519 *p) { |
1211 | 0 | bignum25519 a,b,c; |
1212 | |
|
1213 | 0 | curve25519_square(a, p->x); |
1214 | 0 | curve25519_square(b, p->y); |
1215 | 0 | curve25519_square(c, p->z); |
1216 | 0 | curve25519_add_reduce(c, c, c); |
1217 | 0 | curve25519_add(r->x, p->x, p->y); |
1218 | 0 | curve25519_square(r->x, r->x); |
1219 | 0 | curve25519_add(r->y, b, a); |
1220 | 0 | curve25519_sub(r->z, b, a); |
1221 | 0 | curve25519_sub_after_basic(r->x, r->x, r->y); |
1222 | 0 | curve25519_sub_after_basic(r->t, c, r->z); |
1223 | 0 | } |
1224 | | |
1225 | | void |
1226 | 0 | ge25519_nielsadd2_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_niels *q, byte signbit) { |
1227 | 0 | const bignum25519 *qb = (const bignum25519 *)q; |
1228 | 0 | bignum25519 *rb = (bignum25519 *)r; |
1229 | 0 | bignum25519 a,b,c; |
1230 | |
|
1231 | 0 | curve25519_sub(a, p->y, p->x); |
1232 | 0 | curve25519_add(b, p->y, p->x); |
1233 | 0 | curve25519_mul(a, a, qb[signbit]); /* x for +, y for - */ |
1234 | 0 | curve25519_mul(r->x, b, qb[signbit^1]); /* y for +, x for - */ |
1235 | 0 | curve25519_add(r->y, r->x, a); |
1236 | 0 | curve25519_sub(r->x, r->x, a); |
1237 | 0 | curve25519_mul(c, p->t, q->t2d); |
1238 | 0 | curve25519_add_reduce(r->t, p->z, p->z); |
1239 | 0 | curve25519_copy(r->z, r->t); |
1240 | 0 | curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */ |
1241 | 0 | curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */ |
1242 | 0 | } |
1243 | | |
1244 | | void |
1245 | 0 | ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_pniels *q, byte signbit) { |
1246 | 0 | const bignum25519 *qb = (const bignum25519 *)q; |
1247 | 0 | bignum25519 *rb = (bignum25519 *)r; |
1248 | 0 | bignum25519 a,b,c; |
1249 | |
|
1250 | 0 | curve25519_sub(a, p->y, p->x); |
1251 | 0 | curve25519_add(b, p->y, p->x); |
1252 | 0 | curve25519_mul(a, a, qb[signbit]); /* ysubx for +, xaddy for - */ |
1253 | 0 | curve25519_mul(r->x, b, qb[signbit^1]); /* xaddy for +, ysubx for - */ |
1254 | 0 | curve25519_add(r->y, r->x, a); |
1255 | 0 | curve25519_sub(r->x, r->x, a); |
1256 | 0 | curve25519_mul(c, p->t, q->t2d); |
1257 | 0 | curve25519_mul(r->t, p->z, q->z); |
1258 | 0 | curve25519_add_reduce(r->t, r->t, r->t); |
1259 | 0 | curve25519_copy(r->z, r->t); |
1260 | 0 | curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */ |
1261 | 0 | curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */ |
1262 | 0 | } |
1263 | | |
1264 | | void |
1265 | 0 | ge25519_double_partial(ge25519 *r, const ge25519 *p) { |
1266 | 0 | ge25519_p1p1 t; |
1267 | 0 | ge25519_double_p1p1(&t, p); |
1268 | 0 | ge25519_p1p1_to_partial(r, &t); |
1269 | 0 | } |
1270 | | |
1271 | | void |
1272 | 0 | ge25519_double(ge25519 *r, const ge25519 *p) { |
1273 | 0 | ge25519_p1p1 t; |
1274 | 0 | ge25519_double_p1p1(&t, p); |
1275 | 0 | ge25519_p1p1_to_full(r, &t); |
1276 | 0 | } |
1277 | | |
1278 | | void |
1279 | 0 | ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q) { |
1280 | 0 | ge25519_p1p1 t; |
1281 | 0 | ge25519_add_p1p1(&t, p, q); |
1282 | 0 | ge25519_p1p1_to_full(r, &t); |
1283 | 0 | } |
1284 | | |
1285 | | void |
1286 | 0 | ge25519_nielsadd2(ge25519 *r, const ge25519_niels *q) { |
1287 | 0 | bignum25519 a,b,c,e,f,g,h; |
1288 | |
|
1289 | 0 | curve25519_sub(a, r->y, r->x); |
1290 | 0 | curve25519_add(b, r->y, r->x); |
1291 | 0 | curve25519_mul(a, a, q->ysubx); |
1292 | 0 | curve25519_mul(e, b, q->xaddy); |
1293 | 0 | curve25519_add(h, e, a); |
1294 | 0 | curve25519_sub(e, e, a); |
1295 | 0 | curve25519_mul(c, r->t, q->t2d); |
1296 | 0 | curve25519_add(f, r->z, r->z); |
1297 | 0 | curve25519_add_after_basic(g, f, c); |
1298 | 0 | curve25519_sub_after_basic(f, f, c); |
1299 | 0 | curve25519_mul(r->x, e, f); |
1300 | 0 | curve25519_mul(r->y, h, g); |
1301 | 0 | curve25519_mul(r->z, g, f); |
1302 | 0 | curve25519_mul(r->t, e, h); |
1303 | 0 | } |
1304 | | |
1305 | | void |
1306 | 0 | ge25519_pnielsadd(ge25519_pniels *r, const ge25519 *p, const ge25519_pniels *q) { |
1307 | 0 | bignum25519 a,b,c,x,y,z,t; |
1308 | |
|
1309 | 0 | curve25519_sub(a, p->y, p->x); |
1310 | 0 | curve25519_add(b, p->y, p->x); |
1311 | 0 | curve25519_mul(a, a, q->ysubx); |
1312 | 0 | curve25519_mul(x, b, q->xaddy); |
1313 | 0 | curve25519_add(y, x, a); |
1314 | 0 | curve25519_sub(x, x, a); |
1315 | 0 | curve25519_mul(c, p->t, q->t2d); |
1316 | 0 | curve25519_mul(t, p->z, q->z); |
1317 | 0 | curve25519_add(t, t, t); |
1318 | 0 | curve25519_add_after_basic(z, t, c); |
1319 | 0 | curve25519_sub_after_basic(t, t, c); |
1320 | 0 | curve25519_mul(r->xaddy, x, t); |
1321 | 0 | curve25519_mul(r->ysubx, y, z); |
1322 | 0 | curve25519_mul(r->z, z, t); |
1323 | 0 | curve25519_mul(r->t2d, x, y); |
1324 | 0 | curve25519_copy(y, r->ysubx); |
1325 | 0 | curve25519_sub(r->ysubx, r->ysubx, r->xaddy); |
1326 | 0 | curve25519_add(r->xaddy, r->xaddy, y); |
1327 | 0 | curve25519_mul(r->t2d, r->t2d, ge25519_ec2d); |
1328 | 0 | } |
1329 | | |
1330 | | void |
1331 | 0 | ge25519_pack(byte r[32], const ge25519 *p) { |
1332 | 0 | bignum25519 tx, ty, zi; |
1333 | 0 | byte parity[32]; |
1334 | 0 | curve25519_recip(zi, p->z); |
1335 | 0 | curve25519_mul(tx, p->x, zi); |
1336 | 0 | curve25519_mul(ty, p->y, zi); |
1337 | 0 | curve25519_contract(r, ty); |
1338 | 0 | curve25519_contract(parity, tx); |
1339 | 0 | r[31] ^= ((parity[0] & 1) << 7); |
1340 | 0 | } |
1341 | | |
1342 | | int |
1343 | 0 | ed25519_verify(const byte *x, const byte *y, size_t len) { |
1344 | 0 | size_t differentbits = 0; |
1345 | 0 | while (len--) |
1346 | 0 | differentbits |= (*x++ ^ *y++); |
1347 | 0 | return (int) (1 & ((differentbits - 1) >> 8)); |
1348 | 0 | } |
1349 | | |
// Decodes the 32-byte encoding p into r. y is expanded from p, z is set to
// one, and x is recovered as a square root of num/den where num = y^2 - 1
// and den = d*y^2 + 1. x is negated when its low bit equals the sign bit
// stored in the top bit of p[31].
// Returns 1 on success and 0 when neither candidate root satisfies the check.
// The early return makes the running time depend on the input.
1350 | | int |
1351 | 0 | ge25519_unpack_negative_vartime(ge25519 *r, const byte p[32]) { |
1352 | 0 | const byte zero[32] = {0}; |
1353 | 0 | const bignum25519 one = {1}; |
1354 | 0 | byte parity = p[31] >> 7; |
1355 | 0 | byte check[32]; |
1356 | 0 | bignum25519 t, root, num, den, d3; |
1357 |

1358 | 0 | curve25519_expand(r->y, p); |
1359 | 0 | curve25519_copy(r->z, one); |
1360 | 0 | curve25519_square(num, r->y); /* num = y^2 */ |
1361 | 0 | curve25519_mul(den, num, ge25519_ecd); /* den = dy^2 */ |
1362 | 0 | curve25519_sub_reduce(num, num, r->z); /* num = y^2 - 1 */ |
1363 | 0 | curve25519_add(den, den, r->z); /* den = dy^2 + 1 */ |
1364 | | |
1365 | | /* Computation of sqrt(num/den) */ |
1366 | | /* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */ |
1367 | 0 | curve25519_square(t, den); |
1368 | 0 | curve25519_mul(d3, t, den); |
1369 | 0 | curve25519_square(r->x, d3); |
1370 | 0 | curve25519_mul(r->x, r->x, den); |
1371 | 0 | curve25519_mul(r->x, r->x, num); |
1372 | 0 | curve25519_pow_two252m3(r->x, r->x); |
1373 | | |
1374 | | /* 2. computation of r->x = num * den^3 * (num*den^7)^((p-5)/8) */ |
1375 | 0 | curve25519_mul(r->x, r->x, d3); |
1376 | 0 | curve25519_mul(r->x, r->x, num); |
1377 | | |
1378 | | /* 3. Check if either of the roots works: */ |
// First test den*x^2 - num == 0. If that fails, test den*x^2 + num == 0 and,
// when it holds, multiply x by ge25519_sqrtneg1.
1379 | 0 | curve25519_square(t, r->x); |
1380 | 0 | curve25519_mul(t, t, den); |
1381 | 0 | curve25519_sub_reduce(root, t, num); |
1382 | 0 | curve25519_contract(check, root); |
1383 | 0 | if (!ed25519_verify(check, zero, 32)) { |
1384 | 0 | curve25519_add_reduce(t, t, num); |
1385 | 0 | curve25519_contract(check, t); |
1386 | 0 | if (!ed25519_verify(check, zero, 32)) |
1387 | 0 | return 0; |
1388 | 0 | curve25519_mul(r->x, r->x, ge25519_sqrtneg1); |
1389 | 0 | } |
1390 | | |
// Negate x when its low bit matches the sign bit taken from p[31].
1391 | 0 | curve25519_contract(check, r->x); |
1392 | 0 | if ((check[0] & 1) == parity) { |
1393 | 0 | curve25519_copy(t, r->x); |
1394 | 0 | curve25519_neg(r->x, t); |
1395 | 0 | } |
1396 | 0 | curve25519_mul(r->t, r->x, r->y); |
1397 | 0 | return 1; |
1398 | 0 | } |
1399 | | |
1400 | | /* computes [s1]p1 + [s2]basepoint */ |
// Both scalars are recoded into 256 signed sliding-window digits. A table of
// multiples of p1 is built locally, while the basepoint side reads the
// precomputed ge25519_niels_sliding_multiples table. The scan skips leading
// zero digits, so the iteration count depends on the scalars.
1401 | | void |
1402 | 0 | ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) { |
1403 | 0 | signed char slide1[256], slide2[256]; |
1404 | 0 | ge25519_pniels pre1[S1_TABLE_SIZE]; |
1405 | 0 | ge25519 d1; |
1406 | 0 | ge25519_p1p1 t; |
1407 | 0 | sword32 i; |
1408 |

1409 | 0 | contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE); |
1410 | 0 | contract256_slidingwindow_modm(slide2, s2, S2_SWINDOWSIZE); |
1411 |

// pre1[0] holds p1; each following entry adds d1 = 2*p1 to the previous one.
1412 | 0 | ge25519_double(&d1, p1); |
1413 | 0 | ge25519_full_to_pniels(pre1, p1); |
1414 | 0 | for (i = 0; i < S1_TABLE_SIZE - 1; i++) |
1415 | 0 | ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]); |
1416 | | |
1417 | | /* set neutral */ |
1418 | 0 | std::memset(r, 0, sizeof(ge25519)); |
1419 | 0 | r->y[0] = 1; |
1420 | 0 | r->z[0] = 1; |
1421 |

// Find the highest position where either digit is non-zero.
1422 | 0 | i = 255; |
1423 | 0 | while ((i >= 0) && !(slide1[i] | slide2[i])) |
1424 | 0 | i--; |
1425 |

// One doubling per position, plus an addition for each non-zero digit.
// abs(digit) / 2 is the table index; the digit's top bit is passed as the last argument.
1426 | 0 | for (; i >= 0; i--) { |
1427 | 0 | ge25519_double_p1p1(&t, r); |
1428 |

1429 | 0 | if (slide1[i]) { |
1430 | 0 | ge25519_p1p1_to_full(r, &t); |
1431 | 0 | ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (byte)slide1[i] >> 7); |
1432 | 0 | } |
1433 |

1434 | 0 | if (slide2[i]) { |
1435 | 0 | ge25519_p1p1_to_full(r, &t); |
1436 | 0 | ge25519_nielsadd2_p1p1(&t, r, &ge25519_niels_sliding_multiples[abs(slide2[i]) / 2], (byte)slide2[i] >> 7); |
1437 | 0 | } |
1438 |

1439 | 0 | ge25519_p1p1_to_partial(r, &t); |
1440 | 0 | } |
1441 | 0 | } |
1442 | | |
1443 | | #if !defined(HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS) |
1444 | | |
// Branch-free equality test: returns 1 when b == c.
// Returns 0 when they differ, provided (b ^ c) - 1 has bit 31 clear, which
// holds for the small window values passed by the caller in this file.
1445 | | word32 |
1446 | 0 | ge25519_windowb_equal(word32 b, word32 c) { |
// b == c makes (b ^ c) zero; subtracting 1 wraps to all ones, so bit 31 is set.
1447 | 0 | return ((b ^ c) - 1) >> 31; |
1448 | 0 | } |
1449 | | |
// Selects the table entry for signed digit b in window pos and writes it to t.
// All eight candidate rows table[pos*8 + 0..7] are visited and copied under a
// mask, so the loop does not branch on b. A digit of 0 leaves the initial
// value (ysubx = 1, xaddy = 1, t2d = 0). For a negative digit, ysubx and xaddy
// are swapped and t2d is replaced by its negation.
1450 | | void |
1451 | 0 | ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const byte table[256][96], word32 pos, signed char b) { |
1452 | 0 | bignum25519 neg; |
// sign is the top bit of b; mask is all ones when sign is 1 and zero otherwise,
// which makes u the absolute value of b.
1453 | 0 | word32 sign = (word32)((byte)b >> 7); |
1454 | 0 | word32 mask = ~(sign - 1); |
1455 | 0 | word32 u = (b + mask) ^ mask; |
1456 | 0 | word32 i; |
1457 | | |
1458 | | /* ysubx, xaddy, t2d in packed form. initialize to ysubx = 1, xaddy = 1, t2d = 0 */ |
1459 | 0 | byte packed[96] = {0}; |
1460 | 0 | packed[0] = 1; |
1461 | 0 | packed[32] = 1; |
1462 |

// Row i is copied into packed only when u == i + 1.
1463 | 0 | for (i = 0; i < 8; i++) |
1464 | 0 | curve25519_move_conditional_bytes(packed, table[(pos * 8) + i], ge25519_windowb_equal(u, i + 1)); |
1465 | | |
1466 | | /* expand in to t */ |
1467 | 0 | curve25519_expand(t->ysubx, packed + 0); |
1468 | 0 | curve25519_expand(t->xaddy, packed + 32); |
1469 | 0 | curve25519_expand(t->t2d , packed + 64); |
1470 | | |
1471 | | /* adjust for sign */ |
1472 | 0 | curve25519_swap_conditional(t->ysubx, t->xaddy, sign); |
1473 | 0 | curve25519_neg(neg, t->t2d); |
1474 | 0 | curve25519_swap_conditional(t->t2d, neg, sign); |
1475 | 0 | } |
1476 | | |
1477 | | #endif /* HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS */ |
1478 | | |
1479 | | /* computes [s]basepoint */ |
// The scalar s is recoded into 64 signed digits b[0..63]. Odd-indexed digits
// are accumulated first, the accumulator is doubled four times, and then the
// even-indexed digits are added. Table lookups go through
// ge25519_scalarmult_base_choose_niels with window index i / 2.
1480 | | void |
1481 | 0 | ge25519_scalarmult_base_niels(ge25519 *r, const byte basepoint_table[256][96], const bignum256modm s) { |
1482 | 0 | signed char b[64]; |
1483 | 0 | word32 i; |
1484 | 0 | ge25519_niels t; |
1485 |

1486 | 0 | contract256_window4_modm(b, s); |
1487 |

// Seed r directly from the entry for digit b[1]; z is set to 2.
1488 | 0 | ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[1]); |
1489 | 0 | curve25519_sub_reduce(r->x, t.xaddy, t.ysubx); |
1490 | 0 | curve25519_add_reduce(r->y, t.xaddy, t.ysubx); |
1491 | 0 | std::memset(r->z, 0, sizeof(bignum25519)); |
1492 | 0 | curve25519_copy(r->t, t.t2d); |
1493 | 0 | r->z[0] = 2; |
// Remaining odd digits b[3], b[5], ..., b[63].
1494 | 0 | for (i = 3; i < 64; i += 2) { |
1495 | 0 | ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]); |
1496 | 0 | ge25519_nielsadd2(r, &t); |
1497 | 0 | } |
// Four doublings: three partial ones followed by a full one.
1498 | 0 | ge25519_double_partial(r, r); |
1499 | 0 | ge25519_double_partial(r, r); |
1500 | 0 | ge25519_double_partial(r, r); |
1501 | 0 | ge25519_double(r, r); |
// Even digits: b[0] first (its t2d is multiplied by ge25519_ecd), then b[2], ..., b[62].
1502 | 0 | ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[0]); |
1503 | 0 | curve25519_mul(t.t2d, t.t2d, ge25519_ecd); |
1504 | 0 | ge25519_nielsadd2(r, &t); |
1505 | 0 | for(i = 2; i < 64; i += 2) { |
1506 | 0 | ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]); |
1507 | 0 | ge25519_nielsadd2(r, &t); |
1508 | 0 | } |
1509 | 0 | } |
1510 | | |
1511 | | ANONYMOUS_NAMESPACE_END |
1512 | | NAMESPACE_END // Ed25519 |
1513 | | NAMESPACE_END // Donna |
1514 | | NAMESPACE_END // CryptoPP |
1515 | | |
1516 | | //***************************** curve25519 *****************************// |
1517 | | |
1518 | | NAMESPACE_BEGIN(CryptoPP) |
1519 | | NAMESPACE_BEGIN(Donna) |
1520 | | |
// Portable curve25519 scalar multiplication.
// secretKey is copied into a local FixedSizeSecBlock and clamped: the low
// three bits of byte 0 are cleared, bit 7 of byte 31 is cleared and bit 6 of
// byte 31 is set. The clamped scalar is then multiplied with the point in
// othersKey and the 32-byte result is written to sharedKey.
// Always returns 0.
1521 | | int curve25519_mult_CXX(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32]) |
1522 | 28 | { |
1523 | 28 | using namespace CryptoPP::Donna::X25519; |
1524 | | |
1525 | 28 | FixedSizeSecBlock<byte, 32> e; |
1526 | 924 | for (size_t i = 0;i < 32;++i) |
1527 | 896 | e[i] = secretKey[i]; |
1528 | 28 | e[0] &= 0xf8; e[31] &= 0x7f; e[31] |= 0x40; |
1529 | | |
1530 | 28 | bignum25519 nqpqx = {1}, nqpqz = {0}, nqz = {1}, nqx; |
1531 | 28 | bignum25519 q, qx, qpqx, qqx, zzz, zmone; |
1532 | 28 | size_t bit, lastbit; |
1533 | | |
1534 | 28 | curve25519_expand(q, othersKey); |
1535 | 28 | curve25519_copy(nqx, q); |
1536 | | |
1537 | | /* bit 255 is always 0, and bit 254 is always 1, so skip bit 255 and |
1538 | | start pre-swapped on bit 254 */ |
1539 | 28 | lastbit = 1; |
1540 | | |
1541 | | /* we are doing bits 254..3 in the loop, but are swapping in bits 253..2 */ |
1542 | 7.08k | for (int i = 253; i >= 2; i--) { |
1543 | 7.05k | curve25519_add(qx, nqx, nqz); |
1544 | 7.05k | curve25519_sub(nqz, nqx, nqz); |
1545 | 7.05k | curve25519_add(qpqx, nqpqx, nqpqz); |
1546 | 7.05k | curve25519_sub(nqpqz, nqpqx, nqpqz); |
1547 | 7.05k | curve25519_mul(nqpqx, qpqx, nqz); |
1548 | 7.05k | curve25519_mul(nqpqz, qx, nqpqz); |
1549 | 7.05k | curve25519_add(qqx, nqpqx, nqpqz); |
1550 | 7.05k | curve25519_sub(nqpqz, nqpqx, nqpqz); |
1551 | 7.05k | curve25519_square(nqpqz, nqpqz); |
1552 | 7.05k | curve25519_square(nqpqx, qqx); |
1553 | 7.05k | curve25519_mul(nqpqz, nqpqz, q); |
1554 | 7.05k | curve25519_square(qx, qx); |
1555 | 7.05k | curve25519_square(nqz, nqz); |
1556 | 7.05k | curve25519_mul(nqx, qx, nqz); |
1557 | 7.05k | curve25519_sub(nqz, qx, nqz); |
1558 | 7.05k | curve25519_scalar_product(zzz, nqz, 121665); |
1559 | 7.05k | curve25519_add(zzz, zzz, qx); |
1560 | 7.05k | curve25519_mul(nqz, nqz, zzz); |
1561 | | |
// Swap the two working points only when the current key bit differs from the
// previous one. The swap helper is called on every iteration, so no branch
// on key bits appears in this loop.
1562 | 7.05k | bit = (e[i/8] >> (i & 7)) & 1; |
1563 | 7.05k | curve25519_swap_conditional(nqx, nqpqx, bit ^ lastbit); |
1564 | 7.05k | curve25519_swap_conditional(nqz, nqpqz, bit ^ lastbit); |
1565 | 7.05k | lastbit = bit; |
1566 | 7.05k | } |
1567 | | |
1568 | | /* the final 3 bits are always zero, so we only need to double */ |
1569 | 112 | for (int i = 0; i < 3; i++) { |
1570 | 84 | curve25519_add(qx, nqx, nqz); |
1571 | 84 | curve25519_sub(nqz, nqx, nqz); |
1572 | 84 | curve25519_square(qx, qx); |
1573 | 84 | curve25519_square(nqz, nqz); |
1574 | 84 | curve25519_mul(nqx, qx, nqz); |
1575 | 84 | curve25519_sub(nqz, qx, nqz); |
1576 | 84 | curve25519_scalar_product(zzz, nqz, 121665); |
1577 | 84 | curve25519_add(zzz, zzz, qx); |
1578 | 84 | curve25519_mul(nqz, nqz, zzz); |
1579 | 84 | } |
1580 | | |
// Convert from projective form: result = nqx * (1 / nqz), then serialize.
1581 | 28 | curve25519_recip(zmone, nqz); |
1582 | 28 | curve25519_mul(nqz, nqx, zmone); |
1583 | 28 | curve25519_contract(sharedKey, nqz); |
1584 | | |
1585 | 28 | return 0; |
1586 | 28 | } |
1587 | | |
// Derives publicKey from secretKey by multiplying with basePoint, which is
// declared outside this block. Uses curve25519_mult_SSE2 when
// CRYPTOPP_CURVE25519_SSE2 is enabled and HasSSE2() is true at run time;
// otherwise uses curve25519_mult_CXX. Returns the callee's result.
1588 | | int curve25519_mult(byte publicKey[32], const byte secretKey[32]) |
1589 | 28 | { |
1590 | 28 | using namespace CryptoPP::Donna::X25519; |
1591 | | |
1592 | | #if (CRYPTOPP_CURVE25519_SSE2) |
1593 | | if (HasSSE2()) |
1594 | | return curve25519_mult_SSE2(publicKey, secretKey, basePoint); |
1595 | | else |
1596 | | #endif |
1597 | | |
1598 | 28 | return curve25519_mult_CXX(publicKey, secretKey, basePoint); |
1599 | 28 | } |
1600 | | |
// Computes sharedKey from secretKey and the peer value othersKey.
// Uses curve25519_mult_SSE2 when CRYPTOPP_CURVE25519_SSE2 is enabled and
// HasSSE2() is true at run time; otherwise uses curve25519_mult_CXX.
// Returns the callee's result.
1601 | | int curve25519_mult(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32]) |
1602 | 0 | { |
1603 | | #if (CRYPTOPP_CURVE25519_SSE2) |
1604 | | if (HasSSE2()) |
1605 | | return curve25519_mult_SSE2(sharedKey, secretKey, othersKey); |
1606 | | else |
1607 | | #endif |
1608 |

1609 | 0 | return curve25519_mult_CXX(sharedKey, secretKey, othersKey); |
1610 | 0 | } |
1611 | | |
1612 | | NAMESPACE_END // Donna |
1613 | | NAMESPACE_END // CryptoPP |
1614 | | |
1615 | | //******************************* ed25519 *******************************// |
1616 | | |
1617 | | NAMESPACE_BEGIN(CryptoPP) |
1618 | | NAMESPACE_BEGIN(Donna) |
1619 | | |
// Derives the 32-byte ed25519 public key from secretKey.
// The secret key is expanded with ed25519_extsk, the first 32 bytes of the
// result become the scalar a, and A = aB is packed into publicKey.
// Always returns 0.
1620 | | int |
1621 | | ed25519_publickey_CXX(byte publicKey[32], const byte secretKey[32]) |
1622 | 0 | { |
1623 | 0 | using namespace CryptoPP::Donna::Ed25519; |
1624 |

1625 | 0 | bignum256modm a; |
1626 | 0 | ALIGN(ALIGN_SPEC) ge25519 A; |
1627 | 0 | hash_512bits extsk; |
1628 | | |
1629 | | /* A = aB */ |
1630 | 0 | ed25519_extsk(extsk, secretKey); |
1631 | 0 | expand256_modm(a, extsk, 32); |
1632 | 0 | ge25519_scalarmult_base_niels(&A, ge25519_niels_base_multiples, a); |
1633 | 0 | ge25519_pack(publicKey, &A); |
1634 |

1635 | 0 | return 0; |
1636 | 0 | } |
1637 | | |
// Public entry point; forwards to ed25519_publickey_CXX and returns its result.
1638 | | int |
1639 | | ed25519_publickey(byte publicKey[32], const byte secretKey[32]) |
1640 | 0 | { |
1641 | 0 | return ed25519_publickey_CXX(publicKey, secretKey); |
1642 | 0 | } |
1643 | | |
// Signs the message read from stream and writes the 64-byte signature to RS:
// bytes 0..31 hold the packed R and bytes 32..63 hold S.
// sk is the 32-byte secret key and pk the matching 32-byte public key.
// The stream is consumed twice, so it is rewound to its starting position
// between the two passes. Always returns 0.
// NOTE(review): the results of tellg() and seekg() are not checked here. If
// the stream cannot be repositioned, the second pass may not see the same
// message; confirm callers only pass seekable streams.
1644 | | int |
1645 | | ed25519_sign_CXX(std::istream& stream, const byte sk[32], const byte pk[32], byte RS[64]) |
1646 | 0 | { |
1647 | 0 | using namespace CryptoPP::Donna::Ed25519; |
1648 |

1649 | 0 | bignum256modm r, S, a; |
1650 | 0 | ALIGN(ALIGN_SPEC) ge25519 R; |
1651 | 0 | hash_512bits extsk, hashr, hram; |
1652 | | |
1653 | | // Unfortunately we need to read the stream twice. The first time calculates |
1654 | | // 'r = H(aExt[32..64], m)'. The second time calculates 'S = H(R,A,m)'. There |
1655 | | // is a data dependency due to hashing 'RS' with 'R = [r]B' that does not |
1656 | | // allow us to read the stream once. |
1657 | 0 | std::streampos where = stream.tellg(); |
1658 |

1659 | 0 | ed25519_extsk(extsk, sk); |
1660 | | |
1661 | | /* r = H(aExt[32..64], m) */ |
1662 | 0 | SHA512 hash; |
1663 | 0 | hash.Update(extsk + 32, 32); |
1664 | 0 | UpdateFromStream(hash, stream); |
1665 | 0 | hash.Final(hashr); |
1666 | 0 | expand256_modm(r, hashr, 64); |
1667 | | |
1668 | | /* R = rB */ |
1669 | 0 | ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r); |
1670 | 0 | ge25519_pack(RS, &R); |
1671 | | |
1672 | | // Reset stream for the second digest |
// clear() drops the state flags left by the first read before seeking back.
1673 | 0 | stream.clear(); |
1674 | 0 | stream.seekg(where); |
1675 | | |
1676 | | /* S = H(R,A,m).. */ |
1677 | 0 | ed25519_hram(hram, RS, pk, stream); |
1678 | 0 | expand256_modm(S, hram, 64); |
1679 | | |
1680 | | /* S = H(R,A,m)a */ |
1681 | 0 | expand256_modm(a, extsk, 32); |
1682 | 0 | mul256_modm(S, S, a); |
1683 | | |
1684 | | /* S = (r + H(R,A,m)a) */ |
1685 | 0 | add256_modm(S, S, r); |
1686 | | |
1687 | | /* S = (r + H(R,A,m)a) mod L */ |
1688 | 0 | contract256_modm(RS + 32, S); |
1689 | 0 | return 0; |
1690 | 0 | } |
1691 | | |
// Signs the mlen-byte message m and writes the 64-byte signature to RS:
// bytes 0..31 hold the packed R and bytes 32..63 hold S.
// sk is the 32-byte secret key and pk the matching 32-byte public key.
// Always returns 0.
1692 | | int |
1693 | | ed25519_sign_CXX(const byte *m, size_t mlen, const byte sk[32], const byte pk[32], byte RS[64]) |
1694 | 0 | { |
1695 | 0 | using namespace CryptoPP::Donna::Ed25519; |
1696 |

1697 | 0 | bignum256modm r, S, a; |
1698 | 0 | ALIGN(ALIGN_SPEC) ge25519 R; |
1699 | 0 | hash_512bits extsk, hashr, hram; |
1700 |

1701 | 0 | ed25519_extsk(extsk, sk); |
1702 | | |
1703 | | /* r = H(aExt[32..64], m) */ |
1704 | 0 | SHA512 hash; |
1705 | 0 | hash.Update(extsk + 32, 32); |
1706 | 0 | hash.Update(m, mlen); |
1707 | 0 | hash.Final(hashr); |
1708 | 0 | expand256_modm(r, hashr, 64); |
1709 | | |
1710 | | /* R = rB */ |
1711 | 0 | ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r); |
1712 | 0 | ge25519_pack(RS, &R); |
1713 | | |
1714 | | /* S = H(R,A,m).. */ |
1715 | 0 | ed25519_hram(hram, RS, pk, m, mlen); |
1716 | 0 | expand256_modm(S, hram, 64); |
1717 | | |
1718 | | /* S = H(R,A,m)a */ |
1719 | 0 | expand256_modm(a, extsk, 32); |
1720 | 0 | mul256_modm(S, S, a); |
1721 | | |
1722 | | /* S = (r + H(R,A,m)a) */ |
1723 | 0 | add256_modm(S, S, r); |
1724 | | |
1725 | | /* S = (r + H(R,A,m)a) mod L */ |
1726 | 0 | contract256_modm(RS + 32, S); |
1727 | 0 | return 0; |
1728 | 0 | } |
1729 | | |
// Public stream-based signing entry point.
// Forwards to the stream overload of ed25519_sign_CXX and returns its result.
1730 | | int |
1731 | | ed25519_sign(std::istream& stream, const byte secretKey[32], const byte publicKey[32], |
1732 | | byte signature[64]) |
1733 | 0 | { |
1734 | 0 | return ed25519_sign_CXX(stream, secretKey, publicKey, signature); |
1735 | 0 | } |
1736 | | |
// Public buffer-based signing entry point.
// Forwards to the buffer overload of ed25519_sign_CXX and returns its result.
1737 | | int |
1738 | | ed25519_sign(const byte* message, size_t messageLength, const byte secretKey[32], |
1739 | | const byte publicKey[32], byte signature[64]) |
1740 | 0 | { |
1741 | 0 | return ed25519_sign_CXX(message, messageLength, secretKey, publicKey, signature); |
1742 | 0 | } |
1743 | | |
// Verifies the 64-byte signature RS over the mlen-byte message m against the
// 32-byte public key pk. Returns 0 when the signature is accepted, -1 otherwise.
1744 | | int |
1745 | 0 | ed25519_sign_open_CXX(const byte *m, size_t mlen, const byte pk[32], const byte RS[64]) { |
1746 |

1747 | 0 | using namespace CryptoPP::Donna::Ed25519; |
1748 |

1749 | 0 | ALIGN(ALIGN_SPEC) ge25519 R, A; |
1750 | 0 | hash_512bits hash; |
1751 | 0 | bignum256modm hram, S; |
1752 | 0 | byte checkR[32]; |
1753 |

// Reject when any of the top three bits of the last signature byte is set
// (224 == 0xE0), or when pk cannot be unpacked into a point.
1754 | 0 | if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk)) |
1755 | 0 | return -1; |
1756 | | |
1757 | | /* hram = H(R,A,m) */ |
1758 | 0 | ed25519_hram(hash, RS, pk, m, mlen); |
1759 | 0 | expand256_modm(hram, hash, 64); |
1760 | | |
1761 | | /* S */ |
1762 | 0 | expand256_modm(S, RS + 32, 32); |
1763 | | |
1764 | | /* SB - H(R,A,m)A */ |
1765 | 0 | ge25519_double_scalarmult_vartime(&R, &A, hram, S); |
1766 | 0 | ge25519_pack(checkR, &R); |
1767 | | |
1768 | | /* check that R = SB - H(R,A,m)A */ |
// ed25519_verify returns 1 on a match, which maps to the 0 success code.
1769 | 0 | return ed25519_verify(RS, checkR, 32) ? 0 : -1; |
1770 | 0 | } |
1771 | | |
// Verifies the 64-byte signature RS over the message read from stream against
// the 32-byte public key pk. The stream is read once.
// Returns 0 when the signature is accepted, -1 otherwise.
1772 | | int |
1773 | 0 | ed25519_sign_open_CXX(std::istream& stream, const byte pk[32], const byte RS[64]) { |
1774 |

1775 | 0 | using namespace CryptoPP::Donna::Ed25519; |
1776 |

1777 | 0 | ALIGN(ALIGN_SPEC) ge25519 R, A; |
1778 | 0 | hash_512bits hash; |
1779 | 0 | bignum256modm hram, S; |
1780 | 0 | byte checkR[32]; |
1781 |

// Reject when any of the top three bits of the last signature byte is set
// (224 == 0xE0), or when pk cannot be unpacked into a point.
1782 | 0 | if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk)) |
1783 | 0 | return -1; |
1784 | | |
1785 | | /* hram = H(R,A,m) */ |
1786 | 0 | ed25519_hram(hash, RS, pk, stream); |
1787 | 0 | expand256_modm(hram, hash, 64); |
1788 | | |
1789 | | /* S */ |
1790 | 0 | expand256_modm(S, RS + 32, 32); |
1791 | | |
1792 | | /* SB - H(R,A,m)A */ |
1793 | 0 | ge25519_double_scalarmult_vartime(&R, &A, hram, S); |
1794 | 0 | ge25519_pack(checkR, &R); |
1795 | | |
1796 | | /* check that R = SB - H(R,A,m)A */ |
// ed25519_verify returns 1 on a match, which maps to the 0 success code.
1797 | 0 | return ed25519_verify(RS, checkR, 32) ? 0 : -1; |
1798 | 0 | } |
1799 | | |
// Public stream-based verification entry point.
// Forwards to the stream overload of ed25519_sign_open_CXX and returns its
// result (0 on success, -1 on failure).
1800 | | int |
1801 | | ed25519_sign_open(std::istream& stream, const byte publicKey[32], const byte signature[64]) |
1802 | 0 | { |
1803 | 0 | return ed25519_sign_open_CXX(stream, publicKey, signature); |
1804 | 0 | } |
1805 | | |
// Public buffer-based verification entry point.
// Forwards to the buffer overload of ed25519_sign_open_CXX and returns its
// result (0 on success, -1 on failure).
1806 | | int |
1807 | | ed25519_sign_open(const byte *message, size_t messageLength, const byte publicKey[32], const byte signature[64]) |
1808 | 0 | { |
1809 | 0 | return ed25519_sign_open_CXX(message, messageLength, publicKey, signature); |
1810 | 0 | } |
1811 | | |
1812 | | NAMESPACE_END // Donna |
1813 | | NAMESPACE_END // CryptoPP |
1814 | | |
1815 | | #endif // CRYPTOPP_CURVE25519_64BIT |