/rust/registry/src/index.crates.io-1949cf8c6b5b557f/ring-0.17.14/crypto/curve25519/curve25519.c
Line  | Count  | Source  | 
1  |  | // Copyright 2020 The BoringSSL Authors  | 
2  |  | //  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
4  |  | // you may not use this file except in compliance with the License.  | 
5  |  | // You may obtain a copy of the License at  | 
6  |  | //  | 
7  |  | //     https://www.apache.org/licenses/LICENSE-2.0  | 
8  |  | //  | 
9  |  | // Unless required by applicable law or agreed to in writing, software  | 
10  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
11  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
12  |  | // See the License for the specific language governing permissions and  | 
13  |  | // limitations under the License.  | 
14  |  |  | 
15  |  | // Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP  | 
16  |  | // 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as  | 
17  |  | // public domain. Other parts have been replaced to call into code generated by  | 
18  |  | // Fiat (https://github.com/mit-plv/fiat-crypto) in //third_party/fiat.  | 
19  |  | //  | 
20  |  | // The field functions are shared by Ed25519 and X25519 where possible.  | 
21  |  |  | 
22  |  | #include <ring-core/mem.h>  | 
23  |  |  | 
24  |  | #include "internal.h"  | 
25  |  | #include "../internal.h"  | 
26  |  |  | 
27  |  | #if defined(_MSC_VER) && !defined(__clang__)  | 
28  |  | // '=': conversion from 'int64_t' to 'int32_t', possible loss of data  | 
29  |  | #pragma warning(disable: 4242)  | 
30  |  | // '=': conversion from 'int32_t' to 'uint8_t', possible loss of data  | 
31  |  | #pragma warning(disable: 4244)  | 
32  |  | #endif  | 
33  |  |  | 
34  |  | #if defined(__GNUC__) || defined(__clang__)  | 
35  |  | #pragma GCC diagnostic ignored "-Wconversion"  | 
36  |  | #pragma GCC diagnostic ignored "-Wsign-conversion"  | 
37  |  | #endif  | 
38  |  |  | 
39  |  | #if defined(__GNUC__) && !defined(__clang__)  | 
40  |  | #pragma GCC diagnostic ignored "-Winline"  | 
41  |  | #endif  | 
42  |  |  | 
43  |  | // Various pre-computed constants.  | 
44  |  | #include "./curve25519_tables.h"  | 
45  |  |  | 
46  |  | #if defined(BORINGSSL_HAS_UINT128)  | 
47  |  | #if defined(__GNUC__)  | 
48  |  | #pragma GCC diagnostic ignored "-Wpedantic"  | 
49  |  | #endif  | 
50  |  | #include "../../third_party/fiat/curve25519_64.h"  | 
51  |  | #elif defined(OPENSSL_64_BIT)  | 
52  |  | #include "../../third_party/fiat/curve25519_64_msvc.h"  | 
53  |  | #else  | 
54  |  | #include "../../third_party/fiat/curve25519_32.h"  | 
55  |  | #endif  | 
56  |  |  | 
57  |  |  | 
58  |  | // Low-level intrinsic operations  | 
59  |  |  | 
// load_3 reads the first three bytes of |in| as a little-endian integer and
// returns the value zero-extended to 64 bits.
static uint64_t load_3(const uint8_t *in) {
  uint64_t acc = 0;
  for (unsigned k = 0; k < 3; k++) {
    acc |= ((uint64_t)in[k]) << (8 * k);
  }
  return acc;
}
67  |  |  | 
// load_4 reads the first four bytes of |in| as a little-endian integer and
// returns the value zero-extended to 64 bits.
static uint64_t load_4(const uint8_t *in) {
  uint64_t acc = 0;
  for (unsigned k = 0; k < 4; k++) {
    acc |= ((uint64_t)in[k]) << (8 * k);
  }
  return acc;
}
76  |  |  | 
77  |  |  | 
78  |  | // Field operations.  | 
79  |  |  | 
80  |  | #if defined(OPENSSL_64_BIT)  | 
81  |  |  | 
// assert_fe asserts that each of the five limbs of |f| satisfies bounds:
//
//  [[0x0 ~> 0x8cccccccccccc],
//   [0x0 ~> 0x8cccccccccccc],
//   [0x0 ~> 0x8cccccccccccc],
//   [0x0 ~> 0x8cccccccccccc],
//   [0x0 ~> 0x8cccccccccccc]]
//
// See comments in curve25519_64.h for which functions use these bounds for
// inputs or outputs.
//
// |f| is parenthesized in the expansion so that any pointer-valued expression
// (e.g. |p + 1|) indexes correctly.
#define assert_fe(f)                                                      \
  do {                                                                    \
    for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) {   \
      declassify_assert((f)[_assert_fe_i] <= UINT64_C(0x8cccccccccccc));  \
    }                                                                     \
  } while (0)
98  |  |  | 
// assert_fe_loose asserts that each of the five limbs of |f| satisfies bounds:
//
//  [[0x0 ~> 0x1a666666666664],
//   [0x0 ~> 0x1a666666666664],
//   [0x0 ~> 0x1a666666666664],
//   [0x0 ~> 0x1a666666666664],
//   [0x0 ~> 0x1a666666666664]]
//
// See comments in curve25519_64.h for which functions use these bounds for
// inputs or outputs.
//
// |f| is parenthesized in the expansion so that any pointer-valued expression
// (e.g. |p + 1|) indexes correctly.
#define assert_fe_loose(f)                                                \
  do {                                                                    \
    for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) {   \
      declassify_assert((f)[_assert_fe_i] <= UINT64_C(0x1a666666666664)); \
    }                                                                     \
  } while (0)
115  |  |  | 
116  |  | #else  | 
117  |  |  | 
// assert_fe asserts that the ten limbs of |f| satisfy bounds (even-indexed
// limbs use the larger bound, odd-indexed limbs the smaller one):
//
//  [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
//   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
//   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
//   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
//   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
//
// See comments in curve25519_32.h for which functions use these bounds for
// inputs or outputs.
//
// |f| is parenthesized in the expansion so that any pointer-valued expression
// (e.g. |p + 1|) indexes correctly.
#define assert_fe(f)                                                     \
  do {                                                                   \
    for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
      declassify_assert((f)[_assert_fe_i] <=                             \
                        ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \
    }                                                                    \
  } while (0)
135  |  |  | 
// assert_fe_loose asserts that the ten limbs of |f| satisfy bounds
// (even-indexed limbs use the larger bound, odd-indexed limbs the smaller
// one):
//
//  [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
//   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
//   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
//   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
//   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
//
// See comments in curve25519_32.h for which functions use these bounds for
// inputs or outputs.
//
// |f| is parenthesized in the expansion so that any pointer-valued expression
// (e.g. |p + 1|) indexes correctly.
#define assert_fe_loose(f)                                               \
  do {                                                                   \
    for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
      declassify_assert((f)[_assert_fe_i] <=                             \
                        ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \
    }                                                                    \
  } while (0)
153  |  |  | 
154  |  | #endif  // OPENSSL_64_BIT  | 
155  |  |  | 
156  |  | OPENSSL_STATIC_ASSERT(sizeof(fe) == sizeof(fe_limb_t) * FE_NUM_LIMBS,  | 
157  |  |                       "fe_limb_t[FE_NUM_LIMBS] is inconsistent with fe");  | 
158  |  |  | 
159  | 0  | static void fe_frombytes_strict(fe *h, const uint8_t s[32]) { | 
160  |  |   // |fiat_25519_from_bytes| requires the top-most bit be clear.  | 
161  | 0  |   declassify_assert((s[31] & 0x80) == 0);  | 
162  | 0  |   fiat_25519_from_bytes(h->v, s);  | 
163  | 0  |   assert_fe(h->v);  | 
164  | 0  | }  | 
165  |  |  | 
166  | 0  | static void fe_frombytes(fe *h, const uint8_t s[32]) { | 
167  | 0  |   uint8_t s_copy[32];  | 
168  | 0  |   OPENSSL_memcpy(s_copy, s, 32);  | 
169  | 0  |   s_copy[31] &= 0x7f;  | 
170  | 0  |   fe_frombytes_strict(h, s_copy);  | 
171  | 0  | }  | 
172  |  |  | 
173  | 0  | static void fe_tobytes(uint8_t s[32], const fe *f) { | 
174  | 0  |   assert_fe(f->v);  | 
175  | 0  |   fiat_25519_to_bytes(s, f->v);  | 
176  | 0  | }  | 
177  |  |  | 
178  |  | // h = 0  | 
179  | 0  | static void fe_0(fe *h) { | 
180  | 0  |   OPENSSL_memset(h, 0, sizeof(fe));  | 
181  | 0  | }  | 
182  |  |  | 
183  |  | #if defined(OPENSSL_SMALL)  | 
184  |  |  | 
185  |  | static void fe_loose_0(fe_loose *h) { | 
186  |  |   OPENSSL_memset(h, 0, sizeof(fe_loose));  | 
187  |  | }  | 
188  |  |  | 
189  |  | #endif  | 
190  |  |  | 
191  |  | // h = 1  | 
192  | 0  | static void fe_1(fe *h) { | 
193  | 0  |   OPENSSL_memset(h, 0, sizeof(fe));  | 
194  | 0  |   h->v[0] = 1;  | 
195  | 0  | }  | 
196  |  |  | 
197  |  | #if defined(OPENSSL_SMALL)  | 
198  |  |  | 
199  |  | static void fe_loose_1(fe_loose *h) { | 
200  |  |   OPENSSL_memset(h, 0, sizeof(fe_loose));  | 
201  |  |   h->v[0] = 1;  | 
202  |  | }  | 
203  |  |  | 
204  |  | #endif  | 
205  |  |  | 
206  |  | // h = f + g  | 
207  |  | // Can overlap h with f or g.  | 
208  | 0  | static void fe_add(fe_loose *h, const fe *f, const fe *g) { | 
209  | 0  |   assert_fe(f->v);  | 
210  | 0  |   assert_fe(g->v);  | 
211  | 0  |   fiat_25519_add(h->v, f->v, g->v);  | 
212  | 0  |   assert_fe_loose(h->v);  | 
213  | 0  | }  | 
214  |  |  | 
215  |  | // h = f - g  | 
216  |  | // Can overlap h with f or g.  | 
217  | 0  | static void fe_sub(fe_loose *h, const fe *f, const fe *g) { | 
218  | 0  |   assert_fe(f->v);  | 
219  | 0  |   assert_fe(g->v);  | 
220  | 0  |   fiat_25519_sub(h->v, f->v, g->v);  | 
221  | 0  |   assert_fe_loose(h->v);  | 
222  | 0  | }  | 
223  |  |  | 
224  | 0  | static void fe_carry(fe *h, const fe_loose* f) { | 
225  | 0  |   assert_fe_loose(f->v);  | 
226  | 0  |   fiat_25519_carry(h->v, f->v);  | 
227  | 0  |   assert_fe(h->v);  | 
228  | 0  | }  | 
229  |  |  | 
230  |  | static void fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],  | 
231  |  |                         const fe_limb_t in1[FE_NUM_LIMBS],  | 
232  | 0  |                         const fe_limb_t in2[FE_NUM_LIMBS]) { | 
233  | 0  |   assert_fe_loose(in1);  | 
234  | 0  |   assert_fe_loose(in2);  | 
235  | 0  |   fiat_25519_carry_mul(out, in1, in2);  | 
236  | 0  |   assert_fe(out);  | 
237  | 0  | }  | 
238  |  |  | 
239  | 0  | static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) { | 
240  | 0  |   fe_mul_impl(h->v, f->v, g->v);  | 
241  | 0  | }  | 
242  |  |  | 
243  |  | #if defined(OPENSSL_SMALL)  | 
244  |  | static void fe_mul_llt(fe_loose *h, const fe_loose *f, const fe *g) { | 
245  |  |   fe_mul_impl(h->v, f->v, g->v);  | 
246  |  | }  | 
247  |  | #endif  | 
248  |  |  | 
249  | 0  | static void fe_mul_ttt(fe *h, const fe *f, const fe *g) { | 
250  | 0  |   fe_mul_impl(h->v, f->v, g->v);  | 
251  | 0  | }  | 
252  |  |  | 
253  | 0  | static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) { | 
254  | 0  |   fe_mul_impl(h->v, f->v, g->v);  | 
255  | 0  | }  | 
256  |  |  | 
257  | 0  | static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) { | 
258  | 0  |   fe_mul_impl(h->v, f->v, g->v);  | 
259  | 0  | }  | 
260  |  |  | 
261  | 0  | static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) { | 
262  | 0  |   fe_mul_impl(h->v, f->v, g->v);  | 
263  | 0  | }  | 
264  |  |  | 
265  | 0  | static void fe_sq_tl(fe *h, const fe_loose *f) { | 
266  | 0  |   assert_fe_loose(f->v);  | 
267  | 0  |   fiat_25519_carry_square(h->v, f->v);  | 
268  | 0  |   assert_fe(h->v);  | 
269  | 0  | }  | 
270  |  |  | 
271  | 0  | static void fe_sq_tt(fe *h, const fe *f) { | 
272  | 0  |   assert_fe_loose(f->v);  | 
273  | 0  |   fiat_25519_carry_square(h->v, f->v);  | 
274  | 0  |   assert_fe(h->v);  | 
275  | 0  | }  | 
276  |  |  | 
277  |  | // Replace (f,g) with (g,f) if b == 1;  | 
278  |  | // replace (f,g) with (f,g) if b == 0.  | 
279  |  | //  | 
280  |  | // Preconditions: b in {0,1}. | 
281  | 0  | static void fe_cswap(fe *f, fe *g, fe_limb_t b) { | 
282  | 0  |   b = 0-b;  | 
283  | 0  |   for (unsigned i = 0; i < FE_NUM_LIMBS; i++) { | 
284  | 0  |     fe_limb_t x = f->v[i] ^ g->v[i];  | 
285  | 0  |     x &= b;  | 
286  | 0  |     f->v[i] ^= x;  | 
287  | 0  |     g->v[i] ^= x;  | 
288  | 0  |   }  | 
289  | 0  | }  | 
290  |  |  | 
291  | 0  | static void fe_mul121666(fe *h, const fe_loose *f) { | 
292  | 0  |   assert_fe_loose(f->v);  | 
293  | 0  |   fiat_25519_carry_scmul_121666(h->v, f->v);  | 
294  | 0  |   assert_fe(h->v);  | 
295  | 0  | }  | 
296  |  |  | 
297  |  | // h = -f  | 
298  | 0  | static void fe_neg(fe_loose *h, const fe *f) { | 
299  | 0  |   assert_fe(f->v);  | 
300  | 0  |   fiat_25519_opp(h->v, f->v);  | 
301  | 0  |   assert_fe_loose(h->v);  | 
302  | 0  | }  | 
303  |  |  | 
304  |  | // Replace (f,g) with (g,g) if b == 1;  | 
305  |  | // replace (f,g) with (f,g) if b == 0.  | 
306  |  | //  | 
307  |  | // Preconditions: b in {0,1}. | 
308  | 0  | static void fe_cmov(fe_loose *f, const fe_loose *g, fe_limb_t b) { | 
309  |  |   // TODO(davidben): Switch to fiat's calling convention, or ask fiat to emit a  | 
310  |  |   // different one.  | 
311  |  | 
  | 
312  | 0  |   b = 0-b;  | 
313  | 0  |   for (unsigned i = 0; i < FE_NUM_LIMBS; i++) { | 
314  | 0  |     fe_limb_t x = f->v[i] ^ g->v[i];  | 
315  | 0  |     x &= b;  | 
316  | 0  |     f->v[i] ^= x;  | 
317  | 0  |   }  | 
318  | 0  | }  | 
319  |  |  | 
320  |  | // h = f  | 
321  | 0  | static void fe_copy(fe *h, const fe *f) { | 
322  | 0  |   fe_limbs_copy(h->v, f->v);  | 
323  | 0  | }  | 
324  |  |  | 
325  | 0  | static void fe_copy_lt(fe_loose *h, const fe *f) { | 
326  | 0  |   OPENSSL_STATIC_ASSERT(sizeof(fe_loose) == sizeof(fe), "fe and fe_loose mismatch");  | 
327  | 0  |   fe_limbs_copy(h->v, f->v);  | 
328  | 0  | }  | 
329  |  |  | 
330  | 0  | static void fe_loose_invert(fe *out, const fe_loose *z) { | 
331  | 0  |   fe t0;  | 
332  | 0  |   fe t1;  | 
333  | 0  |   fe t2;  | 
334  | 0  |   fe t3;  | 
335  | 0  |   int i;  | 
336  |  | 
  | 
337  | 0  |   fe_sq_tl(&t0, z);  | 
338  | 0  |   fe_sq_tt(&t1, &t0);  | 
339  | 0  |   for (i = 1; i < 2; ++i) { | 
340  | 0  |     fe_sq_tt(&t1, &t1);  | 
341  | 0  |   }  | 
342  | 0  |   fe_mul_tlt(&t1, z, &t1);  | 
343  | 0  |   fe_mul_ttt(&t0, &t0, &t1);  | 
344  | 0  |   fe_sq_tt(&t2, &t0);  | 
345  | 0  |   fe_mul_ttt(&t1, &t1, &t2);  | 
346  | 0  |   fe_sq_tt(&t2, &t1);  | 
347  | 0  |   for (i = 1; i < 5; ++i) { | 
348  | 0  |     fe_sq_tt(&t2, &t2);  | 
349  | 0  |   }  | 
350  | 0  |   fe_mul_ttt(&t1, &t2, &t1);  | 
351  | 0  |   fe_sq_tt(&t2, &t1);  | 
352  | 0  |   for (i = 1; i < 10; ++i) { | 
353  | 0  |     fe_sq_tt(&t2, &t2);  | 
354  | 0  |   }  | 
355  | 0  |   fe_mul_ttt(&t2, &t2, &t1);  | 
356  | 0  |   fe_sq_tt(&t3, &t2);  | 
357  | 0  |   for (i = 1; i < 20; ++i) { | 
358  | 0  |     fe_sq_tt(&t3, &t3);  | 
359  | 0  |   }  | 
360  | 0  |   fe_mul_ttt(&t2, &t3, &t2);  | 
361  | 0  |   fe_sq_tt(&t2, &t2);  | 
362  | 0  |   for (i = 1; i < 10; ++i) { | 
363  | 0  |     fe_sq_tt(&t2, &t2);  | 
364  | 0  |   }  | 
365  | 0  |   fe_mul_ttt(&t1, &t2, &t1);  | 
366  | 0  |   fe_sq_tt(&t2, &t1);  | 
367  | 0  |   for (i = 1; i < 50; ++i) { | 
368  | 0  |     fe_sq_tt(&t2, &t2);  | 
369  | 0  |   }  | 
370  | 0  |   fe_mul_ttt(&t2, &t2, &t1);  | 
371  | 0  |   fe_sq_tt(&t3, &t2);  | 
372  | 0  |   for (i = 1; i < 100; ++i) { | 
373  | 0  |     fe_sq_tt(&t3, &t3);  | 
374  | 0  |   }  | 
375  | 0  |   fe_mul_ttt(&t2, &t3, &t2);  | 
376  | 0  |   fe_sq_tt(&t2, &t2);  | 
377  | 0  |   for (i = 1; i < 50; ++i) { | 
378  | 0  |     fe_sq_tt(&t2, &t2);  | 
379  | 0  |   }  | 
380  | 0  |   fe_mul_ttt(&t1, &t2, &t1);  | 
381  | 0  |   fe_sq_tt(&t1, &t1);  | 
382  | 0  |   for (i = 1; i < 5; ++i) { | 
383  | 0  |     fe_sq_tt(&t1, &t1);  | 
384  | 0  |   }  | 
385  | 0  |   fe_mul_ttt(out, &t1, &t0);  | 
386  | 0  | }  | 
387  |  |  | 
388  | 0  | static void fe_invert(fe *out, const fe *z) { | 
389  | 0  |   fe_loose l;  | 
390  | 0  |   fe_copy_lt(&l, z);  | 
391  | 0  |   fe_loose_invert(out, &l);  | 
392  | 0  | }  | 
393  |  |  | 
394  |  | // return 0 if f == 0  | 
395  |  | // return 1 if f != 0  | 
396  | 0  | static int fe_isnonzero(const fe_loose *f) { | 
397  | 0  |   fe tight;  | 
398  | 0  |   fe_carry(&tight, f);  | 
399  | 0  |   uint8_t s[32];  | 
400  | 0  |   fe_tobytes(s, &tight);  | 
401  |  | 
  | 
402  | 0  |   static const uint8_t zero[32] = {0}; | 
403  | 0  |   return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0;  | 
404  | 0  | }  | 
405  |  |  | 
406  |  | // return 1 if f is in {1,3,5,...,q-2} | 
407  |  | // return 0 if f is in {0,2,4,...,q-1} | 
408  | 0  | static int fe_isnegative(const fe *f) { | 
409  | 0  |   uint8_t s[32];  | 
410  | 0  |   fe_tobytes(s, f);  | 
411  | 0  |   return s[0] & 1;  | 
412  | 0  | }  | 
413  |  |  | 
414  | 0  | static void fe_sq2_tt(fe *h, const fe *f) { | 
415  |  |   // h = f^2  | 
416  | 0  |   fe_sq_tt(h, f);  | 
417  |  |  | 
418  |  |   // h = h + h  | 
419  | 0  |   fe_loose tmp;  | 
420  | 0  |   fe_add(&tmp, h, h);  | 
421  | 0  |   fe_carry(h, &tmp);  | 
422  | 0  | }  | 
423  |  |  | 
424  | 0  | static void fe_pow22523(fe *out, const fe *z) { | 
425  | 0  |   fe t0;  | 
426  | 0  |   fe t1;  | 
427  | 0  |   fe t2;  | 
428  | 0  |   int i;  | 
429  |  | 
  | 
430  | 0  |   fe_sq_tt(&t0, z);  | 
431  | 0  |   fe_sq_tt(&t1, &t0);  | 
432  | 0  |   for (i = 1; i < 2; ++i) { | 
433  | 0  |     fe_sq_tt(&t1, &t1);  | 
434  | 0  |   }  | 
435  | 0  |   fe_mul_ttt(&t1, z, &t1);  | 
436  | 0  |   fe_mul_ttt(&t0, &t0, &t1);  | 
437  | 0  |   fe_sq_tt(&t0, &t0);  | 
438  | 0  |   fe_mul_ttt(&t0, &t1, &t0);  | 
439  | 0  |   fe_sq_tt(&t1, &t0);  | 
440  | 0  |   for (i = 1; i < 5; ++i) { | 
441  | 0  |     fe_sq_tt(&t1, &t1);  | 
442  | 0  |   }  | 
443  | 0  |   fe_mul_ttt(&t0, &t1, &t0);  | 
444  | 0  |   fe_sq_tt(&t1, &t0);  | 
445  | 0  |   for (i = 1; i < 10; ++i) { | 
446  | 0  |     fe_sq_tt(&t1, &t1);  | 
447  | 0  |   }  | 
448  | 0  |   fe_mul_ttt(&t1, &t1, &t0);  | 
449  | 0  |   fe_sq_tt(&t2, &t1);  | 
450  | 0  |   for (i = 1; i < 20; ++i) { | 
451  | 0  |     fe_sq_tt(&t2, &t2);  | 
452  | 0  |   }  | 
453  | 0  |   fe_mul_ttt(&t1, &t2, &t1);  | 
454  | 0  |   fe_sq_tt(&t1, &t1);  | 
455  | 0  |   for (i = 1; i < 10; ++i) { | 
456  | 0  |     fe_sq_tt(&t1, &t1);  | 
457  | 0  |   }  | 
458  | 0  |   fe_mul_ttt(&t0, &t1, &t0);  | 
459  | 0  |   fe_sq_tt(&t1, &t0);  | 
460  | 0  |   for (i = 1; i < 50; ++i) { | 
461  | 0  |     fe_sq_tt(&t1, &t1);  | 
462  | 0  |   }  | 
463  | 0  |   fe_mul_ttt(&t1, &t1, &t0);  | 
464  | 0  |   fe_sq_tt(&t2, &t1);  | 
465  | 0  |   for (i = 1; i < 100; ++i) { | 
466  | 0  |     fe_sq_tt(&t2, &t2);  | 
467  | 0  |   }  | 
468  | 0  |   fe_mul_ttt(&t1, &t2, &t1);  | 
469  | 0  |   fe_sq_tt(&t1, &t1);  | 
470  | 0  |   for (i = 1; i < 50; ++i) { | 
471  | 0  |     fe_sq_tt(&t1, &t1);  | 
472  | 0  |   }  | 
473  | 0  |   fe_mul_ttt(&t0, &t1, &t0);  | 
474  | 0  |   fe_sq_tt(&t0, &t0);  | 
475  | 0  |   for (i = 1; i < 2; ++i) { | 
476  | 0  |     fe_sq_tt(&t0, &t0);  | 
477  | 0  |   }  | 
478  | 0  |   fe_mul_ttt(out, &t0, z);  | 
479  | 0  | }  | 
480  |  |  | 
481  |  |  | 
482  |  | // Group operations.  | 
483  |  |  | 
484  | 0  | int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) { | 
485  | 0  |   fe u;  | 
486  | 0  |   fe_loose v;  | 
487  | 0  |   fe w;  | 
488  | 0  |   fe vxx;  | 
489  | 0  |   fe_loose check;  | 
490  |  | 
  | 
491  | 0  |   fe_frombytes(&h->Y, s);  | 
492  | 0  |   fe_1(&h->Z);  | 
493  | 0  |   fe_sq_tt(&w, &h->Y);  | 
494  | 0  |   fe_mul_ttt(&vxx, &w, &d);  | 
495  | 0  |   fe_sub(&v, &w, &h->Z);  // u = y^2-1  | 
496  | 0  |   fe_carry(&u, &v);  | 
497  | 0  |   fe_add(&v, &vxx, &h->Z);  // v = dy^2+1  | 
498  |  | 
  | 
499  | 0  |   fe_mul_ttl(&w, &u, &v);  // w = u*v  | 
500  | 0  |   fe_pow22523(&h->X, &w);  // x = w^((q-5)/8)  | 
501  | 0  |   fe_mul_ttt(&h->X, &h->X, &u);  // x = u*w^((q-5)/8)  | 
502  |  | 
  | 
503  | 0  |   fe_sq_tt(&vxx, &h->X);  | 
504  | 0  |   fe_mul_ttl(&vxx, &vxx, &v);  | 
505  | 0  |   fe_sub(&check, &vxx, &u);  | 
506  | 0  |   if (fe_isnonzero(&check)) { | 
507  | 0  |     fe_add(&check, &vxx, &u);  | 
508  | 0  |     if (fe_isnonzero(&check)) { | 
509  | 0  |       return 0;  | 
510  | 0  |     }  | 
511  | 0  |     fe_mul_ttt(&h->X, &h->X, &sqrtm1);  | 
512  | 0  |   }  | 
513  |  |  | 
514  | 0  |   if (fe_isnegative(&h->X) != (s[31] >> 7)) { | 
515  | 0  |     fe_loose t;  | 
516  | 0  |     fe_neg(&t, &h->X);  | 
517  | 0  |     fe_carry(&h->X, &t);  | 
518  | 0  |   }  | 
519  |  | 
  | 
520  | 0  |   fe_mul_ttt(&h->T, &h->X, &h->Y);  | 
521  | 0  |   return 1;  | 
522  | 0  | }  | 
523  |  |  | 
524  | 0  | static void ge_p2_0(ge_p2 *h) { | 
525  | 0  |   fe_0(&h->X);  | 
526  | 0  |   fe_1(&h->Y);  | 
527  | 0  |   fe_1(&h->Z);  | 
528  | 0  | }  | 
529  |  |  | 
530  | 0  | static void ge_p3_0(ge_p3 *h) { | 
531  | 0  |   fe_0(&h->X);  | 
532  | 0  |   fe_1(&h->Y);  | 
533  | 0  |   fe_1(&h->Z);  | 
534  | 0  |   fe_0(&h->T);  | 
535  | 0  | }  | 
536  |  |  | 
537  |  | #if defined(OPENSSL_SMALL)  | 
538  |  |  | 
539  |  | static void ge_precomp_0(ge_precomp *h) { | 
540  |  |   fe_loose_1(&h->yplusx);  | 
541  |  |   fe_loose_1(&h->yminusx);  | 
542  |  |   fe_loose_0(&h->xy2d);  | 
543  |  | }  | 
544  |  |  | 
545  |  | #endif  | 
546  |  |  | 
547  |  | // r = p  | 
548  | 0  | static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) { | 
549  | 0  |   fe_copy(&r->X, &p->X);  | 
550  | 0  |   fe_copy(&r->Y, &p->Y);  | 
551  | 0  |   fe_copy(&r->Z, &p->Z);  | 
552  | 0  | }  | 
553  |  |  | 
554  |  | // r = p  | 
555  | 0  | static void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) { | 
556  | 0  |   fe_add(&r->YplusX, &p->Y, &p->X);  | 
557  | 0  |   fe_sub(&r->YminusX, &p->Y, &p->X);  | 
558  | 0  |   fe_copy_lt(&r->Z, &p->Z);  | 
559  | 0  |   fe_mul_ltt(&r->T2d, &p->T, &d2);  | 
560  | 0  | }  | 
561  |  |  | 
562  |  | // r = p  | 
563  | 0  | static void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) { | 
564  | 0  |   fe_mul_tll(&r->X, &p->X, &p->T);  | 
565  | 0  |   fe_mul_tll(&r->Y, &p->Y, &p->Z);  | 
566  | 0  |   fe_mul_tll(&r->Z, &p->Z, &p->T);  | 
567  | 0  | }  | 
568  |  |  | 
569  |  | // r = p  | 
570  | 0  | static void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) { | 
571  | 0  |   fe_mul_tll(&r->X, &p->X, &p->T);  | 
572  | 0  |   fe_mul_tll(&r->Y, &p->Y, &p->Z);  | 
573  | 0  |   fe_mul_tll(&r->Z, &p->Z, &p->T);  | 
574  | 0  |   fe_mul_tll(&r->T, &p->X, &p->Y);  | 
575  | 0  | }  | 
576  |  |  | 
577  |  | // r = 2 * p  | 
578  | 0  | static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) { | 
579  | 0  |   fe trX, trZ, trT;  | 
580  | 0  |   fe t0;  | 
581  |  | 
  | 
582  | 0  |   fe_sq_tt(&trX, &p->X);  | 
583  | 0  |   fe_sq_tt(&trZ, &p->Y);  | 
584  | 0  |   fe_sq2_tt(&trT, &p->Z);  | 
585  | 0  |   fe_add(&r->Y, &p->X, &p->Y);  | 
586  | 0  |   fe_sq_tl(&t0, &r->Y);  | 
587  |  | 
  | 
588  | 0  |   fe_add(&r->Y, &trZ, &trX);  | 
589  | 0  |   fe_sub(&r->Z, &trZ, &trX);  | 
590  | 0  |   fe_carry(&trZ, &r->Y);  | 
591  | 0  |   fe_sub(&r->X, &t0, &trZ);  | 
592  | 0  |   fe_carry(&trZ, &r->Z);  | 
593  | 0  |   fe_sub(&r->T, &trT, &trZ);  | 
594  | 0  | }  | 
595  |  |  | 
596  |  | // r = 2 * p  | 
597  | 0  | static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) { | 
598  | 0  |   ge_p2 q;  | 
599  | 0  |   ge_p3_to_p2(&q, p);  | 
600  | 0  |   ge_p2_dbl(r, &q);  | 
601  | 0  | }  | 
602  |  |  | 
603  |  | // r = p + q  | 
604  | 0  | static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { | 
605  | 0  |   fe trY, trZ, trT;  | 
606  |  | 
  | 
607  | 0  |   fe_add(&r->X, &p->Y, &p->X);  | 
608  | 0  |   fe_sub(&r->Y, &p->Y, &p->X);  | 
609  | 0  |   fe_mul_tll(&trZ, &r->X, &q->yplusx);  | 
610  | 0  |   fe_mul_tll(&trY, &r->Y, &q->yminusx);  | 
611  | 0  |   fe_mul_tlt(&trT, &q->xy2d, &p->T);  | 
612  | 0  |   fe_add(&r->T, &p->Z, &p->Z);  | 
613  | 0  |   fe_sub(&r->X, &trZ, &trY);  | 
614  | 0  |   fe_add(&r->Y, &trZ, &trY);  | 
615  | 0  |   fe_carry(&trZ, &r->T);  | 
616  | 0  |   fe_add(&r->Z, &trZ, &trT);  | 
617  | 0  |   fe_sub(&r->T, &trZ, &trT);  | 
618  | 0  | }  | 
619  |  |  | 
620  |  | // r = p - q  | 
621  | 0  | static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { | 
622  | 0  |   fe trY, trZ, trT;  | 
623  |  | 
  | 
624  | 0  |   fe_add(&r->X, &p->Y, &p->X);  | 
625  | 0  |   fe_sub(&r->Y, &p->Y, &p->X);  | 
626  | 0  |   fe_mul_tll(&trZ, &r->X, &q->yminusx);  | 
627  | 0  |   fe_mul_tll(&trY, &r->Y, &q->yplusx);  | 
628  | 0  |   fe_mul_tlt(&trT, &q->xy2d, &p->T);  | 
629  | 0  |   fe_add(&r->T, &p->Z, &p->Z);  | 
630  | 0  |   fe_sub(&r->X, &trZ, &trY);  | 
631  | 0  |   fe_add(&r->Y, &trZ, &trY);  | 
632  | 0  |   fe_carry(&trZ, &r->T);  | 
633  | 0  |   fe_sub(&r->Z, &trZ, &trT);  | 
634  | 0  |   fe_add(&r->T, &trZ, &trT);  | 
635  | 0  | }  | 
636  |  |  | 
637  |  | // r = p + q  | 
638  | 0  | static void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { | 
639  | 0  |   fe trX, trY, trZ, trT;  | 
640  |  | 
  | 
641  | 0  |   fe_add(&r->X, &p->Y, &p->X);  | 
642  | 0  |   fe_sub(&r->Y, &p->Y, &p->X);  | 
643  | 0  |   fe_mul_tll(&trZ, &r->X, &q->YplusX);  | 
644  | 0  |   fe_mul_tll(&trY, &r->Y, &q->YminusX);  | 
645  | 0  |   fe_mul_tlt(&trT, &q->T2d, &p->T);  | 
646  | 0  |   fe_mul_ttl(&trX, &p->Z, &q->Z);  | 
647  | 0  |   fe_add(&r->T, &trX, &trX);  | 
648  | 0  |   fe_sub(&r->X, &trZ, &trY);  | 
649  | 0  |   fe_add(&r->Y, &trZ, &trY);  | 
650  | 0  |   fe_carry(&trZ, &r->T);  | 
651  | 0  |   fe_add(&r->Z, &trZ, &trT);  | 
652  | 0  |   fe_sub(&r->T, &trZ, &trT);  | 
653  | 0  | }  | 
654  |  |  | 
655  |  | // r = p - q  | 
656  | 0  | static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { | 
657  | 0  |   fe trX, trY, trZ, trT;  | 
658  |  | 
  | 
659  | 0  |   fe_add(&r->X, &p->Y, &p->X);  | 
660  | 0  |   fe_sub(&r->Y, &p->Y, &p->X);  | 
661  | 0  |   fe_mul_tll(&trZ, &r->X, &q->YminusX);  | 
662  | 0  |   fe_mul_tll(&trY, &r->Y, &q->YplusX);  | 
663  | 0  |   fe_mul_tlt(&trT, &q->T2d, &p->T);  | 
664  | 0  |   fe_mul_ttl(&trX, &p->Z, &q->Z);  | 
665  | 0  |   fe_add(&r->T, &trX, &trX);  | 
666  | 0  |   fe_sub(&r->X, &trZ, &trY);  | 
667  | 0  |   fe_add(&r->Y, &trZ, &trY);  | 
668  | 0  |   fe_carry(&trZ, &r->T);  | 
669  | 0  |   fe_sub(&r->Z, &trZ, &trT);  | 
670  | 0  |   fe_add(&r->T, &trZ, &trT);  | 
671  | 0  | }  | 
672  |  |  | 
673  | 0  | static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) { | 
674  | 0  |   fe_cmov(&t->yplusx, &u->yplusx, b);  | 
675  | 0  |   fe_cmov(&t->yminusx, &u->yminusx, b);  | 
676  | 0  |   fe_cmov(&t->xy2d, &u->xy2d, b);  | 
677  | 0  | }  | 
678  |  |  | 
679  |  | #if defined(OPENSSL_SMALL)  | 
680  |  |  | 
681  |  | static void x25519_ge_scalarmult_small_precomp(  | 
682  |  |     ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) { | 
683  |  |   // precomp_table is first expanded into matching |ge_precomp|  | 
684  |  |   // elements.  | 
685  |  |   ge_precomp multiples[15];  | 
686  |  |  | 
687  |  |   unsigned i;  | 
688  |  |   for (i = 0; i < 15; i++) { | 
689  |  |     // The precomputed table is assumed to already clear the top bit, so  | 
690  |  |     // |fe_frombytes_strict| may be used directly.  | 
691  |  |     const uint8_t *bytes = &precomp_table[i*(2 * 32)];  | 
692  |  |     fe x, y;  | 
693  |  |     fe_frombytes_strict(&x, bytes);  | 
694  |  |     fe_frombytes_strict(&y, bytes + 32);  | 
695  |  |  | 
696  |  |     ge_precomp *out = &multiples[i];  | 
697  |  |     fe_add(&out->yplusx, &y, &x);  | 
698  |  |     fe_sub(&out->yminusx, &y, &x);  | 
699  |  |     fe_mul_ltt(&out->xy2d, &x, &y);  | 
700  |  |     fe_mul_llt(&out->xy2d, &out->xy2d, &d2);  | 
701  |  |   }  | 
702  |  |  | 
703  |  |   // See the comment above |k25519SmallPrecomp| about the structure of the  | 
704  |  |   // precomputed elements. This loop does 64 additions and 64 doublings to  | 
705  |  |   // calculate the result.  | 
706  |  |   ge_p3_0(h);  | 
707  |  |  | 
708  |  |   for (i = 63; i < 64; i--) { | 
709  |  |     unsigned j;  | 
710  |  |     signed char index = 0;  | 
711  |  |  | 
712  |  |     for (j = 0; j < 4; j++) { | 
713  |  |       const uint8_t bit = 1 & (a[(8 * j) + (i / 8)] >> (i & 7));  | 
714  |  |       index |= (bit << j);  | 
715  |  |     }  | 
716  |  |  | 
717  |  |     ge_precomp e;  | 
718  |  |     ge_precomp_0(&e);  | 
719  |  |  | 
720  |  |     for (j = 1; j < 16; j++) { | 
721  |  |       cmov(&e, &multiples[j-1], 1&constant_time_eq_w(index, j));  | 
722  |  |     }  | 
723  |  |  | 
724  |  |     ge_cached cached;  | 
725  |  |     ge_p1p1 r;  | 
726  |  |     x25519_ge_p3_to_cached(&cached, h);  | 
727  |  |     x25519_ge_add(&r, h, &cached);  | 
728  |  |     x25519_ge_p1p1_to_p3(h, &r);  | 
729  |  |  | 
730  |  |     ge_madd(&r, h, &e);  | 
731  |  |     x25519_ge_p1p1_to_p3(h, &r);  | 
732  |  |   }  | 
733  |  | }  | 
734  |  |  | 
735  |  | void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32], int use_adx) { | 
736  |  |   (void)use_adx;  | 
737  |  |   x25519_ge_scalarmult_small_precomp(h, a, k25519SmallPrecomp);  | 
738  |  | }  | 
739  |  |  | 
740  |  | #else  | 
741  |  |  | 
742  | 0  | static void table_select(ge_precomp *t, const int pos, const signed char b) { | 
743  | 0  |   uint8_t bnegative = constant_time_msb_w(b);  | 
744  | 0  |   uint8_t babs = b - ((bnegative & b) << 1);  | 
745  |  | 
  | 
746  | 0  |   uint8_t t_bytes[3][32] = { | 
747  | 0  |       {constant_time_is_zero_w(b) & 1}, {constant_time_is_zero_w(b) & 1}, {0}}; | 
748  | 0  | #if defined(__clang__) // materialize for vectorization, 6% speedup  | 
749  | 0  |   __asm__("" : "+m" (t_bytes) : /*no inputs*/); | 
750  | 0  | #endif  | 
751  | 0  |   OPENSSL_STATIC_ASSERT(sizeof(t_bytes) == sizeof(k25519Precomp[pos][0]), "");  | 
752  | 0  |   for (int i = 0; i < 8; i++) { | 
753  | 0  |     constant_time_conditional_memxor(t_bytes, k25519Precomp[pos][i],  | 
754  | 0  |                                      sizeof(t_bytes),  | 
755  | 0  |                                      constant_time_eq_w(babs, 1 + i));  | 
756  | 0  |   }  | 
757  |  | 
  | 
758  | 0  |   fe yplusx, yminusx, xy2d;  | 
759  | 0  |   fe_frombytes_strict(&yplusx, t_bytes[0]);  | 
760  | 0  |   fe_frombytes_strict(&yminusx, t_bytes[1]);  | 
761  | 0  |   fe_frombytes_strict(&xy2d, t_bytes[2]);  | 
762  |  | 
  | 
763  | 0  |   fe_copy_lt(&t->yplusx, &yplusx);  | 
764  | 0  |   fe_copy_lt(&t->yminusx, &yminusx);  | 
765  | 0  |   fe_copy_lt(&t->xy2d, &xy2d);  | 
766  |  | 
  | 
767  | 0  |   ge_precomp minust;  | 
768  | 0  |   fe_copy_lt(&minust.yplusx, &yminusx);  | 
769  | 0  |   fe_copy_lt(&minust.yminusx, &yplusx);  | 
770  | 0  |   fe_neg(&minust.xy2d, &xy2d);  | 
771  | 0  |   cmov(t, &minust, bnegative>>7);  | 
772  | 0  | }  | 
773  |  |  | 
774  |  | // h = a * B  | 
775  |  | // where a = a[0]+256*a[1]+...+256^31 a[31]  | 
776  |  | // B is the Ed25519 base point (x,4/5) with x positive.  | 
777  |  | //  | 
778  |  | // Preconditions:  | 
779  |  | //   a[31] <= 127  | 
780  | 0  | void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32], int use_adx) { | 
781  | 0  | #if defined(BORINGSSL_FE25519_ADX)  | 
782  | 0  |   if (use_adx) { | 
783  | 0  |     uint8_t t[4][32];  | 
784  | 0  |     x25519_ge_scalarmult_base_adx(t, a);  | 
785  | 0  |     fiat_25519_from_bytes(h->X.v, t[0]);  | 
786  | 0  |     fiat_25519_from_bytes(h->Y.v, t[1]);  | 
787  | 0  |     fiat_25519_from_bytes(h->Z.v, t[2]);  | 
788  | 0  |     fiat_25519_from_bytes(h->T.v, t[3]);  | 
789  | 0  |     return;  | 
790  | 0  |   }  | 
791  |  | #else  | 
792  |  |   (void)use_adx;  | 
793  |  | #endif  | 
794  | 0  |   signed char e[64];  | 
795  | 0  |   signed char carry;  | 
796  | 0  |   ge_p1p1 r;  | 
797  | 0  |   ge_p2 s;  | 
798  | 0  |   ge_precomp t;  | 
799  | 0  |   int i;  | 
800  |  | 
  | 
801  | 0  |   for (i = 0; i < 32; ++i) { | 
802  | 0  |     e[2 * i + 0] = (a[i] >> 0) & 15;  | 
803  | 0  |     e[2 * i + 1] = (a[i] >> 4) & 15;  | 
804  | 0  |   }  | 
805  |  |   // each e[i] is between 0 and 15  | 
806  |  |   // e[63] is between 0 and 7  | 
807  |  | 
  | 
808  | 0  |   carry = 0;  | 
809  | 0  |   for (i = 0; i < 63; ++i) { | 
810  | 0  |     e[i] += carry;  | 
811  | 0  |     carry = e[i] + 8;  | 
812  | 0  |     carry >>= 4;  | 
813  | 0  |     e[i] -= carry << 4;  | 
814  | 0  |   }  | 
815  | 0  |   e[63] += carry;  | 
816  |  |   // each e[i] is between -8 and 8  | 
817  |  | 
  | 
818  | 0  |   ge_p3_0(h);  | 
819  | 0  |   for (i = 1; i < 64; i += 2) { | 
820  | 0  |     table_select(&t, i / 2, e[i]);  | 
821  | 0  |     ge_madd(&r, h, &t);  | 
822  | 0  |     x25519_ge_p1p1_to_p3(h, &r);  | 
823  | 0  |   }  | 
824  |  | 
  | 
825  | 0  |   ge_p3_dbl(&r, h);  | 
826  | 0  |   x25519_ge_p1p1_to_p2(&s, &r);  | 
827  | 0  |   ge_p2_dbl(&r, &s);  | 
828  | 0  |   x25519_ge_p1p1_to_p2(&s, &r);  | 
829  | 0  |   ge_p2_dbl(&r, &s);  | 
830  | 0  |   x25519_ge_p1p1_to_p2(&s, &r);  | 
831  | 0  |   ge_p2_dbl(&r, &s);  | 
832  | 0  |   x25519_ge_p1p1_to_p3(h, &r);  | 
833  |  | 
  | 
834  | 0  |   for (i = 0; i < 64; i += 2) { | 
835  | 0  |     table_select(&t, i / 2, e[i]);  | 
836  | 0  |     ge_madd(&r, h, &t);  | 
837  | 0  |     x25519_ge_p1p1_to_p3(h, &r);  | 
838  | 0  |   }  | 
839  | 0  | }  | 
840  |  |  | 
841  |  | #endif  | 
842  |  |  | 
// slide recodes the 256-bit little-endian scalar |a| (32 bytes) into 256
// signed digits |r| such that sum(r[i] * 2^i) equals the scalar. Every
// non-zero digit is odd and lies in [-15, 15]. |r| must have room for 256
// entries.
static void slide(signed char *r, const uint8_t *a) {
  // Start from the plain binary expansion: one bit per output slot.
  for (int bit = 0; bit < 256; bit++) {
    r[bit] = (signed char)((a[bit / 8] >> (bit % 8)) & 1);
  }

  for (int pos = 0; pos < 256; pos++) {
    if (r[pos] == 0) {
      continue;
    }

    // Try to absorb each set bit among the next six positions into r[pos].
    for (int off = 1; off <= 6 && pos + off < 256; off++) {
      if (r[pos + off] == 0) {
        continue;
      }

      const int shifted = r[pos + off] << off;
      if (r[pos] + shifted <= 15) {
        // Adding keeps the digit in range: merge the bit upwards into r[pos].
        r[pos] = (signed char)(r[pos] + shifted);
        r[pos + off] = 0;
        continue;
      }

      if (r[pos] - shifted < -15) {
        // Neither adding nor subtracting fits; stop widening this digit.
        break;
      }

      // Subtract instead and compensate by adding 2^(pos + off) to the
      // remaining bits: clear the run of ones and set the first zero.
      r[pos] = (signed char)(r[pos] - shifted);
      int up = pos + off;
      while (up < 256 && r[up] != 0) {
        r[up] = 0;
        up++;
      }
      if (up < 256) {
        r[up] = 1;
      }
    }
  }
}
876  |  |  | 
877  |  | // r = a * A + b * B  | 
878  |  | // where a = a[0]+256*a[1]+...+256^31 a[31].  | 
879  |  | // and b = b[0]+256*b[1]+...+256^31 b[31].  | 
880  |  | // B is the Ed25519 base point (x,4/5) with x positive.  | 
881  |  | static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,  | 
882  | 0  |                                          const ge_p3 *A, const uint8_t *b) { | 
883  | 0  |   signed char aslide[256];  | 
884  | 0  |   signed char bslide[256];  | 
885  | 0  |   ge_cached Ai[8];  // A,3A,5A,7A,9A,11A,13A,15A  | 
886  | 0  |   ge_p1p1 t;  | 
887  | 0  |   ge_p3 u;  | 
888  | 0  |   ge_p3 A2;  | 
889  | 0  |   int i;  | 
890  |  | 
  | 
891  | 0  |   slide(aslide, a);  | 
892  | 0  |   slide(bslide, b);  | 
893  |  | 
  | 
894  | 0  |   x25519_ge_p3_to_cached(&Ai[0], A);  | 
895  | 0  |   ge_p3_dbl(&t, A);  | 
896  | 0  |   x25519_ge_p1p1_to_p3(&A2, &t);  | 
897  | 0  |   x25519_ge_add(&t, &A2, &Ai[0]);  | 
898  | 0  |   x25519_ge_p1p1_to_p3(&u, &t);  | 
899  | 0  |   x25519_ge_p3_to_cached(&Ai[1], &u);  | 
900  | 0  |   x25519_ge_add(&t, &A2, &Ai[1]);  | 
901  | 0  |   x25519_ge_p1p1_to_p3(&u, &t);  | 
902  | 0  |   x25519_ge_p3_to_cached(&Ai[2], &u);  | 
903  | 0  |   x25519_ge_add(&t, &A2, &Ai[2]);  | 
904  | 0  |   x25519_ge_p1p1_to_p3(&u, &t);  | 
905  | 0  |   x25519_ge_p3_to_cached(&Ai[3], &u);  | 
906  | 0  |   x25519_ge_add(&t, &A2, &Ai[3]);  | 
907  | 0  |   x25519_ge_p1p1_to_p3(&u, &t);  | 
908  | 0  |   x25519_ge_p3_to_cached(&Ai[4], &u);  | 
909  | 0  |   x25519_ge_add(&t, &A2, &Ai[4]);  | 
910  | 0  |   x25519_ge_p1p1_to_p3(&u, &t);  | 
911  | 0  |   x25519_ge_p3_to_cached(&Ai[5], &u);  | 
912  | 0  |   x25519_ge_add(&t, &A2, &Ai[5]);  | 
913  | 0  |   x25519_ge_p1p1_to_p3(&u, &t);  | 
914  | 0  |   x25519_ge_p3_to_cached(&Ai[6], &u);  | 
915  | 0  |   x25519_ge_add(&t, &A2, &Ai[6]);  | 
916  | 0  |   x25519_ge_p1p1_to_p3(&u, &t);  | 
917  | 0  |   x25519_ge_p3_to_cached(&Ai[7], &u);  | 
918  |  | 
  | 
919  | 0  |   ge_p2_0(r);  | 
920  |  | 
  | 
921  | 0  |   for (i = 255; i >= 0; --i) { | 
922  | 0  |     if (aslide[i] || bslide[i]) { | 
923  | 0  |       break;  | 
924  | 0  |     }  | 
925  | 0  |   }  | 
926  |  | 
  | 
927  | 0  |   for (; i >= 0; --i) { | 
928  | 0  |     ge_p2_dbl(&t, r);  | 
929  |  | 
  | 
930  | 0  |     if (aslide[i] > 0) { | 
931  | 0  |       x25519_ge_p1p1_to_p3(&u, &t);  | 
932  | 0  |       x25519_ge_add(&t, &u, &Ai[aslide[i] / 2]);  | 
933  | 0  |     } else if (aslide[i] < 0) { | 
934  | 0  |       x25519_ge_p1p1_to_p3(&u, &t);  | 
935  | 0  |       x25519_ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);  | 
936  | 0  |     }  | 
937  |  | 
  | 
938  | 0  |     if (bslide[i] > 0) { | 
939  | 0  |       x25519_ge_p1p1_to_p3(&u, &t);  | 
940  | 0  |       ge_madd(&t, &u, &Bi[bslide[i] / 2]);  | 
941  | 0  |     } else if (bslide[i] < 0) { | 
942  | 0  |       x25519_ge_p1p1_to_p3(&u, &t);  | 
943  | 0  |       ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);  | 
944  | 0  |     }  | 
945  |  | 
  | 
946  | 0  |     x25519_ge_p1p1_to_p2(r, &t);  | 
947  | 0  |   }  | 
948  | 0  | }  | 
949  |  |  | 
// int64_lshift21 returns |a| shifted left by 21 bits. The shift is carried out
// on the unsigned representation, so it stays well-defined when |a| is
// negative or when bits move into the sign position (a plain signed shift
// would be undefined behaviour in C in those cases).
static inline int64_t int64_lshift21(int64_t a) {
  const uint64_t bits = (uint64_t)a;
  const uint64_t shifted = bits << 21;
  return (int64_t)shifted;
}
955  |  |  | 
956  |  | // The set of scalars is \Z/l  | 
957  |  | // where l = 2^252 + 27742317777372353535851937790883648493.  | 
958  |  |  | 
// sc_reduce_fold eliminates limb[hi] (hi >= 12) by adding fixed multiples of
// it to the six limbs hi-12 .. hi-7, then zeroing it.
// NOTE(review): the six multipliers look like the signed radix-2^21 digits of
// 2^252 mod l, which is what would make this step value-preserving mod l;
// confirm against the ref10 derivation.
static inline void sc_reduce_fold(int64_t *limb, int hi) {
  const int64_t v = limb[hi];
  int64_t *lo = &limb[hi - 12];
  lo[0] += v * 666643;
  lo[1] += v * 470296;
  lo[2] += v * 654183;
  lo[3] -= v * 997805;
  lo[4] += v * 136657;
  lo[5] -= v * 683901;
  limb[hi] = 0;
}

// sc_reduce_carry_round moves the rounded quotient of limb[i] by 2^21 into
// limb[i + 1], leaving a remainder in [-2^20, 2^20) behind.
static inline void sc_reduce_carry_round(int64_t *limb, int i) {
  const int64_t carry = (limb[i] + (1 << 20)) >> 21;
  limb[i + 1] += carry;
  limb[i] -= int64_lshift21(carry);
}

// sc_reduce_carry_floor moves the floored quotient of limb[i] by 2^21 into
// limb[i + 1], leaving a remainder in [0, 2^21) behind.
static inline void sc_reduce_carry_floor(int64_t *limb, int i) {
  const int64_t carry = limb[i] >> 21;
  limb[i + 1] += carry;
  limb[i] -= int64_lshift21(carry);
}

// x25519_sc_reduce reduces a 64-byte little-endian integer modulo l.
//
// Input:
//   s[0]+256*s[1]+...+256^63*s[63] = s
//
// Output:
//   s[0]+256*s[1]+...+256^31*s[31] = s mod l
//   where l = 2^252 + 27742317777372353535851937790883648493.
//   The result overwrites the first 32 bytes of |s|.
void x25519_sc_reduce(uint8_t s[64]) {
  int64_t limb[24];

  // Unpack the input into 21-bit limbs. Limb i starts at bit 21*i. A field
  // that begins at bit offset 0..3 of its first byte fits in three bytes;
  // otherwise four bytes are loaded.
  for (int i = 0; i < 23; i++) {
    const int bit = 21 * i;
    const uint8_t *src = s + (bit >> 3);
    const int shift = bit & 7;
    if (shift <= 3) {
      limb[i] = 2097151 & (load_3(src) >> shift);
    } else {
      limb[i] = 2097151 & (load_4(src) >> shift);
    }
  }
  // The top limb keeps every remaining bit, so it is not masked.
  limb[23] = load_4(s + 60) >> 3;

  // Fold limbs 23..18 down into limbs 6..16.
  for (int hi = 23; hi >= 18; hi--) {
    sc_reduce_fold(limb, hi);
  }

  // Rebalance limbs 6..16: even positions first, then odd ones.
  for (int i = 6; i <= 16; i += 2) {
    sc_reduce_carry_round(limb, i);
  }
  for (int i = 7; i <= 15; i += 2) {
    sc_reduce_carry_round(limb, i);
  }

  // Fold limbs 17..12 down into limbs 0..10.
  for (int hi = 17; hi >= 12; hi--) {
    sc_reduce_fold(limb, hi);
  }

  // Rebalance limbs 0..11; the carry out of limb 11 lands in limb 12.
  for (int i = 0; i <= 10; i += 2) {
    sc_reduce_carry_round(limb, i);
  }
  for (int i = 1; i <= 11; i += 2) {
    sc_reduce_carry_round(limb, i);
  }

  // Fold that carry back in, then normalise with floor carries so the limbs
  // become non-negative; this may spill into limb 12 once more.
  sc_reduce_fold(limb, 12);
  for (int i = 0; i <= 11; i++) {
    sc_reduce_carry_floor(limb, i);
  }

  // Final fold and carry chain; limb 11 is left as the top limb.
  sc_reduce_fold(limb, 12);
  for (int i = 0; i <= 10; i++) {
    sc_reduce_carry_floor(limb, i);
  }

  // Repack limbs 0..11 into 32 little-endian bytes. Output byte k starts at
  // bit 8*k of the packed value; when it begins past bit 13 of a limb it also
  // takes its high bits from the next limb.
  for (int k = 0; k < 32; k++) {
    const int bit = 8 * k;
    const int i = bit / 21;
    const int shift = bit % 21;
    int64_t v = limb[i] >> shift;
    if (shift > 13 && i < 11) {
      v |= limb[i + 1] << (21 - shift);
    }
    s[k] = (uint8_t)v;
  }
}
1298  |  |  | 
1299  |  | // Input:  | 
1300  |  | //   a[0]+256*a[1]+...+256^31*a[31] = a  | 
1301  |  | //   b[0]+256*b[1]+...+256^31*b[31] = b  | 
1302  |  | //   c[0]+256*c[1]+...+256^31*c[31] = c  | 
1303  |  | //  | 
1304  |  | // Output:  | 
1305  |  | //   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l  | 
1306  |  | //   where l = 2^252 + 27742317777372353535851937790883648493.  | 
1307  |  | static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,  | 
1308  | 0  |                       const uint8_t *c) { | 
1309  | 0  |   int64_t a0 = 2097151 & load_3(a);  | 
1310  | 0  |   int64_t a1 = 2097151 & (load_4(a + 2) >> 5);  | 
1311  | 0  |   int64_t a2 = 2097151 & (load_3(a + 5) >> 2);  | 
1312  | 0  |   int64_t a3 = 2097151 & (load_4(a + 7) >> 7);  | 
1313  | 0  |   int64_t a4 = 2097151 & (load_4(a + 10) >> 4);  | 
1314  | 0  |   int64_t a5 = 2097151 & (load_3(a + 13) >> 1);  | 
1315  | 0  |   int64_t a6 = 2097151 & (load_4(a + 15) >> 6);  | 
1316  | 0  |   int64_t a7 = 2097151 & (load_3(a + 18) >> 3);  | 
1317  | 0  |   int64_t a8 = 2097151 & load_3(a + 21);  | 
1318  | 0  |   int64_t a9 = 2097151 & (load_4(a + 23) >> 5);  | 
1319  | 0  |   int64_t a10 = 2097151 & (load_3(a + 26) >> 2);  | 
1320  | 0  |   int64_t a11 = (load_4(a + 28) >> 7);  | 
1321  | 0  |   int64_t b0 = 2097151 & load_3(b);  | 
1322  | 0  |   int64_t b1 = 2097151 & (load_4(b + 2) >> 5);  | 
1323  | 0  |   int64_t b2 = 2097151 & (load_3(b + 5) >> 2);  | 
1324  | 0  |   int64_t b3 = 2097151 & (load_4(b + 7) >> 7);  | 
1325  | 0  |   int64_t b4 = 2097151 & (load_4(b + 10) >> 4);  | 
1326  | 0  |   int64_t b5 = 2097151 & (load_3(b + 13) >> 1);  | 
1327  | 0  |   int64_t b6 = 2097151 & (load_4(b + 15) >> 6);  | 
1328  | 0  |   int64_t b7 = 2097151 & (load_3(b + 18) >> 3);  | 
1329  | 0  |   int64_t b8 = 2097151 & load_3(b + 21);  | 
1330  | 0  |   int64_t b9 = 2097151 & (load_4(b + 23) >> 5);  | 
1331  | 0  |   int64_t b10 = 2097151 & (load_3(b + 26) >> 2);  | 
1332  | 0  |   int64_t b11 = (load_4(b + 28) >> 7);  | 
1333  | 0  |   int64_t c0 = 2097151 & load_3(c);  | 
1334  | 0  |   int64_t c1 = 2097151 & (load_4(c + 2) >> 5);  | 
1335  | 0  |   int64_t c2 = 2097151 & (load_3(c + 5) >> 2);  | 
1336  | 0  |   int64_t c3 = 2097151 & (load_4(c + 7) >> 7);  | 
1337  | 0  |   int64_t c4 = 2097151 & (load_4(c + 10) >> 4);  | 
1338  | 0  |   int64_t c5 = 2097151 & (load_3(c + 13) >> 1);  | 
1339  | 0  |   int64_t c6 = 2097151 & (load_4(c + 15) >> 6);  | 
1340  | 0  |   int64_t c7 = 2097151 & (load_3(c + 18) >> 3);  | 
1341  | 0  |   int64_t c8 = 2097151 & load_3(c + 21);  | 
1342  | 0  |   int64_t c9 = 2097151 & (load_4(c + 23) >> 5);  | 
1343  | 0  |   int64_t c10 = 2097151 & (load_3(c + 26) >> 2);  | 
1344  | 0  |   int64_t c11 = (load_4(c + 28) >> 7);  | 
1345  | 0  |   int64_t s0;  | 
1346  | 0  |   int64_t s1;  | 
1347  | 0  |   int64_t s2;  | 
1348  | 0  |   int64_t s3;  | 
1349  | 0  |   int64_t s4;  | 
1350  | 0  |   int64_t s5;  | 
1351  | 0  |   int64_t s6;  | 
1352  | 0  |   int64_t s7;  | 
1353  | 0  |   int64_t s8;  | 
1354  | 0  |   int64_t s9;  | 
1355  | 0  |   int64_t s10;  | 
1356  | 0  |   int64_t s11;  | 
1357  | 0  |   int64_t s12;  | 
1358  | 0  |   int64_t s13;  | 
1359  | 0  |   int64_t s14;  | 
1360  | 0  |   int64_t s15;  | 
1361  | 0  |   int64_t s16;  | 
1362  | 0  |   int64_t s17;  | 
1363  | 0  |   int64_t s18;  | 
1364  | 0  |   int64_t s19;  | 
1365  | 0  |   int64_t s20;  | 
1366  | 0  |   int64_t s21;  | 
1367  | 0  |   int64_t s22;  | 
1368  | 0  |   int64_t s23;  | 
1369  | 0  |   int64_t carry0;  | 
1370  | 0  |   int64_t carry1;  | 
1371  | 0  |   int64_t carry2;  | 
1372  | 0  |   int64_t carry3;  | 
1373  | 0  |   int64_t carry4;  | 
1374  | 0  |   int64_t carry5;  | 
1375  | 0  |   int64_t carry6;  | 
1376  | 0  |   int64_t carry7;  | 
1377  | 0  |   int64_t carry8;  | 
1378  | 0  |   int64_t carry9;  | 
1379  | 0  |   int64_t carry10;  | 
1380  | 0  |   int64_t carry11;  | 
1381  | 0  |   int64_t carry12;  | 
1382  | 0  |   int64_t carry13;  | 
1383  | 0  |   int64_t carry14;  | 
1384  | 0  |   int64_t carry15;  | 
1385  | 0  |   int64_t carry16;  | 
1386  | 0  |   int64_t carry17;  | 
1387  | 0  |   int64_t carry18;  | 
1388  | 0  |   int64_t carry19;  | 
1389  | 0  |   int64_t carry20;  | 
1390  | 0  |   int64_t carry21;  | 
1391  | 0  |   int64_t carry22;  | 
1392  |  | 
  | 
1393  | 0  |   s0 = c0 + a0 * b0;  | 
1394  | 0  |   s1 = c1 + a0 * b1 + a1 * b0;  | 
1395  | 0  |   s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;  | 
1396  | 0  |   s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;  | 
1397  | 0  |   s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;  | 
1398  | 0  |   s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;  | 
1399  | 0  |   s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;  | 
1400  | 0  |   s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +  | 
1401  | 0  |        a6 * b1 + a7 * b0;  | 
1402  | 0  |   s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +  | 
1403  | 0  |        a6 * b2 + a7 * b1 + a8 * b0;  | 
1404  | 0  |   s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 +  | 
1405  | 0  |        a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;  | 
1406  | 0  |   s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 +  | 
1407  | 0  |         a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;  | 
1408  | 0  |   s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +  | 
1409  | 0  |         a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;  | 
1410  | 0  |   s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 +  | 
1411  | 0  |         a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;  | 
1412  | 0  |   s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 +  | 
1413  | 0  |         a9 * b4 + a10 * b3 + a11 * b2;  | 
1414  | 0  |   s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 +  | 
1415  | 0  |         a10 * b4 + a11 * b3;  | 
1416  | 0  |   s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 +  | 
1417  | 0  |         a11 * b4;  | 
1418  | 0  |   s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;  | 
1419  | 0  |   s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;  | 
1420  | 0  |   s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;  | 
1421  | 0  |   s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;  | 
1422  | 0  |   s20 = a9 * b11 + a10 * b10 + a11 * b9;  | 
1423  | 0  |   s21 = a10 * b11 + a11 * b10;  | 
1424  | 0  |   s22 = a11 * b11;  | 
1425  | 0  |   s23 = 0;  | 
1426  |  | 
  | 
1427  | 0  |   carry0 = (s0 + (1 << 20)) >> 21;  | 
1428  | 0  |   s1 += carry0;  | 
1429  | 0  |   s0 -= int64_lshift21(carry0);  | 
1430  | 0  |   carry2 = (s2 + (1 << 20)) >> 21;  | 
1431  | 0  |   s3 += carry2;  | 
1432  | 0  |   s2 -= int64_lshift21(carry2);  | 
1433  | 0  |   carry4 = (s4 + (1 << 20)) >> 21;  | 
1434  | 0  |   s5 += carry4;  | 
1435  | 0  |   s4 -= int64_lshift21(carry4);  | 
1436  | 0  |   carry6 = (s6 + (1 << 20)) >> 21;  | 
1437  | 0  |   s7 += carry6;  | 
1438  | 0  |   s6 -= int64_lshift21(carry6);  | 
1439  | 0  |   carry8 = (s8 + (1 << 20)) >> 21;  | 
1440  | 0  |   s9 += carry8;  | 
1441  | 0  |   s8 -= int64_lshift21(carry8);  | 
1442  | 0  |   carry10 = (s10 + (1 << 20)) >> 21;  | 
1443  | 0  |   s11 += carry10;  | 
1444  | 0  |   s10 -= int64_lshift21(carry10);  | 
1445  | 0  |   carry12 = (s12 + (1 << 20)) >> 21;  | 
1446  | 0  |   s13 += carry12;  | 
1447  | 0  |   s12 -= int64_lshift21(carry12);  | 
1448  | 0  |   carry14 = (s14 + (1 << 20)) >> 21;  | 
1449  | 0  |   s15 += carry14;  | 
1450  | 0  |   s14 -= int64_lshift21(carry14);  | 
1451  | 0  |   carry16 = (s16 + (1 << 20)) >> 21;  | 
1452  | 0  |   s17 += carry16;  | 
1453  | 0  |   s16 -= int64_lshift21(carry16);  | 
1454  | 0  |   carry18 = (s18 + (1 << 20)) >> 21;  | 
1455  | 0  |   s19 += carry18;  | 
1456  | 0  |   s18 -= int64_lshift21(carry18);  | 
1457  | 0  |   carry20 = (s20 + (1 << 20)) >> 21;  | 
1458  | 0  |   s21 += carry20;  | 
1459  | 0  |   s20 -= int64_lshift21(carry20);  | 
1460  | 0  |   carry22 = (s22 + (1 << 20)) >> 21;  | 
1461  | 0  |   s23 += carry22;  | 
1462  | 0  |   s22 -= int64_lshift21(carry22);  | 
1463  |  | 
  | 
1464  | 0  |   carry1 = (s1 + (1 << 20)) >> 21;  | 
1465  | 0  |   s2 += carry1;  | 
1466  | 0  |   s1 -= int64_lshift21(carry1);  | 
1467  | 0  |   carry3 = (s3 + (1 << 20)) >> 21;  | 
1468  | 0  |   s4 += carry3;  | 
1469  | 0  |   s3 -= int64_lshift21(carry3);  | 
1470  | 0  |   carry5 = (s5 + (1 << 20)) >> 21;  | 
1471  | 0  |   s6 += carry5;  | 
1472  | 0  |   s5 -= int64_lshift21(carry5);  | 
1473  | 0  |   carry7 = (s7 + (1 << 20)) >> 21;  | 
1474  | 0  |   s8 += carry7;  | 
1475  | 0  |   s7 -= int64_lshift21(carry7);  | 
1476  | 0  |   carry9 = (s9 + (1 << 20)) >> 21;  | 
1477  | 0  |   s10 += carry9;  | 
1478  | 0  |   s9 -= int64_lshift21(carry9);  | 
1479  | 0  |   carry11 = (s11 + (1 << 20)) >> 21;  | 
1480  | 0  |   s12 += carry11;  | 
1481  | 0  |   s11 -= int64_lshift21(carry11);  | 
1482  | 0  |   carry13 = (s13 + (1 << 20)) >> 21;  | 
1483  | 0  |   s14 += carry13;  | 
1484  | 0  |   s13 -= int64_lshift21(carry13);  | 
1485  | 0  |   carry15 = (s15 + (1 << 20)) >> 21;  | 
1486  | 0  |   s16 += carry15;  | 
1487  | 0  |   s15 -= int64_lshift21(carry15);  | 
1488  | 0  |   carry17 = (s17 + (1 << 20)) >> 21;  | 
1489  | 0  |   s18 += carry17;  | 
1490  | 0  |   s17 -= int64_lshift21(carry17);  | 
1491  | 0  |   carry19 = (s19 + (1 << 20)) >> 21;  | 
1492  | 0  |   s20 += carry19;  | 
1493  | 0  |   s19 -= int64_lshift21(carry19);  | 
1494  | 0  |   carry21 = (s21 + (1 << 20)) >> 21;  | 
1495  | 0  |   s22 += carry21;  | 
1496  | 0  |   s21 -= int64_lshift21(carry21);  | 
1497  |  | 
  | 
1498  | 0  |   s11 += s23 * 666643;  | 
1499  | 0  |   s12 += s23 * 470296;  | 
1500  | 0  |   s13 += s23 * 654183;  | 
1501  | 0  |   s14 -= s23 * 997805;  | 
1502  | 0  |   s15 += s23 * 136657;  | 
1503  | 0  |   s16 -= s23 * 683901;  | 
1504  | 0  |   s23 = 0;  | 
1505  |  | 
  | 
1506  | 0  |   s10 += s22 * 666643;  | 
1507  | 0  |   s11 += s22 * 470296;  | 
1508  | 0  |   s12 += s22 * 654183;  | 
1509  | 0  |   s13 -= s22 * 997805;  | 
1510  | 0  |   s14 += s22 * 136657;  | 
1511  | 0  |   s15 -= s22 * 683901;  | 
1512  | 0  |   s22 = 0;  | 
1513  |  | 
  | 
1514  | 0  |   s9 += s21 * 666643;  | 
1515  | 0  |   s10 += s21 * 470296;  | 
1516  | 0  |   s11 += s21 * 654183;  | 
1517  | 0  |   s12 -= s21 * 997805;  | 
1518  | 0  |   s13 += s21 * 136657;  | 
1519  | 0  |   s14 -= s21 * 683901;  | 
1520  | 0  |   s21 = 0;  | 
1521  |  | 
  | 
1522  | 0  |   s8 += s20 * 666643;  | 
1523  | 0  |   s9 += s20 * 470296;  | 
1524  | 0  |   s10 += s20 * 654183;  | 
1525  | 0  |   s11 -= s20 * 997805;  | 
1526  | 0  |   s12 += s20 * 136657;  | 
1527  | 0  |   s13 -= s20 * 683901;  | 
1528  | 0  |   s20 = 0;  | 
1529  |  | 
  | 
1530  | 0  |   s7 += s19 * 666643;  | 
1531  | 0  |   s8 += s19 * 470296;  | 
1532  | 0  |   s9 += s19 * 654183;  | 
1533  | 0  |   s10 -= s19 * 997805;  | 
1534  | 0  |   s11 += s19 * 136657;  | 
1535  | 0  |   s12 -= s19 * 683901;  | 
1536  | 0  |   s19 = 0;  | 
1537  |  | 
  | 
1538  | 0  |   s6 += s18 * 666643;  | 
1539  | 0  |   s7 += s18 * 470296;  | 
1540  | 0  |   s8 += s18 * 654183;  | 
1541  | 0  |   s9 -= s18 * 997805;  | 
1542  | 0  |   s10 += s18 * 136657;  | 
1543  | 0  |   s11 -= s18 * 683901;  | 
1544  | 0  |   s18 = 0;  | 
1545  |  | 
  | 
1546  | 0  |   carry6 = (s6 + (1 << 20)) >> 21;  | 
1547  | 0  |   s7 += carry6;  | 
1548  | 0  |   s6 -= int64_lshift21(carry6);  | 
1549  | 0  |   carry8 = (s8 + (1 << 20)) >> 21;  | 
1550  | 0  |   s9 += carry8;  | 
1551  | 0  |   s8 -= int64_lshift21(carry8);  | 
1552  | 0  |   carry10 = (s10 + (1 << 20)) >> 21;  | 
1553  | 0  |   s11 += carry10;  | 
1554  | 0  |   s10 -= int64_lshift21(carry10);  | 
1555  | 0  |   carry12 = (s12 + (1 << 20)) >> 21;  | 
1556  | 0  |   s13 += carry12;  | 
1557  | 0  |   s12 -= int64_lshift21(carry12);  | 
1558  | 0  |   carry14 = (s14 + (1 << 20)) >> 21;  | 
1559  | 0  |   s15 += carry14;  | 
1560  | 0  |   s14 -= int64_lshift21(carry14);  | 
1561  | 0  |   carry16 = (s16 + (1 << 20)) >> 21;  | 
1562  | 0  |   s17 += carry16;  | 
1563  | 0  |   s16 -= int64_lshift21(carry16);  | 
1564  |  | 
  | 
1565  | 0  |   carry7 = (s7 + (1 << 20)) >> 21;  | 
1566  | 0  |   s8 += carry7;  | 
1567  | 0  |   s7 -= int64_lshift21(carry7);  | 
1568  | 0  |   carry9 = (s9 + (1 << 20)) >> 21;  | 
1569  | 0  |   s10 += carry9;  | 
1570  | 0  |   s9 -= int64_lshift21(carry9);  | 
1571  | 0  |   carry11 = (s11 + (1 << 20)) >> 21;  | 
1572  | 0  |   s12 += carry11;  | 
1573  | 0  |   s11 -= int64_lshift21(carry11);  | 
1574  | 0  |   carry13 = (s13 + (1 << 20)) >> 21;  | 
1575  | 0  |   s14 += carry13;  | 
1576  | 0  |   s13 -= int64_lshift21(carry13);  | 
1577  | 0  |   carry15 = (s15 + (1 << 20)) >> 21;  | 
1578  | 0  |   s16 += carry15;  | 
1579  | 0  |   s15 -= int64_lshift21(carry15);  | 
1580  |  | 
  | 
1581  | 0  |   s5 += s17 * 666643;  | 
1582  | 0  |   s6 += s17 * 470296;  | 
1583  | 0  |   s7 += s17 * 654183;  | 
1584  | 0  |   s8 -= s17 * 997805;  | 
1585  | 0  |   s9 += s17 * 136657;  | 
1586  | 0  |   s10 -= s17 * 683901;  | 
1587  | 0  |   s17 = 0;  | 
1588  |  | 
  | 
1589  | 0  |   s4 += s16 * 666643;  | 
1590  | 0  |   s5 += s16 * 470296;  | 
1591  | 0  |   s6 += s16 * 654183;  | 
1592  | 0  |   s7 -= s16 * 997805;  | 
1593  | 0  |   s8 += s16 * 136657;  | 
1594  | 0  |   s9 -= s16 * 683901;  | 
1595  | 0  |   s16 = 0;  | 
1596  |  | 
  | 
1597  | 0  |   s3 += s15 * 666643;  | 
1598  | 0  |   s4 += s15 * 470296;  | 
1599  | 0  |   s5 += s15 * 654183;  | 
1600  | 0  |   s6 -= s15 * 997805;  | 
1601  | 0  |   s7 += s15 * 136657;  | 
1602  | 0  |   s8 -= s15 * 683901;  | 
1603  | 0  |   s15 = 0;  | 
1604  |  | 
  | 
1605  | 0  |   s2 += s14 * 666643;  | 
1606  | 0  |   s3 += s14 * 470296;  | 
1607  | 0  |   s4 += s14 * 654183;  | 
1608  | 0  |   s5 -= s14 * 997805;  | 
1609  | 0  |   s6 += s14 * 136657;  | 
1610  | 0  |   s7 -= s14 * 683901;  | 
1611  | 0  |   s14 = 0;  | 
1612  |  | 
  | 
1613  | 0  |   s1 += s13 * 666643;  | 
1614  | 0  |   s2 += s13 * 470296;  | 
1615  | 0  |   s3 += s13 * 654183;  | 
1616  | 0  |   s4 -= s13 * 997805;  | 
1617  | 0  |   s5 += s13 * 136657;  | 
1618  | 0  |   s6 -= s13 * 683901;  | 
1619  | 0  |   s13 = 0;  | 
1620  |  | 
  | 
1621  | 0  |   s0 += s12 * 666643;  | 
1622  | 0  |   s1 += s12 * 470296;  | 
1623  | 0  |   s2 += s12 * 654183;  | 
1624  | 0  |   s3 -= s12 * 997805;  | 
1625  | 0  |   s4 += s12 * 136657;  | 
1626  | 0  |   s5 -= s12 * 683901;  | 
1627  | 0  |   s12 = 0;  | 
1628  |  | 
  | 
1629  | 0  |   carry0 = (s0 + (1 << 20)) >> 21;  | 
1630  | 0  |   s1 += carry0;  | 
1631  | 0  |   s0 -= int64_lshift21(carry0);  | 
1632  | 0  |   carry2 = (s2 + (1 << 20)) >> 21;  | 
1633  | 0  |   s3 += carry2;  | 
1634  | 0  |   s2 -= int64_lshift21(carry2);  | 
1635  | 0  |   carry4 = (s4 + (1 << 20)) >> 21;  | 
1636  | 0  |   s5 += carry4;  | 
1637  | 0  |   s4 -= int64_lshift21(carry4);  | 
1638  | 0  |   carry6 = (s6 + (1 << 20)) >> 21;  | 
1639  | 0  |   s7 += carry6;  | 
1640  | 0  |   s6 -= int64_lshift21(carry6);  | 
1641  | 0  |   carry8 = (s8 + (1 << 20)) >> 21;  | 
1642  | 0  |   s9 += carry8;  | 
1643  | 0  |   s8 -= int64_lshift21(carry8);  | 
1644  | 0  |   carry10 = (s10 + (1 << 20)) >> 21;  | 
1645  | 0  |   s11 += carry10;  | 
1646  | 0  |   s10 -= int64_lshift21(carry10);  | 
1647  |  | 
  | 
1648  | 0  |   carry1 = (s1 + (1 << 20)) >> 21;  | 
1649  | 0  |   s2 += carry1;  | 
1650  | 0  |   s1 -= int64_lshift21(carry1);  | 
1651  | 0  |   carry3 = (s3 + (1 << 20)) >> 21;  | 
1652  | 0  |   s4 += carry3;  | 
1653  | 0  |   s3 -= int64_lshift21(carry3);  | 
1654  | 0  |   carry5 = (s5 + (1 << 20)) >> 21;  | 
1655  | 0  |   s6 += carry5;  | 
1656  | 0  |   s5 -= int64_lshift21(carry5);  | 
1657  | 0  |   carry7 = (s7 + (1 << 20)) >> 21;  | 
1658  | 0  |   s8 += carry7;  | 
1659  | 0  |   s7 -= int64_lshift21(carry7);  | 
1660  | 0  |   carry9 = (s9 + (1 << 20)) >> 21;  | 
1661  | 0  |   s10 += carry9;  | 
1662  | 0  |   s9 -= int64_lshift21(carry9);  | 
1663  | 0  |   carry11 = (s11 + (1 << 20)) >> 21;  | 
1664  | 0  |   s12 += carry11;  | 
1665  | 0  |   s11 -= int64_lshift21(carry11);  | 
1666  |  | 
  | 
1667  | 0  |   s0 += s12 * 666643;  | 
1668  | 0  |   s1 += s12 * 470296;  | 
1669  | 0  |   s2 += s12 * 654183;  | 
1670  | 0  |   s3 -= s12 * 997805;  | 
1671  | 0  |   s4 += s12 * 136657;  | 
1672  | 0  |   s5 -= s12 * 683901;  | 
1673  | 0  |   s12 = 0;  | 
1674  |  | 
  | 
1675  | 0  |   carry0 = s0 >> 21;  | 
1676  | 0  |   s1 += carry0;  | 
1677  | 0  |   s0 -= int64_lshift21(carry0);  | 
1678  | 0  |   carry1 = s1 >> 21;  | 
1679  | 0  |   s2 += carry1;  | 
1680  | 0  |   s1 -= int64_lshift21(carry1);  | 
1681  | 0  |   carry2 = s2 >> 21;  | 
1682  | 0  |   s3 += carry2;  | 
1683  | 0  |   s2 -= int64_lshift21(carry2);  | 
1684  | 0  |   carry3 = s3 >> 21;  | 
1685  | 0  |   s4 += carry3;  | 
1686  | 0  |   s3 -= int64_lshift21(carry3);  | 
1687  | 0  |   carry4 = s4 >> 21;  | 
1688  | 0  |   s5 += carry4;  | 
1689  | 0  |   s4 -= int64_lshift21(carry4);  | 
1690  | 0  |   carry5 = s5 >> 21;  | 
1691  | 0  |   s6 += carry5;  | 
1692  | 0  |   s5 -= int64_lshift21(carry5);  | 
1693  | 0  |   carry6 = s6 >> 21;  | 
1694  | 0  |   s7 += carry6;  | 
1695  | 0  |   s6 -= int64_lshift21(carry6);  | 
1696  | 0  |   carry7 = s7 >> 21;  | 
1697  | 0  |   s8 += carry7;  | 
1698  | 0  |   s7 -= int64_lshift21(carry7);  | 
1699  | 0  |   carry8 = s8 >> 21;  | 
1700  | 0  |   s9 += carry8;  | 
1701  | 0  |   s8 -= int64_lshift21(carry8);  | 
1702  | 0  |   carry9 = s9 >> 21;  | 
1703  | 0  |   s10 += carry9;  | 
1704  | 0  |   s9 -= int64_lshift21(carry9);  | 
1705  | 0  |   carry10 = s10 >> 21;  | 
1706  | 0  |   s11 += carry10;  | 
1707  | 0  |   s10 -= int64_lshift21(carry10);  | 
1708  | 0  |   carry11 = s11 >> 21;  | 
1709  | 0  |   s12 += carry11;  | 
1710  | 0  |   s11 -= int64_lshift21(carry11);  | 
1711  |  | 
  | 
1712  | 0  |   s0 += s12 * 666643;  | 
1713  | 0  |   s1 += s12 * 470296;  | 
1714  | 0  |   s2 += s12 * 654183;  | 
1715  | 0  |   s3 -= s12 * 997805;  | 
1716  | 0  |   s4 += s12 * 136657;  | 
1717  | 0  |   s5 -= s12 * 683901;  | 
1718  | 0  |   s12 = 0;  | 
1719  |  | 
  | 
1720  | 0  |   carry0 = s0 >> 21;  | 
1721  | 0  |   s1 += carry0;  | 
1722  | 0  |   s0 -= int64_lshift21(carry0);  | 
1723  | 0  |   carry1 = s1 >> 21;  | 
1724  | 0  |   s2 += carry1;  | 
1725  | 0  |   s1 -= int64_lshift21(carry1);  | 
1726  | 0  |   carry2 = s2 >> 21;  | 
1727  | 0  |   s3 += carry2;  | 
1728  | 0  |   s2 -= int64_lshift21(carry2);  | 
1729  | 0  |   carry3 = s3 >> 21;  | 
1730  | 0  |   s4 += carry3;  | 
1731  | 0  |   s3 -= int64_lshift21(carry3);  | 
1732  | 0  |   carry4 = s4 >> 21;  | 
1733  | 0  |   s5 += carry4;  | 
1734  | 0  |   s4 -= int64_lshift21(carry4);  | 
1735  | 0  |   carry5 = s5 >> 21;  | 
1736  | 0  |   s6 += carry5;  | 
1737  | 0  |   s5 -= int64_lshift21(carry5);  | 
1738  | 0  |   carry6 = s6 >> 21;  | 
1739  | 0  |   s7 += carry6;  | 
1740  | 0  |   s6 -= int64_lshift21(carry6);  | 
1741  | 0  |   carry7 = s7 >> 21;  | 
1742  | 0  |   s8 += carry7;  | 
1743  | 0  |   s7 -= int64_lshift21(carry7);  | 
1744  | 0  |   carry8 = s8 >> 21;  | 
1745  | 0  |   s9 += carry8;  | 
1746  | 0  |   s8 -= int64_lshift21(carry8);  | 
1747  | 0  |   carry9 = s9 >> 21;  | 
1748  | 0  |   s10 += carry9;  | 
1749  | 0  |   s9 -= int64_lshift21(carry9);  | 
1750  | 0  |   carry10 = s10 >> 21;  | 
1751  | 0  |   s11 += carry10;  | 
1752  | 0  |   s10 -= int64_lshift21(carry10);  | 
1753  |  | 
  | 
1754  | 0  |   s[0] = s0 >> 0;  | 
1755  | 0  |   s[1] = s0 >> 8;  | 
1756  | 0  |   s[2] = (s0 >> 16) | (s1 << 5);  | 
1757  | 0  |   s[3] = s1 >> 3;  | 
1758  | 0  |   s[4] = s1 >> 11;  | 
1759  | 0  |   s[5] = (s1 >> 19) | (s2 << 2);  | 
1760  | 0  |   s[6] = s2 >> 6;  | 
1761  | 0  |   s[7] = (s2 >> 14) | (s3 << 7);  | 
1762  | 0  |   s[8] = s3 >> 1;  | 
1763  | 0  |   s[9] = s3 >> 9;  | 
1764  | 0  |   s[10] = (s3 >> 17) | (s4 << 4);  | 
1765  | 0  |   s[11] = s4 >> 4;  | 
1766  | 0  |   s[12] = s4 >> 12;  | 
1767  | 0  |   s[13] = (s4 >> 20) | (s5 << 1);  | 
1768  | 0  |   s[14] = s5 >> 7;  | 
1769  | 0  |   s[15] = (s5 >> 15) | (s6 << 6);  | 
1770  | 0  |   s[16] = s6 >> 2;  | 
1771  | 0  |   s[17] = s6 >> 10;  | 
1772  | 0  |   s[18] = (s6 >> 18) | (s7 << 3);  | 
1773  | 0  |   s[19] = s7 >> 5;  | 
1774  | 0  |   s[20] = s7 >> 13;  | 
1775  | 0  |   s[21] = s8 >> 0;  | 
1776  | 0  |   s[22] = s8 >> 8;  | 
1777  | 0  |   s[23] = (s8 >> 16) | (s9 << 5);  | 
1778  | 0  |   s[24] = s9 >> 3;  | 
1779  | 0  |   s[25] = s9 >> 11;  | 
1780  | 0  |   s[26] = (s9 >> 19) | (s10 << 2);  | 
1781  | 0  |   s[27] = s10 >> 6;  | 
1782  | 0  |   s[28] = (s10 >> 14) | (s11 << 7);  | 
1783  | 0  |   s[29] = s11 >> 1;  | 
1784  | 0  |   s[30] = s11 >> 9;  | 
1785  | 0  |   s[31] = s11 >> 17;  | 
1786  | 0  | }  | 
1787  |  |  | 
1788  |  |  | 
1789  |  | void x25519_scalar_mult_generic_masked(uint8_t out[32],  | 
1790  |  |                                            const uint8_t scalar_masked[32],  | 
1791  | 0  |                                            const uint8_t point[32]) { | 
1792  | 0  |   fe x1, x2, z2, x3, z3, tmp0, tmp1;  | 
1793  | 0  |   fe_loose x2l, z2l, x3l, tmp0l, tmp1l;  | 
1794  |  | 
  | 
1795  | 0  |   uint8_t e[32];  | 
1796  | 0  |   OPENSSL_memcpy(e, scalar_masked, 32);  | 
1797  |  |   // The following implementation was transcribed to Coq and proven to  | 
1798  |  |   // correspond to unary scalar multiplication in affine coordinates given that  | 
1799  |  |   // x1 != 0 is the x coordinate of some point on the curve. It was also checked  | 
1800  |  |   // in Coq that doing a ladderstep with x1 = x3 = 0 gives z2' = z3' = 0, and z2  | 
1801  |  |   // = z3 = 0 gives z2' = z3' = 0. The statement was quantified over the  | 
1802  |  |   // underlying field, so it applies to Curve25519 itself and the quadratic  | 
1803  |  |   // twist of Curve25519. It was not proven in Coq that prime-field arithmetic  | 
1804  |  |   // correctly simulates extension-field arithmetic on prime-field values.  | 
1805  |  |   // The decoding of the byte array representation of e was not considered.  | 
1806  |  |   // Specification of Montgomery curves in affine coordinates:  | 
1807  |  |   // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>  | 
1808  |  |   // Proof that these form a group that is isomorphic to a Weierstrass curve:  | 
1809  |  |   // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>  | 
1810  |  |   // Coq transcription and correctness proof of the loop (where scalarbits=255):  | 
1811  |  |   // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>  | 
1812  |  |   // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>  | 
1813  |  |   // preconditions: 0 <= e < 2^255 (not necessarily e < order), fe_invert(0) = 0  | 
1814  | 0  |   fe_frombytes(&x1, point);  | 
1815  | 0  |   fe_1(&x2);  | 
1816  | 0  |   fe_0(&z2);  | 
1817  | 0  |   fe_copy(&x3, &x1);  | 
1818  | 0  |   fe_1(&z3);  | 
1819  |  | 
  | 
1820  | 0  |   unsigned swap = 0;  | 
1821  | 0  |   int pos;  | 
1822  | 0  |   for (pos = 254; pos >= 0; --pos) { | 
1823  |  |     // loop invariant as of right before the test, for the case where x1 != 0:  | 
1824  |  |     //   pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3 is nonzero  | 
1825  |  |     //   let r := e >> (pos+1) in the following equalities of projective points:  | 
1826  |  |     //   to_xz (r*P)     === if swap then (x3, z3) else (x2, z2)  | 
1827  |  |     //   to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)  | 
1828  |  |     //   x1 is the nonzero x coordinate of the nonzero point (r*P-(r+1)*P)  | 
1829  | 0  |     unsigned b = 1 & (e[pos / 8] >> (pos & 7));  | 
1830  | 0  |     swap ^= b;  | 
1831  | 0  |     fe_cswap(&x2, &x3, swap);  | 
1832  | 0  |     fe_cswap(&z2, &z3, swap);  | 
1833  | 0  |     swap = b;  | 
1834  |  |     // Coq transcription of ladderstep formula (called from transcribed loop):  | 
1835  |  |     // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>  | 
1836  |  |     // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>  | 
1837  |  |     // x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>  | 
1838  |  |     // x1  = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>  | 
1839  | 0  |     fe_sub(&tmp0l, &x3, &z3);  | 
1840  | 0  |     fe_sub(&tmp1l, &x2, &z2);  | 
1841  | 0  |     fe_add(&x2l, &x2, &z2);  | 
1842  | 0  |     fe_add(&z2l, &x3, &z3);  | 
1843  | 0  |     fe_mul_tll(&z3, &tmp0l, &x2l);  | 
1844  | 0  |     fe_mul_tll(&z2, &z2l, &tmp1l);  | 
1845  | 0  |     fe_sq_tl(&tmp0, &tmp1l);  | 
1846  | 0  |     fe_sq_tl(&tmp1, &x2l);  | 
1847  | 0  |     fe_add(&x3l, &z3, &z2);  | 
1848  | 0  |     fe_sub(&z2l, &z3, &z2);  | 
1849  | 0  |     fe_mul_ttt(&x2, &tmp1, &tmp0);  | 
1850  | 0  |     fe_sub(&tmp1l, &tmp1, &tmp0);  | 
1851  | 0  |     fe_sq_tl(&z2, &z2l);  | 
1852  | 0  |     fe_mul121666(&z3, &tmp1l);  | 
1853  | 0  |     fe_sq_tl(&x3, &x3l);  | 
1854  | 0  |     fe_add(&tmp0l, &tmp0, &z3);  | 
1855  | 0  |     fe_mul_ttt(&z3, &x1, &z2);  | 
1856  | 0  |     fe_mul_tll(&z2, &tmp1l, &tmp0l);  | 
1857  | 0  |   }  | 
1858  |  |   // here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) else (x2, z2)  | 
1859  | 0  |   fe_cswap(&x2, &x3, swap);  | 
1860  | 0  |   fe_cswap(&z2, &z3, swap);  | 
1861  |  | 
  | 
1862  | 0  |   fe_invert(&z2, &z2);  | 
1863  | 0  |   fe_mul_ttt(&x2, &x2, &z2);  | 
1864  | 0  |   fe_tobytes(out, &x2);  | 
1865  | 0  | }  | 
1866  |  |  | 
1867  |  | void x25519_public_from_private_generic_masked(uint8_t out_public_value[32],  | 
1868  |  |                                                const uint8_t private_key_masked[32],  | 
1869  | 0  |                                                int use_adx) { | 
1870  | 0  |   uint8_t e[32];  | 
1871  | 0  |   OPENSSL_memcpy(e, private_key_masked, 32);  | 
1872  |  | 
  | 
1873  | 0  |   ge_p3 A;  | 
1874  | 0  |   x25519_ge_scalarmult_base(&A, e, use_adx);  | 
1875  |  |  | 
1876  |  |   // We only need the u-coordinate of the curve25519 point. The map is  | 
1877  |  |   // u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y).  | 
1878  | 0  |   fe_loose zplusy, zminusy;  | 
1879  | 0  |   fe zminusy_inv;  | 
1880  | 0  |   fe_add(&zplusy, &A.Z, &A.Y);  | 
1881  | 0  |   fe_sub(&zminusy, &A.Z, &A.Y);  | 
1882  | 0  |   fe_loose_invert(&zminusy_inv, &zminusy);  | 
1883  | 0  |   fe_mul_tlt(&zminusy_inv, &zplusy, &zminusy_inv);  | 
1884  | 0  |   fe_tobytes(out_public_value, &zminusy_inv);  | 
1885  | 0  |   CONSTTIME_DECLASSIFY(out_public_value, 32);  | 
1886  | 0  | }  | 
1887  |  |  | 
1888  | 0  | void x25519_fe_invert(fe *out, const fe *z) { | 
1889  | 0  |   fe_invert(out, z);  | 
1890  | 0  | }  | 
1891  |  |  | 
1892  | 0  | uint8_t x25519_fe_isnegative(const fe *f) { | 
1893  | 0  |   return (uint8_t)fe_isnegative(f);  | 
1894  | 0  | }  | 
1895  |  |  | 
1896  | 0  | void x25519_fe_mul_ttt(fe *h, const fe *f, const fe *g) { | 
1897  | 0  |   fe_mul_ttt(h, f, g);  | 
1898  | 0  | }  | 
1899  |  |  | 
1900  | 0  | void x25519_fe_neg(fe *f) { | 
1901  | 0  |   fe_loose t;  | 
1902  | 0  |   fe_neg(&t, f);  | 
1903  | 0  |   fe_carry(f, &t);  | 
1904  | 0  | }  | 
1905  |  |  | 
1906  | 0  | void x25519_fe_tobytes(uint8_t s[32], const fe *h) { | 
1907  | 0  |   fe_tobytes(s, h);  | 
1908  | 0  | }  | 
1909  |  |  | 
1910  |  | void x25519_ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,  | 
1911  | 0  |                                              const ge_p3 *A, const uint8_t *b) { | 
1912  | 0  |   ge_double_scalarmult_vartime(r, a, A, b);  | 
1913  | 0  | }  | 
1914  |  |  | 
// x25519_sc_mask adjusts the 32-byte scalar |a| in place: it clears the three
// low bits of a[0], clears the top bit of a[31], and sets bit 6 of a[31].
// Bytes 1..30 are left untouched.
void x25519_sc_mask(uint8_t a[32]) {
  a[0] &= 0xf8;                                // 248: drop bits 0..2
  a[31] = (uint8_t)((a[31] & 0x7f) | 0x40);    // 127: drop bit 7; 64: set bit 6
}
1920  |  |  | 
// x25519_sc_muladd forwards |s|, |a|, |b| and |c|, unchanged and in the same
// order, to sc_muladd, which writes its result through |s|.
void x25519_sc_muladd(uint8_t *s,
                      const uint8_t *a,
                      const uint8_t *b,
                      const uint8_t *c) {
  sc_muladd(s, a, b, c);
}