/src/nss/lib/freebl/verified/Hacl_P521.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* MIT License |
2 | | * |
3 | | * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation |
4 | | * Copyright (c) 2022-2023 HACL* Contributors |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in all |
14 | | * copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | |
25 | | #include "Hacl_P521.h" |
26 | | |
27 | | #include "internal/Hacl_Krmllib.h" |
28 | | #include "internal/Hacl_Bignum_Base.h" |
29 | | |
30 | | static inline uint64_t |
31 | | bn_is_eq_mask(uint64_t *x, uint64_t *y) |
32 | 1.93k | { |
33 | 1.93k | uint64_t mask = 0xFFFFFFFFFFFFFFFFULL; |
34 | 1.93k | KRML_MAYBE_FOR9(i, |
35 | 1.93k | 0U, |
36 | 1.93k | 9U, |
37 | 1.93k | 1U, |
38 | 1.93k | uint64_t uu____0 = FStar_UInt64_eq_mask(x[i], y[i]); |
39 | 1.93k | mask = uu____0 & mask;); |
40 | 1.93k | uint64_t mask1 = mask; |
41 | 1.93k | return mask1; |
42 | 1.93k | } |
43 | | |
44 | | static inline void |
45 | | bn_cmovznz(uint64_t *a, uint64_t b, uint64_t *c, uint64_t *d) |
46 | 323 | { |
47 | 323 | uint64_t mask = ~FStar_UInt64_eq_mask(b, 0ULL); |
48 | 323 | KRML_MAYBE_FOR9(i, |
49 | 323 | 0U, |
50 | 323 | 9U, |
51 | 323 | 1U, |
52 | 323 | uint64_t *os = a; |
53 | 323 | uint64_t uu____0 = c[i]; |
54 | 323 | uint64_t x = uu____0 ^ (mask & (d[i] ^ uu____0)); |
55 | 323 | os[i] = x;); |
56 | 323 | } |
57 | | |
58 | | static inline void |
59 | | bn_add_mod(uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d) |
60 | 5.76M | { |
61 | 5.76M | uint64_t c10 = 0ULL; |
62 | 5.76M | KRML_MAYBE_FOR2(i, |
63 | 5.76M | 0U, |
64 | 5.76M | 2U, |
65 | 5.76M | 1U, |
66 | 5.76M | uint64_t t1 = c[4U * i]; |
67 | 5.76M | uint64_t t20 = d[4U * i]; |
68 | 5.76M | uint64_t *res_i0 = a + 4U * i; |
69 | 5.76M | c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t1, t20, res_i0); |
70 | 5.76M | uint64_t t10 = c[4U * i + 1U]; |
71 | 5.76M | uint64_t t21 = d[4U * i + 1U]; |
72 | 5.76M | uint64_t *res_i1 = a + 4U * i + 1U; |
73 | 5.76M | c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t10, t21, res_i1); |
74 | 5.76M | uint64_t t11 = c[4U * i + 2U]; |
75 | 5.76M | uint64_t t22 = d[4U * i + 2U]; |
76 | 5.76M | uint64_t *res_i2 = a + 4U * i + 2U; |
77 | 5.76M | c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t11, t22, res_i2); |
78 | 5.76M | uint64_t t12 = c[4U * i + 3U]; |
79 | 5.76M | uint64_t t2 = d[4U * i + 3U]; |
80 | 5.76M | uint64_t *res_i = a + 4U * i + 3U; |
81 | 5.76M | c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t12, t2, res_i);); |
82 | 5.76M | { |
83 | 5.76M | uint64_t t1 = c[8U]; |
84 | 5.76M | uint64_t t2 = d[8U]; |
85 | 5.76M | uint64_t *res_i = a + 8U; |
86 | 5.76M | c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t1, t2, res_i); |
87 | 5.76M | } |
88 | 5.76M | uint64_t c0 = c10; |
89 | 5.76M | uint64_t tmp[9U] = { 0U }; |
90 | 5.76M | uint64_t c1 = 0ULL; |
91 | 5.76M | KRML_MAYBE_FOR2(i, |
92 | 5.76M | 0U, |
93 | 5.76M | 2U, |
94 | 5.76M | 1U, |
95 | 5.76M | uint64_t t1 = a[4U * i]; |
96 | 5.76M | uint64_t t20 = b[4U * i]; |
97 | 5.76M | uint64_t *res_i0 = tmp + 4U * i; |
98 | 5.76M | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0); |
99 | 5.76M | uint64_t t10 = a[4U * i + 1U]; |
100 | 5.76M | uint64_t t21 = b[4U * i + 1U]; |
101 | 5.76M | uint64_t *res_i1 = tmp + 4U * i + 1U; |
102 | 5.76M | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1); |
103 | 5.76M | uint64_t t11 = a[4U * i + 2U]; |
104 | 5.76M | uint64_t t22 = b[4U * i + 2U]; |
105 | 5.76M | uint64_t *res_i2 = tmp + 4U * i + 2U; |
106 | 5.76M | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2); |
107 | 5.76M | uint64_t t12 = a[4U * i + 3U]; |
108 | 5.76M | uint64_t t2 = b[4U * i + 3U]; |
109 | 5.76M | uint64_t *res_i = tmp + 4U * i + 3U; |
110 | 5.76M | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i);); |
111 | 5.76M | { |
112 | 5.76M | uint64_t t1 = a[8U]; |
113 | 5.76M | uint64_t t2 = b[8U]; |
114 | 5.76M | uint64_t *res_i = tmp + 8U; |
115 | 5.76M | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i); |
116 | 5.76M | } |
117 | 5.76M | uint64_t c11 = c1; |
118 | 5.76M | uint64_t c2 = c0 - c11; |
119 | 5.76M | KRML_MAYBE_FOR9(i, |
120 | 5.76M | 0U, |
121 | 5.76M | 9U, |
122 | 5.76M | 1U, |
123 | 5.76M | uint64_t *os = a; |
124 | 5.76M | uint64_t x = (c2 & a[i]) | (~c2 & tmp[i]); |
125 | 5.76M | os[i] = x;); |
126 | 5.76M | } |
127 | | |
128 | | static inline uint64_t |
129 | | bn_sub(uint64_t *a, uint64_t *b, uint64_t *c) |
130 | 2.81k | { |
131 | 2.81k | uint64_t c1 = 0ULL; |
132 | 2.81k | KRML_MAYBE_FOR2(i, |
133 | 2.81k | 0U, |
134 | 2.81k | 2U, |
135 | 2.81k | 1U, |
136 | 2.81k | uint64_t t1 = b[4U * i]; |
137 | 2.81k | uint64_t t20 = c[4U * i]; |
138 | 2.81k | uint64_t *res_i0 = a + 4U * i; |
139 | 2.81k | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0); |
140 | 2.81k | uint64_t t10 = b[4U * i + 1U]; |
141 | 2.81k | uint64_t t21 = c[4U * i + 1U]; |
142 | 2.81k | uint64_t *res_i1 = a + 4U * i + 1U; |
143 | 2.81k | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1); |
144 | 2.81k | uint64_t t11 = b[4U * i + 2U]; |
145 | 2.81k | uint64_t t22 = c[4U * i + 2U]; |
146 | 2.81k | uint64_t *res_i2 = a + 4U * i + 2U; |
147 | 2.81k | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2); |
148 | 2.81k | uint64_t t12 = b[4U * i + 3U]; |
149 | 2.81k | uint64_t t2 = c[4U * i + 3U]; |
150 | 2.81k | uint64_t *res_i = a + 4U * i + 3U; |
151 | 2.81k | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i);); |
152 | 2.81k | { |
153 | 2.81k | uint64_t t1 = b[8U]; |
154 | 2.81k | uint64_t t2 = c[8U]; |
155 | 2.81k | uint64_t *res_i = a + 8U; |
156 | 2.81k | c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i); |
157 | 2.81k | } |
158 | 2.81k | uint64_t c10 = c1; |
159 | 2.81k | return c10; |
160 | 2.81k | } |
161 | | |
162 | | static inline void |
163 | | bn_sub_mod(uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d) |
164 | 2.37M | { |
165 | 2.37M | uint64_t c10 = 0ULL; |
166 | 2.37M | KRML_MAYBE_FOR2(i, |
167 | 2.37M | 0U, |
168 | 2.37M | 2U, |
169 | 2.37M | 1U, |
170 | 2.37M | uint64_t t1 = c[4U * i]; |
171 | 2.37M | uint64_t t20 = d[4U * i]; |
172 | 2.37M | uint64_t *res_i0 = a + 4U * i; |
173 | 2.37M | c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t1, t20, res_i0); |
174 | 2.37M | uint64_t t10 = c[4U * i + 1U]; |
175 | 2.37M | uint64_t t21 = d[4U * i + 1U]; |
176 | 2.37M | uint64_t *res_i1 = a + 4U * i + 1U; |
177 | 2.37M | c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t10, t21, res_i1); |
178 | 2.37M | uint64_t t11 = c[4U * i + 2U]; |
179 | 2.37M | uint64_t t22 = d[4U * i + 2U]; |
180 | 2.37M | uint64_t *res_i2 = a + 4U * i + 2U; |
181 | 2.37M | c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t11, t22, res_i2); |
182 | 2.37M | uint64_t t12 = c[4U * i + 3U]; |
183 | 2.37M | uint64_t t2 = d[4U * i + 3U]; |
184 | 2.37M | uint64_t *res_i = a + 4U * i + 3U; |
185 | 2.37M | c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t12, t2, res_i);); |
186 | 2.37M | { |
187 | 2.37M | uint64_t t1 = c[8U]; |
188 | 2.37M | uint64_t t2 = d[8U]; |
189 | 2.37M | uint64_t *res_i = a + 8U; |
190 | 2.37M | c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t1, t2, res_i); |
191 | 2.37M | } |
192 | 2.37M | uint64_t c0 = c10; |
193 | 2.37M | uint64_t tmp[9U] = { 0U }; |
194 | 2.37M | uint64_t c1 = 0ULL; |
195 | 2.37M | KRML_MAYBE_FOR2(i, |
196 | 2.37M | 0U, |
197 | 2.37M | 2U, |
198 | 2.37M | 1U, |
199 | 2.37M | uint64_t t1 = a[4U * i]; |
200 | 2.37M | uint64_t t20 = b[4U * i]; |
201 | 2.37M | uint64_t *res_i0 = tmp + 4U * i; |
202 | 2.37M | c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t1, t20, res_i0); |
203 | 2.37M | uint64_t t10 = a[4U * i + 1U]; |
204 | 2.37M | uint64_t t21 = b[4U * i + 1U]; |
205 | 2.37M | uint64_t *res_i1 = tmp + 4U * i + 1U; |
206 | 2.37M | c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t10, t21, res_i1); |
207 | 2.37M | uint64_t t11 = a[4U * i + 2U]; |
208 | 2.37M | uint64_t t22 = b[4U * i + 2U]; |
209 | 2.37M | uint64_t *res_i2 = tmp + 4U * i + 2U; |
210 | 2.37M | c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t11, t22, res_i2); |
211 | 2.37M | uint64_t t12 = a[4U * i + 3U]; |
212 | 2.37M | uint64_t t2 = b[4U * i + 3U]; |
213 | 2.37M | uint64_t *res_i = tmp + 4U * i + 3U; |
214 | 2.37M | c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t12, t2, res_i);); |
215 | 2.37M | { |
216 | 2.37M | uint64_t t1 = a[8U]; |
217 | 2.37M | uint64_t t2 = b[8U]; |
218 | 2.37M | uint64_t *res_i = tmp + 8U; |
219 | 2.37M | c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t1, t2, res_i); |
220 | 2.37M | } |
221 | 2.37M | uint64_t c11 = c1; |
222 | 2.37M | KRML_MAYBE_UNUSED_VAR(c11); |
223 | 2.37M | uint64_t c2 = 0ULL - c0; |
224 | 2.37M | KRML_MAYBE_FOR9(i, |
225 | 2.37M | 0U, |
226 | 2.37M | 9U, |
227 | 2.37M | 1U, |
228 | 2.37M | uint64_t *os = a; |
229 | 2.37M | uint64_t x = (c2 & tmp[i]) | (~c2 & a[i]); |
230 | 2.37M | os[i] = x;); |
231 | 2.37M | } |
232 | | |
233 | | static inline void |
234 | | bn_mul(uint64_t *a, uint64_t *b, uint64_t *c) |
235 | 3.96M | { |
236 | 3.96M | memset(a, 0U, 18U * sizeof(uint64_t)); |
237 | 3.96M | KRML_MAYBE_FOR9( |
238 | 3.96M | i0, |
239 | 3.96M | 0U, |
240 | 3.96M | 9U, |
241 | 3.96M | 1U, |
242 | 3.96M | uint64_t bj = c[i0]; |
243 | 3.96M | uint64_t *res_j = a + i0; |
244 | 3.96M | uint64_t c1 = 0ULL; |
245 | 3.96M | KRML_MAYBE_FOR2(i, |
246 | 3.96M | 0U, |
247 | 3.96M | 2U, |
248 | 3.96M | 1U, |
249 | 3.96M | uint64_t a_i = b[4U * i]; |
250 | 3.96M | uint64_t *res_i0 = res_j + 4U * i; |
251 | 3.96M | c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c1, res_i0); |
252 | 3.96M | uint64_t a_i0 = b[4U * i + 1U]; |
253 | 3.96M | uint64_t *res_i1 = res_j + 4U * i + 1U; |
254 | 3.96M | c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c1, res_i1); |
255 | 3.96M | uint64_t a_i1 = b[4U * i + 2U]; |
256 | 3.96M | uint64_t *res_i2 = res_j + 4U * i + 2U; |
257 | 3.96M | c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c1, res_i2); |
258 | 3.96M | uint64_t a_i2 = b[4U * i + 3U]; |
259 | 3.96M | uint64_t *res_i = res_j + 4U * i + 3U; |
260 | 3.96M | c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c1, res_i);); |
261 | 3.96M | { |
262 | 3.96M | uint64_t a_i = b[8U]; |
263 | 3.96M | uint64_t *res_i = res_j + 8U; |
264 | 3.96M | c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c1, res_i); |
265 | 3.96M | } uint64_t r = c1; |
266 | 3.96M | a[9U + i0] = r;); |
267 | 3.96M | } |
268 | | |
269 | | static inline void |
270 | | bn_sqr(uint64_t *a, uint64_t *b) |
271 | 1.14M | { |
272 | 1.14M | memset(a, 0U, 18U * sizeof(uint64_t)); |
273 | 1.14M | KRML_MAYBE_FOR9( |
274 | 1.14M | i0, |
275 | 1.14M | 0U, |
276 | 1.14M | 9U, |
277 | 1.14M | 1U, |
278 | 1.14M | uint64_t *ab = b; |
279 | 1.14M | uint64_t a_j = b[i0]; |
280 | 1.14M | uint64_t *res_j = a + i0; |
281 | 1.14M | uint64_t c = 0ULL; |
282 | 1.14M | for (uint32_t i = 0U; i < i0 / 4U; i++) { |
283 | 1.14M | uint64_t a_i = ab[4U * i]; |
284 | 1.14M | uint64_t *res_i0 = res_j + 4U * i; |
285 | 1.14M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0); |
286 | 1.14M | uint64_t a_i0 = ab[4U * i + 1U]; |
287 | 1.14M | uint64_t *res_i1 = res_j + 4U * i + 1U; |
288 | 1.14M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1); |
289 | 1.14M | uint64_t a_i1 = ab[4U * i + 2U]; |
290 | 1.14M | uint64_t *res_i2 = res_j + 4U * i + 2U; |
291 | 1.14M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2); |
292 | 1.14M | uint64_t a_i2 = ab[4U * i + 3U]; |
293 | 1.14M | uint64_t *res_i = res_j + 4U * i + 3U; |
294 | 1.14M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i); |
295 | 1.14M | } for (uint32_t i = i0 / 4U * 4U; i < i0; i++) { |
296 | 1.14M | uint64_t a_i = ab[i]; |
297 | 1.14M | uint64_t *res_i = res_j + i; |
298 | 1.14M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i); |
299 | 1.14M | } uint64_t r = c; |
300 | 1.14M | a[i0 + i0] = r;); |
301 | 1.14M | uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(18U, a, a, a); |
302 | 1.14M | KRML_MAYBE_UNUSED_VAR(c0); |
303 | 1.14M | uint64_t tmp[18U] = { 0U }; |
304 | 1.14M | KRML_MAYBE_FOR9(i, |
305 | 1.14M | 0U, |
306 | 1.14M | 9U, |
307 | 1.14M | 1U, |
308 | 1.14M | FStar_UInt128_uint128 res = FStar_UInt128_mul_wide(b[i], b[i]); |
309 | 1.14M | uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, 64U)); |
310 | 1.14M | uint64_t lo = FStar_UInt128_uint128_to_uint64(res); |
311 | 1.14M | tmp[2U * i] = lo; |
312 | 1.14M | tmp[2U * i + 1U] = hi;); |
313 | 1.14M | uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(18U, a, tmp, a); |
314 | 1.14M | KRML_MAYBE_UNUSED_VAR(c1); |
315 | 1.14M | } |
316 | | |
317 | | static inline void |
318 | | bn_to_bytes_be(uint8_t *a, uint64_t *b) |
319 | 474 | { |
320 | 474 | uint8_t tmp[72U] = { 0U }; |
321 | 474 | KRML_MAYBE_FOR9(i, 0U, 9U, 1U, store64_be(tmp + i * 8U, b[9U - i - 1U]);); |
322 | 474 | memcpy(a, tmp + 6U, 66U * sizeof(uint8_t)); |
323 | 474 | } |
324 | | |
325 | | static inline void |
326 | | bn_from_bytes_be(uint64_t *a, uint8_t *b) |
327 | 2.66k | { |
328 | 2.66k | uint8_t tmp[72U] = { 0U }; |
329 | 2.66k | memcpy(tmp + 6U, b, 66U * sizeof(uint8_t)); |
330 | 2.66k | KRML_MAYBE_FOR9(i, |
331 | 2.66k | 0U, |
332 | 2.66k | 9U, |
333 | 2.66k | 1U, |
334 | 2.66k | uint64_t *os = a; |
335 | 2.66k | uint64_t u = load64_be(tmp + (9U - i - 1U) * 8U); |
336 | 2.66k | uint64_t x = u; |
337 | 2.66k | os[i] = x;); |
338 | 2.66k | } |
339 | | |
340 | | static inline void |
341 | | p521_make_prime(uint64_t *n) |
342 | 13.1M | { |
343 | 13.1M | n[0U] = 0xffffffffffffffffULL; |
344 | 13.1M | n[1U] = 0xffffffffffffffffULL; |
345 | 13.1M | n[2U] = 0xffffffffffffffffULL; |
346 | 13.1M | n[3U] = 0xffffffffffffffffULL; |
347 | 13.1M | n[4U] = 0xffffffffffffffffULL; |
348 | 13.1M | n[5U] = 0xffffffffffffffffULL; |
349 | 13.1M | n[6U] = 0xffffffffffffffffULL; |
350 | 13.1M | n[7U] = 0xffffffffffffffffULL; |
351 | 13.1M | n[8U] = 0x1ffULL; |
352 | 13.1M | } |
353 | | |
354 | | static inline void |
355 | | p521_make_order(uint64_t *n) |
356 | 101k | { |
357 | 101k | n[0U] = 0xbb6fb71e91386409ULL; |
358 | 101k | n[1U] = 0x3bb5c9b8899c47aeULL; |
359 | 101k | n[2U] = 0x7fcc0148f709a5d0ULL; |
360 | 101k | n[3U] = 0x51868783bf2f966bULL; |
361 | 101k | n[4U] = 0xfffffffffffffffaULL; |
362 | 101k | n[5U] = 0xffffffffffffffffULL; |
363 | 101k | n[6U] = 0xffffffffffffffffULL; |
364 | 101k | n[7U] = 0xffffffffffffffffULL; |
365 | 101k | n[8U] = 0x1ffULL; |
366 | 101k | } |
367 | | |
368 | | static inline void |
369 | | p521_make_a_coeff(uint64_t *a) |
370 | 824 | { |
371 | 824 | a[0U] = 0xfe7fffffffffffffULL; |
372 | 824 | a[1U] = 0xffffffffffffffffULL; |
373 | 824 | a[2U] = 0xffffffffffffffffULL; |
374 | 824 | a[3U] = 0xffffffffffffffffULL; |
375 | 824 | a[4U] = 0xffffffffffffffffULL; |
376 | 824 | a[5U] = 0xffffffffffffffffULL; |
377 | 824 | a[6U] = 0xffffffffffffffffULL; |
378 | 824 | a[7U] = 0xffffffffffffffffULL; |
379 | 824 | a[8U] = 0x01ffULL; |
380 | 824 | } |
381 | | |
382 | | static inline void |
383 | | p521_make_b_coeff(uint64_t *b) |
384 | 719k | { |
385 | 719k | b[0U] = 0x8014654fae586387ULL; |
386 | 719k | b[1U] = 0x78f7a28fea35a81fULL; |
387 | 719k | b[2U] = 0x839ab9efc41e961aULL; |
388 | 719k | b[3U] = 0xbd8b29605e9dd8dfULL; |
389 | 719k | b[4U] = 0xf0ab0c9ca8f63f49ULL; |
390 | 719k | b[5U] = 0xf9dc5a44c8c77884ULL; |
391 | 719k | b[6U] = 0x77516d392dccd98aULL; |
392 | 719k | b[7U] = 0x0fc94d10d05b42a0ULL; |
393 | 719k | b[8U] = 0x4dULL; |
394 | 719k | } |
395 | | |
396 | | static inline void |
397 | | p521_make_g_x(uint64_t *n) |
398 | 378 | { |
399 | 378 | n[0U] = 0xb331a16381adc101ULL; |
400 | 378 | n[1U] = 0x4dfcbf3f18e172deULL; |
401 | 378 | n[2U] = 0x6f19a459e0c2b521ULL; |
402 | 378 | n[3U] = 0x947f0ee093d17fd4ULL; |
403 | 378 | n[4U] = 0xdd50a5af3bf7f3acULL; |
404 | 378 | n[5U] = 0x90fc1457b035a69eULL; |
405 | 378 | n[6U] = 0x214e32409c829fdaULL; |
406 | 378 | n[7U] = 0xe6cf1f65b311cadaULL; |
407 | 378 | n[8U] = 0x74ULL; |
408 | 378 | } |
409 | | |
410 | | static inline void |
411 | | p521_make_g_y(uint64_t *n) |
412 | 378 | { |
413 | 378 | n[0U] = 0x28460e4a5a9e268eULL; |
414 | 378 | n[1U] = 0x20445f4a3b4fe8b3ULL; |
415 | 378 | n[2U] = 0xb09a9e3843513961ULL; |
416 | 378 | n[3U] = 0x2062a85c809fd683ULL; |
417 | 378 | n[4U] = 0x164bf7394caf7a13ULL; |
418 | 378 | n[5U] = 0x340bd7de8b939f33ULL; |
419 | 378 | n[6U] = 0xeccc7aa224abcda2ULL; |
420 | 378 | n[7U] = 0x022e452fda163e8dULL; |
421 | 378 | n[8U] = 0x1e0ULL; |
422 | 378 | } |
423 | | |
424 | | static inline void |
425 | | p521_make_fmont_R2(uint64_t *n) |
426 | 2.83k | { |
427 | 2.83k | n[0U] = 0x0ULL; |
428 | 2.83k | n[1U] = 0x400000000000ULL; |
429 | 2.83k | n[2U] = 0x0ULL; |
430 | 2.83k | n[3U] = 0x0ULL; |
431 | 2.83k | n[4U] = 0x0ULL; |
432 | 2.83k | n[5U] = 0x0ULL; |
433 | 2.83k | n[6U] = 0x0ULL; |
434 | 2.83k | n[7U] = 0x0ULL; |
435 | 2.83k | n[8U] = 0x0ULL; |
436 | 2.83k | } |
437 | | |
438 | | static inline void |
439 | | p521_make_fzero(uint64_t *n) |
440 | 1.08k | { |
441 | 1.08k | memset(n, 0U, 9U * sizeof(uint64_t)); |
442 | 1.08k | n[0U] = 0ULL; |
443 | 1.08k | } |
444 | | |
445 | | static inline void |
446 | | p521_make_fone(uint64_t *n) |
447 | 1.90k | { |
448 | 1.90k | n[0U] = 0x80000000000000ULL; |
449 | 1.90k | n[1U] = 0x0ULL; |
450 | 1.90k | n[2U] = 0x0ULL; |
451 | 1.90k | n[3U] = 0x0ULL; |
452 | 1.90k | n[4U] = 0x0ULL; |
453 | 1.90k | n[5U] = 0x0ULL; |
454 | 1.90k | n[6U] = 0x0ULL; |
455 | 1.90k | n[7U] = 0x0ULL; |
456 | 1.90k | n[8U] = 0x0ULL; |
457 | 1.90k | } |
458 | | |
459 | | static inline void |
460 | | p521_make_qone(uint64_t *f) |
461 | 152 | { |
462 | 152 | f[0U] = 0xfb80000000000000ULL; |
463 | 152 | f[1U] = 0x28a2482470b763cdULL; |
464 | 152 | f[2U] = 0x17e2251b23bb31dcULL; |
465 | 152 | f[3U] = 0xca4019ff5b847b2dULL; |
466 | 152 | f[4U] = 0x2d73cbc3e206834ULL; |
467 | 152 | f[5U] = 0x0ULL; |
468 | 152 | f[6U] = 0x0ULL; |
469 | 152 | f[7U] = 0x0ULL; |
470 | 152 | f[8U] = 0x0ULL; |
471 | 152 | } |
472 | | |
473 | | static inline void |
474 | | fmont_reduction(uint64_t *res, uint64_t *x) |
475 | 5.00M | { |
476 | 5.00M | uint64_t n[9U] = { 0U }; |
477 | 5.00M | p521_make_prime(n); |
478 | 5.00M | uint64_t c0 = 0ULL; |
479 | 5.00M | KRML_MAYBE_FOR9( |
480 | 5.00M | i0, |
481 | 5.00M | 0U, |
482 | 5.00M | 9U, |
483 | 5.00M | 1U, |
484 | 5.00M | uint64_t qj = 1ULL * x[i0]; |
485 | 5.00M | uint64_t *res_j0 = x + i0; |
486 | 5.00M | uint64_t c = 0ULL; |
487 | 5.00M | KRML_MAYBE_FOR2(i, |
488 | 5.00M | 0U, |
489 | 5.00M | 2U, |
490 | 5.00M | 1U, |
491 | 5.00M | uint64_t a_i = n[4U * i]; |
492 | 5.00M | uint64_t *res_i0 = res_j0 + 4U * i; |
493 | 5.00M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0); |
494 | 5.00M | uint64_t a_i0 = n[4U * i + 1U]; |
495 | 5.00M | uint64_t *res_i1 = res_j0 + 4U * i + 1U; |
496 | 5.00M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1); |
497 | 5.00M | uint64_t a_i1 = n[4U * i + 2U]; |
498 | 5.00M | uint64_t *res_i2 = res_j0 + 4U * i + 2U; |
499 | 5.00M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2); |
500 | 5.00M | uint64_t a_i2 = n[4U * i + 3U]; |
501 | 5.00M | uint64_t *res_i = res_j0 + 4U * i + 3U; |
502 | 5.00M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i);); |
503 | 5.00M | { |
504 | 5.00M | uint64_t a_i = n[8U]; |
505 | 5.00M | uint64_t *res_i = res_j0 + 8U; |
506 | 5.00M | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i); |
507 | 5.00M | } uint64_t r = c; |
508 | 5.00M | uint64_t c1 = r; |
509 | 5.00M | uint64_t *resb = x + 9U + i0; |
510 | 5.00M | uint64_t res_j = x[9U + i0]; |
511 | 5.00M | c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); |
512 | 5.00M | memcpy(res, x + 9U, 9U * sizeof(uint64_t)); |
513 | 5.00M | uint64_t c00 = c0; |
514 | 5.00M | uint64_t tmp[9U] = { 0U }; |
515 | 5.00M | uint64_t c = 0ULL; |
516 | 5.00M | KRML_MAYBE_FOR2(i, |
517 | 5.00M | 0U, |
518 | 5.00M | 2U, |
519 | 5.00M | 1U, |
520 | 5.00M | uint64_t t1 = res[4U * i]; |
521 | 5.00M | uint64_t t20 = n[4U * i]; |
522 | 5.00M | uint64_t *res_i0 = tmp + 4U * i; |
523 | 5.00M | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); |
524 | 5.00M | uint64_t t10 = res[4U * i + 1U]; |
525 | 5.00M | uint64_t t21 = n[4U * i + 1U]; |
526 | 5.00M | uint64_t *res_i1 = tmp + 4U * i + 1U; |
527 | 5.00M | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); |
528 | 5.00M | uint64_t t11 = res[4U * i + 2U]; |
529 | 5.00M | uint64_t t22 = n[4U * i + 2U]; |
530 | 5.00M | uint64_t *res_i2 = tmp + 4U * i + 2U; |
531 | 5.00M | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); |
532 | 5.00M | uint64_t t12 = res[4U * i + 3U]; |
533 | 5.00M | uint64_t t2 = n[4U * i + 3U]; |
534 | 5.00M | uint64_t *res_i = tmp + 4U * i + 3U; |
535 | 5.00M | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i);); |
536 | 5.00M | { |
537 | 5.00M | uint64_t t1 = res[8U]; |
538 | 5.00M | uint64_t t2 = n[8U]; |
539 | 5.00M | uint64_t *res_i = tmp + 8U; |
540 | 5.00M | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); |
541 | 5.00M | } |
542 | 5.00M | uint64_t c1 = c; |
543 | 5.00M | uint64_t c2 = c00 - c1; |
544 | 5.00M | KRML_MAYBE_FOR9(i, |
545 | 5.00M | 0U, |
546 | 5.00M | 9U, |
547 | 5.00M | 1U, |
548 | 5.00M | uint64_t *os = res; |
549 | 5.00M | uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); |
550 | 5.00M | os[i] = x1;); |
551 | 5.00M | } |
552 | | |
553 | | static inline void |
554 | | qmont_reduction(uint64_t *res, uint64_t *x) |
555 | 100k | { |
556 | 100k | uint64_t n[9U] = { 0U }; |
557 | 100k | p521_make_order(n); |
558 | 100k | uint64_t c0 = 0ULL; |
559 | 100k | KRML_MAYBE_FOR9( |
560 | 100k | i0, |
561 | 100k | 0U, |
562 | 100k | 9U, |
563 | 100k | 1U, |
564 | 100k | uint64_t qj = 2103001588584519111ULL * x[i0]; |
565 | 100k | uint64_t *res_j0 = x + i0; |
566 | 100k | uint64_t c = 0ULL; |
567 | 100k | KRML_MAYBE_FOR2(i, |
568 | 100k | 0U, |
569 | 100k | 2U, |
570 | 100k | 1U, |
571 | 100k | uint64_t a_i = n[4U * i]; |
572 | 100k | uint64_t *res_i0 = res_j0 + 4U * i; |
573 | 100k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0); |
574 | 100k | uint64_t a_i0 = n[4U * i + 1U]; |
575 | 100k | uint64_t *res_i1 = res_j0 + 4U * i + 1U; |
576 | 100k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1); |
577 | 100k | uint64_t a_i1 = n[4U * i + 2U]; |
578 | 100k | uint64_t *res_i2 = res_j0 + 4U * i + 2U; |
579 | 100k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2); |
580 | 100k | uint64_t a_i2 = n[4U * i + 3U]; |
581 | 100k | uint64_t *res_i = res_j0 + 4U * i + 3U; |
582 | 100k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i);); |
583 | 100k | { |
584 | 100k | uint64_t a_i = n[8U]; |
585 | 100k | uint64_t *res_i = res_j0 + 8U; |
586 | 100k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i); |
587 | 100k | } uint64_t r = c; |
588 | 100k | uint64_t c1 = r; |
589 | 100k | uint64_t *resb = x + 9U + i0; |
590 | 100k | uint64_t res_j = x[9U + i0]; |
591 | 100k | c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); |
592 | 100k | memcpy(res, x + 9U, 9U * sizeof(uint64_t)); |
593 | 100k | uint64_t c00 = c0; |
594 | 100k | uint64_t tmp[9U] = { 0U }; |
595 | 100k | uint64_t c = 0ULL; |
596 | 100k | KRML_MAYBE_FOR2(i, |
597 | 100k | 0U, |
598 | 100k | 2U, |
599 | 100k | 1U, |
600 | 100k | uint64_t t1 = res[4U * i]; |
601 | 100k | uint64_t t20 = n[4U * i]; |
602 | 100k | uint64_t *res_i0 = tmp + 4U * i; |
603 | 100k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); |
604 | 100k | uint64_t t10 = res[4U * i + 1U]; |
605 | 100k | uint64_t t21 = n[4U * i + 1U]; |
606 | 100k | uint64_t *res_i1 = tmp + 4U * i + 1U; |
607 | 100k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); |
608 | 100k | uint64_t t11 = res[4U * i + 2U]; |
609 | 100k | uint64_t t22 = n[4U * i + 2U]; |
610 | 100k | uint64_t *res_i2 = tmp + 4U * i + 2U; |
611 | 100k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); |
612 | 100k | uint64_t t12 = res[4U * i + 3U]; |
613 | 100k | uint64_t t2 = n[4U * i + 3U]; |
614 | 100k | uint64_t *res_i = tmp + 4U * i + 3U; |
615 | 100k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i);); |
616 | 100k | { |
617 | 100k | uint64_t t1 = res[8U]; |
618 | 100k | uint64_t t2 = n[8U]; |
619 | 100k | uint64_t *res_i = tmp + 8U; |
620 | 100k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i); |
621 | 100k | } |
622 | 100k | uint64_t c1 = c; |
623 | 100k | uint64_t c2 = c00 - c1; |
624 | 100k | KRML_MAYBE_FOR9(i, |
625 | 100k | 0U, |
626 | 100k | 9U, |
627 | 100k | 1U, |
628 | 100k | uint64_t *os = res; |
629 | 100k | uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); |
630 | 100k | os[i] = x1;); |
631 | 100k | } |
632 | | |
633 | | static inline uint64_t |
634 | | bn_is_lt_prime_mask(uint64_t *f) |
635 | 1.69k | { |
636 | 1.69k | uint64_t tmp[9U] = { 0U }; |
637 | 1.69k | p521_make_prime(tmp); |
638 | 1.69k | uint64_t c = bn_sub(tmp, f, tmp); |
639 | 1.69k | uint64_t m = FStar_UInt64_gte_mask(c, 0ULL) & ~FStar_UInt64_eq_mask(c, 0ULL); |
640 | 1.69k | return m; |
641 | 1.69k | } |
642 | | |
643 | | static inline void |
644 | | fadd0(uint64_t *a, uint64_t *b, uint64_t *c) |
645 | 5.76M | { |
646 | 5.76M | uint64_t n[9U] = { 0U }; |
647 | 5.76M | p521_make_prime(n); |
648 | 5.76M | bn_add_mod(a, n, b, c); |
649 | 5.76M | } |
650 | | |
651 | | static inline void |
652 | | fsub0(uint64_t *a, uint64_t *b, uint64_t *c) |
653 | 2.37M | { |
654 | 2.37M | uint64_t n[9U] = { 0U }; |
655 | 2.37M | p521_make_prime(n); |
656 | 2.37M | bn_sub_mod(a, n, b, c); |
657 | 2.37M | } |
658 | | |
659 | | static inline void |
660 | | fmul0(uint64_t *a, uint64_t *b, uint64_t *c) |
661 | 3.93M | { |
662 | 3.93M | uint64_t tmp[18U] = { 0U }; |
663 | 3.93M | bn_mul(tmp, b, c); |
664 | 3.93M | fmont_reduction(a, tmp); |
665 | 3.93M | } |
666 | | |
667 | | static inline void |
668 | | fsqr0(uint64_t *a, uint64_t *b) |
669 | 1.06M | { |
670 | 1.06M | uint64_t tmp[18U] = { 0U }; |
671 | 1.06M | bn_sqr(tmp, b); |
672 | 1.06M | fmont_reduction(a, tmp); |
673 | 1.06M | } |
674 | | |
675 | | static inline void |
676 | | from_mont(uint64_t *a, uint64_t *b) |
677 | 626 | { |
678 | 626 | uint64_t tmp[18U] = { 0U }; |
679 | 626 | memcpy(tmp, b, 9U * sizeof(uint64_t)); |
680 | 626 | fmont_reduction(a, tmp); |
681 | 626 | } |
682 | | |
683 | | static inline void |
684 | | to_mont(uint64_t *a, uint64_t *b) |
685 | 2.83k | { |
686 | 2.83k | uint64_t r2modn[9U] = { 0U }; |
687 | 2.83k | p521_make_fmont_R2(r2modn); |
688 | 2.83k | uint64_t tmp[18U] = { 0U }; |
689 | 2.83k | bn_mul(tmp, b, r2modn); |
690 | 2.83k | fmont_reduction(a, tmp); |
691 | 2.83k | } |
692 | | |
693 | | static inline void |
694 | | p521_finv(uint64_t *res, uint64_t *a) |
695 | 389 | { |
696 | 389 | uint64_t b[9U] = { 0U }; |
697 | 389 | b[0U] = 0xfffffffffffffffdULL; |
698 | 389 | b[1U] = 0xffffffffffffffffULL; |
699 | 389 | b[2U] = 0xffffffffffffffffULL; |
700 | 389 | b[3U] = 0xffffffffffffffffULL; |
701 | 389 | b[4U] = 0xffffffffffffffffULL; |
702 | 389 | b[5U] = 0xffffffffffffffffULL; |
703 | 389 | b[6U] = 0xffffffffffffffffULL; |
704 | 389 | b[7U] = 0xffffffffffffffffULL; |
705 | 389 | b[8U] = 0x1ffULL; |
706 | 389 | uint64_t tmp[9U] = { 0U }; |
707 | 389 | memcpy(tmp, a, 9U * sizeof(uint64_t)); |
708 | 389 | uint64_t table[288U] = { 0U }; |
709 | 389 | uint64_t tmp1[9U] = { 0U }; |
710 | 389 | uint64_t *t0 = table; |
711 | 389 | uint64_t *t1 = table + 9U; |
712 | 389 | p521_make_fone(t0); |
713 | 389 | memcpy(t1, tmp, 9U * sizeof(uint64_t)); |
714 | 389 | KRML_MAYBE_FOR15(i, |
715 | 389 | 0U, |
716 | 389 | 15U, |
717 | 389 | 1U, |
718 | 389 | uint64_t *t11 = table + (i + 1U) * 9U; |
719 | 389 | fsqr0(tmp1, t11); |
720 | 389 | memcpy(table + (2U * i + 2U) * 9U, tmp1, 9U * sizeof(uint64_t)); |
721 | 389 | uint64_t *t2 = table + (2U * i + 2U) * 9U; |
722 | 389 | fmul0(tmp1, tmp, t2); |
723 | 389 | memcpy(table + (2U * i + 3U) * 9U, tmp1, 9U * sizeof(uint64_t));); |
724 | 389 | uint32_t i0 = 520U; |
725 | 389 | uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, i0, 5U); |
726 | 389 | memcpy(res, (uint64_t *)table, 9U * sizeof(uint64_t)); |
727 | 12.4k | for (uint32_t i1 = 0U; i1 < 31U; i1++) { |
728 | 12.0k | uint64_t c = FStar_UInt64_eq_mask(bits_c, (uint64_t)(i1 + 1U)); |
729 | 12.0k | const uint64_t *res_j = table + (i1 + 1U) * 9U; |
730 | 12.0k | KRML_MAYBE_FOR9(i, |
731 | 12.0k | 0U, |
732 | 12.0k | 9U, |
733 | 12.0k | 1U, |
734 | 12.0k | uint64_t *os = res; |
735 | 12.0k | uint64_t x = (c & res_j[i]) | (~c & res[i]); |
736 | 12.0k | os[i] = x;); |
737 | 12.0k | } |
738 | 389 | uint64_t tmp10[9U] = { 0U }; |
739 | 40.8k | for (uint32_t i1 = 0U; i1 < 104U; i1++) { |
740 | 40.4k | KRML_MAYBE_FOR5(i, 0U, 5U, 1U, fsqr0(res, res);); |
741 | 40.4k | uint32_t k = 520U - 5U * i1 - 5U; |
742 | 40.4k | uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, k, 5U); |
743 | 40.4k | memcpy(tmp10, (uint64_t *)table, 9U * sizeof(uint64_t)); |
744 | 1.29M | for (uint32_t i2 = 0U; i2 < 31U; i2++) { |
745 | 1.25M | uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + 1U)); |
746 | 1.25M | const uint64_t *res_j = table + (i2 + 1U) * 9U; |
747 | 1.25M | KRML_MAYBE_FOR9(i, |
748 | 1.25M | 0U, |
749 | 1.25M | 9U, |
750 | 1.25M | 1U, |
751 | 1.25M | uint64_t *os = tmp10; |
752 | 1.25M | uint64_t x = (c & res_j[i]) | (~c & tmp10[i]); |
753 | 1.25M | os[i] = x;); |
754 | 1.25M | } |
755 | 40.4k | fmul0(res, res, tmp10); |
756 | 40.4k | } |
757 | 389 | } |
758 | | |
759 | | static inline void |
760 | | p521_fsqrt(uint64_t *res, uint64_t *a) |
761 | 0 | { |
762 | 0 | uint64_t b[9U] = { 0U }; |
763 | 0 | b[0U] = 0x0ULL; |
764 | 0 | b[1U] = 0x0ULL; |
765 | 0 | b[2U] = 0x0ULL; |
766 | 0 | b[3U] = 0x0ULL; |
767 | 0 | b[4U] = 0x0ULL; |
768 | 0 | b[5U] = 0x0ULL; |
769 | 0 | b[6U] = 0x0ULL; |
770 | 0 | b[7U] = 0x0ULL; |
771 | 0 | b[8U] = 0x80ULL; |
772 | 0 | uint64_t tmp[9U] = { 0U }; |
773 | 0 | memcpy(tmp, a, 9U * sizeof(uint64_t)); |
774 | 0 | uint64_t table[288U] = { 0U }; |
775 | 0 | uint64_t tmp1[9U] = { 0U }; |
776 | 0 | uint64_t *t0 = table; |
777 | 0 | uint64_t *t1 = table + 9U; |
778 | 0 | p521_make_fone(t0); |
779 | 0 | memcpy(t1, tmp, 9U * sizeof(uint64_t)); |
780 | 0 | KRML_MAYBE_FOR15(i, |
781 | 0 | 0U, |
782 | 0 | 15U, |
783 | 0 | 1U, |
784 | 0 | uint64_t *t11 = table + (i + 1U) * 9U; |
785 | 0 | fsqr0(tmp1, t11); |
786 | 0 | memcpy(table + (2U * i + 2U) * 9U, tmp1, 9U * sizeof(uint64_t)); |
787 | 0 | uint64_t *t2 = table + (2U * i + 2U) * 9U; |
788 | 0 | fmul0(tmp1, tmp, t2); |
789 | 0 | memcpy(table + (2U * i + 3U) * 9U, tmp1, 9U * sizeof(uint64_t));); |
790 | 0 | uint32_t i0 = 520U; |
791 | 0 | uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, i0, 5U); |
792 | 0 | memcpy(res, (uint64_t *)table, 9U * sizeof(uint64_t)); |
793 | 0 | for (uint32_t i1 = 0U; i1 < 31U; i1++) { |
794 | 0 | uint64_t c = FStar_UInt64_eq_mask(bits_c, (uint64_t)(i1 + 1U)); |
795 | 0 | const uint64_t *res_j = table + (i1 + 1U) * 9U; |
796 | 0 | KRML_MAYBE_FOR9(i, |
797 | 0 | 0U, |
798 | 0 | 9U, |
799 | 0 | 1U, |
800 | 0 | uint64_t *os = res; |
801 | 0 | uint64_t x = (c & res_j[i]) | (~c & res[i]); |
802 | 0 | os[i] = x;); |
803 | 0 | } |
804 | 0 | uint64_t tmp10[9U] = { 0U }; |
805 | 0 | for (uint32_t i1 = 0U; i1 < 104U; i1++) { |
806 | 0 | KRML_MAYBE_FOR5(i, 0U, 5U, 1U, fsqr0(res, res);); |
807 | 0 | uint32_t k = 520U - 5U * i1 - 5U; |
808 | 0 | uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, k, 5U); |
809 | 0 | memcpy(tmp10, (uint64_t *)table, 9U * sizeof(uint64_t)); |
810 | 0 | for (uint32_t i2 = 0U; i2 < 31U; i2++) { |
811 | 0 | uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + 1U)); |
812 | 0 | const uint64_t *res_j = table + (i2 + 1U) * 9U; |
813 | 0 | KRML_MAYBE_FOR9(i, |
814 | 0 | 0U, |
815 | 0 | 9U, |
816 | 0 | 1U, |
817 | 0 | uint64_t *os = tmp10; |
818 | 0 | uint64_t x = (c & res_j[i]) | (~c & tmp10[i]); |
819 | 0 | os[i] = x;); |
820 | 0 | } |
821 | 0 | fmul0(res, res, tmp10); |
822 | 0 | } |
823 | 0 | } |
824 | | |
825 | | static inline uint64_t |
826 | | load_qelem_conditional(uint64_t *a, uint8_t *b) |
827 | 237 | { |
828 | 237 | bn_from_bytes_be(a, b); |
829 | 237 | uint64_t tmp[9U] = { 0U }; |
830 | 237 | p521_make_order(tmp); |
831 | 237 | uint64_t c = bn_sub(tmp, a, tmp); |
832 | 237 | uint64_t is_lt_order = FStar_UInt64_gte_mask(c, 0ULL) & ~FStar_UInt64_eq_mask(c, 0ULL); |
833 | 237 | uint64_t bn_zero[9U] = { 0U }; |
834 | 237 | uint64_t res = bn_is_eq_mask(a, bn_zero); |
835 | 237 | uint64_t is_eq_zero = res; |
836 | 237 | uint64_t is_b_valid = is_lt_order & ~is_eq_zero; |
837 | 237 | uint64_t oneq[9U] = { 0U }; |
838 | 237 | memset(oneq, 0U, 9U * sizeof(uint64_t)); |
839 | 237 | oneq[0U] = 1ULL; |
840 | 237 | KRML_MAYBE_FOR9(i, |
841 | 237 | 0U, |
842 | 237 | 9U, |
843 | 237 | 1U, |
844 | 237 | uint64_t *os = a; |
845 | 237 | uint64_t uu____0 = oneq[i]; |
846 | 237 | uint64_t x = uu____0 ^ (is_b_valid & (a[i] ^ uu____0)); |
847 | 237 | os[i] = x;); |
848 | 237 | return is_b_valid; |
849 | 237 | } |
850 | | |
851 | | static inline void |
852 | | qmod_short(uint64_t *a, uint64_t *b) |
853 | 323 | { |
854 | 323 | uint64_t tmp[9U] = { 0U }; |
855 | 323 | p521_make_order(tmp); |
856 | 323 | uint64_t c = bn_sub(tmp, b, tmp); |
857 | 323 | bn_cmovznz(a, c, tmp, b); |
858 | 323 | } |
859 | | |
860 | | static inline void |
861 | | qadd(uint64_t *a, uint64_t *b, uint64_t *c) |
862 | 0 | { |
863 | 0 | uint64_t n[9U] = { 0U }; |
864 | 0 | p521_make_order(n); |
865 | 0 | bn_add_mod(a, n, b, c); |
866 | 0 | } |
867 | | |
868 | | static inline void |
869 | | qmul(uint64_t *a, uint64_t *b, uint64_t *c) |
870 | 18.3k | { |
871 | 18.3k | uint64_t tmp[18U] = { 0U }; |
872 | 18.3k | bn_mul(tmp, b, c); |
873 | 18.3k | qmont_reduction(a, tmp); |
874 | 18.3k | } |
875 | | |
876 | | static inline void |
877 | | qsqr(uint64_t *a, uint64_t *b) |
878 | 81.3k | { |
879 | 81.3k | uint64_t tmp[18U] = { 0U }; |
880 | 81.3k | bn_sqr(tmp, b); |
881 | 81.3k | qmont_reduction(a, tmp); |
882 | 81.3k | } |
883 | | |
884 | | static inline void |
885 | | from_qmont(uint64_t *a, uint64_t *b) |
886 | 304 | { |
887 | 304 | uint64_t tmp[18U] = { 0U }; |
888 | 304 | memcpy(tmp, b, 9U * sizeof(uint64_t)); |
889 | 304 | qmont_reduction(a, tmp); |
890 | 304 | } |
891 | | |
892 | | static inline void |
893 | | p521_qinv(uint64_t *res, uint64_t *a) |
894 | 152 | { |
895 | 152 | uint64_t b[9U] = { 0U }; |
896 | 152 | b[0U] = 0xbb6fb71e91386407ULL; |
897 | 152 | b[1U] = 0x3bb5c9b8899c47aeULL; |
898 | 152 | b[2U] = 0x7fcc0148f709a5d0ULL; |
899 | 152 | b[3U] = 0x51868783bf2f966bULL; |
900 | 152 | b[4U] = 0xfffffffffffffffaULL; |
901 | 152 | b[5U] = 0xffffffffffffffffULL; |
902 | 152 | b[6U] = 0xffffffffffffffffULL; |
903 | 152 | b[7U] = 0xffffffffffffffffULL; |
904 | 152 | b[8U] = 0x1ffULL; |
905 | 152 | uint64_t tmp[9U] = { 0U }; |
906 | 152 | memcpy(tmp, a, 9U * sizeof(uint64_t)); |
907 | 152 | uint64_t table[288U] = { 0U }; |
908 | 152 | uint64_t tmp1[9U] = { 0U }; |
909 | 152 | uint64_t *t0 = table; |
910 | 152 | uint64_t *t1 = table + 9U; |
911 | 152 | p521_make_qone(t0); |
912 | 152 | memcpy(t1, tmp, 9U * sizeof(uint64_t)); |
913 | 152 | KRML_MAYBE_FOR15(i, |
914 | 152 | 0U, |
915 | 152 | 15U, |
916 | 152 | 1U, |
917 | 152 | uint64_t *t11 = table + (i + 1U) * 9U; |
918 | 152 | qsqr(tmp1, t11); |
919 | 152 | memcpy(table + (2U * i + 2U) * 9U, tmp1, 9U * sizeof(uint64_t)); |
920 | 152 | uint64_t *t2 = table + (2U * i + 2U) * 9U; |
921 | 152 | qmul(tmp1, tmp, t2); |
922 | 152 | memcpy(table + (2U * i + 3U) * 9U, tmp1, 9U * sizeof(uint64_t));); |
923 | 152 | uint32_t i0 = 520U; |
924 | 152 | uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, i0, 5U); |
925 | 152 | memcpy(res, (uint64_t *)table, 9U * sizeof(uint64_t)); |
926 | 4.86k | for (uint32_t i1 = 0U; i1 < 31U; i1++) { |
927 | 4.71k | uint64_t c = FStar_UInt64_eq_mask(bits_c, (uint64_t)(i1 + 1U)); |
928 | 4.71k | const uint64_t *res_j = table + (i1 + 1U) * 9U; |
929 | 4.71k | KRML_MAYBE_FOR9(i, |
930 | 4.71k | 0U, |
931 | 4.71k | 9U, |
932 | 4.71k | 1U, |
933 | 4.71k | uint64_t *os = res; |
934 | 4.71k | uint64_t x = (c & res_j[i]) | (~c & res[i]); |
935 | 4.71k | os[i] = x;); |
936 | 4.71k | } |
937 | 152 | uint64_t tmp10[9U] = { 0U }; |
938 | 15.9k | for (uint32_t i1 = 0U; i1 < 104U; i1++) { |
939 | 15.8k | KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(res, res);); |
940 | 15.8k | uint32_t k = 520U - 5U * i1 - 5U; |
941 | 15.8k | uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, k, 5U); |
942 | 15.8k | memcpy(tmp10, (uint64_t *)table, 9U * sizeof(uint64_t)); |
943 | 505k | for (uint32_t i2 = 0U; i2 < 31U; i2++) { |
944 | 490k | uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + 1U)); |
945 | 490k | const uint64_t *res_j = table + (i2 + 1U) * 9U; |
946 | 490k | KRML_MAYBE_FOR9(i, |
947 | 490k | 0U, |
948 | 490k | 9U, |
949 | 490k | 1U, |
950 | 490k | uint64_t *os = tmp10; |
951 | 490k | uint64_t x = (c & res_j[i]) | (~c & tmp10[i]); |
952 | 490k | os[i] = x;); |
953 | 490k | } |
954 | 15.8k | qmul(res, res, tmp10); |
955 | 15.8k | } |
956 | 152 | } |
957 | | |
958 | | static inline void |
959 | | point_add(uint64_t *x, uint64_t *y, uint64_t *xy) |
960 | 74.2k | { |
961 | 74.2k | uint64_t tmp[81U] = { 0U }; |
962 | 74.2k | uint64_t *t0 = tmp; |
963 | 74.2k | uint64_t *t1 = tmp + 54U; |
964 | 74.2k | uint64_t *x3 = t1; |
965 | 74.2k | uint64_t *y3 = t1 + 9U; |
966 | 74.2k | uint64_t *z3 = t1 + 18U; |
967 | 74.2k | uint64_t *t01 = t0; |
968 | 74.2k | uint64_t *t11 = t0 + 9U; |
969 | 74.2k | uint64_t *t2 = t0 + 18U; |
970 | 74.2k | uint64_t *t3 = t0 + 27U; |
971 | 74.2k | uint64_t *t4 = t0 + 36U; |
972 | 74.2k | uint64_t *t5 = t0 + 45U; |
973 | 74.2k | uint64_t *x1 = x; |
974 | 74.2k | uint64_t *y1 = x + 9U; |
975 | 74.2k | uint64_t *z10 = x + 18U; |
976 | 74.2k | uint64_t *x20 = y; |
977 | 74.2k | uint64_t *y20 = y + 9U; |
978 | 74.2k | uint64_t *z20 = y + 18U; |
979 | 74.2k | fmul0(t01, x1, x20); |
980 | 74.2k | fmul0(t11, y1, y20); |
981 | 74.2k | fmul0(t2, z10, z20); |
982 | 74.2k | fadd0(t3, x1, y1); |
983 | 74.2k | fadd0(t4, x20, y20); |
984 | 74.2k | fmul0(t3, t3, t4); |
985 | 74.2k | fadd0(t4, t01, t11); |
986 | 74.2k | uint64_t *y10 = x + 9U; |
987 | 74.2k | uint64_t *z11 = x + 18U; |
988 | 74.2k | uint64_t *y2 = y + 9U; |
989 | 74.2k | uint64_t *z21 = y + 18U; |
990 | 74.2k | fsub0(t3, t3, t4); |
991 | 74.2k | fadd0(t4, y10, z11); |
992 | 74.2k | fadd0(t5, y2, z21); |
993 | 74.2k | fmul0(t4, t4, t5); |
994 | 74.2k | fadd0(t5, t11, t2); |
995 | 74.2k | fsub0(t4, t4, t5); |
996 | 74.2k | uint64_t *x10 = x; |
997 | 74.2k | uint64_t *z1 = x + 18U; |
998 | 74.2k | uint64_t *x2 = y; |
999 | 74.2k | uint64_t *z2 = y + 18U; |
1000 | 74.2k | fadd0(x3, x10, z1); |
1001 | 74.2k | fadd0(y3, x2, z2); |
1002 | 74.2k | fmul0(x3, x3, y3); |
1003 | 74.2k | fadd0(y3, t01, t2); |
1004 | 74.2k | fsub0(y3, x3, y3); |
1005 | 74.2k | uint64_t b_coeff[9U] = { 0U }; |
1006 | 74.2k | p521_make_b_coeff(b_coeff); |
1007 | 74.2k | fmul0(z3, b_coeff, t2); |
1008 | 74.2k | fsub0(x3, y3, z3); |
1009 | 74.2k | fadd0(z3, x3, x3); |
1010 | 74.2k | fadd0(x3, x3, z3); |
1011 | 74.2k | fsub0(z3, t11, x3); |
1012 | 74.2k | fadd0(x3, t11, x3); |
1013 | 74.2k | uint64_t b_coeff0[9U] = { 0U }; |
1014 | 74.2k | p521_make_b_coeff(b_coeff0); |
1015 | 74.2k | fmul0(y3, b_coeff0, y3); |
1016 | 74.2k | fadd0(t11, t2, t2); |
1017 | 74.2k | fadd0(t2, t11, t2); |
1018 | 74.2k | fsub0(y3, y3, t2); |
1019 | 74.2k | fsub0(y3, y3, t01); |
1020 | 74.2k | fadd0(t11, y3, y3); |
1021 | 74.2k | fadd0(y3, t11, y3); |
1022 | 74.2k | fadd0(t11, t01, t01); |
1023 | 74.2k | fadd0(t01, t11, t01); |
1024 | 74.2k | fsub0(t01, t01, t2); |
1025 | 74.2k | fmul0(t11, t4, y3); |
1026 | 74.2k | fmul0(t2, t01, y3); |
1027 | 74.2k | fmul0(y3, x3, z3); |
1028 | 74.2k | fadd0(y3, y3, t2); |
1029 | 74.2k | fmul0(x3, t3, x3); |
1030 | 74.2k | fsub0(x3, x3, t11); |
1031 | 74.2k | fmul0(z3, t4, z3); |
1032 | 74.2k | fmul0(t11, t3, t01); |
1033 | 74.2k | fadd0(z3, z3, t11); |
1034 | 74.2k | memcpy(xy, t1, 27U * sizeof(uint64_t)); |
1035 | 74.2k | } |
1036 | | |
1037 | | static inline void |
1038 | | point_double(uint64_t *x, uint64_t *xx) |
1039 | 285k | { |
1040 | 285k | uint64_t tmp[45U] = { 0U }; |
1041 | 285k | uint64_t *x1 = x; |
1042 | 285k | uint64_t *z = x + 18U; |
1043 | 285k | uint64_t *x3 = xx; |
1044 | 285k | uint64_t *y3 = xx + 9U; |
1045 | 285k | uint64_t *z3 = xx + 18U; |
1046 | 285k | uint64_t *t0 = tmp; |
1047 | 285k | uint64_t *t1 = tmp + 9U; |
1048 | 285k | uint64_t *t2 = tmp + 18U; |
1049 | 285k | uint64_t *t3 = tmp + 27U; |
1050 | 285k | uint64_t *t4 = tmp + 36U; |
1051 | 285k | uint64_t *x2 = x; |
1052 | 285k | uint64_t *y = x + 9U; |
1053 | 285k | uint64_t *z1 = x + 18U; |
1054 | 285k | fsqr0(t0, x2); |
1055 | 285k | fsqr0(t1, y); |
1056 | 285k | fsqr0(t2, z1); |
1057 | 285k | fmul0(t3, x2, y); |
1058 | 285k | fadd0(t3, t3, t3); |
1059 | 285k | fmul0(t4, y, z1); |
1060 | 285k | fmul0(z3, x1, z); |
1061 | 285k | fadd0(z3, z3, z3); |
1062 | 285k | uint64_t b_coeff[9U] = { 0U }; |
1063 | 285k | p521_make_b_coeff(b_coeff); |
1064 | 285k | fmul0(y3, b_coeff, t2); |
1065 | 285k | fsub0(y3, y3, z3); |
1066 | 285k | fadd0(x3, y3, y3); |
1067 | 285k | fadd0(y3, x3, y3); |
1068 | 285k | fsub0(x3, t1, y3); |
1069 | 285k | fadd0(y3, t1, y3); |
1070 | 285k | fmul0(y3, x3, y3); |
1071 | 285k | fmul0(x3, x3, t3); |
1072 | 285k | fadd0(t3, t2, t2); |
1073 | 285k | fadd0(t2, t2, t3); |
1074 | 285k | uint64_t b_coeff0[9U] = { 0U }; |
1075 | 285k | p521_make_b_coeff(b_coeff0); |
1076 | 285k | fmul0(z3, b_coeff0, z3); |
1077 | 285k | fsub0(z3, z3, t2); |
1078 | 285k | fsub0(z3, z3, t0); |
1079 | 285k | fadd0(t3, z3, z3); |
1080 | 285k | fadd0(z3, z3, t3); |
1081 | 285k | fadd0(t3, t0, t0); |
1082 | 285k | fadd0(t0, t3, t0); |
1083 | 285k | fsub0(t0, t0, t2); |
1084 | 285k | fmul0(t0, t0, z3); |
1085 | 285k | fadd0(y3, y3, t0); |
1086 | 285k | fadd0(t0, t4, t4); |
1087 | 285k | fmul0(z3, t0, z3); |
1088 | 285k | fsub0(x3, x3, z3); |
1089 | 285k | fmul0(z3, t0, t1); |
1090 | 285k | fadd0(z3, z3, z3); |
1091 | 285k | fadd0(z3, z3, z3); |
1092 | 285k | } |
1093 | | |
1094 | | static inline void |
1095 | | point_zero(uint64_t *one) |
1096 | 541 | { |
1097 | 541 | uint64_t *x = one; |
1098 | 541 | uint64_t *y = one + 9U; |
1099 | 541 | uint64_t *z = one + 18U; |
1100 | 541 | p521_make_fzero(x); |
1101 | 541 | p521_make_fone(y); |
1102 | 541 | p521_make_fzero(z); |
1103 | 541 | } |
1104 | | |
1105 | | static inline void |
1106 | | point_mul(uint64_t *res, uint64_t *scalar, uint64_t *p) |
1107 | 541 | { |
1108 | 541 | uint64_t table[432U] = { 0U }; |
1109 | 541 | uint64_t tmp[27U] = { 0U }; |
1110 | 541 | uint64_t *t0 = table; |
1111 | 541 | uint64_t *t1 = table + 27U; |
1112 | 541 | point_zero(t0); |
1113 | 541 | memcpy(t1, p, 27U * sizeof(uint64_t)); |
1114 | 541 | KRML_MAYBE_FOR7(i, |
1115 | 541 | 0U, |
1116 | 541 | 7U, |
1117 | 541 | 1U, |
1118 | 541 | uint64_t *t11 = table + (i + 1U) * 27U; |
1119 | 541 | point_double(t11, tmp); |
1120 | 541 | memcpy(table + (2U * i + 2U) * 27U, tmp, 27U * sizeof(uint64_t)); |
1121 | 541 | uint64_t *t2 = table + (2U * i + 2U) * 27U; |
1122 | 541 | point_add(p, t2, tmp); |
1123 | 541 | memcpy(table + (2U * i + 3U) * 27U, tmp, 27U * sizeof(uint64_t));); |
1124 | 541 | uint32_t i0 = 520U; |
1125 | 541 | uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64(9U, scalar, i0, 4U); |
1126 | 541 | memcpy(res, (uint64_t *)table, 27U * sizeof(uint64_t)); |
1127 | 541 | KRML_MAYBE_FOR15( |
1128 | 541 | i1, |
1129 | 541 | 0U, |
1130 | 541 | 15U, |
1131 | 541 | 1U, |
1132 | 541 | uint64_t c = FStar_UInt64_eq_mask(bits_c, (uint64_t)(i1 + 1U)); |
1133 | 541 | const uint64_t *res_j = table + (i1 + 1U) * 27U; |
1134 | 541 | for (uint32_t i = 0U; i < 27U; i++) { |
1135 | 541 | uint64_t *os = res; |
1136 | 541 | uint64_t x = (c & res_j[i]) | (~c & res[i]); |
1137 | 541 | os[i] = x; |
1138 | 541 | }); |
1139 | 541 | uint64_t tmp0[27U] = { 0U }; |
1140 | 70.8k | for (uint32_t i1 = 0U; i1 < 130U; i1++) { |
1141 | 70.3k | KRML_MAYBE_FOR4(i, 0U, 4U, 1U, point_double(res, res);); |
1142 | 70.3k | uint32_t k = 520U - 4U * i1 - 4U; |
1143 | 70.3k | uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(9U, scalar, k, 4U); |
1144 | 70.3k | memcpy(tmp0, (uint64_t *)table, 27U * sizeof(uint64_t)); |
1145 | 70.3k | KRML_MAYBE_FOR15( |
1146 | 70.3k | i2, |
1147 | 70.3k | 0U, |
1148 | 70.3k | 15U, |
1149 | 70.3k | 1U, |
1150 | 70.3k | uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + 1U)); |
1151 | 70.3k | const uint64_t *res_j = table + (i2 + 1U) * 27U; |
1152 | 70.3k | for (uint32_t i = 0U; i < 27U; i++) { |
1153 | 70.3k | uint64_t *os = tmp0; |
1154 | 70.3k | uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); |
1155 | 70.3k | os[i] = x; |
1156 | 70.3k | }); |
1157 | 70.3k | point_add(res, tmp0, res); |
1158 | 70.3k | } |
1159 | 541 | } |
1160 | | |
1161 | | static inline void |
1162 | | point_mul_g(uint64_t *res, uint64_t *scalar) |
1163 | 378 | { |
1164 | 378 | uint64_t g[27U] = { 0U }; |
1165 | 378 | uint64_t *x = g; |
1166 | 378 | uint64_t *y = g + 9U; |
1167 | 378 | uint64_t *z = g + 18U; |
1168 | 378 | p521_make_g_x(x); |
1169 | 378 | p521_make_g_y(y); |
1170 | 378 | p521_make_fone(z); |
1171 | 378 | point_mul(res, scalar, g); |
1172 | 378 | } |
1173 | | |
1174 | | static inline void |
1175 | | point_mul_double_g(uint64_t *res, uint64_t *scalar1, uint64_t *scalar2, uint64_t *p) |
1176 | 152 | { |
1177 | 152 | uint64_t tmp[27U] = { 0U }; |
1178 | 152 | point_mul_g(tmp, scalar1); |
1179 | 152 | point_mul(res, scalar2, p); |
1180 | 152 | point_add(res, tmp, res); |
1181 | 152 | } |
1182 | | |
1183 | | static inline bool |
1184 | | ecdsa_sign_msg_as_qelem( |
1185 | | uint8_t *signature, |
1186 | | uint64_t *m_q, |
1187 | | uint8_t *private_key, |
1188 | | uint8_t *nonce) |
1189 | 0 | { |
1190 | 0 | uint64_t rsdk_q[36U] = { 0U }; |
1191 | 0 | uint64_t *r_q = rsdk_q; |
1192 | 0 | uint64_t *s_q = rsdk_q + 9U; |
1193 | 0 | uint64_t *d_a = rsdk_q + 18U; |
1194 | 0 | uint64_t *k_q = rsdk_q + 27U; |
1195 | 0 | uint64_t is_sk_valid = load_qelem_conditional(d_a, private_key); |
1196 | 0 | uint64_t is_nonce_valid = load_qelem_conditional(k_q, nonce); |
1197 | 0 | uint64_t are_sk_nonce_valid = is_sk_valid & is_nonce_valid; |
1198 | 0 | uint64_t p[27U] = { 0U }; |
1199 | 0 | point_mul_g(p, k_q); |
1200 | 0 | uint64_t zinv[9U] = { 0U }; |
1201 | 0 | uint64_t *px = p; |
1202 | 0 | uint64_t *pz = p + 18U; |
1203 | 0 | p521_finv(zinv, pz); |
1204 | 0 | fmul0(r_q, px, zinv); |
1205 | 0 | from_mont(r_q, r_q); |
1206 | 0 | qmod_short(r_q, r_q); |
1207 | 0 | uint64_t kinv[9U] = { 0U }; |
1208 | 0 | p521_qinv(kinv, k_q); |
1209 | 0 | qmul(s_q, r_q, d_a); |
1210 | 0 | from_qmont(m_q, m_q); |
1211 | 0 | qadd(s_q, m_q, s_q); |
1212 | 0 | qmul(s_q, kinv, s_q); |
1213 | 0 | bn_to_bytes_be(signature, r_q); |
1214 | 0 | bn_to_bytes_be(signature + 66U, s_q); |
1215 | 0 | uint64_t bn_zero0[9U] = { 0U }; |
1216 | 0 | uint64_t res = bn_is_eq_mask(r_q, bn_zero0); |
1217 | 0 | uint64_t is_r_zero = res; |
1218 | 0 | uint64_t bn_zero[9U] = { 0U }; |
1219 | 0 | uint64_t res0 = bn_is_eq_mask(s_q, bn_zero); |
1220 | 0 | uint64_t is_s_zero = res0; |
1221 | 0 | uint64_t m = are_sk_nonce_valid & (~is_r_zero & ~is_s_zero); |
1222 | 0 | bool res1 = m == 0xFFFFFFFFFFFFFFFFULL; |
1223 | 0 | return res1; |
1224 | 0 | } |
1225 | | |
1226 | | static inline bool |
1227 | | ecdsa_verify_msg_as_qelem( |
1228 | | uint64_t *m_q, |
1229 | | uint8_t *public_key, |
1230 | | uint8_t *signature_r, |
1231 | | uint8_t *signature_s) |
1232 | 171 | { |
1233 | 171 | uint64_t tmp[63U] = { 0U }; |
1234 | 171 | uint64_t *pk = tmp; |
1235 | 171 | uint64_t *r_q = tmp + 27U; |
1236 | 171 | uint64_t *s_q = tmp + 36U; |
1237 | 171 | uint64_t *u1 = tmp + 45U; |
1238 | 171 | uint64_t *u2 = tmp + 54U; |
1239 | 171 | uint64_t p_aff[18U] = { 0U }; |
1240 | 171 | uint8_t *p_x = public_key; |
1241 | 171 | uint8_t *p_y = public_key + 66U; |
1242 | 171 | uint64_t *bn_p_x = p_aff; |
1243 | 171 | uint64_t *bn_p_y = p_aff + 9U; |
1244 | 171 | bn_from_bytes_be(bn_p_x, p_x); |
1245 | 171 | bn_from_bytes_be(bn_p_y, p_y); |
1246 | 171 | uint64_t *px0 = p_aff; |
1247 | 171 | uint64_t *py0 = p_aff + 9U; |
1248 | 171 | uint64_t lessX = bn_is_lt_prime_mask(px0); |
1249 | 171 | uint64_t lessY = bn_is_lt_prime_mask(py0); |
1250 | 171 | uint64_t res0 = lessX & lessY; |
1251 | 171 | bool is_xy_valid = res0 == 0xFFFFFFFFFFFFFFFFULL; |
1252 | 171 | bool res; |
1253 | 171 | if (!is_xy_valid) { |
1254 | 0 | res = false; |
1255 | 171 | } else { |
1256 | 171 | uint64_t rp[9U] = { 0U }; |
1257 | 171 | uint64_t tx[9U] = { 0U }; |
1258 | 171 | uint64_t ty[9U] = { 0U }; |
1259 | 171 | uint64_t *px = p_aff; |
1260 | 171 | uint64_t *py = p_aff + 9U; |
1261 | 171 | to_mont(tx, px); |
1262 | 171 | to_mont(ty, py); |
1263 | 171 | uint64_t tmp1[9U] = { 0U }; |
1264 | 171 | fsqr0(rp, tx); |
1265 | 171 | fmul0(rp, rp, tx); |
1266 | 171 | p521_make_a_coeff(tmp1); |
1267 | 171 | fmul0(tmp1, tmp1, tx); |
1268 | 171 | fadd0(rp, tmp1, rp); |
1269 | 171 | p521_make_b_coeff(tmp1); |
1270 | 171 | fadd0(rp, tmp1, rp); |
1271 | 171 | fsqr0(ty, ty); |
1272 | 171 | uint64_t r = bn_is_eq_mask(ty, rp); |
1273 | 171 | uint64_t r0 = r; |
1274 | 171 | bool r1 = r0 == 0xFFFFFFFFFFFFFFFFULL; |
1275 | 171 | res = r1; |
1276 | 171 | } |
1277 | 171 | if (res) { |
1278 | 171 | uint64_t *px = p_aff; |
1279 | 171 | uint64_t *py = p_aff + 9U; |
1280 | 171 | uint64_t *rx = pk; |
1281 | 171 | uint64_t *ry = pk + 9U; |
1282 | 171 | uint64_t *rz = pk + 18U; |
1283 | 171 | to_mont(rx, px); |
1284 | 171 | to_mont(ry, py); |
1285 | 171 | p521_make_fone(rz); |
1286 | 171 | } |
1287 | 171 | bool is_pk_valid = res; |
1288 | 171 | bn_from_bytes_be(r_q, signature_r); |
1289 | 171 | bn_from_bytes_be(s_q, signature_s); |
1290 | 171 | uint64_t tmp10[9U] = { 0U }; |
1291 | 171 | p521_make_order(tmp10); |
1292 | 171 | uint64_t c = bn_sub(tmp10, r_q, tmp10); |
1293 | 171 | uint64_t is_lt_order = FStar_UInt64_gte_mask(c, 0ULL) & ~FStar_UInt64_eq_mask(c, 0ULL); |
1294 | 171 | uint64_t bn_zero0[9U] = { 0U }; |
1295 | 171 | uint64_t res1 = bn_is_eq_mask(r_q, bn_zero0); |
1296 | 171 | uint64_t is_eq_zero = res1; |
1297 | 171 | uint64_t is_r_valid = is_lt_order & ~is_eq_zero; |
1298 | 171 | uint64_t tmp11[9U] = { 0U }; |
1299 | 171 | p521_make_order(tmp11); |
1300 | 171 | uint64_t c0 = bn_sub(tmp11, s_q, tmp11); |
1301 | 171 | uint64_t is_lt_order0 = FStar_UInt64_gte_mask(c0, 0ULL) & ~FStar_UInt64_eq_mask(c0, 0ULL); |
1302 | 171 | uint64_t bn_zero1[9U] = { 0U }; |
1303 | 171 | uint64_t res2 = bn_is_eq_mask(s_q, bn_zero1); |
1304 | 171 | uint64_t is_eq_zero0 = res2; |
1305 | 171 | uint64_t is_s_valid = is_lt_order0 & ~is_eq_zero0; |
1306 | 171 | bool is_rs_valid = is_r_valid == 0xFFFFFFFFFFFFFFFFULL && is_s_valid == 0xFFFFFFFFFFFFFFFFULL; |
1307 | 171 | if (!(is_pk_valid && is_rs_valid)) { |
1308 | 19 | return false; |
1309 | 19 | } |
1310 | 152 | uint64_t sinv[9U] = { 0U }; |
1311 | 152 | p521_qinv(sinv, s_q); |
1312 | 152 | uint64_t tmp1[9U] = { 0U }; |
1313 | 152 | from_qmont(tmp1, m_q); |
1314 | 152 | qmul(u1, sinv, tmp1); |
1315 | 152 | uint64_t tmp12[9U] = { 0U }; |
1316 | 152 | from_qmont(tmp12, r_q); |
1317 | 152 | qmul(u2, sinv, tmp12); |
1318 | 152 | uint64_t res3[27U] = { 0U }; |
1319 | 152 | point_mul_double_g(res3, u1, u2, pk); |
1320 | 152 | uint64_t *pz0 = res3 + 18U; |
1321 | 152 | uint64_t bn_zero[9U] = { 0U }; |
1322 | 152 | uint64_t res10 = bn_is_eq_mask(pz0, bn_zero); |
1323 | 152 | uint64_t m = res10; |
1324 | 152 | if (m == 0xFFFFFFFFFFFFFFFFULL) { |
1325 | 0 | return false; |
1326 | 0 | } |
1327 | 152 | uint64_t x[9U] = { 0U }; |
1328 | 152 | uint64_t zinv[9U] = { 0U }; |
1329 | 152 | uint64_t *px = res3; |
1330 | 152 | uint64_t *pz = res3 + 18U; |
1331 | 152 | p521_finv(zinv, pz); |
1332 | 152 | fmul0(x, px, zinv); |
1333 | 152 | from_mont(x, x); |
1334 | 152 | qmod_short(x, x); |
1335 | 152 | uint64_t m0 = bn_is_eq_mask(x, r_q); |
1336 | 152 | bool res11 = m0 == 0xFFFFFFFFFFFFFFFFULL; |
1337 | 152 | return res11; |
1338 | 152 | } |
1339 | | |
1340 | | /******************************************************************************* |
1341 | | |
1342 | | Verified C library for ECDSA and ECDH functions over the P-521 NIST curve. |
1343 | | |
1344 | | This module implements signing and verification, key validation, conversions |
1345 | | between various point representations, and ECDH key agreement. |
1346 | | |
1347 | | *******************************************************************************/ |
1348 | | |
1349 | | /*****************/ |
1350 | | /* ECDSA signing */ |
1351 | | /*****************/ |
1352 | | |
1353 | | /** |
1354 | | Create an ECDSA signature WITHOUT hashing first. |
1355 | | |
1356 | | This function is intended to receive a hash of the input. |
1357 | | For convenience, we recommend using one of the hash-and-sign combined functions above. |
1358 | | |
1359 | | The argument `msg` MUST be at least 66 bytes (i.e. `msg_len >= 66`). |
1360 | | |
1361 | | NOTE: The equivalent functions in OpenSSL and Fiat-Crypto both accept inputs |
1362 | | smaller than 66 bytes. These libraries left-pad the input with enough zeroes to |
1363 | | reach the minimum 66 byte size. Clients who need behavior identical to OpenSSL |
1364 | | need to perform the left-padding themselves. |
1365 | | |
1366 | | The function returns `true` for successful creation of an ECDSA signature and `false` otherwise. |
1367 | | |
1368 | | The outparam `signature` (R || S) points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1369 | | The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. |
1370 | | The arguments `private_key` and `nonce` point to 66 bytes of valid memory, i.e., uint8_t[66]. |
1371 | | |
1372 | | The function also checks whether `private_key` and `nonce` are valid values: |
1373 | | • 0 < `private_key` < the order of the curve |
1374 | | • 0 < `nonce` < the order of the curve |
1375 | | */ |
1376 | | bool |
1377 | | Hacl_P521_ecdsa_sign_p521_without_hash( |
1378 | | uint8_t *signature, |
1379 | | uint32_t msg_len, |
1380 | | uint8_t *msg, |
1381 | | uint8_t *private_key, |
1382 | | uint8_t *nonce) |
1383 | 0 | { |
1384 | 0 | uint64_t m_q[9U] = { 0U }; |
1385 | 0 | uint8_t mHash[66U] = { 0U }; |
1386 | 0 | memcpy(mHash, msg, 66U * sizeof(uint8_t)); |
1387 | 0 | KRML_MAYBE_UNUSED_VAR(msg_len); |
1388 | 0 | bn_from_bytes_be(m_q, mHash); |
1389 | 0 | qmod_short(m_q, m_q); |
1390 | 0 | bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); |
1391 | 0 | return res; |
1392 | 0 | } |
1393 | | |
1394 | | /**********************/ |
1395 | | /* ECDSA verification */ |
1396 | | /**********************/ |
1397 | | |
1398 | | /** |
1399 | | Verify an ECDSA signature WITHOUT hashing first. |
1400 | | |
1401 | | This function is intended to receive a hash of the input. |
1402 | | For convenience, we recommend using one of the hash-and-verify combined functions above. |
1403 | | |
1404 | | The argument `msg` MUST be at least 66 bytes (i.e. `msg_len >= 66`). |
1405 | | |
1406 | | The function returns `true` if the signature is valid and `false` otherwise. |
1407 | | |
1408 | | The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. |
1409 | | The argument `public_key` (x || y) points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1410 | | The arguments `signature_r` and `signature_s` point to 66 bytes of valid memory, i.e., uint8_t[66]. |
1411 | | |
1412 | | The function also checks whether `public_key` is valid |
1413 | | */ |
1414 | | bool |
1415 | | Hacl_P521_ecdsa_verif_without_hash( |
1416 | | uint32_t msg_len, |
1417 | | uint8_t *msg, |
1418 | | uint8_t *public_key, |
1419 | | uint8_t *signature_r, |
1420 | | uint8_t *signature_s) |
1421 | 171 | { |
1422 | 171 | uint64_t m_q[9U] = { 0U }; |
1423 | 171 | uint8_t mHash[66U] = { 0U }; |
1424 | 171 | memcpy(mHash, msg, 66U * sizeof(uint8_t)); |
1425 | 171 | KRML_MAYBE_UNUSED_VAR(msg_len); |
1426 | 171 | bn_from_bytes_be(m_q, mHash); |
1427 | 171 | qmod_short(m_q, m_q); |
1428 | 171 | bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); |
1429 | 171 | return res; |
1430 | 171 | } |
1431 | | |
1432 | | /******************/ |
1433 | | /* Key validation */ |
1434 | | /******************/ |
1435 | | |
1436 | | /** |
1437 | | Public key validation. |
1438 | | |
1439 | | The function returns `true` if a public key is valid and `false` otherwise. |
1440 | | |
1441 | | The argument `public_key` points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1442 | | |
1443 | | The public key (x || y) is valid (with respect to SP 800-56A): |
1444 | | • the public key is not the “point at infinity”, represented as O. |
1445 | | • the affine x and y coordinates of the point represented by the public key are |
1446 | | in the range [0, p – 1] where p is the prime defining the finite field. |
1447 | | • y^2 = x^3 + ax + b where a and b are the coefficients of the curve equation. |
1448 | | The last extract is taken from: https://neilmadden.blog/2017/05/17/so-how-do-you-validate-nist-ecdh-public-keys/ |
1449 | | */ |
1450 | | bool |
1451 | | Hacl_P521_validate_public_key(uint8_t *public_key) |
1452 | 663 | { |
1453 | 663 | uint64_t point_jac[27U] = { 0U }; |
1454 | 663 | uint64_t p_aff[18U] = { 0U }; |
1455 | 663 | uint8_t *p_x = public_key; |
1456 | 663 | uint8_t *p_y = public_key + 66U; |
1457 | 663 | uint64_t *bn_p_x = p_aff; |
1458 | 663 | uint64_t *bn_p_y = p_aff + 9U; |
1459 | 663 | bn_from_bytes_be(bn_p_x, p_x); |
1460 | 663 | bn_from_bytes_be(bn_p_y, p_y); |
1461 | 663 | uint64_t *px0 = p_aff; |
1462 | 663 | uint64_t *py0 = p_aff + 9U; |
1463 | 663 | uint64_t lessX = bn_is_lt_prime_mask(px0); |
1464 | 663 | uint64_t lessY = bn_is_lt_prime_mask(py0); |
1465 | 663 | uint64_t res0 = lessX & lessY; |
1466 | 663 | bool is_xy_valid = res0 == 0xFFFFFFFFFFFFFFFFULL; |
1467 | 663 | bool res; |
1468 | 663 | if (!is_xy_valid) { |
1469 | 21 | res = false; |
1470 | 642 | } else { |
1471 | 642 | uint64_t rp[9U] = { 0U }; |
1472 | 642 | uint64_t tx[9U] = { 0U }; |
1473 | 642 | uint64_t ty[9U] = { 0U }; |
1474 | 642 | uint64_t *px = p_aff; |
1475 | 642 | uint64_t *py = p_aff + 9U; |
1476 | 642 | to_mont(tx, px); |
1477 | 642 | to_mont(ty, py); |
1478 | 642 | uint64_t tmp[9U] = { 0U }; |
1479 | 642 | fsqr0(rp, tx); |
1480 | 642 | fmul0(rp, rp, tx); |
1481 | 642 | p521_make_a_coeff(tmp); |
1482 | 642 | fmul0(tmp, tmp, tx); |
1483 | 642 | fadd0(rp, tmp, rp); |
1484 | 642 | p521_make_b_coeff(tmp); |
1485 | 642 | fadd0(rp, tmp, rp); |
1486 | 642 | fsqr0(ty, ty); |
1487 | 642 | uint64_t r = bn_is_eq_mask(ty, rp); |
1488 | 642 | uint64_t r0 = r; |
1489 | 642 | bool r1 = r0 == 0xFFFFFFFFFFFFFFFFULL; |
1490 | 642 | res = r1; |
1491 | 642 | } |
1492 | 663 | if (res) { |
1493 | 410 | uint64_t *px = p_aff; |
1494 | 410 | uint64_t *py = p_aff + 9U; |
1495 | 410 | uint64_t *rx = point_jac; |
1496 | 410 | uint64_t *ry = point_jac + 9U; |
1497 | 410 | uint64_t *rz = point_jac + 18U; |
1498 | 410 | to_mont(rx, px); |
1499 | 410 | to_mont(ry, py); |
1500 | 410 | p521_make_fone(rz); |
1501 | 410 | } |
1502 | 663 | bool res1 = res; |
1503 | 663 | return res1; |
1504 | 663 | } |
1505 | | |
1506 | | /** |
1507 | | Private key validation. |
1508 | | |
1509 | | The function returns `true` if a private key is valid and `false` otherwise. |
1510 | | |
1511 | | The argument `private_key` points to 66 bytes of valid memory, i.e., uint8_t[66]. |
1512 | | |
1513 | | The private key is valid: |
1514 | | • 0 < `private_key` < the order of the curve |
1515 | | */ |
1516 | | bool |
1517 | | Hacl_P521_validate_private_key(uint8_t *private_key) |
1518 | 226 | { |
1519 | 226 | uint64_t bn_sk[9U] = { 0U }; |
1520 | 226 | bn_from_bytes_be(bn_sk, private_key); |
1521 | 226 | uint64_t tmp[9U] = { 0U }; |
1522 | 226 | p521_make_order(tmp); |
1523 | 226 | uint64_t c = bn_sub(tmp, bn_sk, tmp); |
1524 | 226 | uint64_t is_lt_order = FStar_UInt64_gte_mask(c, 0ULL) & ~FStar_UInt64_eq_mask(c, 0ULL); |
1525 | 226 | uint64_t bn_zero[9U] = { 0U }; |
1526 | 226 | uint64_t res = bn_is_eq_mask(bn_sk, bn_zero); |
1527 | 226 | uint64_t is_eq_zero = res; |
1528 | 226 | uint64_t res0 = is_lt_order & ~is_eq_zero; |
1529 | 226 | return res0 == 0xFFFFFFFFFFFFFFFFULL; |
1530 | 226 | } |
1531 | | |
1532 | | /******************************************************************************* |
1533 | | Parsing and Serializing public keys. |
1534 | | |
1535 | | A public key is a point (x, y) on the P-521 NIST curve. |
1536 | | |
1537 | | The point can be represented in the following three ways. |
1538 | | • raw = [ x || y ], 132 bytes |
1539 | | • uncompressed = [ 0x04 || x || y ], 133 bytes |
1540 | | • compressed = [ (0x02 for even `y` and 0x03 for odd `y`) || x ], 33 bytes |
1541 | | |
1542 | | *******************************************************************************/ |
1543 | | |
1544 | | /** |
1545 | | Convert a public key from uncompressed to its raw form. |
1546 | | |
1547 | | The function returns `true` for successful conversion of a public key and `false` otherwise. |
1548 | | |
1549 | | The outparam `pk_raw` points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1550 | | The argument `pk` points to 133 bytes of valid memory, i.e., uint8_t[133]. |
1551 | | |
1552 | | The function DOESN'T check whether (x, y) is a valid point. |
1553 | | */ |
1554 | | bool |
1555 | | Hacl_P521_uncompressed_to_raw(uint8_t *pk, uint8_t *pk_raw) |
1556 | 0 | { |
1557 | 0 | uint8_t pk0 = pk[0U]; |
1558 | 0 | if (pk0 != 0x04U) { |
1559 | 0 | return false; |
1560 | 0 | } |
1561 | 0 | memcpy(pk_raw, pk + 1U, 132U * sizeof(uint8_t)); |
1562 | 0 | return true; |
1563 | 0 | } |
1564 | | |
1565 | | /** |
1566 | | Convert a public key from compressed to its raw form. |
1567 | | |
1568 | | The function returns `true` for successful conversion of a public key and `false` otherwise. |
1569 | | |
1570 | | The outparam `pk_raw` points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1571 | | The argument `pk` points to 33 bytes of valid memory, i.e., uint8_t[33]. |
1572 | | |
1573 | | The function also checks whether (x, y) is a valid point. |
1574 | | */ |
1575 | | bool |
1576 | | Hacl_P521_compressed_to_raw(uint8_t *pk, uint8_t *pk_raw) |
1577 | 0 | { |
1578 | 0 | uint64_t xa[9U] = { 0U }; |
1579 | 0 | uint64_t ya[9U] = { 0U }; |
1580 | 0 | uint8_t *pk_xb = pk + 1U; |
1581 | 0 | uint8_t s0 = pk[0U]; |
1582 | 0 | uint8_t s01 = s0; |
1583 | 0 | bool b; |
1584 | 0 | if (!(s01 == 0x02U || s01 == 0x03U)) { |
1585 | 0 | b = false; |
1586 | 0 | } else { |
1587 | 0 | uint8_t *xb = pk + 1U; |
1588 | 0 | bn_from_bytes_be(xa, xb); |
1589 | 0 | uint64_t is_x_valid = bn_is_lt_prime_mask(xa); |
1590 | 0 | bool is_x_valid1 = is_x_valid == 0xFFFFFFFFFFFFFFFFULL; |
1591 | 0 | bool is_y_odd = s01 == 0x03U; |
1592 | 0 | if (!is_x_valid1) { |
1593 | 0 | b = false; |
1594 | 0 | } else { |
1595 | 0 | uint64_t y2M[9U] = { 0U }; |
1596 | 0 | uint64_t xM[9U] = { 0U }; |
1597 | 0 | uint64_t yM[9U] = { 0U }; |
1598 | 0 | to_mont(xM, xa); |
1599 | 0 | uint64_t tmp[9U] = { 0U }; |
1600 | 0 | fsqr0(y2M, xM); |
1601 | 0 | fmul0(y2M, y2M, xM); |
1602 | 0 | p521_make_a_coeff(tmp); |
1603 | 0 | fmul0(tmp, tmp, xM); |
1604 | 0 | fadd0(y2M, tmp, y2M); |
1605 | 0 | p521_make_b_coeff(tmp); |
1606 | 0 | fadd0(y2M, tmp, y2M); |
1607 | 0 | p521_fsqrt(yM, y2M); |
1608 | 0 | from_mont(ya, yM); |
1609 | 0 | fsqr0(yM, yM); |
1610 | 0 | uint64_t r = bn_is_eq_mask(yM, y2M); |
1611 | 0 | uint64_t r0 = r; |
1612 | 0 | bool is_y_valid = r0 == 0xFFFFFFFFFFFFFFFFULL; |
1613 | 0 | bool is_y_valid0 = is_y_valid; |
1614 | 0 | if (!is_y_valid0) { |
1615 | 0 | b = false; |
1616 | 0 | } else { |
1617 | 0 | uint64_t is_y_odd1 = ya[0U] & 1ULL; |
1618 | 0 | bool is_y_odd2 = is_y_odd1 == 1ULL; |
1619 | 0 | uint64_t zero[9U] = { 0U }; |
1620 | 0 | if (is_y_odd2 != is_y_odd) { |
1621 | 0 | fsub0(ya, zero, ya); |
1622 | 0 | } |
1623 | 0 | b = true; |
1624 | 0 | } |
1625 | 0 | } |
1626 | 0 | } |
1627 | 0 | if (b) { |
1628 | 0 | memcpy(pk_raw, pk_xb, 66U * sizeof(uint8_t)); |
1629 | 0 | bn_to_bytes_be(pk_raw + 66U, ya); |
1630 | 0 | } |
1631 | 0 | return b; |
1632 | 0 | } |
1633 | | |
1634 | | /** |
1635 | | Convert a public key from raw to its uncompressed form. |
1636 | | |
1637 | | The outparam `pk` points to 133 bytes of valid memory, i.e., uint8_t[133]. |
1638 | | The argument `pk_raw` points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1639 | | |
1640 | | The function DOESN'T check whether (x, y) is a valid point. |
1641 | | */ |
1642 | | void |
1643 | | Hacl_P521_raw_to_uncompressed(uint8_t *pk_raw, uint8_t *pk) |
1644 | 0 | { |
1645 | 0 | pk[0U] = 0x04U; |
1646 | 0 | memcpy(pk + 1U, pk_raw, 132U * sizeof(uint8_t)); |
1647 | 0 | } |
1648 | | |
1649 | | /** |
1650 | | Convert a public key from raw to its compressed form. |
1651 | | |
1652 | | The outparam `pk` points to 33 bytes of valid memory, i.e., uint8_t[33]. |
1653 | | The argument `pk_raw` points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1654 | | |
1655 | | The function DOESN'T check whether (x, y) is a valid point. |
1656 | | */ |
1657 | | void |
1658 | | Hacl_P521_raw_to_compressed(uint8_t *pk_raw, uint8_t *pk) |
1659 | 0 | { |
1660 | 0 | uint8_t *pk_x = pk_raw; |
1661 | 0 | uint8_t *pk_y = pk_raw + 66U; |
1662 | 0 | uint64_t bn_f[9U] = { 0U }; |
1663 | 0 | bn_from_bytes_be(bn_f, pk_y); |
1664 | 0 | uint64_t is_odd_f = bn_f[0U] & 1ULL; |
1665 | 0 | pk[0U] = (uint32_t)(uint8_t)is_odd_f + 0x02U; |
1666 | 0 | memcpy(pk + 1U, pk_x, 66U * sizeof(uint8_t)); |
1667 | 0 | } |
1668 | | |
1669 | | /******************/ |
1670 | | /* ECDH agreement */ |
1671 | | /******************/ |
1672 | | |
1673 | | /** |
1674 | | Compute the public key from the private key. |
1675 | | |
1676 | | The function returns `true` if a private key is valid and `false` otherwise. |
1677 | | |
1678 | | The outparam `public_key` points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1679 | | The argument `private_key` points to 66 bytes of valid memory, i.e., uint8_t[66]. |
1680 | | |
1681 | | The private key is valid: |
1682 | | • 0 < `private_key` < the order of the curve. |
1683 | | */ |
1684 | | bool |
1685 | | Hacl_P521_dh_initiator(uint8_t *public_key, uint8_t *private_key) |
1686 | 226 | { |
1687 | 226 | uint64_t tmp[36U] = { 0U }; |
1688 | 226 | uint64_t *sk = tmp; |
1689 | 226 | uint64_t *pk = tmp + 9U; |
1690 | 226 | uint64_t is_sk_valid = load_qelem_conditional(sk, private_key); |
1691 | 226 | point_mul_g(pk, sk); |
1692 | 226 | uint64_t aff_p[18U] = { 0U }; |
1693 | 226 | uint64_t zinv[9U] = { 0U }; |
1694 | 226 | uint64_t *px = pk; |
1695 | 226 | uint64_t *py0 = pk + 9U; |
1696 | 226 | uint64_t *pz = pk + 18U; |
1697 | 226 | uint64_t *x = aff_p; |
1698 | 226 | uint64_t *y = aff_p + 9U; |
1699 | 226 | p521_finv(zinv, pz); |
1700 | 226 | fmul0(x, px, zinv); |
1701 | 226 | fmul0(y, py0, zinv); |
1702 | 226 | from_mont(x, x); |
1703 | 226 | from_mont(y, y); |
1704 | 226 | uint64_t *px0 = aff_p; |
1705 | 226 | uint64_t *py = aff_p + 9U; |
1706 | 226 | bn_to_bytes_be(public_key, px0); |
1707 | 226 | bn_to_bytes_be(public_key + 66U, py); |
1708 | 226 | return is_sk_valid == 0xFFFFFFFFFFFFFFFFULL; |
1709 | 226 | } |
1710 | | |
1711 | | /** |
1712 | | Execute the diffie-hellmann key exchange. |
1713 | | |
1714 | | The function returns `true` for successful creation of an ECDH shared secret and |
1715 | | `false` otherwise. |
1716 | | |
1717 | | The outparam `shared_secret` points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1718 | | The argument `their_pubkey` points to 132 bytes of valid memory, i.e., uint8_t[132]. |
1719 | | The argument `private_key` points to 66 bytes of valid memory, i.e., uint8_t[66]. |
1720 | | |
1721 | | The function also checks whether `private_key` and `their_pubkey` are valid. |
1722 | | */ |
1723 | | bool |
1724 | | Hacl_P521_dh_responder(uint8_t *shared_secret, uint8_t *their_pubkey, uint8_t *private_key) |
1725 | 11 | { |
1726 | 11 | uint64_t tmp[264U] = { 0U }; |
1727 | 11 | uint64_t *sk = tmp; |
1728 | 11 | uint64_t *pk = tmp + 9U; |
1729 | 11 | uint64_t p_aff[18U] = { 0U }; |
1730 | 11 | uint8_t *p_x = their_pubkey; |
1731 | 11 | uint8_t *p_y = their_pubkey + 66U; |
1732 | 11 | uint64_t *bn_p_x = p_aff; |
1733 | 11 | uint64_t *bn_p_y = p_aff + 9U; |
1734 | 11 | bn_from_bytes_be(bn_p_x, p_x); |
1735 | 11 | bn_from_bytes_be(bn_p_y, p_y); |
1736 | 11 | uint64_t *px0 = p_aff; |
1737 | 11 | uint64_t *py0 = p_aff + 9U; |
1738 | 11 | uint64_t lessX = bn_is_lt_prime_mask(px0); |
1739 | 11 | uint64_t lessY = bn_is_lt_prime_mask(py0); |
1740 | 11 | uint64_t res0 = lessX & lessY; |
1741 | 11 | bool is_xy_valid = res0 == 0xFFFFFFFFFFFFFFFFULL; |
1742 | 11 | bool res; |
1743 | 11 | if (!is_xy_valid) { |
1744 | 0 | res = false; |
1745 | 11 | } else { |
1746 | 11 | uint64_t rp[9U] = { 0U }; |
1747 | 11 | uint64_t tx[9U] = { 0U }; |
1748 | 11 | uint64_t ty[9U] = { 0U }; |
1749 | 11 | uint64_t *px = p_aff; |
1750 | 11 | uint64_t *py = p_aff + 9U; |
1751 | 11 | to_mont(tx, px); |
1752 | 11 | to_mont(ty, py); |
1753 | 11 | uint64_t tmp1[9U] = { 0U }; |
1754 | 11 | fsqr0(rp, tx); |
1755 | 11 | fmul0(rp, rp, tx); |
1756 | 11 | p521_make_a_coeff(tmp1); |
1757 | 11 | fmul0(tmp1, tmp1, tx); |
1758 | 11 | fadd0(rp, tmp1, rp); |
1759 | 11 | p521_make_b_coeff(tmp1); |
1760 | 11 | fadd0(rp, tmp1, rp); |
1761 | 11 | fsqr0(ty, ty); |
1762 | 11 | uint64_t r = bn_is_eq_mask(ty, rp); |
1763 | 11 | uint64_t r0 = r; |
1764 | 11 | bool r1 = r0 == 0xFFFFFFFFFFFFFFFFULL; |
1765 | 11 | res = r1; |
1766 | 11 | } |
1767 | 11 | if (res) { |
1768 | 11 | uint64_t *px = p_aff; |
1769 | 11 | uint64_t *py = p_aff + 9U; |
1770 | 11 | uint64_t *rx = pk; |
1771 | 11 | uint64_t *ry = pk + 9U; |
1772 | 11 | uint64_t *rz = pk + 18U; |
1773 | 11 | to_mont(rx, px); |
1774 | 11 | to_mont(ry, py); |
1775 | 11 | p521_make_fone(rz); |
1776 | 11 | } |
1777 | 11 | bool is_pk_valid = res; |
1778 | 11 | uint64_t is_sk_valid = load_qelem_conditional(sk, private_key); |
1779 | 11 | uint64_t ss_proj[27U] = { 0U }; |
1780 | 11 | if (is_pk_valid) { |
1781 | 11 | point_mul(ss_proj, sk, pk); |
1782 | 11 | uint64_t aff_p[18U] = { 0U }; |
1783 | 11 | uint64_t zinv[9U] = { 0U }; |
1784 | 11 | uint64_t *px = ss_proj; |
1785 | 11 | uint64_t *py1 = ss_proj + 9U; |
1786 | 11 | uint64_t *pz = ss_proj + 18U; |
1787 | 11 | uint64_t *x = aff_p; |
1788 | 11 | uint64_t *y = aff_p + 9U; |
1789 | 11 | p521_finv(zinv, pz); |
1790 | 11 | fmul0(x, px, zinv); |
1791 | 11 | fmul0(y, py1, zinv); |
1792 | 11 | from_mont(x, x); |
1793 | 11 | from_mont(y, y); |
1794 | 11 | uint64_t *px1 = aff_p; |
1795 | 11 | uint64_t *py = aff_p + 9U; |
1796 | 11 | bn_to_bytes_be(shared_secret, px1); |
1797 | 11 | bn_to_bytes_be(shared_secret + 66U, py); |
1798 | 11 | } |
1799 | 11 | return is_sk_valid == 0xFFFFFFFFFFFFFFFFULL && is_pk_valid; |
1800 | 11 | } |