/src/nss-nspr/nss/lib/freebl/verified/Hacl_P256.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* MIT License |
2 | | * |
3 | | * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation |
4 | | * Copyright (c) 2022-2023 HACL* Contributors |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in all |
14 | | * copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | |
25 | | #include "internal/Hacl_P256.h" |
26 | | |
27 | | #include "internal/Hacl_P256_PrecompTable.h" |
28 | | #include "internal/Hacl_Krmllib.h" |
29 | | #include "internal/Hacl_Bignum_Base.h" |
30 | | #include "lib_intrinsics.h" |
31 | | |
32 | | static inline uint64_t |
33 | | bn_is_zero_mask4(uint64_t *f) |
34 | 244 | { |
35 | 244 | uint64_t bn_zero[4U] = { 0U }; |
36 | 244 | uint64_t mask = (uint64_t)0xFFFFFFFFFFFFFFFFU; |
37 | 244 | KRML_MAYBE_FOR4(i, |
38 | 244 | (uint32_t)0U, |
39 | 244 | (uint32_t)4U, |
40 | 244 | (uint32_t)1U, |
41 | 244 | uint64_t uu____0 = FStar_UInt64_eq_mask(f[i], bn_zero[i]); |
42 | 244 | mask = uu____0 & mask;); |
43 | 244 | uint64_t mask1 = mask; |
44 | 244 | uint64_t res = mask1; |
45 | 244 | return res; |
46 | 244 | } |
47 | | |
48 | | static inline bool |
49 | | bn_is_zero_vartime4(uint64_t *f) |
50 | 17 | { |
51 | 17 | uint64_t m = bn_is_zero_mask4(f); |
52 | 17 | return m == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
53 | 17 | } |
54 | | |
55 | | static inline uint64_t |
56 | | bn_is_eq_mask4(uint64_t *a, uint64_t *b) |
57 | 62 | { |
58 | 62 | uint64_t mask = (uint64_t)0xFFFFFFFFFFFFFFFFU; |
59 | 62 | KRML_MAYBE_FOR4(i, |
60 | 62 | (uint32_t)0U, |
61 | 62 | (uint32_t)4U, |
62 | 62 | (uint32_t)1U, |
63 | 62 | uint64_t uu____0 = FStar_UInt64_eq_mask(a[i], b[i]); |
64 | 62 | mask = uu____0 & mask;); |
65 | 62 | uint64_t mask1 = mask; |
66 | 62 | return mask1; |
67 | 62 | } |
68 | | |
69 | | static inline bool |
70 | | bn_is_eq_vartime4(uint64_t *a, uint64_t *b) |
71 | 17 | { |
72 | 17 | uint64_t m = bn_is_eq_mask4(a, b); |
73 | 17 | return m == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
74 | 17 | } |
75 | | |
76 | | static inline void |
77 | | bn_cmovznz4(uint64_t *res, uint64_t cin, uint64_t *x, uint64_t *y) |
78 | 66 | { |
79 | 66 | uint64_t mask = ~FStar_UInt64_eq_mask(cin, (uint64_t)0U); |
80 | 66 | KRML_MAYBE_FOR4(i, |
81 | 66 | (uint32_t)0U, |
82 | 66 | (uint32_t)4U, |
83 | 66 | (uint32_t)1U, |
84 | 66 | uint64_t *os = res; |
85 | 66 | uint64_t uu____0 = x[i]; |
86 | 66 | uint64_t x1 = uu____0 ^ (mask & (y[i] ^ uu____0)); |
87 | 66 | os[i] = x1;); |
88 | 66 | } |
89 | | |
90 | | static inline void |
91 | | bn_add_mod4(uint64_t *res, uint64_t *n, uint64_t *x, uint64_t *y) |
92 | 344k | { |
93 | 344k | uint64_t c0 = (uint64_t)0U; |
94 | 344k | { |
95 | 344k | uint64_t t1 = x[(uint32_t)4U * (uint32_t)0U]; |
96 | 344k | uint64_t t20 = y[(uint32_t)4U * (uint32_t)0U]; |
97 | 344k | uint64_t *res_i0 = res + (uint32_t)4U * (uint32_t)0U; |
98 | 344k | c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t1, t20, res_i0); |
99 | 344k | uint64_t t10 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
100 | 344k | uint64_t t21 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
101 | 344k | uint64_t *res_i1 = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
102 | 344k | c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t10, t21, res_i1); |
103 | 344k | uint64_t t11 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
104 | 344k | uint64_t t22 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
105 | 344k | uint64_t *res_i2 = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
106 | 344k | c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t11, t22, res_i2); |
107 | 344k | uint64_t t12 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
108 | 344k | uint64_t t2 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
109 | 344k | uint64_t *res_i = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
110 | 344k | c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, t12, t2, res_i); |
111 | 344k | } |
112 | 344k | uint64_t c00 = c0; |
113 | 344k | uint64_t tmp[4U] = { 0U }; |
114 | 344k | uint64_t c = (uint64_t)0U; |
115 | 344k | { |
116 | 344k | uint64_t t1 = res[(uint32_t)4U * (uint32_t)0U]; |
117 | 344k | uint64_t t20 = n[(uint32_t)4U * (uint32_t)0U]; |
118 | 344k | uint64_t *res_i0 = tmp + (uint32_t)4U * (uint32_t)0U; |
119 | 344k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); |
120 | 344k | uint64_t t10 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
121 | 344k | uint64_t t21 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
122 | 344k | uint64_t *res_i1 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
123 | 344k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); |
124 | 344k | uint64_t t11 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
125 | 344k | uint64_t t22 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
126 | 344k | uint64_t *res_i2 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
127 | 344k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); |
128 | 344k | uint64_t t12 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
129 | 344k | uint64_t t2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
130 | 344k | uint64_t *res_i = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
131 | 344k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); |
132 | 344k | } |
133 | 344k | uint64_t c1 = c; |
134 | 344k | uint64_t c2 = c00 - c1; |
135 | 344k | KRML_MAYBE_FOR4(i, |
136 | 344k | (uint32_t)0U, |
137 | 344k | (uint32_t)4U, |
138 | 344k | (uint32_t)1U, |
139 | 344k | uint64_t *os = res; |
140 | 344k | uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); |
141 | 344k | os[i] = x1;); |
142 | 344k | } |
143 | | |
144 | | static inline uint64_t |
145 | | bn_sub4(uint64_t *res, uint64_t *x, uint64_t *y) |
146 | 351 | { |
147 | 351 | uint64_t c = (uint64_t)0U; |
148 | 351 | { |
149 | 351 | uint64_t t1 = x[(uint32_t)4U * (uint32_t)0U]; |
150 | 351 | uint64_t t20 = y[(uint32_t)4U * (uint32_t)0U]; |
151 | 351 | uint64_t *res_i0 = res + (uint32_t)4U * (uint32_t)0U; |
152 | 351 | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); |
153 | 351 | uint64_t t10 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
154 | 351 | uint64_t t21 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
155 | 351 | uint64_t *res_i1 = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
156 | 351 | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); |
157 | 351 | uint64_t t11 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
158 | 351 | uint64_t t22 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
159 | 351 | uint64_t *res_i2 = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
160 | 351 | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); |
161 | 351 | uint64_t t12 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
162 | 351 | uint64_t t2 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
163 | 351 | uint64_t *res_i = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
164 | 351 | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); |
165 | 351 | } |
166 | 351 | uint64_t c0 = c; |
167 | 351 | return c0; |
168 | 351 | } |
169 | | |
170 | | static inline void |
171 | | bn_sub_mod4(uint64_t *res, uint64_t *n, uint64_t *x, uint64_t *y) |
172 | 146k | { |
173 | 146k | uint64_t c0 = (uint64_t)0U; |
174 | 146k | { |
175 | 146k | uint64_t t1 = x[(uint32_t)4U * (uint32_t)0U]; |
176 | 146k | uint64_t t20 = y[(uint32_t)4U * (uint32_t)0U]; |
177 | 146k | uint64_t *res_i0 = res + (uint32_t)4U * (uint32_t)0U; |
178 | 146k | c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c0, t1, t20, res_i0); |
179 | 146k | uint64_t t10 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
180 | 146k | uint64_t t21 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
181 | 146k | uint64_t *res_i1 = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
182 | 146k | c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c0, t10, t21, res_i1); |
183 | 146k | uint64_t t11 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
184 | 146k | uint64_t t22 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
185 | 146k | uint64_t *res_i2 = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
186 | 146k | c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c0, t11, t22, res_i2); |
187 | 146k | uint64_t t12 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
188 | 146k | uint64_t t2 = y[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
189 | 146k | uint64_t *res_i = res + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
190 | 146k | c0 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c0, t12, t2, res_i); |
191 | 146k | } |
192 | 146k | uint64_t c00 = c0; |
193 | 146k | uint64_t tmp[4U] = { 0U }; |
194 | 146k | uint64_t c = (uint64_t)0U; |
195 | 146k | { |
196 | 146k | uint64_t t1 = res[(uint32_t)4U * (uint32_t)0U]; |
197 | 146k | uint64_t t20 = n[(uint32_t)4U * (uint32_t)0U]; |
198 | 146k | uint64_t *res_i0 = tmp + (uint32_t)4U * (uint32_t)0U; |
199 | 146k | c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t20, res_i0); |
200 | 146k | uint64_t t10 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
201 | 146k | uint64_t t21 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
202 | 146k | uint64_t *res_i1 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
203 | 146k | c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, t21, res_i1); |
204 | 146k | uint64_t t11 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
205 | 146k | uint64_t t22 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
206 | 146k | uint64_t *res_i2 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
207 | 146k | c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, t22, res_i2); |
208 | 146k | uint64_t t12 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
209 | 146k | uint64_t t2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
210 | 146k | uint64_t *res_i = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
211 | 146k | c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, t2, res_i); |
212 | 146k | } |
213 | 146k | uint64_t c1 = c; |
214 | 146k | KRML_HOST_IGNORE(c1); |
215 | 146k | uint64_t c2 = (uint64_t)0U - c00; |
216 | 146k | KRML_MAYBE_FOR4(i, |
217 | 146k | (uint32_t)0U, |
218 | 146k | (uint32_t)4U, |
219 | 146k | (uint32_t)1U, |
220 | 146k | uint64_t *os = res; |
221 | 146k | uint64_t x1 = (c2 & tmp[i]) | (~c2 & res[i]); |
222 | 146k | os[i] = x1;); |
223 | 146k | } |
224 | | |
225 | | static inline void |
226 | | bn_mul4(uint64_t *res, uint64_t *x, uint64_t *y) |
227 | 238k | { |
228 | 238k | memset(res, 0U, (uint32_t)8U * sizeof(uint64_t)); |
229 | 238k | KRML_MAYBE_FOR4( |
230 | 238k | i0, |
231 | 238k | (uint32_t)0U, |
232 | 238k | (uint32_t)4U, |
233 | 238k | (uint32_t)1U, |
234 | 238k | uint64_t bj = y[i0]; |
235 | 238k | uint64_t *res_j = res + i0; |
236 | 238k | uint64_t c = (uint64_t)0U; |
237 | 238k | { |
238 | 238k | uint64_t a_i = x[(uint32_t)4U * (uint32_t)0U]; |
239 | 238k | uint64_t *res_i0 = res_j + (uint32_t)4U * (uint32_t)0U; |
240 | 238k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i0); |
241 | 238k | uint64_t a_i0 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
242 | 238k | uint64_t *res_i1 = res_j + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
243 | 238k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c, res_i1); |
244 | 238k | uint64_t a_i1 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
245 | 238k | uint64_t *res_i2 = res_j + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
246 | 238k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c, res_i2); |
247 | 238k | uint64_t a_i2 = x[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
248 | 238k | uint64_t *res_i = res_j + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
249 | 238k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c, res_i); |
250 | 238k | } uint64_t r = c; |
251 | 238k | res[(uint32_t)4U + i0] = r;); |
252 | 238k | } |
253 | | |
254 | | static inline void |
255 | | bn_sqr4(uint64_t *res, uint64_t *x) |
256 | 73.5k | { |
257 | 73.5k | memset(res, 0U, (uint32_t)8U * sizeof(uint64_t)); |
258 | 73.5k | KRML_MAYBE_FOR4( |
259 | 73.5k | i0, |
260 | 73.5k | (uint32_t)0U, |
261 | 73.5k | (uint32_t)4U, |
262 | 73.5k | (uint32_t)1U, |
263 | 73.5k | uint64_t *ab = x; |
264 | 73.5k | uint64_t a_j = x[i0]; |
265 | 73.5k | uint64_t *res_j = res + i0; |
266 | 73.5k | uint64_t c = (uint64_t)0U; |
267 | 73.5k | for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U; i++) { |
268 | 73.5k | uint64_t a_i = ab[(uint32_t)4U * i]; |
269 | 73.5k | uint64_t *res_i0 = res_j + (uint32_t)4U * i; |
270 | 73.5k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0); |
271 | 73.5k | uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U]; |
272 | 73.5k | uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U; |
273 | 73.5k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1); |
274 | 73.5k | uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U]; |
275 | 73.5k | uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U; |
276 | 73.5k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2); |
277 | 73.5k | uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U]; |
278 | 73.5k | uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U; |
279 | 73.5k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i); |
280 | 73.5k | } for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) { |
281 | 73.5k | uint64_t a_i = ab[i]; |
282 | 73.5k | uint64_t *res_i = res_j + i; |
283 | 73.5k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i); |
284 | 73.5k | } uint64_t r = c; |
285 | 73.5k | res[i0 + i0] = r;); |
286 | 73.5k | uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64((uint32_t)8U, res, res, res); |
287 | 73.5k | KRML_HOST_IGNORE(c0); |
288 | 73.5k | uint64_t tmp[8U] = { 0U }; |
289 | 73.5k | KRML_MAYBE_FOR4(i, |
290 | 73.5k | (uint32_t)0U, |
291 | 73.5k | (uint32_t)4U, |
292 | 73.5k | (uint32_t)1U, |
293 | 73.5k | FStar_UInt128_uint128 res1 = FStar_UInt128_mul_wide(x[i], x[i]); |
294 | 73.5k | uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res1, (uint32_t)64U)); |
295 | 73.5k | uint64_t lo = FStar_UInt128_uint128_to_uint64(res1); |
296 | 73.5k | tmp[(uint32_t)2U * i] = lo; |
297 | 73.5k | tmp[(uint32_t)2U * i + (uint32_t)1U] = hi;); |
298 | 73.5k | uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64((uint32_t)8U, res, tmp, res); |
299 | 73.5k | KRML_HOST_IGNORE(c1); |
300 | 73.5k | } |
301 | | |
302 | | static inline void |
303 | | bn_to_bytes_be4(uint8_t *res, uint64_t *f) |
304 | 210 | { |
305 | 210 | uint8_t tmp[32U] = { 0U }; |
306 | 210 | KRML_HOST_IGNORE(tmp); |
307 | 210 | KRML_MAYBE_FOR4(i, |
308 | 210 | (uint32_t)0U, |
309 | 210 | (uint32_t)4U, |
310 | 210 | (uint32_t)1U, |
311 | 210 | store64_be(res + i * (uint32_t)8U, f[(uint32_t)4U - i - (uint32_t)1U]);); |
312 | 210 | } |
313 | | |
314 | | static inline void |
315 | | bn_from_bytes_be4(uint64_t *res, uint8_t *b) |
316 | 318 | { |
317 | 318 | KRML_MAYBE_FOR4(i, |
318 | 318 | (uint32_t)0U, |
319 | 318 | (uint32_t)4U, |
320 | 318 | (uint32_t)1U, |
321 | 318 | uint64_t *os = res; |
322 | 318 | uint64_t u = load64_be(b + ((uint32_t)4U - i - (uint32_t)1U) * (uint32_t)8U); |
323 | 318 | uint64_t x = u; |
324 | 318 | os[i] = x;); |
325 | 318 | } |
326 | | |
327 | | static inline void |
328 | | bn2_to_bytes_be4(uint8_t *res, uint64_t *x, uint64_t *y) |
329 | 105 | { |
330 | 105 | bn_to_bytes_be4(res, x); |
331 | 105 | bn_to_bytes_be4(res + (uint32_t)32U, y); |
332 | 105 | } |
333 | | |
334 | | static inline void |
335 | | make_prime(uint64_t *n) |
336 | 793k | { |
337 | 793k | n[0U] = (uint64_t)0xffffffffffffffffU; |
338 | 793k | n[1U] = (uint64_t)0xffffffffU; |
339 | 793k | n[2U] = (uint64_t)0x0U; |
340 | 793k | n[3U] = (uint64_t)0xffffffff00000001U; |
341 | 793k | } |
342 | | |
343 | | static inline void |
344 | | make_order(uint64_t *n) |
345 | 10.0k | { |
346 | 10.0k | n[0U] = (uint64_t)0xf3b9cac2fc632551U; |
347 | 10.0k | n[1U] = (uint64_t)0xbce6faada7179e84U; |
348 | 10.0k | n[2U] = (uint64_t)0xffffffffffffffffU; |
349 | 10.0k | n[3U] = (uint64_t)0xffffffff00000000U; |
350 | 10.0k | } |
351 | | |
352 | | static inline void |
353 | | make_a_coeff(uint64_t *a) |
354 | 45 | { |
355 | 45 | a[0U] = (uint64_t)0xfffffffffffffffcU; |
356 | 45 | a[1U] = (uint64_t)0x3ffffffffU; |
357 | 45 | a[2U] = (uint64_t)0x0U; |
358 | 45 | a[3U] = (uint64_t)0xfffffffc00000004U; |
359 | 45 | } |
360 | | |
361 | | static inline void |
362 | | make_b_coeff(uint64_t *b) |
363 | 40.1k | { |
364 | 40.1k | b[0U] = (uint64_t)0xd89cdf6229c4bddfU; |
365 | 40.1k | b[1U] = (uint64_t)0xacf005cd78843090U; |
366 | 40.1k | b[2U] = (uint64_t)0xe5a220abf7212ed6U; |
367 | 40.1k | b[3U] = (uint64_t)0xdc30061d04874834U; |
368 | 40.1k | } |
369 | | |
370 | | static inline void |
371 | | make_g_x(uint64_t *n) |
372 | 122 | { |
373 | 122 | n[0U] = (uint64_t)0x79e730d418a9143cU; |
374 | 122 | n[1U] = (uint64_t)0x75ba95fc5fedb601U; |
375 | 122 | n[2U] = (uint64_t)0x79fb732b77622510U; |
376 | 122 | n[3U] = (uint64_t)0x18905f76a53755c6U; |
377 | 122 | } |
378 | | |
379 | | static inline void |
380 | | make_g_y(uint64_t *n) |
381 | 122 | { |
382 | 122 | n[0U] = (uint64_t)0xddf25357ce95560aU; |
383 | 122 | n[1U] = (uint64_t)0x8b4ab8e4ba19e45cU; |
384 | 122 | n[2U] = (uint64_t)0xd2e88688dd21f325U; |
385 | 122 | n[3U] = (uint64_t)0x8571ff1825885d85U; |
386 | 122 | } |
387 | | |
388 | | static inline void |
389 | | make_fmont_R2(uint64_t *n) |
390 | 160 | { |
391 | 160 | n[0U] = (uint64_t)0x3U; |
392 | 160 | n[1U] = (uint64_t)0xfffffffbffffffffU; |
393 | 160 | n[2U] = (uint64_t)0xfffffffffffffffeU; |
394 | 160 | n[3U] = (uint64_t)0x4fffffffdU; |
395 | 160 | } |
396 | | |
397 | | static inline void |
398 | | make_fzero(uint64_t *n) |
399 | 244 | { |
400 | 244 | n[0U] = (uint64_t)0U; |
401 | 244 | n[1U] = (uint64_t)0U; |
402 | 244 | n[2U] = (uint64_t)0U; |
403 | 244 | n[3U] = (uint64_t)0U; |
404 | 244 | } |
405 | | |
406 | | static inline void |
407 | | make_fone(uint64_t *n) |
408 | 279 | { |
409 | 279 | n[0U] = (uint64_t)0x1U; |
410 | 279 | n[1U] = (uint64_t)0xffffffff00000000U; |
411 | 279 | n[2U] = (uint64_t)0xffffffffffffffffU; |
412 | 279 | n[3U] = (uint64_t)0xfffffffeU; |
413 | 279 | } |
414 | | |
415 | | static inline uint64_t |
416 | | bn_is_lt_prime_mask4(uint64_t *f) |
417 | 90 | { |
418 | 90 | uint64_t tmp[4U] = { 0U }; |
419 | 90 | make_prime(tmp); |
420 | 90 | uint64_t c = bn_sub4(tmp, f, tmp); |
421 | 90 | return (uint64_t)0U - c; |
422 | 90 | } |
423 | | |
424 | | static inline uint64_t |
425 | | feq_mask(uint64_t *a, uint64_t *b) |
426 | 45 | { |
427 | 45 | uint64_t r = bn_is_eq_mask4(a, b); |
428 | 45 | return r; |
429 | 45 | } |
430 | | |
431 | | static inline void |
432 | | fadd0(uint64_t *res, uint64_t *x, uint64_t *y) |
433 | 344k | { |
434 | 344k | uint64_t n[4U] = { 0U }; |
435 | 344k | make_prime(n); |
436 | 344k | bn_add_mod4(res, n, x, y); |
437 | 344k | } |
438 | | |
439 | | static inline void |
440 | | fsub0(uint64_t *res, uint64_t *x, uint64_t *y) |
441 | 146k | { |
442 | 146k | uint64_t n[4U] = { 0U }; |
443 | 146k | make_prime(n); |
444 | 146k | bn_sub_mod4(res, n, x, y); |
445 | 146k | } |
446 | | |
447 | | static inline void |
448 | | fnegate_conditional_vartime(uint64_t *f, bool is_negate) |
449 | 0 | { |
450 | 0 | uint64_t zero[4U] = { 0U }; |
451 | 0 | if (is_negate) { |
452 | 0 | fsub0(f, zero, f); |
453 | 0 | } |
454 | 0 | } |
455 | | |
456 | | static inline void |
457 | | mont_reduction(uint64_t *res, uint64_t *x) |
458 | 302k | { |
459 | 302k | uint64_t n[4U] = { 0U }; |
460 | 302k | make_prime(n); |
461 | 302k | uint64_t c0 = (uint64_t)0U; |
462 | 302k | KRML_MAYBE_FOR4( |
463 | 302k | i0, |
464 | 302k | (uint32_t)0U, |
465 | 302k | (uint32_t)4U, |
466 | 302k | (uint32_t)1U, |
467 | 302k | uint64_t qj = (uint64_t)1U * x[i0]; |
468 | 302k | uint64_t *res_j0 = x + i0; |
469 | 302k | uint64_t c = (uint64_t)0U; |
470 | 302k | { |
471 | 302k | uint64_t a_i = n[(uint32_t)4U * (uint32_t)0U]; |
472 | 302k | uint64_t *res_i0 = res_j0 + (uint32_t)4U * (uint32_t)0U; |
473 | 302k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0); |
474 | 302k | uint64_t a_i0 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
475 | 302k | uint64_t *res_i1 = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
476 | 302k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1); |
477 | 302k | uint64_t a_i1 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
478 | 302k | uint64_t *res_i2 = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
479 | 302k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2); |
480 | 302k | uint64_t a_i2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
481 | 302k | uint64_t *res_i = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
482 | 302k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i); |
483 | 302k | } uint64_t r = c; |
484 | 302k | uint64_t c1 = r; |
485 | 302k | uint64_t *resb = x + (uint32_t)4U + i0; |
486 | 302k | uint64_t res_j = x[(uint32_t)4U + i0]; |
487 | 302k | c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); |
488 | 302k | memcpy(res, x + (uint32_t)4U, (uint32_t)4U * sizeof(uint64_t)); |
489 | 302k | uint64_t c00 = c0; |
490 | 302k | uint64_t tmp[4U] = { 0U }; |
491 | 302k | uint64_t c = (uint64_t)0U; |
492 | 302k | { |
493 | 302k | uint64_t t1 = res[(uint32_t)4U * (uint32_t)0U]; |
494 | 302k | uint64_t t20 = n[(uint32_t)4U * (uint32_t)0U]; |
495 | 302k | uint64_t *res_i0 = tmp + (uint32_t)4U * (uint32_t)0U; |
496 | 302k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); |
497 | 302k | uint64_t t10 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
498 | 302k | uint64_t t21 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
499 | 302k | uint64_t *res_i1 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
500 | 302k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); |
501 | 302k | uint64_t t11 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
502 | 302k | uint64_t t22 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
503 | 302k | uint64_t *res_i2 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
504 | 302k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); |
505 | 302k | uint64_t t12 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
506 | 302k | uint64_t t2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
507 | 302k | uint64_t *res_i = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
508 | 302k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); |
509 | 302k | } |
510 | 302k | uint64_t c1 = c; |
511 | 302k | uint64_t c2 = c00 - c1; |
512 | 302k | KRML_MAYBE_FOR4(i, |
513 | 302k | (uint32_t)0U, |
514 | 302k | (uint32_t)4U, |
515 | 302k | (uint32_t)1U, |
516 | 302k | uint64_t *os = res; |
517 | 302k | uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); |
518 | 302k | os[i] = x1;); |
519 | 302k | } |
520 | | |
521 | | static inline void |
522 | | fmul0(uint64_t *res, uint64_t *x, uint64_t *y) |
523 | 237k | { |
524 | 237k | uint64_t tmp[8U] = { 0U }; |
525 | 237k | bn_mul4(tmp, x, y); |
526 | 237k | mont_reduction(res, tmp); |
527 | 237k | } |
528 | | |
529 | | static inline void |
530 | | fsqr0(uint64_t *res, uint64_t *x) |
531 | 65.1k | { |
532 | 65.1k | uint64_t tmp[8U] = { 0U }; |
533 | 65.1k | bn_sqr4(tmp, x); |
534 | 65.1k | mont_reduction(res, tmp); |
535 | 65.1k | } |
536 | | |
537 | | static inline void |
538 | | from_mont(uint64_t *res, uint64_t *a) |
539 | 211 | { |
540 | 211 | uint64_t tmp[8U] = { 0U }; |
541 | 211 | memcpy(tmp, a, (uint32_t)4U * sizeof(uint64_t)); |
542 | 211 | mont_reduction(res, tmp); |
543 | 211 | } |
544 | | |
545 | | static inline void |
546 | | to_mont(uint64_t *res, uint64_t *a) |
547 | 160 | { |
548 | 160 | uint64_t r2modn[4U] = { 0U }; |
549 | 160 | make_fmont_R2(r2modn); |
550 | 160 | fmul0(res, a, r2modn); |
551 | 160 | } |
552 | | |
553 | | static inline void |
554 | | fmul_by_b_coeff(uint64_t *res, uint64_t *x) |
555 | 40.0k | { |
556 | 40.0k | uint64_t b_coeff[4U] = { 0U }; |
557 | 40.0k | make_b_coeff(b_coeff); |
558 | 40.0k | fmul0(res, b_coeff, x); |
559 | 40.0k | } |
560 | | |
561 | | static inline void |
562 | | fcube(uint64_t *res, uint64_t *x) |
563 | 45 | { |
564 | 45 | fsqr0(res, x); |
565 | 45 | fmul0(res, res, x); |
566 | 45 | } |
567 | | |
568 | | static inline void |
569 | | finv(uint64_t *res, uint64_t *a) |
570 | 122 | { |
571 | 122 | uint64_t tmp[16U] = { 0U }; |
572 | 122 | uint64_t *x30 = tmp; |
573 | 122 | uint64_t *x2 = tmp + (uint32_t)4U; |
574 | 122 | uint64_t *tmp1 = tmp + (uint32_t)8U; |
575 | 122 | uint64_t *tmp2 = tmp + (uint32_t)12U; |
576 | 122 | memcpy(x2, a, (uint32_t)4U * sizeof(uint64_t)); |
577 | 122 | { |
578 | 122 | fsqr0(x2, x2); |
579 | 122 | } |
580 | 122 | fmul0(x2, x2, a); |
581 | 122 | memcpy(x30, x2, (uint32_t)4U * sizeof(uint64_t)); |
582 | 122 | { |
583 | 122 | fsqr0(x30, x30); |
584 | 122 | } |
585 | 122 | fmul0(x30, x30, a); |
586 | 122 | memcpy(tmp1, x30, (uint32_t)4U * sizeof(uint64_t)); |
587 | 122 | KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, fsqr0(tmp1, tmp1);); |
588 | 122 | fmul0(tmp1, tmp1, x30); |
589 | 122 | memcpy(tmp2, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
590 | 122 | KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, fsqr0(tmp2, tmp2);); |
591 | 122 | fmul0(tmp2, tmp2, tmp1); |
592 | 122 | memcpy(tmp1, tmp2, (uint32_t)4U * sizeof(uint64_t)); |
593 | 122 | KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, fsqr0(tmp1, tmp1);); |
594 | 122 | fmul0(tmp1, tmp1, x30); |
595 | 122 | memcpy(x30, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
596 | 122 | KRML_MAYBE_FOR15(i, (uint32_t)0U, (uint32_t)15U, (uint32_t)1U, fsqr0(x30, x30);); |
597 | 122 | fmul0(x30, x30, tmp1); |
598 | 122 | memcpy(tmp1, x30, (uint32_t)4U * sizeof(uint64_t)); |
599 | 122 | KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, fsqr0(tmp1, tmp1);); |
600 | 122 | fmul0(tmp1, tmp1, x2); |
601 | 122 | memcpy(x2, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
602 | 4.02k | for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { |
603 | 3.90k | fsqr0(x2, x2); |
604 | 3.90k | } |
605 | 122 | fmul0(x2, x2, a); |
606 | 15.7k | for (uint32_t i = (uint32_t)0U; i < (uint32_t)128U; i++) { |
607 | 15.6k | fsqr0(x2, x2); |
608 | 15.6k | } |
609 | 122 | fmul0(x2, x2, tmp1); |
610 | 4.02k | for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { |
611 | 3.90k | fsqr0(x2, x2); |
612 | 3.90k | } |
613 | 122 | fmul0(x2, x2, tmp1); |
614 | 3.78k | for (uint32_t i = (uint32_t)0U; i < (uint32_t)30U; i++) { |
615 | 3.66k | fsqr0(x2, x2); |
616 | 3.66k | } |
617 | 122 | fmul0(x2, x2, x30); |
618 | 122 | KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, fsqr0(x2, x2);); |
619 | 122 | fmul0(tmp1, x2, a); |
620 | 122 | memcpy(res, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
621 | 122 | } |
622 | | |
623 | | static inline void |
624 | | fsqrt(uint64_t *res, uint64_t *a) |
625 | 0 | { |
626 | 0 | uint64_t tmp[8U] = { 0U }; |
627 | 0 | uint64_t *tmp1 = tmp; |
628 | 0 | uint64_t *tmp2 = tmp + (uint32_t)4U; |
629 | 0 | memcpy(tmp1, a, (uint32_t)4U * sizeof(uint64_t)); |
630 | 0 | { |
631 | 0 | fsqr0(tmp1, tmp1); |
632 | 0 | } |
633 | 0 | fmul0(tmp1, tmp1, a); |
634 | 0 | memcpy(tmp2, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
635 | 0 | KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, fsqr0(tmp2, tmp2);); |
636 | 0 | fmul0(tmp2, tmp2, tmp1); |
637 | 0 | memcpy(tmp1, tmp2, (uint32_t)4U * sizeof(uint64_t)); |
638 | 0 | KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, fsqr0(tmp1, tmp1);); |
639 | 0 | fmul0(tmp1, tmp1, tmp2); |
640 | 0 | memcpy(tmp2, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
641 | 0 | KRML_MAYBE_FOR8(i, (uint32_t)0U, (uint32_t)8U, (uint32_t)1U, fsqr0(tmp2, tmp2);); |
642 | 0 | fmul0(tmp2, tmp2, tmp1); |
643 | 0 | memcpy(tmp1, tmp2, (uint32_t)4U * sizeof(uint64_t)); |
644 | 0 | KRML_MAYBE_FOR16(i, (uint32_t)0U, (uint32_t)16U, (uint32_t)1U, fsqr0(tmp1, tmp1);); |
645 | 0 | fmul0(tmp1, tmp1, tmp2); |
646 | 0 | memcpy(tmp2, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
647 | 0 | for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { |
648 | 0 | fsqr0(tmp2, tmp2); |
649 | 0 | } |
650 | 0 | fmul0(tmp2, tmp2, a); |
651 | 0 | for (uint32_t i = (uint32_t)0U; i < (uint32_t)96U; i++) { |
652 | 0 | fsqr0(tmp2, tmp2); |
653 | 0 | } |
654 | 0 | fmul0(tmp2, tmp2, a); |
655 | 0 | for (uint32_t i = (uint32_t)0U; i < (uint32_t)94U; i++) { |
656 | 0 | fsqr0(tmp2, tmp2); |
657 | 0 | } |
658 | 0 | memcpy(res, tmp2, (uint32_t)4U * sizeof(uint64_t)); |
659 | 0 | } |
660 | | |
661 | | static inline void |
662 | | make_base_point(uint64_t *p) |
663 | 122 | { |
664 | 122 | uint64_t *x = p; |
665 | 122 | uint64_t *y = p + (uint32_t)4U; |
666 | 122 | uint64_t *z = p + (uint32_t)8U; |
667 | 122 | make_g_x(x); |
668 | 122 | make_g_y(y); |
669 | 122 | make_fone(z); |
670 | 122 | } |
671 | | |
672 | | static inline void |
673 | | make_point_at_inf(uint64_t *p) |
674 | 122 | { |
675 | 122 | uint64_t *x = p; |
676 | 122 | uint64_t *y = p + (uint32_t)4U; |
677 | 122 | uint64_t *z = p + (uint32_t)8U; |
678 | 122 | make_fzero(x); |
679 | 122 | make_fone(y); |
680 | 122 | make_fzero(z); |
681 | 122 | } |
682 | | |
683 | | static inline bool |
684 | | is_point_at_inf_vartime(uint64_t *p) |
685 | 17 | { |
686 | 17 | uint64_t *pz = p + (uint32_t)8U; |
687 | 17 | return bn_is_zero_vartime4(pz); |
688 | 17 | } |
689 | | |
690 | | static inline void |
691 | | to_aff_point(uint64_t *res, uint64_t *p) |
692 | 89 | { |
693 | 89 | uint64_t zinv[4U] = { 0U }; |
694 | 89 | uint64_t *px = p; |
695 | 89 | uint64_t *py = p + (uint32_t)4U; |
696 | 89 | uint64_t *pz = p + (uint32_t)8U; |
697 | 89 | uint64_t *x = res; |
698 | 89 | uint64_t *y = res + (uint32_t)4U; |
699 | 89 | finv(zinv, pz); |
700 | 89 | fmul0(x, px, zinv); |
701 | 89 | fmul0(y, py, zinv); |
702 | 89 | from_mont(x, x); |
703 | 89 | from_mont(y, y); |
704 | 89 | } |
705 | | |
706 | | static inline void |
707 | | to_aff_point_x(uint64_t *res, uint64_t *p) |
708 | 33 | { |
709 | 33 | uint64_t zinv[4U] = { 0U }; |
710 | 33 | uint64_t *px = p; |
711 | 33 | uint64_t *pz = p + (uint32_t)8U; |
712 | 33 | finv(zinv, pz); |
713 | 33 | fmul0(res, px, zinv); |
714 | 33 | from_mont(res, res); |
715 | 33 | } |
716 | | |
717 | | static inline void |
718 | | to_proj_point(uint64_t *res, uint64_t *p) |
719 | 35 | { |
720 | 35 | uint64_t *px = p; |
721 | 35 | uint64_t *py = p + (uint32_t)4U; |
722 | 35 | uint64_t *rx = res; |
723 | 35 | uint64_t *ry = res + (uint32_t)4U; |
724 | 35 | uint64_t *rz = res + (uint32_t)8U; |
725 | 35 | to_mont(rx, px); |
726 | 35 | to_mont(ry, py); |
727 | 35 | make_fone(rz); |
728 | 35 | } |
729 | | |
730 | | static inline bool |
731 | | is_on_curve_vartime(uint64_t *p) |
732 | 45 | { |
733 | 45 | uint64_t rp[4U] = { 0U }; |
734 | 45 | uint64_t tx[4U] = { 0U }; |
735 | 45 | uint64_t ty[4U] = { 0U }; |
736 | 45 | uint64_t *px = p; |
737 | 45 | uint64_t *py = p + (uint32_t)4U; |
738 | 45 | to_mont(tx, px); |
739 | 45 | to_mont(ty, py); |
740 | 45 | uint64_t tmp[4U] = { 0U }; |
741 | 45 | fcube(rp, tx); |
742 | 45 | make_a_coeff(tmp); |
743 | 45 | fmul0(tmp, tmp, tx); |
744 | 45 | fadd0(rp, tmp, rp); |
745 | 45 | make_b_coeff(tmp); |
746 | 45 | fadd0(rp, tmp, rp); |
747 | 45 | fsqr0(ty, ty); |
748 | 45 | uint64_t r = feq_mask(ty, rp); |
749 | 45 | bool r0 = r == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
750 | 45 | return r0; |
751 | 45 | } |
752 | | |
753 | | static inline void |
754 | | aff_point_store(uint8_t *res, uint64_t *p) |
755 | 89 | { |
756 | 89 | uint64_t *px = p; |
757 | 89 | uint64_t *py = p + (uint32_t)4U; |
758 | 89 | bn2_to_bytes_be4(res, px, py); |
759 | 89 | } |
760 | | |
761 | | static inline void |
762 | | point_store(uint8_t *res, uint64_t *p) |
763 | 89 | { |
764 | 89 | uint64_t aff_p[8U] = { 0U }; |
765 | 89 | to_aff_point(aff_p, p); |
766 | 89 | aff_point_store(res, aff_p); |
767 | 89 | } |
768 | | |
769 | | static inline bool |
770 | | aff_point_load_vartime(uint64_t *p, uint8_t *b) |
771 | 45 | { |
772 | 45 | uint8_t *p_x = b; |
773 | 45 | uint8_t *p_y = b + (uint32_t)32U; |
774 | 45 | uint64_t *bn_p_x = p; |
775 | 45 | uint64_t *bn_p_y = p + (uint32_t)4U; |
776 | 45 | bn_from_bytes_be4(bn_p_x, p_x); |
777 | 45 | bn_from_bytes_be4(bn_p_y, p_y); |
778 | 45 | uint64_t *px = p; |
779 | 45 | uint64_t *py = p + (uint32_t)4U; |
780 | 45 | uint64_t lessX = bn_is_lt_prime_mask4(px); |
781 | 45 | uint64_t lessY = bn_is_lt_prime_mask4(py); |
782 | 45 | uint64_t res = lessX & lessY; |
783 | 45 | bool is_xy_valid = res == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
784 | 45 | if (!is_xy_valid) { |
785 | 0 | return false; |
786 | 0 | } |
787 | 45 | return is_on_curve_vartime(p); |
788 | 45 | } |
789 | | |
790 | | static inline bool |
791 | | load_point_vartime(uint64_t *p, uint8_t *b) |
792 | 45 | { |
793 | 45 | uint64_t p_aff[8U] = { 0U }; |
794 | 45 | bool res = aff_point_load_vartime(p_aff, b); |
795 | 45 | if (res) { |
796 | 35 | to_proj_point(p, p_aff); |
797 | 35 | } |
798 | 45 | return res; |
799 | 45 | } |
800 | | |
801 | | static inline bool |
802 | | aff_point_decompress_vartime(uint64_t *x, uint64_t *y, uint8_t *s) |
803 | 0 | { |
804 | 0 | uint8_t s0 = s[0U]; |
805 | 0 | uint8_t s01 = s0; |
806 | 0 | if (!(s01 == (uint8_t)0x02U || s01 == (uint8_t)0x03U)) { |
807 | 0 | return false; |
808 | 0 | } |
809 | 0 | uint8_t *xb = s + (uint32_t)1U; |
810 | 0 | bn_from_bytes_be4(x, xb); |
811 | 0 | uint64_t is_x_valid = bn_is_lt_prime_mask4(x); |
812 | 0 | bool is_x_valid1 = is_x_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
813 | 0 | bool is_y_odd = s01 == (uint8_t)0x03U; |
814 | 0 | if (!is_x_valid1) { |
815 | 0 | return false; |
816 | 0 | } |
817 | 0 | uint64_t y2M[4U] = { 0U }; |
818 | 0 | uint64_t xM[4U] = { 0U }; |
819 | 0 | uint64_t yM[4U] = { 0U }; |
820 | 0 | to_mont(xM, x); |
821 | 0 | uint64_t tmp[4U] = { 0U }; |
822 | 0 | fcube(y2M, xM); |
823 | 0 | make_a_coeff(tmp); |
824 | 0 | fmul0(tmp, tmp, xM); |
825 | 0 | fadd0(y2M, tmp, y2M); |
826 | 0 | make_b_coeff(tmp); |
827 | 0 | fadd0(y2M, tmp, y2M); |
828 | 0 | fsqrt(yM, y2M); |
829 | 0 | from_mont(y, yM); |
830 | 0 | fsqr0(yM, yM); |
831 | 0 | uint64_t r = feq_mask(yM, y2M); |
832 | 0 | bool is_y_valid = r == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
833 | 0 | bool is_y_valid0 = is_y_valid; |
834 | 0 | if (!is_y_valid0) { |
835 | 0 | return false; |
836 | 0 | } |
837 | 0 | uint64_t is_y_odd1 = y[0U] & (uint64_t)1U; |
838 | 0 | bool is_y_odd2 = is_y_odd1 == (uint64_t)1U; |
839 | 0 | fnegate_conditional_vartime(y, is_y_odd2 != is_y_odd); |
840 | 0 | return true; |
841 | 0 | } |
842 | | |
843 | | static inline void |
844 | | point_double(uint64_t *res, uint64_t *p) |
845 | 11.3k | { |
846 | 11.3k | uint64_t tmp[20U] = { 0U }; |
847 | 11.3k | uint64_t *x = p; |
848 | 11.3k | uint64_t *z = p + (uint32_t)8U; |
849 | 11.3k | uint64_t *x3 = res; |
850 | 11.3k | uint64_t *y3 = res + (uint32_t)4U; |
851 | 11.3k | uint64_t *z3 = res + (uint32_t)8U; |
852 | 11.3k | uint64_t *t0 = tmp; |
853 | 11.3k | uint64_t *t1 = tmp + (uint32_t)4U; |
854 | 11.3k | uint64_t *t2 = tmp + (uint32_t)8U; |
855 | 11.3k | uint64_t *t3 = tmp + (uint32_t)12U; |
856 | 11.3k | uint64_t *t4 = tmp + (uint32_t)16U; |
857 | 11.3k | uint64_t *x1 = p; |
858 | 11.3k | uint64_t *y = p + (uint32_t)4U; |
859 | 11.3k | uint64_t *z1 = p + (uint32_t)8U; |
860 | 11.3k | fsqr0(t0, x1); |
861 | 11.3k | fsqr0(t1, y); |
862 | 11.3k | fsqr0(t2, z1); |
863 | 11.3k | fmul0(t3, x1, y); |
864 | 11.3k | fadd0(t3, t3, t3); |
865 | 11.3k | fmul0(t4, y, z1); |
866 | 11.3k | fmul0(z3, x, z); |
867 | 11.3k | fadd0(z3, z3, z3); |
868 | 11.3k | fmul_by_b_coeff(y3, t2); |
869 | 11.3k | fsub0(y3, y3, z3); |
870 | 11.3k | fadd0(x3, y3, y3); |
871 | 11.3k | fadd0(y3, x3, y3); |
872 | 11.3k | fsub0(x3, t1, y3); |
873 | 11.3k | fadd0(y3, t1, y3); |
874 | 11.3k | fmul0(y3, x3, y3); |
875 | 11.3k | fmul0(x3, x3, t3); |
876 | 11.3k | fadd0(t3, t2, t2); |
877 | 11.3k | fadd0(t2, t2, t3); |
878 | 11.3k | fmul_by_b_coeff(z3, z3); |
879 | 11.3k | fsub0(z3, z3, t2); |
880 | 11.3k | fsub0(z3, z3, t0); |
881 | 11.3k | fadd0(t3, z3, z3); |
882 | 11.3k | fadd0(z3, z3, t3); |
883 | 11.3k | fadd0(t3, t0, t0); |
884 | 11.3k | fadd0(t0, t3, t0); |
885 | 11.3k | fsub0(t0, t0, t2); |
886 | 11.3k | fmul0(t0, t0, z3); |
887 | 11.3k | fadd0(y3, y3, t0); |
888 | 11.3k | fadd0(t0, t4, t4); |
889 | 11.3k | fmul0(z3, t0, z3); |
890 | 11.3k | fsub0(x3, x3, z3); |
891 | 11.3k | fmul0(z3, t0, t1); |
892 | 11.3k | fadd0(z3, z3, z3); |
893 | 11.3k | fadd0(z3, z3, z3); |
894 | 11.3k | } |
895 | | |
896 | | static inline void |
897 | | point_add(uint64_t *res, uint64_t *p, uint64_t *q) |
898 | 8.72k | { |
899 | 8.72k | uint64_t tmp[36U] = { 0U }; |
900 | 8.72k | uint64_t *t0 = tmp; |
901 | 8.72k | uint64_t *t1 = tmp + (uint32_t)24U; |
902 | 8.72k | uint64_t *x3 = t1; |
903 | 8.72k | uint64_t *y3 = t1 + (uint32_t)4U; |
904 | 8.72k | uint64_t *z3 = t1 + (uint32_t)8U; |
905 | 8.72k | uint64_t *t01 = t0; |
906 | 8.72k | uint64_t *t11 = t0 + (uint32_t)4U; |
907 | 8.72k | uint64_t *t2 = t0 + (uint32_t)8U; |
908 | 8.72k | uint64_t *t3 = t0 + (uint32_t)12U; |
909 | 8.72k | uint64_t *t4 = t0 + (uint32_t)16U; |
910 | 8.72k | uint64_t *t5 = t0 + (uint32_t)20U; |
911 | 8.72k | uint64_t *x1 = p; |
912 | 8.72k | uint64_t *y1 = p + (uint32_t)4U; |
913 | 8.72k | uint64_t *z10 = p + (uint32_t)8U; |
914 | 8.72k | uint64_t *x20 = q; |
915 | 8.72k | uint64_t *y20 = q + (uint32_t)4U; |
916 | 8.72k | uint64_t *z20 = q + (uint32_t)8U; |
917 | 8.72k | fmul0(t01, x1, x20); |
918 | 8.72k | fmul0(t11, y1, y20); |
919 | 8.72k | fmul0(t2, z10, z20); |
920 | 8.72k | fadd0(t3, x1, y1); |
921 | 8.72k | fadd0(t4, x20, y20); |
922 | 8.72k | fmul0(t3, t3, t4); |
923 | 8.72k | fadd0(t4, t01, t11); |
924 | 8.72k | uint64_t *y10 = p + (uint32_t)4U; |
925 | 8.72k | uint64_t *z11 = p + (uint32_t)8U; |
926 | 8.72k | uint64_t *y2 = q + (uint32_t)4U; |
927 | 8.72k | uint64_t *z21 = q + (uint32_t)8U; |
928 | 8.72k | fsub0(t3, t3, t4); |
929 | 8.72k | fadd0(t4, y10, z11); |
930 | 8.72k | fadd0(t5, y2, z21); |
931 | 8.72k | fmul0(t4, t4, t5); |
932 | 8.72k | fadd0(t5, t11, t2); |
933 | 8.72k | fsub0(t4, t4, t5); |
934 | 8.72k | uint64_t *x10 = p; |
935 | 8.72k | uint64_t *z1 = p + (uint32_t)8U; |
936 | 8.72k | uint64_t *x2 = q; |
937 | 8.72k | uint64_t *z2 = q + (uint32_t)8U; |
938 | 8.72k | fadd0(x3, x10, z1); |
939 | 8.72k | fadd0(y3, x2, z2); |
940 | 8.72k | fmul0(x3, x3, y3); |
941 | 8.72k | fadd0(y3, t01, t2); |
942 | 8.72k | fsub0(y3, x3, y3); |
943 | 8.72k | fmul_by_b_coeff(z3, t2); |
944 | 8.72k | fsub0(x3, y3, z3); |
945 | 8.72k | fadd0(z3, x3, x3); |
946 | 8.72k | fadd0(x3, x3, z3); |
947 | 8.72k | fsub0(z3, t11, x3); |
948 | 8.72k | fadd0(x3, t11, x3); |
949 | 8.72k | fmul_by_b_coeff(y3, y3); |
950 | 8.72k | fadd0(t11, t2, t2); |
951 | 8.72k | fadd0(t2, t11, t2); |
952 | 8.72k | fsub0(y3, y3, t2); |
953 | 8.72k | fsub0(y3, y3, t01); |
954 | 8.72k | fadd0(t11, y3, y3); |
955 | 8.72k | fadd0(y3, t11, y3); |
956 | 8.72k | fadd0(t11, t01, t01); |
957 | 8.72k | fadd0(t01, t11, t01); |
958 | 8.72k | fsub0(t01, t01, t2); |
959 | 8.72k | fmul0(t11, t4, y3); |
960 | 8.72k | fmul0(t2, t01, y3); |
961 | 8.72k | fmul0(y3, x3, z3); |
962 | 8.72k | fadd0(y3, y3, t2); |
963 | 8.72k | fmul0(x3, t3, x3); |
964 | 8.72k | fsub0(x3, x3, t11); |
965 | 8.72k | fmul0(z3, t4, z3); |
966 | 8.72k | fmul0(t11, t3, t01); |
967 | 8.72k | fadd0(z3, z3, t11); |
968 | 8.72k | memcpy(res, t1, (uint32_t)12U * sizeof(uint64_t)); |
969 | 8.72k | } |
970 | | |
971 | | static inline void |
972 | | point_mul(uint64_t *res, uint64_t *scalar, uint64_t *p) |
973 | 0 | { |
974 | 0 | uint64_t table[192U] = { 0U }; |
975 | 0 | uint64_t tmp[12U] = { 0U }; |
976 | 0 | uint64_t *t0 = table; |
977 | 0 | uint64_t *t1 = table + (uint32_t)12U; |
978 | 0 | make_point_at_inf(t0); |
979 | 0 | memcpy(t1, p, (uint32_t)12U * sizeof(uint64_t)); |
980 | 0 | KRML_MAYBE_FOR7(i, |
981 | 0 | (uint32_t)0U, |
982 | 0 | (uint32_t)7U, |
983 | 0 | (uint32_t)1U, |
984 | 0 | uint64_t *t11 = table + (i + (uint32_t)1U) * (uint32_t)12U; |
985 | 0 | point_double(tmp, t11); |
986 | 0 | memcpy(table + ((uint32_t)2U * i + (uint32_t)2U) * (uint32_t)12U, |
987 | 0 | tmp, |
988 | 0 | (uint32_t)12U * sizeof(uint64_t)); |
989 | 0 | uint64_t *t2 = table + ((uint32_t)2U * i + (uint32_t)2U) * (uint32_t)12U; |
990 | 0 | point_add(tmp, p, t2); |
991 | 0 | memcpy(table + ((uint32_t)2U * i + (uint32_t)3U) * (uint32_t)12U, |
992 | 0 | tmp, |
993 | 0 | (uint32_t)12U * sizeof(uint64_t));); |
994 | 0 | make_point_at_inf(res); |
995 | 0 | uint64_t tmp0[12U] = { 0U }; |
996 | 0 | for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)64U; i0++) { |
997 | 0 | KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, point_double(res, res);); |
998 | 0 | uint32_t k = (uint32_t)256U - (uint32_t)4U * i0 - (uint32_t)4U; |
999 | 0 | uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar, k, (uint32_t)4U); |
1000 | 0 | memcpy(tmp0, (uint64_t *)table, (uint32_t)12U * sizeof(uint64_t)); |
1001 | 0 | KRML_MAYBE_FOR15(i1, |
1002 | 0 | (uint32_t)0U, |
1003 | 0 | (uint32_t)15U, |
1004 | 0 | (uint32_t)1U, |
1005 | 0 | uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i1 + (uint32_t)1U)); |
1006 | 0 | const uint64_t *res_j = table + (i1 + (uint32_t)1U) * (uint32_t)12U; |
1007 | 0 | KRML_MAYBE_FOR12(i, |
1008 | 0 | (uint32_t)0U, |
1009 | 0 | (uint32_t)12U, |
1010 | 0 | (uint32_t)1U, |
1011 | 0 | uint64_t *os = tmp0; |
1012 | 0 | uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); |
1013 | 0 | os[i] = x;);); |
1014 | 0 | point_add(res, res, tmp0); |
1015 | 0 | } |
1016 | 0 | } |
1017 | | |
1018 | | static inline void |
1019 | | precomp_get_consttime(const uint64_t *table, uint64_t bits_l, uint64_t *tmp) |
1020 | 6.72k | { |
1021 | 6.72k | memcpy(tmp, (uint64_t *)table, (uint32_t)12U * sizeof(uint64_t)); |
1022 | 6.72k | KRML_MAYBE_FOR15(i0, |
1023 | 6.72k | (uint32_t)0U, |
1024 | 6.72k | (uint32_t)15U, |
1025 | 6.72k | (uint32_t)1U, |
1026 | 6.72k | uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i0 + (uint32_t)1U)); |
1027 | 6.72k | const uint64_t *res_j = table + (i0 + (uint32_t)1U) * (uint32_t)12U; |
1028 | 6.72k | KRML_MAYBE_FOR12(i, |
1029 | 6.72k | (uint32_t)0U, |
1030 | 6.72k | (uint32_t)12U, |
1031 | 6.72k | (uint32_t)1U, |
1032 | 6.72k | uint64_t *os = tmp; |
1033 | 6.72k | uint64_t x = (c & res_j[i]) | (~c & tmp[i]); |
1034 | 6.72k | os[i] = x;);); |
1035 | 6.72k | } |
1036 | | |
1037 | | static inline void |
1038 | | point_mul_g(uint64_t *res, uint64_t *scalar) |
1039 | 105 | { |
1040 | 105 | uint64_t q1[12U] = { 0U }; |
1041 | 105 | make_base_point(q1); |
1042 | 105 | uint64_t |
1043 | 105 | q2[12U] = { |
1044 | 105 | (uint64_t)1499621593102562565U, (uint64_t)16692369783039433128U, |
1045 | 105 | (uint64_t)15337520135922861848U, (uint64_t)5455737214495366228U, |
1046 | 105 | (uint64_t)17827017231032529600U, (uint64_t)12413621606240782649U, |
1047 | 105 | (uint64_t)2290483008028286132U, (uint64_t)15752017553340844820U, |
1048 | 105 | (uint64_t)4846430910634234874U, (uint64_t)10861682798464583253U, |
1049 | 105 | (uint64_t)15404737222404363049U, (uint64_t)363586619281562022U |
1050 | 105 | }; |
1051 | 105 | uint64_t |
1052 | 105 | q3[12U] = { |
1053 | 105 | (uint64_t)14619254753077084366U, (uint64_t)13913835116514008593U, |
1054 | 105 | (uint64_t)15060744674088488145U, (uint64_t)17668414598203068685U, |
1055 | 105 | (uint64_t)10761169236902342334U, (uint64_t)15467027479157446221U, |
1056 | 105 | (uint64_t)14989185522423469618U, (uint64_t)14354539272510107003U, |
1057 | 105 | (uint64_t)14298211796392133693U, (uint64_t)13270323784253711450U, |
1058 | 105 | (uint64_t)13380964971965046957U, (uint64_t)8686204248456909699U |
1059 | 105 | }; |
1060 | 105 | uint64_t |
1061 | 105 | q4[12U] = { |
1062 | 105 | (uint64_t)7870395003430845958U, (uint64_t)18001862936410067720U, |
1063 | 105 | (uint64_t)8006461232116967215U, (uint64_t)5921313779532424762U, |
1064 | 105 | (uint64_t)10702113371959864307U, (uint64_t)8070517410642379879U, |
1065 | 105 | (uint64_t)7139806720777708306U, (uint64_t)8253938546650739833U, |
1066 | 105 | (uint64_t)17490482834545705718U, (uint64_t)1065249776797037500U, |
1067 | 105 | (uint64_t)5018258455937968775U, (uint64_t)14100621120178668337U |
1068 | 105 | }; |
1069 | 105 | uint64_t *r1 = scalar; |
1070 | 105 | uint64_t *r2 = scalar + (uint32_t)1U; |
1071 | 105 | uint64_t *r3 = scalar + (uint32_t)2U; |
1072 | 105 | uint64_t *r4 = scalar + (uint32_t)3U; |
1073 | 105 | make_point_at_inf(res); |
1074 | 105 | uint64_t tmp[12U] = { 0U }; |
1075 | 105 | KRML_MAYBE_FOR16(i, |
1076 | 105 | (uint32_t)0U, |
1077 | 105 | (uint32_t)16U, |
1078 | 105 | (uint32_t)1U, |
1079 | 105 | KRML_MAYBE_FOR4(i0, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, point_double(res, res);); |
1080 | 105 | uint32_t k = (uint32_t)64U - (uint32_t)4U * i - (uint32_t)4U; |
1081 | 105 | uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)1U, r4, k, (uint32_t)4U); |
1082 | 105 | precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_192_table_w4, bits_l, tmp); |
1083 | 105 | point_add(res, res, tmp); |
1084 | 105 | uint32_t k0 = (uint32_t)64U - (uint32_t)4U * i - (uint32_t)4U; |
1085 | 105 | uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)1U, r3, k0, (uint32_t)4U); |
1086 | 105 | precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_128_table_w4, bits_l0, tmp); |
1087 | 105 | point_add(res, res, tmp); |
1088 | 105 | uint32_t k1 = (uint32_t)64U - (uint32_t)4U * i - (uint32_t)4U; |
1089 | 105 | uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)1U, r2, k1, (uint32_t)4U); |
1090 | 105 | precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_64_table_w4, bits_l1, tmp); |
1091 | 105 | point_add(res, res, tmp); |
1092 | 105 | uint32_t k2 = (uint32_t)64U - (uint32_t)4U * i - (uint32_t)4U; |
1093 | 105 | uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)1U, r1, k2, (uint32_t)4U); |
1094 | 105 | precomp_get_consttime(Hacl_P256_PrecompTable_precomp_basepoint_table_w4, bits_l2, tmp); |
1095 | 105 | point_add(res, res, tmp);); |
1096 | 105 | KRML_HOST_IGNORE(q1); |
1097 | 105 | KRML_HOST_IGNORE(q2); |
1098 | 105 | KRML_HOST_IGNORE(q3); |
1099 | 105 | KRML_HOST_IGNORE(q4); |
1100 | 105 | } |
1101 | | |
1102 | | static inline void |
1103 | | point_mul_double_g(uint64_t *res, uint64_t *scalar1, uint64_t *scalar2, uint64_t *q2) |
1104 | 17 | { |
1105 | 17 | uint64_t q1[12U] = { 0U }; |
1106 | 17 | make_base_point(q1); |
1107 | 17 | uint64_t table2[384U] = { 0U }; |
1108 | 17 | uint64_t tmp[12U] = { 0U }; |
1109 | 17 | uint64_t *t0 = table2; |
1110 | 17 | uint64_t *t1 = table2 + (uint32_t)12U; |
1111 | 17 | make_point_at_inf(t0); |
1112 | 17 | memcpy(t1, q2, (uint32_t)12U * sizeof(uint64_t)); |
1113 | 17 | KRML_MAYBE_FOR15(i, |
1114 | 17 | (uint32_t)0U, |
1115 | 17 | (uint32_t)15U, |
1116 | 17 | (uint32_t)1U, |
1117 | 17 | uint64_t *t11 = table2 + (i + (uint32_t)1U) * (uint32_t)12U; |
1118 | 17 | point_double(tmp, t11); |
1119 | 17 | memcpy(table2 + ((uint32_t)2U * i + (uint32_t)2U) * (uint32_t)12U, |
1120 | 17 | tmp, |
1121 | 17 | (uint32_t)12U * sizeof(uint64_t)); |
1122 | 17 | uint64_t *t2 = table2 + ((uint32_t)2U * i + (uint32_t)2U) * (uint32_t)12U; |
1123 | 17 | point_add(tmp, q2, t2); |
1124 | 17 | memcpy(table2 + ((uint32_t)2U * i + (uint32_t)3U) * (uint32_t)12U, |
1125 | 17 | tmp, |
1126 | 17 | (uint32_t)12U * sizeof(uint64_t));); |
1127 | 17 | uint64_t tmp0[12U] = { 0U }; |
1128 | 17 | uint32_t i0 = (uint32_t)255U; |
1129 | 17 | uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar1, i0, (uint32_t)5U); |
1130 | 17 | uint32_t bits_l32 = (uint32_t)bits_c; |
1131 | 17 | const uint64_t |
1132 | 17 | *a_bits_l = Hacl_P256_PrecompTable_precomp_basepoint_table_w5 + bits_l32 * (uint32_t)12U; |
1133 | 17 | memcpy(res, (uint64_t *)a_bits_l, (uint32_t)12U * sizeof(uint64_t)); |
1134 | 17 | uint32_t i1 = (uint32_t)255U; |
1135 | 17 | uint64_t bits_c0 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar2, i1, (uint32_t)5U); |
1136 | 17 | uint32_t bits_l320 = (uint32_t)bits_c0; |
1137 | 17 | const uint64_t *a_bits_l0 = table2 + bits_l320 * (uint32_t)12U; |
1138 | 17 | memcpy(tmp0, (uint64_t *)a_bits_l0, (uint32_t)12U * sizeof(uint64_t)); |
1139 | 17 | point_add(res, res, tmp0); |
1140 | 17 | uint64_t tmp1[12U] = { 0U }; |
1141 | 884 | for (uint32_t i = (uint32_t)0U; i < (uint32_t)51U; i++) { |
1142 | 867 | KRML_MAYBE_FOR5(i2, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, point_double(res, res);); |
1143 | 867 | uint32_t k = (uint32_t)255U - (uint32_t)5U * i - (uint32_t)5U; |
1144 | 867 | uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar2, k, (uint32_t)5U); |
1145 | 867 | uint32_t bits_l321 = (uint32_t)bits_l; |
1146 | 867 | const uint64_t *a_bits_l1 = table2 + bits_l321 * (uint32_t)12U; |
1147 | 867 | memcpy(tmp1, (uint64_t *)a_bits_l1, (uint32_t)12U * sizeof(uint64_t)); |
1148 | 867 | point_add(res, res, tmp1); |
1149 | 867 | uint32_t k0 = (uint32_t)255U - (uint32_t)5U * i - (uint32_t)5U; |
1150 | 867 | uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar1, k0, (uint32_t)5U); |
1151 | 867 | uint32_t bits_l322 = (uint32_t)bits_l0; |
1152 | 867 | const uint64_t |
1153 | 867 | *a_bits_l2 = Hacl_P256_PrecompTable_precomp_basepoint_table_w5 + bits_l322 * (uint32_t)12U; |
1154 | 867 | memcpy(tmp1, (uint64_t *)a_bits_l2, (uint32_t)12U * sizeof(uint64_t)); |
1155 | 867 | point_add(res, res, tmp1); |
1156 | 867 | } |
1157 | 17 | } |
1158 | | |
1159 | | static inline uint64_t |
1160 | | bn_is_lt_order_mask4(uint64_t *f) |
1161 | 195 | { |
1162 | 195 | uint64_t tmp[4U] = { 0U }; |
1163 | 195 | make_order(tmp); |
1164 | 195 | uint64_t c = bn_sub4(tmp, f, tmp); |
1165 | 195 | return (uint64_t)0U - c; |
1166 | 195 | } |
1167 | | |
1168 | | static inline uint64_t |
1169 | | bn_is_lt_order_and_gt_zero_mask4(uint64_t *f) |
1170 | 195 | { |
1171 | 195 | uint64_t is_lt_order = bn_is_lt_order_mask4(f); |
1172 | 195 | uint64_t is_eq_zero = bn_is_zero_mask4(f); |
1173 | 195 | return is_lt_order & ~is_eq_zero; |
1174 | 195 | } |
1175 | | |
1176 | | static inline void |
1177 | | qmod_short(uint64_t *res, uint64_t *x) |
1178 | 66 | { |
1179 | 66 | uint64_t tmp[4U] = { 0U }; |
1180 | 66 | make_order(tmp); |
1181 | 66 | uint64_t c = bn_sub4(tmp, x, tmp); |
1182 | 66 | bn_cmovznz4(res, c, tmp, x); |
1183 | 66 | } |
1184 | | |
1185 | | static inline void |
1186 | | qadd(uint64_t *res, uint64_t *x, uint64_t *y) |
1187 | 16 | { |
1188 | 16 | uint64_t n[4U] = { 0U }; |
1189 | 16 | make_order(n); |
1190 | 16 | bn_add_mod4(res, n, x, y); |
1191 | 16 | } |
1192 | | |
1193 | | static inline void |
1194 | | qmont_reduction(uint64_t *res, uint64_t *x) |
1195 | 9.75k | { |
1196 | 9.75k | uint64_t n[4U] = { 0U }; |
1197 | 9.75k | make_order(n); |
1198 | 9.75k | uint64_t c0 = (uint64_t)0U; |
1199 | 9.75k | KRML_MAYBE_FOR4( |
1200 | 9.75k | i0, |
1201 | 9.75k | (uint32_t)0U, |
1202 | 9.75k | (uint32_t)4U, |
1203 | 9.75k | (uint32_t)1U, |
1204 | 9.75k | uint64_t qj = (uint64_t)0xccd1c8aaee00bc4fU * x[i0]; |
1205 | 9.75k | uint64_t *res_j0 = x + i0; |
1206 | 9.75k | uint64_t c = (uint64_t)0U; |
1207 | 9.75k | { |
1208 | 9.75k | uint64_t a_i = n[(uint32_t)4U * (uint32_t)0U]; |
1209 | 9.75k | uint64_t *res_i0 = res_j0 + (uint32_t)4U * (uint32_t)0U; |
1210 | 9.75k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0); |
1211 | 9.75k | uint64_t a_i0 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
1212 | 9.75k | uint64_t *res_i1 = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
1213 | 9.75k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1); |
1214 | 9.75k | uint64_t a_i1 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
1215 | 9.75k | uint64_t *res_i2 = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
1216 | 9.75k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2); |
1217 | 9.75k | uint64_t a_i2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
1218 | 9.75k | uint64_t *res_i = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
1219 | 9.75k | c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i); |
1220 | 9.75k | } uint64_t r = c; |
1221 | 9.75k | uint64_t c1 = r; |
1222 | 9.75k | uint64_t *resb = x + (uint32_t)4U + i0; |
1223 | 9.75k | uint64_t res_j = x[(uint32_t)4U + i0]; |
1224 | 9.75k | c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); |
1225 | 9.75k | memcpy(res, x + (uint32_t)4U, (uint32_t)4U * sizeof(uint64_t)); |
1226 | 9.75k | uint64_t c00 = c0; |
1227 | 9.75k | uint64_t tmp[4U] = { 0U }; |
1228 | 9.75k | uint64_t c = (uint64_t)0U; |
1229 | 9.75k | { |
1230 | 9.75k | uint64_t t1 = res[(uint32_t)4U * (uint32_t)0U]; |
1231 | 9.75k | uint64_t t20 = n[(uint32_t)4U * (uint32_t)0U]; |
1232 | 9.75k | uint64_t *res_i0 = tmp + (uint32_t)4U * (uint32_t)0U; |
1233 | 9.75k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0); |
1234 | 9.75k | uint64_t t10 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
1235 | 9.75k | uint64_t t21 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U]; |
1236 | 9.75k | uint64_t *res_i1 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U; |
1237 | 9.75k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1); |
1238 | 9.75k | uint64_t t11 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
1239 | 9.75k | uint64_t t22 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U]; |
1240 | 9.75k | uint64_t *res_i2 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U; |
1241 | 9.75k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2); |
1242 | 9.75k | uint64_t t12 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
1243 | 9.75k | uint64_t t2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U]; |
1244 | 9.75k | uint64_t *res_i = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U; |
1245 | 9.75k | c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i); |
1246 | 9.75k | } |
1247 | 9.75k | uint64_t c1 = c; |
1248 | 9.75k | uint64_t c2 = c00 - c1; |
1249 | 9.75k | KRML_MAYBE_FOR4(i, |
1250 | 9.75k | (uint32_t)0U, |
1251 | 9.75k | (uint32_t)4U, |
1252 | 9.75k | (uint32_t)1U, |
1253 | 9.75k | uint64_t *os = res; |
1254 | 9.75k | uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); |
1255 | 9.75k | os[i] = x1;); |
1256 | 9.75k | } |
1257 | | |
1258 | | static inline void |
1259 | | from_qmont(uint64_t *res, uint64_t *x) |
1260 | 50 | { |
1261 | 50 | uint64_t tmp[8U] = { 0U }; |
1262 | 50 | memcpy(tmp, x, (uint32_t)4U * sizeof(uint64_t)); |
1263 | 50 | qmont_reduction(res, tmp); |
1264 | 50 | } |
1265 | | |
1266 | | static inline void |
1267 | | qmul(uint64_t *res, uint64_t *x, uint64_t *y) |
1268 | 1.32k | { |
1269 | 1.32k | uint64_t tmp[8U] = { 0U }; |
1270 | 1.32k | bn_mul4(tmp, x, y); |
1271 | 1.32k | qmont_reduction(res, tmp); |
1272 | 1.32k | } |
1273 | | |
1274 | | static inline void |
1275 | | qsqr(uint64_t *res, uint64_t *x) |
1276 | 8.38k | { |
1277 | 8.38k | uint64_t tmp[8U] = { 0U }; |
1278 | 8.38k | bn_sqr4(tmp, x); |
1279 | 8.38k | qmont_reduction(res, tmp); |
1280 | 8.38k | } |
1281 | | |
1282 | | bool |
1283 | | Hacl_Impl_P256_DH_ecp256dh_i(uint8_t *public_key, uint8_t *private_key) |
1284 | 89 | { |
1285 | 89 | uint64_t tmp[16U] = { 0U }; |
1286 | 89 | uint64_t *sk = tmp; |
1287 | 89 | uint64_t *pk = tmp + (uint32_t)4U; |
1288 | 89 | bn_from_bytes_be4(sk, private_key); |
1289 | 89 | uint64_t is_b_valid = bn_is_lt_order_and_gt_zero_mask4(sk); |
1290 | 89 | uint64_t oneq[4U] = { 0U }; |
1291 | 89 | oneq[0U] = (uint64_t)1U; |
1292 | 89 | oneq[1U] = (uint64_t)0U; |
1293 | 89 | oneq[2U] = (uint64_t)0U; |
1294 | 89 | oneq[3U] = (uint64_t)0U; |
1295 | 89 | KRML_MAYBE_FOR4(i, |
1296 | 89 | (uint32_t)0U, |
1297 | 89 | (uint32_t)4U, |
1298 | 89 | (uint32_t)1U, |
1299 | 89 | uint64_t *os = sk; |
1300 | 89 | uint64_t uu____0 = oneq[i]; |
1301 | 89 | uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); |
1302 | 89 | os[i] = x;); |
1303 | 89 | uint64_t is_sk_valid = is_b_valid; |
1304 | 89 | point_mul_g(pk, sk); |
1305 | 89 | point_store(public_key, pk); |
1306 | 89 | return is_sk_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
1307 | 89 | } |
1308 | | |
1309 | | bool |
1310 | | Hacl_Impl_P256_DH_ecp256dh_r( |
1311 | | uint8_t *shared_secret, |
1312 | | uint8_t *their_pubkey, |
1313 | | uint8_t *private_key) |
1314 | 0 | { |
1315 | 0 | uint64_t tmp[16U] = { 0U }; |
1316 | 0 | uint64_t *sk = tmp; |
1317 | 0 | uint64_t *pk = tmp + (uint32_t)4U; |
1318 | 0 | bool is_pk_valid = load_point_vartime(pk, their_pubkey); |
1319 | 0 | bn_from_bytes_be4(sk, private_key); |
1320 | 0 | uint64_t is_b_valid = bn_is_lt_order_and_gt_zero_mask4(sk); |
1321 | 0 | uint64_t oneq[4U] = { 0U }; |
1322 | 0 | oneq[0U] = (uint64_t)1U; |
1323 | 0 | oneq[1U] = (uint64_t)0U; |
1324 | 0 | oneq[2U] = (uint64_t)0U; |
1325 | 0 | oneq[3U] = (uint64_t)0U; |
1326 | 0 | KRML_MAYBE_FOR4(i, |
1327 | 0 | (uint32_t)0U, |
1328 | 0 | (uint32_t)4U, |
1329 | 0 | (uint32_t)1U, |
1330 | 0 | uint64_t *os = sk; |
1331 | 0 | uint64_t uu____0 = oneq[i]; |
1332 | 0 | uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0)); |
1333 | 0 | os[i] = x;); |
1334 | 0 | uint64_t is_sk_valid = is_b_valid; |
1335 | 0 | uint64_t ss_proj[12U] = { 0U }; |
1336 | 0 | if (is_pk_valid) { |
1337 | 0 | point_mul(ss_proj, sk, pk); |
1338 | 0 | point_store(shared_secret, ss_proj); |
1339 | 0 | } |
1340 | 0 | return is_sk_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU && is_pk_valid; |
1341 | 0 | } |
1342 | | |
1343 | | static inline void |
1344 | | qinv(uint64_t *res, uint64_t *r) |
1345 | 33 | { |
1346 | 33 | uint64_t tmp[28U] = { 0U }; |
1347 | 33 | uint64_t *x6 = tmp; |
1348 | 33 | uint64_t *x_11 = tmp + (uint32_t)4U; |
1349 | 33 | uint64_t *x_101 = tmp + (uint32_t)8U; |
1350 | 33 | uint64_t *x_111 = tmp + (uint32_t)12U; |
1351 | 33 | uint64_t *x_1111 = tmp + (uint32_t)16U; |
1352 | 33 | uint64_t *x_10101 = tmp + (uint32_t)20U; |
1353 | 33 | uint64_t *x_101111 = tmp + (uint32_t)24U; |
1354 | 33 | memcpy(x6, r, (uint32_t)4U * sizeof(uint64_t)); |
1355 | 33 | { |
1356 | 33 | qsqr(x6, x6); |
1357 | 33 | } |
1358 | 33 | qmul(x_11, x6, r); |
1359 | 33 | qmul(x_101, x6, x_11); |
1360 | 33 | qmul(x_111, x6, x_101); |
1361 | 33 | memcpy(x6, x_101, (uint32_t)4U * sizeof(uint64_t)); |
1362 | 33 | { |
1363 | 33 | qsqr(x6, x6); |
1364 | 33 | } |
1365 | 33 | qmul(x_1111, x_101, x6); |
1366 | 33 | { |
1367 | 33 | qsqr(x6, x6); |
1368 | 33 | } |
1369 | 33 | qmul(x_10101, x6, r); |
1370 | 33 | memcpy(x6, x_10101, (uint32_t)4U * sizeof(uint64_t)); |
1371 | 33 | { |
1372 | 33 | qsqr(x6, x6); |
1373 | 33 | } |
1374 | 33 | qmul(x_101111, x_101, x6); |
1375 | 33 | qmul(x6, x_10101, x6); |
1376 | 33 | uint64_t tmp1[4U] = { 0U }; |
1377 | 33 | KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, qsqr(x6, x6);); |
1378 | 33 | qmul(x6, x6, x_11); |
1379 | 33 | memcpy(tmp1, x6, (uint32_t)4U * sizeof(uint64_t)); |
1380 | 33 | KRML_MAYBE_FOR8(i, (uint32_t)0U, (uint32_t)8U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1381 | 33 | qmul(tmp1, tmp1, x6); |
1382 | 33 | memcpy(x6, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
1383 | 33 | KRML_MAYBE_FOR16(i, (uint32_t)0U, (uint32_t)16U, (uint32_t)1U, qsqr(x6, x6);); |
1384 | 33 | qmul(x6, x6, tmp1); |
1385 | 33 | memcpy(tmp1, x6, (uint32_t)4U * sizeof(uint64_t)); |
1386 | 2.14k | for (uint32_t i = (uint32_t)0U; i < (uint32_t)64U; i++) { |
1387 | 2.11k | qsqr(tmp1, tmp1); |
1388 | 2.11k | } |
1389 | 33 | qmul(tmp1, tmp1, x6); |
1390 | 1.08k | for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { |
1391 | 1.05k | qsqr(tmp1, tmp1); |
1392 | 1.05k | } |
1393 | 33 | qmul(tmp1, tmp1, x6); |
1394 | 33 | KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1395 | 33 | qmul(tmp1, tmp1, x_101111); |
1396 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1397 | 33 | qmul(tmp1, tmp1, x_111); |
1398 | 33 | KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1399 | 33 | qmul(tmp1, tmp1, x_11); |
1400 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1401 | 33 | qmul(tmp1, tmp1, x_1111); |
1402 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1403 | 33 | qmul(tmp1, tmp1, x_10101); |
1404 | 33 | KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1405 | 33 | qmul(tmp1, tmp1, x_101); |
1406 | 33 | KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1407 | 33 | qmul(tmp1, tmp1, x_101); |
1408 | 33 | KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1409 | 33 | qmul(tmp1, tmp1, x_101); |
1410 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1411 | 33 | qmul(tmp1, tmp1, x_111); |
1412 | 33 | KRML_MAYBE_FOR9(i, (uint32_t)0U, (uint32_t)9U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1413 | 33 | qmul(tmp1, tmp1, x_101111); |
1414 | 33 | KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1415 | 33 | qmul(tmp1, tmp1, x_1111); |
1416 | 33 | KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1417 | 33 | qmul(tmp1, tmp1, r); |
1418 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1419 | 33 | qmul(tmp1, tmp1, r); |
1420 | 33 | KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1421 | 33 | qmul(tmp1, tmp1, x_1111); |
1422 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1423 | 33 | qmul(tmp1, tmp1, x_111); |
1424 | 33 | KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1425 | 33 | qmul(tmp1, tmp1, x_111); |
1426 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1427 | 33 | qmul(tmp1, tmp1, x_111); |
1428 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1429 | 33 | qmul(tmp1, tmp1, x_101); |
1430 | 33 | KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1431 | 33 | qmul(tmp1, tmp1, x_11); |
1432 | 33 | KRML_MAYBE_FOR10(i, (uint32_t)0U, (uint32_t)10U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1433 | 33 | qmul(tmp1, tmp1, x_101111); |
1434 | 33 | KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1435 | 33 | qmul(tmp1, tmp1, x_11); |
1436 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1437 | 33 | qmul(tmp1, tmp1, x_11); |
1438 | 33 | KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1439 | 33 | qmul(tmp1, tmp1, x_11); |
1440 | 33 | KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1441 | 33 | qmul(tmp1, tmp1, r); |
1442 | 33 | KRML_MAYBE_FOR7(i, (uint32_t)0U, (uint32_t)7U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1443 | 33 | qmul(tmp1, tmp1, x_10101); |
1444 | 33 | KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, qsqr(tmp1, tmp1);); |
1445 | 33 | qmul(tmp1, tmp1, x_1111); |
1446 | 33 | memcpy(x6, tmp1, (uint32_t)4U * sizeof(uint64_t)); |
1447 | 33 | memcpy(res, x6, (uint32_t)4U * sizeof(uint64_t)); |
1448 | 33 | } |
1449 | | |
1450 | | static inline void |
1451 | | qmul_mont(uint64_t *sinv, uint64_t *b, uint64_t *res) |
1452 | 34 | { |
1453 | 34 | uint64_t tmp[4U] = { 0U }; |
1454 | 34 | from_qmont(tmp, b); |
1455 | 34 | qmul(res, sinv, tmp); |
1456 | 34 | } |
1457 | | |
1458 | | static inline bool |
1459 | | ecdsa_verify_msg_as_qelem( |
1460 | | uint64_t *m_q, |
1461 | | uint8_t *public_key, |
1462 | | uint8_t *signature_r, |
1463 | | uint8_t *signature_s) |
1464 | 17 | { |
1465 | 17 | uint64_t tmp[28U] = { 0U }; |
1466 | 17 | uint64_t *pk = tmp; |
1467 | 17 | uint64_t *r_q = tmp + (uint32_t)12U; |
1468 | 17 | uint64_t *s_q = tmp + (uint32_t)16U; |
1469 | 17 | uint64_t *u1 = tmp + (uint32_t)20U; |
1470 | 17 | uint64_t *u2 = tmp + (uint32_t)24U; |
1471 | 17 | bool is_pk_valid = load_point_vartime(pk, public_key); |
1472 | 17 | bn_from_bytes_be4(r_q, signature_r); |
1473 | 17 | bn_from_bytes_be4(s_q, signature_s); |
1474 | 17 | uint64_t is_r_valid = bn_is_lt_order_and_gt_zero_mask4(r_q); |
1475 | 17 | uint64_t is_s_valid = bn_is_lt_order_and_gt_zero_mask4(s_q); |
1476 | 17 | bool |
1477 | 17 | is_rs_valid = |
1478 | 17 | is_r_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU && is_s_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
1479 | 17 | if (!(is_pk_valid && is_rs_valid)) { |
1480 | 0 | return false; |
1481 | 0 | } |
1482 | 17 | uint64_t sinv[4U] = { 0U }; |
1483 | 17 | qinv(sinv, s_q); |
1484 | 17 | qmul_mont(sinv, m_q, u1); |
1485 | 17 | qmul_mont(sinv, r_q, u2); |
1486 | 17 | uint64_t res[12U] = { 0U }; |
1487 | 17 | point_mul_double_g(res, u1, u2, pk); |
1488 | 17 | if (is_point_at_inf_vartime(res)) { |
1489 | 0 | return false; |
1490 | 0 | } |
1491 | 17 | uint64_t x[4U] = { 0U }; |
1492 | 17 | to_aff_point_x(x, res); |
1493 | 17 | qmod_short(x, x); |
1494 | 17 | bool res1 = bn_is_eq_vartime4(x, r_q); |
1495 | 17 | return res1; |
1496 | 17 | } |
1497 | | |
1498 | | static inline bool |
1499 | | ecdsa_sign_msg_as_qelem( |
1500 | | uint8_t *signature, |
1501 | | uint64_t *m_q, |
1502 | | uint8_t *private_key, |
1503 | | uint8_t *nonce) |
1504 | 16 | { |
1505 | 16 | uint64_t rsdk_q[16U] = { 0U }; |
1506 | 16 | uint64_t *r_q = rsdk_q; |
1507 | 16 | uint64_t *s_q = rsdk_q + (uint32_t)4U; |
1508 | 16 | uint64_t *d_a = rsdk_q + (uint32_t)8U; |
1509 | 16 | uint64_t *k_q = rsdk_q + (uint32_t)12U; |
1510 | 16 | bn_from_bytes_be4(d_a, private_key); |
1511 | 16 | uint64_t is_b_valid0 = bn_is_lt_order_and_gt_zero_mask4(d_a); |
1512 | 16 | uint64_t oneq0[4U] = { 0U }; |
1513 | 16 | oneq0[0U] = (uint64_t)1U; |
1514 | 16 | oneq0[1U] = (uint64_t)0U; |
1515 | 16 | oneq0[2U] = (uint64_t)0U; |
1516 | 16 | oneq0[3U] = (uint64_t)0U; |
1517 | 16 | KRML_MAYBE_FOR4(i, |
1518 | 16 | (uint32_t)0U, |
1519 | 16 | (uint32_t)4U, |
1520 | 16 | (uint32_t)1U, |
1521 | 16 | uint64_t *os = d_a; |
1522 | 16 | uint64_t uu____0 = oneq0[i]; |
1523 | 16 | uint64_t x = uu____0 ^ (is_b_valid0 & (d_a[i] ^ uu____0)); |
1524 | 16 | os[i] = x;); |
1525 | 16 | uint64_t is_sk_valid = is_b_valid0; |
1526 | 16 | bn_from_bytes_be4(k_q, nonce); |
1527 | 16 | uint64_t is_b_valid = bn_is_lt_order_and_gt_zero_mask4(k_q); |
1528 | 16 | uint64_t oneq[4U] = { 0U }; |
1529 | 16 | oneq[0U] = (uint64_t)1U; |
1530 | 16 | oneq[1U] = (uint64_t)0U; |
1531 | 16 | oneq[2U] = (uint64_t)0U; |
1532 | 16 | oneq[3U] = (uint64_t)0U; |
1533 | 16 | KRML_MAYBE_FOR4(i, |
1534 | 16 | (uint32_t)0U, |
1535 | 16 | (uint32_t)4U, |
1536 | 16 | (uint32_t)1U, |
1537 | 16 | uint64_t *os = k_q; |
1538 | 16 | uint64_t uu____1 = oneq[i]; |
1539 | 16 | uint64_t x = uu____1 ^ (is_b_valid & (k_q[i] ^ uu____1)); |
1540 | 16 | os[i] = x;); |
1541 | 16 | uint64_t is_nonce_valid = is_b_valid; |
1542 | 16 | uint64_t are_sk_nonce_valid = is_sk_valid & is_nonce_valid; |
1543 | 16 | uint64_t p[12U] = { 0U }; |
1544 | 16 | point_mul_g(p, k_q); |
1545 | 16 | to_aff_point_x(r_q, p); |
1546 | 16 | qmod_short(r_q, r_q); |
1547 | 16 | uint64_t kinv[4U] = { 0U }; |
1548 | 16 | qinv(kinv, k_q); |
1549 | 16 | qmul(s_q, r_q, d_a); |
1550 | 16 | from_qmont(m_q, m_q); |
1551 | 16 | qadd(s_q, m_q, s_q); |
1552 | 16 | qmul(s_q, kinv, s_q); |
1553 | 16 | bn2_to_bytes_be4(signature, r_q, s_q); |
1554 | 16 | uint64_t is_r_zero = bn_is_zero_mask4(r_q); |
1555 | 16 | uint64_t is_s_zero = bn_is_zero_mask4(s_q); |
1556 | 16 | uint64_t m = are_sk_nonce_valid & (~is_r_zero & ~is_s_zero); |
1557 | 16 | bool res = m == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
1558 | 16 | return res; |
1559 | 16 | } |
1560 | | |
1561 | | /******************************************************************************* |
1562 | | |
1563 | | Verified C library for ECDSA and ECDH functions over the P-256 NIST curve. |
1564 | | |
1565 | | This module implements signing and verification, key validation, conversions |
1566 | | between various point representations, and ECDH key agreement. |
1567 | | |
1568 | | *******************************************************************************/ |
1569 | | |
1570 | | /*****************/ |
1571 | | /* ECDSA signing */ |
1572 | | /*****************/ |
1573 | | |
1574 | | /** |
1575 | | Create an ECDSA signature WITHOUT hashing first. |
1576 | | |
1577 | | This function is intended to receive a hash of the input. |
1578 | | For convenience, we recommend using one of the hash-and-sign combined functions above. |
1579 | | |
1580 | | The argument `msg` MUST be at least 32 bytes (i.e. `msg_len >= 32`). |
1581 | | |
1582 | | NOTE: The equivalent functions in OpenSSL and Fiat-Crypto both accept inputs |
1583 | | smaller than 32 bytes. These libraries left-pad the input with enough zeroes to |
1584 | | reach the minimum 32 byte size. Clients who need behavior identical to OpenSSL |
1585 | | need to perform the left-padding themselves. |
1586 | | |
1587 | | The function returns `true` for successful creation of an ECDSA signature and `false` otherwise. |
1588 | | |
1589 | | The outparam `signature` (R || S) points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1590 | | The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. |
1591 | | The arguments `private_key` and `nonce` point to 32 bytes of valid memory, i.e., uint8_t[32]. |
1592 | | |
1593 | | The function also checks whether `private_key` and `nonce` are valid values: |
1594 | | • 0 < `private_key` < the order of the curve |
1595 | | • 0 < `nonce` < the order of the curve |
1596 | | */ |
1597 | | bool |
1598 | | Hacl_P256_ecdsa_sign_p256_without_hash( |
1599 | | uint8_t *signature, |
1600 | | uint32_t msg_len, |
1601 | | uint8_t *msg, |
1602 | | uint8_t *private_key, |
1603 | | uint8_t *nonce) |
1604 | 16 | { |
1605 | 16 | uint64_t m_q[4U] = { 0U }; |
1606 | 16 | uint8_t mHash[32U] = { 0U }; |
1607 | 16 | memcpy(mHash, msg, (uint32_t)32U * sizeof(uint8_t)); |
1608 | 16 | KRML_HOST_IGNORE(msg_len); |
1609 | 16 | uint8_t *mHash32 = mHash; |
1610 | 16 | bn_from_bytes_be4(m_q, mHash32); |
1611 | 16 | qmod_short(m_q, m_q); |
1612 | 16 | bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce); |
1613 | 16 | return res; |
1614 | 16 | } |
1615 | | |
1616 | | /**********************/ |
1617 | | /* ECDSA verification */ |
1618 | | /**********************/ |
1619 | | |
1620 | | /** |
1621 | | Verify an ECDSA signature WITHOUT hashing first. |
1622 | | |
1623 | | This function is intended to receive a hash of the input. |
1624 | | For convenience, we recommend using one of the hash-and-verify combined functions above. |
1625 | | |
1626 | | The argument `msg` MUST be at least 32 bytes (i.e. `msg_len >= 32`). |
1627 | | |
1628 | | The function returns `true` if the signature is valid and `false` otherwise. |
1629 | | |
1630 | | The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len]. |
1631 | | The argument `public_key` (x || y) points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1632 | | The arguments `signature_r` and `signature_s` point to 32 bytes of valid memory, i.e., uint8_t[32]. |
1633 | | |
1634 | | The function also checks whether `public_key` is valid |
1635 | | */ |
1636 | | bool |
1637 | | Hacl_P256_ecdsa_verif_without_hash( |
1638 | | uint32_t msg_len, |
1639 | | uint8_t *msg, |
1640 | | uint8_t *public_key, |
1641 | | uint8_t *signature_r, |
1642 | | uint8_t *signature_s) |
1643 | 17 | { |
1644 | 17 | uint64_t m_q[4U] = { 0U }; |
1645 | 17 | uint8_t mHash[32U] = { 0U }; |
1646 | 17 | memcpy(mHash, msg, (uint32_t)32U * sizeof(uint8_t)); |
1647 | 17 | KRML_HOST_IGNORE(msg_len); |
1648 | 17 | uint8_t *mHash32 = mHash; |
1649 | 17 | bn_from_bytes_be4(m_q, mHash32); |
1650 | 17 | qmod_short(m_q, m_q); |
1651 | 17 | bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s); |
1652 | 17 | return res; |
1653 | 17 | } |
1654 | | |
1655 | | /******************/ |
1656 | | /* Key validation */ |
1657 | | /******************/ |
1658 | | |
1659 | | /** |
1660 | | Public key validation. |
1661 | | |
1662 | | The function returns `true` if a public key is valid and `false` otherwise. |
1663 | | |
1664 | | The argument `public_key` points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1665 | | |
1666 | | The public key (x || y) is valid (with respect to SP 800-56A): |
1667 | | • the public key is not the “point at infinity”, represented as O. |
1668 | | • the affine x and y coordinates of the point represented by the public key are |
1669 | | in the range [0, p – 1] where p is the prime defining the finite field. |
1670 | | • y^2 = x^3 + ax + b where a and b are the coefficients of the curve equation. |
1671 | | The last extract is taken from: https://neilmadden.blog/2017/05/17/so-how-do-you-validate-nist-ecdh-public-keys/ |
1672 | | */ |
1673 | | bool |
1674 | | Hacl_P256_validate_public_key(uint8_t *public_key) |
1675 | 28 | { |
1676 | 28 | uint64_t point_jac[12U] = { 0U }; |
1677 | 28 | bool res = load_point_vartime(point_jac, public_key); |
1678 | 28 | return res; |
1679 | 28 | } |
1680 | | |
1681 | | /** |
1682 | | Private key validation. |
1683 | | |
1684 | | The function returns `true` if a private key is valid and `false` otherwise. |
1685 | | |
1686 | | The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. |
1687 | | |
1688 | | The private key is valid: |
1689 | | • 0 < `private_key` < the order of the curve |
1690 | | */ |
1691 | | bool |
1692 | | Hacl_P256_validate_private_key(uint8_t *private_key) |
1693 | 40 | { |
1694 | 40 | uint64_t bn_sk[4U] = { 0U }; |
1695 | 40 | bn_from_bytes_be4(bn_sk, private_key); |
1696 | 40 | uint64_t res = bn_is_lt_order_and_gt_zero_mask4(bn_sk); |
1697 | 40 | return res == (uint64_t)0xFFFFFFFFFFFFFFFFU; |
1698 | 40 | } |
1699 | | |
1700 | | /******************************************************************************* |
1701 | | Parsing and Serializing public keys. |
1702 | | |
1703 | | A public key is a point (x, y) on the P-256 NIST curve. |
1704 | | |
1705 | | The point can be represented in the following three ways. |
1706 | | • raw = [ x || y ], 64 bytes |
1707 | | • uncompressed = [ 0x04 || x || y ], 65 bytes |
1708 | | • compressed = [ (0x02 for even `y` and 0x03 for odd `y`) || x ], 33 bytes |
1709 | | |
1710 | | *******************************************************************************/ |
1711 | | |
1712 | | /** |
1713 | | Convert a public key from uncompressed to its raw form. |
1714 | | |
1715 | | The function returns `true` for successful conversion of a public key and `false` otherwise. |
1716 | | |
1717 | | The outparam `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1718 | | The argument `pk` points to 65 bytes of valid memory, i.e., uint8_t[65]. |
1719 | | |
1720 | | The function DOESN'T check whether (x, y) is a valid point. |
1721 | | */ |
1722 | | bool |
1723 | | Hacl_P256_uncompressed_to_raw(uint8_t *pk, uint8_t *pk_raw) |
1724 | 0 | { |
1725 | 0 | uint8_t pk0 = pk[0U]; |
1726 | 0 | if (pk0 != (uint8_t)0x04U) { |
1727 | 0 | return false; |
1728 | 0 | } |
1729 | 0 | memcpy(pk_raw, pk + (uint32_t)1U, (uint32_t)64U * sizeof(uint8_t)); |
1730 | 0 | return true; |
1731 | 0 | } |
1732 | | |
1733 | | /** |
1734 | | Convert a public key from compressed to its raw form. |
1735 | | |
1736 | | The function returns `true` for successful conversion of a public key and `false` otherwise. |
1737 | | |
1738 | | The outparam `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1739 | | The argument `pk` points to 33 bytes of valid memory, i.e., uint8_t[33]. |
1740 | | |
1741 | | The function also checks whether (x, y) is a valid point. |
1742 | | */ |
1743 | | bool |
1744 | | Hacl_P256_compressed_to_raw(uint8_t *pk, uint8_t *pk_raw) |
1745 | 0 | { |
1746 | 0 | uint64_t xa[4U] = { 0U }; |
1747 | 0 | uint64_t ya[4U] = { 0U }; |
1748 | 0 | uint8_t *pk_xb = pk + (uint32_t)1U; |
1749 | 0 | bool b = aff_point_decompress_vartime(xa, ya, pk); |
1750 | 0 | if (b) { |
1751 | 0 | memcpy(pk_raw, pk_xb, (uint32_t)32U * sizeof(uint8_t)); |
1752 | 0 | bn_to_bytes_be4(pk_raw + (uint32_t)32U, ya); |
1753 | 0 | } |
1754 | 0 | return b; |
1755 | 0 | } |
1756 | | |
1757 | | /** |
1758 | | Convert a public key from raw to its uncompressed form. |
1759 | | |
1760 | | The outparam `pk` points to 65 bytes of valid memory, i.e., uint8_t[65]. |
1761 | | The argument `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1762 | | |
1763 | | The function DOESN'T check whether (x, y) is a valid point. |
1764 | | */ |
1765 | | void |
1766 | | Hacl_P256_raw_to_uncompressed(uint8_t *pk_raw, uint8_t *pk) |
1767 | 0 | { |
1768 | 0 | pk[0U] = (uint8_t)0x04U; |
1769 | 0 | memcpy(pk + (uint32_t)1U, pk_raw, (uint32_t)64U * sizeof(uint8_t)); |
1770 | 0 | } |
1771 | | |
1772 | | /** |
1773 | | Convert a public key from raw to its compressed form. |
1774 | | |
1775 | | The outparam `pk` points to 33 bytes of valid memory, i.e., uint8_t[33]. |
1776 | | The argument `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1777 | | |
1778 | | The function DOESN'T check whether (x, y) is a valid point. |
1779 | | */ |
1780 | | void |
1781 | | Hacl_P256_raw_to_compressed(uint8_t *pk_raw, uint8_t *pk) |
1782 | 0 | { |
1783 | 0 | uint8_t *pk_x = pk_raw; |
1784 | 0 | uint8_t *pk_y = pk_raw + (uint32_t)32U; |
1785 | 0 | uint64_t bn_f[4U] = { 0U }; |
1786 | 0 | bn_from_bytes_be4(bn_f, pk_y); |
1787 | 0 | uint64_t is_odd_f = bn_f[0U] & (uint64_t)1U; |
1788 | 0 | pk[0U] = (uint8_t)is_odd_f + (uint8_t)0x02U; |
1789 | 0 | memcpy(pk + (uint32_t)1U, pk_x, (uint32_t)32U * sizeof(uint8_t)); |
1790 | 0 | } |
1791 | | |
1792 | | /******************/ |
1793 | | /* ECDH agreement */ |
1794 | | /******************/ |
1795 | | |
1796 | | /** |
1797 | | Compute the public key from the private key. |
1798 | | |
1799 | | The function returns `true` if a private key is valid and `false` otherwise. |
1800 | | |
1801 | | The outparam `public_key` points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1802 | | The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. |
1803 | | |
1804 | | The private key is valid: |
1805 | | • 0 < `private_key` < the order of the curve. |
1806 | | */ |
1807 | | bool |
1808 | | Hacl_P256_dh_initiator(uint8_t *public_key, uint8_t *private_key) |
1809 | 89 | { |
1810 | 89 | return Hacl_Impl_P256_DH_ecp256dh_i(public_key, private_key); |
1811 | 89 | } |
1812 | | |
1813 | | /** |
1814 | | Execute the diffie-hellmann key exchange. |
1815 | | |
1816 | | The function returns `true` for successful creation of an ECDH shared secret and |
1817 | | `false` otherwise. |
1818 | | |
1819 | | The outparam `shared_secret` points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1820 | | The argument `their_pubkey` points to 64 bytes of valid memory, i.e., uint8_t[64]. |
1821 | | The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32]. |
1822 | | |
1823 | | The function also checks whether `private_key` and `their_pubkey` are valid. |
1824 | | */ |
1825 | | bool |
1826 | | Hacl_P256_dh_responder(uint8_t *shared_secret, uint8_t *their_pubkey, uint8_t *private_key) |
1827 | 0 | { |
1828 | 0 | return Hacl_Impl_P256_DH_ecp256dh_r(shared_secret, their_pubkey, private_key); |
1829 | 0 | } |