/src/BearSSL/src/ec/ec_p256_m31.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> |
3 | | * |
4 | | * Permission is hereby granted, free of charge, to any person obtaining |
5 | | * a copy of this software and associated documentation files (the |
6 | | * "Software"), to deal in the Software without restriction, including |
7 | | * without limitation the rights to use, copy, modify, merge, publish, |
8 | | * distribute, sublicense, and/or sell copies of the Software, and to |
9 | | * permit persons to whom the Software is furnished to do so, subject to |
10 | | * the following conditions: |
11 | | * |
12 | | * The above copyright notice and this permission notice shall be |
13 | | * included in all copies or substantial portions of the Software. |
14 | | * |
15 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
16 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
17 | | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
18 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
19 | | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
20 | | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
21 | | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | |
25 | | #include "inner.h" |
26 | | |
27 | | /* |
28 | | * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_ |
29 | | * that right-shifting a signed negative integer copies the sign bit |
30 | | * (arithmetic right-shift). This is "implementation-defined behaviour", |
31 | | * i.e. it is not undefined, but it may differ between compilers. Each |
32 | | * compiler is supposed to document its behaviour in that respect. GCC |
33 | | * explicitly defines that an arithmetic right shift is used. We expect |
34 | | * all other compilers to do the same, because underlying CPU offer an |
35 | | * arithmetic right shift opcode that could not be used otherwise. |
36 | | */ |
37 | | #if BR_NO_ARITH_SHIFT |
38 | | #define ARSH(x, n) (((uint32_t)(x) >> (n)) \ |
39 | | | ((-((uint32_t)(x) >> 31)) << (32 - (n)))) |
40 | | #define ARSHW(x, n) (((uint64_t)(x) >> (n)) \ |
41 | | | ((-((uint64_t)(x) >> 63)) << (64 - (n)))) |
42 | | #else |
43 | 16.4M | #define ARSH(x, n) ((*(int32_t *)&(x)) >> (n)) |
44 | 29.5M | #define ARSHW(x, n) ((*(int64_t *)&(x)) >> (n)) |
45 | | #endif |
46 | | |
47 | | /* |
48 | | * Convert an integer from unsigned big-endian encoding to a sequence of |
49 | | * 30-bit words in little-endian order. The final "partial" word is |
50 | | * returned. |
51 | | */ |
52 | | static uint32_t |
53 | | be8_to_le30(uint32_t *dst, const unsigned char *src, size_t len) |
54 | 84 | { |
55 | 84 | uint32_t acc; |
56 | 84 | int acc_len; |
57 | | |
58 | 84 | acc = 0; |
59 | 84 | acc_len = 0; |
60 | 2.77k | while (len -- > 0) { |
61 | 2.68k | uint32_t b; |
62 | | |
63 | 2.68k | b = src[len]; |
64 | 2.68k | if (acc_len < 22) { |
65 | 2.01k | acc |= b << acc_len; |
66 | 2.01k | acc_len += 8; |
67 | 2.01k | } else { |
68 | 672 | *dst ++ = (acc | (b << acc_len)) & 0x3FFFFFFF; |
69 | 672 | acc = b >> (30 - acc_len); |
70 | 672 | acc_len -= 22; |
71 | 672 | } |
72 | 2.68k | } |
73 | 84 | return acc; |
74 | 84 | } |
75 | | |
76 | | /* |
77 | | * Convert an integer (30-bit words, little-endian) to unsigned |
78 | | * big-endian encoding. The total encoding length is provided; all |
79 | | * the destination bytes will be filled. |
80 | | */ |
81 | | static void |
82 | | le30_to_be8(unsigned char *dst, size_t len, const uint32_t *src) |
83 | 228 | { |
84 | 228 | uint32_t acc; |
85 | 228 | int acc_len; |
86 | | |
87 | 228 | acc = 0; |
88 | 228 | acc_len = 0; |
89 | 7.52k | while (len -- > 0) { |
90 | 7.29k | if (acc_len < 8) { |
91 | 2.05k | uint32_t w; |
92 | | |
93 | 2.05k | w = *src ++; |
94 | 2.05k | dst[len] = (unsigned char)(acc | (w << acc_len)); |
95 | 2.05k | acc = w >> (8 - acc_len); |
96 | 2.05k | acc_len += 22; |
97 | 5.24k | } else { |
98 | 5.24k | dst[len] = (unsigned char)acc; |
99 | 5.24k | acc >>= 8; |
100 | 5.24k | acc_len -= 8; |
101 | 5.24k | } |
102 | 7.29k | } |
103 | 228 | } |
104 | | |
105 | | /* |
106 | | * Multiply two integers. Source integers are represented as arrays of |
107 | | * nine 30-bit words, for values up to 2^270-1. Result is encoded over |
108 | | * 18 words of 30 bits each. |
109 | | */ |
110 | | static void |
111 | | mul9(uint32_t *d, const uint32_t *a, const uint32_t *b) |
112 | 354k | { |
113 | | /* |
114 | | * Maximum intermediate result is no more than |
115 | | * 10376293531797946367, which fits in 64 bits. Reason: |
116 | | * |
117 | | * 10376293531797946367 = 9 * (2^30-1)^2 + 9663676406 |
118 | | * 10376293531797946367 < 9663676407 * 2^30 |
119 | | * |
120 | | * Thus, adding together 9 products of 30-bit integers, with |
121 | | * a carry of at most 9663676406, yields an integer that fits |
122 | | * on 64 bits and generates a carry of at most 9663676406. |
123 | | */ |
124 | 354k | uint64_t t[17]; |
125 | 354k | uint64_t cc; |
126 | 354k | int i; |
127 | | |
128 | 354k | t[ 0] = MUL31(a[0], b[0]); |
129 | 354k | t[ 1] = MUL31(a[0], b[1]) |
130 | 354k | + MUL31(a[1], b[0]); |
131 | 354k | t[ 2] = MUL31(a[0], b[2]) |
132 | 354k | + MUL31(a[1], b[1]) |
133 | 354k | + MUL31(a[2], b[0]); |
134 | 354k | t[ 3] = MUL31(a[0], b[3]) |
135 | 354k | + MUL31(a[1], b[2]) |
136 | 354k | + MUL31(a[2], b[1]) |
137 | 354k | + MUL31(a[3], b[0]); |
138 | 354k | t[ 4] = MUL31(a[0], b[4]) |
139 | 354k | + MUL31(a[1], b[3]) |
140 | 354k | + MUL31(a[2], b[2]) |
141 | 354k | + MUL31(a[3], b[1]) |
142 | 354k | + MUL31(a[4], b[0]); |
143 | 354k | t[ 5] = MUL31(a[0], b[5]) |
144 | 354k | + MUL31(a[1], b[4]) |
145 | 354k | + MUL31(a[2], b[3]) |
146 | 354k | + MUL31(a[3], b[2]) |
147 | 354k | + MUL31(a[4], b[1]) |
148 | 354k | + MUL31(a[5], b[0]); |
149 | 354k | t[ 6] = MUL31(a[0], b[6]) |
150 | 354k | + MUL31(a[1], b[5]) |
151 | 354k | + MUL31(a[2], b[4]) |
152 | 354k | + MUL31(a[3], b[3]) |
153 | 354k | + MUL31(a[4], b[2]) |
154 | 354k | + MUL31(a[5], b[1]) |
155 | 354k | + MUL31(a[6], b[0]); |
156 | 354k | t[ 7] = MUL31(a[0], b[7]) |
157 | 354k | + MUL31(a[1], b[6]) |
158 | 354k | + MUL31(a[2], b[5]) |
159 | 354k | + MUL31(a[3], b[4]) |
160 | 354k | + MUL31(a[4], b[3]) |
161 | 354k | + MUL31(a[5], b[2]) |
162 | 354k | + MUL31(a[6], b[1]) |
163 | 354k | + MUL31(a[7], b[0]); |
164 | 354k | t[ 8] = MUL31(a[0], b[8]) |
165 | 354k | + MUL31(a[1], b[7]) |
166 | 354k | + MUL31(a[2], b[6]) |
167 | 354k | + MUL31(a[3], b[5]) |
168 | 354k | + MUL31(a[4], b[4]) |
169 | 354k | + MUL31(a[5], b[3]) |
170 | 354k | + MUL31(a[6], b[2]) |
171 | 354k | + MUL31(a[7], b[1]) |
172 | 354k | + MUL31(a[8], b[0]); |
173 | 354k | t[ 9] = MUL31(a[1], b[8]) |
174 | 354k | + MUL31(a[2], b[7]) |
175 | 354k | + MUL31(a[3], b[6]) |
176 | 354k | + MUL31(a[4], b[5]) |
177 | 354k | + MUL31(a[5], b[4]) |
178 | 354k | + MUL31(a[6], b[3]) |
179 | 354k | + MUL31(a[7], b[2]) |
180 | 354k | + MUL31(a[8], b[1]); |
181 | 354k | t[10] = MUL31(a[2], b[8]) |
182 | 354k | + MUL31(a[3], b[7]) |
183 | 354k | + MUL31(a[4], b[6]) |
184 | 354k | + MUL31(a[5], b[5]) |
185 | 354k | + MUL31(a[6], b[4]) |
186 | 354k | + MUL31(a[7], b[3]) |
187 | 354k | + MUL31(a[8], b[2]); |
188 | 354k | t[11] = MUL31(a[3], b[8]) |
189 | 354k | + MUL31(a[4], b[7]) |
190 | 354k | + MUL31(a[5], b[6]) |
191 | 354k | + MUL31(a[6], b[5]) |
192 | 354k | + MUL31(a[7], b[4]) |
193 | 354k | + MUL31(a[8], b[3]); |
194 | 354k | t[12] = MUL31(a[4], b[8]) |
195 | 354k | + MUL31(a[5], b[7]) |
196 | 354k | + MUL31(a[6], b[6]) |
197 | 354k | + MUL31(a[7], b[5]) |
198 | 354k | + MUL31(a[8], b[4]); |
199 | 354k | t[13] = MUL31(a[5], b[8]) |
200 | 354k | + MUL31(a[6], b[7]) |
201 | 354k | + MUL31(a[7], b[6]) |
202 | 354k | + MUL31(a[8], b[5]); |
203 | 354k | t[14] = MUL31(a[6], b[8]) |
204 | 354k | + MUL31(a[7], b[7]) |
205 | 354k | + MUL31(a[8], b[6]); |
206 | 354k | t[15] = MUL31(a[7], b[8]) |
207 | 354k | + MUL31(a[8], b[7]); |
208 | 354k | t[16] = MUL31(a[8], b[8]); |
209 | | |
210 | | /* |
211 | | * Propagate carries. |
212 | | */ |
213 | 354k | cc = 0; |
214 | 6.37M | for (i = 0; i < 17; i ++) { |
215 | 6.01M | uint64_t w; |
216 | | |
217 | 6.01M | w = t[i] + cc; |
218 | 6.01M | d[i] = (uint32_t)w & 0x3FFFFFFF; |
219 | 6.01M | cc = w >> 30; |
220 | 6.01M | } |
221 | 354k | d[17] = (uint32_t)cc; |
222 | 354k | } |
223 | | |
224 | | /* |
225 | | * Square a 270-bit integer, represented as an array of nine 30-bit words. |
226 | | * Result uses 18 words of 30 bits each. |
227 | | */ |
228 | | static void |
229 | | square9(uint32_t *d, const uint32_t *a) |
230 | 288k | { |
231 | 288k | uint64_t t[17]; |
232 | 288k | uint64_t cc; |
233 | 288k | int i; |
234 | | |
235 | 288k | t[ 0] = MUL31(a[0], a[0]); |
236 | 288k | t[ 1] = ((MUL31(a[0], a[1])) << 1); |
237 | 288k | t[ 2] = MUL31(a[1], a[1]) |
238 | 288k | + ((MUL31(a[0], a[2])) << 1); |
239 | 288k | t[ 3] = ((MUL31(a[0], a[3]) |
240 | 288k | + MUL31(a[1], a[2])) << 1); |
241 | 288k | t[ 4] = MUL31(a[2], a[2]) |
242 | 288k | + ((MUL31(a[0], a[4]) |
243 | 288k | + MUL31(a[1], a[3])) << 1); |
244 | 288k | t[ 5] = ((MUL31(a[0], a[5]) |
245 | 288k | + MUL31(a[1], a[4]) |
246 | 288k | + MUL31(a[2], a[3])) << 1); |
247 | 288k | t[ 6] = MUL31(a[3], a[3]) |
248 | 288k | + ((MUL31(a[0], a[6]) |
249 | 288k | + MUL31(a[1], a[5]) |
250 | 288k | + MUL31(a[2], a[4])) << 1); |
251 | 288k | t[ 7] = ((MUL31(a[0], a[7]) |
252 | 288k | + MUL31(a[1], a[6]) |
253 | 288k | + MUL31(a[2], a[5]) |
254 | 288k | + MUL31(a[3], a[4])) << 1); |
255 | 288k | t[ 8] = MUL31(a[4], a[4]) |
256 | 288k | + ((MUL31(a[0], a[8]) |
257 | 288k | + MUL31(a[1], a[7]) |
258 | 288k | + MUL31(a[2], a[6]) |
259 | 288k | + MUL31(a[3], a[5])) << 1); |
260 | 288k | t[ 9] = ((MUL31(a[1], a[8]) |
261 | 288k | + MUL31(a[2], a[7]) |
262 | 288k | + MUL31(a[3], a[6]) |
263 | 288k | + MUL31(a[4], a[5])) << 1); |
264 | 288k | t[10] = MUL31(a[5], a[5]) |
265 | 288k | + ((MUL31(a[2], a[8]) |
266 | 288k | + MUL31(a[3], a[7]) |
267 | 288k | + MUL31(a[4], a[6])) << 1); |
268 | 288k | t[11] = ((MUL31(a[3], a[8]) |
269 | 288k | + MUL31(a[4], a[7]) |
270 | 288k | + MUL31(a[5], a[6])) << 1); |
271 | 288k | t[12] = MUL31(a[6], a[6]) |
272 | 288k | + ((MUL31(a[4], a[8]) |
273 | 288k | + MUL31(a[5], a[7])) << 1); |
274 | 288k | t[13] = ((MUL31(a[5], a[8]) |
275 | 288k | + MUL31(a[6], a[7])) << 1); |
276 | 288k | t[14] = MUL31(a[7], a[7]) |
277 | 288k | + ((MUL31(a[6], a[8])) << 1); |
278 | 288k | t[15] = ((MUL31(a[7], a[8])) << 1); |
279 | 288k | t[16] = MUL31(a[8], a[8]); |
280 | | |
281 | | /* |
282 | | * Propagate carries. |
283 | | */ |
284 | 288k | cc = 0; |
285 | 5.18M | for (i = 0; i < 17; i ++) { |
286 | 4.89M | uint64_t w; |
287 | | |
288 | 4.89M | w = t[i] + cc; |
289 | 4.89M | d[i] = (uint32_t)w & 0x3FFFFFFF; |
290 | 4.89M | cc = w >> 30; |
291 | 4.89M | } |
292 | 288k | d[17] = (uint32_t)cc; |
293 | 288k | } |
294 | | |
295 | | /* |
296 | | * Base field modulus for P-256. |
297 | | */ |
298 | | static const uint32_t F256[] = { |
299 | | |
300 | | 0x3FFFFFFF, 0x3FFFFFFF, 0x3FFFFFFF, 0x0000003F, 0x00000000, |
301 | | 0x00000000, 0x00001000, 0x3FFFC000, 0x0000FFFF |
302 | | }; |
303 | | |
304 | | /* |
305 | | * The 'b' curve equation coefficient for P-256. |
306 | | */ |
307 | | static const uint32_t P256_B[] = { |
308 | | |
309 | | 0x27D2604B, 0x2F38F0F8, 0x053B0F63, 0x0741AC33, 0x1886BC65, |
310 | | 0x2EF555DA, 0x293E7B3E, 0x0D762A8E, 0x00005AC6 |
311 | | }; |
312 | | |
313 | | /* |
314 | | * Addition in the field. Source operands shall fit on 257 bits; output |
315 | | * will be lower than twice the modulus. |
316 | | */ |
317 | | static void |
318 | | add_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) |
319 | 356k | { |
320 | 356k | uint32_t w, cc; |
321 | 356k | int i; |
322 | | |
323 | 356k | cc = 0; |
324 | 3.56M | for (i = 0; i < 9; i ++) { |
325 | 3.20M | w = a[i] + b[i] + cc; |
326 | 3.20M | d[i] = w & 0x3FFFFFFF; |
327 | 3.20M | cc = w >> 30; |
328 | 3.20M | } |
329 | 356k | w >>= 16; |
330 | 356k | d[8] &= 0xFFFF; |
331 | 356k | d[3] -= w << 6; |
332 | 356k | d[6] -= w << 12; |
333 | 356k | d[7] += w << 14; |
334 | 356k | cc = w; |
335 | 3.56M | for (i = 0; i < 9; i ++) { |
336 | 3.20M | w = d[i] + cc; |
337 | 3.20M | d[i] = w & 0x3FFFFFFF; |
338 | 3.20M | cc = ARSH(w, 30); |
339 | 3.20M | } |
340 | 356k | } |
341 | | |
342 | | /* |
343 | | * Subtraction in the field. Source operands shall be smaller than twice |
344 | | * the modulus; the result will fulfil the same property. |
345 | | */ |
346 | | static void |
347 | | sub_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) |
348 | 363k | { |
349 | 363k | uint32_t w, cc; |
350 | 363k | int i; |
351 | | |
352 | | /* |
353 | | * We really compute a - b + 2*p to make sure that the result is |
354 | | * positive. |
355 | | */ |
356 | 363k | w = a[0] - b[0] - 0x00002; |
357 | 363k | d[0] = w & 0x3FFFFFFF; |
358 | 363k | w = a[1] - b[1] + ARSH(w, 30); |
359 | 363k | d[1] = w & 0x3FFFFFFF; |
360 | 363k | w = a[2] - b[2] + ARSH(w, 30); |
361 | 363k | d[2] = w & 0x3FFFFFFF; |
362 | 363k | w = a[3] - b[3] + ARSH(w, 30) + 0x00080; |
363 | 363k | d[3] = w & 0x3FFFFFFF; |
364 | 363k | w = a[4] - b[4] + ARSH(w, 30); |
365 | 363k | d[4] = w & 0x3FFFFFFF; |
366 | 363k | w = a[5] - b[5] + ARSH(w, 30); |
367 | 363k | d[5] = w & 0x3FFFFFFF; |
368 | 363k | w = a[6] - b[6] + ARSH(w, 30) + 0x02000; |
369 | 363k | d[6] = w & 0x3FFFFFFF; |
370 | 363k | w = a[7] - b[7] + ARSH(w, 30) - 0x08000; |
371 | 363k | d[7] = w & 0x3FFFFFFF; |
372 | 363k | w = a[8] - b[8] + ARSH(w, 30) + 0x20000; |
373 | 363k | d[8] = w & 0xFFFF; |
374 | 363k | w >>= 16; |
375 | 363k | d[8] &= 0xFFFF; |
376 | 363k | d[3] -= w << 6; |
377 | 363k | d[6] -= w << 12; |
378 | 363k | d[7] += w << 14; |
379 | 363k | cc = w; |
380 | 3.63M | for (i = 0; i < 9; i ++) { |
381 | 3.26M | w = d[i] + cc; |
382 | 3.26M | d[i] = w & 0x3FFFFFFF; |
383 | 3.26M | cc = ARSH(w, 30); |
384 | 3.26M | } |
385 | 363k | } |
386 | | |
387 | | /* |
388 | | * Compute a multiplication in F256. Source operands shall be less than |
389 | | * twice the modulus. |
390 | | */ |
391 | | static void |
392 | | mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) |
393 | 354k | { |
394 | 354k | uint32_t t[18]; |
395 | 354k | uint64_t s[18]; |
396 | 354k | uint64_t cc, x; |
397 | 354k | uint32_t z, c; |
398 | 354k | int i; |
399 | | |
400 | 354k | mul9(t, a, b); |
401 | | |
402 | | /* |
403 | | * Modular reduction: each high word in added/subtracted where |
404 | | * necessary. |
405 | | * |
406 | | * The modulus is: |
407 | | * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 |
408 | | * Therefore: |
409 | | * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p |
410 | | * |
411 | | * For a word x at bit offset n (n >= 256), we have: |
412 | | * x*2^n = x*2^(n-32) - x*2^(n-64) |
413 | | * - x*2^(n - 160) + x*2^(n-256) mod p |
414 | | * |
415 | | * Thus, we can nullify the high word if we reinject it at some |
416 | | * proper emplacements. |
417 | | * |
418 | | * We use 64-bit intermediate words to allow for carries to |
419 | | * accumulate easily, before performing the final propagation. |
420 | | */ |
421 | 6.72M | for (i = 0; i < 18; i ++) { |
422 | 6.37M | s[i] = t[i]; |
423 | 6.37M | } |
424 | | |
425 | 3.54M | for (i = 17; i >= 9; i --) { |
426 | 3.18M | uint64_t y; |
427 | | |
428 | 3.18M | y = s[i]; |
429 | 3.18M | s[i - 1] += ARSHW(y, 2); |
430 | 3.18M | s[i - 2] += (y << 28) & 0x3FFFFFFF; |
431 | 3.18M | s[i - 2] -= ARSHW(y, 4); |
432 | 3.18M | s[i - 3] -= (y << 26) & 0x3FFFFFFF; |
433 | 3.18M | s[i - 5] -= ARSHW(y, 10); |
434 | 3.18M | s[i - 6] -= (y << 20) & 0x3FFFFFFF; |
435 | 3.18M | s[i - 8] += ARSHW(y, 16); |
436 | 3.18M | s[i - 9] += (y << 14) & 0x3FFFFFFF; |
437 | 3.18M | } |
438 | | |
439 | | /* |
440 | | * Carry propagation must be signed. Moreover, we may have overdone |
441 | | * it a bit, and obtain a negative result. |
442 | | * |
443 | | * The loop above ran 9 times; each time, each word was augmented |
444 | | * by at most one extra word (in absolute value). Thus, the top |
445 | | * word must in fine fit in 39 bits, so the carry below will fit |
446 | | * on 9 bits. |
447 | | */ |
448 | 354k | cc = 0; |
449 | 3.54M | for (i = 0; i < 9; i ++) { |
450 | 3.18M | x = s[i] + cc; |
451 | 3.18M | d[i] = (uint32_t)x & 0x3FFFFFFF; |
452 | 3.18M | cc = ARSHW(x, 30); |
453 | 3.18M | } |
454 | | |
455 | | /* |
456 | | * All nine words fit on 30 bits, but there may be an extra |
457 | | * carry for a few bits (at most 9), and that carry may be |
458 | | * negative. Moreover, we want the result to fit on 257 bits. |
459 | | * The two lines below ensure that the word in d[] has length |
460 | | * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The |
461 | | * significant length of cc is less than 24 bits, so we will be |
462 | | * able to switch to 32-bit operations. |
463 | | */ |
464 | 354k | cc = ARSHW(x, 16); |
465 | 354k | d[8] &= 0xFFFF; |
466 | | |
467 | | /* |
468 | | * One extra round of reduction, for cc*2^256, which means |
469 | | * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) |
470 | | * value. If cc is negative, then it may happen (rarely, but |
471 | | * not neglectibly so) that the result would be negative. In |
472 | | * order to avoid that, if cc is negative, then we add the |
473 | | * modulus once. Note that if cc is negative, then propagating |
474 | | * that carry must yield a value lower than the modulus, so |
475 | | * adding the modulus once will keep the final result under |
476 | | * twice the modulus. |
477 | | */ |
478 | 354k | z = (uint32_t)cc; |
479 | 354k | d[3] -= z << 6; |
480 | 354k | d[6] -= (z << 12) & 0x3FFFFFFF; |
481 | 354k | d[7] -= ARSH(z, 18); |
482 | 354k | d[7] += (z << 14) & 0x3FFFFFFF; |
483 | 354k | d[8] += ARSH(z, 16); |
484 | 354k | c = z >> 31; |
485 | 354k | d[0] -= c; |
486 | 354k | d[3] += c << 6; |
487 | 354k | d[6] += c << 12; |
488 | 354k | d[7] -= c << 14; |
489 | 354k | d[8] += c << 16; |
490 | 3.54M | for (i = 0; i < 9; i ++) { |
491 | 3.18M | uint32_t w; |
492 | | |
493 | 3.18M | w = d[i] + z; |
494 | 3.18M | d[i] = w & 0x3FFFFFFF; |
495 | 3.18M | z = ARSH(w, 30); |
496 | 3.18M | } |
497 | 354k | } |
498 | | |
499 | | /* |
500 | | * Compute a square in F256. Source operand shall be less than |
501 | | * twice the modulus. |
502 | | */ |
503 | | static void |
504 | | square_f256(uint32_t *d, const uint32_t *a) |
505 | 288k | { |
506 | 288k | uint32_t t[18]; |
507 | 288k | uint64_t s[18]; |
508 | 288k | uint64_t cc, x; |
509 | 288k | uint32_t z, c; |
510 | 288k | int i; |
511 | | |
512 | 288k | square9(t, a); |
513 | | |
514 | | /* |
515 | | * Modular reduction: each high word in added/subtracted where |
516 | | * necessary. |
517 | | * |
518 | | * The modulus is: |
519 | | * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 |
520 | | * Therefore: |
521 | | * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p |
522 | | * |
523 | | * For a word x at bit offset n (n >= 256), we have: |
524 | | * x*2^n = x*2^(n-32) - x*2^(n-64) |
525 | | * - x*2^(n - 160) + x*2^(n-256) mod p |
526 | | * |
527 | | * Thus, we can nullify the high word if we reinject it at some |
528 | | * proper emplacements. |
529 | | * |
530 | | * We use 64-bit intermediate words to allow for carries to |
531 | | * accumulate easily, before performing the final propagation. |
532 | | */ |
533 | 5.47M | for (i = 0; i < 18; i ++) { |
534 | 5.18M | s[i] = t[i]; |
535 | 5.18M | } |
536 | | |
537 | 2.88M | for (i = 17; i >= 9; i --) { |
538 | 2.59M | uint64_t y; |
539 | | |
540 | 2.59M | y = s[i]; |
541 | 2.59M | s[i - 1] += ARSHW(y, 2); |
542 | 2.59M | s[i - 2] += (y << 28) & 0x3FFFFFFF; |
543 | 2.59M | s[i - 2] -= ARSHW(y, 4); |
544 | 2.59M | s[i - 3] -= (y << 26) & 0x3FFFFFFF; |
545 | 2.59M | s[i - 5] -= ARSHW(y, 10); |
546 | 2.59M | s[i - 6] -= (y << 20) & 0x3FFFFFFF; |
547 | 2.59M | s[i - 8] += ARSHW(y, 16); |
548 | 2.59M | s[i - 9] += (y << 14) & 0x3FFFFFFF; |
549 | 2.59M | } |
550 | | |
551 | | /* |
552 | | * Carry propagation must be signed. Moreover, we may have overdone |
553 | | * it a bit, and obtain a negative result. |
554 | | * |
555 | | * The loop above ran 9 times; each time, each word was augmented |
556 | | * by at most one extra word (in absolute value). Thus, the top |
557 | | * word must in fine fit in 39 bits, so the carry below will fit |
558 | | * on 9 bits. |
559 | | */ |
560 | 288k | cc = 0; |
561 | 2.88M | for (i = 0; i < 9; i ++) { |
562 | 2.59M | x = s[i] + cc; |
563 | 2.59M | d[i] = (uint32_t)x & 0x3FFFFFFF; |
564 | 2.59M | cc = ARSHW(x, 30); |
565 | 2.59M | } |
566 | | |
567 | | /* |
568 | | * All nine words fit on 30 bits, but there may be an extra |
569 | | * carry for a few bits (at most 9), and that carry may be |
570 | | * negative. Moreover, we want the result to fit on 257 bits. |
571 | | * The two lines below ensure that the word in d[] has length |
572 | | * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The |
573 | | * significant length of cc is less than 24 bits, so we will be |
574 | | * able to switch to 32-bit operations. |
575 | | */ |
576 | 288k | cc = ARSHW(x, 16); |
577 | 288k | d[8] &= 0xFFFF; |
578 | | |
579 | | /* |
580 | | * One extra round of reduction, for cc*2^256, which means |
581 | | * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) |
582 | | * value. If cc is negative, then it may happen (rarely, but |
583 | | * not neglectibly so) that the result would be negative. In |
584 | | * order to avoid that, if cc is negative, then we add the |
585 | | * modulus once. Note that if cc is negative, then propagating |
586 | | * that carry must yield a value lower than the modulus, so |
587 | | * adding the modulus once will keep the final result under |
588 | | * twice the modulus. |
589 | | */ |
590 | 288k | z = (uint32_t)cc; |
591 | 288k | d[3] -= z << 6; |
592 | 288k | d[6] -= (z << 12) & 0x3FFFFFFF; |
593 | 288k | d[7] -= ARSH(z, 18); |
594 | 288k | d[7] += (z << 14) & 0x3FFFFFFF; |
595 | 288k | d[8] += ARSH(z, 16); |
596 | 288k | c = z >> 31; |
597 | 288k | d[0] -= c; |
598 | 288k | d[3] += c << 6; |
599 | 288k | d[6] += c << 12; |
600 | 288k | d[7] -= c << 14; |
601 | 288k | d[8] += c << 16; |
602 | 2.88M | for (i = 0; i < 9; i ++) { |
603 | 2.59M | uint32_t w; |
604 | | |
605 | 2.59M | w = d[i] + z; |
606 | 2.59M | d[i] = w & 0x3FFFFFFF; |
607 | 2.59M | z = ARSH(w, 30); |
608 | 2.59M | } |
609 | 288k | } |
610 | | |
611 | | /* |
612 | | * Perform a "final reduction" in field F256 (field for curve P-256). |
613 | | * The source value must be less than twice the modulus. If the value |
614 | | * is not lower than the modulus, then the modulus is subtracted and |
615 | | * this function returns 1; otherwise, it leaves it untouched and it |
616 | | * returns 0. |
617 | | */ |
618 | | static uint32_t |
619 | | reduce_final_f256(uint32_t *d) |
620 | 15.9k | { |
621 | 15.9k | uint32_t t[9]; |
622 | 15.9k | uint32_t cc; |
623 | 15.9k | int i; |
624 | | |
625 | 15.9k | cc = 0; |
626 | 159k | for (i = 0; i < 9; i ++) { |
627 | 143k | uint32_t w; |
628 | | |
629 | 143k | w = d[i] - F256[i] - cc; |
630 | 143k | cc = w >> 31; |
631 | 143k | t[i] = w & 0x3FFFFFFF; |
632 | 143k | } |
633 | 15.9k | cc ^= 1; |
634 | 15.9k | CCOPY(cc, d, t, sizeof t); |
635 | 15.9k | return cc; |
636 | 15.9k | } |
637 | | |
638 | | /* |
639 | | * Jacobian coordinates for a point in P-256: affine coordinates (X,Y) |
640 | | * are such that: |
641 | | * X = x / z^2 |
642 | | * Y = y / z^3 |
643 | | * For the point at infinity, z = 0. |
644 | | * Each point thus admits many possible representations. |
645 | | * |
646 | | * Coordinates are represented in arrays of 32-bit integers, each holding |
647 | | * 30 bits of data. Values may also be slightly greater than the modulus, |
648 | | * but they will always be lower than twice the modulus. |
649 | | */ |
650 | | typedef struct { |
651 | | uint32_t x[9]; |
652 | | uint32_t y[9]; |
653 | | uint32_t z[9]; |
654 | | } p256_jacobian; |
655 | | |
656 | | /* |
657 | | * Convert a point to affine coordinates: |
658 | | * - If the point is the point at infinity, then all three coordinates |
659 | | * are set to 0. |
660 | | * - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y' |
661 | | * coordinates are the 'X' and 'Y' affine coordinates. |
662 | | * The coordinates are guaranteed to be lower than the modulus. |
663 | | */ |
664 | | static void |
665 | | p256_to_affine(p256_jacobian *P) |
666 | 114 | { |
667 | 114 | uint32_t t1[9], t2[9]; |
668 | 114 | int i; |
669 | | |
670 | | /* |
671 | | * Invert z with a modular exponentiation: the modulus is |
672 | | * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is |
673 | | * p-2. Exponent bit pattern (from high to low) is: |
674 | | * - 32 bits of value 1 |
675 | | * - 31 bits of value 0 |
676 | | * - 1 bit of value 1 |
677 | | * - 96 bits of value 0 |
678 | | * - 94 bits of value 1 |
679 | | * - 1 bit of value 0 |
680 | | * - 1 bit of value 1 |
681 | | * Thus, we precompute z^(2^31-1) to speed things up. |
682 | | * |
683 | | * If z = 0 (point at infinity) then the modular exponentiation |
684 | | * will yield 0, which leads to the expected result (all three |
685 | | * coordinates set to 0). |
686 | | */ |
687 | | |
688 | | /* |
689 | | * A simple square-and-multiply for z^(2^31-1). We could save about |
690 | | * two dozen multiplications here with an addition chain, but |
691 | | * this would require a bit more code, and extra stack buffers. |
692 | | */ |
693 | 114 | memcpy(t1, P->z, sizeof P->z); |
694 | 3.53k | for (i = 0; i < 30; i ++) { |
695 | 3.42k | square_f256(t1, t1); |
696 | 3.42k | mul_f256(t1, t1, P->z); |
697 | 3.42k | } |
698 | | |
699 | | /* |
700 | | * Square-and-multiply. Apart from the squarings, we have a few |
701 | | * multiplications to set bits to 1; we multiply by the original z |
702 | | * for setting 1 bit, and by t1 for setting 31 bits. |
703 | | */ |
704 | 114 | memcpy(t2, P->z, sizeof P->z); |
705 | 29.1k | for (i = 1; i < 256; i ++) { |
706 | 29.0k | square_f256(t2, t2); |
707 | 29.0k | switch (i) { |
708 | 114 | case 31: |
709 | 228 | case 190: |
710 | 342 | case 221: |
711 | 456 | case 252: |
712 | 456 | mul_f256(t2, t2, t1); |
713 | 456 | break; |
714 | 114 | case 63: |
715 | 228 | case 253: |
716 | 342 | case 255: |
717 | 342 | mul_f256(t2, t2, P->z); |
718 | 342 | break; |
719 | 29.0k | } |
720 | 29.0k | } |
721 | | |
722 | | /* |
723 | | * Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3. |
724 | | */ |
725 | 114 | mul_f256(t1, t2, t2); |
726 | 114 | mul_f256(P->x, t1, P->x); |
727 | 114 | mul_f256(t1, t1, t2); |
728 | 114 | mul_f256(P->y, t1, P->y); |
729 | 114 | reduce_final_f256(P->x); |
730 | 114 | reduce_final_f256(P->y); |
731 | | |
732 | | /* |
733 | | * Multiply z by 1/z. If z = 0, then this will yield 0, otherwise |
734 | | * this will set z to 1. |
735 | | */ |
736 | 114 | mul_f256(P->z, P->z, t2); |
737 | 114 | reduce_final_f256(P->z); |
738 | 114 | } |
739 | | |
740 | | /* |
741 | | * Double a point in P-256. This function works for all valid points, |
742 | | * including the point at infinity. |
743 | | */ |
744 | | static void |
745 | | p256_double(p256_jacobian *Q) |
746 | 50.9k | { |
747 | | /* |
748 | | * Doubling formulas are: |
749 | | * |
750 | | * s = 4*x*y^2 |
751 | | * m = 3*(x + z^2)*(x - z^2) |
752 | | * x' = m^2 - 2*s |
753 | | * y' = m*(s - x') - 8*y^4 |
754 | | * z' = 2*y*z |
755 | | * |
756 | | * These formulas work for all points, including points of order 2 |
757 | | * and points at infinity: |
758 | | * - If y = 0 then z' = 0. But there is no such point in P-256 |
759 | | * anyway. |
760 | | * - If z = 0 then z' = 0. |
761 | | */ |
762 | 50.9k | uint32_t t1[9], t2[9], t3[9], t4[9]; |
763 | | |
764 | | /* |
765 | | * Compute z^2 in t1. |
766 | | */ |
767 | 50.9k | square_f256(t1, Q->z); |
768 | | |
769 | | /* |
770 | | * Compute x-z^2 in t2 and x+z^2 in t1. |
771 | | */ |
772 | 50.9k | add_f256(t2, Q->x, t1); |
773 | 50.9k | sub_f256(t1, Q->x, t1); |
774 | | |
775 | | /* |
776 | | * Compute 3*(x+z^2)*(x-z^2) in t1. |
777 | | */ |
778 | 50.9k | mul_f256(t3, t1, t2); |
779 | 50.9k | add_f256(t1, t3, t3); |
780 | 50.9k | add_f256(t1, t3, t1); |
781 | | |
782 | | /* |
783 | | * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3). |
784 | | */ |
785 | 50.9k | square_f256(t3, Q->y); |
786 | 50.9k | add_f256(t3, t3, t3); |
787 | 50.9k | mul_f256(t2, Q->x, t3); |
788 | 50.9k | add_f256(t2, t2, t2); |
789 | | |
790 | | /* |
791 | | * Compute x' = m^2 - 2*s. |
792 | | */ |
793 | 50.9k | square_f256(Q->x, t1); |
794 | 50.9k | sub_f256(Q->x, Q->x, t2); |
795 | 50.9k | sub_f256(Q->x, Q->x, t2); |
796 | | |
797 | | /* |
798 | | * Compute z' = 2*y*z. |
799 | | */ |
800 | 50.9k | mul_f256(t4, Q->y, Q->z); |
801 | 50.9k | add_f256(Q->z, t4, t4); |
802 | | |
803 | | /* |
804 | | * Compute y' = m*(s - x') - 8*y^4. Note that we already have |
805 | | * 2*y^2 in t3. |
806 | | */ |
807 | 50.9k | sub_f256(t2, t2, Q->x); |
808 | 50.9k | mul_f256(Q->y, t1, t2); |
809 | 50.9k | square_f256(t4, t3); |
810 | 50.9k | add_f256(t4, t4, t4); |
811 | 50.9k | sub_f256(Q->y, Q->y, t4); |
812 | 50.9k | } |
813 | | |
814 | | /* |
815 | | * Add point P2 to point P1. |
816 | | * |
817 | | * This function computes the wrong result in the following cases: |
818 | | * |
819 | | * - If P1 == 0 but P2 != 0 |
820 | | * - If P1 != 0 but P2 == 0 |
821 | | * - If P1 == P2 |
822 | | * |
823 | | * In all three cases, P1 is set to the point at infinity. |
824 | | * |
825 | | * Returned value is 0 if one of the following occurs: |
826 | | * |
827 | | * - P1 and P2 have the same Y coordinate |
828 | | * - P1 == 0 and P2 == 0 |
829 | | * - The Y coordinate of one of the points is 0 and the other point is |
830 | | * the point at infinity. |
831 | | * |
832 | | * The third case cannot actually happen with valid points, since a point |
833 | | * with Y == 0 is a point of order 2, and there is no point of order 2 on |
834 | | * curve P-256. |
835 | | * |
836 | | * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller |
837 | | * can apply the following: |
838 | | * |
839 | | * - If the result is not the point at infinity, then it is correct. |
840 | | * - Otherwise, if the returned value is 1, then this is a case of |
841 | | * P1+P2 == 0, so the result is indeed the point at infinity. |
842 | | * - Otherwise, P1 == P2, so a "double" operation should have been |
843 | | * performed. |
844 | | */ |
845 | | static uint32_t |
846 | | p256_add(p256_jacobian *P1, const p256_jacobian *P2) |
847 | 5.46k | { |
848 | | /* |
849 | | * Addtions formulas are: |
850 | | * |
851 | | * u1 = x1 * z2^2 |
852 | | * u2 = x2 * z1^2 |
853 | | * s1 = y1 * z2^3 |
854 | | * s2 = y2 * z1^3 |
855 | | * h = u2 - u1 |
856 | | * r = s2 - s1 |
857 | | * x3 = r^2 - h^3 - 2 * u1 * h^2 |
858 | | * y3 = r * (u1 * h^2 - x3) - s1 * h^3 |
859 | | * z3 = h * z1 * z2 |
860 | | */ |
861 | 5.46k | uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9]; |
862 | 5.46k | uint32_t ret; |
863 | 5.46k | int i; |
864 | | |
865 | | /* |
866 | | * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). |
867 | | */ |
868 | 5.46k | square_f256(t3, P2->z); |
869 | 5.46k | mul_f256(t1, P1->x, t3); |
870 | 5.46k | mul_f256(t4, P2->z, t3); |
871 | 5.46k | mul_f256(t3, P1->y, t4); |
872 | | |
873 | | /* |
874 | | * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). |
875 | | */ |
876 | 5.46k | square_f256(t4, P1->z); |
877 | 5.46k | mul_f256(t2, P2->x, t4); |
878 | 5.46k | mul_f256(t5, P1->z, t4); |
879 | 5.46k | mul_f256(t4, P2->y, t5); |
880 | | |
881 | | /* |
882 | | * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). |
883 | | * We need to test whether r is zero, so we will do some extra |
884 | | * reduce. |
885 | | */ |
886 | 5.46k | sub_f256(t2, t2, t1); |
887 | 5.46k | sub_f256(t4, t4, t3); |
888 | 5.46k | reduce_final_f256(t4); |
889 | 5.46k | ret = 0; |
890 | 54.6k | for (i = 0; i < 9; i ++) { |
891 | 49.1k | ret |= t4[i]; |
892 | 49.1k | } |
893 | 5.46k | ret = (ret | -ret) >> 31; |
894 | | |
895 | | /* |
896 | | * Compute u1*h^2 (in t6) and h^3 (in t5); |
897 | | */ |
898 | 5.46k | square_f256(t7, t2); |
899 | 5.46k | mul_f256(t6, t1, t7); |
900 | 5.46k | mul_f256(t5, t7, t2); |
901 | | |
902 | | /* |
903 | | * Compute x3 = r^2 - h^3 - 2*u1*h^2. |
904 | | */ |
905 | 5.46k | square_f256(P1->x, t4); |
906 | 5.46k | sub_f256(P1->x, P1->x, t5); |
907 | 5.46k | sub_f256(P1->x, P1->x, t6); |
908 | 5.46k | sub_f256(P1->x, P1->x, t6); |
909 | | |
910 | | /* |
911 | | * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. |
912 | | */ |
913 | 5.46k | sub_f256(t6, t6, P1->x); |
914 | 5.46k | mul_f256(P1->y, t4, t6); |
915 | 5.46k | mul_f256(t1, t5, t3); |
916 | 5.46k | sub_f256(P1->y, P1->y, t1); |
917 | | |
918 | | /* |
919 | | * Compute z3 = h*z1*z2. |
920 | | */ |
921 | 5.46k | mul_f256(t1, P1->z, P2->z); |
922 | 5.46k | mul_f256(P1->z, t1, t2); |
923 | | |
924 | 5.46k | return ret; |
925 | 5.46k | } |
926 | | |
927 | | /* |
928 | | * Add point P2 to point P1. This is a specialised function for the |
929 | | * case when P2 is a non-zero point in affine coordinate. |
930 | | * |
931 | | * This function computes the wrong result in the following cases: |
932 | | * |
933 | | * - If P1 == 0 |
934 | | * - If P1 == P2 |
935 | | * |
936 | | * In both cases, P1 is set to the point at infinity. |
937 | | * |
938 | | * Returned value is 0 if one of the following occurs: |
939 | | * |
940 | | * - P1 and P2 have the same Y coordinate |
941 | | * - The Y coordinate of P2 is 0 and P1 is the point at infinity. |
942 | | * |
943 | | * The second case cannot actually happen with valid points, since a point |
944 | | * with Y == 0 is a point of order 2, and there is no point of order 2 on |
945 | | * curve P-256. |
946 | | * |
947 | | * Therefore, assuming that P1 != 0 on input, then the caller |
948 | | * can apply the following: |
949 | | * |
950 | | * - If the result is not the point at infinity, then it is correct. |
951 | | * - Otherwise, if the returned value is 1, then this is a case of |
952 | | * P1+P2 == 0, so the result is indeed the point at infinity. |
953 | | * - Otherwise, P1 == P2, so a "double" operation should have been |
954 | | * performed. |
955 | | */ |
956 | | static uint32_t |
957 | | p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2) |
958 | 10.0k | { |
959 | | /* |
960 | | * Addtions formulas are: |
961 | | * |
962 | | * u1 = x1 |
963 | | * u2 = x2 * z1^2 |
964 | | * s1 = y1 |
965 | | * s2 = y2 * z1^3 |
966 | | * h = u2 - u1 |
967 | | * r = s2 - s1 |
968 | | * x3 = r^2 - h^3 - 2 * u1 * h^2 |
969 | | * y3 = r * (u1 * h^2 - x3) - s1 * h^3 |
970 | | * z3 = h * z1 |
971 | | */ |
972 | 10.0k | uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9]; |
973 | 10.0k | uint32_t ret; |
974 | 10.0k | int i; |
975 | | |
976 | | /* |
977 | | * Compute u1 = x1 (in t1) and s1 = y1 (in t3). |
978 | | */ |
979 | 10.0k | memcpy(t1, P1->x, sizeof t1); |
980 | 10.0k | memcpy(t3, P1->y, sizeof t3); |
981 | | |
982 | | /* |
983 | | * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). |
984 | | */ |
985 | 10.0k | square_f256(t4, P1->z); |
986 | 10.0k | mul_f256(t2, P2->x, t4); |
987 | 10.0k | mul_f256(t5, P1->z, t4); |
988 | 10.0k | mul_f256(t4, P2->y, t5); |
989 | | |
990 | | /* |
991 | | * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). |
992 | | * We need to test whether r is zero, so we will do some extra |
993 | | * reduce. |
994 | | */ |
995 | 10.0k | sub_f256(t2, t2, t1); |
996 | 10.0k | sub_f256(t4, t4, t3); |
997 | 10.0k | reduce_final_f256(t4); |
998 | 10.0k | ret = 0; |
999 | 100k | for (i = 0; i < 9; i ++) { |
1000 | 90.1k | ret |= t4[i]; |
1001 | 90.1k | } |
1002 | 10.0k | ret = (ret | -ret) >> 31; |
1003 | | |
1004 | | /* |
1005 | | * Compute u1*h^2 (in t6) and h^3 (in t5); |
1006 | | */ |
1007 | 10.0k | square_f256(t7, t2); |
1008 | 10.0k | mul_f256(t6, t1, t7); |
1009 | 10.0k | mul_f256(t5, t7, t2); |
1010 | | |
1011 | | /* |
1012 | | * Compute x3 = r^2 - h^3 - 2*u1*h^2. |
1013 | | */ |
1014 | 10.0k | square_f256(P1->x, t4); |
1015 | 10.0k | sub_f256(P1->x, P1->x, t5); |
1016 | 10.0k | sub_f256(P1->x, P1->x, t6); |
1017 | 10.0k | sub_f256(P1->x, P1->x, t6); |
1018 | | |
1019 | | /* |
1020 | | * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. |
1021 | | */ |
1022 | 10.0k | sub_f256(t6, t6, P1->x); |
1023 | 10.0k | mul_f256(P1->y, t4, t6); |
1024 | 10.0k | mul_f256(t1, t5, t3); |
1025 | 10.0k | sub_f256(P1->y, P1->y, t1); |
1026 | | |
1027 | | /* |
1028 | | * Compute z3 = h*z1*z2. |
1029 | | */ |
1030 | 10.0k | mul_f256(P1->z, P1->z, t2); |
1031 | | |
1032 | 10.0k | return ret; |
1033 | 10.0k | } |
1034 | | |
1035 | | /* |
1036 | | * Decode a P-256 point. This function does not support the point at |
1037 | | * infinity. Returned value is 0 if the point is invalid, 1 otherwise. |
1038 | | */ |
1039 | | static uint32_t |
1040 | | p256_decode(p256_jacobian *P, const void *src, size_t len) |
1041 | 42 | { |
1042 | 42 | const unsigned char *buf; |
1043 | 42 | uint32_t tx[9], ty[9], t1[9], t2[9]; |
1044 | 42 | uint32_t bad; |
1045 | 42 | int i; |
1046 | | |
1047 | 42 | if (len != 65) { |
1048 | 0 | return 0; |
1049 | 0 | } |
1050 | 42 | buf = src; |
1051 | | |
1052 | | /* |
1053 | | * First byte must be 0x04 (uncompressed format). We could support |
1054 | | * "hybrid format" (first byte is 0x06 or 0x07, and encodes the |
1055 | | * least significant bit of the Y coordinate), but it is explicitly |
1056 | | * forbidden by RFC 5480 (section 2.2). |
1057 | | */ |
1058 | 42 | bad = NEQ(buf[0], 0x04); |
1059 | | |
1060 | | /* |
1061 | | * Decode the coordinates, and check that they are both lower |
1062 | | * than the modulus. |
1063 | | */ |
1064 | 42 | tx[8] = be8_to_le30(tx, buf + 1, 32); |
1065 | 42 | ty[8] = be8_to_le30(ty, buf + 33, 32); |
1066 | 42 | bad |= reduce_final_f256(tx); |
1067 | 42 | bad |= reduce_final_f256(ty); |
1068 | | |
1069 | | /* |
1070 | | * Check curve equation. |
1071 | | */ |
1072 | 42 | square_f256(t1, tx); |
1073 | 42 | mul_f256(t1, tx, t1); |
1074 | 42 | square_f256(t2, ty); |
1075 | 42 | sub_f256(t1, t1, tx); |
1076 | 42 | sub_f256(t1, t1, tx); |
1077 | 42 | sub_f256(t1, t1, tx); |
1078 | 42 | add_f256(t1, t1, P256_B); |
1079 | 42 | sub_f256(t1, t1, t2); |
1080 | 42 | reduce_final_f256(t1); |
1081 | 420 | for (i = 0; i < 9; i ++) { |
1082 | 378 | bad |= t1[i]; |
1083 | 378 | } |
1084 | | |
1085 | | /* |
1086 | | * Copy coordinates to the point structure. |
1087 | | */ |
1088 | 42 | memcpy(P->x, tx, sizeof tx); |
1089 | 42 | memcpy(P->y, ty, sizeof ty); |
1090 | 42 | memset(P->z, 0, sizeof P->z); |
1091 | 42 | P->z[0] = 1; |
1092 | 42 | return EQ(bad, 0); |
1093 | 42 | } |
1094 | | |
1095 | | /* |
1096 | | * Encode a point into a buffer. This function assumes that the point is |
1097 | | * valid, in affine coordinates, and not the point at infinity. |
1098 | | */ |
1099 | | static void |
1100 | | p256_encode(void *dst, const p256_jacobian *P) |
1101 | 114 | { |
1102 | 114 | unsigned char *buf; |
1103 | | |
1104 | 114 | buf = dst; |
1105 | 114 | buf[0] = 0x04; |
1106 | 114 | le30_to_be8(buf + 1, 32, P->x); |
1107 | 114 | le30_to_be8(buf + 33, 32, P->y); |
1108 | 114 | } |
1109 | | |
1110 | | /* |
1111 | | * Multiply a curve point by an integer. The integer is assumed to be |
1112 | | * lower than the curve order, and the base point must not be the point |
1113 | | * at infinity. |
1114 | | */ |
1115 | | static void |
1116 | | p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen) |
1117 | 42 | { |
1118 | | /* |
1119 | | * qz is a flag that is initially 1, and remains equal to 1 |
1120 | | * as long as the point is the point at infinity. |
1121 | | * |
1122 | | * We use a 2-bit window to handle multiplier bits by pairs. |
1123 | | * The precomputed window really is the points P2 and P3. |
1124 | | */ |
1125 | 42 | uint32_t qz; |
1126 | 42 | p256_jacobian P2, P3, Q, T, U; |
1127 | | |
1128 | | /* |
1129 | | * Compute window values. |
1130 | | */ |
1131 | 42 | P2 = *P; |
1132 | 42 | p256_double(&P2); |
1133 | 42 | P3 = *P; |
1134 | 42 | p256_add(&P3, &P2); |
1135 | | |
1136 | | /* |
1137 | | * We start with Q = 0. We process multiplier bits 2 by 2. |
1138 | | */ |
1139 | 42 | memset(&Q, 0, sizeof Q); |
1140 | 42 | qz = 1; |
1141 | 1.38k | while (xlen -- > 0) { |
1142 | 1.34k | int k; |
1143 | | |
1144 | 6.72k | for (k = 6; k >= 0; k -= 2) { |
1145 | 5.37k | uint32_t bits; |
1146 | 5.37k | uint32_t bnz; |
1147 | | |
1148 | 5.37k | p256_double(&Q); |
1149 | 5.37k | p256_double(&Q); |
1150 | 5.37k | T = *P; |
1151 | 5.37k | U = Q; |
1152 | 5.37k | bits = (*x >> k) & (uint32_t)3; |
1153 | 5.37k | bnz = NEQ(bits, 0); |
1154 | 5.37k | CCOPY(EQ(bits, 2), &T, &P2, sizeof T); |
1155 | 5.37k | CCOPY(EQ(bits, 3), &T, &P3, sizeof T); |
1156 | 5.37k | p256_add(&U, &T); |
1157 | 5.37k | CCOPY(bnz & qz, &Q, &T, sizeof Q); |
1158 | 5.37k | CCOPY(bnz & ~qz, &Q, &U, sizeof Q); |
1159 | 5.37k | qz &= ~bnz; |
1160 | 5.37k | } |
1161 | 1.34k | x ++; |
1162 | 1.34k | } |
1163 | 42 | *P = Q; |
1164 | 42 | } |
1165 | | |
1166 | | /* |
1167 | | * Precomputed window: k*G points, where G is the curve generator, and k |
1168 | | * is an integer from 1 to 15 (inclusive). The X and Y coordinates of |
1169 | | * the point are encoded as 9 words of 30 bits each (little-endian |
1170 | | * order). |
1171 | | */ |
1172 | | static const uint32_t Gwin[15][18] = { |
1173 | | |
1174 | | { 0x1898C296, 0x1284E517, 0x1EB33A0F, 0x00DF604B, |
1175 | | 0x2440F277, 0x339B958E, 0x04247F8B, 0x347CB84B, |
1176 | | 0x00006B17, 0x37BF51F5, 0x2ED901A0, 0x3315ECEC, |
1177 | | 0x338CD5DA, 0x0F9E162B, 0x1FAD29F0, 0x27F9B8EE, |
1178 | | 0x10B8BF86, 0x00004FE3 }, |
1179 | | |
1180 | | { 0x07669978, 0x182D23F1, 0x3F21B35A, 0x225A789D, |
1181 | | 0x351AC3C0, 0x08E00C12, 0x34F7E8A5, 0x1EC62340, |
1182 | | 0x00007CF2, 0x227873D1, 0x3812DE74, 0x0E982299, |
1183 | | 0x1F6B798F, 0x3430DBBA, 0x366B1A7D, 0x2D040293, |
1184 | | 0x154436E3, 0x00000777 }, |
1185 | | |
1186 | | { 0x06E7FD6C, 0x2D05986F, 0x3ADA985F, 0x31ADC87B, |
1187 | | 0x0BF165E6, 0x1FBE5475, 0x30A44C8F, 0x3934698C, |
1188 | | 0x00005ECB, 0x227D5032, 0x29E6C49E, 0x04FB83D9, |
1189 | | 0x0AAC0D8E, 0x24A2ECD8, 0x2C1B3869, 0x0FF7E374, |
1190 | | 0x19031266, 0x00008734 }, |
1191 | | |
1192 | | { 0x2B030852, 0x024C0911, 0x05596EF5, 0x07F8B6DE, |
1193 | | 0x262BD003, 0x3779967B, 0x08FBBA02, 0x128D4CB4, |
1194 | | 0x0000E253, 0x184ED8C6, 0x310B08FC, 0x30EE0055, |
1195 | | 0x3F25B0FC, 0x062D764E, 0x3FB97F6A, 0x33CC719D, |
1196 | | 0x15D69318, 0x0000E0F1 }, |
1197 | | |
1198 | | { 0x03D033ED, 0x05552837, 0x35BE5242, 0x2320BF47, |
1199 | | 0x268FDFEF, 0x13215821, 0x140D2D78, 0x02DE9454, |
1200 | | 0x00005159, 0x3DA16DA4, 0x0742ED13, 0x0D80888D, |
1201 | | 0x004BC035, 0x0A79260D, 0x06FCDAFE, 0x2727D8AE, |
1202 | | 0x1F6A2412, 0x0000E0C1 }, |
1203 | | |
1204 | | { 0x3C2291A9, 0x1AC2ABA4, 0x3B215B4C, 0x131D037A, |
1205 | | 0x17DDE302, 0x0C90B2E2, 0x0602C92D, 0x05CA9DA9, |
1206 | | 0x0000B01A, 0x0FC77FE2, 0x35F1214E, 0x07E16BDF, |
1207 | | 0x003DDC07, 0x2703791C, 0x3038B7EE, 0x3DAD56FE, |
1208 | | 0x041D0C8D, 0x0000E85C }, |
1209 | | |
1210 | | { 0x3187B2A3, 0x0018A1C0, 0x00FEF5B3, 0x3E7E2E2A, |
1211 | | 0x01FB607E, 0x2CC199F0, 0x37B4625B, 0x0EDBE82F, |
1212 | | 0x00008E53, 0x01F400B4, 0x15786A1B, 0x3041B21C, |
1213 | | 0x31CD8CF2, 0x35900053, 0x1A7E0E9B, 0x318366D0, |
1214 | | 0x076F780C, 0x000073EB }, |
1215 | | |
1216 | | { 0x1B6FB393, 0x13767707, 0x3CE97DBB, 0x348E2603, |
1217 | | 0x354CADC1, 0x09D0B4EA, 0x1B053404, 0x1DE76FBA, |
1218 | | 0x000062D9, 0x0F09957E, 0x295029A8, 0x3E76A78D, |
1219 | | 0x3B547DAE, 0x27CEE0A2, 0x0575DC45, 0x1D8244FF, |
1220 | | 0x332F647A, 0x0000AD5A }, |
1221 | | |
1222 | | { 0x10949EE0, 0x1E7A292E, 0x06DF8B3D, 0x02B2E30B, |
1223 | | 0x31F8729E, 0x24E35475, 0x30B71878, 0x35EDBFB7, |
1224 | | 0x0000EA68, 0x0DD048FA, 0x21688929, 0x0DE823FE, |
1225 | | 0x1C53FAA9, 0x0EA0C84D, 0x052A592A, 0x1FCE7870, |
1226 | | 0x11325CB2, 0x00002A27 }, |
1227 | | |
1228 | | { 0x04C5723F, 0x30D81A50, 0x048306E4, 0x329B11C7, |
1229 | | 0x223FB545, 0x085347A8, 0x2993E591, 0x1B5ACA8E, |
1230 | | 0x0000CEF6, 0x04AF0773, 0x28D2EEA9, 0x2751EEEC, |
1231 | | 0x037B4A7F, 0x3B4C1059, 0x08F37674, 0x2AE906E1, |
1232 | | 0x18A88A6A, 0x00008786 }, |
1233 | | |
1234 | | { 0x34BC21D1, 0x0CCE474D, 0x15048BF4, 0x1D0BB409, |
1235 | | 0x021CDA16, 0x20DE76C3, 0x34C59063, 0x04EDE20E, |
1236 | | 0x00003ED1, 0x282A3740, 0x0BE3BBF3, 0x29889DAE, |
1237 | | 0x03413697, 0x34C68A09, 0x210EBE93, 0x0C8A224C, |
1238 | | 0x0826B331, 0x00009099 }, |
1239 | | |
1240 | | { 0x0624E3C4, 0x140317BA, 0x2F82C99D, 0x260C0A2C, |
1241 | | 0x25D55179, 0x194DCC83, 0x3D95E462, 0x356F6A05, |
1242 | | 0x0000741D, 0x0D4481D3, 0x2657FC8B, 0x1BA5CA71, |
1243 | | 0x3AE44B0D, 0x07B1548E, 0x0E0D5522, 0x05FDC567, |
1244 | | 0x2D1AA70E, 0x00000770 }, |
1245 | | |
1246 | | { 0x06072C01, 0x23857675, 0x1EAD58A9, 0x0B8A12D9, |
1247 | | 0x1EE2FC79, 0x0177CB61, 0x0495A618, 0x20DEB82B, |
1248 | | 0x0000177C, 0x2FC7BFD8, 0x310EEF8B, 0x1FB4DF39, |
1249 | | 0x3B8530E8, 0x0F4E7226, 0x0246B6D0, 0x2A558A24, |
1250 | | 0x163353AF, 0x000063BB }, |
1251 | | |
1252 | | { 0x24D2920B, 0x1C249DCC, 0x2069C5E5, 0x09AB2F9E, |
1253 | | 0x36DF3CF1, 0x1991FD0C, 0x062B97A7, 0x1E80070E, |
1254 | | 0x000054E7, 0x20D0B375, 0x2E9F20BD, 0x35090081, |
1255 | | 0x1C7A9DDC, 0x22E7C371, 0x087E3016, 0x03175421, |
1256 | | 0x3C6ECA7D, 0x0000F599 }, |
1257 | | |
1258 | | { 0x259B9D5F, 0x0D9A318F, 0x23A0EF16, 0x00EBE4B7, |
1259 | | 0x088265AE, 0x2CDE2666, 0x2BAE7ADF, 0x1371A5C6, |
1260 | | 0x0000F045, 0x0D034F36, 0x1F967378, 0x1B5FA3F4, |
1261 | | 0x0EC8739D, 0x1643E62A, 0x1653947E, 0x22D1F4E6, |
1262 | | 0x0FB8D64B, 0x0000B5B9 } |
1263 | | }; |
1264 | | |
1265 | | /* |
1266 | | * Lookup one of the Gwin[] values, by index. This is constant-time. |
1267 | | */ |
1268 | | static void |
1269 | | lookup_Gwin(p256_jacobian *T, uint32_t idx) |
1270 | 10.0k | { |
1271 | 10.0k | uint32_t xy[18]; |
1272 | 10.0k | uint32_t k; |
1273 | 10.0k | size_t u; |
1274 | | |
1275 | 10.0k | memset(xy, 0, sizeof xy); |
1276 | 160k | for (k = 0; k < 15; k ++) { |
1277 | 150k | uint32_t m; |
1278 | | |
1279 | 150k | m = -EQ(idx, k + 1); |
1280 | 2.85M | for (u = 0; u < 18; u ++) { |
1281 | 2.70M | xy[u] |= m & Gwin[k][u]; |
1282 | 2.70M | } |
1283 | 150k | } |
1284 | 10.0k | memcpy(T->x, &xy[0], sizeof T->x); |
1285 | 10.0k | memcpy(T->y, &xy[9], sizeof T->y); |
1286 | 10.0k | memset(T->z, 0, sizeof T->z); |
1287 | 10.0k | T->z[0] = 1; |
1288 | 10.0k | } |
1289 | | |
1290 | | /* |
1291 | | * Multiply the generator by an integer. The integer is assumed non-zero |
1292 | | * and lower than the curve order. |
1293 | | */ |
1294 | | static void |
1295 | | p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen) |
1296 | 114 | { |
1297 | | /* |
1298 | | * qz is a flag that is initially 1, and remains equal to 1 |
1299 | | * as long as the point is the point at infinity. |
1300 | | * |
1301 | | * We use a 4-bit window to handle multiplier bits by groups |
1302 | | * of 4. The precomputed window is constant static data, with |
1303 | | * points in affine coordinates; we use a constant-time lookup. |
1304 | | */ |
1305 | 114 | p256_jacobian Q; |
1306 | 114 | uint32_t qz; |
1307 | | |
1308 | 114 | memset(&Q, 0, sizeof Q); |
1309 | 114 | qz = 1; |
1310 | 5.12k | while (xlen -- > 0) { |
1311 | 5.00k | int k; |
1312 | 5.00k | unsigned bx; |
1313 | | |
1314 | 5.00k | bx = *x ++; |
1315 | 15.0k | for (k = 0; k < 2; k ++) { |
1316 | 10.0k | uint32_t bits; |
1317 | 10.0k | uint32_t bnz; |
1318 | 10.0k | p256_jacobian T, U; |
1319 | | |
1320 | 10.0k | p256_double(&Q); |
1321 | 10.0k | p256_double(&Q); |
1322 | 10.0k | p256_double(&Q); |
1323 | 10.0k | p256_double(&Q); |
1324 | 10.0k | bits = (bx >> 4) & 0x0F; |
1325 | 10.0k | bnz = NEQ(bits, 0); |
1326 | 10.0k | lookup_Gwin(&T, bits); |
1327 | 10.0k | U = Q; |
1328 | 10.0k | p256_add_mixed(&U, &T); |
1329 | 10.0k | CCOPY(bnz & qz, &Q, &T, sizeof Q); |
1330 | 10.0k | CCOPY(bnz & ~qz, &Q, &U, sizeof Q); |
1331 | 10.0k | qz &= ~bnz; |
1332 | 10.0k | bx <<= 4; |
1333 | 10.0k | } |
1334 | 5.00k | } |
1335 | 114 | *P = Q; |
1336 | 114 | } |
1337 | | |
1338 | | static const unsigned char P256_G[] = { |
1339 | | 0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, |
1340 | | 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, |
1341 | | 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, |
1342 | | 0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, |
1343 | | 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B, |
1344 | | 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40, |
1345 | | 0x68, 0x37, 0xBF, 0x51, 0xF5 |
1346 | | }; |
1347 | | |
1348 | | static const unsigned char P256_N[] = { |
1349 | | 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, |
1350 | | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, |
1351 | | 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, |
1352 | | 0x25, 0x51 |
1353 | | }; |
1354 | | |
1355 | | static const unsigned char * |
1356 | | api_generator(int curve, size_t *len) |
1357 | 0 | { |
1358 | 0 | (void)curve; |
1359 | 0 | *len = sizeof P256_G; |
1360 | 0 | return P256_G; |
1361 | 0 | } |
1362 | | |
1363 | | static const unsigned char * |
1364 | | api_order(int curve, size_t *len) |
1365 | 24 | { |
1366 | 24 | (void)curve; |
1367 | 24 | *len = sizeof P256_N; |
1368 | 24 | return P256_N; |
1369 | 24 | } |
1370 | | |
1371 | | static size_t |
1372 | | api_xoff(int curve, size_t *len) |
1373 | 0 | { |
1374 | 0 | (void)curve; |
1375 | 0 | *len = 32; |
1376 | 0 | return 1; |
1377 | 0 | } |
1378 | | |
1379 | | static uint32_t |
1380 | | api_mul(unsigned char *G, size_t Glen, |
1381 | | const unsigned char *x, size_t xlen, int curve) |
1382 | 0 | { |
1383 | 0 | uint32_t r; |
1384 | 0 | p256_jacobian P; |
1385 | |
|
1386 | 0 | (void)curve; |
1387 | 0 | if (Glen != 65) { |
1388 | 0 | return 0; |
1389 | 0 | } |
1390 | 0 | r = p256_decode(&P, G, Glen); |
1391 | 0 | p256_mul(&P, x, xlen); |
1392 | 0 | p256_to_affine(&P); |
1393 | 0 | p256_encode(G, &P); |
1394 | 0 | return r; |
1395 | 0 | } |
1396 | | |
1397 | | static size_t |
1398 | | api_mulgen(unsigned char *R, |
1399 | | const unsigned char *x, size_t xlen, int curve) |
1400 | 72 | { |
1401 | 72 | p256_jacobian P; |
1402 | | |
1403 | 72 | (void)curve; |
1404 | 72 | p256_mulgen(&P, x, xlen); |
1405 | 72 | p256_to_affine(&P); |
1406 | 72 | p256_encode(R, &P); |
1407 | 72 | return 65; |
1408 | 72 | } |
1409 | | |
1410 | | static uint32_t |
1411 | | api_muladd(unsigned char *A, const unsigned char *B, size_t len, |
1412 | | const unsigned char *x, size_t xlen, |
1413 | | const unsigned char *y, size_t ylen, int curve) |
1414 | 42 | { |
1415 | 42 | p256_jacobian P, Q; |
1416 | 42 | uint32_t r, t, z; |
1417 | 42 | int i; |
1418 | | |
1419 | 42 | (void)curve; |
1420 | 42 | if (len != 65) { |
1421 | 0 | return 0; |
1422 | 0 | } |
1423 | 42 | r = p256_decode(&P, A, len); |
1424 | 42 | p256_mul(&P, x, xlen); |
1425 | 42 | if (B == NULL) { |
1426 | 42 | p256_mulgen(&Q, y, ylen); |
1427 | 42 | } else { |
1428 | 0 | r &= p256_decode(&Q, B, len); |
1429 | 0 | p256_mul(&Q, y, ylen); |
1430 | 0 | } |
1431 | | |
1432 | | /* |
1433 | | * The final addition may fail in case both points are equal. |
1434 | | */ |
1435 | 42 | t = p256_add(&P, &Q); |
1436 | 42 | reduce_final_f256(P.z); |
1437 | 42 | z = 0; |
1438 | 420 | for (i = 0; i < 9; i ++) { |
1439 | 378 | z |= P.z[i]; |
1440 | 378 | } |
1441 | 42 | z = EQ(z, 0); |
1442 | 42 | p256_double(&Q); |
1443 | | |
1444 | | /* |
1445 | | * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we |
1446 | | * have the following: |
1447 | | * |
1448 | | * z = 0, t = 0 return P (normal addition) |
1449 | | * z = 0, t = 1 return P (normal addition) |
1450 | | * z = 1, t = 0 return Q (a 'double' case) |
1451 | | * z = 1, t = 1 report an error (P+Q = 0) |
1452 | | */ |
1453 | 42 | CCOPY(z & ~t, &P, &Q, sizeof Q); |
1454 | 42 | p256_to_affine(&P); |
1455 | 42 | p256_encode(A, &P); |
1456 | 42 | r &= ~(z & t); |
1457 | 42 | return r; |
1458 | 42 | } |
1459 | | |
1460 | | /* see bearssl_ec.h */ |
1461 | | const br_ec_impl br_ec_p256_m31 = { |
1462 | | (uint32_t)0x00800000, |
1463 | | &api_generator, |
1464 | | &api_order, |
1465 | | &api_xoff, |
1466 | | &api_mul, |
1467 | | &api_mulgen, |
1468 | | &api_muladd |
1469 | | }; |