/src/BearSSL/src/ec/ec_prime_i31.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> |
3 | | * |
4 | | * Permission is hereby granted, free of charge, to any person obtaining |
5 | | * a copy of this software and associated documentation files (the |
6 | | * "Software"), to deal in the Software without restriction, including |
7 | | * without limitation the rights to use, copy, modify, merge, publish, |
8 | | * distribute, sublicense, and/or sell copies of the Software, and to |
9 | | * permit persons to whom the Software is furnished to do so, subject to |
10 | | * the following conditions: |
11 | | * |
12 | | * The above copyright notice and this permission notice shall be |
13 | | * included in all copies or substantial portions of the Software. |
14 | | * |
15 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
16 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
17 | | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
18 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
19 | | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
20 | | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
21 | | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | |
25 | | #include "inner.h" |
26 | | |
27 | | /* |
28 | | * Parameters for supported curves (field modulus, and 'b' equation |
29 | | * parameter; both values use the 'i31' format, and 'b' is in Montgomery |
30 | | * representation). |
31 | | */ |
32 | | |
33 | | static const uint32_t P256_P[] = { |
34 | | 0x00000108, |
35 | | 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007, |
36 | | 0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80, |
37 | | 0x000000FF |
38 | | }; |
39 | | |
40 | | static const uint32_t P256_R2[] = { |
41 | | 0x00000108, |
42 | | 0x00014000, 0x00018000, 0x00000000, 0x7FF40000, |
43 | | 0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF, |
44 | | 0x00000000 |
45 | | }; |
46 | | |
47 | | static const uint32_t P256_B[] = { |
48 | | 0x00000108, |
49 | | 0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA, |
50 | | 0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D, |
51 | | 0x0000000E |
52 | | }; |
53 | | |
54 | | static const uint32_t P384_P[] = { |
55 | | 0x0000018C, |
56 | | 0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8, |
57 | | 0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, |
58 | | 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, |
59 | | 0x00000FFF |
60 | | }; |
61 | | |
62 | | static const uint32_t P384_R2[] = { |
63 | | 0x0000018C, |
64 | | 0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF, |
65 | | 0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF, |
66 | | 0x00008000, 0x00008000, 0x00000000, 0x00000000, |
67 | | 0x00000000 |
68 | | }; |
69 | | |
70 | | static const uint32_t P384_B[] = { |
71 | | 0x0000018C, |
72 | | 0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C, |
73 | | 0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B, |
74 | | 0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE, |
75 | | 0x000008A5 |
76 | | }; |
77 | | |
78 | | static const uint32_t P521_P[] = { |
79 | | 0x00000219, |
80 | | 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, |
81 | | 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, |
82 | | 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, |
83 | | 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, |
84 | | 0x01FFFFFF |
85 | | }; |
86 | | |
87 | | static const uint32_t P521_R2[] = { |
88 | | 0x00000219, |
89 | | 0x00001000, 0x00000000, 0x00000000, 0x00000000, |
90 | | 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
91 | | 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
92 | | 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
93 | | 0x00000000 |
94 | | }; |
95 | | |
96 | | static const uint32_t P521_B[] = { |
97 | | 0x00000219, |
98 | | 0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A, |
99 | | 0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F, |
100 | | 0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366, |
101 | | 0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393, |
102 | | 0x00654FAE |
103 | | }; |
104 | | |
105 | | typedef struct { |
106 | | const uint32_t *p; |
107 | | const uint32_t *b; |
108 | | const uint32_t *R2; |
109 | | uint32_t p0i; |
110 | | size_t point_len; |
111 | | } curve_params; |
112 | | |
113 | | static inline const curve_params * |
114 | | id_to_curve(int curve) |
115 | 1.54k | { |
116 | 1.54k | static const curve_params pp[] = { |
117 | 1.54k | { P256_P, P256_B, P256_R2, 0x00000001, 65 }, |
118 | 1.54k | { P384_P, P384_B, P384_R2, 0x00000001, 97 }, |
119 | 1.54k | { P521_P, P521_B, P521_R2, 0x00000001, 133 } |
120 | 1.54k | }; |
121 | | |
122 | 1.54k | return &pp[curve - BR_EC_secp256r1]; |
123 | 1.54k | } |
124 | | |
125 | 7.47M | #define I31_LEN ((BR_MAX_EC_SIZE + 61) / 31) |
126 | | |
127 | | /* |
128 | | * Type for a point in Jacobian coordinates: |
129 | | * -- three values, x, y and z, in Montgomery representation |
130 | | * -- affine coordinates are X = x / z^2 and Y = y / z^3 |
131 | | * -- for the point at infinity, z = 0 |
132 | | */ |
133 | | typedef struct { |
134 | | uint32_t c[3][I31_LEN]; |
135 | | } jacobian; |
136 | | |
137 | | /* |
138 | | * We use a custom interpreter that uses a dozen registers, and |
139 | | * only six operations: |
140 | | * MSET(d, a) copy a into d |
141 | | * MADD(d, a) d = d+a (modular) |
142 | | * MSUB(d, a) d = d-a (modular) |
143 | | * MMUL(d, a, b) d = a*b (Montgomery multiplication) |
144 | | * MINV(d, a, b) invert d modulo p; a and b are used as scratch registers |
145 | | * MTZ(d) clear return value if d = 0 |
146 | | * Destination of MMUL (d) must be distinct from operands (a and b). |
147 | | * There is no such constraint for MSUB and MADD. |
148 | | * |
149 | | * Registers include the operand coordinates, and temporaries. |
150 | | */ |
151 | | #define MSET(d, a) (0x0000 + ((d) << 8) + ((a) << 4)) |
152 | | #define MADD(d, a) (0x1000 + ((d) << 8) + ((a) << 4)) |
153 | | #define MSUB(d, a) (0x2000 + ((d) << 8) + ((a) << 4)) |
154 | | #define MMUL(d, a, b) (0x3000 + ((d) << 8) + ((a) << 4) + (b)) |
155 | | #define MINV(d, a, b) (0x4000 + ((d) << 8) + ((a) << 4) + (b)) |
156 | | #define MTZ(d) (0x5000 + ((d) << 8)) |
157 | | #define ENDCODE 0 |
158 | | |
159 | | /* |
160 | | * Registers for the input operands. |
161 | | */ |
162 | 2.99M | #define P1x 0 |
163 | | #define P1y 1 |
164 | | #define P1z 2 |
165 | 1.49M | #define P2x 3 |
166 | | #define P2y 4 |
167 | | #define P2z 5 |
168 | | |
169 | | /* |
170 | | * Alternate names for the first input operand. |
171 | | */ |
172 | | #define Px 0 |
173 | | #define Py 1 |
174 | | #define Pz 2 |
175 | | |
176 | | /* |
177 | | * Temporaries. |
178 | | */ |
179 | | #define t1 6 |
180 | | #define t2 7 |
181 | | #define t3 8 |
182 | | #define t4 9 |
183 | | #define t5 10 |
184 | | #define t6 11 |
185 | | #define t7 12 |
186 | | |
187 | | /* |
188 | | * Extra scratch registers available when there is no second operand (e.g. |
189 | | * for "double" and "affine"). |
190 | | */ |
191 | | #define t8 3 |
192 | | #define t9 4 |
193 | | #define t10 5 |
194 | | |
195 | | /* |
196 | | * Doubling formulas are: |
197 | | * |
198 | | * s = 4*x*y^2 |
199 | | * m = 3*(x + z^2)*(x - z^2) |
200 | | * x' = m^2 - 2*s |
201 | | * y' = m*(s - x') - 8*y^4 |
202 | | * z' = 2*y*z |
203 | | * |
204 | | * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it |
205 | | * should. This case should not happen anyway, because our curves have |
206 | | * prime order, and thus do not contain any point of order 2. |
207 | | * |
208 | | * If P is infinity (z = 0), then again the formulas yield infinity, |
209 | | * which is correct. Thus, this code works for all points. |
210 | | * |
211 | | * Cost: 8 multiplications |
212 | | */ |
213 | | static const uint16_t code_double[] = { |
214 | | /* |
215 | | * Compute z^2 (in t1). |
216 | | */ |
217 | | MMUL(t1, Pz, Pz), |
218 | | |
219 | | /* |
220 | | * Compute x-z^2 (in t2) and then x+z^2 (in t1). |
221 | | */ |
222 | | MSET(t2, Px), |
223 | | MSUB(t2, t1), |
224 | | MADD(t1, Px), |
225 | | |
226 | | /* |
227 | | * Compute m = 3*(x+z^2)*(x-z^2) (in t1). |
228 | | */ |
229 | | MMUL(t3, t1, t2), |
230 | | MSET(t1, t3), |
231 | | MADD(t1, t3), |
232 | | MADD(t1, t3), |
233 | | |
234 | | /* |
235 | | * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3). |
236 | | */ |
237 | | MMUL(t3, Py, Py), |
238 | | MADD(t3, t3), |
239 | | MMUL(t2, Px, t3), |
240 | | MADD(t2, t2), |
241 | | |
242 | | /* |
243 | | * Compute x' = m^2 - 2*s. |
244 | | */ |
245 | | MMUL(Px, t1, t1), |
246 | | MSUB(Px, t2), |
247 | | MSUB(Px, t2), |
248 | | |
249 | | /* |
250 | | * Compute z' = 2*y*z. |
251 | | */ |
252 | | MMUL(t4, Py, Pz), |
253 | | MSET(Pz, t4), |
254 | | MADD(Pz, t4), |
255 | | |
256 | | /* |
257 | | * Compute y' = m*(s - x') - 8*y^4. Note that we already have |
258 | | * 2*y^2 in t3. |
259 | | */ |
260 | | MSUB(t2, Px), |
261 | | MMUL(Py, t1, t2), |
262 | | MMUL(t4, t3, t3), |
263 | | MSUB(Py, t4), |
264 | | MSUB(Py, t4), |
265 | | |
266 | | ENDCODE |
267 | | }; |
268 | | |
269 | | /* |
270 | | * Addtions formulas are: |
271 | | * |
272 | | * u1 = x1 * z2^2 |
273 | | * u2 = x2 * z1^2 |
274 | | * s1 = y1 * z2^3 |
275 | | * s2 = y2 * z1^3 |
276 | | * h = u2 - u1 |
277 | | * r = s2 - s1 |
278 | | * x3 = r^2 - h^3 - 2 * u1 * h^2 |
279 | | * y3 = r * (u1 * h^2 - x3) - s1 * h^3 |
280 | | * z3 = h * z1 * z2 |
281 | | * |
282 | | * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that |
283 | | * z3 == 0, so the result is correct. |
284 | | * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is |
285 | | * not correct. |
286 | | * h == 0 only if u1 == u2; this happens in two cases: |
287 | | * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2 |
288 | | * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity) |
289 | | * |
290 | | * Thus, the following situations are not handled correctly: |
291 | | * -- P1 = 0 and P2 != 0 |
292 | | * -- P1 != 0 and P2 = 0 |
293 | | * -- P1 = P2 |
294 | | * All other cases are properly computed. However, even in "incorrect" |
295 | | * situations, the three coordinates still are properly formed field |
296 | | * elements. |
297 | | * |
298 | | * The returned flag is cleared if r == 0. This happens in the following |
299 | | * cases: |
300 | | * -- Both points are on the same horizontal line (same Y coordinate). |
301 | | * -- Both points are infinity. |
302 | | * -- One point is infinity and the other is on line Y = 0. |
303 | | * The third case cannot happen with our curves (there is no valid point |
304 | | * on line Y = 0 since that would be a point of order 2). If the two |
305 | | * source points are non-infinity, then remains only the case where the |
306 | | * two points are on the same horizontal line. |
307 | | * |
308 | | * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and |
309 | | * P2 != 0: |
310 | | * -- If the returned value is not the point at infinity, then it was properly |
311 | | * computed. |
312 | | * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result |
313 | | * is indeed the point at infinity. |
314 | | * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should |
315 | | * use the 'double' code. |
316 | | * |
317 | | * Cost: 16 multiplications |
318 | | */ |
319 | | static const uint16_t code_add[] = { |
320 | | /* |
321 | | * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). |
322 | | */ |
323 | | MMUL(t3, P2z, P2z), |
324 | | MMUL(t1, P1x, t3), |
325 | | MMUL(t4, P2z, t3), |
326 | | MMUL(t3, P1y, t4), |
327 | | |
328 | | /* |
329 | | * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). |
330 | | */ |
331 | | MMUL(t4, P1z, P1z), |
332 | | MMUL(t2, P2x, t4), |
333 | | MMUL(t5, P1z, t4), |
334 | | MMUL(t4, P2y, t5), |
335 | | |
336 | | /* |
337 | | * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). |
338 | | */ |
339 | | MSUB(t2, t1), |
340 | | MSUB(t4, t3), |
341 | | |
342 | | /* |
343 | | * Report cases where r = 0 through the returned flag. |
344 | | */ |
345 | | MTZ(t4), |
346 | | |
347 | | /* |
348 | | * Compute u1*h^2 (in t6) and h^3 (in t5). |
349 | | */ |
350 | | MMUL(t7, t2, t2), |
351 | | MMUL(t6, t1, t7), |
352 | | MMUL(t5, t7, t2), |
353 | | |
354 | | /* |
355 | | * Compute x3 = r^2 - h^3 - 2*u1*h^2. |
356 | | * t1 and t7 can be used as scratch registers. |
357 | | */ |
358 | | MMUL(P1x, t4, t4), |
359 | | MSUB(P1x, t5), |
360 | | MSUB(P1x, t6), |
361 | | MSUB(P1x, t6), |
362 | | |
363 | | /* |
364 | | * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. |
365 | | */ |
366 | | MSUB(t6, P1x), |
367 | | MMUL(P1y, t4, t6), |
368 | | MMUL(t1, t5, t3), |
369 | | MSUB(P1y, t1), |
370 | | |
371 | | /* |
372 | | * Compute z3 = h*z1*z2. |
373 | | */ |
374 | | MMUL(t1, P1z, P2z), |
375 | | MMUL(P1z, t1, t2), |
376 | | |
377 | | ENDCODE |
378 | | }; |
379 | | |
380 | | /* |
381 | | * Check that the point is on the curve. This code snippet assumes the |
382 | | * following conventions: |
383 | | * -- Coordinates x and y have been freshly decoded in P1 (but not |
384 | | * converted to Montgomery coordinates yet). |
385 | | * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1. |
386 | | */ |
387 | | static const uint16_t code_check[] = { |
388 | | |
389 | | /* Convert x and y to Montgomery representation. */ |
390 | | MMUL(t1, P1x, P2x), |
391 | | MMUL(t2, P1y, P2x), |
392 | | MSET(P1x, t1), |
393 | | MSET(P1y, t2), |
394 | | |
395 | | /* Compute x^3 in t1. */ |
396 | | MMUL(t2, P1x, P1x), |
397 | | MMUL(t1, P1x, t2), |
398 | | |
399 | | /* Subtract 3*x from t1. */ |
400 | | MSUB(t1, P1x), |
401 | | MSUB(t1, P1x), |
402 | | MSUB(t1, P1x), |
403 | | |
404 | | /* Add b. */ |
405 | | MADD(t1, P2y), |
406 | | |
407 | | /* Compute y^2 in t2. */ |
408 | | MMUL(t2, P1y, P1y), |
409 | | |
410 | | /* Compare y^2 with x^3 - 3*x + b; they must match. */ |
411 | | MSUB(t1, t2), |
412 | | MTZ(t1), |
413 | | |
414 | | /* Set z to 1 (in Montgomery representation). */ |
415 | | MMUL(P1z, P2x, P2z), |
416 | | |
417 | | ENDCODE |
418 | | }; |
419 | | |
420 | | /* |
421 | | * Conversion back to affine coordinates. This code snippet assumes that |
422 | | * the z coordinate of P2 is set to 1 (not in Montgomery representation). |
423 | | */ |
424 | | static const uint16_t code_affine[] = { |
425 | | |
426 | | /* Save z*R in t1. */ |
427 | | MSET(t1, P1z), |
428 | | |
429 | | /* Compute z^3 in t2. */ |
430 | | MMUL(t2, P1z, P1z), |
431 | | MMUL(t3, P1z, t2), |
432 | | MMUL(t2, t3, P2z), |
433 | | |
434 | | /* Invert to (1/z^3) in t2. */ |
435 | | MINV(t2, t3, t4), |
436 | | |
437 | | /* Compute y. */ |
438 | | MSET(t3, P1y), |
439 | | MMUL(P1y, t2, t3), |
440 | | |
441 | | /* Compute (1/z^2) in t3. */ |
442 | | MMUL(t3, t2, t1), |
443 | | |
444 | | /* Compute x. */ |
445 | | MSET(t2, P1x), |
446 | | MMUL(P1x, t2, t3), |
447 | | |
448 | | ENDCODE |
449 | | }; |
450 | | |
451 | | static uint32_t |
452 | | run_code(jacobian *P1, const jacobian *P2, |
453 | | const curve_params *cc, const uint16_t *code) |
454 | 1.49M | { |
455 | 1.49M | uint32_t r; |
456 | 1.49M | uint32_t t[13][I31_LEN]; |
457 | 1.49M | size_t u; |
458 | | |
459 | 1.49M | r = 1; |
460 | | |
461 | | /* |
462 | | * Copy the two operands in the dedicated registers. |
463 | | */ |
464 | 1.49M | memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t)); |
465 | 1.49M | memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t)); |
466 | | |
467 | | /* |
468 | | * Run formulas. |
469 | | */ |
470 | 36.3M | for (u = 0;; u ++) { |
471 | 36.3M | unsigned op, d, a, b; |
472 | | |
473 | 36.3M | op = code[u]; |
474 | 36.3M | if (op == 0) { |
475 | 1.49M | break; |
476 | 1.49M | } |
477 | 34.8M | d = (op >> 8) & 0x0F; |
478 | 34.8M | a = (op >> 4) & 0x0F; |
479 | 34.8M | b = op & 0x0F; |
480 | 34.8M | op >>= 12; |
481 | 34.8M | switch (op) { |
482 | 0 | uint32_t ctl; |
483 | 0 | size_t plen; |
484 | 0 | unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3]; |
485 | | |
486 | 2.98M | case 0: |
487 | 2.98M | memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t)); |
488 | 2.98M | break; |
489 | 5.96M | case 1: |
490 | 5.96M | ctl = br_i31_add(t[d], t[a], 1); |
491 | 5.96M | ctl |= NOT(br_i31_sub(t[d], cc->p, 0)); |
492 | 5.96M | br_i31_sub(t[d], cc->p, ctl); |
493 | 5.96M | break; |
494 | 9.45M | case 2: |
495 | 9.45M | br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1)); |
496 | 9.45M | break; |
497 | 15.9M | case 3: |
498 | 15.9M | br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i); |
499 | 15.9M | break; |
500 | 1.54k | case 4: |
501 | 1.54k | plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3; |
502 | 1.54k | br_i31_encode(tp, plen, cc->p); |
503 | 1.54k | tp[plen - 1] -= 2; |
504 | 1.54k | br_i31_modpow(t[d], tp, plen, |
505 | 1.54k | cc->p, cc->p0i, t[a], t[b]); |
506 | 1.54k | break; |
507 | 500k | default: |
508 | 500k | r &= ~br_i31_iszero(t[d]); |
509 | 500k | break; |
510 | 34.8M | } |
511 | 34.8M | } |
512 | | |
513 | | /* |
514 | | * Copy back result. |
515 | | */ |
516 | 1.49M | memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t)); |
517 | 1.49M | return r; |
518 | 1.49M | } |
519 | | |
520 | | static void |
521 | | set_one(uint32_t *x, const uint32_t *p) |
522 | 3.58k | { |
523 | 3.58k | size_t plen; |
524 | | |
525 | 3.58k | plen = (p[0] + 63) >> 5; |
526 | 3.58k | memset(x, 0, plen * sizeof *x); |
527 | 3.58k | x[0] = p[0]; |
528 | 3.58k | x[1] = 0x00000001; |
529 | 3.58k | } |
530 | | |
531 | | static void |
532 | | point_zero(jacobian *P, const curve_params *cc) |
533 | 4.09k | { |
534 | 4.09k | memset(P, 0, sizeof *P); |
535 | 4.09k | P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0]; |
536 | 4.09k | } |
537 | | |
538 | | static inline void |
539 | | point_double(jacobian *P, const curve_params *cc) |
540 | 993k | { |
541 | 993k | run_code(P, P, cc, code_double); |
542 | 993k | } |
543 | | |
544 | | static inline uint32_t |
545 | | point_add(jacobian *P1, const jacobian *P2, const curve_params *cc) |
546 | 498k | { |
547 | 498k | return run_code(P1, P2, cc, code_add); |
548 | 498k | } |
549 | | |
550 | | static void |
551 | | point_mul(jacobian *P, const unsigned char *x, size_t xlen, |
552 | | const curve_params *cc) |
553 | 2.04k | { |
554 | | /* |
555 | | * We do a simple double-and-add ladder with a 2-bit window |
556 | | * to make only one add every two doublings. We thus first |
557 | | * precompute 2P and 3P in some local buffers. |
558 | | * |
559 | | * We always perform two doublings and one addition; the |
560 | | * addition is with P, 2P and 3P and is done in a temporary |
561 | | * array. |
562 | | * |
563 | | * The addition code cannot handle cases where one of the |
564 | | * operands is infinity, which is the case at the start of the |
565 | | * ladder. We therefore need to maintain a flag that controls |
566 | | * this situation. |
567 | | */ |
568 | 2.04k | uint32_t qz; |
569 | 2.04k | jacobian P2, P3, Q, T, U; |
570 | | |
571 | 2.04k | memcpy(&P2, P, sizeof P2); |
572 | 2.04k | point_double(&P2, cc); |
573 | 2.04k | memcpy(&P3, P, sizeof P3); |
574 | 2.04k | point_add(&P3, &P2, cc); |
575 | | |
576 | 2.04k | point_zero(&Q, cc); |
577 | 2.04k | qz = 1; |
578 | 125k | while (xlen -- > 0) { |
579 | 123k | int k; |
580 | | |
581 | 619k | for (k = 6; k >= 0; k -= 2) { |
582 | 495k | uint32_t bits; |
583 | 495k | uint32_t bnz; |
584 | | |
585 | 495k | point_double(&Q, cc); |
586 | 495k | point_double(&Q, cc); |
587 | 495k | memcpy(&T, P, sizeof T); |
588 | 495k | memcpy(&U, &Q, sizeof U); |
589 | 495k | bits = (*x >> k) & (uint32_t)3; |
590 | 495k | bnz = NEQ(bits, 0); |
591 | 495k | CCOPY(EQ(bits, 2), &T, &P2, sizeof T); |
592 | 495k | CCOPY(EQ(bits, 3), &T, &P3, sizeof T); |
593 | 495k | point_add(&U, &T, cc); |
594 | 495k | CCOPY(bnz & qz, &Q, &T, sizeof Q); |
595 | 495k | CCOPY(bnz & ~qz, &Q, &U, sizeof Q); |
596 | 495k | qz &= ~bnz; |
597 | 495k | } |
598 | 123k | x ++; |
599 | 123k | } |
600 | 2.04k | memcpy(P, &Q, sizeof Q); |
601 | 2.04k | } |
602 | | |
603 | | /* |
604 | | * Decode point into Jacobian coordinates. This function does not support |
605 | | * the point at infinity. If the point is invalid then this returns 0, but |
606 | | * the coordinates are still set to properly formed field elements. |
607 | | */ |
608 | | static uint32_t |
609 | | point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc) |
610 | 2.04k | { |
611 | | /* |
612 | | * Points must use uncompressed format: |
613 | | * -- first byte is 0x04; |
614 | | * -- coordinates X and Y use unsigned big-endian, with the same |
615 | | * length as the field modulus. |
616 | | * |
617 | | * We don't support hybrid format (uncompressed, but first byte |
618 | | * has value 0x06 or 0x07, depending on the least significant bit |
619 | | * of Y) because it is rather useless, and explicitly forbidden |
620 | | * by PKIX (RFC 5480, section 2.2). |
621 | | * |
622 | | * We don't support compressed format either, because it is not |
623 | | * much used in practice (there are or were patent-related |
624 | | * concerns about point compression, which explains the lack of |
625 | | * generalised support). Also, point compression support would |
626 | | * need a bit more code. |
627 | | */ |
628 | 2.04k | const unsigned char *buf; |
629 | 2.04k | size_t plen, zlen; |
630 | 2.04k | uint32_t r; |
631 | 2.04k | jacobian Q; |
632 | | |
633 | 2.04k | buf = src; |
634 | 2.04k | point_zero(P, cc); |
635 | 2.04k | plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3; |
636 | 2.04k | if (len != 1 + (plen << 1)) { |
637 | 0 | return 0; |
638 | 0 | } |
639 | 2.04k | r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p); |
640 | 2.04k | r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p); |
641 | | |
642 | | /* |
643 | | * Check first byte. |
644 | | */ |
645 | 2.04k | r &= EQ(buf[0], 0x04); |
646 | | /* obsolete |
647 | | r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06) |
648 | | & ~(uint32_t)(buf[0] ^ buf[plen << 1])); |
649 | | */ |
650 | | |
651 | | /* |
652 | | * Convert coordinates and check that the point is valid. |
653 | | */ |
654 | 2.04k | zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t); |
655 | 2.04k | memcpy(Q.c[0], cc->R2, zlen); |
656 | 2.04k | memcpy(Q.c[1], cc->b, zlen); |
657 | 2.04k | set_one(Q.c[2], cc->p); |
658 | 2.04k | r &= ~run_code(P, &Q, cc, code_check); |
659 | 2.04k | return r; |
660 | 2.04k | } |
661 | | |
662 | | /* |
663 | | * Encode a point. This method assumes that the point is correct and is |
664 | | * not the point at infinity. Encoded size is always 1+2*plen, where |
665 | | * plen is the field modulus length, in bytes. |
666 | | */ |
667 | | static void |
668 | | point_encode(void *dst, const jacobian *P, const curve_params *cc) |
669 | 1.54k | { |
670 | 1.54k | unsigned char *buf; |
671 | 1.54k | uint32_t xbl; |
672 | 1.54k | size_t plen; |
673 | 1.54k | jacobian Q, T; |
674 | | |
675 | 1.54k | buf = dst; |
676 | 1.54k | xbl = cc->p[0]; |
677 | 1.54k | xbl -= (xbl >> 5); |
678 | 1.54k | plen = (xbl + 7) >> 3; |
679 | 1.54k | buf[0] = 0x04; |
680 | 1.54k | memcpy(&Q, P, sizeof *P); |
681 | 1.54k | set_one(T.c[2], cc->p); |
682 | 1.54k | run_code(&Q, &T, cc, code_affine); |
683 | 1.54k | br_i31_encode(buf + 1, plen, Q.c[0]); |
684 | 1.54k | br_i31_encode(buf + 1 + plen, plen, Q.c[1]); |
685 | 1.54k | } |
686 | | |
687 | | static const br_ec_curve_def * |
688 | | id_to_curve_def(int curve) |
689 | 1.65k | { |
690 | 1.65k | switch (curve) { |
691 | 118 | case BR_EC_secp256r1: |
692 | 118 | return &br_secp256r1; |
693 | 732 | case BR_EC_secp384r1: |
694 | 732 | return &br_secp384r1; |
695 | 804 | case BR_EC_secp521r1: |
696 | 804 | return &br_secp521r1; |
697 | 1.65k | } |
698 | 0 | return NULL; |
699 | 1.65k | } |
700 | | |
701 | | static const unsigned char * |
702 | | api_generator(int curve, size_t *len) |
703 | 1.54k | { |
704 | 1.54k | const br_ec_curve_def *cd; |
705 | | |
706 | 1.54k | cd = id_to_curve_def(curve); |
707 | 1.54k | *len = cd->generator_len; |
708 | 1.54k | return cd->generator; |
709 | 1.54k | } |
710 | | |
711 | | static const unsigned char * |
712 | | api_order(int curve, size_t *len) |
713 | 114 | { |
714 | 114 | const br_ec_curve_def *cd; |
715 | | |
716 | 114 | cd = id_to_curve_def(curve); |
717 | 114 | *len = cd->order_len; |
718 | 114 | return cd->order; |
719 | 114 | } |
720 | | |
721 | | static size_t |
722 | | api_xoff(int curve, size_t *len) |
723 | 0 | { |
724 | 0 | api_generator(curve, len); |
725 | 0 | *len >>= 1; |
726 | 0 | return 1; |
727 | 0 | } |
728 | | |
729 | | static uint32_t |
730 | | api_mul(unsigned char *G, size_t Glen, |
731 | | const unsigned char *x, size_t xlen, int curve) |
732 | 1.03k | { |
733 | 1.03k | uint32_t r; |
734 | 1.03k | const curve_params *cc; |
735 | 1.03k | jacobian P; |
736 | | |
737 | 1.03k | cc = id_to_curve(curve); |
738 | 1.03k | if (Glen != cc->point_len) { |
739 | 0 | return 0; |
740 | 0 | } |
741 | 1.03k | r = point_decode(&P, G, Glen, cc); |
742 | 1.03k | point_mul(&P, x, xlen, cc); |
743 | 1.03k | point_encode(G, &P, cc); |
744 | 1.03k | return r; |
745 | 1.03k | } |
746 | | |
747 | | static size_t |
748 | | api_mulgen(unsigned char *R, |
749 | | const unsigned char *x, size_t xlen, int curve) |
750 | 1.03k | { |
751 | 1.03k | const unsigned char *G; |
752 | 1.03k | size_t Glen; |
753 | | |
754 | 1.03k | G = api_generator(curve, &Glen); |
755 | 1.03k | memcpy(R, G, Glen); |
756 | 1.03k | api_mul(R, Glen, x, xlen, curve); |
757 | 1.03k | return Glen; |
758 | 1.03k | } |
759 | | |
760 | | static uint32_t |
761 | | api_muladd(unsigned char *A, const unsigned char *B, size_t len, |
762 | | const unsigned char *x, size_t xlen, |
763 | | const unsigned char *y, size_t ylen, int curve) |
764 | 508 | { |
765 | 508 | uint32_t r, t, z; |
766 | 508 | const curve_params *cc; |
767 | 508 | jacobian P, Q; |
768 | | |
769 | | /* |
770 | | * TODO: see about merging the two ladders. Right now, we do |
771 | | * two independent point multiplications, which is a bit |
772 | | * wasteful of CPU resources (but yields short code). |
773 | | */ |
774 | | |
775 | 508 | cc = id_to_curve(curve); |
776 | 508 | if (len != cc->point_len) { |
777 | 0 | return 0; |
778 | 0 | } |
779 | 508 | r = point_decode(&P, A, len, cc); |
780 | 508 | if (B == NULL) { |
781 | 508 | size_t Glen; |
782 | | |
783 | 508 | B = api_generator(curve, &Glen); |
784 | 508 | } |
785 | 508 | r &= point_decode(&Q, B, len, cc); |
786 | 508 | point_mul(&P, x, xlen, cc); |
787 | 508 | point_mul(&Q, y, ylen, cc); |
788 | | |
789 | | /* |
790 | | * We want to compute P+Q. Since the base points A and B are distinct |
791 | | * from infinity, and the multipliers are non-zero and lower than the |
792 | | * curve order, then we know that P and Q are non-infinity. This |
793 | | * leaves two special situations to test for: |
794 | | * -- If P = Q then we must use point_double(). |
795 | | * -- If P+Q = 0 then we must report an error. |
796 | | */ |
797 | 508 | t = point_add(&P, &Q, cc); |
798 | 508 | point_double(&Q, cc); |
799 | 508 | z = br_i31_iszero(P.c[2]); |
800 | | |
801 | | /* |
802 | | * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we |
803 | | * have the following: |
804 | | * |
805 | | * z = 0, t = 0 return P (normal addition) |
806 | | * z = 0, t = 1 return P (normal addition) |
807 | | * z = 1, t = 0 return Q (a 'double' case) |
808 | | * z = 1, t = 1 report an error (P+Q = 0) |
809 | | */ |
810 | 508 | CCOPY(z & ~t, &P, &Q, sizeof Q); |
811 | 508 | point_encode(A, &P, cc); |
812 | 508 | r &= ~(z & t); |
813 | | |
814 | 508 | return r; |
815 | 508 | } |
816 | | |
817 | | /* see bearssl_ec.h */ |
818 | | const br_ec_impl br_ec_prime_i31 = { |
819 | | (uint32_t)0x03800000, |
820 | | &api_generator, |
821 | | &api_order, |
822 | | &api_xoff, |
823 | | &api_mul, |
824 | | &api_mulgen, |
825 | | &api_muladd |
826 | | }; |