/src/openssl32/crypto/bn/bn_gf2m.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright 2002-2021 The OpenSSL Project Authors. All Rights Reserved.  | 
3  |  |  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved  | 
4  |  |  *  | 
5  |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use  | 
6  |  |  * this file except in compliance with the License.  You can obtain a copy  | 
7  |  |  * in the file LICENSE in the source distribution or at  | 
8  |  |  * https://www.openssl.org/source/license.html  | 
9  |  |  */  | 
10  |  |  | 
11  |  | #include <assert.h>  | 
12  |  | #include <limits.h>  | 
13  |  | #include <stdio.h>  | 
14  |  | #include "internal/cryptlib.h"  | 
15  |  | #include "bn_local.h"  | 
16  |  |  | 
17  |  | #ifndef OPENSSL_NO_EC2M  | 
18  |  | # include <openssl/ec.h>  | 
19  |  |  | 
20  |  | /*  | 
21  |  |  * Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should  | 
22  |  |  * fail.  | 
23  |  |  */  | 
24  | 225k  | # define MAX_ITERATIONS 50  | 
25  |  |  | 
26  | 17.7G  | # define SQR_nibble(w)   ((((w) & 8) << 3) \  | 
27  | 17.7G  |                        |  (((w) & 4) << 2) \  | 
28  | 17.7G  |                        |  (((w) & 2) << 1) \  | 
29  | 17.7G  |                        |   ((w) & 1))  | 
30  |  |  | 
31  |  |  | 
32  |  | /* Platform-specific macros to accelerate squaring. */  | 
33  |  | # if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)  | 
34  |  | #  define SQR1(w) \  | 
35  | 1.11G  |     SQR_nibble((w) >> 60) << 56 | SQR_nibble((w) >> 56) << 48 | \  | 
36  | 1.11G  |     SQR_nibble((w) >> 52) << 40 | SQR_nibble((w) >> 48) << 32 | \  | 
37  | 1.11G  |     SQR_nibble((w) >> 44) << 24 | SQR_nibble((w) >> 40) << 16 | \  | 
38  | 1.11G  |     SQR_nibble((w) >> 36) <<  8 | SQR_nibble((w) >> 32)  | 
39  |  | #  define SQR0(w) \  | 
40  | 1.11G  |     SQR_nibble((w) >> 28) << 56 | SQR_nibble((w) >> 24) << 48 | \  | 
41  | 1.11G  |     SQR_nibble((w) >> 20) << 40 | SQR_nibble((w) >> 16) << 32 | \  | 
42  | 1.11G  |     SQR_nibble((w) >> 12) << 24 | SQR_nibble((w) >>  8) << 16 | \  | 
43  | 1.11G  |     SQR_nibble((w) >>  4) <<  8 | SQR_nibble((w)      )  | 
44  |  | # endif  | 
45  |  | # ifdef THIRTY_TWO_BIT  | 
46  |  | #  define SQR1(w) \  | 
47  |  |     SQR_nibble((w) >> 28) << 24 | SQR_nibble((w) >> 24) << 16 | \  | 
48  |  |     SQR_nibble((w) >> 20) <<  8 | SQR_nibble((w) >> 16)  | 
49  |  | #  define SQR0(w) \  | 
50  |  |     SQR_nibble((w) >> 12) << 24 | SQR_nibble((w) >>  8) << 16 | \  | 
51  |  |     SQR_nibble((w) >>  4) <<  8 | SQR_nibble((w)      )  | 
52  |  | # endif  | 
53  |  |  | 
54  |  | # if !defined(OPENSSL_BN_ASM_GF2m)  | 
55  |  | /*  | 
56  |  |  * Product of two polynomials a, b each with degree < BN_BITS2 - 1, result is  | 
57  |  |  * a polynomial r with degree < 2 * BN_BITS - 1 The caller MUST ensure that  | 
58  |  |  * the variables have the right amount of space allocated.  | 
59  |  |  */  | 
60  |  | #  ifdef THIRTY_TWO_BIT  | 
61  |  | static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a,  | 
62  |  |                             const BN_ULONG b)  | 
63  |  | { | 
64  |  |     register BN_ULONG h, l, s;  | 
65  |  |     BN_ULONG tab[8], top2b = a >> 30;  | 
66  |  |     register BN_ULONG a1, a2, a4;  | 
67  |  |  | 
68  |  |     a1 = a & (0x3FFFFFFF);  | 
69  |  |     a2 = a1 << 1;  | 
70  |  |     a4 = a2 << 1;  | 
71  |  |  | 
72  |  |     tab[0] = 0;  | 
73  |  |     tab[1] = a1;  | 
74  |  |     tab[2] = a2;  | 
75  |  |     tab[3] = a1 ^ a2;  | 
76  |  |     tab[4] = a4;  | 
77  |  |     tab[5] = a1 ^ a4;  | 
78  |  |     tab[6] = a2 ^ a4;  | 
79  |  |     tab[7] = a1 ^ a2 ^ a4;  | 
80  |  |  | 
81  |  |     s = tab[b & 0x7];  | 
82  |  |     l = s;  | 
83  |  |     s = tab[b >> 3 & 0x7];  | 
84  |  |     l ^= s << 3;  | 
85  |  |     h = s >> 29;  | 
86  |  |     s = tab[b >> 6 & 0x7];  | 
87  |  |     l ^= s << 6;  | 
88  |  |     h ^= s >> 26;  | 
89  |  |     s = tab[b >> 9 & 0x7];  | 
90  |  |     l ^= s << 9;  | 
91  |  |     h ^= s >> 23;  | 
92  |  |     s = tab[b >> 12 & 0x7];  | 
93  |  |     l ^= s << 12;  | 
94  |  |     h ^= s >> 20;  | 
95  |  |     s = tab[b >> 15 & 0x7];  | 
96  |  |     l ^= s << 15;  | 
97  |  |     h ^= s >> 17;  | 
98  |  |     s = tab[b >> 18 & 0x7];  | 
99  |  |     l ^= s << 18;  | 
100  |  |     h ^= s >> 14;  | 
101  |  |     s = tab[b >> 21 & 0x7];  | 
102  |  |     l ^= s << 21;  | 
103  |  |     h ^= s >> 11;  | 
104  |  |     s = tab[b >> 24 & 0x7];  | 
105  |  |     l ^= s << 24;  | 
106  |  |     h ^= s >> 8;  | 
107  |  |     s = tab[b >> 27 & 0x7];  | 
108  |  |     l ^= s << 27;  | 
109  |  |     h ^= s >> 5;  | 
110  |  |     s = tab[b >> 30];  | 
111  |  |     l ^= s << 30;  | 
112  |  |     h ^= s >> 2;  | 
113  |  |  | 
114  |  |     /* compensate for the top two bits of a */  | 
115  |  |  | 
116  |  |     if (top2b & 01) { | 
117  |  |         l ^= b << 30;  | 
118  |  |         h ^= b >> 2;  | 
119  |  |     }  | 
120  |  |     if (top2b & 02) { | 
121  |  |         l ^= b << 31;  | 
122  |  |         h ^= b >> 1;  | 
123  |  |     }  | 
124  |  |  | 
125  |  |     *r1 = h;  | 
126  |  |     *r0 = l;  | 
127  |  | }  | 
128  |  | #  endif  | 
129  |  | #  if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)  | 
130  |  | static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a,  | 
131  |  |                             const BN_ULONG b)  | 
132  |  | { | 
133  |  |     register BN_ULONG h, l, s;  | 
134  |  |     BN_ULONG tab[16], top3b = a >> 61;  | 
135  |  |     register BN_ULONG a1, a2, a4, a8;  | 
136  |  |  | 
137  |  |     a1 = a & (0x1FFFFFFFFFFFFFFFULL);  | 
138  |  |     a2 = a1 << 1;  | 
139  |  |     a4 = a2 << 1;  | 
140  |  |     a8 = a4 << 1;  | 
141  |  |  | 
142  |  |     tab[0] = 0;  | 
143  |  |     tab[1] = a1;  | 
144  |  |     tab[2] = a2;  | 
145  |  |     tab[3] = a1 ^ a2;  | 
146  |  |     tab[4] = a4;  | 
147  |  |     tab[5] = a1 ^ a4;  | 
148  |  |     tab[6] = a2 ^ a4;  | 
149  |  |     tab[7] = a1 ^ a2 ^ a4;  | 
150  |  |     tab[8] = a8;  | 
151  |  |     tab[9] = a1 ^ a8;  | 
152  |  |     tab[10] = a2 ^ a8;  | 
153  |  |     tab[11] = a1 ^ a2 ^ a8;  | 
154  |  |     tab[12] = a4 ^ a8;  | 
155  |  |     tab[13] = a1 ^ a4 ^ a8;  | 
156  |  |     tab[14] = a2 ^ a4 ^ a8;  | 
157  |  |     tab[15] = a1 ^ a2 ^ a4 ^ a8;  | 
158  |  |  | 
159  |  |     s = tab[b & 0xF];  | 
160  |  |     l = s;  | 
161  |  |     s = tab[b >> 4 & 0xF];  | 
162  |  |     l ^= s << 4;  | 
163  |  |     h = s >> 60;  | 
164  |  |     s = tab[b >> 8 & 0xF];  | 
165  |  |     l ^= s << 8;  | 
166  |  |     h ^= s >> 56;  | 
167  |  |     s = tab[b >> 12 & 0xF];  | 
168  |  |     l ^= s << 12;  | 
169  |  |     h ^= s >> 52;  | 
170  |  |     s = tab[b >> 16 & 0xF];  | 
171  |  |     l ^= s << 16;  | 
172  |  |     h ^= s >> 48;  | 
173  |  |     s = tab[b >> 20 & 0xF];  | 
174  |  |     l ^= s << 20;  | 
175  |  |     h ^= s >> 44;  | 
176  |  |     s = tab[b >> 24 & 0xF];  | 
177  |  |     l ^= s << 24;  | 
178  |  |     h ^= s >> 40;  | 
179  |  |     s = tab[b >> 28 & 0xF];  | 
180  |  |     l ^= s << 28;  | 
181  |  |     h ^= s >> 36;  | 
182  |  |     s = tab[b >> 32 & 0xF];  | 
183  |  |     l ^= s << 32;  | 
184  |  |     h ^= s >> 32;  | 
185  |  |     s = tab[b >> 36 & 0xF];  | 
186  |  |     l ^= s << 36;  | 
187  |  |     h ^= s >> 28;  | 
188  |  |     s = tab[b >> 40 & 0xF];  | 
189  |  |     l ^= s << 40;  | 
190  |  |     h ^= s >> 24;  | 
191  |  |     s = tab[b >> 44 & 0xF];  | 
192  |  |     l ^= s << 44;  | 
193  |  |     h ^= s >> 20;  | 
194  |  |     s = tab[b >> 48 & 0xF];  | 
195  |  |     l ^= s << 48;  | 
196  |  |     h ^= s >> 16;  | 
197  |  |     s = tab[b >> 52 & 0xF];  | 
198  |  |     l ^= s << 52;  | 
199  |  |     h ^= s >> 12;  | 
200  |  |     s = tab[b >> 56 & 0xF];  | 
201  |  |     l ^= s << 56;  | 
202  |  |     h ^= s >> 8;  | 
203  |  |     s = tab[b >> 60];  | 
204  |  |     l ^= s << 60;  | 
205  |  |     h ^= s >> 4;  | 
206  |  |  | 
207  |  |     /* compensate for the top three bits of a */  | 
208  |  |  | 
209  |  |     if (top3b & 01) { | 
210  |  |         l ^= b << 61;  | 
211  |  |         h ^= b >> 3;  | 
212  |  |     }  | 
213  |  |     if (top3b & 02) { | 
214  |  |         l ^= b << 62;  | 
215  |  |         h ^= b >> 2;  | 
216  |  |     }  | 
217  |  |     if (top3b & 04) { | 
218  |  |         l ^= b << 63;  | 
219  |  |         h ^= b >> 1;  | 
220  |  |     }  | 
221  |  |  | 
222  |  |     *r1 = h;  | 
223  |  |     *r0 = l;  | 
224  |  | }  | 
225  |  | #  endif  | 
226  |  |  | 
227  |  | /*  | 
228  |  |  * Product of two polynomials a, b each with degree < 2 * BN_BITS2 - 1,  | 
229  |  |  * result is a polynomial r with degree < 4 * BN_BITS2 - 1 The caller MUST  | 
230  |  |  * ensure that the variables have the right amount of space allocated.  | 
231  |  |  */  | 
232  |  | static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0,  | 
233  |  |                             const BN_ULONG b1, const BN_ULONG b0)  | 
234  |  | { | 
235  |  |     BN_ULONG m1, m0;  | 
236  |  |     /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */  | 
237  |  |     bn_GF2m_mul_1x1(r + 3, r + 2, a1, b1);  | 
238  |  |     bn_GF2m_mul_1x1(r + 1, r, a0, b0);  | 
239  |  |     bn_GF2m_mul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);  | 
240  |  |     /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */  | 
241  |  |     r[2] ^= m1 ^ r[1] ^ r[3];   /* h0 ^= m1 ^ l1 ^ h1; */  | 
242  |  |     r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */  | 
243  |  | }  | 
244  |  | # else  | 
245  |  | void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1,  | 
246  |  |                      BN_ULONG b0);  | 
247  |  | # endif  | 
248  |  |  | 
249  |  | /*  | 
250  |  |  * Add polynomials a and b and store result in r; r could be a or b, a and b  | 
251  |  |  * could be equal; r is the bitwise XOR of a and b.  | 
252  |  |  */  | 
253  |  | int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)  | 
254  | 187M  | { | 
255  | 187M  |     int i;  | 
256  | 187M  |     const BIGNUM *at, *bt;  | 
257  |  |  | 
258  | 187M  |     bn_check_top(a);  | 
259  | 187M  |     bn_check_top(b);  | 
260  |  |  | 
261  | 187M  |     if (a->top < b->top) { | 
262  | 706k  |         at = b;  | 
263  | 706k  |         bt = a;  | 
264  | 186M  |     } else { | 
265  | 186M  |         at = a;  | 
266  | 186M  |         bt = b;  | 
267  | 186M  |     }  | 
268  |  |  | 
269  | 187M  |     if (bn_wexpand(r, at->top) == NULL)  | 
270  | 0  |         return 0;  | 
271  |  |  | 
272  | 1.26G  |     for (i = 0; i < bt->top; i++) { | 
273  | 1.07G  |         r->d[i] = at->d[i] ^ bt->d[i];  | 
274  | 1.07G  |     }  | 
275  | 190M  |     for (; i < at->top; i++) { | 
276  | 3.08M  |         r->d[i] = at->d[i];  | 
277  | 3.08M  |     }  | 
278  |  |  | 
279  | 187M  |     r->top = at->top;  | 
280  | 187M  |     bn_correct_top(r);  | 
281  |  |  | 
282  | 187M  |     return 1;  | 
283  | 187M  | }  | 
284  |  |  | 
285  |  | /*-  | 
286  |  |  * Some functions allow for representation of the irreducible polynomials  | 
287  |  |  * as an int[], say p.  The irreducible f(t) is then of the form:  | 
288  |  |  *     t^p[0] + t^p[1] + ... + t^p[k]  | 
289  |  |  * where m = p[0] > p[1] > ... > p[k] = 0.  | 
290  |  |  */  | 
291  |  |  | 
292  |  | /* Performs modular reduction of a and store result in r.  r could be a. */  | 
293  |  | int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])  | 
294  | 294M  | { | 
295  | 294M  |     int j, k;  | 
296  | 294M  |     int n, dN, d0, d1;  | 
297  | 294M  |     BN_ULONG zz, *z;  | 
298  |  |  | 
299  | 294M  |     bn_check_top(a);  | 
300  |  |  | 
301  | 294M  |     if (p[0] == 0) { | 
302  |  |         /* reduction mod 1 => return 0 */  | 
303  | 0  |         BN_zero(r);  | 
304  | 0  |         return 1;  | 
305  | 0  |     }  | 
306  |  |  | 
307  |  |     /*  | 
308  |  |      * Since the algorithm does reduction in the r value, if a != r, copy the  | 
309  |  |      * contents of a into r so we can do reduction in r.  | 
310  |  |      */  | 
311  | 294M  |     if (a != r) { | 
312  | 294M  |         if (!bn_wexpand(r, a->top))  | 
313  | 0  |             return 0;  | 
314  | 3.62G  |         for (j = 0; j < a->top; j++) { | 
315  | 3.33G  |             r->d[j] = a->d[j];  | 
316  | 3.33G  |         }  | 
317  | 294M  |         r->top = a->top;  | 
318  | 294M  |     }  | 
319  | 294M  |     z = r->d;  | 
320  |  |  | 
321  |  |     /* start reduction */  | 
322  | 294M  |     dN = p[0] / BN_BITS2;  | 
323  | 3.58G  |     for (j = r->top - 1; j > dN;) { | 
324  | 3.29G  |         zz = z[j];  | 
325  | 3.29G  |         if (z[j] == 0) { | 
326  | 1.64G  |             j--;  | 
327  | 1.64G  |             continue;  | 
328  | 1.64G  |         }  | 
329  | 1.64G  |         z[j] = 0;  | 
330  |  |  | 
331  | 6.46G  |         for (k = 1; p[k] != 0; k++) { | 
332  |  |             /* reducing component t^p[k] */  | 
333  | 4.82G  |             n = p[0] - p[k];  | 
334  | 4.82G  |             d0 = n % BN_BITS2;  | 
335  | 4.82G  |             d1 = BN_BITS2 - d0;  | 
336  | 4.82G  |             n /= BN_BITS2;  | 
337  | 4.82G  |             z[j - n] ^= (zz >> d0);  | 
338  | 4.82G  |             if (d0)  | 
339  | 4.81G  |                 z[j - n - 1] ^= (zz << d1);  | 
340  | 4.82G  |         }  | 
341  |  |  | 
342  |  |         /* reducing component t^0 */  | 
343  | 1.64G  |         n = dN;  | 
344  | 1.64G  |         d0 = p[0] % BN_BITS2;  | 
345  | 1.64G  |         d1 = BN_BITS2 - d0;  | 
346  | 1.64G  |         z[j - n] ^= (zz >> d0);  | 
347  | 1.64G  |         if (d0)  | 
348  | 1.64G  |             z[j - n - 1] ^= (zz << d1);  | 
349  | 1.64G  |     }  | 
350  |  |  | 
351  |  |     /* final round of reduction */  | 
352  | 585M  |     while (j == dN) { | 
353  |  |  | 
354  | 584M  |         d0 = p[0] % BN_BITS2;  | 
355  | 584M  |         zz = z[dN] >> d0;  | 
356  | 584M  |         if (zz == 0)  | 
357  | 293M  |             break;  | 
358  | 291M  |         d1 = BN_BITS2 - d0;  | 
359  |  |  | 
360  |  |         /* clear up the top d1 bits */  | 
361  | 291M  |         if (d0)  | 
362  | 291M  |             z[dN] = (z[dN] << d1) >> d1;  | 
363  | 0  |         else  | 
364  | 0  |             z[dN] = 0;  | 
365  | 291M  |         z[0] ^= zz;             /* reduction t^0 component */  | 
366  |  |  | 
367  | 1.13G  |         for (k = 1; p[k] != 0; k++) { | 
368  | 848M  |             BN_ULONG tmp_ulong;  | 
369  |  |  | 
370  |  |             /* reducing component t^p[k] */  | 
371  | 848M  |             n = p[k] / BN_BITS2;  | 
372  | 848M  |             d0 = p[k] % BN_BITS2;  | 
373  | 848M  |             d1 = BN_BITS2 - d0;  | 
374  | 848M  |             z[n] ^= (zz << d0);  | 
375  | 848M  |             if (d0 && (tmp_ulong = zz >> d1))  | 
376  | 28.0M  |                 z[n + 1] ^= tmp_ulong;  | 
377  | 848M  |         }  | 
378  |  |  | 
379  | 291M  |     }  | 
380  |  |  | 
381  | 294M  |     bn_correct_top(r);  | 
382  | 294M  |     return 1;  | 
383  | 294M  | }  | 
384  |  |  | 
385  |  | /*  | 
386  |  |  * Performs modular reduction of a by p and store result in r.  r could be a.  | 
387  |  |  * This function calls down to the BN_GF2m_mod_arr implementation; this wrapper  | 
388  |  |  * function is only provided for convenience; for best performance, use the  | 
389  |  |  * BN_GF2m_mod_arr function.  | 
390  |  |  */  | 
391  |  | int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)  | 
392  | 111k  | { | 
393  | 111k  |     int ret = 0;  | 
394  | 111k  |     int arr[6];  | 
395  | 111k  |     bn_check_top(a);  | 
396  | 111k  |     bn_check_top(p);  | 
397  | 111k  |     ret = BN_GF2m_poly2arr(p, arr, OSSL_NELEM(arr));  | 
398  | 111k  |     if (!ret || ret > (int)OSSL_NELEM(arr)) { | 
399  | 0  |         ERR_raise(ERR_LIB_BN, BN_R_INVALID_LENGTH);  | 
400  | 0  |         return 0;  | 
401  | 0  |     }  | 
402  | 111k  |     ret = BN_GF2m_mod_arr(r, a, arr);  | 
403  | 111k  |     bn_check_top(r);  | 
404  | 111k  |     return ret;  | 
405  | 111k  | }  | 
406  |  |  | 
407  |  | /*  | 
408  |  |  * Compute the product of two polynomials a and b, reduce modulo p, and store  | 
409  |  |  * the result in r.  r could be a or b; a could be b.  | 
410  |  |  */  | 
411  |  | int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,  | 
412  |  |                         const int p[], BN_CTX *ctx)  | 
413  | 98.2M  | { | 
414  | 98.2M  |     int zlen, i, j, k, ret = 0;  | 
415  | 98.2M  |     BIGNUM *s;  | 
416  | 98.2M  |     BN_ULONG x1, x0, y1, y0, zz[4];  | 
417  |  |  | 
418  | 98.2M  |     bn_check_top(a);  | 
419  | 98.2M  |     bn_check_top(b);  | 
420  |  |  | 
421  | 98.2M  |     if (a == b) { | 
422  | 0  |         return BN_GF2m_mod_sqr_arr(r, a, p, ctx);  | 
423  | 0  |     }  | 
424  |  |  | 
425  | 98.2M  |     BN_CTX_start(ctx);  | 
426  | 98.2M  |     if ((s = BN_CTX_get(ctx)) == NULL)  | 
427  | 0  |         goto err;  | 
428  |  |  | 
429  | 98.2M  |     zlen = a->top + b->top + 4;  | 
430  | 98.2M  |     if (!bn_wexpand(s, zlen))  | 
431  | 0  |         goto err;  | 
432  | 98.2M  |     s->top = zlen;  | 
433  |  |  | 
434  | 1.62G  |     for (i = 0; i < zlen; i++)  | 
435  | 1.52G  |         s->d[i] = 0;  | 
436  |  |  | 
437  | 386M  |     for (j = 0; j < b->top; j += 2) { | 
438  | 287M  |         y0 = b->d[j];  | 
439  | 287M  |         y1 = ((j + 1) == b->top) ? 0 : b->d[j + 1];  | 
440  | 1.15G  |         for (i = 0; i < a->top; i += 2) { | 
441  | 867M  |             x0 = a->d[i];  | 
442  | 867M  |             x1 = ((i + 1) == a->top) ? 0 : a->d[i + 1];  | 
443  | 867M  |             bn_GF2m_mul_2x2(zz, x1, x0, y1, y0);  | 
444  | 4.33G  |             for (k = 0; k < 4; k++)  | 
445  | 3.46G  |                 s->d[i + j + k] ^= zz[k];  | 
446  | 867M  |         }  | 
447  | 287M  |     }  | 
448  |  |  | 
449  | 98.2M  |     bn_correct_top(s);  | 
450  | 98.2M  |     if (BN_GF2m_mod_arr(r, s, p))  | 
451  | 98.2M  |         ret = 1;  | 
452  | 98.2M  |     bn_check_top(r);  | 
453  |  |  | 
454  | 98.2M  |  err:  | 
455  | 98.2M  |     BN_CTX_end(ctx);  | 
456  | 98.2M  |     return ret;  | 
457  | 98.2M  | }  | 
458  |  |  | 
459  |  | /*  | 
460  |  |  * Compute the product of two polynomials a and b, reduce modulo p, and store  | 
461  |  |  * the result in r.  r could be a or b; a could equal b. This function calls  | 
462  |  |  * down to the BN_GF2m_mod_mul_arr implementation; this wrapper function is  | 
463  |  |  * only provided for convenience; for best performance, use the  | 
464  |  |  * BN_GF2m_mod_mul_arr function.  | 
465  |  |  */  | 
466  |  | int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,  | 
467  |  |                     const BIGNUM *p, BN_CTX *ctx)  | 
468  | 331k  | { | 
469  | 331k  |     int ret = 0;  | 
470  | 331k  |     const int max = BN_num_bits(p) + 1;  | 
471  | 331k  |     int *arr;  | 
472  |  |  | 
473  | 331k  |     bn_check_top(a);  | 
474  | 331k  |     bn_check_top(b);  | 
475  | 331k  |     bn_check_top(p);  | 
476  |  |  | 
477  | 331k  |     arr = OPENSSL_malloc(sizeof(*arr) * max);  | 
478  | 331k  |     if (arr == NULL)  | 
479  | 0  |         return 0;  | 
480  | 331k  |     ret = BN_GF2m_poly2arr(p, arr, max);  | 
481  | 331k  |     if (!ret || ret > max) { | 
482  | 0  |         ERR_raise(ERR_LIB_BN, BN_R_INVALID_LENGTH);  | 
483  | 0  |         goto err;  | 
484  | 0  |     }  | 
485  | 331k  |     ret = BN_GF2m_mod_mul_arr(r, a, b, arr, ctx);  | 
486  | 331k  |     bn_check_top(r);  | 
487  | 331k  |  err:  | 
488  | 331k  |     OPENSSL_free(arr);  | 
489  | 331k  |     return ret;  | 
490  | 331k  | }  | 
491  |  |  | 
492  |  | /* Square a, reduce the result mod p, and store it in a.  r could be a. */  | 
493  |  | int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[],  | 
494  |  |                         BN_CTX *ctx)  | 
495  | 195M  | { | 
496  | 195M  |     int i, ret = 0;  | 
497  | 195M  |     BIGNUM *s;  | 
498  |  |  | 
499  | 195M  |     bn_check_top(a);  | 
500  | 195M  |     BN_CTX_start(ctx);  | 
501  | 195M  |     if ((s = BN_CTX_get(ctx)) == NULL)  | 
502  | 0  |         goto err;  | 
503  | 195M  |     if (!bn_wexpand(s, 2 * a->top))  | 
504  | 0  |         goto err;  | 
505  |  |  | 
506  | 1.30G  |     for (i = a->top - 1; i >= 0; i--) { | 
507  | 1.11G  |         s->d[2 * i + 1] = SQR1(a->d[i]);  | 
508  | 1.11G  |         s->d[2 * i] = SQR0(a->d[i]);  | 
509  | 1.11G  |     }  | 
510  |  |  | 
511  | 195M  |     s->top = 2 * a->top;  | 
512  | 195M  |     bn_correct_top(s);  | 
513  | 195M  |     if (!BN_GF2m_mod_arr(r, s, p))  | 
514  | 0  |         goto err;  | 
515  | 195M  |     bn_check_top(r);  | 
516  | 195M  |     ret = 1;  | 
517  | 195M  |  err:  | 
518  | 195M  |     BN_CTX_end(ctx);  | 
519  | 195M  |     return ret;  | 
520  | 195M  | }  | 
521  |  |  | 
522  |  | /*  | 
523  |  |  * Square a, reduce the result mod p, and store it in a.  r could be a. This  | 
524  |  |  * function calls down to the BN_GF2m_mod_sqr_arr implementation; this  | 
525  |  |  * wrapper function is only provided for convenience; for best performance,  | 
526  |  |  * use the BN_GF2m_mod_sqr_arr function.  | 
527  |  |  */  | 
528  |  | int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)  | 
529  | 0  | { | 
530  | 0  |     int ret = 0;  | 
531  | 0  |     const int max = BN_num_bits(p) + 1;  | 
532  | 0  |     int *arr;  | 
533  |  | 
  | 
534  | 0  |     bn_check_top(a);  | 
535  | 0  |     bn_check_top(p);  | 
536  |  | 
  | 
537  | 0  |     arr = OPENSSL_malloc(sizeof(*arr) * max);  | 
538  | 0  |     if (arr == NULL)  | 
539  | 0  |         return 0;  | 
540  | 0  |     ret = BN_GF2m_poly2arr(p, arr, max);  | 
541  | 0  |     if (!ret || ret > max) { | 
542  | 0  |         ERR_raise(ERR_LIB_BN, BN_R_INVALID_LENGTH);  | 
543  | 0  |         goto err;  | 
544  | 0  |     }  | 
545  | 0  |     ret = BN_GF2m_mod_sqr_arr(r, a, arr, ctx);  | 
546  | 0  |     bn_check_top(r);  | 
547  | 0  |  err:  | 
548  | 0  |     OPENSSL_free(arr);  | 
549  | 0  |     return ret;  | 
550  | 0  | }  | 
551  |  |  | 
552  |  | /*  | 
553  |  |  * Invert a, reduce modulo p, and store the result in r. r could be a. Uses  | 
554  |  |  * Modified Almost Inverse Algorithm (Algorithm 10) from Hankerson, D.,  | 
555  |  |  * Hernandez, J.L., and Menezes, A.  "Software Implementation of Elliptic  | 
556  |  |  * Curve Cryptography Over Binary Fields".  | 
557  |  |  */  | 
558  |  | static int BN_GF2m_mod_inv_vartime(BIGNUM *r, const BIGNUM *a,  | 
559  |  |                                    const BIGNUM *p, BN_CTX *ctx)  | 
560  | 111k  | { | 
561  | 111k  |     BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp;  | 
562  | 111k  |     int ret = 0;  | 
563  |  |  | 
564  | 111k  |     bn_check_top(a);  | 
565  | 111k  |     bn_check_top(p);  | 
566  |  |  | 
567  | 111k  |     BN_CTX_start(ctx);  | 
568  |  |  | 
569  | 111k  |     b = BN_CTX_get(ctx);  | 
570  | 111k  |     c = BN_CTX_get(ctx);  | 
571  | 111k  |     u = BN_CTX_get(ctx);  | 
572  | 111k  |     v = BN_CTX_get(ctx);  | 
573  | 111k  |     if (v == NULL)  | 
574  | 0  |         goto err;  | 
575  |  |  | 
576  | 111k  |     if (!BN_GF2m_mod(u, a, p))  | 
577  | 0  |         goto err;  | 
578  | 111k  |     if (BN_is_zero(u))  | 
579  | 0  |         goto err;  | 
580  |  |  | 
581  | 111k  |     if (!BN_copy(v, p))  | 
582  | 0  |         goto err;  | 
583  |  | # if 0  | 
584  |  |     if (!BN_one(b))  | 
585  |  |         goto err;  | 
586  |  |  | 
587  |  |     while (1) { | 
588  |  |         while (!BN_is_odd(u)) { | 
589  |  |             if (BN_is_zero(u))  | 
590  |  |                 goto err;  | 
591  |  |             if (!BN_rshift1(u, u))  | 
592  |  |                 goto err;  | 
593  |  |             if (BN_is_odd(b)) { | 
594  |  |                 if (!BN_GF2m_add(b, b, p))  | 
595  |  |                     goto err;  | 
596  |  |             }  | 
597  |  |             if (!BN_rshift1(b, b))  | 
598  |  |                 goto err;  | 
599  |  |         }  | 
600  |  |  | 
601  |  |         if (BN_abs_is_word(u, 1))  | 
602  |  |             break;  | 
603  |  |  | 
604  |  |         if (BN_num_bits(u) < BN_num_bits(v)) { | 
605  |  |             tmp = u;  | 
606  |  |             u = v;  | 
607  |  |             v = tmp;  | 
608  |  |             tmp = b;  | 
609  |  |             b = c;  | 
610  |  |             c = tmp;  | 
611  |  |         }  | 
612  |  |  | 
613  |  |         if (!BN_GF2m_add(u, u, v))  | 
614  |  |             goto err;  | 
615  |  |         if (!BN_GF2m_add(b, b, c))  | 
616  |  |             goto err;  | 
617  |  |     }  | 
618  |  | # else  | 
619  | 111k  |     { | 
620  | 111k  |         int i;  | 
621  | 111k  |         int ubits = BN_num_bits(u);  | 
622  | 111k  |         int vbits = BN_num_bits(v); /* v is copy of p */  | 
623  | 111k  |         int top = p->top;  | 
624  | 111k  |         BN_ULONG *udp, *bdp, *vdp, *cdp;  | 
625  |  |  | 
626  | 111k  |         if (!bn_wexpand(u, top))  | 
627  | 0  |             goto err;  | 
628  | 111k  |         udp = u->d;  | 
629  | 118k  |         for (i = u->top; i < top; i++)  | 
630  | 6.21k  |             udp[i] = 0;  | 
631  | 111k  |         u->top = top;  | 
632  | 111k  |         if (!bn_wexpand(b, top))  | 
633  | 0  |           goto err;  | 
634  | 111k  |         bdp = b->d;  | 
635  | 111k  |         bdp[0] = 1;  | 
636  | 408k  |         for (i = 1; i < top; i++)  | 
637  | 296k  |             bdp[i] = 0;  | 
638  | 111k  |         b->top = top;  | 
639  | 111k  |         if (!bn_wexpand(c, top))  | 
640  | 0  |           goto err;  | 
641  | 111k  |         cdp = c->d;  | 
642  | 520k  |         for (i = 0; i < top; i++)  | 
643  | 408k  |             cdp[i] = 0;  | 
644  | 111k  |         c->top = top;  | 
645  | 111k  |         vdp = v->d;             /* It pays off to "cache" *->d pointers,  | 
646  |  |                                  * because it allows optimizer to be more  | 
647  |  |                                  * aggressive. But we don't have to "cache"  | 
648  |  |                                  * p->d, because *p is declared 'const'... */  | 
649  | 18.5M  |         while (1) { | 
650  | 55.4M  |             while (ubits && !(udp[0] & 1)) { | 
651  | 36.8M  |                 BN_ULONG u0, u1, b0, b1, mask;  | 
652  |  |  | 
653  | 36.8M  |                 u0 = udp[0];  | 
654  | 36.8M  |                 b0 = bdp[0];  | 
655  | 36.8M  |                 mask = (BN_ULONG)0 - (b0 & 1);  | 
656  | 36.8M  |                 b0 ^= p->d[0] & mask;  | 
657  | 155M  |                 for (i = 0; i < top - 1; i++) { | 
658  | 118M  |                     u1 = udp[i + 1];  | 
659  | 118M  |                     udp[i] = ((u0 >> 1) | (u1 << (BN_BITS2 - 1))) & BN_MASK2;  | 
660  | 118M  |                     u0 = u1;  | 
661  | 118M  |                     b1 = bdp[i + 1] ^ (p->d[i + 1] & mask);  | 
662  | 118M  |                     bdp[i] = ((b0 >> 1) | (b1 << (BN_BITS2 - 1))) & BN_MASK2;  | 
663  | 118M  |                     b0 = b1;  | 
664  | 118M  |                 }  | 
665  | 36.8M  |                 udp[i] = u0 >> 1;  | 
666  | 36.8M  |                 bdp[i] = b0 >> 1;  | 
667  | 36.8M  |                 ubits--;  | 
668  | 36.8M  |             }  | 
669  |  |  | 
670  | 18.5M  |             if (ubits <= BN_BITS2) { | 
671  | 5.84M  |                 if (udp[0] == 0) /* poly was reducible */  | 
672  | 0  |                     goto err;  | 
673  | 5.84M  |                 if (udp[0] == 1)  | 
674  | 111k  |                     break;  | 
675  | 5.84M  |             }  | 
676  |  |  | 
677  | 18.4M  |             if (ubits < vbits) { | 
678  | 7.45M  |                 i = ubits;  | 
679  | 7.45M  |                 ubits = vbits;  | 
680  | 7.45M  |                 vbits = i;  | 
681  | 7.45M  |                 tmp = u;  | 
682  | 7.45M  |                 u = v;  | 
683  | 7.45M  |                 v = tmp;  | 
684  | 7.45M  |                 tmp = b;  | 
685  | 7.45M  |                 b = c;  | 
686  | 7.45M  |                 c = tmp;  | 
687  | 7.45M  |                 udp = vdp;  | 
688  | 7.45M  |                 vdp = v->d;  | 
689  | 7.45M  |                 bdp = cdp;  | 
690  | 7.45M  |                 cdp = c->d;  | 
691  | 7.45M  |             }  | 
692  | 96.4M  |             for (i = 0; i < top; i++) { | 
693  | 77.9M  |                 udp[i] ^= vdp[i];  | 
694  | 77.9M  |                 bdp[i] ^= cdp[i];  | 
695  | 77.9M  |             }  | 
696  | 18.4M  |             if (ubits == vbits) { | 
697  | 3.70M  |                 BN_ULONG ul;  | 
698  | 3.70M  |                 int utop = (ubits - 1) / BN_BITS2;  | 
699  |  |  | 
700  | 3.79M  |                 while ((ul = udp[utop]) == 0 && utop)  | 
701  | 92.5k  |                     utop--;  | 
702  | 3.70M  |                 ubits = utop * BN_BITS2 + BN_num_bits_word(ul);  | 
703  | 3.70M  |             }  | 
704  | 18.4M  |         }  | 
705  | 111k  |         bn_correct_top(b);  | 
706  | 111k  |     }  | 
707  | 0  | # endif  | 
708  |  |  | 
709  | 111k  |     if (!BN_copy(r, b))  | 
710  | 0  |         goto err;  | 
711  | 111k  |     bn_check_top(r);  | 
712  | 111k  |     ret = 1;  | 
713  |  |  | 
714  | 111k  |  err:  | 
715  |  | # ifdef BN_DEBUG  | 
716  |  |     /* BN_CTX_end would complain about the expanded form */  | 
717  |  |     bn_correct_top(c);  | 
718  |  |     bn_correct_top(u);  | 
719  |  |     bn_correct_top(v);  | 
720  |  | # endif  | 
721  | 111k  |     BN_CTX_end(ctx);  | 
722  | 111k  |     return ret;  | 
723  | 111k  | }  | 
724  |  |  | 
725  |  | /*-  | 
726  |  |  * Wrapper for BN_GF2m_mod_inv_vartime that blinds the input before calling.  | 
727  |  |  * This is not constant time.  | 
728  |  |  * But it does eliminate first order deduction on the input.  | 
729  |  |  */  | 
730  |  | int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)  | 
731  | 111k  | { | 
732  | 111k  |     BIGNUM *b = NULL;  | 
733  | 111k  |     int ret = 0;  | 
734  | 111k  |     int numbits;  | 
735  |  |  | 
736  | 111k  |     BN_CTX_start(ctx);  | 
737  | 111k  |     if ((b = BN_CTX_get(ctx)) == NULL)  | 
738  | 0  |         goto err;  | 
739  |  |  | 
740  |  |     /* Fail on a non-sensical input p value */  | 
741  | 111k  |     numbits = BN_num_bits(p);  | 
742  | 111k  |     if (numbits <= 1)  | 
743  | 0  |         goto err;  | 
744  |  |  | 
745  |  |     /* generate blinding value */  | 
746  | 111k  |     do { | 
747  | 111k  |         if (!BN_priv_rand_ex(b, numbits - 1,  | 
748  | 111k  |                              BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY, 0, ctx))  | 
749  | 0  |             goto err;  | 
750  | 111k  |     } while (BN_is_zero(b));  | 
751  |  |  | 
752  |  |     /* r := a * b */  | 
753  | 111k  |     if (!BN_GF2m_mod_mul(r, a, b, p, ctx))  | 
754  | 0  |         goto err;  | 
755  |  |  | 
756  |  |     /* r := 1/(a * b) */  | 
757  | 111k  |     if (!BN_GF2m_mod_inv_vartime(r, r, p, ctx))  | 
758  | 0  |         goto err;  | 
759  |  |  | 
760  |  |     /* r := b/(a * b) = 1/a */  | 
761  | 111k  |     if (!BN_GF2m_mod_mul(r, r, b, p, ctx))  | 
762  | 0  |         goto err;  | 
763  |  |  | 
764  | 111k  |     ret = 1;  | 
765  |  |  | 
766  | 111k  |  err:  | 
767  | 111k  |     BN_CTX_end(ctx);  | 
768  | 111k  |     return ret;  | 
769  | 111k  | }  | 
770  |  |  | 
771  |  | /*  | 
772  |  |  * Invert xx, reduce modulo p, and store the result in r. r could be xx.  | 
773  |  |  * This function calls down to the BN_GF2m_mod_inv implementation; this  | 
774  |  |  * wrapper function is only provided for convenience; for best performance,  | 
775  |  |  * use the BN_GF2m_mod_inv function.  | 
776  |  |  */  | 
777  |  | int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[],  | 
778  |  |                         BN_CTX *ctx)  | 
779  | 0  | { | 
780  | 0  |     BIGNUM *field;  | 
781  | 0  |     int ret = 0;  | 
782  |  | 
  | 
783  | 0  |     bn_check_top(xx);  | 
784  | 0  |     BN_CTX_start(ctx);  | 
785  | 0  |     if ((field = BN_CTX_get(ctx)) == NULL)  | 
786  | 0  |         goto err;  | 
787  | 0  |     if (!BN_GF2m_arr2poly(p, field))  | 
788  | 0  |         goto err;  | 
789  |  |  | 
790  | 0  |     ret = BN_GF2m_mod_inv(r, xx, field, ctx);  | 
791  | 0  |     bn_check_top(r);  | 
792  |  | 
  | 
793  | 0  |  err:  | 
794  | 0  |     BN_CTX_end(ctx);  | 
795  | 0  |     return ret;  | 
796  | 0  | }  | 
797  |  |  | 
798  |  | /*  | 
799  |  |  * Divide y by x, reduce modulo p, and store the result in r. r could be x  | 
800  |  |  * or y, x could equal y.  | 
801  |  |  */  | 
802  |  | int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x,  | 
803  |  |                     const BIGNUM *p, BN_CTX *ctx)  | 
804  | 107k  | { | 
805  | 107k  |     BIGNUM *xinv = NULL;  | 
806  | 107k  |     int ret = 0;  | 
807  |  |  | 
808  | 107k  |     bn_check_top(y);  | 
809  | 107k  |     bn_check_top(x);  | 
810  | 107k  |     bn_check_top(p);  | 
811  |  |  | 
812  | 107k  |     BN_CTX_start(ctx);  | 
813  | 107k  |     xinv = BN_CTX_get(ctx);  | 
814  | 107k  |     if (xinv == NULL)  | 
815  | 0  |         goto err;  | 
816  |  |  | 
817  | 107k  |     if (!BN_GF2m_mod_inv(xinv, x, p, ctx))  | 
818  | 0  |         goto err;  | 
819  | 107k  |     if (!BN_GF2m_mod_mul(r, y, xinv, p, ctx))  | 
820  | 0  |         goto err;  | 
821  | 107k  |     bn_check_top(r);  | 
822  | 107k  |     ret = 1;  | 
823  |  |  | 
824  | 107k  |  err:  | 
825  | 107k  |     BN_CTX_end(ctx);  | 
826  | 107k  |     return ret;  | 
827  | 107k  | }  | 
828  |  |  | 
829  |  | /*  | 
830  |  |  * Divide yy by xx, reduce modulo p, and store the result in r. r could be xx  | 
831  |  |  * * or yy, xx could equal yy. This function calls down to the  | 
832  |  |  * BN_GF2m_mod_div implementation; this wrapper function is only provided for  | 
833  |  |  * convenience; for best performance, use the BN_GF2m_mod_div function.  | 
834  |  |  */  | 
835  |  | int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx,  | 
836  |  |                         const int p[], BN_CTX *ctx)  | 
837  | 0  | { | 
838  | 0  |     BIGNUM *field;  | 
839  | 0  |     int ret = 0;  | 
840  |  | 
  | 
841  | 0  |     bn_check_top(yy);  | 
842  | 0  |     bn_check_top(xx);  | 
843  |  | 
  | 
844  | 0  |     BN_CTX_start(ctx);  | 
845  | 0  |     if ((field = BN_CTX_get(ctx)) == NULL)  | 
846  | 0  |         goto err;  | 
847  | 0  |     if (!BN_GF2m_arr2poly(p, field))  | 
848  | 0  |         goto err;  | 
849  |  |  | 
850  | 0  |     ret = BN_GF2m_mod_div(r, yy, xx, field, ctx);  | 
851  | 0  |     bn_check_top(r);  | 
852  |  | 
  | 
853  | 0  |  err:  | 
854  | 0  |     BN_CTX_end(ctx);  | 
855  | 0  |     return ret;  | 
856  | 0  | }  | 
857  |  |  | 
858  |  | /*  | 
859  |  |  * Compute the bth power of a, reduce modulo p, and store the result in r.  r  | 
860  |  |  * could be a. Uses simple square-and-multiply algorithm A.5.1 from IEEE  | 
861  |  |  * P1363.  | 
862  |  |  */  | 
863  |  | int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,  | 
864  |  |                         const int p[], BN_CTX *ctx)  | 
865  | 13.8k  | { | 
866  | 13.8k  |     int ret = 0, i, n;  | 
867  | 13.8k  |     BIGNUM *u;  | 
868  |  |  | 
869  | 13.8k  |     bn_check_top(a);  | 
870  | 13.8k  |     bn_check_top(b);  | 
871  |  |  | 
872  | 13.8k  |     if (BN_is_zero(b))  | 
873  | 0  |         return BN_one(r);  | 
874  |  |  | 
875  | 13.8k  |     if (BN_abs_is_word(b, 1))  | 
876  | 0  |         return (BN_copy(r, a) != NULL);  | 
877  |  |  | 
878  | 13.8k  |     BN_CTX_start(ctx);  | 
879  | 13.8k  |     if ((u = BN_CTX_get(ctx)) == NULL)  | 
880  | 0  |         goto err;  | 
881  |  |  | 
882  | 13.8k  |     if (!BN_GF2m_mod_arr(u, a, p))  | 
883  | 0  |         goto err;  | 
884  |  |  | 
885  | 13.8k  |     n = BN_num_bits(b) - 1;  | 
886  | 2.06M  |     for (i = n - 1; i >= 0; i--) { | 
887  | 2.05M  |         if (!BN_GF2m_mod_sqr_arr(u, u, p, ctx))  | 
888  | 0  |             goto err;  | 
889  | 2.05M  |         if (BN_is_bit_set(b, i)) { | 
890  | 0  |             if (!BN_GF2m_mod_mul_arr(u, u, a, p, ctx))  | 
891  | 0  |                 goto err;  | 
892  | 0  |         }  | 
893  | 2.05M  |     }  | 
894  | 13.8k  |     if (!BN_copy(r, u))  | 
895  | 0  |         goto err;  | 
896  | 13.8k  |     bn_check_top(r);  | 
897  | 13.8k  |     ret = 1;  | 
898  | 13.8k  |  err:  | 
899  | 13.8k  |     BN_CTX_end(ctx);  | 
900  | 13.8k  |     return ret;  | 
901  | 13.8k  | }  | 
902  |  |  | 
903  |  | /*  | 
904  |  |  * Compute the bth power of a, reduce modulo p, and store the result in r.  r  | 
905  |  |  * could be a. This function calls down to the BN_GF2m_mod_exp_arr  | 
906  |  |  * implementation; this wrapper function is only provided for convenience;  | 
907  |  |  * for best performance, use the BN_GF2m_mod_exp_arr function.  | 
908  |  |  */  | 
909  |  | int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,  | 
910  |  |                     const BIGNUM *p, BN_CTX *ctx)  | 
911  | 0  | { | 
912  | 0  |     int ret = 0;  | 
913  | 0  |     const int max = BN_num_bits(p) + 1;  | 
914  | 0  |     int *arr;  | 
915  |  | 
  | 
916  | 0  |     bn_check_top(a);  | 
917  | 0  |     bn_check_top(b);  | 
918  | 0  |     bn_check_top(p);  | 
919  |  | 
  | 
920  | 0  |     arr = OPENSSL_malloc(sizeof(*arr) * max);  | 
921  | 0  |     if (arr == NULL)  | 
922  | 0  |         return 0;  | 
923  | 0  |     ret = BN_GF2m_poly2arr(p, arr, max);  | 
924  | 0  |     if (!ret || ret > max) { | 
925  | 0  |         ERR_raise(ERR_LIB_BN, BN_R_INVALID_LENGTH);  | 
926  | 0  |         goto err;  | 
927  | 0  |     }  | 
928  | 0  |     ret = BN_GF2m_mod_exp_arr(r, a, b, arr, ctx);  | 
929  | 0  |     bn_check_top(r);  | 
930  | 0  |  err:  | 
931  | 0  |     OPENSSL_free(arr);  | 
932  | 0  |     return ret;  | 
933  | 0  | }  | 
934  |  |  | 
935  |  | /*  | 
936  |  |  * Compute the square root of a, reduce modulo p, and store the result in r.  | 
937  |  |  * r could be a. Uses exponentiation as in algorithm A.4.1 from IEEE P1363.  | 
938  |  |  */  | 
939  |  | int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[],  | 
940  |  |                          BN_CTX *ctx)  | 
941  | 13.8k  | { | 
942  | 13.8k  |     int ret = 0;  | 
943  | 13.8k  |     BIGNUM *u;  | 
944  |  |  | 
945  | 13.8k  |     bn_check_top(a);  | 
946  |  |  | 
947  | 13.8k  |     if (p[0] == 0) { | 
948  |  |         /* reduction mod 1 => return 0 */  | 
949  | 0  |         BN_zero(r);  | 
950  | 0  |         return 1;  | 
951  | 0  |     }  | 
952  |  |  | 
953  | 13.8k  |     BN_CTX_start(ctx);  | 
954  | 13.8k  |     if ((u = BN_CTX_get(ctx)) == NULL)  | 
955  | 0  |         goto err;  | 
956  |  |  | 
957  | 13.8k  |     if (!BN_set_bit(u, p[0] - 1))  | 
958  | 0  |         goto err;  | 
959  | 13.8k  |     ret = BN_GF2m_mod_exp_arr(r, a, u, p, ctx);  | 
960  | 13.8k  |     bn_check_top(r);  | 
961  |  |  | 
962  | 13.8k  |  err:  | 
963  | 13.8k  |     BN_CTX_end(ctx);  | 
964  | 13.8k  |     return ret;  | 
965  | 13.8k  | }  | 
966  |  |  | 
967  |  | /*  | 
968  |  |  * Compute the square root of a, reduce modulo p, and store the result in r.  | 
969  |  |  * r could be a. This function calls down to the BN_GF2m_mod_sqrt_arr  | 
970  |  |  * implementation; this wrapper function is only provided for convenience;  | 
971  |  |  * for best performance, use the BN_GF2m_mod_sqrt_arr function.  | 
972  |  |  */  | 
973  |  | int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)  | 
974  | 0  | { | 
975  | 0  |     int ret = 0;  | 
976  | 0  |     const int max = BN_num_bits(p) + 1;  | 
977  | 0  |     int *arr;  | 
978  |  | 
  | 
979  | 0  |     bn_check_top(a);  | 
980  | 0  |     bn_check_top(p);  | 
981  |  | 
  | 
982  | 0  |     arr = OPENSSL_malloc(sizeof(*arr) * max);  | 
983  | 0  |     if (arr == NULL)  | 
984  | 0  |         return 0;  | 
985  | 0  |     ret = BN_GF2m_poly2arr(p, arr, max);  | 
986  | 0  |     if (!ret || ret > max) { | 
987  | 0  |         ERR_raise(ERR_LIB_BN, BN_R_INVALID_LENGTH);  | 
988  | 0  |         goto err;  | 
989  | 0  |     }  | 
990  | 0  |     ret = BN_GF2m_mod_sqrt_arr(r, a, arr, ctx);  | 
991  | 0  |     bn_check_top(r);  | 
992  | 0  |  err:  | 
993  | 0  |     OPENSSL_free(arr);  | 
994  | 0  |     return ret;  | 
995  | 0  | }  | 
996  |  |  | 
997  |  | /*  | 
998  |  |  * Find r such that r^2 + r = a mod p.  r could be a. If no r exists returns  | 
999  |  |  * 0. Uses algorithms A.4.7 and A.4.6 from IEEE P1363.  | 
1000  |  |  */  | 
1001  |  | int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[],  | 
1002  |  |                                BN_CTX *ctx)  | 
1003  | 83.5k  | { | 
1004  | 83.5k  |     int ret = 0, count = 0, j;  | 
1005  | 83.5k  |     BIGNUM *a, *z, *rho, *w, *w2, *tmp;  | 
1006  |  |  | 
1007  | 83.5k  |     bn_check_top(a_);  | 
1008  |  |  | 
1009  | 83.5k  |     if (p[0] == 0) { | 
1010  |  |         /* reduction mod 1 => return 0 */  | 
1011  | 0  |         BN_zero(r);  | 
1012  | 0  |         return 1;  | 
1013  | 0  |     }  | 
1014  |  |  | 
1015  | 83.5k  |     BN_CTX_start(ctx);  | 
1016  | 83.5k  |     a = BN_CTX_get(ctx);  | 
1017  | 83.5k  |     z = BN_CTX_get(ctx);  | 
1018  | 83.5k  |     w = BN_CTX_get(ctx);  | 
1019  | 83.5k  |     if (w == NULL)  | 
1020  | 0  |         goto err;  | 
1021  |  |  | 
1022  | 83.5k  |     if (!BN_GF2m_mod_arr(a, a_, p))  | 
1023  | 0  |         goto err;  | 
1024  |  |  | 
1025  | 83.5k  |     if (BN_is_zero(a)) { | 
1026  | 1.01k  |         BN_zero(r);  | 
1027  | 1.01k  |         ret = 1;  | 
1028  | 1.01k  |         goto err;  | 
1029  | 1.01k  |     }  | 
1030  |  |  | 
1031  | 82.5k  |     if (p[0] & 0x1) {           /* m is odd */ | 
1032  |  |         /* compute half-trace of a */  | 
1033  | 47.9k  |         if (!BN_copy(z, a))  | 
1034  | 0  |             goto err;  | 
1035  | 3.99M  |         for (j = 1; j <= (p[0] - 1) / 2; j++) { | 
1036  | 3.94M  |             if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx))  | 
1037  | 0  |                 goto err;  | 
1038  | 3.94M  |             if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx))  | 
1039  | 0  |                 goto err;  | 
1040  | 3.94M  |             if (!BN_GF2m_add(z, z, a))  | 
1041  | 0  |                 goto err;  | 
1042  | 3.94M  |         }  | 
1043  |  |  | 
1044  | 47.9k  |     } else {                    /* m is even */ | 
1045  |  |  | 
1046  | 34.5k  |         rho = BN_CTX_get(ctx);  | 
1047  | 34.5k  |         w2 = BN_CTX_get(ctx);  | 
1048  | 34.5k  |         tmp = BN_CTX_get(ctx);  | 
1049  | 34.5k  |         if (tmp == NULL)  | 
1050  | 0  |             goto err;  | 
1051  | 256k  |         do { | 
1052  | 256k  |             if (!BN_priv_rand_ex(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY,  | 
1053  | 256k  |                                  0, ctx))  | 
1054  | 0  |                 goto err;  | 
1055  | 256k  |             if (!BN_GF2m_mod_arr(rho, rho, p))  | 
1056  | 0  |                 goto err;  | 
1057  | 256k  |             BN_zero(z);  | 
1058  | 256k  |             if (!BN_copy(w, rho))  | 
1059  | 0  |                 goto err;  | 
1060  | 89.0M  |             for (j = 1; j <= p[0] - 1; j++) { | 
1061  | 88.8M  |                 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx))  | 
1062  | 0  |                     goto err;  | 
1063  | 88.8M  |                 if (!BN_GF2m_mod_sqr_arr(w2, w, p, ctx))  | 
1064  | 0  |                     goto err;  | 
1065  | 88.8M  |                 if (!BN_GF2m_mod_mul_arr(tmp, w2, a, p, ctx))  | 
1066  | 0  |                     goto err;  | 
1067  | 88.8M  |                 if (!BN_GF2m_add(z, z, tmp))  | 
1068  | 0  |                     goto err;  | 
1069  | 88.8M  |                 if (!BN_GF2m_add(w, w2, rho))  | 
1070  | 0  |                     goto err;  | 
1071  | 88.8M  |             }  | 
1072  | 256k  |             count++;  | 
1073  | 256k  |         } while (BN_is_zero(w) && (count < MAX_ITERATIONS));  | 
1074  | 34.5k  |         if (BN_is_zero(w)) { | 
1075  | 4.17k  |             ERR_raise(ERR_LIB_BN, BN_R_TOO_MANY_ITERATIONS);  | 
1076  | 4.17k  |             goto err;  | 
1077  | 4.17k  |         }  | 
1078  | 34.5k  |     }  | 
1079  |  |  | 
1080  | 78.3k  |     if (!BN_GF2m_mod_sqr_arr(w, z, p, ctx))  | 
1081  | 0  |         goto err;  | 
1082  | 78.3k  |     if (!BN_GF2m_add(w, z, w))  | 
1083  | 0  |         goto err;  | 
1084  | 78.3k  |     if (BN_GF2m_cmp(w, a)) { | 
1085  | 26.9k  |         ERR_raise(ERR_LIB_BN, BN_R_NO_SOLUTION);  | 
1086  | 26.9k  |         goto err;  | 
1087  | 26.9k  |     }  | 
1088  |  |  | 
1089  | 51.3k  |     if (!BN_copy(r, z))  | 
1090  | 0  |         goto err;  | 
1091  | 51.3k  |     bn_check_top(r);  | 
1092  |  |  | 
1093  | 51.3k  |     ret = 1;  | 
1094  |  |  | 
1095  | 83.5k  |  err:  | 
1096  | 83.5k  |     BN_CTX_end(ctx);  | 
1097  | 83.5k  |     return ret;  | 
1098  | 51.3k  | }  | 
1099  |  |  | 
1100  |  | /*  | 
1101  |  |  * Find r such that r^2 + r = a mod p.  r could be a. If no r exists returns  | 
1102  |  |  * 0. This function calls down to the BN_GF2m_mod_solve_quad_arr  | 
1103  |  |  * implementation; this wrapper function is only provided for convenience;  | 
1104  |  |  * for best performance, use the BN_GF2m_mod_solve_quad_arr function.  | 
1105  |  |  */  | 
1106  |  | int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,  | 
1107  |  |                            BN_CTX *ctx)  | 
1108  | 0  | { | 
1109  | 0  |     int ret = 0;  | 
1110  | 0  |     const int max = BN_num_bits(p) + 1;  | 
1111  | 0  |     int *arr;  | 
1112  |  | 
  | 
1113  | 0  |     bn_check_top(a);  | 
1114  | 0  |     bn_check_top(p);  | 
1115  |  | 
  | 
1116  | 0  |     arr = OPENSSL_malloc(sizeof(*arr) * max);  | 
1117  | 0  |     if (arr == NULL)  | 
1118  | 0  |         goto err;  | 
1119  | 0  |     ret = BN_GF2m_poly2arr(p, arr, max);  | 
1120  | 0  |     if (!ret || ret > max) { | 
1121  | 0  |         ERR_raise(ERR_LIB_BN, BN_R_INVALID_LENGTH);  | 
1122  | 0  |         goto err;  | 
1123  | 0  |     }  | 
1124  | 0  |     ret = BN_GF2m_mod_solve_quad_arr(r, a, arr, ctx);  | 
1125  | 0  |     bn_check_top(r);  | 
1126  | 0  |  err:  | 
1127  | 0  |     OPENSSL_free(arr);  | 
1128  | 0  |     return ret;  | 
1129  | 0  | }  | 
1130  |  |  | 
1131  |  | /*  | 
1132  |  |  * Convert the bit-string representation of a polynomial ( \sum_{i=0}^n a_i * | 
1133  |  |  * x^i) into an array of integers corresponding to the bits with non-zero  | 
1134  |  |  * coefficient.  The array is intended to be suitable for use with  | 
1135  |  |  * `BN_GF2m_mod_arr()`, and so the constant term of the polynomial must not be  | 
1136  |  |  * zero.  This translates to a requirement that the input BIGNUM `a` is odd.  | 
1137  |  |  *  | 
1138  |  |  * Given sufficient room, the array is terminated with -1.  Up to max elements  | 
1139  |  |  * of the array will be filled.  | 
1140  |  |  *  | 
1141  |  |  * The return value is total number of array elements that would be filled if  | 
1142  |  |  * array was large enough, including the terminating `-1`.  It is `0` when `a`  | 
1143  |  |  * is not odd or the constant term is zero contrary to requirement.  | 
1144  |  |  *  | 
1145  |  |  * The return value is also `0` when the leading exponent exceeds  | 
1146  |  |  * `OPENSSL_ECC_MAX_FIELD_BITS`, this guards against CPU exhaustion attacks,  | 
1147  |  |  */  | 
1148  |  | int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max)  | 
1149  | 641k  | { | 
1150  | 641k  |     int i, j, k = 0;  | 
1151  | 641k  |     BN_ULONG mask;  | 
1152  |  |  | 
1153  | 641k  |     if (!BN_is_odd(a))  | 
1154  | 0  |         return 0;  | 
1155  |  |  | 
1156  | 3.07M  |     for (i = a->top - 1; i >= 0; i--) { | 
1157  | 2.43M  |         if (!a->d[i])  | 
1158  |  |             /* skip word if a->d[i] == 0 */  | 
1159  | 920k  |             continue;  | 
1160  | 1.51M  |         mask = BN_TBIT;  | 
1161  | 98.5M  |         for (j = BN_BITS2 - 1; j >= 0; j--) { | 
1162  | 97.0M  |             if (a->d[i] & mask) { | 
1163  | 2.82M  |                 if (k < max)  | 
1164  | 2.82M  |                     p[k] = BN_BITS2 * i + j;  | 
1165  | 2.82M  |                 k++;  | 
1166  | 2.82M  |             }  | 
1167  | 97.0M  |             mask >>= 1;  | 
1168  | 97.0M  |         }  | 
1169  | 1.51M  |     }  | 
1170  |  |  | 
1171  | 641k  |     if (k > 0 && p[0] > OPENSSL_ECC_MAX_FIELD_BITS)  | 
1172  | 0  |         return 0;  | 
1173  |  |  | 
1174  | 641k  |     if (k < max)  | 
1175  | 641k  |         p[k] = -1;  | 
1176  |  |  | 
1177  | 641k  |     return k + 1;  | 
1178  | 641k  | }  | 
1179  |  |  | 
1180  |  | /*  | 
1181  |  |  * Convert the coefficient array representation of a polynomial to a  | 
1182  |  |  * bit-string.  The array must be terminated by -1.  | 
1183  |  |  */  | 
1184  |  | int BN_GF2m_arr2poly(const int p[], BIGNUM *a)  | 
1185  | 0  | { | 
1186  | 0  |     int i;  | 
1187  |  | 
  | 
1188  | 0  |     bn_check_top(a);  | 
1189  | 0  |     BN_zero(a);  | 
1190  | 0  |     for (i = 0; p[i] != -1; i++) { | 
1191  | 0  |         if (BN_set_bit(a, p[i]) == 0)  | 
1192  | 0  |             return 0;  | 
1193  | 0  |     }  | 
1194  | 0  |     bn_check_top(a);  | 
1195  |  | 
  | 
1196  | 0  |     return 1;  | 
1197  | 0  | }  | 
1198  |  |  | 
1199  |  | #endif  |