/src/libecc/src/nn/nn_div.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2017 - This file is part of libecc project |
3 | | * |
4 | | * Authors: |
5 | | * Ryad BENADJILA <ryadbenadjila@gmail.com> |
6 | | * Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr> |
7 | | * Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr> |
8 | | * |
9 | | * Contributors: |
10 | | * Nicolas VIVET <nicolas.vivet@ssi.gouv.fr> |
11 | | * Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr> |
12 | | * |
13 | | * This software is licensed under a dual BSD and GPL v2 license. |
14 | | * See LICENSE file at the root folder of the project. |
15 | | */ |
16 | | #include "nn_div.h" |
17 | | #include "nn_mul.h" |
18 | | #include "nn_logical.h" |
19 | | #include "nn_add.h" |
20 | | #include "nn.h" |
21 | | |
22 | | /* |
23 | | * Some helper functions to perform operations on an arbitrary part |
24 | | * of a multiprecision number. |
25 | | * This is exactly the same code as for operations on the least significant |
26 | | * part of a multiprecision number except for the starting point in the |
27 | | * array representing it. |
28 | | * Done in *constant time*. |
29 | | * |
30 | | * Operations producing an output are in place. |
31 | | */ |
32 | | |
33 | | /* |
34 | | * Compare all the bits of in2 with the same number of bits in in1 starting at |
35 | | * 'shift' position in in1. in1 must be long enough for that purpose, i.e. |
36 | | * in1->wlen >= (in2->wlen + shift). The comparison value is provided in |
37 | | * 'cmp' parameter. The function returns 0 on success, -1 on error. |
38 | | * |
39 | | * The function is an internal helper; it expects initialized nn in1 and |
40 | | * in2: it does not verify that. |
41 | | */ |
42 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_cmp_shift(nn_src_t in1, nn_src_t in2, u8 shift, int *cmp) |
43 | 2.95M | { |
44 | 2.95M | int ret, mask, tmp; |
45 | 2.95M | u8 i; |
46 | | |
47 | 2.95M | MUST_HAVE((in1->wlen >= (in2->wlen + shift)), ret, err); |
48 | 2.95M | MUST_HAVE((cmp != NULL), ret, err); |
49 | | |
50 | 2.95M | tmp = 0; |
51 | 17.4M | for (i = in2->wlen; i > 0; i--) { |
52 | 14.5M | mask = (!(tmp & 0x1)); |
53 | 14.5M | tmp += ((in1->val[shift + i - 1] > in2->val[i - 1]) & mask); |
54 | 14.5M | tmp -= ((in1->val[shift + i - 1] < in2->val[i - 1]) & mask); |
55 | 14.5M | } |
56 | 2.95M | (*cmp) = tmp; |
57 | 2.95M | ret = 0; |
58 | | |
59 | 2.95M | err: |
60 | 2.95M | return ret; |
61 | 2.95M | } |
62 | | |
63 | | /* |
64 | | * Conditionally subtract a shifted version of in from out, i.e.: |
65 | | * - if cnd == 1, out <- out - (in << shift) |
66 | | * - if cnd == 0, out <- out |
67 | | * The function returns 0 on success, -1 on error. On success, 'borrow' |
68 | | * provides the possible borrow resulting from the subtraction. 'borrow' |
69 | | * is not meaningful on error. |
70 | | * |
71 | | * The function is an internal helper; it expects initialized nn out and |
72 | | * in: it does not verify that. |
73 | | */ |
74 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_cnd_sub_shift(int cnd, nn_t out, nn_src_t in, |
75 | | u8 shift, word_t *borrow) |
76 | 1.34M | { |
77 | 1.34M | word_t tmp, borrow1, borrow2, _borrow = WORD(0); |
78 | 1.34M | word_t mask = WORD_MASK_IFNOTZERO(cnd); |
79 | 1.34M | int ret; |
80 | 1.34M | u8 i; |
81 | | |
82 | 1.34M | MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err); |
83 | 1.34M | MUST_HAVE((borrow != NULL), ret, err); |
84 | | |
85 | | /* |
86 | | * Perform subtraction one word at a time, |
87 | | * propagating the borrow. |
88 | | */ |
89 | 8.04M | for (i = 0; i < in->wlen; i++) { |
90 | 6.69M | tmp = (word_t)(out->val[shift + i] - (in->val[i] & mask)); |
91 | 6.69M | borrow1 = (word_t)(tmp > out->val[shift + i]); |
92 | 6.69M | out->val[shift + i] = (word_t)(tmp - _borrow); |
93 | 6.69M | borrow2 = (word_t)(out->val[shift + i] > tmp); |
94 | | /* There is at most one borrow going out. */ |
95 | 6.69M | _borrow = (word_t)(borrow1 | borrow2); |
96 | 6.69M | } |
97 | | |
98 | 1.34M | (*borrow) = _borrow; |
99 | 1.34M | ret = 0; |
100 | | |
101 | 1.34M | err: |
102 | 1.34M | return ret; |
103 | 1.34M | } |
104 | | |
105 | | /* |
106 | | * Subtract a shifted version of 'in' multiplied by 'w' from 'out' and return |
107 | | * borrow. The function returns 0 on success, -1 on error. 'borrow' is |
108 | | * meaningful only on success. |
109 | | * |
110 | | * The function is an internal helper; it expects initialized nn out and |
111 | | * in: it does not verify that. |
112 | | */ |
113 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_submul_word_shift(nn_t out, nn_src_t in, word_t w, u8 shift, |
114 | | word_t *borrow) |
115 | 1.08M | { |
116 | 1.08M | word_t _borrow = WORD(0), prod_high, prod_low, tmp; |
117 | 1.08M | int ret; |
118 | 1.08M | u8 i; |
119 | | |
120 | 1.08M | MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err); |
121 | 1.08M | MUST_HAVE((borrow != NULL), ret, err); |
122 | | |
123 | 6.66M | for (i = 0; i < in->wlen; i++) { |
124 | | /* |
125 | | * Compute the result of the multiplication of |
126 | | * two words. |
127 | | */ |
128 | 5.58M | WORD_MUL(prod_high, prod_low, in->val[i], w); |
129 | | |
130 | | /* |
131 | | * And add previous borrow. |
132 | | */ |
133 | 5.58M | prod_low = (word_t)(prod_low + _borrow); |
134 | 5.58M | prod_high = (word_t)(prod_high + (prod_low < _borrow)); |
135 | | |
136 | | /* |
137 | | * Subtract computed word at current position in result. |
138 | | */ |
139 | 5.58M | tmp = (word_t)(out->val[shift + i] - prod_low); |
140 | 5.58M | _borrow = (word_t)(prod_high + (tmp > out->val[shift + i])); |
141 | 5.58M | out->val[shift + i] = tmp; |
142 | 5.58M | } |
143 | | |
144 | 1.08M | (*borrow) = _borrow; |
145 | 1.08M | ret = 0; |
146 | | |
147 | 1.08M | err: |
148 | 1.08M | return ret; |
149 | 1.08M | } |
150 | | |
151 | | /* |
152 | | * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b' |
153 | | * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized on |
154 | | * return. * Computation are performed in *constant time*, only depending on |
155 | | * the lengths of 'a' and 'b', but not on the values of 'a' and 'b'. |
156 | | * |
157 | | * This uses the above function to perform arithmetic on arbitrary parts |
158 | | * of multiprecision numbers. |
159 | | * |
160 | | * The algorithm used is schoolbook division: |
161 | | * + the quotient is computed word by word, |
162 | | * + a small division of the MSW is performed to obtain an |
163 | | * approximation of the MSW of the quotient, |
164 | | * + the approximation is corrected to obtain the correct |
165 | | * multiprecision MSW of the quotient, |
166 | | * + the corresponding product is subtracted from the dividend, |
167 | | * + the same procedure is used for the following word of the quotient. |
168 | | * |
169 | | * It is assumed that: |
170 | | * + b is normalized: the MSB of its MSW is 1, |
171 | | * + the most significant part of a is smaller than b, |
172 | | * + a precomputed reciprocal |
173 | | * v = floor(B^3/(d+1)) - B |
174 | | * where d is the MSW of the (normalized) divisor |
175 | | * is given to perform the small 3-by-2 division. |
176 | | * + using this reciprocal, the approximated quotient is always |
177 | | * too small and at most one multiprecision correction is needed. |
178 | | * |
179 | | * It returns 0 on sucess, -1 on error. |
180 | | * |
181 | | * CAUTION: |
182 | | * |
183 | | * - The function is expected to be used ONLY by the generic version |
184 | | * nn_divrem_normalized() defined later in the file. |
185 | | * - All parameters must have been initialized. Unlike exported/public |
186 | | * functions, this internal helper does not verify that nn parameters |
187 | | * have been initialized. Again, this is expected from the caller |
188 | | * (nn_divrem_normalized()). |
189 | | * - The function does not support aliasing of 'b' or 'q'. See |
190 | | * _nn_divrem_normalized_aliased() for such a wrapper. |
191 | | * |
192 | | */ |
193 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized(nn_t q, nn_t r, |
194 | | nn_src_t a, nn_src_t b, word_t v) |
195 | 261k | { |
196 | 261k | word_t borrow, qstar, qh, ql, rh, rl; /* for 3-by-2 div. */ |
197 | 261k | int small, cmp, ret; |
198 | 261k | u8 i; |
199 | | |
200 | 261k | MUST_HAVE(!(b->wlen <= 0), ret, err); |
201 | 261k | MUST_HAVE(!(a->wlen <= b->wlen), ret, err); |
202 | 261k | MUST_HAVE(!(!((b->val[b->wlen - 1] >> (WORD_BITS - 1)) == WORD(1))), ret, err); |
203 | 261k | MUST_HAVE(!_nn_cmp_shift(a, b, (u8)(a->wlen - b->wlen), &cmp) && (cmp < 0), ret, err); |
204 | | |
205 | | /* Handle trivial aliasing for a and r */ |
206 | 261k | if (r != a) { |
207 | 261k | ret = nn_set_wlen(r, a->wlen); EG(ret, err); |
208 | 261k | ret = nn_copy(r, a); EG(ret, err); |
209 | 261k | } |
210 | | |
211 | 261k | ret = nn_set_wlen(q, (u8)(r->wlen - b->wlen)); EG(ret, err); |
212 | | |
213 | | /* |
214 | | * Compute subsequent words of the quotient one by one. |
215 | | * Perform approximate 3-by-2 division using the precomputed |
216 | | * reciprocal and correct afterward. |
217 | | */ |
218 | 1.34M | for (i = r->wlen; i > b->wlen; i--) { |
219 | 1.08M | u8 shift = (u8)(i - b->wlen - 1); |
220 | | |
221 | | /* |
222 | | * Perform 3-by-2 approximate division: |
223 | | * <qstar, qh, ql> = <rh, rl> * (v + B) |
224 | | * We are only interested in qstar. |
225 | | */ |
226 | 1.08M | rh = r->val[i - 1]; |
227 | 1.08M | rl = r->val[i - 2]; |
228 | | /* Perform 2-by-1 multiplication. */ |
229 | 1.08M | WORD_MUL(qh, ql, rl, v); |
230 | 1.08M | WORD_MUL(qstar, ql, rh, v); |
231 | | /* And propagate carries. */ |
232 | 1.08M | qh = (word_t)(qh + ql); |
233 | 1.08M | qstar = (word_t)(qstar + (qh < ql)); |
234 | 1.08M | qh = (word_t)(qh + rl); |
235 | 1.08M | rh = (word_t)(rh + (qh < rl)); |
236 | 1.08M | qstar = (word_t)(qstar + rh); |
237 | | |
238 | | /* |
239 | | * Compute approximate quotient times divisor |
240 | | * and subtract it from remainder: |
241 | | * r = r - (b*qstar << B^shift) |
242 | | */ |
243 | 1.08M | ret = _nn_submul_word_shift(r, b, qstar, shift, &borrow); EG(ret, err); |
244 | | |
245 | | /* Check the approximate quotient was indeed not too large. */ |
246 | 1.08M | MUST_HAVE(!(r->val[i - 1] < borrow), ret, err); |
247 | 1.08M | r->val[i - 1] = (word_t)(r->val[i - 1] - borrow); |
248 | | |
249 | | /* |
250 | | * Check whether the approximate quotient was too small or not. |
251 | | * At most one multiprecision correction is needed. |
252 | | */ |
253 | 1.08M | ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err); |
254 | 1.08M | small = ((!!(r->val[i - 1])) | (cmp >= 0)); |
255 | | /* Perform conditional multiprecision correction. */ |
256 | 1.08M | ret = _nn_cnd_sub_shift(small, r, b, shift, &borrow); EG(ret, err); |
257 | 1.08M | MUST_HAVE(!(r->val[i - 1] != borrow), ret, err); |
258 | 1.08M | r->val[i - 1] = (word_t)(r->val[i - 1] - borrow); |
259 | | /* |
260 | | * Adjust the quotient if it was too small and set it in the |
261 | | * multiprecision array. |
262 | | */ |
263 | 1.08M | qstar = (word_t)(qstar + (word_t)small); |
264 | 1.08M | q->val[shift] = qstar; |
265 | | /* |
266 | | * Check that the MSW of remainder was cancelled out and that |
267 | | * we could not increase the quotient anymore. |
268 | | */ |
269 | 1.08M | MUST_HAVE(!(r->val[r->wlen - 1] != WORD(0)), ret, err); |
270 | | |
271 | 1.08M | ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err); |
272 | 1.08M | MUST_HAVE(!(cmp >= 0), ret, err); |
273 | | |
274 | 1.08M | ret = nn_set_wlen(r, (u8)(r->wlen - 1)); EG(ret, err); |
275 | 1.08M | } |
276 | | |
277 | 261k | err: |
278 | 261k | return ret; |
279 | 261k | } |
280 | | |
281 | | /* |
282 | | * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b' |
283 | | * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized. |
284 | | * Compared to _nn_divrem_normalized(), this internal version |
285 | | * explicitly handle the case where 'b' and 'r' point to the same nn (i.e. 'r' |
286 | | * result is stored in 'b' on success), hence the removal of 'r' parameter from |
287 | | * function prototype compared to _nn_divrem_normalized(). |
288 | | * |
289 | | * The computation is performed in *constant time*, see documentation of |
290 | | * _nn_divrem_normalized(). |
291 | | * |
292 | | * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the |
293 | | * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than |
294 | | * 'b'. |
295 | | * |
296 | | * The function returns 0 on success, -1 on error. |
297 | | * |
298 | | * CAUTION: |
299 | | * |
300 | | * - The function is expected to be used ONLY by the generic version |
301 | | * nn_divrem_normalized() defined later in the file. |
302 | | * - All parameters must have been initialized. Unlike exported/public |
303 | | * functions, this internal helper does not verify that nn parameters |
304 | | * have been initialized. Again, this is expected from the caller |
305 | | * (nn_divrem_normalized()). |
306 | | * - The function does not support aliasing of 'a' or 'q'. |
307 | | * |
308 | | */ |
309 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized_aliased(nn_t q, nn_src_t a, nn_t b, word_t v) |
310 | 0 | { |
311 | 0 | int ret; |
312 | 0 | nn r; |
313 | 0 | r.magic = WORD(0); |
314 | |
|
315 | 0 | ret = nn_init(&r, 0); EG(ret, err); |
316 | 0 | ret = _nn_divrem_normalized(q, &r, a, b, v); EG(ret, err); |
317 | 0 | ret = nn_copy(b, &r); EG(ret, err); |
318 | | |
319 | 0 | err: |
320 | 0 | nn_uninit(&r); |
321 | 0 | return ret; |
322 | 0 | } |
323 | | |
324 | | /* |
325 | | * Compute quotient and remainder of Euclidean division, and do not normalize |
326 | | * them. Done in *constant time*, see documentation of _nn_divrem_normalized(). |
327 | | * |
328 | | * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the |
329 | | * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than |
330 | | * 'b'. |
331 | | * |
332 | | * Aliasing is supported for 'r' only (with 'b'), i.e. 'r' and 'b' can point |
333 | | * to the same nn. |
334 | | * |
335 | | * The function returns 0 on success, -1 on error. |
336 | | */ |
337 | | int nn_divrem_normalized(nn_t q, nn_t r, nn_src_t a, nn_src_t b, word_t v) |
338 | 261k | { |
339 | 261k | int ret; |
340 | | |
341 | 261k | ret = nn_check_initialized(a); EG(ret, err); |
342 | 261k | ret = nn_check_initialized(q); EG(ret, err); |
343 | 261k | ret = nn_check_initialized(r); EG(ret, err); |
344 | | |
345 | 261k | if (r == b) { |
346 | 0 | ret = _nn_divrem_normalized_aliased(q, a, r, v); |
347 | 261k | } else { |
348 | 261k | ret = nn_check_initialized(b); EG(ret, err); |
349 | 261k | ret = _nn_divrem_normalized(q, r, a, b, v); |
350 | 261k | } |
351 | | |
352 | 261k | err: |
353 | 261k | return ret; |
354 | 261k | } |
355 | | |
356 | | /* |
357 | | * Compute remainder only and do not normalize it. |
358 | | * Constant time, see documentation of _nn_divrem_normalized. |
359 | | * |
360 | | * Support aliasing of inputs and outputs. |
361 | | * |
362 | | * The function returns 0 on success, -1 on error. |
363 | | */ |
364 | | int nn_mod_normalized(nn_t r, nn_src_t a, nn_src_t b, word_t v) |
365 | 0 | { |
366 | 0 | int ret; |
367 | 0 | nn q; |
368 | 0 | q.magic = WORD(0); |
369 | |
|
370 | 0 | ret = nn_init(&q, 0); EG(ret, err); |
371 | 0 | ret = nn_divrem_normalized(&q, r, a, b, v); |
372 | |
|
373 | 0 | err: |
374 | 0 | nn_uninit(&q); |
375 | 0 | return ret; |
376 | 0 | } |
377 | | |
378 | | /* |
379 | | * Compute quotient and remainder of Euclidean division, |
380 | | * and do not normalize them. |
381 | | * Done in *constant time*, |
382 | | * only depending on the lengths of 'a' and 'b' and the value of 'cnt', |
383 | | * but not on the values of 'a' and 'b'. |
384 | | * |
385 | | * Assume that b has been normalized by a 'cnt' bit shift, |
386 | | * that v is the reciprocal of the MSW of 'b', |
387 | | * but a is not shifted yet. |
388 | | * Useful when multiple multiplication by the same b are performed, |
389 | | * e.g. at the fp level. |
390 | | * |
391 | | * All outputs MUST have been initialized. The function does not support |
392 | | * aliasing of 'b' or 'q'. It returns 0 on success, -1 on error. |
393 | | * |
394 | | * CAUTION: |
395 | | * |
396 | | * - The function is expected to be used ONLY by the generic version |
397 | | * nn_divrem_normalized() defined later in the file. |
398 | | * - All parameters must have been initialized. Unlike exported/public |
399 | | * functions, this internal helper does not verify that |
400 | | * have been initialized. Again, this is expected from the caller |
401 | | * (nn_divrem_unshifted()). |
402 | | * - The function does not support aliasing. See |
403 | | * _nn_divrem_unshifted_aliased() for such a wrapper. |
404 | | */ |
405 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b_norm, |
406 | | word_t v, bitcnt_t cnt) |
407 | 281k | { |
408 | 281k | nn a_shift; |
409 | 281k | u8 new_wlen, b_wlen; |
410 | 281k | int larger, ret, cmp; |
411 | 281k | word_t borrow; |
412 | 281k | a_shift.magic = WORD(0); |
413 | | |
414 | | /* Avoid overflow */ |
415 | 281k | MUST_HAVE(((a->wlen + BIT_LEN_WORDS(cnt)) < NN_MAX_WORD_LEN), ret, err); |
416 | | |
417 | | /* We now know that new_wlen will fit in an u8 */ |
418 | 281k | new_wlen = (u8)(a->wlen + (u8)BIT_LEN_WORDS(cnt)); |
419 | | |
420 | 281k | b_wlen = b_norm->wlen; |
421 | 281k | if (new_wlen < b_wlen) { /* trivial case */ |
422 | 15.0k | ret = nn_copy(r, a); EG(ret, err); |
423 | 15.0k | ret = nn_zero(q); |
424 | 15.0k | goto err; |
425 | 15.0k | } |
426 | | |
427 | | /* Shift a. */ |
428 | 266k | ret = nn_init(&a_shift, (u16)(new_wlen * WORD_BYTES)); EG(ret, err); |
429 | 266k | ret = nn_set_wlen(&a_shift, new_wlen); EG(ret, err); |
430 | 266k | ret = nn_lshift_fixedlen(&a_shift, a, cnt); EG(ret, err); |
431 | 266k | ret = nn_set_wlen(r, new_wlen); EG(ret, err); |
432 | | |
433 | 266k | if (new_wlen == b_wlen) { |
434 | | /* Ensure that a is smaller than b. */ |
435 | 4.67k | ret = nn_cmp(&a_shift, b_norm, &cmp); EG(ret, err); |
436 | 4.67k | larger = (cmp >= 0); |
437 | 4.67k | ret = nn_cnd_sub(larger, r, &a_shift, b_norm); EG(ret, err); |
438 | 4.67k | MUST_HAVE(((!nn_cmp(r, b_norm, &cmp)) && (cmp < 0)), ret, err); |
439 | | |
440 | | /* Set MSW of quotient. */ |
441 | 4.67k | ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err); |
442 | 4.67k | q->val[new_wlen - b_wlen] = (word_t) larger; |
443 | | /* And we are done as the quotient is 0 or 1. */ |
444 | 261k | } else if (new_wlen > b_wlen) { |
445 | | /* Ensure that most significant part of a is smaller than b. */ |
446 | 261k | ret = _nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp); EG(ret, err); |
447 | 261k | larger = (cmp >= 0); |
448 | 261k | ret = _nn_cnd_sub_shift(larger, &a_shift, b_norm, (u8)(new_wlen - b_wlen), &borrow); EG(ret, err); |
449 | 261k | MUST_HAVE(((!_nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp)) && (cmp < 0)), ret, err); |
450 | | |
451 | | /* |
452 | | * Perform division with MSP of a smaller than b. This ensures |
453 | | * that the quotient is of length a_len - b_len. |
454 | | */ |
455 | 261k | ret = nn_divrem_normalized(q, r, &a_shift, b_norm, v); EG(ret, err); |
456 | | |
457 | | /* Set MSW of quotient. */ |
458 | 261k | ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err); |
459 | 261k | q->val[new_wlen - b_wlen] = (word_t) larger; |
460 | 261k | } /* else a is smaller than b... treated above. */ |
461 | | |
462 | 266k | ret = nn_rshift_fixedlen(r, r, cnt); EG(ret, err); |
463 | 266k | ret = nn_set_wlen(r, b_wlen); |
464 | | |
465 | 281k | err: |
466 | 281k | nn_uninit(&a_shift); |
467 | | |
468 | 281k | return ret; |
469 | 266k | } |
470 | | |
471 | | /* |
472 | | * Same as previous but handling aliasing of 'r' with 'b_norm', i.e. on success, |
473 | | * result 'r' is passed through 'b_norm'. |
474 | | * |
475 | | * CAUTION: |
476 | | * |
477 | | * - The function is expected to be used ONLY by the generic version |
478 | | * nn_divrem_normalized() defined later in the file. |
479 | | * - All parameter must have been initialized. Unlike exported/public |
480 | | * functions, this internal helper does not verify that nn parameters |
481 | | * have been initialized. Again, this is expected from the caller |
482 | | * (nn_divrem_unshifted()). |
483 | | */ |
484 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted_aliased(nn_t q, nn_src_t a, nn_t b_norm, |
485 | | word_t v, bitcnt_t cnt) |
486 | 0 | { |
487 | 0 | int ret; |
488 | 0 | nn r; |
489 | 0 | r.magic = WORD(0); |
490 | |
|
491 | 0 | ret = nn_init(&r, 0); EG(ret, err); |
492 | 0 | ret = _nn_divrem_unshifted(q, &r, a, b_norm, v, cnt); EG(ret, err); |
493 | 0 | ret = nn_copy(b_norm, &r); EG(ret, err); |
494 | | |
495 | 0 | err: |
496 | 0 | nn_uninit(&r); |
497 | 0 | return ret; |
498 | 0 | } |
499 | | |
500 | | /* |
501 | | * Compute quotient and remainder and do not normalize them. |
502 | | * Constant time, see documentation of _nn_divrem_unshifted(). |
503 | | * |
504 | | * Alias-supporting version of _nn_divrem_unshifted for 'r' only. |
505 | | * |
506 | | * The function returns 0 on success, -1 on error. |
507 | | */ |
508 | | int nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b, |
509 | | word_t v, bitcnt_t cnt) |
510 | 180k | { |
511 | 180k | int ret; |
512 | | |
513 | 180k | ret = nn_check_initialized(a); EG(ret, err); |
514 | 180k | ret = nn_check_initialized(q); EG(ret, err); |
515 | 180k | ret = nn_check_initialized(r); EG(ret, err); |
516 | | |
517 | 180k | if (r == b) { |
518 | 0 | ret = _nn_divrem_unshifted_aliased(q, a, r, v, cnt); |
519 | 180k | } else { |
520 | 180k | ret = nn_check_initialized(b); EG(ret, err); |
521 | 180k | ret = _nn_divrem_unshifted(q, r, a, b, v, cnt); |
522 | 180k | } |
523 | | |
524 | 180k | err: |
525 | 180k | return ret; |
526 | 180k | } |
527 | | |
528 | | /* |
529 | | * Compute remainder only and do not normalize it. |
530 | | * Constant time, see documentation of _nn_divrem_unshifted. |
531 | | * |
532 | | * Aliasing of inputs and outputs is possible. |
533 | | * |
534 | | * The function returns 0 on success, -1 on error. |
535 | | */ |
536 | | int nn_mod_unshifted(nn_t r, nn_src_t a, nn_src_t b, word_t v, bitcnt_t cnt) |
537 | 180k | { |
538 | 180k | nn q; |
539 | 180k | int ret; |
540 | 180k | q.magic = WORD(0); |
541 | | |
542 | 180k | ret = nn_init(&q, 0); EG(ret, err); |
543 | 180k | ret = nn_divrem_unshifted(&q, r, a, b, v, cnt); |
544 | | |
545 | 180k | err: |
546 | 180k | nn_uninit(&q); |
547 | | |
548 | 180k | return ret; |
549 | 180k | } |
550 | | |
551 | | /* |
552 | | * Helper functions for arithmetic in 2-by-1 division |
553 | | * used in the reciprocal computation. |
554 | | * |
555 | | * These are variations of the nn multiprecision functions |
556 | | * acting on arrays of fixed length, in place, |
557 | | * and returning carry/borrow. |
558 | | * |
559 | | * Done in constant time. |
560 | | */ |
561 | | |
562 | | /* |
563 | | * Comparison of two limbs numbers. Internal helper. |
564 | | * Checks left to the caller |
565 | | */ |
566 | | ATTRIBUTE_WARN_UNUSED_RET static int _wcmp_22(word_t a[2], word_t b[2]) |
567 | 514k | { |
568 | 514k | int mask, ret = 0; |
569 | 514k | ret += (a[1] > b[1]); |
570 | 514k | ret -= (a[1] < b[1]); |
571 | 514k | mask = !(ret & 0x1); |
572 | 514k | ret += ((a[0] > b[0]) & mask); |
573 | 514k | ret -= ((a[0] < b[0]) & mask); |
574 | 514k | return ret; |
575 | 514k | } |
576 | | |
577 | | /* |
578 | | * Addition of two limbs numbers with carry returned. Internal helper. |
579 | | * Checks left to the caller. |
580 | | */ |
581 | | ATTRIBUTE_WARN_UNUSED_RET static word_t _wadd_22(word_t a[2], word_t b[2]) |
582 | 71.7k | { |
583 | 71.7k | word_t carry; |
584 | 71.7k | a[0] = (word_t)(a[0] + b[0]); |
585 | 71.7k | carry = (word_t)(a[0] < b[0]); |
586 | 71.7k | a[1] = (word_t)(a[1] + carry); |
587 | 71.7k | carry = (word_t)(a[1] < carry); |
588 | 71.7k | a[1] = (word_t)(a[1] + b[1]); |
589 | 71.7k | carry = (word_t)(carry | (a[1] < b[1])); |
590 | 71.7k | return carry; |
591 | 71.7k | } |
592 | | |
593 | | /* |
594 | | * Subtraction of two limbs numbers with borrow returned. Internal helper. |
595 | | * Checks left to the caller. |
596 | | */ |
597 | | ATTRIBUTE_WARN_UNUSED_RET static word_t _wsub_22(word_t a[2], word_t b[2]) |
598 | 175k | { |
599 | 175k | word_t borrow, tmp; |
600 | 175k | tmp = (word_t)(a[0] - b[0]); |
601 | 175k | borrow = (word_t)(tmp > a[0]); |
602 | 175k | a[0] = tmp; |
603 | 175k | tmp = (word_t)(a[1] - borrow); |
604 | 175k | borrow = (word_t)(tmp > a[1]); |
605 | 175k | a[1] = (word_t)(tmp - b[1]); |
606 | 175k | borrow = (word_t)(borrow | (a[1] > tmp)); |
607 | 175k | return borrow; |
608 | 175k | } |
609 | | |
610 | | /* |
611 | | * Helper macros for conditional subtraction in 2-by-1 division |
612 | | * used in the reciprocal computation. |
613 | | * |
614 | | * Done in constant time. |
615 | | */ |
616 | | |
617 | | /* Conditional subtraction of a one limb number from a two limbs number. */ |
618 | 138k | #define WORD_CND_SUB_21(cnd, ah, al, b) do { \ |
619 | 138k | word_t tmp, mask; \ |
620 | 138k | mask = WORD_MASK_IFNOTZERO((cnd)); \ |
621 | 138k | tmp = (word_t)((al) - ((b) & mask)); \ |
622 | 138k | (ah) = (word_t)((ah) - (tmp > (al))); \ |
623 | 138k | (al) = tmp; \ |
624 | 138k | } while (0) |
625 | | /* Conditional subtraction of a two limbs number from a two limbs number. */ |
626 | 138k | #define WORD_CND_SUB_22(cnd, ah, al, bh, bl) do { \ |
627 | 138k | word_t tmp, mask; \ |
628 | 138k | mask = WORD_MASK_IFNOTZERO((cnd)); \ |
629 | 138k | tmp = (word_t)((al) - ((bl) & mask)); \ |
630 | 138k | (ah) = (word_t)((ah) - (tmp > (al))); \ |
631 | 138k | (al) = tmp; \ |
632 | 138k | (ah) = (word_t)((ah) - ((bh) & mask)); \ |
633 | 138k | } while (0) |
634 | | |
635 | | /* |
636 | | * divide two words by a normalized word using schoolbook division on half |
637 | | * words. This is only used below in the reciprocal computation. No checks |
638 | | * are performed on inputs. This is expected to be done by the caller. |
639 | | * |
640 | | * Returns 0 on success, -1 on error. |
641 | | */ |
642 | | ATTRIBUTE_WARN_UNUSED_RET static int _word_divrem(word_t *q, word_t *r, word_t ah, word_t al, word_t b) |
643 | 69.3k | { |
644 | 69.3k | word_t bh, bl, qh, ql, rm, rhl[2], phl[2]; |
645 | 69.3k | int larger, ret; |
646 | 69.3k | u8 j; |
647 | | |
648 | 69.3k | MUST_HAVE((WRSHIFT((b), (WORD_BITS - 1)) == WORD(1)), ret, err); |
649 | 69.3k | bh = WRSHIFT((b), HWORD_BITS); |
650 | 69.3k | bl = WLSHIFT((b), HWORD_BITS); |
651 | 69.3k | rhl[1] = ah; |
652 | 69.3k | rhl[0] = al; |
653 | | |
654 | | /* |
655 | | * Compute high part of the quotient. We know from |
656 | | * MUST_HAVE() check above that bh (a word_t) is not 0 |
657 | | */ |
658 | | |
659 | 69.3k | KNOWN_FACT(bh != 0, ret, err); |
660 | 69.3k | qh = (rhl[1] / bh); |
661 | 69.3k | qh = WORD_MIN(qh, HWORD_MASK); |
662 | 69.3k | WORD_MUL(phl[1], phl[0], qh, (b)); |
663 | 69.3k | phl[1] = (WLSHIFT(phl[1], HWORD_BITS) | |
664 | 69.3k | WRSHIFT(phl[0], HWORD_BITS)); |
665 | 69.3k | phl[0] = WLSHIFT(phl[0], HWORD_BITS); |
666 | | |
667 | 207k | for (j = 0; j < 2; j++) { |
668 | 138k | larger = (_wcmp_22(phl, rhl) > 0); |
669 | 138k | qh = (word_t)(qh - (word_t)larger); |
670 | 138k | WORD_CND_SUB_22(larger, phl[1], phl[0], bh, bl); |
671 | 138k | } |
672 | | |
673 | 69.3k | ret = (_wcmp_22(phl, rhl) > 0); |
674 | 69.3k | MUST_HAVE(!(ret), ret, err); |
675 | 69.3k | IGNORE_RET_VAL(_wsub_22(rhl, phl)); |
676 | 69.3k | MUST_HAVE((WRSHIFT(rhl[1], HWORD_BITS) == 0), ret, err); |
677 | | |
678 | | /* Compute low part of the quotient. */ |
679 | 69.3k | rm = (WLSHIFT(rhl[1], HWORD_BITS) | |
680 | 69.3k | WRSHIFT(rhl[0], HWORD_BITS)); |
681 | 69.3k | ql = (rm / bh); |
682 | 69.3k | ql = WORD_MIN(ql, HWORD_MASK); |
683 | 69.3k | WORD_MUL(phl[1], phl[0], ql, (b)); |
684 | | |
685 | 207k | for (j = 0; j < 2; j++) { |
686 | 138k | larger = (_wcmp_22(phl, rhl) > 0); |
687 | 138k | ql = (word_t) (ql - (word_t)larger); |
688 | 138k | WORD_CND_SUB_21(larger, phl[1], phl[0], (b)); |
689 | 138k | } |
690 | | |
691 | 69.3k | ret = _wcmp_22(phl, rhl) > 0; |
692 | 69.3k | MUST_HAVE(!(ret), ret, err); |
693 | 69.3k | IGNORE_RET_VAL(_wsub_22(rhl, phl)); |
694 | | /* Set outputs. */ |
695 | 69.3k | MUST_HAVE((rhl[1] == WORD(0)), ret, err); |
696 | 69.3k | MUST_HAVE(!(rhl[0] >= (b)), ret, err); |
697 | 69.3k | (*q) = (WLSHIFT(qh, HWORD_BITS) | ql); |
698 | 69.3k | (*r) = rhl[0]; |
699 | 69.3k | MUST_HAVE(!((word_t) ((*q)*(b) + (*r)) != (al)), ret, err); |
700 | 69.3k | ret = 0; |
701 | | |
702 | 69.3k | err: |
703 | 69.3k | return ret; |
704 | 69.3k | } |
705 | | |
706 | | /* |
707 | | * Compute the reciprocal of d as |
708 | | * floor(B^3/(d+1)) - B |
709 | | * which is used to perform approximate small division using a multiplication. |
710 | | * |
711 | | * No attempt was made to make it constant time. Indeed, such values are usually |
712 | | * precomputed in contexts where constant time is wanted, e.g. in the fp layer. |
713 | | * |
714 | | * Returns 0 on success, -1 on error. |
715 | | */ |
716 | | int wreciprocal(word_t dh, word_t dl, word_t *reciprocal) |
717 | 101k | { |
718 | 101k | word_t q, carry, r[2], t[2]; |
719 | 101k | int ret; |
720 | | |
721 | 101k | MUST_HAVE((reciprocal != NULL), ret, err); |
722 | | |
723 | 101k | if (((word_t)(dh + WORD(1)) == WORD(0)) && |
724 | 101k | ((word_t)(dl + WORD(1)) == WORD(0))) { |
725 | 27.2k | (*reciprocal) = WORD(0); |
726 | 27.2k | ret = 0; |
727 | 27.2k | goto err; |
728 | 27.2k | } |
729 | | |
730 | 73.7k | if ((word_t)(dh + WORD(1)) == WORD(0)) { |
731 | 4.42k | q = (word_t)(~dh); |
732 | 4.42k | r[1] = (word_t)(~dl); |
733 | 69.3k | } else { |
734 | 69.3k | t[1] = (word_t)(~dh); |
735 | 69.3k | t[0] = (word_t)(~dl); |
736 | 69.3k | ret = _word_divrem(&q, r+1, t[1], t[0], |
737 | 69.3k | (word_t)(dh + WORD(1))); EG(ret, err); |
738 | 69.3k | } |
739 | | |
740 | 73.7k | if ((word_t)(dl + WORD(1)) == WORD(0)) { |
741 | 2.02k | (*reciprocal) = q; |
742 | 2.02k | ret = 0; |
743 | 2.02k | goto err; |
744 | 2.02k | } |
745 | | |
746 | 71.7k | r[0] = WORD(0); |
747 | | |
748 | 71.7k | WORD_MUL(t[1], t[0], q, (word_t)~dl); |
749 | 71.7k | carry = _wadd_22(r, t); |
750 | | |
751 | 71.7k | t[0] = (word_t)(dl + WORD(1)); |
752 | 71.7k | t[1] = dh; |
753 | 108k | while (carry || (_wcmp_22(r, t) >= 0)) { |
754 | 36.6k | q++; |
755 | 36.6k | carry = (word_t)(carry - _wsub_22(r, t)); |
756 | 36.6k | } |
757 | | |
758 | 71.7k | (*reciprocal) = q; |
759 | 71.7k | ret = 0; |
760 | | |
761 | 101k | err: |
762 | 101k | return ret; |
763 | 71.7k | } |
764 | | |
765 | | /* |
766 | | * Given an odd number p, compute division coefficients p_normalized, |
767 | | * p_shift and p_reciprocal so that: |
768 | | * - p_shift = p_rounded_bitlen - bitsizeof(p), where |
769 | | * o p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of |
770 | | * minimum number of words required to store p) and |
771 | | * o p_bitlen is the real bit size of p |
772 | | * - p_normalized = p << p_shift |
773 | | * - p_reciprocal = B^3 / ((p_normalized >> (pbitlen - 2*WORDSIZE)) + 1) - B |
774 | | * with B = 2^WORDSIZE |
775 | | * |
776 | | * These coefficients are useful for the optimized shifted variants of NN |
777 | | * division and modular functions. Because we have two word_t outputs |
778 | | * (p_shift and p_reciprocal), these are passed through word_t pointers. |
779 | | * Aliasing of outputs with the input is possible since p_in is copied in |
780 | | * local p at the beginning of the function. |
781 | | * |
782 | | * The function returns 0 on success, -1 on error. |
783 | | */ |
784 | | int nn_compute_div_coefs(nn_t p_normalized, word_t *p_shift, |
785 | | word_t *p_reciprocal, nn_src_t p_in) |
786 | 0 | { |
787 | 0 | bitcnt_t p_rounded_bitlen, p_bitlen; |
788 | 0 | nn p, tmp_nn; |
789 | 0 | int ret; |
790 | 0 | p.magic = tmp_nn.magic = WORD(0); |
791 | |
|
792 | 0 | ret = nn_check_initialized(p_in); EG(ret, err); |
793 | | |
794 | 0 | MUST_HAVE((p_shift != NULL), ret, err); |
795 | 0 | MUST_HAVE((p_reciprocal != NULL), ret, err); |
796 | | |
797 | 0 | ret = nn_init(&p, 0); EG(ret, err); |
798 | 0 | ret = nn_copy(&p, p_in); EG(ret, err); |
799 | | |
800 | | /* |
801 | | * In order for our reciprocal division routines to work, it is expected |
802 | | * that the bit length (including leading zeroes) of input prime |
803 | | * p is >= 2 * wlen where wlen is the number of bits of a word size. |
804 | | */ |
805 | 0 | if (p.wlen < 2) { |
806 | 0 | ret = nn_set_wlen(&p, 2); EG(ret, err); |
807 | 0 | } |
808 | | |
809 | 0 | ret = nn_init(p_normalized, 0); EG(ret, err); |
810 | 0 | ret = nn_init(&tmp_nn, 0); EG(ret, err); |
811 | | |
812 | | /* p_rounded_bitlen = bitlen of p rounded to word size */ |
813 | 0 | p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen); |
814 | | |
815 | | /* p_shift */ |
816 | 0 | ret = nn_bitlen(&p, &p_bitlen); EG(ret, err); |
817 | 0 | (*p_shift) = (word_t)(p_rounded_bitlen - p_bitlen); |
818 | | |
819 | | /* p_normalized = p << pshift */ |
820 | 0 | ret = nn_lshift(p_normalized, &p, (bitcnt_t)(*p_shift)); EG(ret, err); |
821 | | |
822 | | /* Sanity check to protect the p_reciprocal computation */ |
823 | 0 | MUST_HAVE((p_rounded_bitlen >= (2 * WORDSIZE)), ret, err); |
824 | | |
825 | | /* |
826 | | * p_reciprocal = B^3 / ((p_normalized >> (p_rounded_bitlen - 2 * wlen)) + 1) - B |
827 | | * where B = 2^wlen where wlen = word size in bits. We use our NN |
828 | | * helper to compute it. |
829 | | */ |
830 | 0 | ret = nn_rshift(&tmp_nn, p_normalized, (bitcnt_t)(p_rounded_bitlen - (2 * WORDSIZE))); EG(ret, err); |
831 | 0 | ret = wreciprocal(tmp_nn.val[1], tmp_nn.val[0], p_reciprocal); |
832 | |
|
833 | 0 | err: |
834 | 0 | nn_uninit(&p); |
835 | 0 | nn_uninit(&tmp_nn); |
836 | |
|
837 | 0 | return ret; |
838 | 0 | } |
839 | | |
840 | | /* |
841 | | * Compute quotient remainder of Euclidean division. |
842 | | * |
843 | | * This function is a wrapper to normalize the divisor, i.e. shift it so that |
844 | | * the MSB of its MSW is set, and precompute the reciprocal of this MSW to be |
845 | | * used to perform small divisions using multiplications during the long |
846 | | * schoolbook division. It uses the helper functions/macros above. |
847 | | * |
848 | | * This is NOT constant time with regards to the word length of a and b, |
849 | | * but also the actual bitlength of b as we need to normalize b at the |
850 | | * bit level. |
851 | | * Moreover the precomputation of the reciprocal is not constant time at all. |
852 | | * |
853 | | * r need not be initialized, the function does it for the the caller. |
854 | | * |
855 | | * This function does not support aliasing. This is an internal helper, which |
856 | | * expects caller to check parameters. |
857 | | * |
858 | | * It returns 0 on sucess, -1 on error. |
859 | | */ |
860 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b) |
861 | 101k | { |
862 | 101k | nn b_large, b_normalized; |
863 | 101k | bitcnt_t cnt; |
864 | 101k | word_t v; |
865 | 101k | nn_src_t ptr = b; |
866 | 101k | int ret, iszero; |
867 | 101k | b_large.magic = b_normalized.magic = WORD(0); |
868 | | |
869 | 101k | ret = nn_init(r, 0); EG(ret, err); |
870 | 101k | ret = nn_init(q, 0); EG(ret, err); |
871 | 101k | ret = nn_init(&b_large, 0); EG(ret, err); |
872 | | |
873 | 101k | MUST_HAVE(!nn_iszero(b, &iszero) && !iszero, ret, err); |
874 | | |
875 | 101k | if (b->wlen == 1) { |
876 | 29.3k | ret = nn_copy(&b_large, b); EG(ret, err); |
877 | | |
878 | | /* Expand our big number with zeroes */ |
879 | 29.3k | ret = nn_set_wlen(&b_large, 2); EG(ret, err); |
880 | | |
881 | | /* |
882 | | * This cast could seem inappropriate, but we are |
883 | | * sure here that we won't touch ptr since it is only |
884 | | * given as a const parameter to sub functions. |
885 | | */ |
886 | 29.3k | ptr = (nn_src_t) &b_large; |
887 | 29.3k | } |
888 | | |
889 | | /* After this, we only handle >= 2 words big numbers */ |
890 | 101k | MUST_HAVE(!(ptr->wlen < 2), ret, err); |
891 | | |
892 | 101k | ret = nn_init(&b_normalized, (u16)((ptr->wlen) * WORD_BYTES)); EG(ret, err); |
893 | 101k | ret = nn_clz(ptr, &cnt); EG(ret, err); |
894 | 101k | ret = nn_lshift_fixedlen(&b_normalized, ptr, cnt); EG(ret, err); |
895 | 101k | ret = wreciprocal(b_normalized.val[ptr->wlen - 1], |
896 | 101k | b_normalized.val[ptr->wlen - 2], |
897 | 101k | &v); /* Not constant time. */ EG(ret, err); |
898 | | |
899 | 101k | ret = _nn_divrem_unshifted(q, r, a, &b_normalized, v, cnt); |
900 | | |
901 | 101k | err: |
902 | 101k | nn_uninit(&b_normalized); |
903 | 101k | nn_uninit(&b_large); |
904 | | |
905 | 101k | return ret; |
906 | 101k | } |
907 | | |
908 | | /* |
909 | | * Returns 0 on succes, -1 on error. Internal helper. Checks on params |
910 | | * expected from the caller. |
911 | | */ |
912 | | ATTRIBUTE_WARN_UNUSED_RET static int __nn_divrem_notrim_alias(nn_t q, nn_t r, nn_src_t a, nn_src_t b) |
913 | 20.2k | { |
914 | 20.2k | nn a_cpy, b_cpy; |
915 | 20.2k | int ret; |
916 | 20.2k | a_cpy.magic = b_cpy.magic = WORD(0); |
917 | | |
918 | 20.2k | ret = nn_init(&a_cpy, 0); EG(ret, err); |
919 | 20.2k | ret = nn_init(&b_cpy, 0); EG(ret, err); |
920 | 20.2k | ret = nn_copy(&a_cpy, a); EG(ret, err); |
921 | 20.2k | ret = nn_copy(&b_cpy, b); EG(ret, err); |
922 | 20.2k | ret = _nn_divrem(q, r, &a_cpy, &b_cpy); |
923 | | |
924 | 20.2k | err: |
925 | 20.2k | nn_uninit(&b_cpy); |
926 | 20.2k | nn_uninit(&a_cpy); |
927 | | |
928 | 20.2k | return ret; |
929 | 20.2k | } |
930 | | |
931 | | |
932 | | |
933 | | /* |
934 | | * Compute quotient and remainder and normalize them. |
935 | | * Not constant time, see documentation of _nn_divrem. |
936 | | * |
937 | | * Aliased version of _nn_divrem. Returns 0 on success, |
938 | | * -1 on error. |
939 | | */ |
940 | | int nn_divrem_notrim(nn_t q, nn_t r, nn_src_t a, nn_src_t b) |
941 | 101k | { |
942 | 101k | int ret; |
943 | | |
944 | | /* _nn_divrem initializes q and r */ |
945 | 101k | ret = nn_check_initialized(a); EG(ret, err); |
946 | 101k | ret = nn_check_initialized(b); EG(ret, err); |
947 | 101k | MUST_HAVE(((q != NULL) && (r != NULL)), ret, err); |
948 | | |
949 | | /* |
950 | | * Handle aliasing whenever any of the inputs is |
951 | | * used as an output. |
952 | | */ |
953 | 101k | if ((a == q) || (a == r) || (b == q) || (b == r)) { |
954 | 20.2k | ret = __nn_divrem_notrim_alias(q, r, a, b); |
955 | 80.8k | } else { |
956 | 80.8k | ret = _nn_divrem(q, r, a, b); |
957 | 80.8k | } |
958 | | |
959 | 101k | err: |
960 | 101k | return ret; |
961 | 101k | } |
962 | | |
963 | | /* |
964 | | * Compute quotient and remainder and normalize them. |
965 | | * Not constant time, see documentation of _nn_divrem(). |
966 | | * Returns 0 on success, -1 on error. |
967 | | */ |
968 | | int nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b) |
969 | 94.0k | { |
970 | 94.0k | int ret; |
971 | | |
972 | 94.0k | ret = nn_divrem_notrim(q, r, a, b); EG(ret, err); |
973 | | |
974 | | /* Normalize (trim) the quotient and rest to avoid size overflow */ |
975 | 94.0k | ret = nn_normalize(q); EG(ret, err); |
976 | 94.0k | ret = nn_normalize(r); |
977 | | |
978 | 94.0k | err: |
979 | 94.0k | return ret; |
980 | 94.0k | } |
981 | | |
982 | | /* |
983 | | * Compute remainder only and do not normalize it. Not constant time, see |
984 | | * documentation of _nn_divrem(). Returns 0 on success, -1 on error. |
985 | | */ |
986 | | int nn_mod_notrim(nn_t r, nn_src_t a, nn_src_t b) |
987 | 6.96k | { |
988 | 6.96k | int ret; |
989 | 6.96k | nn q; |
990 | 6.96k | q.magic = WORD(0); |
991 | | |
992 | | /* nn_divrem() will init q. */ |
993 | 6.96k | ret = nn_divrem_notrim(&q, r, a, b); |
994 | | |
995 | 6.96k | nn_uninit(&q); |
996 | | |
997 | 6.96k | return ret; |
998 | 6.96k | } |
999 | | |
1000 | | /* |
1001 | | * Compute remainder only and normalize it. Not constant time, see |
1002 | | * documentation of _nn_divrem(). r is initialized by the function. |
1003 | | * Returns 0 on success, -1 on error. |
1004 | | */ |
1005 | | int nn_mod(nn_t r, nn_src_t a, nn_src_t b) |
1006 | 35.1k | { |
1007 | 35.1k | int ret; |
1008 | 35.1k | nn q; |
1009 | 35.1k | q.magic = WORD(0); |
1010 | | |
1011 | | /* nn_divrem will init q. */ |
1012 | 35.1k | ret = nn_divrem(&q, r, a, b); |
1013 | | |
1014 | 35.1k | nn_uninit(&q); |
1015 | | |
1016 | 35.1k | return ret; |
1017 | 35.1k | } |
1018 | | |
1019 | | /* |
1020 | | * Below follow gcd and xgcd non constant time functions for the user ease. |
1021 | | */ |
1022 | | |
1023 | | /* |
1024 | | * Unaliased version of xgcd, and we suppose that a >= b. Badly non-constant |
1025 | | * time per the algorithm used. The function returns 0 on success, -1 on |
1026 | | * error. internal helper: expect caller to check parameters. |
1027 | | */ |
1028 | | ATTRIBUTE_WARN_UNUSED_RET static int _nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b, |
1029 | | int *sign) |
1030 | 1.03k | { |
1031 | 1.03k | nn_t c, d, q, r, u1, v1, u2, v2; |
1032 | 1.03k | nn scratch[8]; |
1033 | 1.03k | int ret, swap, iszero; |
1034 | 1.03k | u8 i; |
1035 | | |
1036 | 9.35k | for (i = 0; i < 8; i++){ |
1037 | 8.31k | scratch[i].magic = WORD(0); |
1038 | 8.31k | } |
1039 | | |
1040 | | /* |
1041 | | * Maintain: |
1042 | | * |u1 v1| |c| = |a| |
1043 | | * |u2 v2| |d| |b| |
1044 | | * u1, v1, u2, v2 >= 0 |
1045 | | * c >= d |
1046 | | * |
1047 | | * Initially: |
1048 | | * |1 0 | |a| = |a| |
1049 | | * |0 1 | |b| |b| |
1050 | | * |
1051 | | * At each iteration: |
1052 | | * c >= d |
1053 | | * c = q*d + r |
1054 | | * |u1 v1| = |q*u1+v1 u1| |
1055 | | * |u2 v2| |q*u2+v2 u2| |
1056 | | * |
1057 | | * Finally, after i steps: |
1058 | | * |u1 v1| |g| = |a| |
1059 | | * |u2 v2| |0| = |b| |
1060 | | * |
1061 | | * Inverting the matrix: |
1062 | | * |g| = (-1)^i | v2 -v1| |a| |
1063 | | * |0| |-u2 u1| |b| |
1064 | | */ |
1065 | | |
1066 | | /* |
1067 | | * Initialization. |
1068 | | */ |
1069 | 1.03k | ret = nn_init(g, 0); EG(ret, err); |
1070 | 1.03k | ret = nn_init(u, 0); EG(ret, err); |
1071 | 1.03k | ret = nn_init(v, 0); EG(ret, err); |
1072 | 1.03k | ret = nn_iszero(b, &iszero); EG(ret, err); |
1073 | 1.03k | if (iszero) { |
1074 | | /* gcd(0, a) = a, and 1*a + 0*b = a */ |
1075 | 32 | ret = nn_copy(g, a); EG(ret, err); |
1076 | 32 | ret = nn_one(u); EG(ret, err); |
1077 | 32 | ret = nn_zero(v); EG(ret, err); |
1078 | 32 | (*sign) = 1; |
1079 | 32 | goto err; |
1080 | 32 | } |
1081 | | |
1082 | 9.06k | for (i = 0; i < 8; i++){ |
1083 | 8.05k | ret = nn_init(&scratch[i], 0); EG(ret, err); |
1084 | 8.05k | } |
1085 | | |
1086 | 1.00k | u1 = &(scratch[0]); |
1087 | 1.00k | v1 = &(scratch[1]); |
1088 | 1.00k | u2 = &(scratch[2]); |
1089 | 1.00k | v2 = &(scratch[3]); |
1090 | 1.00k | ret = nn_one(u1); EG(ret, err); |
1091 | 1.00k | ret = nn_zero(v1); EG(ret, err); |
1092 | 1.00k | ret = nn_zero(u2); EG(ret, err); |
1093 | 1.00k | ret = nn_one(v2); EG(ret, err); |
1094 | 1.00k | c = &(scratch[4]); |
1095 | 1.00k | d = &(scratch[5]); |
1096 | 1.00k | ret = nn_copy(c, a); EG(ret, err); /* Copy could be skipped. */ |
1097 | 1.00k | ret = nn_copy(d, b); EG(ret, err); /* Copy could be skipped. */ |
1098 | 1.00k | q = &(scratch[6]); |
1099 | 1.00k | r = &(scratch[7]); |
1100 | 1.00k | swap = 0; |
1101 | | |
1102 | | /* |
1103 | | * Loop. |
1104 | | */ |
1105 | 1.00k | ret = nn_iszero(d, &iszero); EG(ret, err); |
1106 | 29.8k | while (!iszero) { |
1107 | 29.3k | ret = nn_divrem(q, r, c, d); EG(ret, err); |
1108 | 29.3k | ret = nn_normalize(q); EG(ret, err); |
1109 | 29.3k | ret = nn_normalize(r); EG(ret, err); |
1110 | 29.3k | ret = nn_copy(c, r); EG(ret, err); |
1111 | 29.3k | ret = nn_mul(r, q, u1); EG(ret, err); |
1112 | 29.3k | ret = nn_normalize(r); EG(ret, err); |
1113 | 29.3k | ret = nn_add(v1, v1, r); EG(ret, err); |
1114 | 29.3k | ret = nn_mul(r, q, u2); EG(ret, err); |
1115 | 29.3k | ret = nn_normalize(r); EG(ret, err); |
1116 | 29.3k | ret = nn_add(v2, v2, r); EG(ret, err); |
1117 | 29.3k | ret = nn_normalize(v1); EG(ret, err); |
1118 | 29.3k | ret = nn_normalize(v2); EG(ret, err); |
1119 | 29.3k | swap = 1; |
1120 | 29.3k | ret = nn_iszero(c, &iszero); EG(ret, err); |
1121 | 29.3k | if (iszero) { |
1122 | 523 | break; |
1123 | 523 | } |
1124 | 28.8k | ret = nn_divrem(q, r, d, c); EG(ret, err); |
1125 | 28.8k | ret = nn_normalize(q); EG(ret, err); |
1126 | 28.8k | ret = nn_normalize(r); EG(ret, err); |
1127 | 28.8k | ret = nn_copy(d, r); EG(ret, err); |
1128 | 28.8k | ret = nn_mul(r, q, v1); EG(ret, err); |
1129 | 28.8k | ret = nn_normalize(r); EG(ret, err); |
1130 | 28.8k | ret = nn_add(u1, u1, r); EG(ret, err); |
1131 | 28.8k | ret = nn_mul(r, q, v2); EG(ret, err); |
1132 | 28.8k | ret = nn_normalize(r); EG(ret, err); |
1133 | 28.8k | ret = nn_add(u2, u2, r); EG(ret, err); |
1134 | 28.8k | ret = nn_normalize(u1); EG(ret, err); |
1135 | 28.8k | ret = nn_normalize(u2); EG(ret, err); |
1136 | 28.8k | swap = 0; |
1137 | | |
1138 | | /* refresh loop condition */ |
1139 | 28.8k | ret = nn_iszero(d, &iszero); EG(ret, err); |
1140 | 28.8k | } |
1141 | | |
1142 | | /* Copies could be skipped. */ |
1143 | 1.00k | if (swap) { |
1144 | 523 | ret = nn_copy(g, d); EG(ret, err); |
1145 | 523 | ret = nn_copy(u, u2); EG(ret, err); |
1146 | 523 | ret = nn_copy(v, u1); EG(ret, err); |
1147 | 523 | } else { |
1148 | 484 | ret = nn_copy(g, c); EG(ret, err); |
1149 | 484 | ret = nn_copy(u, v2); EG(ret, err); |
1150 | 484 | ret = nn_copy(v, v1); EG(ret, err); |
1151 | 484 | } |
1152 | | |
1153 | | /* swap = -1 means u <= 0; = 1 means v <= 0 */ |
1154 | 1.00k | (*sign) = swap ? -1 : 1; |
1155 | 1.00k | ret = 0; |
1156 | | |
1157 | 1.03k | err: |
1158 | | /* |
1159 | | * We uninit scratch elements in all cases, i.e. whether or not |
1160 | | * we return an error or not. |
1161 | | */ |
1162 | 9.35k | for (i = 0; i < 8; i++){ |
1163 | 8.31k | nn_uninit(&scratch[i]); |
1164 | 8.31k | } |
1165 | | /* Unitialize output in case of error */ |
1166 | 1.03k | if (ret){ |
1167 | 0 | nn_uninit(v); |
1168 | 0 | nn_uninit(u); |
1169 | 0 | nn_uninit(g); |
1170 | 0 | } |
1171 | | |
1172 | 1.03k | return ret; |
1173 | 1.00k | } |
1174 | | |
1175 | | /* |
1176 | | * Aliased version of xgcd, and no assumption on a and b. Not constant time at |
1177 | | * all. returns 0 on success, -1 on error. XXX document 'sign' |
1178 | | */ |
1179 | | int nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b, int *sign) |
1180 | 1.03k | { |
1181 | | /* Handle aliasing |
1182 | | * Note: in order to properly handle aliasing, we accept to lose |
1183 | | * some "space" on the stack with copies. |
1184 | | */ |
1185 | 1.03k | nn a_cpy, b_cpy; |
1186 | 1.03k | nn_src_t a_, b_; |
1187 | 1.03k | int ret, cmp, _sign; |
1188 | 1.03k | a_cpy.magic = b_cpy.magic = WORD(0); |
1189 | | |
1190 | | /* The internal _nn_xgcd function initializes g, u and v */ |
1191 | 1.03k | ret = nn_check_initialized(a); EG(ret, err); |
1192 | 1.03k | ret = nn_check_initialized(b); EG(ret, err); |
1193 | 1.03k | MUST_HAVE((sign != NULL), ret, err); |
1194 | | |
1195 | 1.03k | ret = nn_init(&b_cpy, 0); EG(ret, err); |
1196 | | |
1197 | | /* Aliasing of a */ |
1198 | 1.03k | if ((g == a) || (u == a) || (v == a)){ |
1199 | 0 | ret = nn_copy(&a_cpy, a); EG(ret, err); |
1200 | 0 | a_ = &a_cpy; |
1201 | 1.03k | } else { |
1202 | 1.03k | a_ = a; |
1203 | 1.03k | } |
1204 | | /* Aliasing of b */ |
1205 | 1.03k | if ((g == b) || (u == b) || (v == b)) { |
1206 | 0 | ret = nn_copy(&b_cpy, b); EG(ret, err); |
1207 | 0 | b_ = &b_cpy; |
1208 | 1.03k | } else { |
1209 | 1.03k | b_ = b; |
1210 | 1.03k | } |
1211 | | |
1212 | 1.03k | ret = nn_cmp(a_, b_, &cmp); EG(ret, err); |
1213 | 1.03k | if (cmp < 0) { |
1214 | | /* If a < b, swap the inputs */ |
1215 | 753 | ret = _nn_xgcd(g, v, u, b_, a_, &_sign); EG(ret, err); |
1216 | 753 | (*sign) = -(_sign); |
1217 | 753 | } else { |
1218 | 286 | ret = _nn_xgcd(g, u, v, a_, b_, &_sign); EG(ret, err); |
1219 | 286 | (*sign) = _sign; |
1220 | 286 | } |
1221 | | |
1222 | 1.03k | err: |
1223 | 1.03k | nn_uninit(&b_cpy); |
1224 | 1.03k | nn_uninit(&a_cpy); |
1225 | | |
1226 | 1.03k | return ret; |
1227 | 1.03k | } |
1228 | | |
1229 | | /* |
1230 | | * Compute g = gcd(a, b). Internally use the xgcd and drop u and v. |
1231 | | * Not constant time at all. Returns 0 on success, -1 on error. |
1232 | | * XXX document 'sign'. |
1233 | | */ |
1234 | | int nn_gcd(nn_t g, nn_src_t a, nn_src_t b, int *sign) |
1235 | 564 | { |
1236 | 564 | nn u, v; |
1237 | 564 | int ret; |
1238 | 564 | u.magic = v.magic = WORD(0); |
1239 | | |
1240 | | /* nn_xgcd will initialize g, u and v and |
1241 | | * check if a and b are indeed initialized. |
1242 | | */ |
1243 | 564 | ret = nn_xgcd(g, &u, &v, a, b, sign); |
1244 | | |
1245 | 564 | nn_uninit(&u); |
1246 | 564 | nn_uninit(&v); |
1247 | | |
1248 | 564 | return ret; |
1249 | 564 | } |