/src/dropbear/libtommath/bn_s_mp_sqr_fast.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "tommath_private.h" |
2 | | #ifdef BN_S_MP_SQR_FAST_C |
3 | | /* LibTomMath, multiple-precision integer library -- Tom St Denis */ |
4 | | /* SPDX-License-Identifier: Unlicense */ |
5 | | |
6 | | /* the jist of squaring... |
7 | | * you do like mult except the offset of the tmpx [one that |
8 | | * starts closer to zero] can't equal the offset of tmpy. |
9 | | * So basically you set up iy like before then you min it with |
10 | | * (ty-tx) so that it never happens. You double all those |
11 | | * you add in the inner loop |
12 | | |
13 | | After that loop you do the squares and add them in. |
14 | | */ |
15 | | |
16 | | mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) |
17 | 75.6M | { |
18 | 75.6M | int olduse, pa, ix, iz; |
19 | 75.6M | mp_digit W[MP_WARRAY], *tmpx; |
20 | 75.6M | mp_word W1; |
21 | 75.6M | mp_err err; |
22 | | |
23 | | /* grow the destination as required */ |
24 | 75.6M | pa = a->used + a->used; |
25 | 75.6M | if (b->alloc < pa) { |
26 | 103k | if ((err = mp_grow(b, pa)) != MP_OKAY) { |
27 | 0 | return err; |
28 | 0 | } |
29 | 103k | } |
30 | | |
31 | | /* number of output digits to produce */ |
32 | 75.6M | W1 = 0; |
33 | 4.54G | for (ix = 0; ix < pa; ix++) { |
34 | 4.47G | int tx, ty, iy; |
35 | 4.47G | mp_word _W; |
36 | 4.47G | mp_digit *tmpy; |
37 | | |
38 | | /* clear counter */ |
39 | 4.47G | _W = 0; |
40 | | |
41 | | /* get offsets into the two bignums */ |
42 | 4.47G | ty = MP_MIN(a->used-1, ix); |
43 | 4.47G | tx = ix - ty; |
44 | | |
45 | | /* setup temp aliases */ |
46 | 4.47G | tmpx = a->dp + tx; |
47 | 4.47G | tmpy = a->dp + ty; |
48 | | |
49 | | /* this is the number of times the loop will iterrate, essentially |
50 | | while (tx++ < a->used && ty-- >= 0) { ... } |
51 | | */ |
52 | 4.47G | iy = MP_MIN(a->used-tx, ty+1); |
53 | | |
54 | | /* now for squaring tx can never equal ty |
55 | | * we halve the distance since they approach at a rate of 2x |
56 | | * and we have to round because odd cases need to be executed |
57 | | */ |
58 | 4.47G | iy = MP_MIN(iy, ((ty-tx)+1)>>1); |
59 | | |
60 | | /* execute loop */ |
61 | 41.1G | for (iz = 0; iz < iy; iz++) { |
62 | 36.6G | _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; |
63 | 36.6G | } |
64 | | |
65 | | /* double the inner product and add carry */ |
66 | 4.47G | _W = _W + _W + W1; |
67 | | |
68 | | /* even columns have the square term in them */ |
69 | 4.47G | if (((unsigned)ix & 1u) == 0u) { |
70 | 2.23G | _W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1]; |
71 | 2.23G | } |
72 | | |
73 | | /* store it */ |
74 | 4.47G | W[ix] = (mp_digit)_W & MP_MASK; |
75 | | |
76 | | /* make next carry */ |
77 | 4.47G | W1 = _W >> (mp_word)MP_DIGIT_BIT; |
78 | 4.47G | } |
79 | | |
80 | | /* setup dest */ |
81 | 75.6M | olduse = b->used; |
82 | 75.6M | b->used = a->used+a->used; |
83 | | |
84 | 75.6M | { |
85 | 75.6M | mp_digit *tmpb; |
86 | 75.6M | tmpb = b->dp; |
87 | 4.54G | for (ix = 0; ix < pa; ix++) { |
88 | 4.47G | *tmpb++ = W[ix] & MP_MASK; |
89 | 4.47G | } |
90 | | |
91 | | /* clear unused digits [that existed in the old copy of c] */ |
92 | 75.6M | MP_ZERO_DIGITS(tmpb, olduse - ix); |
93 | 75.6M | } |
94 | 75.6M | mp_clamp(b); |
95 | 75.6M | return MP_OKAY; |
96 | 75.6M | } |
97 | | #endif |