Line | Count | Source |
1 | | /* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base. |
2 | | |
3 | | Copyright 2007-2009, 2012, 2015, 2016, 2018, 2020 Free Software |
4 | | Foundation, Inc. |
5 | | |
6 | | This file is part of the GNU MP Library. |
7 | | |
8 | | The GNU MP Library is free software; you can redistribute it and/or modify |
9 | | it under the terms of either: |
10 | | |
11 | | * the GNU Lesser General Public License as published by the Free |
12 | | Software Foundation; either version 3 of the License, or (at your |
13 | | option) any later version. |
14 | | |
15 | | or |
16 | | |
17 | | * the GNU General Public License as published by the Free Software |
18 | | Foundation; either version 2 of the License, or (at your option) any |
19 | | later version. |
20 | | |
21 | | or both in parallel, as here. |
22 | | |
23 | | The GNU MP Library is distributed in the hope that it will be useful, but |
24 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
25 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
26 | | for more details. |
27 | | |
28 | | You should have received copies of the GNU General Public License and the |
29 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
30 | | see https://www.gnu.org/licenses/. */ |
31 | | |
32 | | |
33 | | #include "gmp-impl.h" |
34 | | #include "longlong.h" |
35 | | |
36 | | |
/* Return bit number bi (1 or 0) of the limb array p.  Bits are numbered
   from 1, so bi == 1 selects the least significant bit of p[0]; bi must be
   non-zero.  The per-limb bit count is GMP_NUMB_BITS, the same one getbits
   uses, so both helpers index the exponent identically.  Note that bi is
   evaluated twice.  */
#define getbit(p,bi) \
  ((((p)[((bi) - 1) / GMP_NUMB_BITS]) >> (((bi) - 1) % GMP_NUMB_BITS)) & 1)
39 | | |
40 | | static inline mp_limb_t |
41 | | getbits (const mp_limb_t *p, mp_bitcnt_t bi, unsigned nbits) |
42 | 169k | { |
43 | 169k | unsigned nbits_in_r; |
44 | 169k | mp_limb_t r; |
45 | 169k | mp_size_t i; |
46 | | |
47 | 169k | if (bi <= nbits) |
48 | 1.56k | { |
49 | 1.56k | return p[0] & (((mp_limb_t) 1 << bi) - 1); |
50 | 1.56k | } |
51 | 167k | else |
52 | 167k | { |
53 | 167k | bi -= nbits; /* bit index of low bit to extract */ |
54 | 167k | i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */ |
55 | 167k | bi %= GMP_NUMB_BITS; /* bit index in low word */ |
56 | 167k | r = p[i] >> bi; /* extract (low) bits */ |
57 | 167k | nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */ |
58 | 167k | if (nbits_in_r < nbits) /* did we get enough bits? */ |
59 | 9.02k | r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */ |
60 | 167k | return r & (((mp_limb_t) 1 << nbits) - 1); |
61 | 167k | } |
62 | 169k | } |
63 | | |
64 | | static inline unsigned |
65 | | win_size (mp_bitcnt_t eb) |
66 | 2.89k | { |
67 | 2.89k | unsigned k; |
68 | 2.89k | static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0}; |
69 | 2.89k | ASSERT (eb > 1); |
70 | 12.8k | for (k = 0; eb > x[k++];) |
71 | 9.97k | ; |
72 | 2.89k | return k; |
73 | 2.89k | } |
74 | | |
75 | | /* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base. |
76 | | Requires that ep[en-1] is non-zero. |
77 | | Uses scratch space tp[3n-1..0], i.e., 3n words. */ |
78 | | /* We only use n words in the scratch space, we should pass tp + n to |
79 | | mullo/sqrlo as a temporary area, it is needed. */ |
80 | | void |
81 | | mpn_powlo (mp_ptr rp, mp_srcptr bp, |
82 | | mp_srcptr ep, mp_size_t en, |
83 | | mp_size_t n, mp_ptr tp) |
84 | 2.89k | { |
85 | 2.89k | unsigned cnt; |
86 | 2.89k | mp_bitcnt_t ebi; |
87 | 2.89k | unsigned windowsize, this_windowsize; |
88 | 2.89k | mp_limb_t expbits; |
89 | 2.89k | mp_limb_t *pp; |
90 | 2.89k | long i; |
91 | 2.89k | int flipflop; |
92 | 2.89k | TMP_DECL; |
93 | | |
94 | 2.89k | ASSERT (en > 1 || (en == 1 && ep[0] > 1)); |
95 | | |
96 | 2.89k | TMP_MARK; |
97 | | |
98 | 2.89k | MPN_SIZEINBASE_2EXP(ebi, ep, en, 1); |
99 | | |
100 | 2.89k | windowsize = win_size (ebi); |
101 | 2.89k | if (windowsize > 1) |
102 | 2.80k | { |
103 | 2.80k | mp_limb_t *this_pp, *last_pp; |
104 | 2.80k | ASSERT (windowsize < ebi); |
105 | | |
106 | 2.80k | pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1))); |
107 | | |
108 | 2.80k | this_pp = pp; |
109 | | |
110 | 2.80k | MPN_COPY (this_pp, bp, n); |
111 | | |
112 | | /* Store b^2 in tp. */ |
113 | 2.80k | mpn_sqrlo (tp, bp, n); |
114 | | |
115 | | /* Precompute odd powers of b and put them in the temporary area at pp. */ |
116 | 2.80k | i = (1 << (windowsize - 1)) - 1; |
117 | 2.80k | do |
118 | 39.7k | { |
119 | 39.7k | last_pp = this_pp; |
120 | 39.7k | this_pp += n; |
121 | 39.7k | mpn_mullo_n (this_pp, last_pp, tp, n); |
122 | 39.7k | } while (--i != 0); |
123 | | |
124 | 2.80k | expbits = getbits (ep, ebi, windowsize); |
125 | 2.80k | ebi -= windowsize; |
126 | | |
127 | | /* THINK: Should we initialise the case expbits % 4 == 0 with a mullo? */ |
128 | 2.80k | count_trailing_zeros (cnt, expbits); |
129 | 2.80k | ebi += cnt; |
130 | 2.80k | expbits >>= cnt; |
131 | | |
132 | 2.80k | MPN_COPY (rp, pp + n * (expbits >> 1), n); |
133 | 2.80k | } |
134 | 90 | else |
135 | 90 | { |
136 | 90 | pp = tp + n; |
137 | 90 | MPN_COPY (pp, bp, n); |
138 | 90 | MPN_COPY (rp, bp, n); |
139 | 90 | --ebi; |
140 | 90 | } |
141 | | |
142 | 2.89k | flipflop = 0; |
143 | | |
144 | 2.89k | do |
145 | 168k | { |
146 | 525k | while (getbit (ep, ebi) == 0) |
147 | 358k | { |
148 | 358k | mpn_sqrlo (tp, rp, n); |
149 | 358k | MP_PTR_SWAP (rp, tp); |
150 | 358k | flipflop = ! flipflop; |
151 | 358k | if (--ebi == 0) |
152 | 2.40k | goto done; |
153 | 358k | } |
154 | | |
155 | | /* The next bit of the exponent is 1. Now extract the largest block of |
156 | | bits <= windowsize, and such that the least significant bit is 1. */ |
157 | | |
158 | 166k | expbits = getbits (ep, ebi, windowsize); |
159 | 166k | this_windowsize = MIN (windowsize, ebi); |
160 | | |
161 | 166k | count_trailing_zeros (cnt, expbits); |
162 | 166k | this_windowsize -= cnt; |
163 | 166k | ebi -= this_windowsize; |
164 | 166k | expbits >>= cnt; |
165 | | |
166 | 475k | while (this_windowsize > 1) |
167 | 309k | { |
168 | 309k | mpn_sqrlo (tp, rp, n); |
169 | 309k | mpn_sqrlo (rp, tp, n); |
170 | 309k | this_windowsize -= 2; |
171 | 309k | } |
172 | | |
173 | 166k | if (this_windowsize != 0) |
174 | 86.2k | mpn_sqrlo (tp, rp, n); |
175 | 80.2k | else |
176 | 80.2k | { |
177 | 80.2k | MP_PTR_SWAP (rp, tp); |
178 | 80.2k | flipflop = ! flipflop; |
179 | 80.2k | } |
180 | | |
181 | 166k | mpn_mullo_n (rp, tp, pp + n * (expbits >> 1), n); |
182 | 166k | } while (ebi != 0); |
183 | | |
184 | 2.89k | done: |
185 | 2.89k | if (flipflop) |
186 | 1.72k | MPN_COPY (tp, rp, n); |
187 | 2.89k | TMP_FREE; |
188 | 2.89k | } |