/src/gmp-6.2.1/mpn/brootinv.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b). |
2 | | |
3 | | Contributed to the GNU project by Martin Boij (as part of perfpow.c). |
4 | | |
5 | | Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc. |
6 | | |
7 | | This file is part of the GNU MP Library. |
8 | | |
9 | | The GNU MP Library is free software; you can redistribute it and/or modify |
10 | | it under the terms of either: |
11 | | |
12 | | * the GNU Lesser General Public License as published by the Free |
13 | | Software Foundation; either version 3 of the License, or (at your |
14 | | option) any later version. |
15 | | |
16 | | or |
17 | | |
18 | | * the GNU General Public License as published by the Free Software |
19 | | Foundation; either version 2 of the License, or (at your option) any |
20 | | later version. |
21 | | |
22 | | or both in parallel, as here. |
23 | | |
24 | | The GNU MP Library is distributed in the hope that it will be useful, but |
25 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
26 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
27 | | for more details. |
28 | | |
29 | | You should have received copies of the GNU General Public License and the |
30 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
31 | | see https://www.gnu.org/licenses/. */ |
32 | | |
33 | | #include "gmp-impl.h" |
34 | | |
35 | | /* Computes a^2e (mod B). Uses right-to-left binary algorithm, since |
36 | | typical use will have e small. */ |
37 | | static mp_limb_t |
38 | | powsquaredlimb (mp_limb_t a, mp_limb_t e) |
39 | 0 | { |
40 | 0 | mp_limb_t r; |
41 | |
|
42 | 0 | r = 1; |
43 | | /* if (LIKELY (e != 0)) */ |
44 | 0 | do { |
45 | 0 | a *= a; |
46 | 0 | if (e & 1) |
47 | 0 | r *= a; |
48 | 0 | e >>= 1; |
49 | 0 | } while (e != 0); |
50 | |
|
51 | 0 | return r; |
52 | 0 | } |
53 | | |
54 | | /* Compute r such that r^k * y = 1 (mod B^n). |
55 | | |
56 | | Iterates |
57 | | r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b) |
58 | | using Hensel lifting, each time doubling the number of known bits in r. |
59 | | |
60 | | Works just for odd k. Else the Hensel lifting degenerates. |
61 | | |
62 | | FIXME: |
63 | | |
64 | | (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows). |
65 | | |
66 | | (2) Rewrite iteration as |
67 | | r' <-- r - k^{-1} r (r^k y - 1) |
68 | | and take advantage of the zero low part of r^k y - 1. |
69 | | |
70 | | (3) Use wrap-around trick. |
71 | | |
72 | | (4) Use a small table to get starting value. |
73 | | |
74 | | Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo |
75 | | Currently mpn_powlo requires 3*bn |
76 | | so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS). |
77 | | */ |
78 | | |
79 | | void |
80 | | mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp) |
81 | 0 | { |
82 | 0 | mp_ptr tp2, tp3; |
83 | 0 | mp_limb_t kinv, k2, r0, y0; |
84 | 0 | mp_size_t order[GMP_LIMB_BITS + 1]; |
85 | 0 | int d; |
86 | |
|
87 | 0 | ASSERT (bn > 0); |
88 | 0 | ASSERT ((k & 1) != 0); |
89 | | |
90 | 0 | tp2 = tp + bn; |
91 | 0 | tp3 = tp + bn + ((bn + 3) >> 1); |
92 | 0 | k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */ |
93 | |
|
94 | 0 | binvert_limb (kinv, k); |
95 | | |
96 | | /* 4-bit initial approximation: |
97 | | |
98 | | y%16 | 1 3 5 7 9 11 13 15, |
99 | | k%4 +-------------------------+k2%2 |
100 | | 1 | 1 11 13 7 9 3 5 15 | 1 |
101 | | 3 | 1 3 5 7 9 11 13 15 | 0 |
102 | | |
103 | | */ |
104 | 0 | y0 = yp[0]; |
105 | |
|
106 | 0 | r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8); /* 4 bits */ |
107 | 0 | r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f)); /* 8 bits */ |
108 | 0 | r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff)); /* 16 bits */ |
109 | 0 | #if GMP_NUMB_BITS > 16 |
110 | 0 | { |
111 | 0 | unsigned prec = 16; |
112 | 0 | do |
113 | 0 | { |
114 | 0 | r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2)); |
115 | 0 | prec *= 2; |
116 | 0 | } |
117 | 0 | while (prec < GMP_NUMB_BITS); |
118 | 0 | } |
119 | 0 | #endif |
120 | |
|
121 | 0 | rp[0] = r0; |
122 | 0 | if (bn == 1) |
123 | 0 | return; |
124 | | |
125 | 0 | d = 0; |
126 | 0 | for (; bn != 2; bn = (bn + 1) >> 1) |
127 | 0 | order[d++] = bn; |
128 | |
|
129 | 0 | order[d] = 2; |
130 | 0 | bn = 1; |
131 | |
|
132 | 0 | do |
133 | 0 | { |
134 | 0 | mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */ |
135 | 0 | tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1); |
136 | |
|
137 | 0 | bn = order[d]; |
138 | |
|
139 | 0 | mpn_powlo (rp, tp, &k2, 1, bn, tp3); |
140 | 0 | mpn_mullo_n (tp, yp, rp, bn); |
141 | | |
142 | | /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */ |
143 | | /* The function above is not handled, ((bn + 1) >> 1) + 1 <= bn*/ |
144 | 0 | { |
145 | 0 | mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */ |
146 | 0 | int borrow; |
147 | 0 | borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0; |
148 | 0 | if (bn > pbn) /* 3 < bn */ |
149 | 0 | { |
150 | 0 | if (borrow) |
151 | 0 | mpn_com (tp + pbn, tp + pbn, bn - pbn); |
152 | 0 | else |
153 | 0 | mpn_neg (tp + pbn, tp + pbn, bn - pbn); |
154 | 0 | } |
155 | 0 | } |
156 | 0 | mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0); |
157 | 0 | } |
158 | 0 | while (--d >= 0); |
159 | 0 | } |