/src/gmp-6.2.1/mpn/brootinv.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).  | 
2  |  |  | 
3  |  |    Contributed to the GNU project by Martin Boij (as part of perfpow.c).  | 
4  |  |  | 
5  |  | Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc.  | 
6  |  |  | 
7  |  | This file is part of the GNU MP Library.  | 
8  |  |  | 
9  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
10  |  | it under the terms of either:  | 
11  |  |  | 
12  |  |   * the GNU Lesser General Public License as published by the Free  | 
13  |  |     Software Foundation; either version 3 of the License, or (at your  | 
14  |  |     option) any later version.  | 
15  |  |  | 
16  |  | or  | 
17  |  |  | 
18  |  |   * the GNU General Public License as published by the Free Software  | 
19  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
20  |  |     later version.  | 
21  |  |  | 
22  |  | or both in parallel, as here.  | 
23  |  |  | 
24  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
25  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
26  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
27  |  | for more details.  | 
28  |  |  | 
29  |  | You should have received copies of the GNU General Public License and the  | 
30  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
31  |  | see https://www.gnu.org/licenses/.  */  | 
32  |  |  | 
33  |  | #include "gmp-impl.h"  | 
34  |  |  | 
35  |  | /* Computes a^2e (mod B). Uses right-to-left binary algorithm, since  | 
36  |  |    typical use will have e small. */  | 
37  |  | static mp_limb_t  | 
38  |  | powsquaredlimb (mp_limb_t a, mp_limb_t e)  | 
39  | 0  | { | 
40  | 0  |   mp_limb_t r;  | 
41  |  | 
  | 
42  | 0  |   r = 1;  | 
43  |  |   /* if (LIKELY (e != 0)) */  | 
44  | 0  |   do { | 
45  | 0  |     a *= a;  | 
46  | 0  |     if (e & 1)  | 
47  | 0  |       r *= a;  | 
48  | 0  |     e >>= 1;  | 
49  | 0  |   } while (e != 0);  | 
50  |  | 
  | 
51  | 0  |   return r;  | 
52  | 0  | }  | 
53  |  |  | 
54  |  | /* Compute r such that r^k * y = 1 (mod B^n).  | 
55  |  |  | 
56  |  |    Iterates  | 
57  |  |      r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b) | 
58  |  |    using Hensel lifting, each time doubling the number of known bits in r.  | 
59  |  |  | 
60  |  |    Works just for odd k.  Else the Hensel lifting degenerates.  | 
61  |  |  | 
62  |  |    FIXME:  | 
63  |  |  | 
64  |  |      (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).  | 
65  |  |  | 
66  |  |      (2) Rewrite iteration as  | 
67  |  |      r' <-- r - k^{-1} r (r^k y - 1) | 
68  |  |    and take advantage of the zero low part of r^k y - 1.  | 
69  |  |  | 
70  |  |      (3) Use wrap-around trick.  | 
71  |  |  | 
72  |  |      (4) Use a small table to get starting value.  | 
73  |  |  | 
74  |  |    Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo  | 
75  |  |    Currently mpn_powlo requires 3*bn  | 
76  |  |    so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS).  | 
77  |  | */  | 
78  |  |  | 
/* Parameters, as used by the code below:
     rp  destination; limb 0 is stored directly and, when bn > 1, the
         result limbs are produced by the mpn_pi1_bdiv_q_1 call that ends
         each lifting step.
     yp  source operand y; limb 0 is read directly, the rest via
         mpn_mullo_n.
     bn  operand size in limbs; asserted to be positive.
     k   root index; asserted to be odd.
     tp  scratch space; its required size is discussed in the header
         comment preceding this function.  */
79  |  | void  | 
80  |  | mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)  | 
81  | 0  | { | 
82  | 0  |   mp_ptr tp2, tp3;  | 
83  | 0  |   mp_limb_t kinv, k2, r0, y0;  | 
/* order[] records the operand sizes of the successive lifting steps.  */
84  | 0  |   mp_size_t order[GMP_LIMB_BITS + 1];  | 
85  | 0  |   int d;  | 
86  |  | 
  | 
87  | 0  |   ASSERT (bn > 0);  | 
88  | 0  |   ASSERT ((k & 1) != 0);  | 
89  |  |  | 
/* Carve the scratch area into three consecutive regions: tp (bn limbs),
   tp2 ((bn + 3) >> 1 limbs) and tp3, which is handed to mpn_powlo as its
   own scratch.  These pointers are computed from the full bn, before bn
   is reused as the running size further down.  */
90  | 0  |   tp2 = tp + bn;  | 
91  | 0  |   tp3 = tp + bn + ((bn + 3) >> 1);  | 
92  | 0  |   k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */  | 
93  |  | 
  | 
/* kinv serves below as the k^{-1} factor of the iteration; binvert_limb
   presumably sets it to the inverse of k modulo the limb base -- confirm
   against gmp-impl.h.  */
94  | 0  |   binvert_limb (kinv, k);  | 
95  |  |  | 
96  |  |   /* 4-bit initial approximation:  | 
97  |  |  | 
98  |  |    y%16 | 1  3  5  7  9 11 13 15,  | 
99  |  |     k%4 +-------------------------+k2%2  | 
100  |  |      1  | 1 11 13  7  9  3  5 15  |  1  | 
101  |  |      3  | 1  3  5  7  9 11 13 15  |  0  | 
102  |  |  | 
103  |  |   */  | 
104  | 0  |   y0 = yp[0];  | 
105  |  | 
  | 
/* Single-limb Newton steps.  Since 2*k2 = k + 1 and powsquaredlimb (r0, k2)
   yields r0^(2*k2), each of the following assignments evaluates
   k^{-1} ((k+1) r0 - r0^{k+1} y0), the iteration given in the header
   comment; the trailing comments state the precision reached.
   NOTE(review): the exponent masks 0x3f and 0x3fff presumably rely on only
   the low exponent bits mattering at 8 and 16 bits of precision -- confirm.  */
106  | 0  |   r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8);      /* 4 bits */  | 
107  | 0  |   r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f)); /* 8 bits */  | 
108  | 0  |   r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff)); /* 16 bits */  | 
/* For limbs wider than 16 bits, keep iterating with the unmasked exponent;
   prec doubles per step and the loop ends once it reaches GMP_NUMB_BITS.  */
109  | 0  | #if GMP_NUMB_BITS > 16  | 
110  | 0  |   { | 
111  | 0  |     unsigned prec = 16;  | 
112  | 0  |     do  | 
113  | 0  |       { | 
114  | 0  |   r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2));  | 
115  | 0  |   prec *= 2;  | 
116  | 0  |       }  | 
117  | 0  |     while (prec < GMP_NUMB_BITS);  | 
118  | 0  |   }  | 
119  | 0  | #endif  | 
120  |  | 
  | 
/* The low limb is final; a one-limb request is complete here.  */
121  | 0  |   rp[0] = r0;  | 
122  | 0  |   if (bn == 1)  | 
123  | 0  |     return;  | 
124  |  |  | 
/* Record the sizes bn, (bn + 1) >> 1, ... obtained by repeated halving
   (rounding up), stopping before the value 2; 2 itself is stored as the
   last entry.  The lifting loop consumes the entries in reverse order,
   from order[d] down to order[0].  */
125  | 0  |   d = 0;  | 
126  | 0  |   for (; bn != 2; bn = (bn + 1) >> 1)  | 
127  | 0  |     order[d++] = bn;  | 
128  |  | 
  | 
129  | 0  |   order[d] = 2;  | 
/* From here on bn is the number of limbs of rp computed so far.  */
130  | 0  |   bn = 1;  | 
131  |  | 
  | 
132  | 0  |   do  | 
133  | 0  |     { | 
/* tp <- r^2 and tp2 <- (k2 << 1) * r, with the carry-out limb of the latter
   stored at tp2[bn].  k2 << 1 equals k + 1 unless that overflows a limb
   (see FIXME (1) in the header comment).  The order matters: the square is
   written first because its result may spill into tp2, which mpn_mul_1
   fills afterwards.  */
134  | 0  |       mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */  | 
135  | 0  |       tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1);  | 
136  |  | 
  | 
/* Switch to the size targeted by this lifting step.  */
137  | 0  |       bn = order[d];  | 
138  |  | 
  | 
/* rp <- tp^k2 = r^(k+1), then tp <- y * r^(k+1).  Judging by the powlo and
   mullo names, only the low bn limbs are produced -- presumably; confirm
   against the GMP internals.  */
139  | 0  |       mpn_powlo (rp, tp, &k2, 1, bn, tp3);  | 
140  | 0  |       mpn_mullo_n (tp, yp, rp, bn);  | 
141  |  |  | 
/* NOTE(review): the commented-out mpn_sub call below is presumably unusable
   because mpn_sub wants its first operand to be at least as long as the
   second, whereas here tp2 is the shorter one -- confirm.  */
142  |  |       /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */  | 
143  |  |       /* The function above is not handled, ((bn + 1) >> 1) + 1 <= bn*/  | 
/* tp <- tp2 - tp over bn limbs, treating the limbs of tp2 beyond pbn as
   zero.  The low pbn limbs are subtracted with mpn_sub_n; the remaining
   high part x becomes 0 - x - borrow, which is the complement of x
   (mpn_com) when a borrow came out of the low part and the negation of x
   (mpn_neg) otherwise.  */
144  | 0  |       { | 
145  | 0  |   mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */  | 
146  | 0  |   int borrow;  | 
147  | 0  |   borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0;  | 
148  | 0  |   if (bn > pbn) /* 3 < bn */  | 
149  | 0  |     { | 
150  | 0  |       if (borrow)  | 
151  | 0  |         mpn_com (tp + pbn, tp + pbn, bn - pbn);  | 
152  | 0  |       else  | 
153  | 0  |         mpn_neg (tp + pbn, tp + pbn, bn - pbn);  | 
154  | 0  |     }  | 
155  | 0  |       }  | 
/* rp <- tp / k, supplying the k^{-1} factor of the iteration; the call is
   given both k and its precomputed limb inverse kinv.
   NOTE(review): presumably an exact (Hensel) division keeping bn limbs --
   confirm against the mpn_pi1_bdiv_q_1 definition.  */
156  | 0  |       mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0);  | 
157  | 0  |     }  | 
/* Repeat for the next larger recorded size; order[0] is processed last.  */
158  | 0  |   while (--d >= 0);  | 
159  | 0  | }  | 
148  | 0  |   if (bn > pbn) /* 3 < bn */  | 
149  | 0  |     { | 
150  | 0  |       if (borrow)  | 
151  | 0  |         mpn_com (tp + pbn, tp + pbn, bn - pbn);  | 
152  | 0  |       else  | 
153  | 0  |         mpn_neg (tp + pbn, tp + pbn, bn - pbn);  | 
154  | 0  |     }  | 
155  | 0  |       }  | 
156  | 0  |       mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0);  | 
157  | 0  |     }  | 
158  | 0  |   while (--d >= 0);  | 
159  | 0  | }  |