/src/gmp/mpn/hgcd_reduce.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* hgcd_reduce.c. |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
4 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
5 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
6 | | |
7 | | Copyright 2011, 2012 Free Software Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
34 | | |
35 | | #include "gmp-impl.h" |
36 | | #include "longlong.h" |
37 | | |
38 | | /* Computes R -= A * B. Result must be non-negative. Normalized down |
39 | | to size an, and resulting size is returned. */ |
40 | | static mp_size_t |
41 | | submul (mp_ptr rp, mp_size_t rn, |
42 | | mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) |
43 | 0 | { |
44 | 0 | mp_ptr tp; |
45 | 0 | TMP_DECL; |
46 | |
|
47 | 0 | ASSERT (bn > 0); |
48 | 0 | ASSERT (an >= bn); |
49 | 0 | ASSERT (rn >= an); |
50 | 0 | ASSERT (an + bn <= rn + 1); |
51 | |
|
52 | 0 | TMP_MARK; |
53 | 0 | tp = TMP_ALLOC_LIMBS (an + bn); |
54 | |
|
55 | 0 | mpn_mul (tp, ap, an, bp, bn); |
56 | 0 | ASSERT ((an + bn <= rn) || (tp[rn] == 0)); |
57 | 0 | ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn - (an + bn > rn))); |
58 | 0 | TMP_FREE; |
59 | |
|
60 | 0 | while (rn > an && (rp[rn-1] == 0)) |
61 | 0 | rn--; |
62 | |
|
63 | 0 | return rn; |
64 | 0 | } |
65 | | |
66 | | /* Computes (a, b) <-- M^{-1} (a; b) */ |
67 | | /* FIXME: |
68 | | x Take scratch parameter, and figure out scratch need. |
69 | | |
70 | | x Use some fallback for small M->n? |
71 | | */ |
72 | | static mp_size_t |
73 | | hgcd_matrix_apply (const struct hgcd_matrix *M, |
74 | | mp_ptr ap, mp_ptr bp, |
75 | | mp_size_t n) |
76 | 0 | { |
77 | 0 | mp_size_t an, bn, un, vn, nn; |
78 | 0 | mp_size_t mn[2][2]; |
79 | 0 | mp_size_t modn; |
80 | 0 | mp_ptr tp, sp, scratch; |
81 | 0 | mp_limb_t cy; |
82 | 0 | unsigned i, j; |
83 | |
|
84 | 0 | TMP_DECL; |
85 | |
|
86 | 0 | ASSERT ( (ap[n-1] | bp[n-1]) > 0); |
87 | |
|
88 | 0 | an = n; |
89 | 0 | MPN_NORMALIZE (ap, an); |
90 | 0 | bn = n; |
91 | 0 | MPN_NORMALIZE (bp, bn); |
92 | |
|
93 | 0 | for (i = 0; i < 2; i++) |
94 | 0 | for (j = 0; j < 2; j++) |
95 | 0 | { |
96 | 0 | mp_size_t k; |
97 | 0 | k = M->n; |
98 | 0 | MPN_NORMALIZE (M->p[i][j], k); |
99 | 0 | mn[i][j] = k; |
100 | 0 | } |
101 | |
|
102 | 0 | ASSERT (mn[0][0] > 0); |
103 | 0 | ASSERT (mn[1][1] > 0); |
104 | 0 | ASSERT ( (mn[0][1] | mn[1][0]) > 0); |
105 | |
|
106 | 0 | TMP_MARK; |
107 | |
|
108 | 0 | if (mn[0][1] == 0) |
109 | 0 | { |
110 | | /* A unchanged, M = (1, 0; q, 1) */ |
111 | 0 | ASSERT (mn[0][0] == 1); |
112 | 0 | ASSERT (M->p[0][0][0] == 1); |
113 | 0 | ASSERT (mn[1][1] == 1); |
114 | 0 | ASSERT (M->p[1][1][0] == 1); |
115 | | |
116 | | /* Put B <-- B - q A */ |
117 | 0 | nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]); |
118 | 0 | } |
119 | 0 | else if (mn[1][0] == 0) |
120 | 0 | { |
121 | | /* B unchanged, M = (1, q; 0, 1) */ |
122 | 0 | ASSERT (mn[0][0] == 1); |
123 | 0 | ASSERT (M->p[0][0][0] == 1); |
124 | 0 | ASSERT (mn[1][1] == 1); |
125 | 0 | ASSERT (M->p[1][1][0] == 1); |
126 | | |
127 | | /* Put A <-- A - q * B */ |
128 | 0 | nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]); |
129 | 0 | } |
130 | 0 | else |
131 | 0 | { |
132 | | /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01. |
133 | | B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */ |
134 | 0 | un = MIN (an - mn[0][0], bn - mn[1][0]) + 1; |
135 | 0 | vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1; |
136 | |
|
137 | 0 | nn = MAX (un, vn); |
138 | | /* In the range of interest, mulmod_bnm1 should always beat mullo. */ |
139 | 0 | modn = mpn_mulmod_bnm1_next_size (nn + 1); |
140 | |
|
141 | 0 | TMP_ALLOC_LIMBS_3 (tp, modn, |
142 | 0 | sp, modn, |
143 | 0 | scratch, mpn_mulmod_bnm1_itch (modn, modn, M->n)); |
144 | |
|
145 | 0 | ASSERT (n <= 2*modn); |
146 | |
|
147 | 0 | if (n > modn) |
148 | 0 | { |
149 | 0 | cy = mpn_add (ap, ap, modn, ap + modn, n - modn); |
150 | 0 | MPN_INCR_U (ap, modn, cy); |
151 | |
|
152 | 0 | cy = mpn_add (bp, bp, modn, bp + modn, n - modn); |
153 | 0 | MPN_INCR_U (bp, modn, cy); |
154 | |
|
155 | 0 | n = modn; |
156 | 0 | } |
157 | |
|
158 | 0 | mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch); |
159 | 0 | mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch); |
160 | | |
161 | | /* FIXME: Handle the small n case in some better way. */ |
162 | 0 | if (n + mn[1][1] < modn) |
163 | 0 | MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]); |
164 | 0 | if (n + mn[0][1] < modn) |
165 | 0 | MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]); |
166 | |
|
167 | 0 | cy = mpn_sub_n (tp, tp, sp, modn); |
168 | 0 | MPN_DECR_U (tp, modn, cy); |
169 | |
|
170 | 0 | ASSERT (mpn_zero_p (tp + nn, modn - nn)); |
171 | |
|
172 | 0 | mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch); |
173 | 0 | MPN_COPY (ap, tp, nn); |
174 | 0 | mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch); |
175 | |
|
176 | 0 | if (n + mn[1][0] < modn) |
177 | 0 | MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]); |
178 | 0 | if (n + mn[0][0] < modn) |
179 | 0 | MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]); |
180 | |
|
181 | 0 | cy = mpn_sub_n (tp, tp, sp, modn); |
182 | 0 | MPN_DECR_U (tp, modn, cy); |
183 | |
|
184 | 0 | ASSERT (mpn_zero_p (tp + nn, modn - nn)); |
185 | 0 | MPN_COPY (bp, tp, nn); |
186 | |
|
187 | 0 | while ( (ap[nn-1] | bp[nn-1]) == 0) |
188 | 0 | { |
189 | 0 | nn--; |
190 | 0 | ASSERT (nn > 0); |
191 | 0 | } |
192 | 0 | } |
193 | 0 | TMP_FREE; |
194 | |
|
195 | 0 | return nn; |
196 | 0 | } |
197 | | |
198 | | mp_size_t |
199 | | mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p) |
200 | 0 | { |
201 | 0 | mp_size_t itch; |
202 | 0 | if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD)) |
203 | 0 | { |
204 | 0 | itch = mpn_hgcd_itch (n-p); |
205 | | |
206 | | /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 * |
207 | | (p + ceil((n-p)/2) - 1 <= n + p - 1 */ |
208 | 0 | if (itch < n + p - 1) |
209 | 0 | itch = n + p - 1; |
210 | 0 | } |
211 | 0 | else |
212 | 0 | { |
213 | 0 | itch = 2*(n-p) + mpn_hgcd_itch (n-p); |
214 | | /* Currently, hgcd_matrix_apply allocates its own storage. */ |
215 | 0 | } |
216 | 0 | return itch; |
217 | 0 | } |
218 | | |
219 | | /* FIXME: Document storage need. */ |
220 | | mp_size_t |
221 | | mpn_hgcd_reduce (struct hgcd_matrix *M, |
222 | | mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p, |
223 | | mp_ptr tp) |
224 | 0 | { |
225 | 0 | mp_size_t nn; |
226 | 0 | if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD)) |
227 | 0 | { |
228 | 0 | nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp); |
229 | 0 | if (nn > 0) |
230 | | /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1) |
231 | | = 2 (n - 1) */ |
232 | 0 | return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp); |
233 | 0 | } |
234 | 0 | else |
235 | 0 | { |
236 | 0 | MPN_COPY (tp, ap + p, n - p); |
237 | 0 | MPN_COPY (tp + n - p, bp + p, n - p); |
238 | 0 | if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p))) |
239 | 0 | return hgcd_matrix_apply (M, ap, bp, n); |
240 | 0 | } |
241 | 0 | return 0; |
242 | 0 | } |