/src/gmp/mpn/hgcd_matrix.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* hgcd_matrix.c. |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
4 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
5 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
6 | | |
7 | | Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
34 | | |
35 | | #include "gmp-impl.h" |
36 | | #include "longlong.h" |
37 | | |
38 | | /* For input of size n, matrix elements are of size at most ceil(n/2) |
39 | | - 1, but we need two limbs extra. */ |
40 | | void |
41 | | mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p) |
42 | 0 | { |
43 | 0 | mp_size_t s = (n+1)/2 + 1; |
44 | 0 | M->alloc = s; |
45 | 0 | M->n = 1; |
46 | 0 | MPN_ZERO (p, 4 * s); |
47 | 0 | M->p[0][0] = p; |
48 | 0 | M->p[0][1] = p + s; |
49 | 0 | M->p[1][0] = p + 2 * s; |
50 | 0 | M->p[1][1] = p + 3 * s; |
51 | |
|
52 | 0 | M->p[0][0][0] = M->p[1][1][0] = 1; |
53 | 0 | } |
54 | | |
55 | | /* Update column COL, adding in Q * column (1-COL). Temporary storage: |
56 | | * qn + n <= M->alloc, where n is the size of the largest element in |
57 | | * column 1 - COL. */ |
58 | | void |
59 | | mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn, |
60 | | unsigned col, mp_ptr tp) |
61 | 0 | { |
62 | 0 | ASSERT (col < 2); |
63 | |
|
64 | 0 | if (qn == 1) |
65 | 0 | { |
66 | 0 | mp_limb_t q = qp[0]; |
67 | 0 | mp_limb_t c0, c1; |
68 | |
|
69 | 0 | c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q); |
70 | 0 | c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q); |
71 | |
|
72 | 0 | M->p[0][col][M->n] = c0; |
73 | 0 | M->p[1][col][M->n] = c1; |
74 | |
|
75 | 0 | M->n += (c0 | c1) != 0; |
76 | 0 | } |
77 | 0 | else |
78 | 0 | { |
79 | 0 | unsigned row; |
80 | | |
81 | | /* Carries for the unlikely case that we get both high words |
82 | | from the multiplication and carries from the addition. */ |
83 | 0 | mp_limb_t c[2]; |
84 | 0 | mp_size_t n; |
85 | | |
86 | | /* The matrix will not necessarily grow in size by qn, so we |
87 | | need normalization in order not to overflow M. */ |
88 | |
|
89 | 0 | for (n = M->n; n + qn > M->n; n--) |
90 | 0 | { |
91 | 0 | ASSERT (n > 0); |
92 | 0 | if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0) |
93 | 0 | break; |
94 | 0 | } |
95 | |
|
96 | 0 | ASSERT (qn + n <= M->alloc); |
97 | |
|
98 | 0 | for (row = 0; row < 2; row++) |
99 | 0 | { |
100 | 0 | if (qn <= n) |
101 | 0 | mpn_mul (tp, M->p[row][1-col], n, qp, qn); |
102 | 0 | else |
103 | 0 | mpn_mul (tp, qp, qn, M->p[row][1-col], n); |
104 | |
|
105 | 0 | ASSERT (n + qn >= M->n); |
106 | 0 | c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n); |
107 | 0 | } |
108 | |
|
109 | 0 | n += qn; |
110 | |
|
111 | 0 | if (c[0] | c[1]) |
112 | 0 | { |
113 | 0 | M->p[0][col][n] = c[0]; |
114 | 0 | M->p[1][col][n] = c[1]; |
115 | 0 | n++; |
116 | 0 | } |
117 | 0 | else |
118 | 0 | { |
119 | 0 | n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0; |
120 | 0 | ASSERT (n >= M->n); |
121 | 0 | } |
122 | 0 | M->n = n; |
123 | 0 | } |
124 | |
|
125 | 0 | ASSERT (M->n < M->alloc); |
126 | 0 | } |
127 | | |
128 | | /* Multiply M by M1 from the right. Since the M1 elements fit in |
129 | | GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs |
130 | | temporary space M->n */ |
131 | | void |
132 | | mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1, |
133 | | mp_ptr tp) |
134 | 0 | { |
135 | 0 | mp_size_t n0, n1; |
136 | | |
137 | | /* Could avoid copy by some swapping of pointers. */ |
138 | 0 | MPN_COPY (tp, M->p[0][0], M->n); |
139 | 0 | n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n); |
140 | 0 | MPN_COPY (tp, M->p[1][0], M->n); |
141 | 0 | n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n); |
142 | | |
143 | | /* Depends on zero initialization */ |
144 | 0 | M->n = MAX(n0, n1); |
145 | 0 | ASSERT (M->n < M->alloc); |
146 | 0 | } |
147 | | |
148 | | /* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs |
149 | | of temporary storage (see mpn_matrix22_mul_itch). */ |
150 | | void |
151 | | mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1, |
152 | | mp_ptr tp) |
153 | 0 | { |
154 | 0 | mp_size_t n; |
155 | | |
156 | | /* About the new size of M:s elements. Since M1's diagonal elements |
157 | | are > 0, no element can decrease. The new elements are of size |
158 | | M->n + M1->n, one limb more or less. The computation of the |
159 | | matrix product produces elements of size M->n + M1->n + 1. But |
160 | | the true size, after normalization, may be three limbs smaller. |
161 | | |
162 | | The reason that the product has normalized size >= M->n + M1->n - |
163 | | 2 is subtle. It depends on the fact that M and M1 can be factored |
164 | | as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have |
165 | | M ending with a large power and M1 starting with a large power of |
166 | | the same matrix. */ |
167 | | |
168 | | /* FIXME: Strassen multiplication gives only a small speedup. In FFT |
169 | | multiplication range, this function could be sped up quite a lot |
170 | | using invariance. */ |
171 | 0 | ASSERT (M->n + M1->n < M->alloc); |
172 | |
|
173 | 0 | ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1] |
174 | 0 | | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0); |
175 | |
|
176 | 0 | ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1] |
177 | 0 | | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0); |
178 | |
|
179 | 0 | mpn_matrix22_mul (M->p[0][0], M->p[0][1], |
180 | 0 | M->p[1][0], M->p[1][1], M->n, |
181 | 0 | M1->p[0][0], M1->p[0][1], |
182 | 0 | M1->p[1][0], M1->p[1][1], M1->n, tp); |
183 | | |
184 | | /* Index of last potentially non-zero limb, size is one greater. */ |
185 | 0 | n = M->n + M1->n; |
186 | |
|
187 | 0 | n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0); |
188 | 0 | n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0); |
189 | 0 | n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0); |
190 | |
|
191 | 0 | ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0); |
192 | |
|
193 | 0 | M->n = n + 1; |
194 | 0 | } |
195 | | |
196 | | /* Multiplies the least significant p limbs of (a;b) by M^-1. |
197 | | Temporary space needed: 2 * (p + M->n)*/ |
198 | | mp_size_t |
199 | | mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M, |
200 | | mp_size_t n, mp_ptr ap, mp_ptr bp, |
201 | | mp_size_t p, mp_ptr tp) |
202 | 0 | { |
203 | | /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b) |
204 | | = (r11 a - r01 b; - r10 a + r00 b */ |
205 | |
|
206 | 0 | mp_ptr t0 = tp; |
207 | 0 | mp_ptr t1 = tp + p + M->n; |
208 | 0 | mp_limb_t ah, bh; |
209 | 0 | mp_limb_t cy; |
210 | |
|
211 | 0 | ASSERT (p + M->n < n); |
212 | | |
213 | | /* First compute the two values depending on a, before overwriting a */ |
214 | |
|
215 | 0 | if (M->n >= p) |
216 | 0 | { |
217 | 0 | mpn_mul (t0, M->p[1][1], M->n, ap, p); |
218 | 0 | mpn_mul (t1, M->p[1][0], M->n, ap, p); |
219 | 0 | } |
220 | 0 | else |
221 | 0 | { |
222 | 0 | mpn_mul (t0, ap, p, M->p[1][1], M->n); |
223 | 0 | mpn_mul (t1, ap, p, M->p[1][0], M->n); |
224 | 0 | } |
225 | | |
226 | | /* Update a */ |
227 | 0 | MPN_COPY (ap, t0, p); |
228 | 0 | ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n); |
229 | |
|
230 | 0 | if (M->n >= p) |
231 | 0 | mpn_mul (t0, M->p[0][1], M->n, bp, p); |
232 | 0 | else |
233 | 0 | mpn_mul (t0, bp, p, M->p[0][1], M->n); |
234 | |
|
235 | 0 | cy = mpn_sub (ap, ap, n, t0, p + M->n); |
236 | 0 | ASSERT (cy <= ah); |
237 | 0 | ah -= cy; |
238 | | |
239 | | /* Update b */ |
240 | 0 | if (M->n >= p) |
241 | 0 | mpn_mul (t0, M->p[0][0], M->n, bp, p); |
242 | 0 | else |
243 | 0 | mpn_mul (t0, bp, p, M->p[0][0], M->n); |
244 | |
|
245 | 0 | MPN_COPY (bp, t0, p); |
246 | 0 | bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n); |
247 | 0 | cy = mpn_sub (bp, bp, n, t1, p + M->n); |
248 | 0 | ASSERT (cy <= bh); |
249 | 0 | bh -= cy; |
250 | |
|
251 | 0 | if (ah > 0 || bh > 0) |
252 | 0 | { |
253 | 0 | ap[n] = ah; |
254 | 0 | bp[n] = bh; |
255 | 0 | n++; |
256 | 0 | } |
257 | 0 | else |
258 | 0 | { |
259 | | /* The subtraction can reduce the size by at most one limb. */ |
260 | 0 | if (ap[n-1] == 0 && bp[n-1] == 0) |
261 | 0 | n--; |
262 | 0 | } |
263 | 0 | ASSERT (ap[n-1] > 0 || bp[n-1] > 0); |
264 | 0 | return n; |
265 | 0 | } |