/src/gmp/mpn/hgcd_matrix.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* hgcd_matrix.c.  | 
2  |  |  | 
3  |  |    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY  | 
4  |  |    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST  | 
5  |  |    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.  | 
6  |  |  | 
7  |  | Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc.  | 
8  |  |  | 
9  |  | This file is part of the GNU MP Library.  | 
10  |  |  | 
11  |  | The GNU MP Library is free software; you can redistribute it and/or modify  | 
12  |  | it under the terms of either:  | 
13  |  |  | 
14  |  |   * the GNU Lesser General Public License as published by the Free  | 
15  |  |     Software Foundation; either version 3 of the License, or (at your  | 
16  |  |     option) any later version.  | 
17  |  |  | 
18  |  | or  | 
19  |  |  | 
20  |  |   * the GNU General Public License as published by the Free Software  | 
21  |  |     Foundation; either version 2 of the License, or (at your option) any  | 
22  |  |     later version.  | 
23  |  |  | 
24  |  | or both in parallel, as here.  | 
25  |  |  | 
26  |  | The GNU MP Library is distributed in the hope that it will be useful, but  | 
27  |  | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  | 
28  |  | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License  | 
29  |  | for more details.  | 
30  |  |  | 
31  |  | You should have received copies of the GNU General Public License and the  | 
32  |  | GNU Lesser General Public License along with the GNU MP Library.  If not,  | 
33  |  | see https://www.gnu.org/licenses/.  */  | 
34  |  |  | 
35  |  | #include "gmp-impl.h"  | 
36  |  | #include "longlong.h"  | 
37  |  |  | 
38  |  | /* For input of size n, matrix elements are of size at most ceil(n/2)  | 
39  |  |    - 1, but we need two limbs extra. */  | 
40  |  | void  | 
41  |  | mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)  | 
42  | 0  | { | 
43  | 0  |   mp_size_t s = (n+1)/2 + 1;  | 
44  | 0  |   M->alloc = s;  | 
45  | 0  |   M->n = 1;  | 
46  | 0  |   MPN_ZERO (p, 4 * s);  | 
47  | 0  |   M->p[0][0] = p;  | 
48  | 0  |   M->p[0][1] = p + s;  | 
49  | 0  |   M->p[1][0] = p + 2 * s;  | 
50  | 0  |   M->p[1][1] = p + 3 * s;  | 
51  |  | 
  | 
52  | 0  |   M->p[0][0][0] = M->p[1][1][0] = 1;  | 
53  | 0  | }  | 
54  |  |  | 
55  |  | /* Update column COL, adding in Q * column (1-COL). Temporary storage:  | 
56  |  |  * qn + n <= M->alloc, where n is the size of the largest element in  | 
57  |  |  * column 1 - COL. */  | 
58  |  | void  | 
59  |  | mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,  | 
60  |  |         unsigned col, mp_ptr tp)  | 
61  | 0  | { | 
62  | 0  |   ASSERT (col < 2);  | 
63  |  | 
  | 
64  | 0  |   if (qn == 1)  | 
65  | 0  |     { | 
66  | 0  |       mp_limb_t q = qp[0];  | 
67  | 0  |       mp_limb_t c0, c1;  | 
68  |  | 
  | 
69  | 0  |       c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);  | 
70  | 0  |       c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);  | 
71  |  | 
  | 
72  | 0  |       M->p[0][col][M->n] = c0;  | 
73  | 0  |       M->p[1][col][M->n] = c1;  | 
74  |  | 
  | 
75  | 0  |       M->n += (c0 | c1) != 0;  | 
76  | 0  |     }  | 
77  | 0  |   else  | 
78  | 0  |     { | 
79  | 0  |       unsigned row;  | 
80  |  |  | 
81  |  |       /* Carries for the unlikely case that we get both high words  | 
82  |  |    from the multiplication and carries from the addition. */  | 
83  | 0  |       mp_limb_t c[2];  | 
84  | 0  |       mp_size_t n;  | 
85  |  |  | 
86  |  |       /* The matrix will not necessarily grow in size by qn, so we  | 
87  |  |    need normalization in order not to overflow M. */  | 
88  |  | 
  | 
89  | 0  |       for (n = M->n; n + qn > M->n; n--)  | 
90  | 0  |   { | 
91  | 0  |     ASSERT (n > 0);  | 
92  | 0  |     if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)  | 
93  | 0  |       break;  | 
94  | 0  |   }  | 
95  |  | 
  | 
96  | 0  |       ASSERT (qn + n <= M->alloc);  | 
97  |  | 
  | 
98  | 0  |       for (row = 0; row < 2; row++)  | 
99  | 0  |   { | 
100  | 0  |     if (qn <= n)  | 
101  | 0  |       mpn_mul (tp, M->p[row][1-col], n, qp, qn);  | 
102  | 0  |     else  | 
103  | 0  |       mpn_mul (tp, qp, qn, M->p[row][1-col], n);  | 
104  |  | 
  | 
105  | 0  |     ASSERT (n + qn >= M->n);  | 
106  | 0  |     c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);  | 
107  | 0  |   }  | 
108  |  | 
  | 
109  | 0  |       n += qn;  | 
110  |  | 
  | 
111  | 0  |       if (c[0] | c[1])  | 
112  | 0  |   { | 
113  | 0  |     M->p[0][col][n] = c[0];  | 
114  | 0  |     M->p[1][col][n] = c[1];  | 
115  | 0  |     n++;  | 
116  | 0  |   }  | 
117  | 0  |       else  | 
118  | 0  |   { | 
119  | 0  |     n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;  | 
120  | 0  |     ASSERT (n >= M->n);  | 
121  | 0  |   }  | 
122  | 0  |       M->n = n;  | 
123  | 0  |     }  | 
124  |  | 
  | 
125  | 0  |   ASSERT (M->n < M->alloc);  | 
126  | 0  | }  | 
127  |  |  | 
128  |  | /* Multiply M by M1 from the right. Since the M1 elements fit in  | 
129  |  |    GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs  | 
130  |  |    temporary space M->n */  | 
131  |  | void  | 
132  |  | mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,  | 
133  |  |            mp_ptr tp)  | 
134  | 0  | { | 
135  | 0  |   mp_size_t n0, n1;  | 
136  |  |  | 
137  |  |   /* Could avoid copy by some swapping of pointers. */  | 
138  | 0  |   MPN_COPY (tp, M->p[0][0], M->n);  | 
139  | 0  |   n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);  | 
140  | 0  |   MPN_COPY (tp, M->p[1][0], M->n);  | 
141  | 0  |   n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);  | 
142  |  |  | 
143  |  |   /* Depends on zero initialization */  | 
144  | 0  |   M->n = MAX(n0, n1);  | 
145  | 0  |   ASSERT (M->n < M->alloc);  | 
146  | 0  | }  | 
147  |  |  | 
148  |  | /* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs  | 
149  |  |    of temporary storage (see mpn_matrix22_mul_itch). */  | 
150  |  | void  | 
151  |  | mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,  | 
152  |  |          mp_ptr tp)  | 
153  | 0  | { | 
154  | 0  |   mp_size_t n;  | 
155  |  |  | 
156  |  |   /* About the new size of M:s elements. Since M1's diagonal elements  | 
157  |  |      are > 0, no element can decrease. The new elements are of size  | 
158  |  |      M->n + M1->n, one limb more or less. The computation of the  | 
159  |  |      matrix product produces elements of size M->n + M1->n + 1. But  | 
160  |  |      the true size, after normalization, may be three limbs smaller.  | 
161  |  |  | 
162  |  |      The reason that the product has normalized size >= M->n + M1->n -  | 
163  |  |      2 is subtle. It depends on the fact that M and M1 can be factored  | 
164  |  |      as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have  | 
165  |  |      M ending with a large power and M1 starting with a large power of  | 
166  |  |      the same matrix. */  | 
167  |  |  | 
168  |  |   /* FIXME: Strassen multiplication gives only a small speedup. In FFT  | 
169  |  |      multiplication range, this function could be sped up quite a lot  | 
170  |  |      using invariance. */  | 
171  | 0  |   ASSERT (M->n + M1->n < M->alloc);  | 
172  |  | 
  | 
173  | 0  |   ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]  | 
174  | 0  |      | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);  | 
175  |  | 
  | 
176  | 0  |   ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]  | 
177  | 0  |      | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);  | 
178  |  | 
  | 
179  | 0  |   mpn_matrix22_mul (M->p[0][0], M->p[0][1],  | 
180  | 0  |         M->p[1][0], M->p[1][1], M->n,  | 
181  | 0  |         M1->p[0][0], M1->p[0][1],  | 
182  | 0  |         M1->p[1][0], M1->p[1][1], M1->n, tp);  | 
183  |  |  | 
184  |  |   /* Index of last potentially non-zero limb, size is one greater. */  | 
185  | 0  |   n = M->n + M1->n;  | 
186  |  | 
  | 
187  | 0  |   n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);  | 
188  | 0  |   n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);  | 
189  | 0  |   n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);  | 
190  |  | 
  | 
191  | 0  |   ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);  | 
192  |  | 
  | 
193  | 0  |   M->n = n + 1;  | 
194  | 0  | }  | 
195  |  |  | 
196  |  | /* Multiplies the least significant p limbs of (a;b) by M^-1.  | 
197  |  |    Temporary space needed: 2 * (p + M->n)*/  | 
198  |  | mp_size_t  | 
199  |  | mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,  | 
200  |  |       mp_size_t n, mp_ptr ap, mp_ptr bp,  | 
201  |  |       mp_size_t p, mp_ptr tp)  | 
202  | 0  | { | 
203  |  |   /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)  | 
204  |  |      = (r11 a - r01 b; - r10 a + r00 b */  | 
205  |  | 
  | 
206  | 0  |   mp_ptr t0 = tp;  | 
207  | 0  |   mp_ptr t1 = tp + p + M->n;  | 
208  | 0  |   mp_limb_t ah, bh;  | 
209  | 0  |   mp_limb_t cy;  | 
210  |  | 
  | 
211  | 0  |   ASSERT (p + M->n  < n);  | 
212  |  |  | 
213  |  |   /* First compute the two values depending on a, before overwriting a */  | 
214  |  | 
  | 
215  | 0  |   if (M->n >= p)  | 
216  | 0  |     { | 
217  | 0  |       mpn_mul (t0, M->p[1][1], M->n, ap, p);  | 
218  | 0  |       mpn_mul (t1, M->p[1][0], M->n, ap, p);  | 
219  | 0  |     }  | 
220  | 0  |   else  | 
221  | 0  |     { | 
222  | 0  |       mpn_mul (t0, ap, p, M->p[1][1], M->n);  | 
223  | 0  |       mpn_mul (t1, ap, p, M->p[1][0], M->n);  | 
224  | 0  |     }  | 
225  |  |  | 
226  |  |   /* Update a */  | 
227  | 0  |   MPN_COPY (ap, t0, p);  | 
228  | 0  |   ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);  | 
229  |  | 
  | 
230  | 0  |   if (M->n >= p)  | 
231  | 0  |     mpn_mul (t0, M->p[0][1], M->n, bp, p);  | 
232  | 0  |   else  | 
233  | 0  |     mpn_mul (t0, bp, p, M->p[0][1], M->n);  | 
234  |  | 
  | 
235  | 0  |   cy = mpn_sub (ap, ap, n, t0, p + M->n);  | 
236  | 0  |   ASSERT (cy <= ah);  | 
237  | 0  |   ah -= cy;  | 
238  |  |  | 
239  |  |   /* Update b */  | 
240  | 0  |   if (M->n >= p)  | 
241  | 0  |     mpn_mul (t0, M->p[0][0], M->n, bp, p);  | 
242  | 0  |   else  | 
243  | 0  |     mpn_mul (t0, bp, p, M->p[0][0], M->n);  | 
244  |  | 
  | 
245  | 0  |   MPN_COPY (bp, t0, p);  | 
246  | 0  |   bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);  | 
247  | 0  |   cy = mpn_sub (bp, bp, n, t1, p + M->n);  | 
248  | 0  |   ASSERT (cy <= bh);  | 
249  | 0  |   bh -= cy;  | 
250  |  | 
  | 
251  | 0  |   if (ah > 0 || bh > 0)  | 
252  | 0  |     { | 
253  | 0  |       ap[n] = ah;  | 
254  | 0  |       bp[n] = bh;  | 
255  | 0  |       n++;  | 
256  | 0  |     }  | 
257  | 0  |   else  | 
258  | 0  |     { | 
259  |  |       /* The subtraction can reduce the size by at most one limb. */  | 
260  | 0  |       if (ap[n-1] == 0 && bp[n-1] == 0)  | 
261  | 0  |   n--;  | 
262  | 0  |     }  | 
263  | 0  |   ASSERT (ap[n-1] > 0 || bp[n-1] > 0);  | 
264  | 0  |   return n;  | 
265  | 0  | }  |