Line | Count | Source (jump to first uncovered line) |
1 | | /* hgcd.c. |
2 | | |
3 | | THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY |
4 | | SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST |
5 | | GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
6 | | |
7 | | Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc. |
8 | | |
9 | | This file is part of the GNU MP Library. |
10 | | |
11 | | The GNU MP Library is free software; you can redistribute it and/or modify |
12 | | it under the terms of either: |
13 | | |
14 | | * the GNU Lesser General Public License as published by the Free |
15 | | Software Foundation; either version 3 of the License, or (at your |
16 | | option) any later version. |
17 | | |
18 | | or |
19 | | |
20 | | * the GNU General Public License as published by the Free Software |
21 | | Foundation; either version 2 of the License, or (at your option) any |
22 | | later version. |
23 | | |
24 | | or both in parallel, as here. |
25 | | |
26 | | The GNU MP Library is distributed in the hope that it will be useful, but |
27 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
28 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
29 | | for more details. |
30 | | |
31 | | You should have received copies of the GNU General Public License and the |
32 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
33 | | see https://www.gnu.org/licenses/. */ |
34 | | |
35 | | #include "gmp-impl.h" |
36 | | #include "longlong.h" |
37 | | |
38 | | |
39 | | /* Size analysis for hgcd: |
40 | | |
41 | | For the recursive calls, we have n1 <= ceil(n / 2). Then the |
42 | | storage need is determined by the storage for the recursive call |
43 | | computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1 |
44 | | (after this, the storage needed for M1 can be recycled). |
45 | | |
46 | | Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1) |
47 | | = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2, |
48 | | and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total, |
49 | | 4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12. |
50 | | |
51 | | For the recursive call, we need S(n1) = S(ceil(n/2)). |
52 | | |
53 | | S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2)) |
54 | | <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k)) |
55 | | <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k)) |
56 | | <= 20 ceil(n/4) + 22k + S(ceil(n/2^k)) |
57 | | */ |
58 | | |
59 | | mp_size_t |
60 | | mpn_hgcd_itch (mp_size_t n) |
61 | 0 | { |
62 | 0 | unsigned k; |
63 | 0 | int count; |
64 | 0 | mp_size_t nscaled; |
65 | |
|
66 | 0 | if (BELOW_THRESHOLD (n, HGCD_THRESHOLD)) |
67 | 0 | return n; |
68 | | |
69 | | /* Get the recursion depth. */ |
70 | 0 | nscaled = (n - 1) / (HGCD_THRESHOLD - 1); |
71 | 0 | count_leading_zeros (count, nscaled); |
72 | 0 | k = GMP_LIMB_BITS - count; |
73 | |
|
74 | 0 | return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD; |
75 | 0 | } |
76 | | |
77 | | /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M |
78 | | with elements of size at most (n+1)/2 - 1. Returns new size of a, |
79 | | b, or zero if no reduction is possible. */ |
80 | | |
81 | | mp_size_t |
82 | | mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n, |
83 | | struct hgcd_matrix *M, mp_ptr tp) |
84 | 0 | { |
85 | 0 | mp_size_t s = n/2 + 1; |
86 | |
|
87 | 0 | mp_size_t nn; |
88 | 0 | int success = 0; |
89 | |
|
90 | 0 | if (n <= s) |
91 | | /* Happens when n <= 2, a fairly uninteresting case but exercised |
92 | | by the random inputs of the testsuite. */ |
93 | 0 | return 0; |
94 | | |
95 | 0 | ASSERT ((ap[n-1] | bp[n-1]) > 0); |
96 | |
|
97 | 0 | ASSERT ((n+1)/2 - 1 < M->alloc); |
98 | |
|
99 | 0 | if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD)) |
100 | 0 | { |
101 | 0 | mp_size_t n2 = (3*n)/4 + 1; |
102 | 0 | mp_size_t p = n/2; |
103 | |
|
104 | 0 | nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp); |
105 | 0 | if (nn) |
106 | 0 | { |
107 | 0 | n = nn; |
108 | 0 | success = 1; |
109 | 0 | } |
110 | | |
111 | | /* NOTE: It appears this loop never runs more than once (at |
112 | | least when not recursing to hgcd_appr). */ |
113 | 0 | while (n > n2) |
114 | 0 | { |
115 | | /* Needs n + 1 storage */ |
116 | 0 | nn = mpn_hgcd_step (n, ap, bp, s, M, tp); |
117 | 0 | if (!nn) |
118 | 0 | return success ? n : 0; |
119 | | |
120 | 0 | n = nn; |
121 | 0 | success = 1; |
122 | 0 | } |
123 | | |
124 | 0 | if (n > s + 2) |
125 | 0 | { |
126 | 0 | struct hgcd_matrix M1; |
127 | 0 | mp_size_t scratch; |
128 | |
|
129 | 0 | p = 2*s - n + 1; |
130 | 0 | scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p); |
131 | |
|
132 | 0 | mpn_hgcd_matrix_init(&M1, n - p, tp); |
133 | | |
134 | | /* FIXME: Should use hgcd_reduce, but that may require more |
135 | | scratch space, which requires review. */ |
136 | |
|
137 | 0 | nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch); |
138 | 0 | if (nn > 0) |
139 | 0 | { |
140 | | /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */ |
141 | 0 | ASSERT (M->n + 2 >= M1.n); |
142 | | |
143 | | /* Furthermore, assume M ends with a quotient (1, q; 0, 1), |
144 | | then either q or q + 1 is a correct quotient, and M1 will |
145 | | start with either (1, 0; 1, 1) or (2, 1; 1, 1). This |
146 | | rules out the case that the size of M * M1 is much |
147 | | smaller than the expected M->n + M1->n. */ |
148 | |
|
149 | 0 | ASSERT (M->n + M1.n < M->alloc); |
150 | | |
151 | | /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1) |
152 | | = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */ |
153 | 0 | n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch); |
154 | | |
155 | | /* We need a bound for of M->n + M1.n. Let n be the original |
156 | | input size. Then |
157 | | |
158 | | ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2 |
159 | | |
160 | | and it follows that |
161 | | |
162 | | M.n + M1.n <= ceil(n/2) + 1 |
163 | | |
164 | | Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the |
165 | | amount of needed scratch space. */ |
166 | 0 | mpn_hgcd_matrix_mul (M, &M1, tp + scratch); |
167 | 0 | success = 1; |
168 | 0 | } |
169 | 0 | } |
170 | 0 | } |
171 | | |
172 | 0 | for (;;) |
173 | 0 | { |
174 | | /* Needs s+3 < n */ |
175 | 0 | nn = mpn_hgcd_step (n, ap, bp, s, M, tp); |
176 | 0 | if (!nn) |
177 | 0 | return success ? n : 0; |
178 | | |
179 | 0 | n = nn; |
180 | 0 | success = 1; |
181 | 0 | } |
182 | 0 | } |