Line | Count | Source (jump to first uncovered line) |
1 | | /* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K. |
2 | | |
3 | | Copyright 1998-2002, 2012, 2013, 2015, 2017-2018, 2020 Free Software |
4 | | Foundation, Inc. |
5 | | |
6 | | This file is part of the GNU MP Library. |
7 | | |
8 | | The GNU MP Library is free software; you can redistribute it and/or modify |
9 | | it under the terms of either: |
10 | | |
11 | | * the GNU Lesser General Public License as published by the Free |
12 | | Software Foundation; either version 3 of the License, or (at your |
13 | | option) any later version. |
14 | | |
15 | | or |
16 | | |
17 | | * the GNU General Public License as published by the Free Software |
18 | | Foundation; either version 2 of the License, or (at your option) any |
19 | | later version. |
20 | | |
21 | | or both in parallel, as here. |
22 | | |
23 | | The GNU MP Library is distributed in the hope that it will be useful, but |
24 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
25 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
26 | | for more details. |
27 | | |
28 | | You should have received copies of the GNU General Public License and the |
29 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
30 | | see https://www.gnu.org/licenses/. */ |
31 | | |
32 | | #include "gmp-impl.h" |
33 | | |
34 | | /* How many special cases? Minimum is 2: 0 and 1; |
35 | | * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented. |
36 | | */ |
37 | 56 | #define APARTAJ_KALKULOJ 2 |
38 | | |
39 | | /* Whether to use (1) or not (0) the function mpz_bin_uiui whenever |
40 | | * the operands fit. |
41 | | */ |
42 | | #define UZU_BIN_UIUI 0 |
43 | | |
44 | | /* Whether to use a shortcut to precompute the product of four |
45 | | * elements (1), or precompute only the product of a couple (0). |
46 | | * |
47 | | * In both cases the precomputed product is then updated with some |
48 | | * linear operations to obtain the product of the next four (1) |
49 | | * [or two (0)] operands. |
50 | | */ |
51 | | #define KVAROPE 1 |
52 | | |
53 | | static void |
54 | | posmpz_init (mpz_ptr r) |
55 | 28 | { |
56 | 28 | mp_ptr rp; |
57 | 28 | ASSERT (SIZ (r) > 0); |
58 | 28 | rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2); |
59 | 28 | *rp = 0; |
60 | 28 | *++rp = 0; |
61 | 28 | } |
62 | | |
63 | | /* Equivalent to mpz_add_ui (r, r, in), but faster when |
64 | | 0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */ |
65 | | static void |
66 | | posmpz_inc_ui (mpz_ptr r, unsigned long in) |
67 | 115k | { |
68 | | #if BITS_PER_ULONG > GMP_NUMB_BITS |
69 | | mpz_add_ui (r, r, in); |
70 | | #else |
71 | 115k | ASSERT (SIZ (r) > 0); |
72 | 115k | MPN_INCR_U (PTR (r), SIZ (r) + 1, in); |
73 | 115k | SIZ (r) += PTR (r)[SIZ (r)]; |
74 | 115k | #endif |
75 | 115k | } |
76 | | |
77 | | /* Equivalent to mpz_sub_ui (r, r, in), but faster when |
78 | | 0 < SIZ (r) and we know in advance that the result is positive. */ |
79 | | static void |
80 | | posmpz_dec_ui (mpz_ptr r, unsigned long in) |
81 | 115k | { |
82 | | #if BITS_PER_ULONG > GMP_NUMB_BITS |
83 | | mpz_sub_ui (r, r, in); |
84 | | #else |
85 | 115k | ASSERT (mpz_cmp_ui (r, in) >= 0); |
86 | 115k | MPN_DECR_U (PTR (r), SIZ (r), in); |
87 | 115k | SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0); |
88 | 115k | #endif |
89 | 115k | } |
90 | | |
91 | | /* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when |
92 | | 0 < SIZ (r) and we know in advance that the result is positive. */ |
93 | | static void |
94 | | posmpz_rsh1 (mpz_ptr r) |
95 | 28 | { |
96 | 28 | mp_ptr rp; |
97 | 28 | mp_size_t rn; |
98 | | |
99 | 28 | rn = SIZ (r); |
100 | 28 | rp = PTR (r); |
101 | 28 | ASSERT (rn > 0); |
102 | 28 | mpn_rshift (rp, rp, rn, 1); |
103 | 28 | SIZ (r) -= rp[rn - 1] == 0; |
104 | 28 | } |
105 | | |
106 | | /* Computes r = n(n+(2*k-1))/2 |
107 | | It uses a sqare instead of a product, computing |
108 | | r = ((n+k-1)^2 + n - (k-1)^2)/2 |
109 | | As a side effect, sets t = n+k-1 |
110 | | */ |
111 | | static void |
112 | | mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t) |
113 | 28 | { |
114 | 28 | ASSERT (k > 0 && SIZ(n) > 0); |
115 | 28 | --k; |
116 | 28 | mpz_add_ui (t, n, k); |
117 | 28 | mpz_mul (r, t, t); |
118 | 28 | mpz_add (r, r, n); |
119 | 28 | posmpz_rsh1 (r); |
120 | 28 | if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2)))) |
121 | 28 | posmpz_dec_ui (r, (k + (k & 1))*(k >> 1)); |
122 | 0 | else |
123 | 0 | { |
124 | 0 | mpz_t tmp; |
125 | 0 | mpz_init_set_ui (tmp, (k + (k & 1))); |
126 | 0 | mpz_mul_ui (tmp, tmp, k >> 1); |
127 | 0 | mpz_sub (r, r, tmp); |
128 | 0 | mpz_clear (tmp); |
129 | 0 | } |
130 | 28 | } |
131 | | |
132 | | #if KVAROPE |
133 | | static void |
134 | | rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t) |
135 | 57.3k | { |
136 | 57.3k | if (k - lk < 5) |
137 | 28.6k | { |
138 | 86.3k | do { |
139 | 86.3k | posmpz_inc_ui (p, 4*k+2); |
140 | 86.3k | mpz_addmul_ui (P, p, 4*k); |
141 | 86.3k | posmpz_dec_ui (P, k); |
142 | 86.3k | mpz_mul (r, r, P); |
143 | 86.3k | } while (--k > lk); |
144 | 28.6k | } |
145 | 28.6k | else |
146 | 28.6k | { |
147 | 28.6k | mpz_t lt; |
148 | 28.6k | unsigned long int m; |
149 | | |
150 | 28.6k | ALLOC (lt) = 0; |
151 | 28.6k | SIZ (lt) = 0; |
152 | 28.6k | if (t == NULL) |
153 | 14.3k | t = lt; |
154 | 28.6k | m = ((k + lk) >> 1) + 1; |
155 | 28.6k | rek_raising_fac4 (r, p, P, k, m, t); |
156 | | |
157 | 28.6k | posmpz_inc_ui (p, 4*m+2); |
158 | 28.6k | mpz_addmul_ui (P, p, 4*m); |
159 | 28.6k | posmpz_dec_ui (P, m); |
160 | 28.6k | mpz_set (t, P); |
161 | 28.6k | rek_raising_fac4 (t, p, P, m - 1, lk, NULL); |
162 | | |
163 | 28.6k | mpz_mul (r, r, t); |
164 | 28.6k | mpz_clear (lt); |
165 | 28.6k | } |
166 | 57.3k | } |
167 | | |
168 | | /* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function |
169 | | rek_raising_fac4, and exploiting an idea inspired by a piece of |
170 | | code that Fredrik Johansson wrote and by a comment by Niels Möller. |
171 | | |
172 | | Assume k = 4i then compute: |
173 | | p = (n+1)(n+4i)/2 - i |
174 | | (n+1+1)(n+4i)/2 = p + i + (n+4i)/2 |
175 | | (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1 |
176 | | P = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8 |
177 | | n' = n + 2 |
178 | | i' = i - 1 |
179 | | (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P |
180 | | (n'-1)(n'+4i'+2)/2 - i' - 1 = p |
181 | | (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2) |
182 | | (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) = p + 4i' + 1 |
183 | | (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2 |
184 | | p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i' |
185 | | p' - 4i' - 2 = p |
186 | | (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P |
187 | | (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P |
188 | | (p' - 3i' - 1)*(p' - i')/2 = P |
189 | | (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2 |
190 | | (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2 |
191 | | (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2 |
192 | | (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i' |
193 | | P' = P + 4i'p' - i' |
194 | | |
195 | | And compute the product P * P' * P" ... |
196 | | */ |
197 | | |
198 | | static void |
199 | | mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p) |
200 | 14 | { |
201 | 14 | ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0)); |
202 | 14 | posmpz_init (n); |
203 | 14 | posmpz_inc_ui (n, 1); |
204 | 14 | SIZ (r) = 0; |
205 | 14 | if (k & 1) |
206 | 0 | { |
207 | 0 | mpz_set (r, n); |
208 | 0 | posmpz_inc_ui (n, 1); |
209 | 0 | } |
210 | 14 | k >>= 1; |
211 | 14 | if (APARTAJ_KALKULOJ < 2 && k == 0) |
212 | 0 | return; |
213 | | |
214 | 14 | mpz_hmul_nbnpk (p, n, k, t); |
215 | 14 | posmpz_init (p); |
216 | | |
217 | 14 | if (k & 1) |
218 | 4 | { |
219 | 4 | if (SIZ (r)) |
220 | 0 | mpz_mul (r, r, p); |
221 | 4 | else |
222 | 4 | mpz_set (r, p); |
223 | 4 | posmpz_inc_ui (p, k - 1); |
224 | 4 | } |
225 | 14 | k >>= 1; |
226 | 14 | if (APARTAJ_KALKULOJ < 4 && k == 0) |
227 | 0 | return; |
228 | | |
229 | 14 | mpz_hmul_nbnpk (t, p, k, n); |
230 | 14 | if (SIZ (r)) |
231 | 4 | mpz_mul (r, r, t); |
232 | 10 | else |
233 | 10 | mpz_set (r, t); |
234 | | |
235 | 14 | if (APARTAJ_KALKULOJ > 8 || k > 1) |
236 | 14 | { |
237 | 14 | posmpz_dec_ui (p, k); |
238 | 14 | rek_raising_fac4 (r, p, t, k - 1, 0, n); |
239 | 14 | } |
240 | 14 | } |
241 | | |
242 | | #else /* KVAROPE */ |
243 | | |
244 | | static void |
245 | | rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2) |
246 | | { |
247 | | /* Should the threshold depend on SIZ (n) ? */ |
248 | | if (k - lk < 10) |
249 | | { |
250 | | do { |
251 | | posmpz_inc_ui (n, k); |
252 | | mpz_mul (r, r, n); |
253 | | --k; |
254 | | } while (k > lk); |
255 | | } |
256 | | else |
257 | | { |
258 | | mpz_t t3; |
259 | | unsigned long int m; |
260 | | |
261 | | m = ((k + lk) >> 1) + 1; |
262 | | rek_raising_fac (r, n, k, m, t1, t2); |
263 | | |
264 | | posmpz_inc_ui (n, m); |
265 | | if (t1 == NULL) |
266 | | { |
267 | | mpz_init_set (t3, n); |
268 | | t1 = t3; |
269 | | } |
270 | | else |
271 | | { |
272 | | ALLOC (t3) = 0; |
273 | | mpz_set (t1, n); |
274 | | } |
275 | | rek_raising_fac (t1, n, m - 1, lk, t2, NULL); |
276 | | |
277 | | mpz_mul (r, r, t1); |
278 | | mpz_clear (t3); |
279 | | } |
280 | | } |
281 | | |
282 | | /* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function |
283 | | rek_raising_fac, and exploiting an idea inspired by a piece of |
284 | | code that Fredrik Johansson wrote. |
285 | | |
286 | | Force an even k = 2i then compute: |
287 | | p = (n+1)(n+2i)/2 |
288 | | i' = i - 1 |
289 | | p == (n+1)(n+2i'+2)/2 |
290 | | p' = p + i' == (n+2)(n+2i'+1)/2 |
291 | | n' = n + 1 |
292 | | p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2 |
293 | | |
294 | | And compute the product p * p' * p" ... |
295 | | */ |
296 | | |
297 | | static void |
298 | | mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p) |
299 | | { |
300 | | unsigned long int hk; |
301 | | ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1)); |
302 | | mpz_add_ui (n, n, 1); |
303 | | hk = k >> 1; |
304 | | mpz_hmul_nbnpk (p, n, hk, t); |
305 | | |
306 | | if ((k & 1) != 0) |
307 | | { |
308 | | mpz_add_ui (t, t, hk + 1); |
309 | | mpz_mul (r, t, p); |
310 | | } |
311 | | else |
312 | | { |
313 | | mpz_set (r, p); |
314 | | } |
315 | | |
316 | | if ((APARTAJ_KALKULOJ > 3) || (hk > 1)) |
317 | | { |
318 | | posmpz_init (p); |
319 | | rek_raising_fac (r, p, hk - 1, 0, t, n); |
320 | | } |
321 | | } |
322 | | #endif /* KVAROPE */ |
323 | | |
324 | | /* This is a poor implementation. Look at bin_uiui.c for improvement ideas. |
325 | | In fact consider calling mpz_bin_uiui() when the arguments fit, leaving |
326 | | the code here only for big n. |
327 | | |
328 | | The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol |
329 | | 1 section 1.2.6 part G. */ |
330 | | |
331 | | void |
332 | | mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k) |
333 | 14 | { |
334 | 14 | mpz_t ni; |
335 | 14 | mp_size_t negate; |
336 | | |
337 | 14 | if (SIZ (n) < 0) |
338 | 0 | { |
339 | | /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */ |
340 | 0 | mpz_init (ni); |
341 | 0 | mpz_add_ui (ni, n, 1L); |
342 | 0 | mpz_neg (ni, ni); |
343 | 0 | negate = (k & 1); /* (-1)^k */ |
344 | 0 | } |
345 | 14 | else |
346 | 14 | { |
347 | | /* bin(n,k) == 0 if k>n |
348 | | (no test for this under the n<0 case, since -n+k-1 >= k there) */ |
349 | 14 | if (mpz_cmp_ui (n, k) < 0) |
350 | 0 | { |
351 | 0 | SIZ (r) = 0; |
352 | 0 | return; |
353 | 0 | } |
354 | | |
355 | | /* set ni = n-k */ |
356 | 14 | mpz_init (ni); |
357 | 14 | mpz_sub_ui (ni, n, k); |
358 | 14 | negate = 0; |
359 | 14 | } |
360 | | |
361 | | /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0 |
362 | | for positive, 1 for negative). */ |
363 | | |
364 | | /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's |
365 | | whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k |
366 | | = ni, and new ni of ni+k-ni = k. */ |
367 | 14 | if (mpz_cmp_ui (ni, k) < 0) |
368 | 14 | { |
369 | 14 | unsigned long tmp; |
370 | 14 | tmp = k; |
371 | 14 | k = mpz_get_ui (ni); |
372 | 14 | mpz_set_ui (ni, tmp); |
373 | 14 | } |
374 | | |
375 | 14 | if (k < APARTAJ_KALKULOJ) |
376 | 0 | { |
377 | 0 | if (k == 0) |
378 | 0 | { |
379 | 0 | SIZ (r) = 1; |
380 | 0 | MPZ_NEWALLOC (r, 1)[0] = 1; |
381 | 0 | } |
382 | | #if APARTAJ_KALKULOJ > 2 |
383 | | else if (k > 1) |
384 | | { |
385 | | mpz_add_ui (ni, ni, 1 + (APARTAJ_KALKULOJ > 2 && k > 2)); |
386 | | mpz_mul (r, ni, ni); /* r = (n + (k>2))^2 */ |
387 | | if (APARTAJ_KALKULOJ == 2 || k == 2) |
388 | | { |
389 | | mpz_add (r, r, ni); /* n^2+n= n(n+1) */ |
390 | | posmpz_rsh1 (r); |
391 | | } |
392 | | #if APARTAJ_KALKULOJ > 3 |
393 | | #if APARTAJ_KALKULOJ != 5 |
394 | | #error Not implemented! 3 < APARTAJ_KALKULOJ != 5 |
395 | | #endif |
396 | | else /* k > 2 */ |
397 | | { /* k = 3, 4 */ |
398 | | mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */ |
399 | | if (k == 3) |
400 | | { |
401 | | mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */ |
402 | | /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */ |
403 | | } |
404 | | else /* k = 4 */ |
405 | | { |
406 | | mpz_add (ni, ni, r); /* (n+1)^2+n */ |
407 | | mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */ |
408 | | /* We should subtract one: ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3). */ |
409 | | /* PTR (r) [0] ^= 1; would suffice, but it is not even needed, */ |
410 | | /* because the next division will shift away this bit anyway. */ |
411 | | /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */ |
412 | | } |
413 | | mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1 | (k>>1)); |
414 | | SIZ(r) -= PTR(r) [SIZ(r) - 1] == 0; |
415 | | } |
416 | | #endif |
417 | | } |
418 | | #endif |
419 | 0 | else |
420 | 0 | { /* k = 1 */ |
421 | 0 | mpz_add_ui (r, ni, 1); |
422 | 0 | } |
423 | 0 | } |
424 | | #if UZU_BIN_UIUI |
425 | | else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0) |
426 | | { |
427 | | mpz_bin_uiui (r, mpz_get_ui (ni) + k, k); |
428 | | } |
429 | | #endif |
430 | 14 | else |
431 | 14 | { |
432 | 14 | mp_limb_t count; |
433 | 14 | mpz_t num, den; |
434 | | |
435 | 14 | mpz_init (num); |
436 | 14 | mpz_init (den); |
437 | | |
438 | 14 | #if KVAROPE |
439 | 14 | mpz_raising_fac4 (num, ni, k, den, r); |
440 | 14 | popc_limb (count, k); |
441 | 14 | ASSERT (k - (k >> 1) - (k >> 2) - count >= 0); |
442 | 14 | mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count); |
443 | | #else |
444 | | mpz_raising_fac (num, ni, k, den, r); |
445 | | popc_limb (count, k); |
446 | | ASSERT (k - (k >> 1) - count >= 0); |
447 | | mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count); |
448 | | #endif |
449 | | |
450 | 14 | mpz_oddfac_1(den, k, 0); |
451 | | |
452 | 14 | mpz_divexact(r, num, den); |
453 | 14 | mpz_clear (num); |
454 | 14 | mpz_clear (den); |
455 | 14 | } |
456 | 14 | mpz_clear (ni); |
457 | | |
458 | 14 | SIZ(r) = (SIZ(r) ^ -negate) + negate; |
459 | 14 | } |