/src/gmp-6.2.1/mpz/bin_ui.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K. |
2 | | |
3 | | Copyright 1998-2002, 2012, 2013, 2015, 2017-2018 Free Software Foundation, Inc. |
4 | | |
5 | | This file is part of the GNU MP Library. |
6 | | |
7 | | The GNU MP Library is free software; you can redistribute it and/or modify |
8 | | it under the terms of either: |
9 | | |
10 | | * the GNU Lesser General Public License as published by the Free |
11 | | Software Foundation; either version 3 of the License, or (at your |
12 | | option) any later version. |
13 | | |
14 | | or |
15 | | |
16 | | * the GNU General Public License as published by the Free Software |
17 | | Foundation; either version 2 of the License, or (at your option) any |
18 | | later version. |
19 | | |
20 | | or both in parallel, as here. |
21 | | |
22 | | The GNU MP Library is distributed in the hope that it will be useful, but |
23 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
24 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
25 | | for more details. |
26 | | |
27 | | You should have received copies of the GNU General Public License and the |
28 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
29 | | see https://www.gnu.org/licenses/. */ |
30 | | |
31 | | #include "gmp-impl.h" |
32 | | |
33 | | /* How many special cases? Minimum is 2: 0 and 1; |
34 | | * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented. |
35 | | */ |
36 | 0 | #define APARTAJ_KALKULOJ 2 |
37 | | |
38 | | /* Whether to use (1) or not (0) the function mpz_bin_uiui whenever |
39 | | * the operands fit. |
40 | | */ |
41 | | #define UZU_BIN_UIUI 0 |
42 | | |
43 | | /* Whether to use a shortcut to precompute the product of four |
44 | | * elements (1), or precompute only the product of a couple (0). |
45 | | * |
46 | | * In both cases the precomputed product is then updated with some |
47 | | * linear operations to obtain the product of the next four (1) |
48 | | * [or two (0)] operands. |
49 | | */ |
50 | | #define KVAROPE 1 |
51 | | |
52 | | static void |
53 | | posmpz_init (mpz_ptr r) |
54 | 0 | { |
55 | 0 | mp_ptr rp; |
56 | 0 | ASSERT (SIZ (r) > 0); |
57 | 0 | rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2); |
58 | 0 | *rp = 0; |
59 | 0 | *++rp = 0; |
60 | 0 | } |
61 | | |
62 | | /* Equivalent to mpz_add_ui (r, r, in), but faster when |
63 | | 0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */ |
64 | | static void |
65 | | posmpz_inc_ui (mpz_ptr r, unsigned long in) |
66 | 0 | { |
67 | | #if BITS_PER_ULONG > GMP_NUMB_BITS |
68 | | mpz_add_ui (r, r, in); |
69 | | #else |
70 | 0 | ASSERT (SIZ (r) > 0); |
71 | 0 | MPN_INCR_U (PTR (r), SIZ (r) + 1, in); |
72 | 0 | SIZ (r) += (PTR (r)[SIZ (r)] != 0); |
73 | 0 | #endif |
74 | 0 | } |
75 | | |
76 | | /* Equivalent to mpz_sub_ui (r, r, in), but faster when |
77 | | 0 < SIZ (r) and we know in advance that the result is positive. */ |
78 | | static void |
79 | | posmpz_dec_ui (mpz_ptr r, unsigned long in) |
80 | 0 | { |
81 | | #if BITS_PER_ULONG > GMP_NUMB_BITS |
82 | | mpz_sub_ui (r, r, in); |
83 | | #else |
84 | 0 | ASSERT (mpz_cmp_ui (r, in) >= 0); |
85 | 0 | MPN_DECR_U (PTR (r), SIZ (r), in); |
86 | 0 | SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0); |
87 | 0 | #endif |
88 | 0 | } |
89 | | |
90 | | /* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when |
91 | | 0 < SIZ (r) and we know in advance that the result is positive. */ |
92 | | static void |
93 | | posmpz_rsh1 (mpz_ptr r) |
94 | 0 | { |
95 | 0 | mp_ptr rp; |
96 | 0 | mp_size_t rn; |
97 | |
|
98 | 0 | rn = SIZ (r); |
99 | 0 | rp = PTR (r); |
100 | 0 | ASSERT (rn > 0); |
101 | 0 | mpn_rshift (rp, rp, rn, 1); |
102 | 0 | SIZ (r) -= rp[rn - 1] == 0; |
103 | 0 | } |
104 | | |
105 | | /* Computes r = n(n+(2*k-1))/2 |
106 | | It uses a sqare instead of a product, computing |
107 | | r = ((n+k-1)^2 + n - (k-1)^2)/2 |
108 | | As a side effect, sets t = n+k-1 |
109 | | */ |
110 | | static void |
111 | | mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t) |
112 | 0 | { |
113 | 0 | ASSERT (k > 0 && SIZ(n) > 0); |
114 | 0 | --k; |
115 | 0 | mpz_add_ui (t, n, k); |
116 | 0 | mpz_mul (r, t, t); |
117 | 0 | mpz_add (r, r, n); |
118 | 0 | posmpz_rsh1 (r); |
119 | 0 | if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2)))) |
120 | 0 | posmpz_dec_ui (r, (k + (k & 1))*(k >> 1)); |
121 | 0 | else |
122 | 0 | { |
123 | 0 | mpz_t tmp; |
124 | 0 | mpz_init_set_ui (tmp, (k + (k & 1))); |
125 | 0 | mpz_mul_ui (tmp, tmp, k >> 1); |
126 | 0 | mpz_sub (r, r, tmp); |
127 | 0 | mpz_clear (tmp); |
128 | 0 | } |
129 | 0 | } |
130 | | |
131 | | #if KVAROPE |
132 | | static void |
133 | | rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t) |
134 | 0 | { |
135 | 0 | if (k - lk < 5) |
136 | 0 | { |
137 | 0 | do { |
138 | 0 | posmpz_inc_ui (p, 4*k+2); |
139 | 0 | mpz_addmul_ui (P, p, 4*k); |
140 | 0 | posmpz_dec_ui (P, k); |
141 | 0 | mpz_mul (r, r, P); |
142 | 0 | } while (--k > lk); |
143 | 0 | } |
144 | 0 | else |
145 | 0 | { |
146 | 0 | mpz_t lt; |
147 | 0 | unsigned long int m; |
148 | |
|
149 | 0 | m = ((k + lk) >> 1) + 1; |
150 | 0 | rek_raising_fac4 (r, p, P, k, m, t); |
151 | |
|
152 | 0 | posmpz_inc_ui (p, 4*m+2); |
153 | 0 | mpz_addmul_ui (P, p, 4*m); |
154 | 0 | posmpz_dec_ui (P, m); |
155 | 0 | if (t == NULL) |
156 | 0 | { |
157 | 0 | mpz_init_set (lt, P); |
158 | 0 | t = lt; |
159 | 0 | } |
160 | 0 | else |
161 | 0 | { |
162 | 0 | ALLOC (lt) = 0; |
163 | 0 | mpz_set (t, P); |
164 | 0 | } |
165 | 0 | rek_raising_fac4 (t, p, P, m - 1, lk, NULL); |
166 | |
|
167 | 0 | mpz_mul (r, r, t); |
168 | 0 | mpz_clear (lt); |
169 | 0 | } |
170 | 0 | } |
171 | | |
172 | | /* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function |
173 | | rek_raising_fac4, and exploiting an idea inspired by a piece of |
174 | | code that Fredrik Johansson wrote and by a comment by Niels Möller. |
175 | | |
176 | | Assume k = 4i then compute: |
177 | | p = (n+1)(n+4i)/2 - i |
178 | | (n+1+1)(n+4i)/2 = p + i + (n+4i)/2 |
179 | | (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1 |
180 | | P = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8 |
181 | | n' = n + 2 |
182 | | i' = i - 1 |
183 | | (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P |
184 | | (n'-1)(n'+4i'+2)/2 - i' - 1 = p |
185 | | (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2) |
186 | | (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) = p + 4i' + 1 |
187 | | (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2 |
188 | | p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i' |
189 | | p' - 4i' - 2 = p |
190 | | (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P |
191 | | (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P |
192 | | (p' - 3i' - 1)*(p' - i')/2 = P |
193 | | (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2 |
194 | | (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2 |
195 | | (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2 |
196 | | (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i' |
197 | | P' = P + 4i'p' - i' |
198 | | |
199 | | And compute the product P * P' * P" ... |
200 | | */ |
201 | | |
202 | | static void |
203 | | mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p) |
204 | 0 | { |
205 | 0 | ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0)); |
206 | 0 | posmpz_init (n); |
207 | 0 | posmpz_inc_ui (n, 1); |
208 | 0 | SIZ (r) = 0; |
209 | 0 | if (k & 1) |
210 | 0 | { |
211 | 0 | mpz_set (r, n); |
212 | 0 | posmpz_inc_ui (n, 1); |
213 | 0 | } |
214 | 0 | k >>= 1; |
215 | 0 | if (APARTAJ_KALKULOJ < 2 && k == 0) |
216 | 0 | return; |
217 | | |
218 | 0 | mpz_hmul_nbnpk (p, n, k, t); |
219 | 0 | posmpz_init (p); |
220 | |
|
221 | 0 | if (k & 1) |
222 | 0 | { |
223 | 0 | if (SIZ (r)) |
224 | 0 | mpz_mul (r, r, p); |
225 | 0 | else |
226 | 0 | mpz_set (r, p); |
227 | 0 | posmpz_inc_ui (p, k - 1); |
228 | 0 | } |
229 | 0 | k >>= 1; |
230 | 0 | if (APARTAJ_KALKULOJ < 4 && k == 0) |
231 | 0 | return; |
232 | | |
233 | 0 | mpz_hmul_nbnpk (t, p, k, n); |
234 | 0 | if (SIZ (r)) |
235 | 0 | mpz_mul (r, r, t); |
236 | 0 | else |
237 | 0 | mpz_set (r, t); |
238 | |
|
239 | 0 | if (APARTAJ_KALKULOJ > 8 || k > 1) |
240 | 0 | { |
241 | 0 | posmpz_dec_ui (p, k); |
242 | 0 | rek_raising_fac4 (r, p, t, k - 1, 0, n); |
243 | 0 | } |
244 | 0 | } |
245 | | |
246 | | #else /* KVAROPE */ |
247 | | |
248 | | static void |
249 | | rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2) |
250 | | { |
251 | | /* Should the threshold depend on SIZ (n) ? */ |
252 | | if (k - lk < 10) |
253 | | { |
254 | | do { |
255 | | posmpz_inc_ui (n, k); |
256 | | mpz_mul (r, r, n); |
257 | | --k; |
258 | | } while (k > lk); |
259 | | } |
260 | | else |
261 | | { |
262 | | mpz_t t3; |
263 | | unsigned long int m; |
264 | | |
265 | | m = ((k + lk) >> 1) + 1; |
266 | | rek_raising_fac (r, n, k, m, t1, t2); |
267 | | |
268 | | posmpz_inc_ui (n, m); |
269 | | if (t1 == NULL) |
270 | | { |
271 | | mpz_init_set (t3, n); |
272 | | t1 = t3; |
273 | | } |
274 | | else |
275 | | { |
276 | | ALLOC (t3) = 0; |
277 | | mpz_set (t1, n); |
278 | | } |
279 | | rek_raising_fac (t1, n, m - 1, lk, t2, NULL); |
280 | | |
281 | | mpz_mul (r, r, t1); |
282 | | mpz_clear (t3); |
283 | | } |
284 | | } |
285 | | |
286 | | /* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function |
287 | | rek_raising_fac, and exploiting an idea inspired by a piece of |
288 | | code that Fredrik Johansson wrote. |
289 | | |
290 | | Force an even k = 2i then compute: |
291 | | p = (n+1)(n+2i)/2 |
292 | | i' = i - 1 |
293 | | p == (n+1)(n+2i'+2)/2 |
294 | | p' = p + i' == (n+2)(n+2i'+1)/2 |
295 | | n' = n + 1 |
296 | | p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2 |
297 | | |
298 | | And compute the product p * p' * p" ... |
299 | | */ |
300 | | |
301 | | static void |
302 | | mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p) |
303 | | { |
304 | | unsigned long int hk; |
305 | | ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1)); |
306 | | mpz_add_ui (n, n, 1); |
307 | | hk = k >> 1; |
308 | | mpz_hmul_nbnpk (p, n, hk, t); |
309 | | |
310 | | if ((k & 1) != 0) |
311 | | { |
312 | | mpz_add_ui (t, t, hk + 1); |
313 | | mpz_mul (r, t, p); |
314 | | } |
315 | | else |
316 | | { |
317 | | mpz_set (r, p); |
318 | | } |
319 | | |
320 | | if ((APARTAJ_KALKULOJ > 3) || (hk > 1)) |
321 | | { |
322 | | posmpz_init (p); |
323 | | rek_raising_fac (r, p, hk - 1, 0, t, n); |
324 | | } |
325 | | } |
326 | | #endif /* KVAROPE */ |
327 | | |
328 | | /* This is a poor implementation. Look at bin_uiui.c for improvement ideas. |
329 | | In fact consider calling mpz_bin_uiui() when the arguments fit, leaving |
330 | | the code here only for big n. |
331 | | |
332 | | The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol |
333 | | 1 section 1.2.6 part G. */ |
334 | | |
335 | | void |
336 | | mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k) |
337 | 0 | { |
338 | 0 | mpz_t ni; |
339 | 0 | mp_size_t negate; |
340 | |
|
341 | 0 | if (SIZ (n) < 0) |
342 | 0 | { |
343 | | /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */ |
344 | 0 | mpz_init (ni); |
345 | 0 | mpz_add_ui (ni, n, 1L); |
346 | 0 | mpz_neg (ni, ni); |
347 | 0 | negate = (k & 1); /* (-1)^k */ |
348 | 0 | } |
349 | 0 | else |
350 | 0 | { |
351 | | /* bin(n,k) == 0 if k>n |
352 | | (no test for this under the n<0 case, since -n+k-1 >= k there) */ |
353 | 0 | if (mpz_cmp_ui (n, k) < 0) |
354 | 0 | { |
355 | 0 | SIZ (r) = 0; |
356 | 0 | return; |
357 | 0 | } |
358 | | |
359 | | /* set ni = n-k */ |
360 | 0 | mpz_init (ni); |
361 | 0 | mpz_sub_ui (ni, n, k); |
362 | 0 | negate = 0; |
363 | 0 | } |
364 | | |
365 | | /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0 |
366 | | for positive, 1 for negative). */ |
367 | | |
368 | | /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's |
369 | | whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k |
370 | | = ni, and new ni of ni+k-ni = k. */ |
371 | 0 | if (mpz_cmp_ui (ni, k) < 0) |
372 | 0 | { |
373 | 0 | unsigned long tmp; |
374 | 0 | tmp = k; |
375 | 0 | k = mpz_get_ui (ni); |
376 | 0 | mpz_set_ui (ni, tmp); |
377 | 0 | } |
378 | |
|
379 | 0 | if (k < APARTAJ_KALKULOJ) |
380 | 0 | { |
381 | 0 | if (k == 0) |
382 | 0 | { |
383 | 0 | SIZ (r) = 1; |
384 | 0 | MPZ_NEWALLOC (r, 1)[0] = 1; |
385 | 0 | } |
386 | | #if APARTAJ_KALKULOJ > 2 |
387 | | else if (k == 2) |
388 | | { |
389 | | mpz_add_ui (ni, ni, 1); |
390 | | mpz_mul (r, ni, ni); |
391 | | mpz_add (r, r, ni); |
392 | | posmpz_rsh1 (r); |
393 | | } |
394 | | #endif |
395 | | #if APARTAJ_KALKULOJ > 3 |
396 | | else if (k > 2) |
397 | | { /* k = 3, 4 */ |
398 | | mpz_add_ui (ni, ni, 2); /* n+1 */ |
399 | | mpz_mul (r, ni, ni); /* (n+1)^2 */ |
400 | | mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */ |
401 | | if (k == 3) |
402 | | { |
403 | | mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */ |
404 | | /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */ |
405 | | mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1); |
406 | | MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r)); |
407 | | } |
408 | | else /* k = 4 */ |
409 | | { |
410 | | mpz_add (ni, ni, r); /* (n+1)^2+n */ |
411 | | mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */ |
412 | | mpz_sub_ui (r, r, 1); /* ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3) */ |
413 | | /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */ |
414 | | mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 3); |
415 | | MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r)); |
416 | | } |
417 | | } |
418 | | #endif |
419 | 0 | else |
420 | 0 | { /* k = 1 */ |
421 | 0 | mpz_add_ui (r, ni, 1); |
422 | 0 | } |
423 | 0 | } |
424 | | #if UZU_BIN_UIUI |
425 | | else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0) |
426 | | { |
427 | | mpz_bin_uiui (r, mpz_get_ui (ni) + k, k); |
428 | | } |
429 | | #endif |
430 | 0 | else |
431 | 0 | { |
432 | 0 | mp_limb_t count; |
433 | 0 | mpz_t num, den; |
434 | |
|
435 | 0 | mpz_init (num); |
436 | 0 | mpz_init (den); |
437 | |
|
438 | 0 | #if KVAROPE |
439 | 0 | mpz_raising_fac4 (num, ni, k, den, r); |
440 | 0 | popc_limb (count, k); |
441 | 0 | ASSERT (k - (k >> 1) - (k >> 2) - count >= 0); |
442 | 0 | mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count); |
443 | | #else |
444 | | mpz_raising_fac (num, ni, k, den, r); |
445 | | popc_limb (count, k); |
446 | | ASSERT (k - (k >> 1) - count >= 0); |
447 | | mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count); |
448 | | #endif |
449 | |
|
450 | 0 | mpz_oddfac_1(den, k, 0); |
451 | |
|
452 | 0 | mpz_divexact(r, num, den); |
453 | 0 | mpz_clear (num); |
454 | 0 | mpz_clear (den); |
455 | 0 | } |
456 | 0 | mpz_clear (ni); |
457 | |
|
458 | 0 | SIZ(r) = (SIZ(r) ^ -negate) + negate; |
459 | 0 | } |