/src/gmp-6.2.1/mpz/oddfac_1.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!. |
2 | | |
3 | | Contributed to the GNU project by Marco Bodrato. |
4 | | |
5 | | THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. |
6 | | IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. |
7 | | IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR |
8 | | DISAPPEAR IN A FUTURE GNU MP RELEASE. |
9 | | |
10 | | Copyright 2010-2012, 2015-2017 Free Software Foundation, Inc. |
11 | | |
12 | | This file is part of the GNU MP Library. |
13 | | |
14 | | The GNU MP Library is free software; you can redistribute it and/or modify |
15 | | it under the terms of either: |
16 | | |
17 | | * the GNU Lesser General Public License as published by the Free |
18 | | Software Foundation; either version 3 of the License, or (at your |
19 | | option) any later version. |
20 | | |
21 | | or |
22 | | |
23 | | * the GNU General Public License as published by the Free Software |
24 | | Foundation; either version 2 of the License, or (at your option) any |
25 | | later version. |
26 | | |
27 | | or both in parallel, as here. |
28 | | |
29 | | The GNU MP Library is distributed in the hope that it will be useful, but |
30 | | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
31 | | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
32 | | for more details. |
33 | | |
34 | | You should have received copies of the GNU General Public License and the |
35 | | GNU Lesser General Public License along with the GNU MP Library. If not, |
36 | | see https://www.gnu.org/licenses/. */ |
37 | | |
38 | | #include "gmp-impl.h" |
39 | | #include "longlong.h" |
40 | | |
41 | | /* TODO: |
42 | | - split this file in smaller parts with functions that can be recycled for different computations. |
43 | | */ |
44 | | |
45 | | /**************************************************************/ |
46 | | /* Section macros: common macros, for mswing/fac/bin (&sieve) */ |
47 | | /**************************************************************/ |
48 | | |
49 | | #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ |
50 | 0 | if ((PR) > (MAX_PR)) { \ |
51 | 0 | (VEC)[(I)++] = (PR); \ |
52 | 0 | (PR) = 1; \ |
53 | 0 | } |
54 | | |
55 | | #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ |
56 | 0 | do { \ |
57 | 0 | if ((PR) > (MAX_PR)) { \ |
58 | 0 | (VEC)[(I)++] = (PR); \ |
59 | 0 | (PR) = (P); \ |
60 | 0 | } else \ |
61 | 0 | (PR) *= (P); \ |
62 | 0 | } while (0) |
63 | | |
64 | | #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ |
65 | 0 | __max_i = (end); \ |
66 | 0 | \ |
67 | 0 | do { \ |
68 | 0 | ++__i; \ |
69 | 0 | if (((sieve)[__index] & __mask) == 0) \ |
70 | 0 | { \ |
71 | 0 | mp_limb_t prime; \ |
72 | 0 | prime = id_to_n(__i) |
73 | | |
74 | | #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ |
75 | 0 | do { \ |
76 | 0 | mp_limb_t __mask, __index, __max_i, __i; \ |
77 | 0 | \ |
78 | 0 | __i = (start)-(off); \ |
79 | 0 | __index = __i / GMP_LIMB_BITS; \ |
80 | 0 | __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ |
81 | 0 | __i += (off); \ |
82 | 0 | \ |
83 | 0 | LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) |
84 | | |
85 | | #define LOOP_ON_SIEVE_STOP \ |
86 | 0 | } \ |
87 | 0 | __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ |
88 | 0 | __index += __mask & 1; \ |
89 | 0 | } while (__i <= __max_i) |
90 | | |
91 | | #define LOOP_ON_SIEVE_END \ |
92 | 0 | LOOP_ON_SIEVE_STOP; \ |
93 | 0 | } while (0) |
94 | | |
95 | | /*********************************************************/ |
96 | | /* Section sieve: sieving functions and tools for primes */ |
97 | | /*********************************************************/ |
98 | | |
99 | | #if WANT_ASSERT |
100 | | static mp_limb_t |
101 | 0 | bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } |
102 | | #endif |
103 | | |
104 | | /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ |
105 | | static mp_limb_t |
106 | 0 | id_to_n (mp_limb_t id) { return id*3+1+(id&1); } |
107 | | |
108 | | /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ |
109 | | static mp_limb_t |
110 | 0 | n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } |
111 | | |
112 | | #if WANT_ASSERT |
113 | | static mp_size_t |
114 | 0 | primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } |
115 | | #endif |
116 | | |
117 | | /*********************************************************/ |
118 | | /* Section mswing: 2-multiswing factorial */ |
119 | | /*********************************************************/ |
120 | | |
121 | | /* Returns an approximation of the sqare root of x. |
122 | | * It gives: |
123 | | * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2 |
124 | | * or |
125 | | * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8 |
126 | | */ |
127 | | static mp_limb_t |
128 | | limb_apprsqrt (mp_limb_t x) |
129 | 0 | { |
130 | 0 | int s; |
131 | |
|
132 | 0 | ASSERT (x > 2); |
133 | 0 | count_leading_zeros (s, x); |
134 | 0 | s = (GMP_LIMB_BITS - s) >> 1; |
135 | 0 | return ((CNST_LIMB(1) << s) + (x >> s)) >> 1; |
136 | 0 | } |
137 | | |
138 | | #if 0 |
139 | | /* A count-then-exponentiate variant for SWING_A_PRIME */ |
140 | | #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ |
141 | | do { \ |
142 | | mp_limb_t __q, __prime; \ |
143 | | int __exp; \ |
144 | | __prime = (P); \ |
145 | | __exp = 0; \ |
146 | | __q = (N); \ |
147 | | do { \ |
148 | | __q /= __prime; \ |
149 | | __exp += __q & 1; \ |
150 | | } while (__q >= __prime); \ |
151 | | if (__exp) { /* Store $prime^{exp}$ */ \ |
152 | | for (__q = __prime; --__exp; __q *= __prime); \ |
153 | | FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \ |
154 | | }; \ |
155 | | } while (0) |
156 | | #else |
157 | | #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ |
158 | 0 | do { \ |
159 | 0 | mp_limb_t __q, __prime; \ |
160 | 0 | __prime = (P); \ |
161 | 0 | FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ |
162 | 0 | __q = (N); \ |
163 | 0 | do { \ |
164 | 0 | __q /= __prime; \ |
165 | 0 | if ((__q & 1) != 0) (PR) *= __prime; \ |
166 | 0 | } while (__q >= __prime); \ |
167 | 0 | } while (0) |
168 | | #endif |
169 | | |
170 | | #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ |
171 | 0 | do { \ |
172 | 0 | mp_limb_t __prime; \ |
173 | 0 | __prime = (P); \ |
174 | 0 | if ((((N) / __prime) & 1) != 0) \ |
175 | 0 | FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \ |
176 | 0 | } while (0) |
177 | | |
178 | | /* mpz_2multiswing_1 computes the odd part of the 2-multiswing |
179 | | factorial of the parameter n. The result x is an odd positive |
180 | | integer so that multiswing(n,2) = x 2^a. |
181 | | |
182 | | Uses the algorithm described by Peter Luschny in "Divide, Swing and |
183 | | Conquer the Factorial!". |
184 | | |
185 | | The pointer sieve points to primesieve_size(n) limbs containing a |
186 | | bit-array where primes are marked as 0. |
187 | | Enough (FIXME: explain :-) limbs must be pointed by factors. |
188 | | */ |
189 | | |
190 | | static void |
191 | | mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors) |
192 | 0 | { |
193 | 0 | mp_limb_t prod, max_prod; |
194 | 0 | mp_size_t j; |
195 | |
|
196 | 0 | ASSERT (n > 25); |
197 | | |
198 | 0 | j = 0; |
199 | 0 | prod = -(n & 1); |
200 | 0 | n &= ~ CNST_LIMB(1); /* n-1, if n is odd */ |
201 | |
|
202 | 0 | prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */ |
203 | 0 | max_prod = GMP_NUMB_MAX / (n-1); |
204 | | |
205 | | /* Handle prime = 3 separately. */ |
206 | 0 | SWING_A_PRIME (3, n, prod, max_prod, factors, j); |
207 | | |
208 | | /* Swing primes from 5 to n/3 */ |
209 | 0 | { |
210 | 0 | mp_limb_t s, l_max_prod; |
211 | |
|
212 | 0 | s = limb_apprsqrt(n); |
213 | 0 | ASSERT (s >= 5); |
214 | 0 | s = n_to_bit (s); |
215 | 0 | ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n); |
216 | 0 | ASSERT (s < n_to_bit (n / 3)); |
217 | 0 | LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); |
218 | 0 | SWING_A_PRIME (prime, n, prod, max_prod, factors, j); |
219 | 0 | LOOP_ON_SIEVE_STOP; |
220 | |
|
221 | 0 | ASSERT (max_prod <= GMP_NUMB_MAX / 3); |
222 | | |
223 | 0 | l_max_prod = max_prod * 3; |
224 | |
|
225 | 0 | LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3), sieve); |
226 | 0 | SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j); |
227 | 0 | LOOP_ON_SIEVE_END; |
228 | 0 | } |
229 | | |
230 | | /* Store primes from (n+1)/2 to n */ |
231 | 0 | LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve); |
232 | 0 | FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); |
233 | 0 | LOOP_ON_SIEVE_END; |
234 | |
|
235 | 0 | if (LIKELY (j != 0)) |
236 | 0 | { |
237 | 0 | factors[j++] = prod; |
238 | 0 | mpz_prodlimbs (x, factors, j); |
239 | 0 | } |
240 | 0 | else |
241 | 0 | { |
242 | 0 | ASSERT (ALLOC (x) > 0); |
243 | 0 | PTR (x)[0] = prod; |
244 | 0 | SIZ (x) = 1; |
245 | 0 | } |
246 | 0 | } |
247 | | |
248 | | #undef SWING_A_PRIME |
249 | | #undef SH_SWING_A_PRIME |
250 | | #undef LOOP_ON_SIEVE_END |
251 | | #undef LOOP_ON_SIEVE_STOP |
252 | | #undef LOOP_ON_SIEVE_BEGIN |
253 | | #undef LOOP_ON_SIEVE_CONTINUE |
254 | | #undef FACTOR_LIST_APPEND |
255 | | |
256 | | /*********************************************************/ |
257 | | /* Section oddfac: odd factorial, needed also by binomial*/ |
258 | | /*********************************************************/ |
259 | | |
260 | | #if TUNE_PROGRAM_BUILD |
261 | | #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1)) |
262 | | #else |
263 | | #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1)) |
264 | | #endif |
265 | | |
266 | | /* mpz_oddfac_1 computes the odd part of the factorial of the |
267 | | parameter n. I.e. n! = x 2^a, where x is the returned value: an |
268 | | odd positive integer. |
269 | | |
270 | | If flag != 0 a square is skipped in the DSC part, e.g. |
271 | | if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!. |
272 | | |
273 | | If n is too small, flag is ignored, and an ASSERT can be triggered. |
274 | | |
275 | | TODO: FAC_DSC_THRESHOLD is used here with two different roles: |
276 | | - to decide when prime factorisation is needed, |
277 | | - to stop the recursion, once sieving is done. |
278 | | Maybe two thresholds can do a better job. |
279 | | */ |
280 | | void |
281 | | mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag) |
282 | 0 | { |
283 | 0 | ASSERT (n <= GMP_NUMB_MAX); |
284 | 0 | ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD))); |
285 | | |
286 | 0 | if (n <= ODD_FACTORIAL_TABLE_LIMIT) |
287 | 0 | { |
288 | 0 | MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n]; |
289 | 0 | SIZ (x) = 1; |
290 | 0 | } |
291 | 0 | else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1) |
292 | 0 | { |
293 | 0 | mp_ptr px; |
294 | |
|
295 | 0 | px = MPZ_NEWALLOC (x, 2); |
296 | 0 | umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]); |
297 | 0 | SIZ (x) = 2; |
298 | 0 | } |
299 | 0 | else |
300 | 0 | { |
301 | 0 | unsigned s; |
302 | 0 | mp_ptr factors; |
303 | |
|
304 | 0 | s = 0; |
305 | 0 | { |
306 | 0 | mp_limb_t tn; |
307 | 0 | mp_limb_t prod, max_prod, i; |
308 | 0 | mp_size_t j; |
309 | 0 | TMP_SDECL; |
310 | |
|
311 | | #if TUNE_PROGRAM_BUILD |
312 | | ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD); |
313 | | ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2)); |
314 | | #endif |
315 | | |
316 | | /* Compute the number of recursive steps for the DSC algorithm. */ |
317 | 0 | for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++) |
318 | 0 | tn >>= 1; |
319 | |
|
320 | 0 | j = 0; |
321 | |
|
322 | 0 | TMP_SMARK; |
323 | 0 | factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB); |
324 | 0 | ASSERT (tn >= FACTORS_PER_LIMB); |
325 | | |
326 | 0 | prod = 1; |
327 | | #if TUNE_PROGRAM_BUILD |
328 | | max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT; |
329 | | #else |
330 | 0 | max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD; |
331 | 0 | #endif |
332 | |
|
333 | 0 | ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); |
334 | 0 | do { |
335 | 0 | i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2; |
336 | 0 | factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX; |
337 | 0 | do { |
338 | 0 | FACTOR_LIST_STORE (i, prod, max_prod, factors, j); |
339 | 0 | i += 2; |
340 | 0 | } while (i <= tn); |
341 | 0 | max_prod <<= 1; |
342 | 0 | tn >>= 1; |
343 | 0 | } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); |
344 | |
|
345 | 0 | factors[j++] = prod; |
346 | 0 | factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1]; |
347 | 0 | factors[j++] = __gmp_oddfac_table[tn >> 1]; |
348 | 0 | mpz_prodlimbs (x, factors, j); |
349 | |
|
350 | 0 | TMP_SFREE; |
351 | 0 | } |
352 | | |
353 | 0 | if (s != 0) |
354 | | /* Use the algorithm described by Peter Luschny in "Divide, |
355 | | Swing and Conquer the Factorial!". |
356 | | |
357 | | Improvement: there are two temporary buffers, factors and |
358 | | square, that are never used together; with a good estimate |
359 | | of the maximal needed size, they could share a single |
360 | | allocation. |
361 | | */ |
362 | 0 | { |
363 | 0 | mpz_t mswing; |
364 | 0 | mp_ptr sieve; |
365 | 0 | mp_size_t size; |
366 | 0 | TMP_DECL; |
367 | |
|
368 | 0 | TMP_MARK; |
369 | |
|
370 | 0 | flag--; |
371 | 0 | size = n / GMP_NUMB_BITS + 4; |
372 | 0 | ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1)); |
373 | | /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS); |
374 | | one more can be overwritten by mul, another for the sieve */ |
375 | 0 | MPZ_TMP_INIT (mswing, size); |
376 | | /* Initialize size, so that ASSERT can check it correctly. */ |
377 | 0 | ASSERT_CODE (SIZ (mswing) = 0); |
378 | | |
379 | | /* Put the sieve on the second half, it will be overwritten by the last mswing. */ |
380 | 0 | sieve = PTR (mswing) + size / 2 + 1; |
381 | |
|
382 | 0 | size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1; |
383 | |
|
384 | 0 | factors = TMP_ALLOC_LIMBS (size); |
385 | 0 | do { |
386 | 0 | mp_ptr square, px; |
387 | 0 | mp_size_t nx, ns; |
388 | 0 | mp_limb_t cy; |
389 | 0 | TMP_DECL; |
390 | |
|
391 | 0 | s--; |
392 | 0 | ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */ |
393 | 0 | mpz_2multiswing_1 (mswing, n >> s, sieve, factors); |
394 | |
|
395 | 0 | TMP_MARK; |
396 | 0 | nx = SIZ (x); |
397 | 0 | if (s == flag) { |
398 | 0 | size = nx; |
399 | 0 | square = TMP_ALLOC_LIMBS (size); |
400 | 0 | MPN_COPY (square, PTR (x), nx); |
401 | 0 | } else { |
402 | 0 | size = nx << 1; |
403 | 0 | square = TMP_ALLOC_LIMBS (size); |
404 | 0 | mpn_sqr (square, PTR (x), nx); |
405 | 0 | size -= (square[size - 1] == 0); |
406 | 0 | } |
407 | 0 | ns = SIZ (mswing); |
408 | 0 | nx = size + ns; |
409 | 0 | px = MPZ_NEWALLOC (x, nx); |
410 | 0 | ASSERT (ns <= size); |
411 | 0 | cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */ |
412 | |
|
413 | 0 | SIZ(x) = nx - (cy == 0); |
414 | 0 | TMP_FREE; |
415 | 0 | } while (s != 0); |
416 | 0 | TMP_FREE; |
417 | 0 | } |
418 | 0 | } |
419 | 0 | } |
420 | | |
421 | | #undef FACTORS_PER_LIMB |
422 | | #undef FACTOR_LIST_STORE |