Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpz/oddfac_1.c
Line
Count
Source (jump to first uncovered line)
1
/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2
3
Contributed to the GNU project by Marco Bodrato.
4
5
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6
IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7
IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8
DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10
Copyright 2010-2012, 2015-2017 Free Software Foundation, Inc.
11
12
This file is part of the GNU MP Library.
13
14
The GNU MP Library is free software; you can redistribute it and/or modify
15
it under the terms of either:
16
17
  * the GNU Lesser General Public License as published by the Free
18
    Software Foundation; either version 3 of the License, or (at your
19
    option) any later version.
20
21
or
22
23
  * the GNU General Public License as published by the Free Software
24
    Foundation; either version 2 of the License, or (at your option) any
25
    later version.
26
27
or both in parallel, as here.
28
29
The GNU MP Library is distributed in the hope that it will be useful, but
30
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32
for more details.
33
34
You should have received copies of the GNU General Public License and the
35
GNU Lesser General Public License along with the GNU MP Library.  If not,
36
see https://www.gnu.org/licenses/.  */
37
38
#include "gmp-impl.h"
39
#include "longlong.h"
40
41
/* TODO:
42
   - split this file in smaller parts with functions that can be recycled for different computations.
43
 */
44
45
/**************************************************************/
46
/* Section macros: common macros, for mswing/fac/bin (&sieve) */
47
/**************************************************************/
48
49
#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)      \
50
92.0k
  if ((PR) > (MAX_PR)) {         \
51
9.09k
    (VEC)[(I)++] = (PR);          \
52
9.09k
    (PR) = 1;             \
53
9.09k
  }
54
55
#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)    \
56
4.15M
  do {               \
57
4.15M
    if ((PR) > (MAX_PR)) {         \
58
880k
      (VEC)[(I)++] = (PR);          \
59
880k
      (PR) = (P);           \
60
880k
    } else             \
61
4.15M
      (PR) *= (P);           \
62
4.15M
  } while (0)
63
64
#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)     \
65
11.5k
    __max_i = (end);            \
66
11.5k
                \
67
12.8M
    do {             \
68
12.8M
      ++__i;              \
69
12.8M
      if (((sieve)[__index] & __mask) == 0)     \
70
12.8M
  {             \
71
4.44M
    mp_limb_t prime;          \
72
4.44M
    prime = id_to_n(__i)
73
74
#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)    \
75
7.70k
  do {               \
76
7.70k
    mp_limb_t __mask, __index, __max_i, __i;      \
77
7.70k
                \
78
7.70k
    __i = (start)-(off);          \
79
7.70k
    __index = __i / GMP_LIMB_BITS;       \
80
7.70k
    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);    \
81
7.70k
    __i += (off);           \
82
7.70k
                \
83
7.70k
    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
84
85
#define LOOP_ON_SIEVE_STOP          \
86
4.44M
  }              \
87
12.8M
      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);  \
88
12.8M
      __index += __mask & 1;          \
89
12.8M
    }  while (__i <= __max_i)
90
91
#define LOOP_ON_SIEVE_END         \
92
4.35M
    LOOP_ON_SIEVE_STOP;           \
93
7.70k
  } while (0)
94
95
/*********************************************************/
96
/* Section sieve: sieving functions and tools for primes */
97
/*********************************************************/
98
99
#if WANT_ASSERT
100
static mp_limb_t
101
7.70k
bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
102
#endif
103
104
/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
105
static mp_limb_t
106
4.44M
id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
107
108
/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
109
static mp_limb_t
110
23.8k
n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
111
112
#if WANT_ASSERT
113
static mp_size_t
114
781
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
115
#endif
116
117
/*********************************************************/
118
/* Section mswing: 2-multiswing factorial                */
119
/*********************************************************/
120
121
/* Returns an approximation of the sqare root of x.
122
 * It gives:
123
 *   limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
124
 * or
125
 *   x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
126
 */
127
static mp_limb_t
128
limb_apprsqrt (mp_limb_t x)
129
3.85k
{
130
3.85k
  int s;
131
132
3.85k
  ASSERT (x > 2);
133
3.85k
  count_leading_zeros (s, x);
134
3.85k
  s = (GMP_LIMB_BITS - s) >> 1;
135
3.85k
  return ((CNST_LIMB(1) << s) + (x >> s)) >> 1;
136
3.85k
}
137
138
#if 0
139
/* A count-then-exponentiate variant for SWING_A_PRIME */
140
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)   \
141
  do {              \
142
    mp_limb_t __q, __prime;       \
143
    int __exp;            \
144
    __prime = (P);          \
145
    __exp = 0;            \
146
    __q = (N);            \
147
    do {            \
148
      __q /= __prime;         \
149
      __exp += __q & 1;         \
150
    } while (__q >= __prime);       \
151
    if (__exp) { /* Store $prime^{exp}$ */    \
152
      for (__q = __prime; --__exp; __q *= __prime); \
153
      FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
154
    };              \
155
  } while (0)
156
#else
157
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
158
92.0k
  do {           \
159
92.0k
    mp_limb_t __q, __prime;     \
160
92.0k
    __prime = (P);        \
161
92.0k
    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
162
92.0k
    __q = (N);          \
163
238k
    do {         \
164
238k
      __q /= __prime;       \
165
238k
      if ((__q & 1) != 0) (PR) *= __prime; \
166
238k
    } while (__q >= __prime);      \
167
92.0k
  } while (0)
168
#endif
169
170
#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)  \
171
1.92M
  do {             \
172
1.92M
    mp_limb_t __prime;          \
173
1.92M
    __prime = (P);          \
174
1.92M
    if ((((N) / __prime) & 1) != 0)     \
175
1.92M
      FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);  \
176
1.92M
  } while (0)
177
178
/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
179
   factorial of the parameter n.  The result x is an odd positive
180
   integer so that multiswing(n,2) = x 2^a.
181
182
   Uses the algorithm described by Peter Luschny in "Divide, Swing and
183
   Conquer the Factorial!".
184
185
   The pointer sieve points to primesieve_size(n) limbs containing a
186
   bit-array where primes are marked as 0.
187
   Enough (FIXME: explain :-) limbs must be pointed by factors.
188
 */
189
190
static void
191
mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
192
3.85k
{
193
3.85k
  mp_limb_t prod, max_prod;
194
3.85k
  mp_size_t j;
195
196
3.85k
  ASSERT (n > 25);
197
198
3.85k
  j = 0;
199
3.85k
  prod  = -(n & 1);
200
3.85k
  n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
201
202
3.85k
  prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
203
3.85k
  max_prod = GMP_NUMB_MAX / (n-1);
204
205
  /* Handle prime = 3 separately. */
206
3.85k
  SWING_A_PRIME (3, n, prod, max_prod, factors, j);
207
208
  /* Swing primes from 5 to n/3 */
209
3.85k
  {
210
3.85k
    mp_limb_t s, l_max_prod;
211
212
3.85k
    s = limb_apprsqrt(n);
213
3.85k
    ASSERT (s >= 5);
214
3.85k
    s = n_to_bit (s);
215
3.85k
    ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
216
3.85k
    ASSERT (s < n_to_bit (n / 3));
217
211k
    LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
218
211k
    SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
219
211k
    LOOP_ON_SIEVE_STOP;
220
221
3.85k
    ASSERT (max_prod <= GMP_NUMB_MAX / 3);
222
223
3.85k
    l_max_prod = max_prod * 3;
224
225
6.92M
    LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3), sieve);
226
6.92M
    SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
227
6.92M
    LOOP_ON_SIEVE_END;
228
3.85k
  }
229
230
  /* Store primes from (n+1)/2 to n */
231
10.1M
  LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
232
10.1M
  FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
233
10.1M
  LOOP_ON_SIEVE_END;
234
235
3.85k
  if (LIKELY (j != 0))
236
3.85k
    {
237
3.85k
      factors[j++] = prod;
238
3.85k
      mpz_prodlimbs (x, factors, j);
239
3.85k
    }
240
0
  else
241
0
    {
242
0
      ASSERT (ALLOC (x) > 0);
243
0
      PTR (x)[0] = prod;
244
0
      SIZ (x) = 1;
245
0
    }
246
3.85k
}
247
248
#undef SWING_A_PRIME
249
#undef SH_SWING_A_PRIME
250
#undef LOOP_ON_SIEVE_END
251
#undef LOOP_ON_SIEVE_STOP
252
#undef LOOP_ON_SIEVE_BEGIN
253
#undef LOOP_ON_SIEVE_CONTINUE
254
#undef FACTOR_LIST_APPEND
255
256
/*********************************************************/
257
/* Section oddfac: odd factorial, needed also by binomial*/
258
/*********************************************************/
259
260
#if TUNE_PROGRAM_BUILD
261
#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
262
#else
263
#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
264
#endif
265
266
/* mpz_oddfac_1 computes the odd part of the factorial of the
267
   parameter n.  I.e. n! = x 2^a, where x is the returned value: an
268
   odd positive integer.
269
270
   If flag != 0 a square is skipped in the DSC part, e.g.
271
   if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
272
273
   If n is too small, flag is ignored, and an ASSERT can be triggered.
274
275
   TODO: FAC_DSC_THRESHOLD is used here with two different roles:
276
    - to decide when prime factorisation is needed,
277
    - to stop the recursion, once sieving is done.
278
   Maybe two thresholds can do a better job.
279
 */
280
void
281
mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
282
896
{
283
896
  ASSERT (n <= GMP_NUMB_MAX);
284
896
  ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
285
286
896
  if (n <= ODD_FACTORIAL_TABLE_LIMIT)
287
25
    {
288
25
      MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n];
289
25
      SIZ (x) = 1;
290
25
    }
291
871
  else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
292
13
    {
293
13
      mp_ptr   px;
294
295
13
      px = MPZ_NEWALLOC (x, 2);
296
13
      umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
297
13
      SIZ (x) = 2;
298
13
    }
299
858
  else
300
858
    {
301
858
      unsigned s;
302
858
      mp_ptr   factors;
303
304
858
      s = 0;
305
858
      {
306
858
  mp_limb_t tn;
307
858
  mp_limb_t prod, max_prod, i;
308
858
  mp_size_t j;
309
858
  TMP_SDECL;
310
311
#if TUNE_PROGRAM_BUILD
312
  ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
313
  ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
314
#endif
315
316
  /* Compute the number of recursive steps for the DSC algorithm. */
317
4.70k
  for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
318
3.85k
    tn >>= 1;
319
320
858
  j = 0;
321
322
858
  TMP_SMARK;
323
858
  factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
324
858
  ASSERT (tn >= FACTORS_PER_LIMB);
325
326
858
  prod = 1;
327
#if TUNE_PROGRAM_BUILD
328
  max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
329
#else
330
858
  max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
331
858
#endif
332
333
858
  ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
334
4.30k
  do {
335
4.30k
    i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
336
4.30k
    factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
337
625k
    do {
338
625k
      FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
339
625k
      i += 2;
340
625k
    } while (i <= tn);
341
4.30k
    max_prod <<= 1;
342
4.30k
    tn >>= 1;
343
4.30k
  } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
344
345
858
  factors[j++] = prod;
346
858
  factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
347
858
  factors[j++] = __gmp_oddfac_table[tn >> 1];
348
858
  mpz_prodlimbs (x, factors, j);
349
350
858
  TMP_SFREE;
351
858
      }
352
353
858
      if (s != 0)
354
  /* Use the algorithm described by Peter Luschny in "Divide,
355
     Swing and Conquer the Factorial!".
356
357
     Improvement: there are two temporary buffers, factors and
358
     square, that are never used together; with a good estimate
359
     of the maximal needed size, they could share a single
360
     allocation.
361
  */
362
781
  {
363
781
    mpz_t mswing;
364
781
    mp_ptr sieve;
365
781
    mp_size_t size;
366
781
    TMP_DECL;
367
368
781
    TMP_MARK;
369
370
781
    flag--;
371
781
    size = n / GMP_NUMB_BITS + 4;
372
781
    ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
373
    /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
374
       one more can be overwritten by mul, another for the sieve */
375
781
    MPZ_TMP_INIT (mswing, size);
376
    /* Initialize size, so that ASSERT can check it correctly. */
377
781
    ASSERT_CODE (SIZ (mswing) = 0);
378
379
    /* Put the sieve on the second half, it will be overwritten by the last mswing. */
380
781
    sieve = PTR (mswing) + size / 2 + 1;
381
382
781
    size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
383
384
781
    factors = TMP_ALLOC_LIMBS (size);
385
3.85k
    do {
386
3.85k
      mp_ptr    square, px;
387
3.85k
      mp_size_t nx, ns;
388
3.85k
      mp_limb_t cy;
389
3.85k
      TMP_DECL;
390
391
3.85k
      s--;
392
3.85k
      ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
393
3.85k
      mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
394
395
3.85k
      TMP_MARK;
396
3.85k
      nx = SIZ (x);
397
3.85k
      if (s == flag) {
398
0
        size = nx;
399
0
        square = TMP_ALLOC_LIMBS (size);
400
0
        MPN_COPY (square, PTR (x), nx);
401
3.85k
      } else {
402
3.85k
        size = nx << 1;
403
3.85k
        square = TMP_ALLOC_LIMBS (size);
404
3.85k
        mpn_sqr (square, PTR (x), nx);
405
3.85k
        size -= (square[size - 1] == 0);
406
3.85k
      }
407
3.85k
      ns = SIZ (mswing);
408
3.85k
      nx = size + ns;
409
3.85k
      px = MPZ_NEWALLOC (x, nx);
410
3.85k
      ASSERT (ns <= size);
411
3.85k
      cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
412
413
3.85k
      SIZ(x) = nx - (cy == 0);
414
3.85k
      TMP_FREE;
415
3.85k
    } while (s != 0);
416
781
    TMP_FREE;
417
781
  }
418
858
    }
419
896
}
420
421
#undef FACTORS_PER_LIMB
422
#undef FACTOR_LIST_STORE