Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpz/oddfac_1.c
Line
Count
Source (jump to first uncovered line)
1
/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2
3
Contributed to the GNU project by Marco Bodrato.
4
5
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6
IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7
IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8
DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10
Copyright 2010-2012, 2015-2017, 2020, 2021 Free Software Foundation, Inc.
11
12
This file is part of the GNU MP Library.
13
14
The GNU MP Library is free software; you can redistribute it and/or modify
15
it under the terms of either:
16
17
  * the GNU Lesser General Public License as published by the Free
18
    Software Foundation; either version 3 of the License, or (at your
19
    option) any later version.
20
21
or
22
23
  * the GNU General Public License as published by the Free Software
24
    Foundation; either version 2 of the License, or (at your option) any
25
    later version.
26
27
or both in parallel, as here.
28
29
The GNU MP Library is distributed in the hope that it will be useful, but
30
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32
for more details.
33
34
You should have received copies of the GNU General Public License and the
35
GNU Lesser General Public License along with the GNU MP Library.  If not,
36
see https://www.gnu.org/licenses/.  */
37
38
#include "gmp-impl.h"
39
#include "longlong.h"
40
41
/* TODO:
42
   - split this file in smaller parts with functions that can be recycled for different computations.
43
 */
44
45
/**************************************************************/
46
/* Section macros: common macros, for mswing/fac/bin (&sieve) */
47
/**************************************************************/
48
49
#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)      \
50
1.76k
  if ((PR) > (MAX_PR)) {         \
51
183
    (VEC)[(I)++] = (PR);          \
52
183
    (PR) = 1;             \
53
183
  }
54
55
#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)    \
56
74.6k
  do {               \
57
74.6k
    if ((PR) > (MAX_PR)) {         \
58
17.3k
      (VEC)[(I)++] = (PR);          \
59
17.3k
      (PR) = (P);           \
60
17.3k
    } else             \
61
74.6k
      (PR) *= (P);           \
62
74.6k
  } while (0)
63
64
#define LOOP_ON_SIEVE_CONTINUE(prime,end)     \
65
210
    __max_i = (end);            \
66
210
                \
67
247k
    do {             \
68
247k
      ++__i;              \
69
247k
      if ((*__sieve & __mask) == 0)       \
70
247k
  {             \
71
85.8k
    mp_limb_t prime;          \
72
85.8k
    prime = id_to_n(__i)
73
74
#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)    \
75
140
  do {               \
76
140
    mp_limb_t __mask, *__sieve, __max_i, __i;     \
77
140
                \
78
140
    __i = (start)-(off);          \
79
140
    __sieve = (sieve) + __i / GMP_LIMB_BITS;     \
80
140
    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);    \
81
140
    __i += (off);           \
82
140
                \
83
140
    LOOP_ON_SIEVE_CONTINUE(prime,end)
84
85
#define LOOP_ON_SIEVE_STOP          \
86
85.8k
  }              \
87
247k
      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);  \
88
247k
      __sieve += __mask & 1;          \
89
247k
    }  while (__i <= __max_i)
90
91
#define LOOP_ON_SIEVE_END         \
92
84.1k
    LOOP_ON_SIEVE_STOP;           \
93
140
  } while (0)
94
95
/*********************************************************/
96
/* Section sieve: sieving functions and tools for primes */
97
/*********************************************************/
98
99
#if WANT_ASSERT
100
static mp_limb_t
101
140
bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
102
#endif
103
104
/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
105
static mp_limb_t
106
85.8k
id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
107
108
/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
109
static mp_limb_t
110
434
n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
111
112
#if WANT_ASSERT
113
static mp_size_t
114
14
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
115
#endif
116
117
/*********************************************************/
118
/* Section mswing: 2-multiswing factorial                */
119
/*********************************************************/
120
121
/* Returns an approximation of the sqare root of x.
122
 * It gives:
123
 *   limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
124
 * or
125
 *   x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
126
 */
127
static mp_limb_t
128
limb_apprsqrt (mp_limb_t x)
129
70
{
130
70
  int s;
131
132
70
  ASSERT (x > 2);
133
70
  count_leading_zeros (s, x);
134
70
  s = (GMP_LIMB_BITS - s) >> 1;
135
70
  return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s));
136
70
}
137
138
#if 0
139
/* A count-then-exponentiate variant for SWING_A_PRIME */
140
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)   \
141
  do {              \
142
    mp_limb_t __q, __prime;       \
143
    int __exp;            \
144
    __prime = (P);          \
145
    __exp = 0;            \
146
    __q = (N);            \
147
    do {            \
148
      __q /= __prime;         \
149
      __exp += __q & 1;         \
150
    } while (__q >= __prime);       \
151
    if (__exp) { /* Store $prime^{exp}$ */    \
152
      for (__q = __prime; --__exp; __q *= __prime); \
153
      FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
154
    };              \
155
  } while (0)
156
#else
157
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
158
1.76k
  do {           \
159
1.76k
    mp_limb_t __q, __prime;     \
160
1.76k
    __prime = (P);        \
161
1.76k
    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
162
1.76k
    __q = (N);          \
163
4.55k
    do {         \
164
4.55k
      __q /= __prime;       \
165
4.55k
      if ((__q & 1) != 0) (PR) *= __prime; \
166
4.55k
    } while (__q >= __prime);      \
167
1.76k
  } while (0)
168
#endif
169
170
#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)  \
171
37.2k
  do {             \
172
37.2k
    mp_limb_t __prime;          \
173
37.2k
    __prime = (P);          \
174
37.2k
    if ((((N) / __prime) & 1) != 0)     \
175
37.2k
      FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);  \
176
37.2k
  } while (0)
177
178
/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
179
   factorial of the parameter n.  The result x is an odd positive
180
   integer so that multiswing(n,2) = x 2^a.
181
182
   Uses the algorithm described by Peter Luschny in "Divide, Swing and
183
   Conquer the Factorial!".
184
185
   The pointer sieve points to primesieve_size(n) limbs containing a
186
   bit-array where primes are marked as 0.
187
   Enough (FIXME: explain :-) limbs must be pointed by factors.
188
 */
189
190
static void
191
mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
192
70
{
193
70
  mp_limb_t prod, max_prod;
194
70
  mp_size_t j;
195
196
70
  ASSERT (n > 25);
197
198
70
  j = 0;
199
70
  prod  = -(n & 1);
200
70
  n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
201
202
70
  prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
203
70
  max_prod = GMP_NUMB_MAX / (n-1);
204
205
  /* Handle prime = 3 separately. */
206
70
  SWING_A_PRIME (3, n, prod, max_prod, factors, j);
207
208
  /* Swing primes from 5 to n/3 */
209
70
  {
210
70
    mp_limb_t s, l_max_prod;
211
212
70
    s = limb_apprsqrt(n);
213
70
    ASSERT (s >= 5);
214
70
    s = n_to_bit (s);
215
70
    ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
216
70
    ASSERT (s < n_to_bit (n / 3));
217
4.08k
    LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
218
4.08k
    SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
219
4.08k
    LOOP_ON_SIEVE_STOP;
220
221
70
    ASSERT (max_prod <= GMP_NUMB_MAX / 3);
222
223
70
    l_max_prod = max_prod * 3;
224
225
133k
    LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3));
226
133k
    SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
227
133k
    LOOP_ON_SIEVE_END;
228
70
  }
229
230
  /* Store primes from (n+1)/2 to n */
231
195k
  LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
232
195k
  FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
233
195k
  LOOP_ON_SIEVE_END;
234
235
70
  if (LIKELY (j != 0))
236
70
    {
237
70
      factors[j++] = prod;
238
70
      mpz_prodlimbs (x, factors, j);
239
70
    }
240
0
  else
241
0
    {
242
0
      ASSERT (ALLOC (x) > 0);
243
0
      PTR (x)[0] = prod;
244
0
      SIZ (x) = 1;
245
0
    }
246
70
}
247
248
#undef SWING_A_PRIME
249
#undef SH_SWING_A_PRIME
250
#undef LOOP_ON_SIEVE_END
251
#undef LOOP_ON_SIEVE_STOP
252
#undef LOOP_ON_SIEVE_BEGIN
253
#undef LOOP_ON_SIEVE_CONTINUE
254
#undef FACTOR_LIST_APPEND
255
256
/*********************************************************/
257
/* Section oddfac: odd factorial, needed also by binomial*/
258
/*********************************************************/
259
260
/* FIXME: refine che following estimate. */
261
262
#if TUNE_PROGRAM_BUILD
263
#define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD_LIMIT*FAC_DSC_THRESHOLD_LIMIT-1)+1) - 1)
264
#else
265
#define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD*FAC_DSC_THRESHOLD-1)+1) - 1)
266
#endif
267
268
/* mpz_oddfac_1 computes the odd part of the factorial of the
269
   parameter n.  I.e. n! = x 2^a, where x is the returned value: an
270
   odd positive integer.
271
272
   If flag != 0 a square is skipped in the DSC part, e.g.
273
   if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
274
275
   If n is too small, flag is ignored, and an ASSERT can be triggered.
276
277
   TODO: FAC_DSC_THRESHOLD is used here with two different roles:
278
    - to decide when prime factorisation is needed,
279
    - to stop the recursion, once sieving is done.
280
   Maybe two thresholds can do a better job.
281
 */
282
void
283
mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
284
14
{
285
14
  ASSERT (n <= GMP_NUMB_MAX);
286
14
  ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
287
288
14
  if (n <= ODD_FACTORIAL_TABLE_LIMIT)
289
0
    {
290
0
      MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n];
291
0
      SIZ (x) = 1;
292
0
    }
293
14
  else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
294
0
    {
295
0
      mp_ptr   px;
296
297
0
      px = MPZ_NEWALLOC (x, 2);
298
0
      umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
299
0
      SIZ (x) = 2;
300
0
    }
301
14
  else
302
14
    {
303
14
      unsigned s;
304
14
      mp_ptr   factors;
305
306
14
      s = 0;
307
14
      {
308
14
  mp_limb_t tn;
309
14
  mp_limb_t prod, max_prod;
310
14
  mp_size_t j;
311
14
  TMP_SDECL;
312
313
#if TUNE_PROGRAM_BUILD
314
  ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
315
  ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
316
#endif
317
318
  /* Compute the number of recursive steps for the DSC algorithm. */
319
84
  for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
320
70
    tn >>= 1;
321
322
14
  j = 0;
323
324
14
  TMP_SMARK;
325
14
  factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
326
14
  ASSERT (tn >= FACTORS_PER_LIMB);
327
328
14
  prod = 1;
329
#if TUNE_PROGRAM_BUILD
330
  max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD_LIMIT * FAC_DSC_THRESHOLD_LIMIT);
331
#else
332
14
  max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD * FAC_DSC_THRESHOLD);
333
14
#endif
334
335
14
  ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
336
70
  do {
337
70
    factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
338
70
    mp_limb_t diff = (tn - ODD_DOUBLEFACTORIAL_TABLE_LIMIT) & -CNST_LIMB (2);
339
70
    if ((diff & 2) != 0)
340
33
      {
341
33
        FACTOR_LIST_STORE (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff, prod, max_prod, factors, j);
342
33
        diff -= 2;
343
33
      }
344
70
    if (diff != 0)
345
70
      {
346
70
        mp_limb_t fac = (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2) *
347
70
    (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff);
348
6.35k
        do {
349
6.35k
    FACTOR_LIST_STORE (fac, prod, max_prod, factors, j);
350
6.35k
    diff -= 4;
351
6.35k
    fac += diff * 2;
352
6.35k
        } while (diff != 0);
353
70
      }
354
70
    max_prod <<= 2;
355
70
    tn >>= 1;
356
70
  } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
357
358
14
  factors[j++] = prod;
359
14
  factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
360
14
  factors[j++] = __gmp_oddfac_table[tn >> 1];
361
14
  mpz_prodlimbs (x, factors, j);
362
363
14
  TMP_SFREE;
364
14
      }
365
366
14
      if (s != 0)
367
  /* Use the algorithm described by Peter Luschny in "Divide,
368
     Swing and Conquer the Factorial!".
369
370
     Improvement: there are two temporary buffers, factors and
371
     square, that are never used together; with a good estimate
372
     of the maximal needed size, they could share a single
373
     allocation.
374
  */
375
14
  {
376
14
    mpz_t mswing;
377
14
    mp_ptr sieve;
378
14
    mp_size_t size;
379
14
    TMP_DECL;
380
381
14
    TMP_MARK;
382
383
14
    flag--;
384
14
    size = n / GMP_NUMB_BITS + 4;
385
14
    ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
386
    /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
387
       one more can be overwritten by mul, another for the sieve */
388
14
    MPZ_TMP_INIT (mswing, size);
389
    /* Initialize size, so that ASSERT can check it correctly. */
390
14
    ASSERT_CODE (SIZ (mswing) = 0);
391
392
    /* Put the sieve on the second half, it will be overwritten by the last mswing. */
393
14
    sieve = PTR (mswing) + size / 2 + 1;
394
395
14
    size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
396
397
14
    factors = TMP_ALLOC_LIMBS (size);
398
70
    do {
399
70
      mp_ptr    square, px;
400
70
      mp_size_t nx, ns;
401
70
      mp_limb_t cy;
402
70
      TMP_DECL;
403
404
70
      s--;
405
70
      ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
406
70
      mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
407
408
70
      TMP_MARK;
409
70
      nx = SIZ (x);
410
70
      if (s == flag) {
411
0
        size = nx;
412
0
        square = TMP_ALLOC_LIMBS (size);
413
0
        MPN_COPY (square, PTR (x), nx);
414
70
      } else {
415
70
        size = nx << 1;
416
70
        square = TMP_ALLOC_LIMBS (size);
417
70
        mpn_sqr (square, PTR (x), nx);
418
70
        size -= (square[size - 1] == 0);
419
70
      }
420
70
      ns = SIZ (mswing);
421
70
      nx = size + ns;
422
70
      px = MPZ_NEWALLOC (x, nx);
423
70
      ASSERT (ns <= size);
424
70
      cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
425
426
70
      SIZ(x) = nx - (cy == 0);
427
70
      TMP_FREE;
428
70
    } while (s != 0);
429
14
    TMP_FREE;
430
14
  }
431
14
    }
432
14
}
433
434
#undef FACTORS_PER_LIMB
435
#undef FACTOR_LIST_STORE