Coverage Report

Created: 2024-06-28 06:19

/src/gmp-6.2.1/mpz/oddfac_1.c
Line
Count
Source (jump to first uncovered line)
1
/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2
3
Contributed to the GNU project by Marco Bodrato.
4
5
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6
IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7
IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8
DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10
Copyright 2010-2012, 2015-2017 Free Software Foundation, Inc.
11
12
This file is part of the GNU MP Library.
13
14
The GNU MP Library is free software; you can redistribute it and/or modify
15
it under the terms of either:
16
17
  * the GNU Lesser General Public License as published by the Free
18
    Software Foundation; either version 3 of the License, or (at your
19
    option) any later version.
20
21
or
22
23
  * the GNU General Public License as published by the Free Software
24
    Foundation; either version 2 of the License, or (at your option) any
25
    later version.
26
27
or both in parallel, as here.
28
29
The GNU MP Library is distributed in the hope that it will be useful, but
30
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32
for more details.
33
34
You should have received copies of the GNU General Public License and the
35
GNU Lesser General Public License along with the GNU MP Library.  If not,
36
see https://www.gnu.org/licenses/.  */
37
38
#include "gmp-impl.h"
39
#include "longlong.h"
40
41
/* TODO:
42
   - split this file in smaller parts with functions that can be recycled for different computations.
43
 */
44
45
/**************************************************************/
46
/* Section macros: common macros, for mswing/fac/bin (&sieve) */
47
/**************************************************************/
48
49
#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)      \
50
0
  if ((PR) > (MAX_PR)) {         \
51
0
    (VEC)[(I)++] = (PR);          \
52
0
    (PR) = 1;             \
53
0
  }
54
55
#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)    \
56
0
  do {               \
57
0
    if ((PR) > (MAX_PR)) {         \
58
0
      (VEC)[(I)++] = (PR);          \
59
0
      (PR) = (P);           \
60
0
    } else             \
61
0
      (PR) *= (P);           \
62
0
  } while (0)
63
64
#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)     \
65
0
    __max_i = (end);            \
66
0
                \
67
0
    do {             \
68
0
      ++__i;              \
69
0
      if (((sieve)[__index] & __mask) == 0)     \
70
0
  {             \
71
0
    mp_limb_t prime;          \
72
0
    prime = id_to_n(__i)
73
74
#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)    \
75
0
  do {               \
76
0
    mp_limb_t __mask, __index, __max_i, __i;      \
77
0
                \
78
0
    __i = (start)-(off);          \
79
0
    __index = __i / GMP_LIMB_BITS;       \
80
0
    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);    \
81
0
    __i += (off);           \
82
0
                \
83
0
    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
84
85
#define LOOP_ON_SIEVE_STOP          \
86
0
  }              \
87
0
      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);  \
88
0
      __index += __mask & 1;          \
89
0
    }  while (__i <= __max_i)
90
91
#define LOOP_ON_SIEVE_END         \
92
0
    LOOP_ON_SIEVE_STOP;           \
93
0
  } while (0)
94
95
/*********************************************************/
96
/* Section sieve: sieving functions and tools for primes */
97
/*********************************************************/
98
99
#if WANT_ASSERT
100
static mp_limb_t
101
0
bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
102
#endif
103
104
/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
105
static mp_limb_t
106
0
id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
107
108
/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
109
static mp_limb_t
110
0
n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
111
112
#if WANT_ASSERT
113
static mp_size_t
114
0
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
115
#endif
116
117
/*********************************************************/
118
/* Section mswing: 2-multiswing factorial                */
119
/*********************************************************/
120
121
/* Returns an approximation of the sqare root of x.
122
 * It gives:
123
 *   limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
124
 * or
125
 *   x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
126
 */
127
static mp_limb_t
128
limb_apprsqrt (mp_limb_t x)
129
0
{
130
0
  int s;
131
132
0
  ASSERT (x > 2);
133
0
  count_leading_zeros (s, x);
134
0
  s = (GMP_LIMB_BITS - s) >> 1;
135
0
  return ((CNST_LIMB(1) << s) + (x >> s)) >> 1;
136
0
}
137
138
#if 0
139
/* A count-then-exponentiate variant for SWING_A_PRIME */
140
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)   \
141
  do {              \
142
    mp_limb_t __q, __prime;       \
143
    int __exp;            \
144
    __prime = (P);          \
145
    __exp = 0;            \
146
    __q = (N);            \
147
    do {            \
148
      __q /= __prime;         \
149
      __exp += __q & 1;         \
150
    } while (__q >= __prime);       \
151
    if (__exp) { /* Store $prime^{exp}$ */    \
152
      for (__q = __prime; --__exp; __q *= __prime); \
153
      FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
154
    };              \
155
  } while (0)
156
#else
157
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
158
0
  do {           \
159
0
    mp_limb_t __q, __prime;     \
160
0
    __prime = (P);        \
161
0
    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
162
0
    __q = (N);          \
163
0
    do {         \
164
0
      __q /= __prime;       \
165
0
      if ((__q & 1) != 0) (PR) *= __prime; \
166
0
    } while (__q >= __prime);      \
167
0
  } while (0)
168
#endif
169
170
#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)  \
171
0
  do {             \
172
0
    mp_limb_t __prime;          \
173
0
    __prime = (P);          \
174
0
    if ((((N) / __prime) & 1) != 0)     \
175
0
      FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);  \
176
0
  } while (0)
177
178
/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
179
   factorial of the parameter n.  The result x is an odd positive
180
   integer so that multiswing(n,2) = x 2^a.
181
182
   Uses the algorithm described by Peter Luschny in "Divide, Swing and
183
   Conquer the Factorial!".
184
185
   The pointer sieve points to primesieve_size(n) limbs containing a
186
   bit-array where primes are marked as 0.
187
   Enough (FIXME: explain :-) limbs must be pointed by factors.
188
 */
189
190
static void
191
mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
192
0
{
193
0
  mp_limb_t prod, max_prod;
194
0
  mp_size_t j;
195
196
0
  ASSERT (n > 25);
197
198
0
  j = 0;
199
0
  prod  = -(n & 1);
200
0
  n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
201
202
0
  prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
203
0
  max_prod = GMP_NUMB_MAX / (n-1);
204
205
  /* Handle prime = 3 separately. */
206
0
  SWING_A_PRIME (3, n, prod, max_prod, factors, j);
207
208
  /* Swing primes from 5 to n/3 */
209
0
  {
210
0
    mp_limb_t s, l_max_prod;
211
212
0
    s = limb_apprsqrt(n);
213
0
    ASSERT (s >= 5);
214
0
    s = n_to_bit (s);
215
0
    ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
216
0
    ASSERT (s < n_to_bit (n / 3));
217
0
    LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
218
0
    SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
219
0
    LOOP_ON_SIEVE_STOP;
220
221
0
    ASSERT (max_prod <= GMP_NUMB_MAX / 3);
222
223
0
    l_max_prod = max_prod * 3;
224
225
0
    LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3), sieve);
226
0
    SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
227
0
    LOOP_ON_SIEVE_END;
228
0
  }
229
230
  /* Store primes from (n+1)/2 to n */
231
0
  LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
232
0
  FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
233
0
  LOOP_ON_SIEVE_END;
234
235
0
  if (LIKELY (j != 0))
236
0
    {
237
0
      factors[j++] = prod;
238
0
      mpz_prodlimbs (x, factors, j);
239
0
    }
240
0
  else
241
0
    {
242
0
      ASSERT (ALLOC (x) > 0);
243
0
      PTR (x)[0] = prod;
244
0
      SIZ (x) = 1;
245
0
    }
246
0
}
247
248
#undef SWING_A_PRIME
249
#undef SH_SWING_A_PRIME
250
#undef LOOP_ON_SIEVE_END
251
#undef LOOP_ON_SIEVE_STOP
252
#undef LOOP_ON_SIEVE_BEGIN
253
#undef LOOP_ON_SIEVE_CONTINUE
254
#undef FACTOR_LIST_APPEND
255
256
/*********************************************************/
257
/* Section oddfac: odd factorial, needed also by binomial*/
258
/*********************************************************/
259
260
#if TUNE_PROGRAM_BUILD
261
#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
262
#else
263
#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
264
#endif
265
266
/* mpz_oddfac_1 computes the odd part of the factorial of the
267
   parameter n.  I.e. n! = x 2^a, where x is the returned value: an
268
   odd positive integer.
269
270
   If flag != 0 a square is skipped in the DSC part, e.g.
271
   if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
272
273
   If n is too small, flag is ignored, and an ASSERT can be triggered.
274
275
   TODO: FAC_DSC_THRESHOLD is used here with two different roles:
276
    - to decide when prime factorisation is needed,
277
    - to stop the recursion, once sieving is done.
278
   Maybe two thresholds can do a better job.
279
 */
280
void
281
mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
282
0
{
283
0
  ASSERT (n <= GMP_NUMB_MAX);
284
0
  ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
285
286
0
  if (n <= ODD_FACTORIAL_TABLE_LIMIT)
287
0
    {
288
0
      MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n];
289
0
      SIZ (x) = 1;
290
0
    }
291
0
  else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
292
0
    {
293
0
      mp_ptr   px;
294
295
0
      px = MPZ_NEWALLOC (x, 2);
296
0
      umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
297
0
      SIZ (x) = 2;
298
0
    }
299
0
  else
300
0
    {
301
0
      unsigned s;
302
0
      mp_ptr   factors;
303
304
0
      s = 0;
305
0
      {
306
0
  mp_limb_t tn;
307
0
  mp_limb_t prod, max_prod, i;
308
0
  mp_size_t j;
309
0
  TMP_SDECL;
310
311
#if TUNE_PROGRAM_BUILD
312
  ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
313
  ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
314
#endif
315
316
  /* Compute the number of recursive steps for the DSC algorithm. */
317
0
  for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
318
0
    tn >>= 1;
319
320
0
  j = 0;
321
322
0
  TMP_SMARK;
323
0
  factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
324
0
  ASSERT (tn >= FACTORS_PER_LIMB);
325
326
0
  prod = 1;
327
#if TUNE_PROGRAM_BUILD
328
  max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
329
#else
330
0
  max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
331
0
#endif
332
333
0
  ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
334
0
  do {
335
0
    i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
336
0
    factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
337
0
    do {
338
0
      FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
339
0
      i += 2;
340
0
    } while (i <= tn);
341
0
    max_prod <<= 1;
342
0
    tn >>= 1;
343
0
  } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
344
345
0
  factors[j++] = prod;
346
0
  factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
347
0
  factors[j++] = __gmp_oddfac_table[tn >> 1];
348
0
  mpz_prodlimbs (x, factors, j);
349
350
0
  TMP_SFREE;
351
0
      }
352
353
0
      if (s != 0)
354
  /* Use the algorithm described by Peter Luschny in "Divide,
355
     Swing and Conquer the Factorial!".
356
357
     Improvement: there are two temporary buffers, factors and
358
     square, that are never used together; with a good estimate
359
     of the maximal needed size, they could share a single
360
     allocation.
361
  */
362
0
  {
363
0
    mpz_t mswing;
364
0
    mp_ptr sieve;
365
0
    mp_size_t size;
366
0
    TMP_DECL;
367
368
0
    TMP_MARK;
369
370
0
    flag--;
371
0
    size = n / GMP_NUMB_BITS + 4;
372
0
    ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
373
    /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
374
       one more can be overwritten by mul, another for the sieve */
375
0
    MPZ_TMP_INIT (mswing, size);
376
    /* Initialize size, so that ASSERT can check it correctly. */
377
0
    ASSERT_CODE (SIZ (mswing) = 0);
378
379
    /* Put the sieve on the second half, it will be overwritten by the last mswing. */
380
0
    sieve = PTR (mswing) + size / 2 + 1;
381
382
0
    size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
383
384
0
    factors = TMP_ALLOC_LIMBS (size);
385
0
    do {
386
0
      mp_ptr    square, px;
387
0
      mp_size_t nx, ns;
388
0
      mp_limb_t cy;
389
0
      TMP_DECL;
390
391
0
      s--;
392
0
      ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
393
0
      mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
394
395
0
      TMP_MARK;
396
0
      nx = SIZ (x);
397
0
      if (s == flag) {
398
0
        size = nx;
399
0
        square = TMP_ALLOC_LIMBS (size);
400
0
        MPN_COPY (square, PTR (x), nx);
401
0
      } else {
402
0
        size = nx << 1;
403
0
        square = TMP_ALLOC_LIMBS (size);
404
0
        mpn_sqr (square, PTR (x), nx);
405
0
        size -= (square[size - 1] == 0);
406
0
      }
407
0
      ns = SIZ (mswing);
408
0
      nx = size + ns;
409
0
      px = MPZ_NEWALLOC (x, nx);
410
0
      ASSERT (ns <= size);
411
0
      cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
412
413
0
      SIZ(x) = nx - (cy == 0);
414
0
      TMP_FREE;
415
0
    } while (s != 0);
416
0
    TMP_FREE;
417
0
  }
418
0
    }
419
0
}
420
421
#undef FACTORS_PER_LIMB
422
#undef FACTOR_LIST_STORE