Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpz/bin_ui.c
Line
Count
Source (jump to first uncovered line)
1
/* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
2
3
Copyright 1998-2002, 2012, 2013, 2015, 2017-2018 Free Software Foundation, Inc.
4
5
This file is part of the GNU MP Library.
6
7
The GNU MP Library is free software; you can redistribute it and/or modify
8
it under the terms of either:
9
10
  * the GNU Lesser General Public License as published by the Free
11
    Software Foundation; either version 3 of the License, or (at your
12
    option) any later version.
13
14
or
15
16
  * the GNU General Public License as published by the Free Software
17
    Foundation; either version 2 of the License, or (at your option) any
18
    later version.
19
20
or both in parallel, as here.
21
22
The GNU MP Library is distributed in the hope that it will be useful, but
23
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25
for more details.
26
27
You should have received copies of the GNU General Public License and the
28
GNU Lesser General Public License along with the GNU MP Library.  If not,
29
see https://www.gnu.org/licenses/.  */
30
31
#include "gmp-impl.h"
32
33
/* How many special cases? Minimum is 2: 0 and 1;
34
 * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
35
 */
36
3.43k
#define APARTAJ_KALKULOJ 2
37
38
/* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
39
 * the operands fit.
40
 */
41
#define UZU_BIN_UIUI 0
42
43
/* Whether to use a shortcut to precompute the product of four
44
 * elements (1), or precompute only the product of a couple (0).
45
 *
46
 * In both cases the precomputed product is then updated with some
47
 * linear operations to obtain the product of the next four (1)
48
 * [or two (0)] operands.
49
 */
50
#define KVAROPE 1
51
52
static void
53
posmpz_init (mpz_ptr r)
54
1.70k
{
55
1.70k
  mp_ptr rp;
56
1.70k
  ASSERT (SIZ (r) > 0);
57
1.70k
  rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
58
1.70k
  *rp = 0;
59
1.70k
  *++rp = 0;
60
1.70k
}
61
62
/* Equivalent to mpz_add_ui (r, r, in), but faster when
63
   0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
64
static void
65
posmpz_inc_ui (mpz_ptr r, unsigned long in)
66
5.90M
{
67
#if BITS_PER_ULONG > GMP_NUMB_BITS
68
  mpz_add_ui (r, r, in);
69
#else
70
5.90M
  ASSERT (SIZ (r) > 0);
71
5.90M
  MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
72
5.90M
  SIZ (r) += (PTR (r)[SIZ (r)] != 0);
73
5.90M
#endif
74
5.90M
}
75
76
/* Equivalent to mpz_sub_ui (r, r, in), but faster when
77
   0 < SIZ (r) and we know in advance that the result is positive. */
78
static void
79
posmpz_dec_ui (mpz_ptr r, unsigned long in)
80
5.90M
{
81
#if BITS_PER_ULONG > GMP_NUMB_BITS
82
  mpz_sub_ui (r, r, in);
83
#else
84
5.90M
  ASSERT (mpz_cmp_ui (r, in) >= 0);
85
5.90M
  MPN_DECR_U (PTR (r), SIZ (r), in);
86
5.90M
  SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
87
5.90M
#endif
88
5.90M
}
89
90
/* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
91
   0 < SIZ (r) and we know in advance that the result is positive. */
92
static void
93
posmpz_rsh1 (mpz_ptr r)
94
1.69k
{
95
1.69k
  mp_ptr rp;
96
1.69k
  mp_size_t rn;
97
98
1.69k
  rn = SIZ (r);
99
1.69k
  rp = PTR (r);
100
1.69k
  ASSERT (rn > 0);
101
1.69k
  mpn_rshift (rp, rp, rn, 1);
102
1.69k
  SIZ (r) -= rp[rn - 1] == 0;
103
1.69k
}
104
105
/* Computes r = n(n+(2*k-1))/2
106
   It uses a sqare instead of a product, computing
107
   r = ((n+k-1)^2 + n - (k-1)^2)/2
108
   As a side effect, sets t = n+k-1
109
 */
110
static void
111
mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
112
1.69k
{
113
1.69k
  ASSERT (k > 0 && SIZ(n) > 0);
114
1.69k
  --k;
115
1.69k
  mpz_add_ui (t, n, k);
116
1.69k
  mpz_mul (r, t, t);
117
1.69k
  mpz_add (r, r, n);
118
1.69k
  posmpz_rsh1 (r);
119
1.69k
  if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
120
1.69k
    posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
121
0
  else
122
0
    {
123
0
      mpz_t tmp;
124
0
      mpz_init_set_ui (tmp, (k + (k & 1)));
125
0
      mpz_mul_ui (tmp, tmp, k >> 1);
126
0
      mpz_sub (r, r, tmp);
127
0
      mpz_clear (tmp);
128
0
    }
129
1.69k
}
130
131
#if KVAROPE
132
static void
133
rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
134
3.04M
{
135
3.04M
  if (k - lk < 5)
136
1.52M
    {
137
4.37M
      do {
138
4.37M
  posmpz_inc_ui (p, 4*k+2);
139
4.37M
  mpz_addmul_ui (P, p, 4*k);
140
4.37M
  posmpz_dec_ui (P, k);
141
4.37M
  mpz_mul (r, r, P);
142
4.37M
      } while (--k > lk);
143
1.52M
    }
144
1.52M
  else
145
1.52M
    {
146
1.52M
      mpz_t lt;
147
1.52M
      unsigned long int m;
148
149
1.52M
      m = ((k + lk) >> 1) + 1;
150
1.52M
      rek_raising_fac4 (r, p, P, k, m, t);
151
152
1.52M
      posmpz_inc_ui (p, 4*m+2);
153
1.52M
      mpz_addmul_ui (P, p, 4*m);
154
1.52M
      posmpz_dec_ui (P, m);
155
1.52M
      if (t == NULL)
156
1.51M
  {
157
1.51M
    mpz_init_set (lt, P);
158
1.51M
    t = lt;
159
1.51M
  }
160
8.26k
      else
161
8.26k
  {
162
8.26k
    ALLOC (lt) = 0;
163
8.26k
    mpz_set (t, P);
164
8.26k
  }
165
1.52M
      rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
166
167
1.52M
      mpz_mul (r, r, t);
168
1.52M
      mpz_clear (lt);
169
1.52M
    }
170
3.04M
}
171
172
/* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
173
   rek_raising_fac4, and exploiting an idea inspired by a piece of
174
   code that Fredrik Johansson wrote and by a comment by Niels Möller.
175
176
   Assume k = 4i then compute:
177
     p  = (n+1)(n+4i)/2 - i
178
    (n+1+1)(n+4i)/2 = p + i + (n+4i)/2
179
    (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
180
     P  = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
181
     n' = n + 2
182
     i' = i - 1
183
    (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
184
    (n'-1)(n'+4i'+2)/2 - i' - 1 = p
185
    (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
186
    (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) =  p + 4i' + 1
187
    (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
188
     p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
189
    p' - 4i' - 2 = p
190
    (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
191
    (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
192
    (p' - 3i' - 1)*(p' - i')/2 = P
193
    (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
194
    (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2
195
    (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
196
    (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
197
     P' = P + 4i'p' - i'
198
199
   And compute the product P * P' * P" ...
200
 */
201
202
static void
203
mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
204
852
{
205
852
  ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
206
852
  posmpz_init (n);
207
852
  posmpz_inc_ui (n, 1);
208
852
  SIZ (r) = 0;
209
852
  if (k & 1)
210
502
    {
211
502
      mpz_set (r, n);
212
502
      posmpz_inc_ui (n, 1);
213
502
    }
214
852
  k >>= 1;
215
852
  if (APARTAJ_KALKULOJ < 2 && k == 0)
216
0
    return;
217
218
852
  mpz_hmul_nbnpk (p, n, k, t);
219
852
  posmpz_init (p);
220
221
852
  if (k & 1)
222
351
    {
223
351
      if (SIZ (r))
224
213
  mpz_mul (r, r, p);
225
138
      else
226
138
  mpz_set (r, p);
227
351
      posmpz_inc_ui (p, k - 1);
228
351
    }
229
852
  k >>= 1;
230
852
  if (APARTAJ_KALKULOJ < 4 && k == 0)
231
10
    return;
232
233
842
  mpz_hmul_nbnpk (t, p, k, n);
234
842
  if (SIZ (r))
235
630
    mpz_mul (r, r, t);
236
212
  else
237
212
    mpz_set (r, t);
238
239
842
  if (APARTAJ_KALKULOJ > 8 || k > 1)
240
838
    {
241
838
      posmpz_dec_ui (p, k);
242
838
      rek_raising_fac4 (r, p, t, k - 1, 0, n);
243
838
    }
244
842
}
245
246
#else /* KVAROPE */
247
248
static void
249
rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
250
{
251
  /* Should the threshold depend on SIZ (n) ? */
252
  if (k - lk < 10)
253
    {
254
      do {
255
  posmpz_inc_ui (n, k);
256
  mpz_mul (r, r, n);
257
  --k;
258
      } while (k > lk);
259
    }
260
  else
261
    {
262
      mpz_t t3;
263
      unsigned long int m;
264
265
      m = ((k + lk) >> 1) + 1;
266
      rek_raising_fac (r, n, k, m, t1, t2);
267
268
      posmpz_inc_ui (n, m);
269
      if (t1 == NULL)
270
  {
271
    mpz_init_set (t3, n);
272
    t1 = t3;
273
  }
274
      else
275
  {
276
    ALLOC (t3) = 0;
277
    mpz_set (t1, n);
278
  }
279
      rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
280
281
      mpz_mul (r, r, t1);
282
      mpz_clear (t3);
283
    }
284
}
285
286
/* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
287
   rek_raising_fac, and exploiting an idea inspired by a piece of
288
   code that Fredrik Johansson wrote.
289
290
   Force an even k = 2i then compute:
291
     p  = (n+1)(n+2i)/2
292
     i' = i - 1
293
     p == (n+1)(n+2i'+2)/2
294
     p' = p + i' == (n+2)(n+2i'+1)/2
295
     n' = n + 1
296
     p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
297
298
   And compute the product p * p' * p" ...
299
*/
300
301
static void
302
mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
303
{
304
  unsigned long int hk;
305
  ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
306
  mpz_add_ui (n, n, 1);
307
  hk = k >> 1;
308
  mpz_hmul_nbnpk (p, n, hk, t);
309
310
  if ((k & 1) != 0)
311
    {
312
      mpz_add_ui (t, t, hk + 1);
313
      mpz_mul (r, t, p);
314
    }
315
  else
316
    {
317
      mpz_set (r, p);
318
    }
319
320
  if ((APARTAJ_KALKULOJ > 3) || (hk > 1))
321
    {
322
      posmpz_init (p);
323
      rek_raising_fac (r, p, hk - 1, 0, t, n);
324
    }
325
}
326
#endif /* KVAROPE */
327
328
/* This is a poor implementation.  Look at bin_uiui.c for improvement ideas.
329
   In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
330
   the code here only for big n.
331
332
   The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
333
   1 section 1.2.6 part G. */
334
335
void
336
mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
337
900
{
338
900
  mpz_t      ni;
339
900
  mp_size_t  negate;
340
341
900
  if (SIZ (n) < 0)
342
0
    {
343
      /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
344
0
      mpz_init (ni);
345
0
      mpz_add_ui (ni, n, 1L);
346
0
      mpz_neg (ni, ni);
347
0
      negate = (k & 1);   /* (-1)^k */
348
0
    }
349
900
  else
350
900
    {
351
      /* bin(n,k) == 0 if k>n
352
   (no test for this under the n<0 case, since -n+k-1 >= k there) */
353
900
      if (mpz_cmp_ui (n, k) < 0)
354
14
  {
355
14
    SIZ (r) = 0;
356
14
    return;
357
14
  }
358
359
      /* set ni = n-k */
360
886
      mpz_init (ni);
361
886
      mpz_sub_ui (ni, n, k);
362
886
      negate = 0;
363
886
    }
364
365
  /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
366
     for positive, 1 for negative). */
367
368
  /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's
369
     whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
370
     = ni, and new ni of ni+k-ni = k.  */
371
886
  if (mpz_cmp_ui (ni, k) < 0)
372
407
    {
373
407
      unsigned long  tmp;
374
407
      tmp = k;
375
407
      k = mpz_get_ui (ni);
376
407
      mpz_set_ui (ni, tmp);
377
407
    }
378
379
886
  if (k < APARTAJ_KALKULOJ)
380
34
    {
381
34
      if (k == 0)
382
23
  {
383
23
    SIZ (r) = 1;
384
23
    MPZ_NEWALLOC (r, 1)[0] = 1;
385
23
  }
386
#if APARTAJ_KALKULOJ > 2
387
      else if (k == 2)
388
  {
389
    mpz_add_ui (ni, ni, 1);
390
    mpz_mul (r, ni, ni);
391
    mpz_add (r, r, ni);
392
    posmpz_rsh1 (r);
393
  }
394
#endif
395
#if APARTAJ_KALKULOJ > 3
396
      else if (k > 2)
397
  { /* k = 3, 4 */
398
    mpz_add_ui (ni, ni, 2); /* n+1 */
399
    mpz_mul (r, ni, ni); /* (n+1)^2 */
400
    mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
401
    if (k == 3)
402
      {
403
        mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
404
        /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */
405
        mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1);
406
        MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
407
      }
408
    else /* k = 4 */
409
      {
410
        mpz_add (ni, ni, r); /* (n+1)^2+n */
411
        mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
412
        mpz_sub_ui (r, r, 1); /* ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3) */
413
        /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */
414
        mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 3);
415
        MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
416
      }
417
  }
418
#endif
419
11
      else
420
11
  { /* k = 1 */
421
11
    mpz_add_ui (r, ni, 1);
422
11
  }
423
34
    }
424
#if UZU_BIN_UIUI
425
  else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
426
    {
427
      mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
428
    }
429
#endif
430
852
  else
431
852
    {
432
852
      mp_limb_t count;
433
852
      mpz_t num, den;
434
435
852
      mpz_init (num);
436
852
      mpz_init (den);
437
438
852
#if KVAROPE
439
852
      mpz_raising_fac4 (num, ni, k, den, r);
440
852
      popc_limb (count, k);
441
852
      ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
442
852
      mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
443
#else
444
      mpz_raising_fac (num, ni, k, den, r);
445
      popc_limb (count, k);
446
      ASSERT (k - (k >> 1) - count >= 0);
447
      mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
448
#endif
449
450
852
      mpz_oddfac_1(den, k, 0);
451
452
852
      mpz_divexact(r, num, den);
453
852
      mpz_clear (num);
454
852
      mpz_clear (den);
455
852
    }
456
886
  mpz_clear (ni);
457
458
886
  SIZ(r) = (SIZ(r) ^ -negate) + negate;
459
886
}