Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpz/bin_ui.c
Line
Count
Source (jump to first uncovered line)
1
/* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
2
3
Copyright 1998-2002, 2012, 2013, 2015, 2017-2018, 2020 Free Software
4
Foundation, Inc.
5
6
This file is part of the GNU MP Library.
7
8
The GNU MP Library is free software; you can redistribute it and/or modify
9
it under the terms of either:
10
11
  * the GNU Lesser General Public License as published by the Free
12
    Software Foundation; either version 3 of the License, or (at your
13
    option) any later version.
14
15
or
16
17
  * the GNU General Public License as published by the Free Software
18
    Foundation; either version 2 of the License, or (at your option) any
19
    later version.
20
21
or both in parallel, as here.
22
23
The GNU MP Library is distributed in the hope that it will be useful, but
24
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26
for more details.
27
28
You should have received copies of the GNU General Public License and the
29
GNU Lesser General Public License along with the GNU MP Library.  If not,
30
see https://www.gnu.org/licenses/.  */
31
32
#include "gmp-impl.h"
33
34
/* How many special cases? Minimum is 2: 0 and 1;
35
 * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
36
 */
37
56
#define APARTAJ_KALKULOJ 2
38
39
/* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
40
 * the operands fit.
41
 */
42
#define UZU_BIN_UIUI 0
43
44
/* Whether to use a shortcut to precompute the product of four
45
 * elements (1), or precompute only the product of a couple (0).
46
 *
47
 * In both cases the precomputed product is then updated with some
48
 * linear operations to obtain the product of the next four (1)
49
 * [or two (0)] operands.
50
 */
51
#define KVAROPE 1
52
53
static void
54
posmpz_init (mpz_ptr r)
55
28
{
56
28
  mp_ptr rp;
57
28
  ASSERT (SIZ (r) > 0);
58
28
  rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
59
28
  *rp = 0;
60
28
  *++rp = 0;
61
28
}
62
63
/* Equivalent to mpz_add_ui (r, r, in), but faster when
64
   0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
65
static void
66
posmpz_inc_ui (mpz_ptr r, unsigned long in)
67
115k
{
68
#if BITS_PER_ULONG > GMP_NUMB_BITS
69
  mpz_add_ui (r, r, in);
70
#else
71
115k
  ASSERT (SIZ (r) > 0);
72
115k
  MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
73
115k
  SIZ (r) += PTR (r)[SIZ (r)];
74
115k
#endif
75
115k
}
76
77
/* Equivalent to mpz_sub_ui (r, r, in), but faster when
78
   0 < SIZ (r) and we know in advance that the result is positive. */
79
static void
80
posmpz_dec_ui (mpz_ptr r, unsigned long in)
81
115k
{
82
#if BITS_PER_ULONG > GMP_NUMB_BITS
83
  mpz_sub_ui (r, r, in);
84
#else
85
115k
  ASSERT (mpz_cmp_ui (r, in) >= 0);
86
115k
  MPN_DECR_U (PTR (r), SIZ (r), in);
87
115k
  SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
88
115k
#endif
89
115k
}
90
91
/* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
92
   0 < SIZ (r) and we know in advance that the result is positive. */
93
static void
94
posmpz_rsh1 (mpz_ptr r)
95
28
{
96
28
  mp_ptr rp;
97
28
  mp_size_t rn;
98
99
28
  rn = SIZ (r);
100
28
  rp = PTR (r);
101
28
  ASSERT (rn > 0);
102
28
  mpn_rshift (rp, rp, rn, 1);
103
28
  SIZ (r) -= rp[rn - 1] == 0;
104
28
}
105
106
/* Computes r = n(n+(2*k-1))/2
107
   It uses a sqare instead of a product, computing
108
   r = ((n+k-1)^2 + n - (k-1)^2)/2
109
   As a side effect, sets t = n+k-1
110
 */
111
static void
112
mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
113
28
{
114
28
  ASSERT (k > 0 && SIZ(n) > 0);
115
28
  --k;
116
28
  mpz_add_ui (t, n, k);
117
28
  mpz_mul (r, t, t);
118
28
  mpz_add (r, r, n);
119
28
  posmpz_rsh1 (r);
120
28
  if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
121
28
    posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
122
0
  else
123
0
    {
124
0
      mpz_t tmp;
125
0
      mpz_init_set_ui (tmp, (k + (k & 1)));
126
0
      mpz_mul_ui (tmp, tmp, k >> 1);
127
0
      mpz_sub (r, r, tmp);
128
0
      mpz_clear (tmp);
129
0
    }
130
28
}
131
132
#if KVAROPE
133
static void
134
rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
135
57.3k
{
136
57.3k
  if (k - lk < 5)
137
28.6k
    {
138
86.3k
      do {
139
86.3k
  posmpz_inc_ui (p, 4*k+2);
140
86.3k
  mpz_addmul_ui (P, p, 4*k);
141
86.3k
  posmpz_dec_ui (P, k);
142
86.3k
  mpz_mul (r, r, P);
143
86.3k
      } while (--k > lk);
144
28.6k
    }
145
28.6k
  else
146
28.6k
    {
147
28.6k
      mpz_t lt;
148
28.6k
      unsigned long int m;
149
150
28.6k
      ALLOC (lt) = 0;
151
28.6k
      SIZ (lt) = 0;
152
28.6k
      if (t == NULL)
153
14.3k
  t = lt;
154
28.6k
      m = ((k + lk) >> 1) + 1;
155
28.6k
      rek_raising_fac4 (r, p, P, k, m, t);
156
157
28.6k
      posmpz_inc_ui (p, 4*m+2);
158
28.6k
      mpz_addmul_ui (P, p, 4*m);
159
28.6k
      posmpz_dec_ui (P, m);
160
28.6k
      mpz_set (t, P);
161
28.6k
      rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
162
163
28.6k
      mpz_mul (r, r, t);
164
28.6k
      mpz_clear (lt);
165
28.6k
    }
166
57.3k
}
167
168
/* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
169
   rek_raising_fac4, and exploiting an idea inspired by a piece of
170
   code that Fredrik Johansson wrote and by a comment by Niels Möller.
171
172
   Assume k = 4i then compute:
173
     p  = (n+1)(n+4i)/2 - i
174
    (n+1+1)(n+4i)/2 = p + i + (n+4i)/2
175
    (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
176
     P  = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
177
     n' = n + 2
178
     i' = i - 1
179
    (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
180
    (n'-1)(n'+4i'+2)/2 - i' - 1 = p
181
    (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
182
    (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) =  p + 4i' + 1
183
    (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
184
     p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
185
    p' - 4i' - 2 = p
186
    (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
187
    (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
188
    (p' - 3i' - 1)*(p' - i')/2 = P
189
    (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
190
    (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2
191
    (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
192
    (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
193
     P' = P + 4i'p' - i'
194
195
   And compute the product P * P' * P" ...
196
 */
197
198
static void
199
mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
200
14
{
201
14
  ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
202
14
  posmpz_init (n);
203
14
  posmpz_inc_ui (n, 1);
204
14
  SIZ (r) = 0;
205
14
  if (k & 1)
206
0
    {
207
0
      mpz_set (r, n);
208
0
      posmpz_inc_ui (n, 1);
209
0
    }
210
14
  k >>= 1;
211
14
  if (APARTAJ_KALKULOJ < 2 && k == 0)
212
0
    return;
213
214
14
  mpz_hmul_nbnpk (p, n, k, t);
215
14
  posmpz_init (p);
216
217
14
  if (k & 1)
218
4
    {
219
4
      if (SIZ (r))
220
0
  mpz_mul (r, r, p);
221
4
      else
222
4
  mpz_set (r, p);
223
4
      posmpz_inc_ui (p, k - 1);
224
4
    }
225
14
  k >>= 1;
226
14
  if (APARTAJ_KALKULOJ < 4 && k == 0)
227
0
    return;
228
229
14
  mpz_hmul_nbnpk (t, p, k, n);
230
14
  if (SIZ (r))
231
4
    mpz_mul (r, r, t);
232
10
  else
233
10
    mpz_set (r, t);
234
235
14
  if (APARTAJ_KALKULOJ > 8 || k > 1)
236
14
    {
237
14
      posmpz_dec_ui (p, k);
238
14
      rek_raising_fac4 (r, p, t, k - 1, 0, n);
239
14
    }
240
14
}
241
242
#else /* KVAROPE */
243
244
static void
245
rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
246
{
247
  /* Should the threshold depend on SIZ (n) ? */
248
  if (k - lk < 10)
249
    {
250
      do {
251
  posmpz_inc_ui (n, k);
252
  mpz_mul (r, r, n);
253
  --k;
254
      } while (k > lk);
255
    }
256
  else
257
    {
258
      mpz_t t3;
259
      unsigned long int m;
260
261
      m = ((k + lk) >> 1) + 1;
262
      rek_raising_fac (r, n, k, m, t1, t2);
263
264
      posmpz_inc_ui (n, m);
265
      if (t1 == NULL)
266
  {
267
    mpz_init_set (t3, n);
268
    t1 = t3;
269
  }
270
      else
271
  {
272
    ALLOC (t3) = 0;
273
    mpz_set (t1, n);
274
  }
275
      rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
276
277
      mpz_mul (r, r, t1);
278
      mpz_clear (t3);
279
    }
280
}
281
282
/* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
283
   rek_raising_fac, and exploiting an idea inspired by a piece of
284
   code that Fredrik Johansson wrote.
285
286
   Force an even k = 2i then compute:
287
     p  = (n+1)(n+2i)/2
288
     i' = i - 1
289
     p == (n+1)(n+2i'+2)/2
290
     p' = p + i' == (n+2)(n+2i'+1)/2
291
     n' = n + 1
292
     p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
293
294
   And compute the product p * p' * p" ...
295
*/
296
297
static void
298
mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
299
{
300
  unsigned long int hk;
301
  ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
302
  mpz_add_ui (n, n, 1);
303
  hk = k >> 1;
304
  mpz_hmul_nbnpk (p, n, hk, t);
305
306
  if ((k & 1) != 0)
307
    {
308
      mpz_add_ui (t, t, hk + 1);
309
      mpz_mul (r, t, p);
310
    }
311
  else
312
    {
313
      mpz_set (r, p);
314
    }
315
316
  if ((APARTAJ_KALKULOJ > 3) || (hk > 1))
317
    {
318
      posmpz_init (p);
319
      rek_raising_fac (r, p, hk - 1, 0, t, n);
320
    }
321
}
322
#endif /* KVAROPE */
323
324
/* This is a poor implementation.  Look at bin_uiui.c for improvement ideas.
325
   In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
326
   the code here only for big n.
327
328
   The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
329
   1 section 1.2.6 part G. */
330
331
void
332
mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
333
14
{
334
14
  mpz_t      ni;
335
14
  mp_size_t  negate;
336
337
14
  if (SIZ (n) < 0)
338
0
    {
339
      /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
340
0
      mpz_init (ni);
341
0
      mpz_add_ui (ni, n, 1L);
342
0
      mpz_neg (ni, ni);
343
0
      negate = (k & 1);   /* (-1)^k */
344
0
    }
345
14
  else
346
14
    {
347
      /* bin(n,k) == 0 if k>n
348
   (no test for this under the n<0 case, since -n+k-1 >= k there) */
349
14
      if (mpz_cmp_ui (n, k) < 0)
350
0
  {
351
0
    SIZ (r) = 0;
352
0
    return;
353
0
  }
354
355
      /* set ni = n-k */
356
14
      mpz_init (ni);
357
14
      mpz_sub_ui (ni, n, k);
358
14
      negate = 0;
359
14
    }
360
361
  /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
362
     for positive, 1 for negative). */
363
364
  /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's
365
     whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
366
     = ni, and new ni of ni+k-ni = k.  */
367
14
  if (mpz_cmp_ui (ni, k) < 0)
368
14
    {
369
14
      unsigned long  tmp;
370
14
      tmp = k;
371
14
      k = mpz_get_ui (ni);
372
14
      mpz_set_ui (ni, tmp);
373
14
    }
374
375
14
  if (k < APARTAJ_KALKULOJ)
376
0
    {
377
0
      if (k == 0)
378
0
  {
379
0
    SIZ (r) = 1;
380
0
    MPZ_NEWALLOC (r, 1)[0] = 1;
381
0
  }
382
#if APARTAJ_KALKULOJ > 2
383
      else if (k > 1)
384
  {
385
    mpz_add_ui (ni, ni, 1 + (APARTAJ_KALKULOJ > 2 && k > 2));
386
    mpz_mul (r, ni, ni); /* r = (n + (k>2))^2 */
387
    if (APARTAJ_KALKULOJ == 2 || k == 2)
388
      {
389
        mpz_add (r, r, ni); /* n^2+n= n(n+1) */
390
        posmpz_rsh1 (r);
391
      }
392
#if APARTAJ_KALKULOJ > 3
393
#if APARTAJ_KALKULOJ != 5
394
#error Not implemented! 3 < APARTAJ_KALKULOJ != 5
395
#endif
396
    else /* k > 2 */
397
      { /* k = 3, 4 */
398
        mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
399
        if (k == 3)
400
    {
401
      mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
402
      /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */
403
    }
404
        else /* k = 4 */
405
    {
406
      mpz_add (ni, ni, r); /* (n+1)^2+n */
407
      mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
408
      /* We should subtract one: ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3). */
409
      /* PTR (r) [0] ^= 1; would suffice, but it is not even needed, */
410
      /* because the next division will shift away this bit anyway.  */
411
      /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */
412
    }
413
        mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1 | (k>>1));
414
        SIZ(r) -= PTR(r) [SIZ(r) - 1] == 0;
415
      }
416
#endif
417
  }
418
#endif
419
0
      else
420
0
  { /* k = 1 */
421
0
    mpz_add_ui (r, ni, 1);
422
0
  }
423
0
    }
424
#if UZU_BIN_UIUI
425
  else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
426
    {
427
      mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
428
    }
429
#endif
430
14
  else
431
14
    {
432
14
      mp_limb_t count;
433
14
      mpz_t num, den;
434
435
14
      mpz_init (num);
436
14
      mpz_init (den);
437
438
14
#if KVAROPE
439
14
      mpz_raising_fac4 (num, ni, k, den, r);
440
14
      popc_limb (count, k);
441
14
      ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
442
14
      mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
443
#else
444
      mpz_raising_fac (num, ni, k, den, r);
445
      popc_limb (count, k);
446
      ASSERT (k - (k >> 1) - count >= 0);
447
      mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
448
#endif
449
450
14
      mpz_oddfac_1(den, k, 0);
451
452
14
      mpz_divexact(r, num, den);
453
14
      mpz_clear (num);
454
14
      mpz_clear (den);
455
14
    }
456
14
  mpz_clear (ni);
457
458
14
  SIZ(r) = (SIZ(r) ^ -negate) + negate;
459
14
}