Coverage Report

Created: 2024-06-28 06:19

/src/gmp-6.2.1/mpz/bin_ui.c
Line
Count
Source (jump to first uncovered line)
1
/* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
2
3
Copyright 1998-2002, 2012, 2013, 2015, 2017-2018 Free Software Foundation, Inc.
4
5
This file is part of the GNU MP Library.
6
7
The GNU MP Library is free software; you can redistribute it and/or modify
8
it under the terms of either:
9
10
  * the GNU Lesser General Public License as published by the Free
11
    Software Foundation; either version 3 of the License, or (at your
12
    option) any later version.
13
14
or
15
16
  * the GNU General Public License as published by the Free Software
17
    Foundation; either version 2 of the License, or (at your option) any
18
    later version.
19
20
or both in parallel, as here.
21
22
The GNU MP Library is distributed in the hope that it will be useful, but
23
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25
for more details.
26
27
You should have received copies of the GNU General Public License and the
28
GNU Lesser General Public License along with the GNU MP Library.  If not,
29
see https://www.gnu.org/licenses/.  */
30
31
#include "gmp-impl.h"
32
33
/* How many special cases? Minimum is 2: 0 and 1;
34
 * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
35
 */
36
0
#define APARTAJ_KALKULOJ 2
37
38
/* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
39
 * the operands fit.
40
 */
41
#define UZU_BIN_UIUI 0
42
43
/* Whether to use a shortcut to precompute the product of four
44
 * elements (1), or precompute only the product of a couple (0).
45
 *
46
 * In both cases the precomputed product is then updated with some
47
 * linear operations to obtain the product of the next four (1)
48
 * [or two (0)] operands.
49
 */
50
#define KVAROPE 1
51
52
static void
53
posmpz_init (mpz_ptr r)
54
0
{
55
0
  mp_ptr rp;
56
0
  ASSERT (SIZ (r) > 0);
57
0
  rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
58
0
  *rp = 0;
59
0
  *++rp = 0;
60
0
}
61
62
/* Equivalent to mpz_add_ui (r, r, in), but faster when
63
   0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
64
static void
65
posmpz_inc_ui (mpz_ptr r, unsigned long in)
66
0
{
67
#if BITS_PER_ULONG > GMP_NUMB_BITS
68
  mpz_add_ui (r, r, in);
69
#else
70
0
  ASSERT (SIZ (r) > 0);
71
0
  MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
72
0
  SIZ (r) += (PTR (r)[SIZ (r)] != 0);
73
0
#endif
74
0
}
75
76
/* Equivalent to mpz_sub_ui (r, r, in), but faster when
77
   0 < SIZ (r) and we know in advance that the result is positive. */
78
static void
79
posmpz_dec_ui (mpz_ptr r, unsigned long in)
80
0
{
81
#if BITS_PER_ULONG > GMP_NUMB_BITS
82
  mpz_sub_ui (r, r, in);
83
#else
84
0
  ASSERT (mpz_cmp_ui (r, in) >= 0);
85
0
  MPN_DECR_U (PTR (r), SIZ (r), in);
86
0
  SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
87
0
#endif
88
0
}
89
90
/* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
91
   0 < SIZ (r) and we know in advance that the result is positive. */
92
static void
93
posmpz_rsh1 (mpz_ptr r)
94
0
{
95
0
  mp_ptr rp;
96
0
  mp_size_t rn;
97
98
0
  rn = SIZ (r);
99
0
  rp = PTR (r);
100
0
  ASSERT (rn > 0);
101
0
  mpn_rshift (rp, rp, rn, 1);
102
0
  SIZ (r) -= rp[rn - 1] == 0;
103
0
}
104
105
/* Computes r = n(n+(2*k-1))/2
106
   It uses a sqare instead of a product, computing
107
   r = ((n+k-1)^2 + n - (k-1)^2)/2
108
   As a side effect, sets t = n+k-1
109
 */
110
static void
111
mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
112
0
{
113
0
  ASSERT (k > 0 && SIZ(n) > 0);
114
0
  --k;
115
0
  mpz_add_ui (t, n, k);
116
0
  mpz_mul (r, t, t);
117
0
  mpz_add (r, r, n);
118
0
  posmpz_rsh1 (r);
119
0
  if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
120
0
    posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
121
0
  else
122
0
    {
123
0
      mpz_t tmp;
124
0
      mpz_init_set_ui (tmp, (k + (k & 1)));
125
0
      mpz_mul_ui (tmp, tmp, k >> 1);
126
0
      mpz_sub (r, r, tmp);
127
0
      mpz_clear (tmp);
128
0
    }
129
0
}
130
131
#if KVAROPE
132
static void
133
rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
134
0
{
135
0
  if (k - lk < 5)
136
0
    {
137
0
      do {
138
0
  posmpz_inc_ui (p, 4*k+2);
139
0
  mpz_addmul_ui (P, p, 4*k);
140
0
  posmpz_dec_ui (P, k);
141
0
  mpz_mul (r, r, P);
142
0
      } while (--k > lk);
143
0
    }
144
0
  else
145
0
    {
146
0
      mpz_t lt;
147
0
      unsigned long int m;
148
149
0
      m = ((k + lk) >> 1) + 1;
150
0
      rek_raising_fac4 (r, p, P, k, m, t);
151
152
0
      posmpz_inc_ui (p, 4*m+2);
153
0
      mpz_addmul_ui (P, p, 4*m);
154
0
      posmpz_dec_ui (P, m);
155
0
      if (t == NULL)
156
0
  {
157
0
    mpz_init_set (lt, P);
158
0
    t = lt;
159
0
  }
160
0
      else
161
0
  {
162
0
    ALLOC (lt) = 0;
163
0
    mpz_set (t, P);
164
0
  }
165
0
      rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
166
167
0
      mpz_mul (r, r, t);
168
0
      mpz_clear (lt);
169
0
    }
170
0
}
171
172
/* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
173
   rek_raising_fac4, and exploiting an idea inspired by a piece of
174
   code that Fredrik Johansson wrote and by a comment by Niels Möller.
175
176
   Assume k = 4i then compute:
177
     p  = (n+1)(n+4i)/2 - i
178
    (n+1+1)(n+4i)/2 = p + i + (n+4i)/2
179
    (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
180
     P  = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
181
     n' = n + 2
182
     i' = i - 1
183
    (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
184
    (n'-1)(n'+4i'+2)/2 - i' - 1 = p
185
    (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
186
    (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) =  p + 4i' + 1
187
    (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
188
     p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
189
    p' - 4i' - 2 = p
190
    (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
191
    (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
192
    (p' - 3i' - 1)*(p' - i')/2 = P
193
    (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
194
    (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2
195
    (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
196
    (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
197
     P' = P + 4i'p' - i'
198
199
   And compute the product P * P' * P" ...
200
 */
201
202
static void
203
mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
204
0
{
205
0
  ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
206
0
  posmpz_init (n);
207
0
  posmpz_inc_ui (n, 1);
208
0
  SIZ (r) = 0;
209
0
  if (k & 1)
210
0
    {
211
0
      mpz_set (r, n);
212
0
      posmpz_inc_ui (n, 1);
213
0
    }
214
0
  k >>= 1;
215
0
  if (APARTAJ_KALKULOJ < 2 && k == 0)
216
0
    return;
217
218
0
  mpz_hmul_nbnpk (p, n, k, t);
219
0
  posmpz_init (p);
220
221
0
  if (k & 1)
222
0
    {
223
0
      if (SIZ (r))
224
0
  mpz_mul (r, r, p);
225
0
      else
226
0
  mpz_set (r, p);
227
0
      posmpz_inc_ui (p, k - 1);
228
0
    }
229
0
  k >>= 1;
230
0
  if (APARTAJ_KALKULOJ < 4 && k == 0)
231
0
    return;
232
233
0
  mpz_hmul_nbnpk (t, p, k, n);
234
0
  if (SIZ (r))
235
0
    mpz_mul (r, r, t);
236
0
  else
237
0
    mpz_set (r, t);
238
239
0
  if (APARTAJ_KALKULOJ > 8 || k > 1)
240
0
    {
241
0
      posmpz_dec_ui (p, k);
242
0
      rek_raising_fac4 (r, p, t, k - 1, 0, n);
243
0
    }
244
0
}
245
246
#else /* KVAROPE */
247
248
static void
249
rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
250
{
251
  /* Should the threshold depend on SIZ (n) ? */
252
  if (k - lk < 10)
253
    {
254
      do {
255
  posmpz_inc_ui (n, k);
256
  mpz_mul (r, r, n);
257
  --k;
258
      } while (k > lk);
259
    }
260
  else
261
    {
262
      mpz_t t3;
263
      unsigned long int m;
264
265
      m = ((k + lk) >> 1) + 1;
266
      rek_raising_fac (r, n, k, m, t1, t2);
267
268
      posmpz_inc_ui (n, m);
269
      if (t1 == NULL)
270
  {
271
    mpz_init_set (t3, n);
272
    t1 = t3;
273
  }
274
      else
275
  {
276
    ALLOC (t3) = 0;
277
    mpz_set (t1, n);
278
  }
279
      rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
280
281
      mpz_mul (r, r, t1);
282
      mpz_clear (t3);
283
    }
284
}
285
286
/* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
287
   rek_raising_fac, and exploiting an idea inspired by a piece of
288
   code that Fredrik Johansson wrote.
289
290
   Force an even k = 2i then compute:
291
     p  = (n+1)(n+2i)/2
292
     i' = i - 1
293
     p == (n+1)(n+2i'+2)/2
294
     p' = p + i' == (n+2)(n+2i'+1)/2
295
     n' = n + 1
296
     p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
297
298
   And compute the product p * p' * p" ...
299
*/
300
301
static void
302
mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
303
{
304
  unsigned long int hk;
305
  ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
306
  mpz_add_ui (n, n, 1);
307
  hk = k >> 1;
308
  mpz_hmul_nbnpk (p, n, hk, t);
309
310
  if ((k & 1) != 0)
311
    {
312
      mpz_add_ui (t, t, hk + 1);
313
      mpz_mul (r, t, p);
314
    }
315
  else
316
    {
317
      mpz_set (r, p);
318
    }
319
320
  if ((APARTAJ_KALKULOJ > 3) || (hk > 1))
321
    {
322
      posmpz_init (p);
323
      rek_raising_fac (r, p, hk - 1, 0, t, n);
324
    }
325
}
326
#endif /* KVAROPE */
327
328
/* This is a poor implementation.  Look at bin_uiui.c for improvement ideas.
329
   In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
330
   the code here only for big n.
331
332
   The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
333
   1 section 1.2.6 part G. */
334
335
void
336
mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
337
0
{
338
0
  mpz_t      ni;
339
0
  mp_size_t  negate;
340
341
0
  if (SIZ (n) < 0)
342
0
    {
343
      /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
344
0
      mpz_init (ni);
345
0
      mpz_add_ui (ni, n, 1L);
346
0
      mpz_neg (ni, ni);
347
0
      negate = (k & 1);   /* (-1)^k */
348
0
    }
349
0
  else
350
0
    {
351
      /* bin(n,k) == 0 if k>n
352
   (no test for this under the n<0 case, since -n+k-1 >= k there) */
353
0
      if (mpz_cmp_ui (n, k) < 0)
354
0
  {
355
0
    SIZ (r) = 0;
356
0
    return;
357
0
  }
358
359
      /* set ni = n-k */
360
0
      mpz_init (ni);
361
0
      mpz_sub_ui (ni, n, k);
362
0
      negate = 0;
363
0
    }
364
365
  /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
366
     for positive, 1 for negative). */
367
368
  /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's
369
     whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
370
     = ni, and new ni of ni+k-ni = k.  */
371
0
  if (mpz_cmp_ui (ni, k) < 0)
372
0
    {
373
0
      unsigned long  tmp;
374
0
      tmp = k;
375
0
      k = mpz_get_ui (ni);
376
0
      mpz_set_ui (ni, tmp);
377
0
    }
378
379
0
  if (k < APARTAJ_KALKULOJ)
380
0
    {
381
0
      if (k == 0)
382
0
  {
383
0
    SIZ (r) = 1;
384
0
    MPZ_NEWALLOC (r, 1)[0] = 1;
385
0
  }
386
#if APARTAJ_KALKULOJ > 2
387
      else if (k == 2)
388
  {
389
    mpz_add_ui (ni, ni, 1);
390
    mpz_mul (r, ni, ni);
391
    mpz_add (r, r, ni);
392
    posmpz_rsh1 (r);
393
  }
394
#endif
395
#if APARTAJ_KALKULOJ > 3
396
      else if (k > 2)
397
  { /* k = 3, 4 */
398
    mpz_add_ui (ni, ni, 2); /* n+1 */
399
    mpz_mul (r, ni, ni); /* (n+1)^2 */
400
    mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
401
    if (k == 3)
402
      {
403
        mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
404
        /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */
405
        mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1);
406
        MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
407
      }
408
    else /* k = 4 */
409
      {
410
        mpz_add (ni, ni, r); /* (n+1)^2+n */
411
        mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
412
        mpz_sub_ui (r, r, 1); /* ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3) */
413
        /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */
414
        mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 3);
415
        MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
416
      }
417
  }
418
#endif
419
0
      else
420
0
  { /* k = 1 */
421
0
    mpz_add_ui (r, ni, 1);
422
0
  }
423
0
    }
424
#if UZU_BIN_UIUI
425
  else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
426
    {
427
      mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
428
    }
429
#endif
430
0
  else
431
0
    {
432
0
      mp_limb_t count;
433
0
      mpz_t num, den;
434
435
0
      mpz_init (num);
436
0
      mpz_init (den);
437
438
0
#if KVAROPE
439
0
      mpz_raising_fac4 (num, ni, k, den, r);
440
0
      popc_limb (count, k);
441
0
      ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
442
0
      mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
443
#else
444
      mpz_raising_fac (num, ni, k, den, r);
445
      popc_limb (count, k);
446
      ASSERT (k - (k >> 1) - count >= 0);
447
      mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
448
#endif
449
450
0
      mpz_oddfac_1(den, k, 0);
451
452
0
      mpz_divexact(r, num, den);
453
0
      mpz_clear (num);
454
0
      mpz_clear (den);
455
0
    }
456
0
  mpz_clear (ni);
457
458
0
  SIZ(r) = (SIZ(r) ^ -negate) + negate;
459
0
}