Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpn/tdiv_qr.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2
   write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
3
   qxn is non-zero, generate that many fraction limbs and append them after the
4
   other quotient limbs, and update the remainder accordingly.  The input
5
   operands are unaffected.
6
7
   Preconditions:
8
   1. The most significant limb of the divisor must be non-zero.
9
   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
10
11
   The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
12
   complexity of multiplication.
13
14
Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
15
16
This file is part of the GNU MP Library.
17
18
The GNU MP Library is free software; you can redistribute it and/or modify
19
it under the terms of either:
20
21
  * the GNU Lesser General Public License as published by the Free
22
    Software Foundation; either version 3 of the License, or (at your
23
    option) any later version.
24
25
or
26
27
  * the GNU General Public License as published by the Free Software
28
    Foundation; either version 2 of the License, or (at your option) any
29
    later version.
30
31
or both in parallel, as here.
32
33
The GNU MP Library is distributed in the hope that it will be useful, but
34
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
35
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
36
for more details.
37
38
You should have received copies of the GNU General Public License and the
39
GNU Lesser General Public License along with the GNU MP Library.  If not,
40
see https://www.gnu.org/licenses/.  */
41
42
#include "gmp-impl.h"
43
#include "longlong.h"
44
45
46
void
47
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
48
       mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
49
1.00M
{
50
1.00M
  ASSERT_ALWAYS (qxn == 0);
51
52
1.00M
  ASSERT (nn >= 0);
53
1.00M
  ASSERT (dn >= 0);
54
1.00M
  ASSERT (dn == 0 || dp[dn - 1] != 0);
55
1.00M
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
56
1.00M
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
57
58
1.00M
  switch (dn)
59
1.00M
    {
60
0
    case 0:
61
0
      DIVIDE_BY_ZERO;
62
63
28.1k
    case 1:
64
28.1k
      {
65
28.1k
  rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
66
28.1k
  return;
67
0
      }
68
69
15.4k
    case 2:
70
15.4k
      {
71
15.4k
  mp_ptr n2p;
72
15.4k
  mp_limb_t qhl, cy;
73
15.4k
  TMP_DECL;
74
15.4k
  TMP_MARK;
75
15.4k
  if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
76
10.8k
    {
77
10.8k
      int cnt;
78
10.8k
      mp_limb_t d2p[2];
79
10.8k
      count_leading_zeros (cnt, dp[1]);
80
10.8k
      cnt -= GMP_NAIL_BITS;
81
10.8k
      d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
82
10.8k
      d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
83
10.8k
      n2p = TMP_ALLOC_LIMBS (nn + 1);
84
10.8k
      cy = mpn_lshift (n2p, np, nn, cnt);
85
10.8k
      n2p[nn] = cy;
86
10.8k
      qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
87
10.8k
      if (cy == 0)
88
6.98k
        qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
89
10.8k
      rp[0] = (n2p[0] >> cnt)
90
10.8k
        | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
91
10.8k
      rp[1] = (n2p[1] >> cnt);
92
10.8k
    }
93
4.64k
  else
94
4.64k
    {
95
4.64k
      n2p = TMP_ALLOC_LIMBS (nn);
96
4.64k
      MPN_COPY (n2p, np, nn);
97
4.64k
      qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
98
4.64k
      qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
99
4.64k
      rp[0] = n2p[0];
100
4.64k
      rp[1] = n2p[1];
101
4.64k
    }
102
15.4k
  TMP_FREE;
103
15.4k
  return;
104
15.4k
      }
105
106
957k
    default:
107
957k
      {
108
957k
  int adjust;
109
957k
  gmp_pi1_t dinv;
110
957k
  TMP_DECL;
111
957k
  TMP_MARK;
112
957k
  adjust = np[nn - 1] >= dp[dn - 1];  /* conservative tests for quotient size */
113
957k
  if (nn + adjust >= 2 * dn)
114
886k
    {
115
886k
      mp_ptr n2p, d2p;
116
886k
      mp_limb_t cy;
117
886k
      int cnt;
118
119
886k
      qp[nn - dn] = 0;        /* zero high quotient limb */
120
886k
      if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
121
783k
        {
122
783k
    count_leading_zeros (cnt, dp[dn - 1]);
123
783k
    cnt -= GMP_NAIL_BITS;
124
783k
    d2p = TMP_ALLOC_LIMBS (dn);
125
783k
    mpn_lshift (d2p, dp, dn, cnt);
126
783k
    n2p = TMP_ALLOC_LIMBS (nn + 1);
127
783k
    cy = mpn_lshift (n2p, np, nn, cnt);
128
783k
    n2p[nn] = cy;
129
783k
    nn += adjust;
130
783k
        }
131
102k
      else
132
102k
        {
133
102k
    cnt = 0;
134
102k
    d2p = (mp_ptr) dp;
135
102k
    n2p = TMP_ALLOC_LIMBS (nn + 1);
136
102k
    MPN_COPY (n2p, np, nn);
137
102k
    n2p[nn] = 0;
138
102k
    nn += adjust;
139
102k
        }
140
141
886k
      invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
142
886k
      if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
143
758k
        mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
144
127k
      else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
145
127k
         BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
146
127k
         (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
147
0
         + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
148
127k
        mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
149
0
      else
150
0
        {
151
0
    mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
152
0
    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
153
0
    mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
154
0
    n2p = rp;
155
0
        }
156
157
886k
      if (cnt != 0)
158
783k
        mpn_rshift (rp, n2p, dn, cnt);
159
102k
      else
160
102k
        MPN_COPY (rp, n2p, dn);
161
886k
      TMP_FREE;
162
886k
      return;
163
886k
    }
164
165
  /* When we come here, the numerator/partial remainder is less
166
     than twice the size of the denominator.  */
167
168
71.2k
    {
169
      /* Problem:
170
171
         Divide a numerator N with nn limbs by a denominator D with dn
172
         limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
173
         compared to dn, conventional division algorithms perform poorly.
174
         We want an algorithm that has an expected running time that is
175
         dependent only on qn.
176
177
         Algorithm (very informally stated):
178
179
         1) Divide the 2 x qn most significant limbs from the numerator
180
      by the qn most significant limbs from the denominator.  Call
181
      the result qest.  This is either the correct quotient, but
182
      might be 1 or 2 too large.  Compute the remainder from the
183
      division.  (This step is implemented by an mpn_divrem call.)
184
185
         2) Is the most significant limb from the remainder < p, where p
186
      is the product of the most significant limb from the quotient
187
      and the next(d)?  (Next(d) denotes the next ignored limb from
188
      the denominator.)  If it is, decrement qest, and adjust the
189
      remainder accordingly.
190
191
         3) Is the remainder >= qest?  If it is, qest is the desired
192
      quotient.  The algorithm terminates.
193
194
         4) Subtract qest x next(d) from the remainder.  If there is
195
      borrow out, decrement qest, and adjust the remainder
196
      accordingly.
197
198
         5) Skip one word from the denominator (i.e., let next(d) denote
199
      the next less significant limb.  */
200
201
71.2k
      mp_size_t qn;
202
71.2k
      mp_ptr n2p, d2p;
203
71.2k
      mp_ptr tp;
204
71.2k
      mp_limb_t cy;
205
71.2k
      mp_size_t in, rn;
206
71.2k
      mp_limb_t quotient_too_large;
207
71.2k
      unsigned int cnt;
208
209
71.2k
      qn = nn - dn;
210
71.2k
      qp[qn] = 0;       /* zero high quotient limb */
211
71.2k
      qn += adjust;     /* qn cannot become bigger */
212
213
71.2k
      if (qn == 0)
214
170
        {
215
170
    MPN_COPY (rp, np, dn);
216
170
    TMP_FREE;
217
170
    return;
218
170
        }
219
220
71.0k
      in = dn - qn;   /* (at least partially) ignored # of limbs in ops */
221
      /* Normalize denominator by shifting it to the left such that its
222
         most significant bit is set.  Then shift the numerator the same
223
         amount, to mathematically preserve quotient.  */
224
71.0k
      if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
225
68.5k
        {
226
68.5k
    count_leading_zeros (cnt, dp[dn - 1]);
227
68.5k
    cnt -= GMP_NAIL_BITS;
228
229
68.5k
    d2p = TMP_ALLOC_LIMBS (qn);
230
68.5k
    mpn_lshift (d2p, dp + in, qn, cnt);
231
68.5k
    d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
232
233
68.5k
    n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
234
68.5k
    cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
235
68.5k
    if (adjust)
236
43.7k
      {
237
43.7k
        n2p[2 * qn] = cy;
238
43.7k
        n2p++;
239
43.7k
      }
240
24.8k
    else
241
24.8k
      {
242
24.8k
        n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
243
24.8k
      }
244
68.5k
        }
245
2.51k
      else
246
2.51k
        {
247
2.51k
    cnt = 0;
248
2.51k
    d2p = (mp_ptr) dp + in;
249
250
2.51k
    n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
251
2.51k
    MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
252
2.51k
    if (adjust)
253
1
      {
254
1
        n2p[2 * qn] = 0;
255
1
        n2p++;
256
1
      }
257
2.51k
        }
258
259
      /* Get an approximate quotient using the extracted operands.  */
260
71.0k
      if (qn == 1)
261
13.9k
        {
262
13.9k
    mp_limb_t q0, r0;
263
13.9k
    udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
264
13.9k
    n2p[0] = r0 >> GMP_NAIL_BITS;
265
13.9k
    qp[0] = q0;
266
13.9k
        }
267
57.1k
      else if (qn == 2)
268
5.35k
        mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
269
51.7k
      else
270
51.7k
        {
271
51.7k
    invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
272
51.7k
    if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
273
41.7k
      mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
274
10.0k
    else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
275
10.0k
      mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
276
0
    else
277
0
      {
278
0
        mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
279
0
        mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
280
0
        mp_ptr r2p = rp;
281
0
        if (np == r2p) /* If N and R share space, put ... */
282
0
          r2p += nn - qn; /* intermediate remainder at N's upper end. */
283
0
        mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
284
0
        MPN_COPY (n2p, r2p, qn);
285
0
      }
286
51.7k
        }
287
288
71.0k
      rn = qn;
289
      /* Multiply the first ignored divisor limb by the most significant
290
         quotient limb.  If that product is > the partial remainder's
291
         most significant limb, we know the quotient is too large.  This
292
         test quickly catches most cases where the quotient is too large;
293
         it catches all cases where the quotient is 2 too large.  */
294
71.0k
      {
295
71.0k
        mp_limb_t dl, x;
296
71.0k
        mp_limb_t h, dummy;
297
298
71.0k
        if (in - 2 < 0)
299
48.4k
    dl = 0;
300
22.6k
        else
301
22.6k
    dl = dp[in - 2];
302
303
71.0k
#if GMP_NAIL_BITS == 0
304
71.0k
        x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
305
#else
306
        x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
307
        if (cnt != 0)
308
    x |= dl >> (GMP_NUMB_BITS - cnt);
309
#endif
310
71.0k
        umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
311
312
71.0k
        if (n2p[qn - 1] < h)
313
10.6k
    {
314
10.6k
      mp_limb_t cy;
315
316
10.6k
      mpn_decr_u (qp, (mp_limb_t) 1);
317
10.6k
      cy = mpn_add_n (n2p, n2p, d2p, qn);
318
10.6k
      if (cy)
319
1.74k
        {
320
          /* The partial remainder is safely large.  */
321
1.74k
          n2p[qn] = cy;
322
1.74k
          ++rn;
323
1.74k
        }
324
10.6k
    }
325
71.0k
      }
326
327
71.0k
      quotient_too_large = 0;
328
71.0k
      if (cnt != 0)
329
68.5k
        {
330
68.5k
    mp_limb_t cy1, cy2;
331
332
    /* Append partially used numerator limb to partial remainder.  */
333
68.5k
    cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
334
68.5k
    n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
335
336
    /* Update partial remainder with partially used divisor limb.  */
337
68.5k
    cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
338
68.5k
    if (qn != rn)
339
1.68k
      {
340
1.68k
        ASSERT_ALWAYS (n2p[qn] >= cy2);
341
1.68k
        n2p[qn] -= cy2;
342
1.68k
      }
343
66.8k
    else
344
66.8k
      {
345
66.8k
        n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
346
347
66.8k
        quotient_too_large = (cy1 < cy2);
348
66.8k
        ++rn;
349
66.8k
      }
350
68.5k
    --in;
351
68.5k
        }
352
      /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
353
354
71.0k
      tp = TMP_ALLOC_LIMBS (dn);
355
356
71.0k
      if (in < qn)
357
49.8k
        {
358
49.8k
    if (in == 0)
359
47.5k
      {
360
47.5k
        MPN_COPY (rp, n2p, rn);
361
47.5k
        ASSERT_ALWAYS (rn == dn);
362
47.5k
        goto foo;
363
47.5k
      }
364
2.24k
    mpn_mul (tp, qp, qn, dp, in);
365
2.24k
        }
366
21.2k
      else
367
21.2k
        mpn_mul (tp, dp, in, qp, qn);
368
369
23.4k
      cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
370
23.4k
      MPN_COPY (rp + in, n2p, dn - in);
371
23.4k
      quotient_too_large |= cy;
372
23.4k
      cy = mpn_sub_n (rp, np, tp, in);
373
23.4k
      cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
374
23.4k
      quotient_too_large |= cy;
375
71.0k
    foo:
376
71.0k
      if (quotient_too_large)
377
363
        {
378
363
    mpn_decr_u (qp, (mp_limb_t) 1);
379
363
    mpn_add_n (rp, rp, dp, dn);
380
363
        }
381
71.0k
    }
382
71.0k
  TMP_FREE;
383
71.0k
  return;
384
23.4k
      }
385
1.00M
    }
386
1.00M
}