Coverage Report

Created: 2024-11-25 06:31

/src/gmp/mpn/tdiv_qr.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2
   write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
3
   qxn is non-zero, generate that many fraction limbs and append them after the
4
   other quotient limbs, and update the remainder accordingly.  The input
5
   operands are unaffected.
6
7
   Preconditions:
8
   1. The most significant limb of the divisor must be non-zero.
9
   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
10
11
   The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
12
   complexity of multiplication.
13
14
Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
15
16
This file is part of the GNU MP Library.
17
18
The GNU MP Library is free software; you can redistribute it and/or modify
19
it under the terms of either:
20
21
  * the GNU Lesser General Public License as published by the Free
22
    Software Foundation; either version 3 of the License, or (at your
23
    option) any later version.
24
25
or
26
27
  * the GNU General Public License as published by the Free Software
28
    Foundation; either version 2 of the License, or (at your option) any
29
    later version.
30
31
or both in parallel, as here.
32
33
The GNU MP Library is distributed in the hope that it will be useful, but
34
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
35
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
36
for more details.
37
38
You should have received copies of the GNU General Public License and the
39
GNU Lesser General Public License along with the GNU MP Library.  If not,
40
see https://www.gnu.org/licenses/.  */
41
42
#include "gmp-impl.h"
43
#include "longlong.h"
44
45
46
void
47
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
48
       mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
49
117k
{
50
117k
  ASSERT_ALWAYS (qxn == 0);
51
52
117k
  ASSERT (nn >= 0);
53
117k
  ASSERT (dn >= 0);
54
117k
  ASSERT (dn == 0 || dp[dn - 1] != 0);
55
117k
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
56
117k
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
57
58
117k
  switch (dn)
59
117k
    {
60
0
    case 0:
61
0
      DIVIDE_BY_ZERO;
62
63
8.37k
    case 1:
64
8.37k
      {
65
8.37k
  rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
66
8.37k
  return;
67
0
      }
68
69
4.11k
    case 2:
70
4.11k
      {
71
4.11k
  mp_ptr n2p;
72
4.11k
  mp_limb_t qhl, cy;
73
4.11k
  TMP_DECL;
74
4.11k
  TMP_MARK;
75
4.11k
  if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
76
3.42k
    {
77
3.42k
      int cnt;
78
3.42k
      mp_limb_t d2p[2];
79
3.42k
      count_leading_zeros (cnt, dp[1]);
80
3.42k
      cnt -= GMP_NAIL_BITS;
81
3.42k
      d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
82
3.42k
      d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
83
3.42k
      n2p = TMP_ALLOC_LIMBS (nn + 1);
84
3.42k
      cy = mpn_lshift (n2p, np, nn, cnt);
85
3.42k
      n2p[nn] = cy;
86
3.42k
      qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
87
3.42k
      if (cy == 0)
88
1.33k
        qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
89
3.42k
      rp[0] = (n2p[0] >> cnt)
90
3.42k
        | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
91
3.42k
      rp[1] = (n2p[1] >> cnt);
92
3.42k
    }
93
692
  else
94
692
    {
95
692
      n2p = TMP_ALLOC_LIMBS (nn);
96
692
      MPN_COPY (n2p, np, nn);
97
692
      qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
98
692
      qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
99
692
      rp[0] = n2p[0];
100
692
      rp[1] = n2p[1];
101
692
    }
102
4.11k
  TMP_FREE;
103
4.11k
  return;
104
0
      }
105
106
105k
    default:
107
105k
      {
108
105k
  int adjust;
109
105k
  gmp_pi1_t dinv;
110
105k
  TMP_DECL;
111
105k
  TMP_MARK;
112
105k
  adjust = np[nn - 1] >= dp[dn - 1];  /* conservative tests for quotient size */
113
105k
  if (nn + adjust >= 2 * dn)
114
67.3k
    {
115
67.3k
      mp_ptr n2p, d2p;
116
67.3k
      mp_limb_t cy;
117
67.3k
      int cnt;
118
119
67.3k
      qp[nn - dn] = 0;        /* zero high quotient limb */
120
67.3k
      if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
121
5.29k
        {
122
5.29k
    count_leading_zeros (cnt, dp[dn - 1]);
123
5.29k
    cnt -= GMP_NAIL_BITS;
124
5.29k
    d2p = TMP_ALLOC_LIMBS (dn);
125
5.29k
    mpn_lshift (d2p, dp, dn, cnt);
126
5.29k
    n2p = TMP_ALLOC_LIMBS (nn + 1);
127
5.29k
    cy = mpn_lshift (n2p, np, nn, cnt);
128
5.29k
    n2p[nn] = cy;
129
5.29k
    nn += adjust;
130
5.29k
        }
131
62.0k
      else
132
62.0k
        {
133
62.0k
    cnt = 0;
134
62.0k
    d2p = (mp_ptr) dp;
135
62.0k
    n2p = TMP_ALLOC_LIMBS (nn + 1);
136
62.0k
    MPN_COPY (n2p, np, nn);
137
62.0k
    n2p[nn] = 0;
138
62.0k
    nn += adjust;
139
62.0k
        }
140
141
67.3k
      invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
142
67.3k
      if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
143
67.0k
        mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
144
266
      else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
145
266
         BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
146
266
         (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
147
0
         + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
148
266
        mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
149
0
      else
150
0
        {
151
0
    mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
152
0
    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
153
0
    mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
154
0
    n2p = rp;
155
0
        }
156
157
67.3k
      if (cnt != 0)
158
5.29k
        mpn_rshift (rp, n2p, dn, cnt);
159
62.0k
      else
160
62.0k
        MPN_COPY (rp, n2p, dn);
161
67.3k
      TMP_FREE;
162
67.3k
      return;
163
67.3k
    }
164
165
  /* When we come here, the numerator/partial remainder is less
166
     than twice the size of the denominator.  */
167
168
37.9k
    {
169
      /* Problem:
170
171
         Divide a numerator N with nn limbs by a denominator D with dn
172
         limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
173
         compared to dn, conventional division algorithms perform poorly.
174
         We want an algorithm that has an expected running time that is
175
         dependent only on qn.
176
177
         Algorithm (very informally stated):
178
179
         1) Divide the 2 x qn most significant limbs from the numerator
180
      by the qn most significant limbs from the denominator.  Call
181
      the result qest.  This is either the correct quotient, but
182
      might be 1 or 2 too large.  Compute the remainder from the
183
      division.  (This step is implemented by an mpn_divrem call.)
184
185
         2) Is the most significant limb from the remainder < p, where p
186
      is the product of the most significant limb from the quotient
187
      and the next(d)?  (Next(d) denotes the next ignored limb from
188
      the denominator.)  If it is, decrement qest, and adjust the
189
      remainder accordingly.
190
191
         3) Is the remainder >= qest?  If it is, qest is the desired
192
      quotient.  The algorithm terminates.
193
194
         4) Subtract qest x next(d) from the remainder.  If there is
195
      borrow out, decrement qest, and adjust the remainder
196
      accordingly.
197
198
         5) Skip one word from the denominator (i.e., let next(d) denote
199
      the next less significant limb.  */
200
201
37.9k
      mp_size_t qn;
202
37.9k
      mp_ptr n2p, d2p;
203
37.9k
      mp_ptr tp;
204
37.9k
      mp_limb_t cy;
205
37.9k
      mp_size_t in, rn;
206
37.9k
      mp_limb_t quotient_too_large;
207
37.9k
      unsigned int cnt;
208
209
37.9k
      qn = nn - dn;
210
37.9k
      qp[qn] = 0;       /* zero high quotient limb */
211
37.9k
      qn += adjust;     /* qn cannot become bigger */
212
213
37.9k
      if (qn == 0)
214
130
        {
215
130
    MPN_COPY (rp, np, dn);
216
130
    TMP_FREE;
217
130
    return;
218
130
        }
219
220
37.8k
      in = dn - qn;   /* (at least partially) ignored # of limbs in ops */
221
      /* Normalize denominator by shifting it to the left such that its
222
         most significant bit is set.  Then shift the numerator the same
223
         amount, to mathematically preserve quotient.  */
224
37.8k
      if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
225
25.7k
        {
226
25.7k
    count_leading_zeros (cnt, dp[dn - 1]);
227
25.7k
    cnt -= GMP_NAIL_BITS;
228
229
25.7k
    d2p = TMP_ALLOC_LIMBS (qn);
230
25.7k
    mpn_lshift (d2p, dp + in, qn, cnt);
231
25.7k
    d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
232
233
25.7k
    n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
234
25.7k
    cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
235
25.7k
    if (adjust)
236
13.9k
      {
237
13.9k
        n2p[2 * qn] = cy;
238
13.9k
        n2p++;
239
13.9k
      }
240
11.8k
    else
241
11.8k
      {
242
11.8k
        n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
243
11.8k
      }
244
25.7k
        }
245
12.0k
      else
246
12.0k
        {
247
12.0k
    cnt = 0;
248
12.0k
    d2p = (mp_ptr) dp + in;
249
250
12.0k
    n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
251
12.0k
    MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
252
12.0k
    if (adjust)
253
987
      {
254
987
        n2p[2 * qn] = 0;
255
987
        n2p++;
256
987
      }
257
12.0k
        }
258
259
      /* Get an approximate quotient using the extracted operands.  */
260
37.8k
      if (qn == 1)
261
9.36k
        {
262
9.36k
    mp_limb_t q0, r0;
263
9.36k
    udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
264
9.36k
    n2p[0] = r0 >> GMP_NAIL_BITS;
265
9.36k
    qp[0] = q0;
266
9.36k
        }
267
28.4k
      else if (qn == 2)
268
8.60k
        mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
269
19.8k
      else
270
19.8k
        {
271
19.8k
    invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
272
19.8k
    if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
273
18.5k
      mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
274
1.30k
    else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
275
1.30k
      mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
276
0
    else
277
0
      {
278
0
        mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
279
0
        mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
280
0
        mp_ptr r2p = rp;
281
0
        if (np == r2p) /* If N and R share space, put ... */
282
0
          r2p += nn - qn; /* intermediate remainder at N's upper end. */
283
0
        mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
284
0
        MPN_COPY (n2p, r2p, qn);
285
0
      }
286
19.8k
        }
287
288
37.8k
      rn = qn;
289
      /* Multiply the first ignored divisor limb by the most significant
290
         quotient limb.  If that product is > the partial remainder's
291
         most significant limb, we know the quotient is too large.  This
292
         test quickly catches most cases where the quotient is too large;
293
         it catches all cases where the quotient is 2 too large.  */
294
37.8k
      {
295
37.8k
        mp_limb_t dl, x;
296
37.8k
        mp_limb_t h, dummy;
297
298
37.8k
        if (in - 2 < 0)
299
1.94k
    dl = 0;
300
35.8k
        else
301
35.8k
    dl = dp[in - 2];
302
303
37.8k
#if GMP_NAIL_BITS == 0
304
37.8k
        x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
305
#else
306
        x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
307
        if (cnt != 0)
308
    x |= dl >> (GMP_NUMB_BITS - cnt);
309
#endif
310
37.8k
        umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
311
312
37.8k
        if (n2p[qn - 1] < h)
313
2.63k
    {
314
2.63k
      mp_limb_t cy;
315
316
2.63k
      mpn_decr_u (qp, (mp_limb_t) 1);
317
2.63k
      cy = mpn_add_n (n2p, n2p, d2p, qn);
318
2.63k
      if (cy)
319
1.14k
        {
320
          /* The partial remainder is safely large.  */
321
1.14k
          n2p[qn] = cy;
322
1.14k
          ++rn;
323
1.14k
        }
324
2.63k
    }
325
37.8k
      }
326
327
37.8k
      quotient_too_large = 0;
328
37.8k
      if (cnt != 0)
329
25.7k
        {
330
25.7k
    mp_limb_t cy1, cy2;
331
332
    /* Append partially used numerator limb to partial remainder.  */
333
25.7k
    cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
334
25.7k
    n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
335
336
    /* Update partial remainder with partially used divisor limb.  */
337
25.7k
    cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
338
25.7k
    if (qn != rn)
339
777
      {
340
777
        ASSERT_ALWAYS (n2p[qn] >= cy2);
341
777
        n2p[qn] -= cy2;
342
777
      }
343
24.9k
    else
344
24.9k
      {
345
24.9k
        n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
346
347
24.9k
        quotient_too_large = (cy1 < cy2);
348
24.9k
        ++rn;
349
24.9k
      }
350
25.7k
    --in;
351
25.7k
        }
352
      /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
353
354
37.8k
      tp = TMP_ALLOC_LIMBS (dn);
355
356
37.8k
      if (in < qn)
357
12.0k
        {
358
12.0k
    if (in == 0)
359
1.64k
      {
360
1.64k
        MPN_COPY (rp, n2p, rn);
361
1.64k
        ASSERT_ALWAYS (rn == dn);
362
1.64k
        goto foo;
363
1.64k
      }
364
10.4k
    mpn_mul (tp, qp, qn, dp, in);
365
10.4k
        }
366
25.7k
      else
367
25.7k
        mpn_mul (tp, dp, in, qp, qn);
368
369
36.1k
      cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
370
36.1k
      MPN_COPY (rp + in, n2p, dn - in);
371
36.1k
      quotient_too_large |= cy;
372
36.1k
      cy = mpn_sub_n (rp, np, tp, in);
373
36.1k
      cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
374
36.1k
      quotient_too_large |= cy;
375
37.8k
    foo:
376
37.8k
      if (quotient_too_large)
377
1.64k
        {
378
1.64k
    mpn_decr_u (qp, (mp_limb_t) 1);
379
1.64k
    mpn_add_n (rp, rp, dp, dn);
380
1.64k
        }
381
37.8k
    }
382
37.8k
  TMP_FREE;
383
37.8k
  return;
384
36.1k
      }
385
117k
    }
386
117k
}