Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpn/tdiv_qr.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2
   write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
3
   qxn is non-zero, generate that many fraction limbs and append them after the
4
   other quotient limbs, and update the remainder accordingly.  The input
5
   operands are unaffected.
6
7
   Preconditions:
8
   1. The most significant limb of the divisor must be non-zero.
9
   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
10
11
   The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
12
   complexity of multiplication.
13
14
Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
15
16
This file is part of the GNU MP Library.
17
18
The GNU MP Library is free software; you can redistribute it and/or modify
19
it under the terms of either:
20
21
  * the GNU Lesser General Public License as published by the Free
22
    Software Foundation; either version 3 of the License, or (at your
23
    option) any later version.
24
25
or
26
27
  * the GNU General Public License as published by the Free Software
28
    Foundation; either version 2 of the License, or (at your option) any
29
    later version.
30
31
or both in parallel, as here.
32
33
The GNU MP Library is distributed in the hope that it will be useful, but
34
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
35
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
36
for more details.
37
38
You should have received copies of the GNU General Public License and the
39
GNU Lesser General Public License along with the GNU MP Library.  If not,
40
see https://www.gnu.org/licenses/.  */
41
42
#include "gmp-impl.h"
43
#include "longlong.h"
44
45
46
void
47
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
48
       mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
49
1.14M
{
50
1.14M
  ASSERT_ALWAYS (qxn == 0);
51
52
1.14M
  ASSERT (nn >= 0);
53
1.14M
  ASSERT (dn >= 0);
54
1.14M
  ASSERT (dn == 0 || dp[dn - 1] != 0);
55
1.14M
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
56
1.14M
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
57
58
1.14M
  switch (dn)
59
1.14M
    {
60
0
    case 0:
61
0
      DIVIDE_BY_ZERO;
62
63
33.5k
    case 1:
64
33.5k
      {
65
33.5k
  rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
66
33.5k
  return;
67
0
      }
68
69
51.2k
    case 2:
70
51.2k
      {
71
51.2k
  mp_ptr n2p;
72
51.2k
  mp_limb_t qhl, cy;
73
51.2k
  TMP_DECL;
74
51.2k
  TMP_MARK;
75
51.2k
  if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
76
42.9k
    {
77
42.9k
      int cnt;
78
42.9k
      mp_limb_t d2p[2];
79
42.9k
      count_leading_zeros (cnt, dp[1]);
80
42.9k
      cnt -= GMP_NAIL_BITS;
81
42.9k
      d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
82
42.9k
      d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
83
42.9k
      n2p = TMP_ALLOC_LIMBS (nn + 1);
84
42.9k
      cy = mpn_lshift (n2p, np, nn, cnt);
85
42.9k
      n2p[nn] = cy;
86
42.9k
      qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
87
42.9k
      if (cy == 0)
88
30.4k
        qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
89
42.9k
      rp[0] = (n2p[0] >> cnt)
90
42.9k
        | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
91
42.9k
      rp[1] = (n2p[1] >> cnt);
92
42.9k
    }
93
8.22k
  else
94
8.22k
    {
95
8.22k
      n2p = TMP_ALLOC_LIMBS (nn);
96
8.22k
      MPN_COPY (n2p, np, nn);
97
8.22k
      qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
98
8.22k
      qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
99
8.22k
      rp[0] = n2p[0];
100
8.22k
      rp[1] = n2p[1];
101
8.22k
    }
102
51.2k
  TMP_FREE;
103
51.2k
  return;
104
51.2k
      }
105
106
1.06M
    default:
107
1.06M
      {
108
1.06M
  int adjust;
109
1.06M
  gmp_pi1_t dinv;
110
1.06M
  TMP_DECL;
111
1.06M
  TMP_MARK;
112
1.06M
  adjust = np[nn - 1] >= dp[dn - 1];  /* conservative tests for quotient size */
113
1.06M
  if (nn + adjust >= 2 * dn)
114
983k
    {
115
983k
      mp_ptr n2p, d2p;
116
983k
      mp_limb_t cy;
117
983k
      int cnt;
118
119
983k
      qp[nn - dn] = 0;        /* zero high quotient limb */
120
983k
      if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
121
925k
        {
122
925k
    count_leading_zeros (cnt, dp[dn - 1]);
123
925k
    cnt -= GMP_NAIL_BITS;
124
925k
    d2p = TMP_ALLOC_LIMBS (dn);
125
925k
    mpn_lshift (d2p, dp, dn, cnt);
126
925k
    n2p = TMP_ALLOC_LIMBS (nn + 1);
127
925k
    cy = mpn_lshift (n2p, np, nn, cnt);
128
925k
    n2p[nn] = cy;
129
925k
    nn += adjust;
130
925k
        }
131
58.1k
      else
132
58.1k
        {
133
58.1k
    cnt = 0;
134
58.1k
    d2p = (mp_ptr) dp;
135
58.1k
    n2p = TMP_ALLOC_LIMBS (nn + 1);
136
58.1k
    MPN_COPY (n2p, np, nn);
137
58.1k
    n2p[nn] = 0;
138
58.1k
    nn += adjust;
139
58.1k
        }
140
141
983k
      invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
142
983k
      if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
143
962k
        mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
144
21.2k
      else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
145
21.2k
         BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
146
21.2k
         (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
147
0
         + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
148
21.2k
        mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
149
0
      else
150
0
        {
151
0
    mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
152
0
    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
153
0
    mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
154
0
    n2p = rp;
155
0
        }
156
157
983k
      if (cnt != 0)
158
925k
        mpn_rshift (rp, n2p, dn, cnt);
159
58.1k
      else
160
58.1k
        MPN_COPY (rp, n2p, dn);
161
983k
      TMP_FREE;
162
983k
      return;
163
983k
    }
164
165
  /* When we come here, the numerator/partial remainder is less
166
     than twice the size of the denominator.  */
167
168
79.0k
    {
169
      /* Problem:
170
171
         Divide a numerator N with nn limbs by a denominator D with dn
172
         limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
173
         compared to dn, conventional division algorithms perform poorly.
174
         We want an algorithm that has an expected running time that is
175
         dependent only on qn.
176
177
         Algorithm (very informally stated):
178
179
         1) Divide the 2 x qn most significant limbs from the numerator
180
      by the qn most significant limbs from the denominator.  Call
181
      the result qest.  This is either the correct quotient, but
182
      might be 1 or 2 too large.  Compute the remainder from the
183
      division.  (This step is implemented by an mpn_divrem call.)
184
185
         2) Is the most significant limb from the remainder < p, where p
186
      is the product of the most significant limb from the quotient
187
      and the next(d)?  (Next(d) denotes the next ignored limb from
188
      the denominator.)  If it is, decrement qest, and adjust the
189
      remainder accordingly.
190
191
         3) Is the remainder >= qest?  If it is, qest is the desired
192
      quotient.  The algorithm terminates.
193
194
         4) Subtract qest x next(d) from the remainder.  If there is
195
      borrow out, decrement qest, and adjust the remainder
196
      accordingly.
197
198
         5) Skip one word from the denominator (i.e., let next(d) denote
199
      the next less significant limb.  */
200
201
79.0k
      mp_size_t qn;
202
79.0k
      mp_ptr n2p, d2p;
203
79.0k
      mp_ptr tp;
204
79.0k
      mp_limb_t cy;
205
79.0k
      mp_size_t in, rn;
206
79.0k
      mp_limb_t quotient_too_large;
207
79.0k
      unsigned int cnt;
208
209
79.0k
      qn = nn - dn;
210
79.0k
      qp[qn] = 0;       /* zero high quotient limb */
211
79.0k
      qn += adjust;     /* qn cannot become bigger */
212
213
79.0k
      if (qn == 0)
214
206
        {
215
206
    MPN_COPY (rp, np, dn);
216
206
    TMP_FREE;
217
206
    return;
218
206
        }
219
220
78.8k
      in = dn - qn;   /* (at least partially) ignored # of limbs in ops */
221
      /* Normalize denominator by shifting it to the left such that its
222
         most significant bit is set.  Then shift the numerator the same
223
         amount, to mathematically preserve quotient.  */
224
78.8k
      if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
225
77.0k
        {
226
77.0k
    count_leading_zeros (cnt, dp[dn - 1]);
227
77.0k
    cnt -= GMP_NAIL_BITS;
228
229
77.0k
    d2p = TMP_ALLOC_LIMBS (qn);
230
77.0k
    mpn_lshift (d2p, dp + in, qn, cnt);
231
77.0k
    d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
232
233
77.0k
    n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
234
77.0k
    cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
235
77.0k
    if (adjust)
236
50.9k
      {
237
50.9k
        n2p[2 * qn] = cy;
238
50.9k
        n2p++;
239
50.9k
      }
240
26.0k
    else
241
26.0k
      {
242
26.0k
        n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
243
26.0k
      }
244
77.0k
        }
245
1.78k
      else
246
1.78k
        {
247
1.78k
    cnt = 0;
248
1.78k
    d2p = (mp_ptr) dp + in;
249
250
1.78k
    n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
251
1.78k
    MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
252
1.78k
    if (adjust)
253
13
      {
254
13
        n2p[2 * qn] = 0;
255
13
        n2p++;
256
13
      }
257
1.78k
        }
258
259
      /* Get an approximate quotient using the extracted operands.  */
260
78.8k
      if (qn == 1)
261
1.38k
        {
262
1.38k
    mp_limb_t q0, r0;
263
1.38k
    udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
264
1.38k
    n2p[0] = r0 >> GMP_NAIL_BITS;
265
1.38k
    qp[0] = q0;
266
1.38k
        }
267
77.4k
      else if (qn == 2)
268
4.53k
        mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
269
72.9k
      else
270
72.9k
        {
271
72.9k
    invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
272
72.9k
    if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
273
72.6k
      mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
274
220
    else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
275
220
      mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
276
0
    else
277
0
      {
278
0
        mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
279
0
        mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
280
0
        mp_ptr r2p = rp;
281
0
        if (np == r2p) /* If N and R share space, put ... */
282
0
          r2p += nn - qn; /* intermediate remainder at N's upper end. */
283
0
        mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
284
0
        MPN_COPY (n2p, r2p, qn);
285
0
      }
286
72.9k
        }
287
288
78.8k
      rn = qn;
289
      /* Multiply the first ignored divisor limb by the most significant
290
         quotient limb.  If that product is > the partial remainder's
291
         most significant limb, we know the quotient is too large.  This
292
         test quickly catches most cases where the quotient is too large;
293
         it catches all cases where the quotient is 2 too large.  */
294
78.8k
      {
295
78.8k
        mp_limb_t dl, x;
296
78.8k
        mp_limb_t h, dummy;
297
298
78.8k
        if (in - 2 < 0)
299
65.6k
    dl = 0;
300
13.1k
        else
301
13.1k
    dl = dp[in - 2];
302
303
78.8k
#if GMP_NAIL_BITS == 0
304
78.8k
        x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
305
#else
306
        x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
307
        if (cnt != 0)
308
    x |= dl >> (GMP_NUMB_BITS - cnt);
309
#endif
310
78.8k
        umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
311
312
78.8k
        if (n2p[qn - 1] < h)
313
16.2k
    {
314
16.2k
      mp_limb_t cy;
315
316
16.2k
      mpn_decr_u (qp, (mp_limb_t) 1);
317
16.2k
      cy = mpn_add_n (n2p, n2p, d2p, qn);
318
16.2k
      if (cy)
319
3.26k
        {
320
          /* The partial remainder is safely large.  */
321
3.26k
          n2p[qn] = cy;
322
3.26k
          ++rn;
323
3.26k
        }
324
16.2k
    }
325
78.8k
      }
326
327
78.8k
      quotient_too_large = 0;
328
78.8k
      if (cnt != 0)
329
77.0k
        {
330
77.0k
    mp_limb_t cy1, cy2;
331
332
    /* Append partially used numerator limb to partial remainder.  */
333
77.0k
    cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
334
77.0k
    n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
335
336
    /* Update partial remainder with partially used divisor limb.  */
337
77.0k
    cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
338
77.0k
    if (qn != rn)
339
3.22k
      {
340
3.22k
        ASSERT_ALWAYS (n2p[qn] >= cy2);
341
3.22k
        n2p[qn] -= cy2;
342
3.22k
      }
343
73.8k
    else
344
73.8k
      {
345
73.8k
        n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
346
347
73.8k
        quotient_too_large = (cy1 < cy2);
348
73.8k
        ++rn;
349
73.8k
      }
350
77.0k
    --in;
351
77.0k
        }
352
      /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
353
354
78.8k
      tp = TMP_ALLOC_LIMBS (dn);
355
356
78.8k
      if (in < qn)
357
74.2k
        {
358
74.2k
    if (in == 0)
359
65.1k
      {
360
65.1k
        MPN_COPY (rp, n2p, rn);
361
65.1k
        ASSERT_ALWAYS (rn == dn);
362
65.1k
        goto foo;
363
65.1k
      }
364
9.12k
    mpn_mul (tp, qp, qn, dp, in);
365
9.12k
        }
366
4.58k
      else
367
4.58k
        mpn_mul (tp, dp, in, qp, qn);
368
369
13.7k
      cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
370
13.7k
      MPN_COPY (rp + in, n2p, dn - in);
371
13.7k
      quotient_too_large |= cy;
372
13.7k
      cy = mpn_sub_n (rp, np, tp, in);
373
13.7k
      cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
374
13.7k
      quotient_too_large |= cy;
375
78.8k
    foo:
376
78.8k
      if (quotient_too_large)
377
255
        {
378
255
    mpn_decr_u (qp, (mp_limb_t) 1);
379
255
    mpn_add_n (rp, rp, dp, dn);
380
255
        }
381
78.8k
    }
382
78.8k
  TMP_FREE;
383
78.8k
  return;
384
13.7k
      }
385
1.14M
    }
386
1.14M
}