Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpn/div_q.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_div_q -- division for arbitrary size operands.
2
3
   Contributed to the GNU project by Torbjorn Granlund.
4
5
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9
Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.
10
11
This file is part of the GNU MP Library.
12
13
The GNU MP Library is free software; you can redistribute it and/or modify
14
it under the terms of either:
15
16
  * the GNU Lesser General Public License as published by the Free
17
    Software Foundation; either version 3 of the License, or (at your
18
    option) any later version.
19
20
or
21
22
  * the GNU General Public License as published by the Free Software
23
    Foundation; either version 2 of the License, or (at your option) any
24
    later version.
25
26
or both in parallel, as here.
27
28
The GNU MP Library is distributed in the hope that it will be useful, but
29
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31
for more details.
32
33
You should have received copies of the GNU General Public License and the
34
GNU Lesser General Public License along with the GNU MP Library.  If not,
35
see https://www.gnu.org/licenses/.  */
36
37
#include "gmp-impl.h"
38
#include "longlong.h"
39
40
41
/* Compute Q = N/D with truncation.
42
     N = {np,nn}
43
     D = {dp,dn}
44
     Q = {qp,nn-dn+1}
45
     T = {scratch,nn+1} is scratch space
46
   N and D are both untouched by the computation.
47
   N and T may overlap; pass the same space if N is irrelevant after the call,
48
   but note that tp needs an extra limb.
49
50
   Operand requirements:
51
     N >= D > 0
52
     dp[dn-1] != 0
53
     No overlap between the N, D, and Q areas.
54
55
   This division function does not clobber its input operands, since it is
56
   intended to support average-O(qn) division, and for that to be effective, it
57
   cannot put requirements on callers to copy a O(nn) operand.
58
59
   If a caller does not care about the value of {np,nn+1} after calling this
60
   function, it should pass np also for the scratch argument.  This function
61
   will then save some time and space by avoiding allocation and copying.
62
   (FIXME: Is this a good design?  We only really save any copying for
63
   already-normalised divisors, which should be rare.  It also prevents us from
64
   reasonably asking for all scratch space we need.)
65
66
   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
67
   the most significant quotient limb?  Look at the 4 main code blocks below
68
   (consisting of an outer if-else where each arm contains an if-else). It is
69
   tricky for the first code block, since the mpn_*_div_q calls will typically
70
   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
71
   we generate the most significant quotient limb here, before calling
72
   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
73
   critical division case (the SB sub-case in particular) copying is not a good
74
   idea.
75
76
   It might make sense to split the if-else parts of the (qn + FUDGE
77
   >= dn) blocks into separate functions, since we could promise quite
78
   different things to callers in these two cases.  The 'then' case
79
   benefits from np=scratch, and it could perhaps even tolerate qp=np,
80
   saving some headache for many callers.
81
82
   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
83
   operands, we do not reuse the huge scratch for adjustments.  This can be a
84
   serious waste of memory for the largest operands.
85
*/
86
87
/* FUDGE determines when to try getting an approximate quotient from the upper
88
   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
89
   for the code to be correct.  */
90
2.84k
#define FUDGE 5      /* FIXME: tune this */
91
92
#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
93
0
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
94
0
#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
95
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
96
0
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
97
#endif
98
99
void
100
mpn_div_q (mp_ptr qp,
101
     mp_srcptr np, mp_size_t nn,
102
     mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
103
4.55k
{
104
4.55k
  mp_ptr new_dp, new_np, tp, rp;
105
4.55k
  mp_limb_t cy, dh, qh;
106
4.55k
  mp_size_t new_nn, qn;
107
4.55k
  gmp_pi1_t dinv;
108
4.55k
  int cnt;
109
4.55k
  TMP_DECL;
110
4.55k
  TMP_MARK;
111
112
4.55k
  ASSERT (nn >= dn);
113
4.55k
  ASSERT (dn > 0);
114
4.55k
  ASSERT (dp[dn - 1] != 0);
115
4.55k
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
116
4.55k
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
117
4.55k
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
118
119
4.55k
  ASSERT_ALWAYS (FUDGE >= 2);
120
121
4.55k
  dh = dp[dn - 1];
122
4.55k
  if (dn == 1)
123
1.70k
    {
124
1.70k
      mpn_divrem_1 (qp, 0L, np, nn, dh);
125
1.70k
      return;
126
1.70k
    }
127
128
2.84k
  qn = nn - dn + 1;   /* Quotient size, high limb might be zero */
129
130
2.84k
  if (qn + FUDGE >= dn)
131
2.13k
    {
132
      /* |________________________|
133
                          |_______|  */
134
2.13k
      new_np = scratch;
135
136
2.13k
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
137
1.98k
  {
138
1.98k
    count_leading_zeros (cnt, dh);
139
140
1.98k
    cy = mpn_lshift (new_np, np, nn, cnt);
141
1.98k
    new_np[nn] = cy;
142
1.98k
    new_nn = nn + (cy != 0);
143
144
1.98k
    new_dp = TMP_ALLOC_LIMBS (dn);
145
1.98k
    mpn_lshift (new_dp, dp, dn, cnt);
146
147
1.98k
    if (dn == 2)
148
739
      {
149
739
        qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
150
739
      }
151
1.24k
    else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
152
1.24k
       BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
153
1.24k
      {
154
1.24k
        invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
155
1.24k
        qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
156
1.24k
      }
157
0
    else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
158
0
       BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
159
0
       (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
160
0
       + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
161
0
      {
162
0
        invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
163
0
        qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
164
0
      }
165
0
    else
166
0
      {
167
0
        mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
168
0
        mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
169
0
        qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
170
0
      }
171
1.98k
    if (cy == 0)
172
1.06k
      qp[qn - 1] = qh;
173
924
    else
174
1.98k
      ASSERT (qh == 0);
175
1.98k
  }
176
151
      else  /* divisor is already normalised */
177
151
  {
178
151
    if (new_np != np)
179
52
      MPN_COPY (new_np, np, nn);
180
181
151
    if (dn == 2)
182
74
      {
183
74
        qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
184
74
      }
185
77
    else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
186
77
       BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
187
77
      {
188
77
        invert_pi1 (dinv, dh, dp[dn - 2]);
189
77
        qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
190
77
      }
191
0
    else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
192
0
       BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
193
0
       (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
194
0
       + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
195
0
      {
196
0
        invert_pi1 (dinv, dh, dp[dn - 2]);
197
0
        qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
198
0
      }
199
0
    else
200
0
      {
201
0
        mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
202
0
        mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
203
0
        qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
204
0
      }
205
151
    qp[nn - dn] = qh;
206
151
  }
207
2.13k
    }
208
708
  else
209
708
    {
210
      /* |________________________|
211
                |_________________|  */
212
708
      tp = TMP_ALLOC_LIMBS (qn + 1);
213
214
708
      new_np = scratch;
215
708
      new_nn = 2 * qn + 1;
216
708
      if (new_np == np)
217
  /* We need {np,nn} to remain untouched until the final adjustment, so
218
     we need to allocate separate space for new_np.  */
219
509
  new_np = TMP_ALLOC_LIMBS (new_nn + 1);
220
221
222
708
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
223
621
  {
224
621
    count_leading_zeros (cnt, dh);
225
226
621
    cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
227
621
    new_np[new_nn] = cy;
228
229
621
    new_nn += (cy != 0);
230
231
621
    new_dp = TMP_ALLOC_LIMBS (qn + 1);
232
621
    mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
233
621
    new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
234
235
621
    if (qn + 1 == 2)
236
20
      {
237
20
        qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
238
20
      }
239
601
    else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
240
601
      {
241
601
        invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
242
601
        qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
243
601
      }
244
0
    else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
245
0
      {
246
0
        invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
247
0
        qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
248
0
      }
249
0
    else
250
0
      {
251
0
        mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
252
0
        mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
253
0
        qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
254
0
      }
255
621
    if (cy == 0)
256
311
      tp[qn] = qh;
257
310
    else if (UNLIKELY (qh != 0))
258
0
      {
259
        /* This happens only when the quotient is close to B^n and
260
     mpn_*_divappr_q returned B^n.  */
261
0
        mp_size_t i, n;
262
0
        n = new_nn - (qn + 1);
263
0
        for (i = 0; i < n; i++)
264
0
    tp[i] = GMP_NUMB_MAX;
265
0
        qh = 0;   /* currently ignored */
266
0
      }
267
621
  }
268
87
      else  /* divisor is already normalised */
269
87
  {
270
87
    MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
271
272
87
    new_dp = (mp_ptr) dp + dn - (qn + 1);
273
274
87
    if (qn == 2 - 1)
275
0
      {
276
0
        qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
277
0
      }
278
87
    else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
279
87
      {
280
87
        invert_pi1 (dinv, dh, new_dp[qn - 1]);
281
87
        qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
282
87
      }
283
0
    else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
284
0
      {
285
0
        invert_pi1 (dinv, dh, new_dp[qn - 1]);
286
0
        qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
287
0
      }
288
0
    else
289
0
      {
290
0
        mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
291
0
        mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
292
0
        qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
293
0
      }
294
87
    tp[qn] = qh;
295
87
  }
296
297
708
      MPN_COPY (qp, tp + 1, qn);
298
708
      if (tp[0] <= 4)
299
0
        {
300
0
    mp_size_t rn;
301
302
0
          rp = TMP_ALLOC_LIMBS (dn + qn);
303
0
          mpn_mul (rp, dp, dn, tp + 1, qn);
304
0
    rn = dn + qn;
305
0
    rn -= rp[rn - 1] == 0;
306
307
0
          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
308
0
            MPN_DECR_U (qp, qn, 1);
309
0
        }
310
708
    }
311
312
2.84k
  TMP_FREE;
313
2.84k
}