Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpn/sbpi1_div_q.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
2
   division algorithm.
3
4
   Contributed to the GNU project by Torbjorn Granlund.
5
6
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
7
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10
Copyright 2007, 2009 Free Software Foundation, Inc.
11
12
This file is part of the GNU MP Library.
13
14
The GNU MP Library is free software; you can redistribute it and/or modify
15
it under the terms of either:
16
17
  * the GNU Lesser General Public License as published by the Free
18
    Software Foundation; either version 3 of the License, or (at your
19
    option) any later version.
20
21
or
22
23
  * the GNU General Public License as published by the Free Software
24
    Foundation; either version 2 of the License, or (at your option) any
25
    later version.
26
27
or both in parallel, as here.
28
29
The GNU MP Library is distributed in the hope that it will be useful, but
30
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32
for more details.
33
34
You should have received copies of the GNU General Public License and the
35
GNU Lesser General Public License along with the GNU MP Library.  If not,
36
see https://www.gnu.org/licenses/.  */
37
38
39
#include "gmp-impl.h"
40
#include "longlong.h"
41
42
mp_limb_t
43
mpn_sbpi1_div_q (mp_ptr qp,
44
     mp_ptr np, mp_size_t nn,
45
     mp_srcptr dp, mp_size_t dn,
46
     mp_limb_t dinv)
47
282
{
48
282
  mp_limb_t qh;
49
282
  mp_size_t qn, i;
50
282
  mp_limb_t n1, n0;
51
282
  mp_limb_t d1, d0;
52
282
  mp_limb_t cy, cy1;
53
282
  mp_limb_t q;
54
282
  mp_limb_t flag;
55
56
282
  mp_size_t dn_orig = dn;
57
282
  mp_srcptr dp_orig = dp;
58
282
  mp_ptr np_orig = np;
59
60
282
  ASSERT (dn > 2);
61
282
  ASSERT (nn >= dn);
62
282
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
63
64
282
  np += nn;
65
66
282
  qn = nn - dn;
67
282
  if (qn + 1 < dn)
68
227
    {
69
227
      dp += dn - (qn + 1);
70
227
      dn = qn + 1;
71
227
    }
72
73
282
  qh = mpn_cmp (np - dn, dp, dn) >= 0;
74
282
  if (qh != 0)
75
4
    mpn_sub_n (np - dn, np - dn, dp, dn);
76
77
282
  qp += qn;
78
79
282
  dn -= 2;      /* offset dn by 2 for main division loops,
80
           saving two iterations in mpn_submul_1.  */
81
282
  d1 = dp[dn + 1];
82
282
  d0 = dp[dn + 0];
83
84
282
  np -= 2;
85
86
282
  n1 = np[1];
87
88
282
  for (i = qn - (dn + 2); i >= 0; i--)
89
0
    {
90
0
      np--;
91
0
      if (UNLIKELY (n1 == d1) && np[1] == d0)
92
0
  {
93
0
    q = GMP_NUMB_MASK;
94
0
    mpn_submul_1 (np - dn, dp, dn + 2, q);
95
0
    n1 = np[1];   /* update n1, last loop's value will now be invalid */
96
0
  }
97
0
      else
98
0
  {
99
0
    udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
100
101
0
    cy = mpn_submul_1 (np - dn, dp, dn, q);
102
103
0
    cy1 = n0 < cy;
104
0
    n0 = (n0 - cy) & GMP_NUMB_MASK;
105
0
    cy = n1 < cy1;
106
0
    n1 -= cy1;
107
0
    np[0] = n0;
108
109
0
    if (UNLIKELY (cy != 0))
110
0
      {
111
0
        n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
112
0
        q--;
113
0
      }
114
0
  }
115
116
0
      *--qp = q;
117
0
    }
118
119
282
  flag = ~CNST_LIMB(0);
120
121
282
  if (dn >= 0)
122
282
    {
123
850
      for (i = dn; i > 0; i--)
124
568
  {
125
568
    np--;
126
568
    if (UNLIKELY (n1 >= (d1 & flag)))
127
0
      {
128
0
        q = GMP_NUMB_MASK;
129
0
        cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
130
131
0
        if (UNLIKELY (n1 != cy))
132
0
    {
133
0
      if (n1 < (cy & flag))
134
0
        {
135
0
          q--;
136
0
          mpn_add_n (np - dn, np - dn, dp, dn + 2);
137
0
        }
138
0
      else
139
0
        flag = 0;
140
0
    }
141
0
        n1 = np[1];
142
0
      }
143
568
    else
144
568
      {
145
568
        udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
146
147
568
        cy = mpn_submul_1 (np - dn, dp, dn, q);
148
149
568
        cy1 = n0 < cy;
150
568
        n0 = (n0 - cy) & GMP_NUMB_MASK;
151
568
        cy = n1 < cy1;
152
568
        n1 -= cy1;
153
568
        np[0] = n0;
154
155
568
        if (UNLIKELY (cy != 0))
156
0
    {
157
0
      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
158
0
      q--;
159
0
    }
160
568
      }
161
162
568
    *--qp = q;
163
164
    /* Truncate operands.  */
165
568
    dn--;
166
568
    dp++;
167
568
  }
168
169
282
      np--;
170
282
      if (UNLIKELY (n1 >= (d1 & flag)))
171
0
  {
172
0
    q = GMP_NUMB_MASK;
173
0
    cy = mpn_submul_1 (np, dp, 2, q);
174
175
0
    if (UNLIKELY (n1 != cy))
176
0
      {
177
0
        if (n1 < (cy & flag))
178
0
    {
179
0
      q--;
180
0
      add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
181
0
    }
182
0
        else
183
0
    flag = 0;
184
0
      }
185
0
    n1 = np[1];
186
0
  }
187
282
      else
188
282
  {
189
282
    udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
190
191
282
    np[0] = n0;
192
282
    np[1] = n1;
193
282
  }
194
195
282
      *--qp = q;
196
282
    }
197
282
  ASSERT_ALWAYS (np[1] == n1);
198
282
  np += 2;
199
200
201
282
  dn = dn_orig;
202
282
  if (UNLIKELY (n1 < (dn & flag)))
203
0
    {
204
0
      mp_limb_t q, x;
205
206
      /* The quotient may be too large if the remainder is small.  Recompute
207
   for above ignored operand parts, until the remainder spills.
208
209
   FIXME: The quality of this code isn't the same as the code above.
210
   1. We don't compute things in an optimal order, high-to-low, in order
211
      to terminate as quickly as possible.
212
   2. We mess with pointers and sizes, adding and subtracting and
213
      adjusting to get things right.  It surely could be streamlined.
214
   3. The only termination criteria are that we determine that the
215
      quotient needs to be adjusted, or that we have recomputed
216
      everything.  We should stop when the remainder is so large
217
      that no additional subtracting could make it spill.
218
   4. If nothing else, we should not do two loops of submul_1 over the
219
      data, instead handle both the triangularization and chopping at
220
      once.  */
221
222
0
      x = n1;
223
224
0
      if (dn > 2)
225
0
  {
226
    /* Compensate for triangularization.  */
227
0
    mp_limb_t y;
228
229
0
    dp = dp_orig;
230
0
    if (qn + 1 < dn)
231
0
      {
232
0
        dp += dn - (qn + 1);
233
0
        dn = qn + 1;
234
0
      }
235
236
0
    y = np[-2];
237
238
0
    for (i = dn - 3; i >= 0; i--)
239
0
      {
240
0
        q = qp[i];
241
0
        cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
242
243
0
        if (y < cy)
244
0
    {
245
0
      if (x == 0)
246
0
        {
247
0
          cy = mpn_sub_1 (qp, qp, qn, 1);
248
0
          ASSERT_ALWAYS (cy == 0);
249
0
          return qh - cy;
250
0
        }
251
0
      x--;
252
0
    }
253
0
        y -= cy;
254
0
      }
255
0
    np[-2] = y;
256
0
  }
257
258
0
      dn = dn_orig;
259
0
      if (qn + 1 < dn)
260
0
  {
261
    /* Compensate for ignored dividend and divisor tails.  */
262
263
0
    dp = dp_orig;
264
0
    np = np_orig;
265
266
0
    if (qh != 0)
267
0
      {
268
0
        cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
269
0
        if (cy != 0)
270
0
    {
271
0
      if (x == 0)
272
0
        {
273
0
          if (qn != 0)
274
0
      cy = mpn_sub_1 (qp, qp, qn, 1);
275
0
          return qh - cy;
276
0
        }
277
0
      x--;
278
0
    }
279
0
      }
280
281
0
    if (qn == 0)
282
0
      return qh;
283
284
0
    for (i = dn - qn - 2; i >= 0; i--)
285
0
      {
286
0
        cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
287
0
        cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
288
0
        if (cy != 0)
289
0
    {
290
0
      if (x == 0)
291
0
        {
292
0
          cy = mpn_sub_1 (qp, qp, qn, 1);
293
0
          return qh;
294
0
        }
295
0
      x--;
296
0
    }
297
0
      }
298
0
  }
299
0
    }
300
301
282
  return qh;
302
282
}