Coverage Report

Created: 2024-11-21 07:03

/src/libgmp/mpn/dcpi1_div_qr.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
2
   size operands.
3
4
   Contributed to the GNU project by Torbjorn Granlund.
5
6
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10
Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
11
12
This file is part of the GNU MP Library.
13
14
The GNU MP Library is free software; you can redistribute it and/or modify
15
it under the terms of either:
16
17
  * the GNU Lesser General Public License as published by the Free
18
    Software Foundation; either version 3 of the License, or (at your
19
    option) any later version.
20
21
or
22
23
  * the GNU General Public License as published by the Free Software
24
    Foundation; either version 2 of the License, or (at your option) any
25
    later version.
26
27
or both in parallel, as here.
28
29
The GNU MP Library is distributed in the hope that it will be useful, but
30
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32
for more details.
33
34
You should have received copies of the GNU General Public License and the
35
GNU Lesser General Public License along with the GNU MP Library.  If not,
36
see https://www.gnu.org/licenses/.  */
37
38
#include "gmp-impl.h"
39
#include "longlong.h"
40
41
42
mp_limb_t
43
mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
44
        gmp_pi1_t *dinv, mp_ptr tp)
45
50.9k
{
46
50.9k
  mp_size_t lo, hi;
47
50.9k
  mp_limb_t cy, qh, ql;
48
49
50.9k
  lo = n >> 1;      /* floor(n/2) */
50
50.9k
  hi = n - lo;      /* ceil(n/2) */
51
52
50.9k
  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
53
36.4k
    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
54
14.4k
  else
55
14.4k
    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
56
57
50.9k
  mpn_mul (tp, qp + lo, hi, dp, lo);
58
59
50.9k
  cy = mpn_sub_n (np + lo, np + lo, tp, n);
60
50.9k
  if (qh != 0)
61
129
    cy += mpn_sub_n (np + n, np + n, dp, lo);
62
63
53.2k
  while (cy != 0)
64
2.32k
    {
65
2.32k
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
66
2.32k
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
67
2.32k
    }
68
69
50.9k
  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
70
36.4k
    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
71
14.4k
  else
72
14.4k
    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
73
74
50.9k
  mpn_mul (tp, dp, hi, qp, lo);
75
76
50.9k
  cy = mpn_sub_n (np, np, tp, n);
77
50.9k
  if (ql != 0)
78
0
    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
79
80
60.6k
  while (cy != 0)
81
9.75k
    {
82
9.75k
      mpn_sub_1 (qp, qp, lo, 1);
83
9.75k
      cy -= mpn_add_n (np, np, dp, n);
84
9.75k
    }
85
86
50.9k
  return qh;
87
50.9k
}
88
89
mp_limb_t
90
mpn_dcpi1_div_qr (mp_ptr qp,
91
      mp_ptr np, mp_size_t nn,
92
      mp_srcptr dp, mp_size_t dn,
93
      gmp_pi1_t *dinv)
94
21.9k
{
95
21.9k
  mp_size_t qn;
96
21.9k
  mp_limb_t qh, cy;
97
21.9k
  mp_ptr tp;
98
21.9k
  TMP_DECL;
99
100
21.9k
  TMP_MARK;
101
102
21.9k
  ASSERT (dn >= 6);   /* to adhere to mpn_sbpi1_div_qr's limits */
103
21.9k
  ASSERT (nn - dn >= 3);  /* to adhere to mpn_sbpi1_div_qr's limits */
104
21.9k
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
105
106
21.9k
  tp = TMP_ALLOC_LIMBS (dn);
107
108
21.9k
  qn = nn - dn;
109
21.9k
  qp += qn;
110
21.9k
  np += nn;
111
21.9k
  dp += dn;
112
113
21.9k
  if (qn > dn)
114
14.2k
    {
115
      /* Reduce qn mod dn without division, optimizing small operations.  */
116
14.2k
      do
117
14.3k
  qn -= dn;
118
14.3k
      while (qn > dn);
119
120
14.2k
      qp -= qn;     /* point at low limb of next quotient block */
121
14.2k
      np -= qn;     /* point in the middle of partial remainder */
122
123
      /* Perform the typically smaller block first.  */
124
14.2k
      if (qn == 1)
125
13.3k
  {
126
13.3k
    mp_limb_t q, n2, n1, n0, d1, d0;
127
128
    /* Handle qh up front, for simplicity. */
129
13.3k
    qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
130
13.3k
    if (qh)
131
13.3k
      ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
132
133
    /* A single iteration of schoolbook: One 3/2 division,
134
       followed by the bignum update and adjustment. */
135
13.3k
    n2 = np[0];
136
13.3k
    n1 = np[-1];
137
13.3k
    n0 = np[-2];
138
13.3k
    d1 = dp[-1];
139
13.3k
    d0 = dp[-2];
140
141
13.3k
    ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
142
143
13.3k
    if (UNLIKELY (n2 == d1) && n1 == d0)
144
0
      {
145
0
        q = GMP_NUMB_MASK;
146
0
        cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
147
0
        ASSERT (cy == n2);
148
0
      }
149
13.3k
    else
150
13.3k
      {
151
13.3k
        udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
152
153
13.3k
        if (dn > 2)
154
13.3k
    {
155
13.3k
      mp_limb_t cy, cy1;
156
13.3k
      cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
157
158
13.3k
      cy1 = n0 < cy;
159
13.3k
      n0 = (n0 - cy) & GMP_NUMB_MASK;
160
13.3k
      cy = n1 < cy1;
161
13.3k
      n1 = (n1 - cy1) & GMP_NUMB_MASK;
162
13.3k
      np[-2] = n0;
163
164
13.3k
      if (UNLIKELY (cy != 0))
165
0
        {
166
0
          n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
167
0
          qh -= (q == 0);
168
0
          q = (q - 1) & GMP_NUMB_MASK;
169
0
        }
170
13.3k
    }
171
0
        else
172
0
    np[-2] = n0;
173
174
13.3k
        np[-1] = n1;
175
13.3k
      }
176
13.3k
    qp[0] = q;
177
13.3k
  }
178
937
      else
179
937
  {
180
    /* Do a 2qn / qn division */
181
937
    if (qn == 2)
182
113
      qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
183
824
    else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
184
706
      qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
185
118
    else
186
118
      qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
187
188
937
    if (qn != dn)
189
936
      {
190
936
        if (qn > dn - qn)
191
23
    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
192
913
        else
193
913
    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
194
195
936
        cy = mpn_sub_n (np - dn, np - dn, tp, dn);
196
936
        if (qh != 0)
197
0
    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
198
199
954
        while (cy != 0)
200
18
    {
201
18
      qh -= mpn_sub_1 (qp, qp, qn, 1);
202
18
      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
203
18
    }
204
936
      }
205
937
  }
206
207
14.2k
      qn = nn - dn - qn;
208
14.2k
      do
209
14.3k
  {
210
14.3k
    qp -= dn;
211
14.3k
    np -= dn;
212
14.3k
    mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
213
14.3k
    qn -= dn;
214
14.3k
  }
215
14.3k
      while (qn > 0);
216
14.2k
    }
217
7.62k
  else
218
7.62k
    {
219
7.62k
      qp -= qn;     /* point at low limb of next quotient block */
220
7.62k
      np -= qn;     /* point in the middle of partial remainder */
221
222
7.62k
      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
223
0
  qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
224
7.62k
      else
225
7.62k
  qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
226
227
7.62k
      if (qn != dn)
228
368
  {
229
368
    if (qn > dn - qn)
230
368
      mpn_mul (tp, qp, qn, dp - dn, dn - qn);
231
0
    else
232
0
      mpn_mul (tp, dp - dn, dn - qn, qp, qn);
233
234
368
    cy = mpn_sub_n (np - dn, np - dn, tp, dn);
235
368
    if (qh != 0)
236
65
      cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
237
238
437
    while (cy != 0)
239
69
      {
240
69
        qh -= mpn_sub_1 (qp, qp, qn, 1);
241
69
        cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
242
69
      }
243
368
  }
244
7.62k
    }
245
246
21.9k
  TMP_FREE;
247
21.9k
  return qh;
248
21.9k
}