Coverage Report

Created: 2025-03-09 06:52

/src/gmp-6.2.1/mpn/dcpi1_div_qr.c
Line
Count
Source (jump to first uncovered line)
1
/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
2
   size operands.
3
4
   Contributed to the GNU project by Torbjorn Granlund.
5
6
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10
Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
11
12
This file is part of the GNU MP Library.
13
14
The GNU MP Library is free software; you can redistribute it and/or modify
15
it under the terms of either:
16
17
  * the GNU Lesser General Public License as published by the Free
18
    Software Foundation; either version 3 of the License, or (at your
19
    option) any later version.
20
21
or
22
23
  * the GNU General Public License as published by the Free Software
24
    Foundation; either version 2 of the License, or (at your option) any
25
    later version.
26
27
or both in parallel, as here.
28
29
The GNU MP Library is distributed in the hope that it will be useful, but
30
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32
for more details.
33
34
You should have received copies of the GNU General Public License and the
35
GNU Lesser General Public License along with the GNU MP Library.  If not,
36
see https://www.gnu.org/licenses/.  */
37
38
#include "gmp-impl.h"
39
#include "longlong.h"
40
41
42
mp_limb_t
43
mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
44
        gmp_pi1_t *dinv, mp_ptr tp)
45
435k
{
46
435k
  mp_size_t lo, hi;
47
435k
  mp_limb_t cy, qh, ql;
48
49
435k
  lo = n >> 1;      /* floor(n/2) */
50
435k
  hi = n - lo;      /* ceil(n/2) */
51
52
435k
  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
53
289k
    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
54
146k
  else
55
146k
    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
56
57
435k
  mpn_mul (tp, qp + lo, hi, dp, lo);
58
59
435k
  cy = mpn_sub_n (np + lo, np + lo, tp, n);
60
435k
  if (qh != 0)
61
122
    cy += mpn_sub_n (np + n, np + n, dp, lo);
62
63
525k
  while (cy != 0)
64
89.4k
    {
65
89.4k
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
66
89.4k
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
67
89.4k
    }
68
69
435k
  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
70
289k
    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
71
146k
  else
72
146k
    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
73
74
435k
  mpn_mul (tp, dp, hi, qp, lo);
75
76
435k
  cy = mpn_sub_n (np, np, tp, n);
77
435k
  if (ql != 0)
78
0
    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
79
80
646k
  while (cy != 0)
81
210k
    {
82
210k
      mpn_sub_1 (qp, qp, lo, 1);
83
210k
      cy -= mpn_add_n (np, np, dp, n);
84
210k
    }
85
86
435k
  return qh;
87
435k
}
88
89
mp_limb_t
90
mpn_dcpi1_div_qr (mp_ptr qp,
91
      mp_ptr np, mp_size_t nn,
92
      mp_srcptr dp, mp_size_t dn,
93
      gmp_pi1_t *dinv)
94
138k
{
95
138k
  mp_size_t qn;
96
138k
  mp_limb_t qh, cy;
97
138k
  mp_ptr tp;
98
138k
  TMP_DECL;
99
100
138k
  TMP_MARK;
101
102
138k
  ASSERT (dn >= 6);   /* to adhere to mpn_sbpi1_div_qr's limits */
103
138k
  ASSERT (nn - dn >= 3);  /* to adhere to mpn_sbpi1_div_qr's limits */
104
138k
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
105
106
138k
  tp = TMP_ALLOC_LIMBS (dn);
107
108
138k
  qn = nn - dn;
109
138k
  qp += qn;
110
138k
  np += nn;
111
138k
  dp += dn;
112
113
138k
  if (qn > dn)
114
19.3k
    {
115
      /* Reduce qn mod dn without division, optimizing small operations.  */
116
19.3k
      do
117
19.3k
  qn -= dn;
118
19.3k
      while (qn > dn);
119
120
19.3k
      qp -= qn;     /* point at low limb of next quotient block */
121
19.3k
      np -= qn;     /* point in the middle of partial remainder */
122
123
      /* Perform the typically smaller block first.  */
124
19.3k
      if (qn == 1)
125
8.59k
  {
126
8.59k
    mp_limb_t q, n2, n1, n0, d1, d0;
127
128
    /* Handle qh up front, for simplicity. */
129
8.59k
    qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
130
8.59k
    if (qh)
131
8.59k
      ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
132
133
    /* A single iteration of schoolbook: One 3/2 division,
134
       followed by the bignum update and adjustment. */
135
8.59k
    n2 = np[0];
136
8.59k
    n1 = np[-1];
137
8.59k
    n0 = np[-2];
138
8.59k
    d1 = dp[-1];
139
8.59k
    d0 = dp[-2];
140
141
8.59k
    ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
142
143
8.59k
    if (UNLIKELY (n2 == d1) && n1 == d0)
144
0
      {
145
0
        q = GMP_NUMB_MASK;
146
0
        cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
147
0
        ASSERT (cy == n2);
148
0
      }
149
8.59k
    else
150
8.59k
      {
151
8.59k
        udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
152
153
8.59k
        if (dn > 2)
154
8.59k
    {
155
8.59k
      mp_limb_t cy, cy1;
156
8.59k
      cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
157
158
8.59k
      cy1 = n0 < cy;
159
8.59k
      n0 = (n0 - cy) & GMP_NUMB_MASK;
160
8.59k
      cy = n1 < cy1;
161
8.59k
      n1 = (n1 - cy1) & GMP_NUMB_MASK;
162
8.59k
      np[-2] = n0;
163
164
8.59k
      if (UNLIKELY (cy != 0))
165
0
        {
166
0
          n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
167
0
          qh -= (q == 0);
168
0
          q = (q - 1) & GMP_NUMB_MASK;
169
0
        }
170
8.59k
    }
171
0
        else
172
0
    np[-2] = n0;
173
174
8.59k
        np[-1] = n1;
175
8.59k
      }
176
8.59k
    qp[0] = q;
177
8.59k
  }
178
10.7k
      else
179
10.7k
  {
180
    /* Do a 2qn / qn division */
181
10.7k
    if (qn == 2)
182
38
      qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
183
10.7k
    else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
184
6.59k
      qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
185
4.14k
    else
186
4.14k
      qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
187
188
10.7k
    if (qn != dn)
189
10.7k
      {
190
10.7k
        if (qn > dn - qn)
191
11
    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
192
10.7k
        else
193
10.7k
    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
194
195
10.7k
        cy = mpn_sub_n (np - dn, np - dn, tp, dn);
196
10.7k
        if (qh != 0)
197
0
    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
198
199
11.1k
        while (cy != 0)
200
338
    {
201
338
      qh -= mpn_sub_1 (qp, qp, qn, 1);
202
338
      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
203
338
    }
204
10.7k
      }
205
10.7k
  }
206
207
19.3k
      qn = nn - dn - qn;
208
19.3k
      do
209
19.3k
  {
210
19.3k
    qp -= dn;
211
19.3k
    np -= dn;
212
19.3k
    mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
213
19.3k
    qn -= dn;
214
19.3k
  }
215
19.3k
      while (qn > 0);
216
19.3k
    }
217
119k
  else
218
119k
    {
219
119k
      qp -= qn;     /* point at low limb of next quotient block */
220
119k
      np -= qn;     /* point in the middle of partial remainder */
221
222
119k
      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
223
0
  qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
224
119k
      else
225
119k
  qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
226
227
119k
      if (qn != dn)
228
278
  {
229
278
    if (qn > dn - qn)
230
278
      mpn_mul (tp, qp, qn, dp - dn, dn - qn);
231
0
    else
232
0
      mpn_mul (tp, dp - dn, dn - qn, qp, qn);
233
234
278
    cy = mpn_sub_n (np - dn, np - dn, tp, dn);
235
278
    if (qh != 0)
236
61
      cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
237
238
447
    while (cy != 0)
239
169
      {
240
169
        qh -= mpn_sub_1 (qp, qp, qn, 1);
241
169
        cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
242
169
      }
243
278
  }
244
119k
    }
245
246
138k
  TMP_FREE;
247
138k
  return qh;
248
138k
}