Coverage Report

Created: 2025-03-18 06:55

/src/gmp/mpn/generic/hgcd2-div.h
Line
Count
Source (jump to first uncovered line)
1
/* hgcd2-div.h
2
3
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7
Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019, 2020 Free Software
8
Foundation, Inc.
9
10
This file is part of the GNU MP Library.
11
12
The GNU MP Library is free software; you can redistribute it and/or modify
13
it under the terms of either:
14
15
  * the GNU Lesser General Public License as published by the Free
16
    Software Foundation; either version 3 of the License, or (at your
17
    option) any later version.
18
19
or
20
21
  * the GNU General Public License as published by the Free Software
22
    Foundation; either version 2 of the License, or (at your option) any
23
    later version.
24
25
or both in parallel, as here.
26
27
The GNU MP Library is distributed in the hope that it will be useful, but
28
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
30
for more details.
31
32
You should have received copies of the GNU General Public License and the
33
GNU Lesser General Public License along with the GNU MP Library.  If not,
34
see https://www.gnu.org/licenses/.  */
35
36
#include "gmp-impl.h"
37
#include "longlong.h"
38
39
#ifndef HGCD2_DIV1_METHOD
40
#define HGCD2_DIV1_METHOD 3
41
#endif
42
43
#ifndef HGCD2_DIV2_METHOD
44
#define HGCD2_DIV2_METHOD 2
45
#endif
46
47
#if HAVE_NATIVE_mpn_div_11
48
49
#define div1 mpn_div_11
50
/* Single-limb division optimized for small quotients.
51
   Returned value holds d0 = r, d1 = q. */
52
mp_double_limb_t div1 (mp_limb_t, mp_limb_t);
53
54
#elif HGCD2_DIV1_METHOD == 1
55
56
static inline mp_double_limb_t
57
div1 (mp_limb_t n0, mp_limb_t d0)
58
0
{
59
0
  mp_double_limb_t res;
60
0
  res.d1 = n0 / d0;
61
0
  res.d0 = n0 - res.d1 * d0;
62
63
0
  return res;
64
0
}
65
66
#elif HGCD2_DIV1_METHOD == 2
67
68
static mp_double_limb_t
69
div1 (mp_limb_t n0, mp_limb_t d0)
70
{
71
  mp_double_limb_t res;
72
  int ncnt, dcnt, cnt;
73
  mp_limb_t q;
74
  mp_limb_t mask;
75
76
  ASSERT (n0 >= d0);
77
78
  count_leading_zeros (ncnt, n0);
79
  count_leading_zeros (dcnt, d0);
80
  cnt = dcnt - ncnt;
81
82
  d0 <<= cnt;
83
84
  q = -(mp_limb_t) (n0 >= d0);
85
  n0 -= d0 & q;
86
  d0 >>= 1;
87
  q = -q;
88
89
  while (--cnt >= 0)
90
    {
91
      mask = -(mp_limb_t) (n0 >= d0);
92
      n0 -= d0 & mask;
93
      d0 >>= 1;
94
      q = (q << 1) - mask;
95
    }
96
97
  res.d0 = n0;
98
  res.d1 = q;
99
  return res;
100
}
101
102
#elif HGCD2_DIV1_METHOD == 3
103
104
static inline mp_double_limb_t
105
div1 (mp_limb_t n0, mp_limb_t d0)
106
{
107
  mp_double_limb_t res;
108
  if (UNLIKELY ((d0 >> (GMP_LIMB_BITS - 3)) != 0)
109
      || UNLIKELY (n0 >= (d0 << 3)))
110
    {
111
      res.d1 = n0 / d0;
112
      res.d0 = n0 - res.d1 * d0;
113
    }
114
  else
115
    {
116
      mp_limb_t q, mask;
117
118
      d0 <<= 2;
119
120
      mask = -(mp_limb_t) (n0 >= d0);
121
      n0 -= d0 & mask;
122
      q = 4 & mask;
123
124
      d0 >>= 1;
125
      mask = -(mp_limb_t) (n0 >= d0);
126
      n0 -= d0 & mask;
127
      q += 2 & mask;
128
129
      d0 >>= 1;
130
      mask = -(mp_limb_t) (n0 >= d0);
131
      n0 -= d0 & mask;
132
      q -= mask;
133
134
      res.d0 = n0;
135
      res.d1 = q;
136
    }
137
  return res;
138
}
139
140
#elif HGCD2_DIV1_METHOD == 4
141
142
/* Table quotients.  We extract the NBITS most significant bits of the
143
   numerator limb, and the corresponding bits from the divisor limb, and use
144
   these to form an index into the table.  This method is probably only useful
145
   for short pipelines with slow multiplication.
146
147
   Possible improvements:
148
149
   * Perhaps extract the highest NBITS of the divisor instead of the same bits
150
     as from the numerator.  That would require another count_leading_zeros,
151
     and a post-multiply shift of the quotient.
152
153
   * Compress tables?  Their values are tiny, and there are lots of zero
154
     entries (which are never used).
155
156
   * Round the table entries more cleverly?
157
*/
158
159
#ifndef NBITS
160
#define NBITS 5
161
#endif
162
163
#if NBITS == 5
164
/* This needs full division about 13.2% of the time. */
165
static const unsigned char tab[512] = {
166
17, 9, 5,4,3,2,2,2,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
167
18, 9, 6,4,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
168
19,10, 6,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
169
20,10, 6,5,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
170
21,11, 7,5,4,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
171
22,11, 7,5,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
172
23,12, 7,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
173
24,12, 8,6,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
174
25,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
175
26,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
176
27,14, 9,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
177
28,14, 9,7,5,4,3,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
178
29,15,10,7,5,4,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
179
30,15,10,7,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
180
31,16,10,7,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
181
32,16,11,8,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
182
};
183
#elif NBITS == 6
184
/* This needs full division about 9.8% of the time. */
185
static const unsigned char tab[2048] = {
186
33,17,11, 8, 6, 5,4,4,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
187
 1, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
188
34,17,11, 8, 6, 5,4,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
189
 1, 1, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
190
35,18,12, 9, 7, 5,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
191
 1, 1, 1, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
192
36,18,12, 9, 7, 6,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
193
 1, 1, 1, 1, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
194
37,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
195
 1, 1, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
196
38,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
197
 1, 1, 1, 1, 1, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
198
39,20,13,10, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
199
 1, 1, 1, 1, 1, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
200
40,20,14,10, 8, 6,5,5,4,3,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
201
 1, 1, 1, 1, 1, 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
202
41,21,14,10, 8, 6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
203
 1, 1, 1, 1, 1, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
204
42,21,14,10, 8, 7,6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
205
 1, 1, 1, 1, 1, 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
206
43,22,15,11, 8, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
207
 1, 1, 1, 1, 1, 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
208
44,22,15,11, 9, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
209
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
210
45,23,15,11, 9, 7,6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
211
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
212
46,23,16,11, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
213
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
214
47,24,16,12, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
215
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
216
48,24,16,12, 9, 8,6,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
217
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
218
49,25,17,12,10, 8,7,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
219
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
220
50,25,17,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
221
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
222
51,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
223
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
224
52,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
225
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
226
53,27,18,13,10, 9,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
227
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
228
54,27,19,14,11, 9,7,6,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
229
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
230
55,28,19,14,11, 9,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
231
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
232
56,28,19,14,11, 9,8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
233
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
234
57,29,20,14,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
235
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
236
58,29,20,15,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,
237
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
238
59,30,20,15,12,10,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
239
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
240
60,30,21,15,12,10,8,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
241
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
242
61,31,21,15,12,10,8,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
243
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
244
62,31,22,16,12,10,9,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
245
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
246
63,32,22,16,13,10,9,7,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,1,
247
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
248
64,32,22,16,13,10,9,8,7,6,5,5,4,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,
249
 1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
250
};
251
#else
252
#error No table for provided NBITS
253
#endif
254
255
/* Doing tabp with a #define makes compiler warnings about pointing outside an
256
   object go away.  We used to define this as a variable.  It is not clear if
257
   e.g.  (vector[100] - 10) + 10 is well- defined as per the C standard;
258
   (vector[100] + 10) - 10 surely is and there is no sequence point so the
259
   expressions should be equivalent.  To make this safe, we might want to
260
   define tabp as a macro with the index as an argument.  Depending on the
261
   platform, relocs might allow for assembly-time or linker-time resolution to
262
   take place. */
263
#define tabp (tab - (1 << (NBITS - 1) << NBITS))
264
265
static inline mp_double_limb_t
266
div1 (mp_limb_t n0, mp_limb_t d0)
267
{
268
  int ncnt;
269
  size_t nbi, dbi;
270
  mp_limb_t q0;
271
  mp_limb_t r0;
272
  mp_limb_t mask;
273
  mp_double_limb_t res;
274
275
  ASSERT (n0 >= d0);    /* Actually only msb position is critical. */
276
277
  count_leading_zeros (ncnt, n0);
278
  nbi = n0 << ncnt >> (GMP_LIMB_BITS - NBITS);
279
  dbi = d0 << ncnt >> (GMP_LIMB_BITS - NBITS);
280
281
  q0 = tabp[(nbi << NBITS) + dbi];
282
  r0 = n0 - q0 * d0;
283
  mask = -(mp_limb_t) (r0 >= d0);
284
  q0 -= mask;
285
  r0 -= d0 & mask;
286
287
  if (UNLIKELY (r0 >= d0))
288
    {
289
      q0 = n0 / d0;
290
      r0 = n0 - q0 * d0;
291
    }
292
293
  res.d1 = q0;
294
  res.d0 = r0;
295
  return res;
296
}
297
298
#elif HGCD2_DIV1_METHOD == 5
299
300
/* Table inverses of divisors.  We don't bother with suppressing the msb from
301
   the tables.  We index with the NBITS most significant divisor bits,
302
   including the always-set highest bit, but use addressing trickery via tabp
303
   to suppress it.
304
305
   Possible improvements:
306
307
   * Do first multiply using 32-bit operations on 64-bit computers.  At least
308
     on most Arm64 cores, that uses 3 times less resources.  It also saves on
309
     many x86-64 processors.
310
*/
311
312
#ifndef NBITS
313
#define NBITS 7
314
#endif
315
316
#if NBITS == 5
317
/* This needs full division about 1.63% of the time. */
318
static const unsigned char tab[16] = {
319
 63, 59, 55, 52, 50, 47, 45, 43, 41, 39, 38, 36, 35, 34, 33, 32
320
};
321
#elif NBITS == 6
322
/* This needs full division about 0.93% of the time. */
323
static const unsigned char tab[32] = {
324
127,123,119,116,112,109,106,104,101, 98, 96, 94, 92, 90, 88, 86,
325
 84, 82, 80, 79, 77, 76, 74, 73, 72, 70, 69, 68, 67, 66, 65, 64
326
};
327
#elif NBITS == 7
328
/* This needs full division about 0.49% of the time. */
329
static const unsigned char tab[64] = {
330
255,251,247,243,239,236,233,229,226,223,220,217,214,211,209,206,
331
203,201,198,196,194,191,189,187,185,183,181,179,177,175,173,171,
332
169,167,166,164,162,161,159,158,156,155,153,152,150,149,147,146,
333
145,143,142,141,140,139,137,136,135,134,133,132,131,130,129,128
334
};
335
#elif NBITS == 8
336
/* This needs full division about 0.26% of the time. */
337
static const unsigned short tab[128] = {
338
511,507,503,499,495,491,488,484,480,477,473,470,467,463,460,457,
339
454,450,447,444,441,438,435,433,430,427,424,421,419,416,413,411,
340
408,406,403,401,398,396,393,391,389,386,384,382,380,377,375,373,
341
371,369,367,365,363,361,359,357,355,353,351,349,347,345,343,342,
342
340,338,336,335,333,331,329,328,326,325,323,321,320,318,317,315,
343
314,312,311,309,308,306,305,303,302,301,299,298,296,295,294,292,
344
291,290,288,287,286,285,283,282,281,280,279,277,276,275,274,273,
345
272,270,269,268,267,266,265,264,263,262,261,260,259,258,257,256
346
};
347
#else
348
#error No table for provided NBITS
349
#endif
350
351
/* Doing tabp with a #define makes compiler warnings about pointing outside an
352
   object go away.  We used to define this as a variable.  It is not clear if
353
   e.g.  (vector[100] - 10) + 10 is well- defined as per the C standard;
354
   (vector[100] + 10) - 10 surely is and there is no sequence point so the
355
   expressions should be equivalent.  To make this safe, we might want to
356
   define tabp as a macro with the index as an argument.  Depending on the
357
   platform, relocs might allow for assembly-time or linker-time resolution to
358
   take place. */
359
#define tabp (tab - (1 << (NBITS - 1)))
360
361
static inline mp_double_limb_t
362
div1 (mp_limb_t n0, mp_limb_t d0)
363
{
364
  int ncnt, dcnt;
365
  size_t dbi;
366
  mp_limb_t inv;
367
  mp_limb_t q0;
368
  mp_limb_t r0;
369
  mp_limb_t mask;
370
  mp_double_limb_t res;
371
372
  count_leading_zeros (ncnt, n0);
373
  count_leading_zeros (dcnt, d0);
374
375
  dbi = d0 << dcnt >> (GMP_LIMB_BITS - NBITS);
376
  inv = tabp[dbi];
377
  q0 = ((n0 << ncnt) >> (NBITS + 1)) * inv >> (GMP_LIMB_BITS - 1 + ncnt - dcnt);
378
  r0 = n0 - q0 * d0;
379
  mask = -(mp_limb_t) (r0 >= d0);
380
  q0 -= mask;
381
  r0 -= d0 & mask;
382
383
  if (UNLIKELY (r0 >= d0))
384
    {
385
      q0 = n0 / d0;
386
      r0 = n0 - q0 * d0;
387
    }
388
389
  res.d1 = q0;
390
  res.d0 = r0;
391
  return res;
392
}
393
394
#else
395
#error Unknown HGCD2_DIV1_METHOD
396
#endif
397
398
#if HAVE_NATIVE_mpn_div_22
399
400
#define div2 mpn_div_22
401
/* Two-limb division optimized for small quotients.  */
402
mp_limb_t div2 (mp_ptr, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
403
404
#elif HGCD2_DIV2_METHOD == 1
405
406
static mp_limb_t
407
div2 (mp_ptr rp,
408
      mp_limb_t n1, mp_limb_t n0,
409
      mp_limb_t d1, mp_limb_t d0)
410
{
411
  mp_double_limb_t rq = div1 (n1, d1);
412
  if (UNLIKELY (rq.d1 > d1))
413
    {
414
      mp_limb_t n2, q, t1, t0;
415
      int c;
416
417
      /* Normalize */
418
      count_leading_zeros (c, d1);
419
      ASSERT (c > 0);
420
421
      n2 = n1 >> (GMP_LIMB_BITS - c);
422
      n1 = (n1 << c) | (n0 >> (GMP_LIMB_BITS - c));
423
      n0 <<= c;
424
      d1 = (d1 << c) | (d0 >> (GMP_LIMB_BITS - c));
425
      d0 <<= c;
426
427
      udiv_qrnnd (q, n1, n2, n1, d1);
428
      umul_ppmm (t1, t0, q, d0);
429
      if (t1 > n1 || (t1 == n1 && t0 > n0))
430
  {
431
    ASSERT (q > 0);
432
    q--;
433
    sub_ddmmss (t1, t0, t1, t0, d1, d0);
434
  }
435
      sub_ddmmss (n1, n0, n1, n0, t1, t0);
436
437
      /* Undo normalization */
438
      rp[0] = (n0 >> c) | (n1 << (GMP_LIMB_BITS - c));
439
      rp[1] = n1 >> c;
440
441
      return q;
442
    }
443
  else
444
    {
445
      mp_limb_t q, t1, t0;
446
      n1 = rq.d0;
447
      q = rq.d1;
448
      umul_ppmm (t1, t0, q, d0);
449
      if (UNLIKELY (t1 >= n1) && (t1 > n1 || t0 > n0))
450
  {
451
    ASSERT (q > 0);
452
    q--;
453
    sub_ddmmss (t1, t0, t1, t0, d1, d0);
454
  }
455
      sub_ddmmss (rp[1], rp[0], n1, n0, t1, t0);
456
      return q;
457
    }
458
}
459
460
#elif HGCD2_DIV2_METHOD == 2
461
462
/* Bit-wise div2. Relies on fast count_leading_zeros. */
463
static mp_limb_t
464
div2 (mp_ptr rp,
465
      mp_limb_t n1, mp_limb_t n0,
466
      mp_limb_t d1, mp_limb_t d0)
467
0
{
468
0
  mp_limb_t q = 0;
469
0
  int ncnt;
470
0
  int dcnt;
471
472
0
  count_leading_zeros (ncnt, n1);
473
0
  count_leading_zeros (dcnt, d1);
474
0
  dcnt -= ncnt;
475
476
0
  d1 = (d1 << dcnt) + (d0 >> 1 >> (GMP_LIMB_BITS - 1 - dcnt));
477
0
  d0 <<= dcnt;
478
479
0
  do
480
0
    {
481
0
      mp_limb_t mask;
482
0
      q <<= 1;
483
0
      if (UNLIKELY (n1 == d1))
484
0
  mask = -(n0 >= d0);
485
0
      else
486
0
  mask = -(n1 > d1);
487
488
0
      q -= mask;
489
490
0
      sub_ddmmss (n1, n0, n1, n0, mask & d1, mask & d0);
491
492
0
      d0 = (d1 << (GMP_LIMB_BITS - 1)) | (d0 >> 1);
493
0
      d1 = d1 >> 1;
494
0
    }
495
0
  while (dcnt--);
496
497
0
  rp[0] = n0;
498
0
  rp[1] = n1;
499
500
0
  return q;
501
0
}
502
#else
503
#error Unknown HGCD2_DIV2_METHOD
504
#endif