/src/gmp/mpn/generic/hgcd2-div.h

Line	Count	Source (jump to first uncovered line)
1		/* hgcd2-div.h
2
3		THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4		SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5		GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7		Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019, 2020 Free Software
8		Foundation, Inc.
9
10		This file is part of the GNU MP Library.
11
12		The GNU MP Library is free software; you can redistribute it and/or modify
13		it under the terms of either:
14
15		* the GNU Lesser General Public License as published by the Free
16		Software Foundation; either version 3 of the License, or (at your
17		option) any later version.
18
19		or
20
21		* the GNU General Public License as published by the Free Software
22		Foundation; either version 2 of the License, or (at your option) any
23		later version.
24
25		or both in parallel, as here.
26
27		The GNU MP Library is distributed in the hope that it will be useful, but
28		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
30		for more details.
31
32		You should have received copies of the GNU General Public License and the
33		GNU Lesser General Public License along with the GNU MP Library. If not,
34		see https://www.gnu.org/licenses/. */
35
36		#include "gmp-impl.h"
37		#include "longlong.h"
38
39		#ifndef HGCD2_DIV1_METHOD
40		#define HGCD2_DIV1_METHOD 3
41		#endif
42
43		#ifndef HGCD2_DIV2_METHOD
44		#define HGCD2_DIV2_METHOD 2
45		#endif
46
47		#if HAVE_NATIVE_mpn_div_11
48
49		#define div1 mpn_div_11
50		/* Single-limb division optimized for small quotients.
51		Returned value holds d0 = r, d1 = q. */
52		mp_double_limb_t div1 (mp_limb_t, mp_limb_t);
53
54		#elif HGCD2_DIV1_METHOD == 1
55
56		static inline mp_double_limb_t
57		div1 (mp_limb_t n0, mp_limb_t d0)
58		{
59		mp_double_limb_t res;
60		res.d1 = n0 / d0;
61		res.d0 = n0 - res.d1 * d0;
62
63		return res;
64		}
65
66		#elif HGCD2_DIV1_METHOD == 2
67
68		static mp_double_limb_t
69		div1 (mp_limb_t n0, mp_limb_t d0)
70		{
71		mp_double_limb_t res;
72		int ncnt, dcnt, cnt;
73		mp_limb_t q;
74		mp_limb_t mask;
75
76		ASSERT (n0 >= d0);
77
78		count_leading_zeros (ncnt, n0);
79		count_leading_zeros (dcnt, d0);
80		cnt = dcnt - ncnt;
81
82		d0 <<= cnt;
83
84		q = -(mp_limb_t) (n0 >= d0);
85		n0 -= d0 & q;
86		d0 >>= 1;
87		q = -q;
88
89		while (--cnt >= 0)
90		{
91		mask = -(mp_limb_t) (n0 >= d0);
92		n0 -= d0 & mask;
93		d0 >>= 1;
94		q = (q << 1) - mask;
95		}
96
97		res.d0 = n0;
98		res.d1 = q;
99		return res;
100		}
101
102		#elif HGCD2_DIV1_METHOD == 3
103
104		static inline mp_double_limb_t
105		div1 (mp_limb_t n0, mp_limb_t d0)
106		{
107		mp_double_limb_t res;
108		if (UNLIKELY ((d0 >> (GMP_LIMB_BITS - 3)) != 0)
109		\|\| UNLIKELY (n0 >= (d0 << 3)))
110		{
111		res.d1 = n0 / d0;
112		res.d0 = n0 - res.d1 * d0;
113		}
114		else
115		{
116		mp_limb_t q, mask;
117
118		d0 <<= 2;
119
120		mask = -(mp_limb_t) (n0 >= d0);
121		n0 -= d0 & mask;
122		q = 4 & mask;
123
124		d0 >>= 1;
125		mask = -(mp_limb_t) (n0 >= d0);
126		n0 -= d0 & mask;
127		q += 2 & mask;
128
129		d0 >>= 1;
130		mask = -(mp_limb_t) (n0 >= d0);
131		n0 -= d0 & mask;
132		q -= mask;
133
134		res.d0 = n0;
135		res.d1 = q;
136		}
137		return res;
138		}
139
140		#elif HGCD2_DIV1_METHOD == 4
141
142		/* Table quotients. We extract the NBITS most significant bits of the
143		numerator limb, and the corresponding bits from the divisor limb, and use
144		these to form an index into the table. This method is probably only useful
145		for short pipelines with slow multiplication.
146
147		Possible improvements:
148
149		* Perhaps extract the highest NBITS of the divisor instead of the same bits
150		as from the numerator. That would require another count_leading_zeros,
151		and a post-multiply shift of the quotient.
152
153		* Compress tables? Their values are tiny, and there are lots of zero
154		entries (which are never used).
155
156		* Round the table entries more cleverly?
157		*/
158
159		#ifndef NBITS
160		#define NBITS 5
161		#endif
162
163		#if NBITS == 5
164		/* This needs full division about 13.2% of the time. */
165		static const unsigned char tab[512] = {
166		17, 9, 5,4,3,2,2,2,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
167		18, 9, 6,4,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
168		19,10, 6,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
169		20,10, 6,5,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
170		21,11, 7,5,4,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
171		22,11, 7,5,4,3,3,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
172		23,12, 7,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
173		24,12, 8,6,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
174		25,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
175		26,13, 8,6,5,4,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
176		27,14, 9,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
177		28,14, 9,7,5,4,3,3,3,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
178		29,15,10,7,5,4,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
179		30,15,10,7,6,5,4,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
180		31,16,10,7,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
181		32,16,11,8,6,5,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
182		};
183		#elif NBITS == 6
184		/* This needs full division about 9.8% of the time. */
185		static const unsigned char tab[2048] = {
186		33,17,11, 8, 6, 5,4,4,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
187		1, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
188		34,17,11, 8, 6, 5,4,4,3,3,3,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
189		1, 1, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
190		35,18,12, 9, 7, 5,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
191		1, 1, 1, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
192		36,18,12, 9, 7, 6,5,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
193		1, 1, 1, 1, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
194		37,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
195		1, 1, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
196		38,19,13, 9, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
197		1, 1, 1, 1, 1, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
198		39,20,13,10, 7, 6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
199		1, 1, 1, 1, 1, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
200		40,20,14,10, 8, 6,5,5,4,3,3,3,3,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,
201		1, 1, 1, 1, 1, 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
202		41,21,14,10, 8, 6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
203		1, 1, 1, 1, 1, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
204		42,21,14,10, 8, 7,6,5,4,4,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
205		1, 1, 1, 1, 1, 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
206		43,22,15,11, 8, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
207		1, 1, 1, 1, 1, 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
208		44,22,15,11, 9, 7,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,
209		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
210		45,23,15,11, 9, 7,6,5,5,4,4,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
211		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
212		46,23,16,11, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,
213		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
214		47,24,16,12, 9, 7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
215		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
216		48,24,16,12, 9, 8,6,6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,
217		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
218		49,25,17,12,10, 8,7,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
219		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
220		50,25,17,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
221		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
222		51,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
223		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
224		52,26,18,13,10, 8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,
225		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
226		53,27,18,13,10, 9,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
227		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,
228		54,27,19,14,11, 9,7,6,6,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
229		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
230		55,28,19,14,11, 9,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
231		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
232		56,28,19,14,11, 9,8,7,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,
233		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
234		57,29,20,14,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
235		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
236		58,29,20,15,11, 9,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,
237		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
238		59,30,20,15,12,10,8,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
239		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
240		60,30,21,15,12,10,8,7,6,6,5,5,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,1,
241		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
242		61,31,21,15,12,10,8,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
243		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
244		62,31,22,16,12,10,9,7,6,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,1,
245		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
246		63,32,22,16,13,10,9,7,7,6,5,5,4,4,4,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,1,
247		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
248		64,32,22,16,13,10,9,8,7,6,5,5,4,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,1,
249		1, 1, 1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
250		};
251		#else
252		#error No table for provided NBITS
253		#endif
254
255		/* Doing tabp with a #define makes compiler warnings about pointing outside an
256		object go away. We used to define this as a variable. It is not clear if
257		e.g. (vector[100] - 10) + 10 is well- defined as per the C standard;
258		(vector[100] + 10) - 10 surely is and there is no sequence point so the
259		expressions should be equivalent. To make this safe, we might want to
260		define tabp as a macro with the index as an argument. Depending on the
261		platform, relocs might allow for assembly-time or linker-time resolution to
262		take place. */
263		#define tabp (tab - (1 << (NBITS - 1) << NBITS))
264
265		static inline mp_double_limb_t
266		div1 (mp_limb_t n0, mp_limb_t d0)
267		{
268		int ncnt;
269		size_t nbi, dbi;
270		mp_limb_t q0;
271		mp_limb_t r0;
272		mp_limb_t mask;
273		mp_double_limb_t res;
274
275		ASSERT (n0 >= d0); /* Actually only msb position is critical. */
276
277		count_leading_zeros (ncnt, n0);
278		nbi = n0 << ncnt >> (GMP_LIMB_BITS - NBITS);
279		dbi = d0 << ncnt >> (GMP_LIMB_BITS - NBITS);
280
281		q0 = tabp[(nbi << NBITS) + dbi];
282		r0 = n0 - q0 * d0;
283		mask = -(mp_limb_t) (r0 >= d0);
284		q0 -= mask;
285		r0 -= d0 & mask;
286
287		if (UNLIKELY (r0 >= d0))
288		{
289		q0 = n0 / d0;
290		r0 = n0 - q0 * d0;
291		}
292
293		res.d1 = q0;
294		res.d0 = r0;
295		return res;
296		}
297
298		#elif HGCD2_DIV1_METHOD == 5
299
300		/* Table inverses of divisors. We don't bother with suppressing the msb from
301		the tables. We index with the NBITS most significant divisor bits,
302		including the always-set highest bit, but use addressing trickery via tabp
303		to suppress it.
304
305		Possible improvements:
306
307		* Do first multiply using 32-bit operations on 64-bit computers. At least
308		on most Arm64 cores, that uses 3 times less resources. It also saves on
309		many x86-64 processors.
310		*/
311
312		#ifndef NBITS
313	0	#define NBITS 7
314		#endif
315
316		#if NBITS == 5
317		/* This needs full division about 1.63% of the time. */
318		static const unsigned char tab[16] = {
319		63, 59, 55, 52, 50, 47, 45, 43, 41, 39, 38, 36, 35, 34, 33, 32
320		};
321		#elif NBITS == 6
322		/* This needs full division about 0.93% of the time. */
323		static const unsigned char tab[32] = {
324		127,123,119,116,112,109,106,104,101, 98, 96, 94, 92, 90, 88, 86,
325		84, 82, 80, 79, 77, 76, 74, 73, 72, 70, 69, 68, 67, 66, 65, 64
326		};
327		#elif NBITS == 7
328		/* This needs full division about 0.49% of the time. */
329		static const unsigned char tab[64] = {
330		255,251,247,243,239,236,233,229,226,223,220,217,214,211,209,206,
331		203,201,198,196,194,191,189,187,185,183,181,179,177,175,173,171,
332		169,167,166,164,162,161,159,158,156,155,153,152,150,149,147,146,
333		145,143,142,141,140,139,137,136,135,134,133,132,131,130,129,128
334		};
335		#elif NBITS == 8
336		/* This needs full division about 0.26% of the time. */
337		static const unsigned short tab[128] = {
338		511,507,503,499,495,491,488,484,480,477,473,470,467,463,460,457,
339		454,450,447,444,441,438,435,433,430,427,424,421,419,416,413,411,
340		408,406,403,401,398,396,393,391,389,386,384,382,380,377,375,373,
341		371,369,367,365,363,361,359,357,355,353,351,349,347,345,343,342,
342		340,338,336,335,333,331,329,328,326,325,323,321,320,318,317,315,
343		314,312,311,309,308,306,305,303,302,301,299,298,296,295,294,292,
344		291,290,288,287,286,285,283,282,281,280,279,277,276,275,274,273,
345		272,270,269,268,267,266,265,264,263,262,261,260,259,258,257,256
346		};
347		#else
348		#error No table for provided NBITS
349		#endif
350
351		/* Doing tabp with a #define makes compiler warnings about pointing outside an
352		object go away. We used to define this as a variable. It is not clear if
353		e.g. (vector[100] - 10) + 10 is well- defined as per the C standard;
354		(vector[100] + 10) - 10 surely is and there is no sequence point so the
355		expressions should be equivalent. To make this safe, we might want to
356		define tabp as a macro with the index as an argument. Depending on the
357		platform, relocs might allow for assembly-time or linker-time resolution to
358		take place. */
359	0	#define tabp (tab - (1 << (NBITS - 1)))
360
361		static inline mp_double_limb_t
362		div1 (mp_limb_t n0, mp_limb_t d0)
363	0	{
364	0	int ncnt, dcnt;
365	0	size_t dbi;
366	0	mp_limb_t inv;
367	0	mp_limb_t q0;
368	0	mp_limb_t r0;
369	0	mp_limb_t mask;
370	0	mp_double_limb_t res;
371
372	0	count_leading_zeros (ncnt, n0);
373	0	count_leading_zeros (dcnt, d0);
374
375	0	dbi = d0 << dcnt >> (GMP_LIMB_BITS - NBITS);
376	0	inv = tabp[dbi];
377	0	q0 = ((n0 << ncnt) >> (NBITS + 1)) * inv >> (GMP_LIMB_BITS - 1 + ncnt - dcnt);
378	0	r0 = n0 - q0 * d0;
379	0	mask = -(mp_limb_t) (r0 >= d0);
380	0	q0 -= mask;
381	0	r0 -= d0 & mask;
382
383	0	if (UNLIKELY (r0 >= d0))
384	0	{
385	0	q0 = n0 / d0;
386	0	r0 = n0 - q0 * d0;
387	0	}
388
389	0	res.d1 = q0;
390	0	res.d0 = r0;
391	0	return res;
392	0	}
393
394		#else
395		#error Unknown HGCD2_DIV1_METHOD
396		#endif
397
398		#if HAVE_NATIVE_mpn_div_22
399
400		#define div2 mpn_div_22
401		/* Two-limb division optimized for small quotients. */
402		mp_limb_t div2 (mp_ptr, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
403
404		#elif HGCD2_DIV2_METHOD == 1
405
406		static mp_limb_t
407		div2 (mp_ptr rp,
408		mp_limb_t n1, mp_limb_t n0,
409		mp_limb_t d1, mp_limb_t d0)
410		{
411		mp_double_limb_t rq = div1 (n1, d1);
412		if (UNLIKELY (rq.d1 > d1))
413		{
414		mp_limb_t n2, q, t1, t0;
415		int c;
416
417		/* Normalize */
418		count_leading_zeros (c, d1);
419		ASSERT (c > 0);
420
421		n2 = n1 >> (GMP_LIMB_BITS - c);
422		n1 = (n1 << c) \| (n0 >> (GMP_LIMB_BITS - c));
423		n0 <<= c;
424		d1 = (d1 << c) \| (d0 >> (GMP_LIMB_BITS - c));
425		d0 <<= c;
426
427		udiv_qrnnd (q, n1, n2, n1, d1);
428		umul_ppmm (t1, t0, q, d0);
429		if (t1 > n1 \|\| (t1 == n1 && t0 > n0))
430		{
431		ASSERT (q > 0);
432		q--;
433		sub_ddmmss (t1, t0, t1, t0, d1, d0);
434		}
435		sub_ddmmss (n1, n0, n1, n0, t1, t0);
436
437		/* Undo normalization */
438		rp[0] = (n0 >> c) \| (n1 << (GMP_LIMB_BITS - c));
439		rp[1] = n1 >> c;
440
441		return q;
442		}
443		else
444		{
445		mp_limb_t q, t1, t0;
446		n1 = rq.d0;
447		q = rq.d1;
448		umul_ppmm (t1, t0, q, d0);
449		if (UNLIKELY (t1 >= n1) && (t1 > n1 \|\| t0 > n0))
450		{
451		ASSERT (q > 0);
452		q--;
453		sub_ddmmss (t1, t0, t1, t0, d1, d0);
454		}
455		sub_ddmmss (rp[1], rp[0], n1, n0, t1, t0);
456		return q;
457		}
458		}
459
460		#elif HGCD2_DIV2_METHOD == 2
461
462		/* Bit-wise div2. Relies on fast count_leading_zeros. */
463		static mp_limb_t
464		div2 (mp_ptr rp,
465		mp_limb_t n1, mp_limb_t n0,
466		mp_limb_t d1, mp_limb_t d0)
467	0	{
468	0	mp_limb_t q = 0;
469	0	int ncnt;
470	0	int dcnt;
471
472	0	count_leading_zeros (ncnt, n1);
473	0	count_leading_zeros (dcnt, d1);
474	0	dcnt -= ncnt;
475
476	0	d1 = (d1 << dcnt) + (d0 >> 1 >> (GMP_LIMB_BITS - 1 - dcnt));
477	0	d0 <<= dcnt;
478
479	0	do
480	0	{
481	0	mp_limb_t mask;
482	0	q <<= 1;
483	0	if (UNLIKELY (n1 == d1))
484	0	mask = -(n0 >= d0);
485	0	else
486	0	mask = -(n1 > d1);
487
488	0	q -= mask;
489
490	0	sub_ddmmss (n1, n0, n1, n0, mask & d1, mask & d0);
491
492	0	d0 = (d1 << (GMP_LIMB_BITS - 1)) \| (d0 >> 1);
493	0	d1 = d1 >> 1;
494	0	}
495	0	while (dcnt--);
496
497	0	rp[0] = n0;
498	0	rp[1] = n1;
499
500	0	return q;
501	0	}
502		#else
503		#error Unknown HGCD2_DIV2_METHOD
504		#endif

Coverage Report

Created: 2025-03-06 06:58