/src/gmp/mpn/divrem_1.c

Source
/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"


/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif



/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
   code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course.  And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to CPU
   specific implementations to consider, especially since EXTRACT isn't on
   the dependent chain.  */

mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
        mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  n;
  mp_size_t  i;
  mp_limb_t  n1, n0;
  mp_limb_t  r = 0;

  ASSERT (qxn >= 0);
  ASSERT (un >= 0);
  ASSERT (d != 0);
  /* FIXME: What's the correct overlap rule when qxn!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));

  n = un + qxn;
  if (n == 0)
    return 0;

  d <<= GMP_NAIL_BITS;

  qp += (n - 1);   /* Make qp point at most significant quotient limb */

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      if (un != 0)
  {
    /* High quotient limb is 0 or 1, skip a divide step. */
    mp_limb_t q;
    r = up[un - 1] << GMP_NAIL_BITS;
    q = (r >= d);
    *qp-- = q;
    r -= (d & -q);
    r >>= GMP_NAIL_BITS;
    n--;
    un--;
  }

      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
  {
  plain:
    for (i = un - 1; i >= 0; i--)
      {
        n0 = up[i] << GMP_NAIL_BITS;
        udiv_qrnnd (*qp, r, r, n0, d);
        r >>= GMP_NAIL_BITS;
        qp--;
      }
    for (i = qxn - 1; i >= 0; i--)
      {
        udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
        r >>= GMP_NAIL_BITS;
        qp--;
      }
    return r;
  }
      else
  {
    /* Multiply-by-inverse, divisor already normalized. */
    mp_limb_t dinv;
    invert_limb (dinv, d);

    for (i = un - 1; i >= 0; i--)
      {
        n0 = up[i] << GMP_NAIL_BITS;
        udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
        r >>= GMP_NAIL_BITS;
        qp--;
      }
    for (i = qxn - 1; i >= 0; i--)
      {
        udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
        r >>= GMP_NAIL_BITS;
        qp--;
      }
    return r;
  }
    }
  else
    {
      /* Most significant bit of divisor == 0.  */
      int cnt;

      /* Skip a division if high < divisor (high quotient 0).  Testing here
   before normalizing will still skip as often as possible.  */
      if (un != 0)
  {
    n1 = up[un - 1] << GMP_NAIL_BITS;
    if (n1 < d)
      {
        r = n1 >> GMP_NAIL_BITS;
        *qp-- = 0;
        n--;
        if (n == 0)
    return r;
        un--;
      }
  }

      if (! UDIV_NEEDS_NORMALIZATION
    && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
  goto plain;

      count_leading_zeros (cnt, d);
      d <<= cnt;
      r <<= cnt;

      if (UDIV_NEEDS_NORMALIZATION
    && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
  {
    mp_limb_t nshift;
    if (un != 0)
      {
        n1 = up[un - 1] << GMP_NAIL_BITS;
        r |= (n1 >> (GMP_LIMB_BITS - cnt));
        for (i = un - 2; i >= 0; i--)
    {
      n0 = up[i] << GMP_NAIL_BITS;
      nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
      udiv_qrnnd (*qp, r, r, nshift, d);
      r >>= GMP_NAIL_BITS;
      qp--;
      n1 = n0;
    }
        udiv_qrnnd (*qp, r, r, n1 << cnt, d);
        r >>= GMP_NAIL_BITS;
        qp--;
      }
    for (i = qxn - 1; i >= 0; i--)
      {
        udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
        r >>= GMP_NAIL_BITS;
        qp--;
      }
    return r >> cnt;
  }
      else
  {
    mp_limb_t  dinv, nshift;
    invert_limb (dinv, d);
    if (un != 0)
      {
        n1 = up[un - 1] << GMP_NAIL_BITS;
        r |= (n1 >> (GMP_LIMB_BITS - cnt));
        for (i = un - 2; i >= 0; i--)
    {
      n0 = up[i] << GMP_NAIL_BITS;
      nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
      udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
      r >>= GMP_NAIL_BITS;
      qp--;
      n1 = n0;
    }
        udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
        r >>= GMP_NAIL_BITS;
        qp--;
      }
    for (i = qxn - 1; i >= 0; i--)
      {
        udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
        r >>= GMP_NAIL_BITS;
        qp--;
      }
    return r >> cnt;
  }
    }
}

Coverage Report

Created: 2026-02-14 06:49

Line	Count	Source
1		/* mpn_divrem_1 -- mpn by limb division.
2
3		Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
4		Foundation, Inc.
5
6		This file is part of the GNU MP Library.
7
8		The GNU MP Library is free software; you can redistribute it and/or modify
9		it under the terms of either:
10
11		* the GNU Lesser General Public License as published by the Free
12		Software Foundation; either version 3 of the License, or (at your
13		option) any later version.
14
15		or
16
17		* the GNU General Public License as published by the Free Software
18		Foundation; either version 2 of the License, or (at your option) any
19		later version.
20
21		or both in parallel, as here.
22
23		The GNU MP Library is distributed in the hope that it will be useful, but
24		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26		for more details.
27
28		You should have received copies of the GNU General Public License and the
29		GNU Lesser General Public License along with the GNU MP Library. If not,
30		see https://www.gnu.org/licenses/. */
31
32		#include "gmp-impl.h"
33		#include "longlong.h"
34
35
36		/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
37		meaning the quotient size where that should happen, the quotient size
38		being how many udiv divisions will be done.
39
40		The default is to use preinv always, CPUs where this doesn't suit have
41		tuned thresholds. Note in particular that preinv should certainly be
42		used if that's the only division available (USE_PREINV_ALWAYS). */
43
44		#ifndef DIVREM_1_NORM_THRESHOLD
45		#define DIVREM_1_NORM_THRESHOLD 0
46		#endif
47		#ifndef DIVREM_1_UNNORM_THRESHOLD
48		#define DIVREM_1_UNNORM_THRESHOLD 0
49		#endif
50
51
52
53		/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
54		and UNNORM thresholds are 0 and only the inversion code is included.
55
56		If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
57		will be MP_SIZE_T_MAX and only the plain division code is included.
58
59		Otherwise mul-by-inverse is better than plain division above some
60		threshold, and best results are obtained by having code for both present.
61
62		The main reason for separating the norm and unnorm cases is that not all
63		CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
64		code used on an already normalized divisor.
65
66		If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
67		non-shifting code for both the norm and unnorm cases, though with
68		different criteria for skipping a division, and with different thresholds
69		of course. And in fact if inversion is never viable, then that simple
70		non-shifting division would be all that's left.
71
72		The NORM and UNNORM thresholds might not differ much, but if there's
73		going to be separate code for norm and unnorm then it makes sense to have
74		separate thresholds. One thing that's possible is that the
75		mul-by-inverse might be better only for normalized divisors, due to that
76		case not needing variable bit shifts.
77
78		Notice that the thresholds are tested after the decision to possibly skip
79		one divide step, so they're based on the actual number of divisions done.
80
81		For the unnorm case, it would be possible to call mpn_lshift to adjust
82		the dividend all in one go (into the quotient space say), rather than
83		limb-by-limb in the loop. This might help if mpn_lshift is a lot faster
84		than what the compiler can generate for EXTRACT. But this is left to CPU
85		specific implementations to consider, especially since EXTRACT isn't on
86		the dependent chain. */
87
88		mp_limb_t
89		mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
90		mp_srcptr up, mp_size_t un, mp_limb_t d)
91	8.32k	{
92	8.32k	mp_size_t n;
93	8.32k	mp_size_t i;
94	8.32k	mp_limb_t n1, n0;
95	8.32k	mp_limb_t r = 0;
96
97	8.32k	ASSERT (qxn >= 0);
98	8.32k	ASSERT (un >= 0);
99	8.32k	ASSERT (d != 0);
100		/* FIXME: What's the correct overlap rule when qxn!=0? */
101	8.32k	ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
102
103	8.32k	n = un + qxn;
104	8.32k	if (n == 0)
105	0	return 0;
106
107	8.32k	d <<= GMP_NAIL_BITS;
108
109	8.32k	qp += (n - 1); /* Make qp point at most significant quotient limb */
110
111	8.32k	if ((d & GMP_LIMB_HIGHBIT) != 0)
112	2.33k	{
113	2.33k	if (un != 0)
114	2.33k	{
115		/* High quotient limb is 0 or 1, skip a divide step. */
116	2.33k	mp_limb_t q;
117	2.33k	r = up[un - 1] << GMP_NAIL_BITS;
118	2.33k	q = (r >= d);
119	2.33k	*qp-- = q;
120	2.33k	r -= (d & -q);
121	2.33k	r >>= GMP_NAIL_BITS;
122	2.33k	n--;
123	2.33k	un--;
124	2.33k	}
125
126	2.33k	if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
127	0	{
128	0	plain:
129	0	for (i = un - 1; i >= 0; i--)
130	0	{
131	0	n0 = up[i] << GMP_NAIL_BITS;
132	0	udiv_qrnnd (*qp, r, r, n0, d);
133	0	r >>= GMP_NAIL_BITS;
134	0	qp--;
135	0	}
136	0	for (i = qxn - 1; i >= 0; i--)
137	0	{
138	0	udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
139	0	r >>= GMP_NAIL_BITS;
140	0	qp--;
141	0	}
142	0	return r;
143	0	}
144	2.33k	else
145	2.33k	{
146		/* Multiply-by-inverse, divisor already normalized. */
147	2.33k	mp_limb_t dinv;
148	2.33k	invert_limb (dinv, d);
149
150	20.0k	for (i = un - 1; i >= 0; i--)
151	17.7k	{
152	17.7k	n0 = up[i] << GMP_NAIL_BITS;
153	17.7k	udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
154	17.7k	r >>= GMP_NAIL_BITS;
155	17.7k	qp--;
156	17.7k	}
157	2.33k	for (i = qxn - 1; i >= 0; i--)
158	0	{
159	0	udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
160	0	r >>= GMP_NAIL_BITS;
161	0	qp--;
162	0	}
163	2.33k	return r;
164	2.33k	}
165	2.33k	}
166	5.98k	else
167	5.98k	{
168		/* Most significant bit of divisor == 0. */
169	5.98k	int cnt;
170
171		/* Skip a division if high < divisor (high quotient 0). Testing here
172		before normalizing will still skip as often as possible. */
173	5.98k	if (un != 0)
174	5.98k	{
175	5.98k	n1 = up[un - 1] << GMP_NAIL_BITS;
176	5.98k	if (n1 < d)
177	2.59k	{
178	2.59k	r = n1 >> GMP_NAIL_BITS;
179	2.59k	*qp-- = 0;
180	2.59k	n--;
181	2.59k	if (n == 0)
182	489	return r;
183	2.10k	un--;
184	2.10k	}
185	5.98k	}
186
187	5.49k	if (! UDIV_NEEDS_NORMALIZATION
188	0	&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
189	0	goto plain;
190
191	5.49k	count_leading_zeros (cnt, d);
192	5.49k	d <<= cnt;
193	5.49k	r <<= cnt;
194
195	5.49k	if (UDIV_NEEDS_NORMALIZATION
196	0	&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
197	0	{
198	0	mp_limb_t nshift;
199	0	if (un != 0)
200	0	{
201	0	n1 = up[un - 1] << GMP_NAIL_BITS;
202	0	r \|= (n1 >> (GMP_LIMB_BITS - cnt));
203	0	for (i = un - 2; i >= 0; i--)
204	0	{
205	0	n0 = up[i] << GMP_NAIL_BITS;
206	0	nshift = (n1 << cnt) \| (n0 >> (GMP_NUMB_BITS - cnt));
207	0	udiv_qrnnd (*qp, r, r, nshift, d);
208	0	r >>= GMP_NAIL_BITS;
209	0	qp--;
210	0	n1 = n0;
211	0	}
212	0	udiv_qrnnd (*qp, r, r, n1 << cnt, d);
213	0	r >>= GMP_NAIL_BITS;
214	0	qp--;
215	0	}
216	0	for (i = qxn - 1; i >= 0; i--)
217	0	{
218	0	udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
219	0	r >>= GMP_NAIL_BITS;
220	0	qp--;
221	0	}
222	0	return r >> cnt;
223	0	}
224	5.49k	else
225	5.49k	{
226	5.49k	mp_limb_t dinv, nshift;
227	5.49k	invert_limb (dinv, d);
228	5.49k	if (un != 0)
229	5.49k	{
230	5.49k	n1 = up[un - 1] << GMP_NAIL_BITS;
231	5.49k	r \|= (n1 >> (GMP_LIMB_BITS - cnt));
232	51.2k	for (i = un - 2; i >= 0; i--)
233	45.7k	{
234	45.7k	n0 = up[i] << GMP_NAIL_BITS;
235	45.7k	nshift = (n1 << cnt) \| (n0 >> (GMP_NUMB_BITS - cnt));
236	45.7k	udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
237	45.7k	r >>= GMP_NAIL_BITS;
238	45.7k	qp--;
239	45.7k	n1 = n0;
240	45.7k	}
241	5.49k	udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
242	5.49k	r >>= GMP_NAIL_BITS;
243	5.49k	qp--;
244	5.49k	}
245	5.49k	for (i = qxn - 1; i >= 0; i--)
246	0	{
247	0	udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
248	0	r >>= GMP_NAIL_BITS;
249	0	qp--;
250	0	}
251	5.49k	return r >> cnt;
252	5.49k	}
253	5.49k	}
254	8.32k	}