/src/gmp-6.2.1/mpn/sbpi1_div_q.c

Source (jump to first uncovered line)
/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
   division algorithm.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2007, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp-impl.h"
#include "longlong.h"

/* Quotient-only schoolbook division of {np, nn} by {dp, dn}.

   qp    receives qn = nn - dn quotient limbs (written from the most
         significant limb downwards).
   np    dividend; it is used as working storage and is modified.  No
         remainder is returned.
   dp    divisor; read-only.  The ASSERTs below require dn > 2, nn >= dn
         and the most significant bit of dp[dn-1] to be set.
   dinv  handed unchanged to udiv_qr_3by2 together with the two high
         divisor limbs (presumably the precomputed 3/2 inverse of those
         limbs -- confirm against the caller).

   The return value is the quotient limb above the qn limbs stored at qp.

   Outline, as implemented below:
     1. If qn + 1 < dn, only the qn + 1 most significant divisor limbs are
        used; the ignored low parts are accounted for in step 4.
     2. Quotient limbs are produced one at a time with udiv_qr_3by2 (or set
        to GMP_NUMB_MASK in the special cases) followed by mpn_submul_1.
     3. In the second phase the divisor is shortened by one limb per step
        ("triangularization").
     4. If the final high remainder limb is small, the contributions that
        were ignored in steps 1 and 3 are subtracted after the fact, and
        the quotient is decremented by one if the remainder underflows.  */
mp_limb_t
mpn_sbpi1_div_q (mp_ptr qp,
                 mp_ptr np, mp_size_t nn,
                 mp_srcptr dp, mp_size_t dn,
                 mp_limb_t dinv)
{
  mp_limb_t qh;
  mp_size_t qn, i;
  mp_limb_t n1, n0;
  mp_limb_t d1, d0;
  mp_limb_t cy, cy1;
  mp_limb_t q;
  mp_limb_t flag;

  /* The adjustment code at the end needs the untouched operands again.  */
  mp_size_t dn_orig = dn;
  mp_srcptr dp_orig = dp;
  mp_ptr np_orig = np;

  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);

  /* From here on np points just past the limbs currently being reduced and
     walks downwards.  */
  np += nn;

  qn = nn - dn;
  if (qn + 1 < dn)
    {
      /* Keep only the qn + 1 most significant divisor limbs.  */
      dp += dn - (qn + 1);
      dn = qn + 1;
    }

  /* High quotient limb: 1 if the top dn dividend limbs are >= the (possibly
     truncated) divisor, in which case the divisor is subtracted once.  */
  qh = mpn_cmp (np - dn, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n (np - dn, np - dn, dp, dn);

  qp += qn;

  dn -= 2;                      /* offset dn by 2 for main division loops,
                                   saving two iterations in mpn_submul_1.  */
  d1 = dp[dn + 1];
  d0 = dp[dn + 0];

  np -= 2;

  n1 = np[1];

  /* Phase 1: steps that use all dn + 2 divisor limbs.  The trip count is
     qn - (dn + 2) + 1, so this loop does nothing when the divisor was
     truncated above.  */
  for (i = qn - (dn + 2); i >= 0; i--)
    {
      np--;
      if (UNLIKELY (n1 == d1) && np[1] == d0)
        {
          /* Top two remainder limbs equal the top two divisor limbs: use
             the largest quotient limb.  */
          q = GMP_NUMB_MASK;
          mpn_submul_1 (np - dn, dp, dn + 2, q);
          n1 = np[1];           /* update n1, last loop's value will now be invalid */
        }
      else
        {
          udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);

          /* Subtract q times the low dn divisor limbs and propagate the
             borrow through the two-limb remainder n1:n0.  */
          cy = mpn_submul_1 (np - dn, dp, dn, q);

          cy1 = n0 < cy;
          n0 = (n0 - cy) & GMP_NUMB_MASK;
          cy = n1 < cy1;
          n1 -= cy1;
          np[0] = n0;

          if (UNLIKELY (cy != 0))
            {
              /* q was one too large: add the divisor back.  */
              n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
              q--;
            }
        }

      *--qp = q;
    }

  /* flag starts as all ones.  Once it is cleared below, the masked
     comparisons degenerate: every remaining step takes the
     q = GMP_NUMB_MASK branch without add-back, and the final adjustment
     block is skipped.  */
  flag = ~CNST_LIMB(0);

  if (dn >= 0)
    {
      /* Phase 2: one quotient limb per step, dropping the lowest divisor
         limb after each step.  */
      for (i = dn; i > 0; i--)
        {
          np--;
          if (UNLIKELY (n1 >= (d1 & flag)))
            {
              q = GMP_NUMB_MASK;
              cy = mpn_submul_1 (np - dn, dp, dn + 2, q);

              if (UNLIKELY (n1 != cy))
                {
                  if (n1 < (cy & flag))
                    {
                      q--;
                      mpn_add_n (np - dn, np - dn, dp, dn + 2);
                    }
                  else
                    flag = 0;
                }
              n1 = np[1];
            }
          else
            {
              udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);

              cy = mpn_submul_1 (np - dn, dp, dn, q);

              cy1 = n0 < cy;
              n0 = (n0 - cy) & GMP_NUMB_MASK;
              cy = n1 < cy1;
              n1 -= cy1;
              np[0] = n0;

              if (UNLIKELY (cy != 0))
                {
                  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
                  q--;
                }
            }

          *--qp = q;

          /* Truncate operands.  */
          dn--;
          dp++;
        }

      /* Last quotient limb: only the two high divisor limbs remain.  */
      np--;
      if (UNLIKELY (n1 >= (d1 & flag)))
        {
          q = GMP_NUMB_MASK;
          cy = mpn_submul_1 (np, dp, 2, q);

          if (UNLIKELY (n1 != cy))
            {
              if (n1 < (cy & flag))
                {
                  q--;
                  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
                }
              else
                flag = 0;
            }
          n1 = np[1];
        }
      else
        {
          udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);

          np[0] = n0;
          np[1] = n1;
        }

      *--qp = q;
    }
  ASSERT_ALWAYS (np[1] == n1);
  np += 2;


  dn = dn_orig;
  if (UNLIKELY (n1 < (dn & flag)))
    {
      /* x counts how many borrows the high remainder limb can still
         absorb before the remainder goes negative.  */
      mp_limb_t x;

      /* The quotient may be too large if the remainder is small.  Recompute
         for above ignored operand parts, until the remainder spills.

         FIXME: The quality of this code isn't the same as the code above.
         1. We don't compute things in an optimal order, high-to-low, in order
            to terminate as quickly as possible.
         2. We mess with pointers and sizes, adding and subtracting and
            adjusting to get things right.  It surely could be streamlined.
         3. The only termination criteria are that we determine that the
            quotient needs to be adjusted, or that we have recomputed
            everything.  We should stop when the remainder is so large
            that no additional subtracting could make it spill.
         4. If nothing else, we should not do two loops of submul_1 over the
            data, instead handle both the triangularization and chopping at
            once.  */

      x = n1;

      if (dn > 2)
        {
          /* Compensate for triangularization.  */
          mp_limb_t y;

          dp = dp_orig;
          if (qn + 1 < dn)
            {
              dp += dn - (qn + 1);
              dn = qn + 1;
            }

          y = np[-2];

          for (i = dn - 3; i >= 0; i--)
            {
              q = qp[i];
              cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);

              if (y < cy)
                {
                  if (x == 0)
                    {
                      cy = mpn_sub_1 (qp, qp, qn, 1);
                      ASSERT_ALWAYS (cy == 0);
                      return qh - cy;
                    }
                  x--;
                }
              y -= cy;
            }
          np[-2] = y;
        }

      dn = dn_orig;
      if (qn + 1 < dn)
        {
          /* Compensate for ignored dividend and divisor tails.  */

          dp = dp_orig;
          np = np_orig;

          if (qh != 0)
            {
              cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
              if (cy != 0)
                {
                  if (x == 0)
                    {
                      if (qn != 0)
                        cy = mpn_sub_1 (qp, qp, qn, 1);
                      return qh - cy;
                    }
                  x--;
                }
            }

          if (qn == 0)
            return qh;

          for (i = dn - qn - 2; i >= 0; i--)
            {
              cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
              cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
              if (cy != 0)
                {
                  if (x == 0)
                    {
                      /* Decrement the quotient.  A borrow out of the qn low
                         limbs must be taken from the high limb, exactly as
                         in the two adjustment exits above; returning plain
                         qh here would drop that borrow.  */
                      cy = mpn_sub_1 (qp, qp, qn, 1);
                      return qh - cy;
                    }
                  x--;
                }
            }
        }
    }

  return qh;
}

Coverage Report

Created: 2025-03-09 06:52

Line	Count	Source (jump to first uncovered line)
1		/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
2		division algorithm.
3
4		Contributed to the GNU project by Torbjorn Granlund.
5
6		THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
7		SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8		GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9
10		Copyright 2007, 2009 Free Software Foundation, Inc.
11
12		This file is part of the GNU MP Library.
13
14		The GNU MP Library is free software; you can redistribute it and/or modify
15		it under the terms of either:
16
17		* the GNU Lesser General Public License as published by the Free
18		Software Foundation; either version 3 of the License, or (at your
19		option) any later version.
20
21		or
22
23		* the GNU General Public License as published by the Free Software
24		Foundation; either version 2 of the License, or (at your option) any
25		later version.
26
27		or both in parallel, as here.
28
29		The GNU MP Library is distributed in the hope that it will be useful, but
30		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32		for more details.
33
34		You should have received copies of the GNU General Public License and the
35		GNU Lesser General Public License along with the GNU MP Library. If not,
36		see https://www.gnu.org/licenses/. */
37
38
39		#include "gmp-impl.h"
40		#include "longlong.h"
41
42		mp_limb_t
43		mpn_sbpi1_div_q (mp_ptr qp,
44		mp_ptr np, mp_size_t nn,
45		mp_srcptr dp, mp_size_t dn,
46		mp_limb_t dinv)
47	1.32k	{
48	1.32k	mp_limb_t qh;
49	1.32k	mp_size_t qn, i;
50	1.32k	mp_limb_t n1, n0;
51	1.32k	mp_limb_t d1, d0;
52	1.32k	mp_limb_t cy, cy1;
53	1.32k	mp_limb_t q;
54	1.32k	mp_limb_t flag;
55
56	1.32k	mp_size_t dn_orig = dn;
57	1.32k	mp_srcptr dp_orig = dp;
58	1.32k	mp_ptr np_orig = np;
59
60	1.32k	ASSERT (dn > 2);
61	1.32k	ASSERT (nn >= dn);
62	1.32k	ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
63
64	1.32k	np += nn;
65
66	1.32k	qn = nn - dn;
67	1.32k	if (qn + 1 < dn)
68	1.16k	{
69	1.16k	dp += dn - (qn + 1);
70	1.16k	dn = qn + 1;
71	1.16k	}
72
73	1.32k	qh = mpn_cmp (np - dn, dp, dn) >= 0;
74	1.32k	if (qh != 0)
75	23	mpn_sub_n (np - dn, np - dn, dp, dn);
76
77	1.32k	qp += qn;
78
79	1.32k	dn -= 2; /* offset dn by 2 for main division loops,
80		saving two iterations in mpn_submul_1. */
81	1.32k	d1 = dp[dn + 1];
82	1.32k	d0 = dp[dn + 0];
83
84	1.32k	np -= 2;
85
86	1.32k	n1 = np[1];
87
88	1.32k	for (i = qn - (dn + 2); i >= 0; i--)
89	0	{
90	0	np--;
91	0	if (UNLIKELY (n1 == d1) && np[1] == d0)
92	0	{
93	0	q = GMP_NUMB_MASK;
94	0	mpn_submul_1 (np - dn, dp, dn + 2, q);
95	0	n1 = np[1]; /* update n1, last loop's value will now be invalid */
96	0	}
97	0	else
98	0	{
99	0	udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
100
101	0	cy = mpn_submul_1 (np - dn, dp, dn, q);
102
103	0	cy1 = n0 < cy;
104	0	n0 = (n0 - cy) & GMP_NUMB_MASK;
105	0	cy = n1 < cy1;
106	0	n1 -= cy1;
107	0	np[0] = n0;
108
109	0	if (UNLIKELY (cy != 0))
110	0	{
111	0	n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
112	0	q--;
113	0	}
114	0	}
115
116	0	*--qp = q;
117	0	}
118
119	1.32k	flag = ~CNST_LIMB(0);
120
121	1.32k	if (dn >= 0)
122	1.32k	{
123	3.60k	for (i = dn; i > 0; i--)
124	2.28k	{
125	2.28k	np--;
126	2.28k	if (UNLIKELY (n1 >= (d1 & flag)))
127	9	{
128	9	q = GMP_NUMB_MASK;
129	9	cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
130
131	9	if (UNLIKELY (n1 != cy))
132	0	{
133	0	if (n1 < (cy & flag))
134	0	{
135	0	q--;
136	0	mpn_add_n (np - dn, np - dn, dp, dn + 2);
137	0	}
138	0	else
139	0	flag = 0;
140	0	}
141	9	n1 = np[1];
142	9	}
143	2.27k	else
144	2.27k	{
145	2.27k	udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
146
147	2.27k	cy = mpn_submul_1 (np - dn, dp, dn, q);
148
149	2.27k	cy1 = n0 < cy;
150	2.27k	n0 = (n0 - cy) & GMP_NUMB_MASK;
151	2.27k	cy = n1 < cy1;
152	2.27k	n1 -= cy1;
153	2.27k	np[0] = n0;
154
155	2.27k	if (UNLIKELY (cy != 0))
156	6	{
157	6	n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
158	6	q--;
159	6	}
160	2.27k	}
161
162	2.28k	*--qp = q;
163
164		/* Truncate operands. */
165	2.28k	dn--;
166	2.28k	dp++;
167	2.28k	}
168
169	1.32k	np--;
170	1.32k	if (UNLIKELY (n1 >= (d1 & flag)))
171	2	{
172	2	q = GMP_NUMB_MASK;
173	2	cy = mpn_submul_1 (np, dp, 2, q);
174
175	2	if (UNLIKELY (n1 != cy))
176	0	{
177	0	if (n1 < (cy & flag))
178	0	{
179	0	q--;
180	0	add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
181	0	}
182	0	else
183	0	flag = 0;
184	0	}
185	2	n1 = np[1];
186	2	}
187	1.32k	else
188	1.32k	{
189	1.32k	udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
190
191	1.32k	np[0] = n0;
192	1.32k	np[1] = n1;
193	1.32k	}
194
195	1.32k	*--qp = q;
196	1.32k	}
197	1.32k	ASSERT_ALWAYS (np[1] == n1);
198	1.32k	np += 2;
199
200
201	1.32k	dn = dn_orig;
202	1.32k	if (UNLIKELY (n1 < (dn & flag)))
203	0	{
204	0	mp_limb_t q, x;
205
206		/* The quotient may be too large if the remainder is small. Recompute
207		for above ignored operand parts, until the remainder spills.
208
209		FIXME: The quality of this code isn't the same as the code above.
210		1. We don't compute things in an optimal order, high-to-low, in order
211		to terminate as quickly as possible.
212		2. We mess with pointers and sizes, adding and subtracting and
213		adjusting to get things right. It surely could be streamlined.
214		3. The only termination criteria are that we determine that the
215		quotient needs to be adjusted, or that we have recomputed
216		everything. We should stop when the remainder is so large
217		that no additional subtracting could make it spill.
218		4. If nothing else, we should not do two loops of submul_1 over the
219		data, instead handle both the triangularization and chopping at
220		once. */
221
222	0	x = n1;
223
224	0	if (dn > 2)
225	0	{
226		/* Compensate for triangularization. */
227	0	mp_limb_t y;
228
229	0	dp = dp_orig;
230	0	if (qn + 1 < dn)
231	0	{
232	0	dp += dn - (qn + 1);
233	0	dn = qn + 1;
234	0	}
235
236	0	y = np[-2];
237
238	0	for (i = dn - 3; i >= 0; i--)
239	0	{
240	0	q = qp[i];
241	0	cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
242
243	0	if (y < cy)
244	0	{
245	0	if (x == 0)
246	0	{
247	0	cy = mpn_sub_1 (qp, qp, qn, 1);
248	0	ASSERT_ALWAYS (cy == 0);
249	0	return qh - cy;
250	0	}
251	0	x--;
252	0	}
253	0	y -= cy;
254	0	}
255	0	np[-2] = y;
256	0	}
257
258	0	dn = dn_orig;
259	0	if (qn + 1 < dn)
260	0	{
261		/* Compensate for ignored dividend and divisor tails. */
262
263	0	dp = dp_orig;
264	0	np = np_orig;
265
266	0	if (qh != 0)
267	0	{
268	0	cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
269	0	if (cy != 0)
270	0	{
271	0	if (x == 0)
272	0	{
273	0	if (qn != 0)
274	0	cy = mpn_sub_1 (qp, qp, qn, 1);
275	0	return qh - cy;
276	0	}
277	0	x--;
278	0	}
279	0	}
280
281	0	if (qn == 0)
282	0	return qh;
283
284	0	for (i = dn - qn - 2; i >= 0; i--)
285	0	{
286	0	cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
287	0	cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
288	0	if (cy != 0)
289	0	{
290	0	if (x == 0)
291	0	{
292	0	cy = mpn_sub_1 (qp, qp, qn, 1);
293	0	return qh;
294	0	}
295	0	x--;
296	0	}
297	0	}
298	0	}
299	0	}
300
301	1.32k	return qh;
302	1.32k	}