/src/gmp-6.2.1/mpn/mu_div_q.c

Source (jump to first uncovered line)
/* mpn_mu_div_q.

   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/

/*
  Things to work on:

  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
     probably close to optimal, except when mpn_mu_divappr_q fails.

  2. We used to fall back to mpn_mu_div_qr when we detect a possible
     mpn_mu_divappr_q rounding problem, now we multiply and compare.
     Unfortunately, since mpn_mu_divappr_q does not return the partial
     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
     solve that.

  3. The allocations done here should be made from the scratch area, which
     then would need to be amended.
*/

#include <stdlib.h>   /* for NULL */
#include "gmp-impl.h"


mp_limb_t
mpn_mu_div_q (mp_ptr qp,
        mp_srcptr np, mp_size_t nn,
        mp_srcptr dp, mp_size_t dn,
        mp_ptr scratch)
{
  mp_ptr tp, rp;
  mp_size_t qn;
  mp_limb_t cy, qh;
  TMP_DECL;

  TMP_MARK;

  qn = nn - dn;

  tp = TMP_BALLOC_LIMBS (qn + 1);

  if (qn >= dn)     /* nn >= 2*dn + 1 */
    {
       /* |_______________________|   dividend
       |________|   divisor  */

      rp = TMP_BALLOC_LIMBS (nn + 1);
      MPN_COPY (rp + 1, np, nn);
      rp[0] = 0;

      qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
      if (qh != 0)
  mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);

      cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);

      if (UNLIKELY (cy != 0))
  {
    /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
       canonically reduced, replace the returned value of B^(qn-dn)+eps
       by the largest possible value.  */
    mp_size_t i;
    for (i = 0; i < qn + 1; i++)
      tp[i] = GMP_NUMB_MAX;
  }

      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
   smaller than the max error, we cannot trust the quotient.  */
      if (tp[0] > 4)
  {
    MPN_COPY (qp, tp + 1, qn);
  }
      else
  {
    mp_limb_t cy;
    mp_ptr pp;

    pp = rp;
    mpn_mul (pp, tp + 1, qn, dp, dn);

    cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;

    if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
      qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
    else /* Same as above */
      MPN_COPY (qp, tp + 1, qn);
  }
    }
  else
    {
       /* |_______________________|   dividend
     |________________|   divisor  */

      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
   here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
   the most significant dn-1 limbs will actually be read, but it is not
   pretty.  */

      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
           dp + dn - (qn + 1), qn + 1, scratch);

      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
         error from the divisor truncation.  */
      if (tp[0] > 6)
  {
    MPN_COPY (qp, tp + 1, qn);
  }
      else
  {
    mp_limb_t cy;

    /* FIXME: a shorter product should be enough; we may use already
       allocated space... */
    rp = TMP_BALLOC_LIMBS (nn);
    mpn_mul (rp, dp, dn, tp + 1, qn);

    cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;

    if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
      qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
    else /* Same as above */
      MPN_COPY (qp, tp + 1, qn);
  }
    }

  TMP_FREE;
  return qh;
}

mp_size_t
mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
  mp_size_t qn;

  qn = nn - dn;
  if (qn >= dn)
    {
      return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
    }
  else
    {
      return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
    }
}

Line	Count	Source (jump to first uncovered line)
1		/* mpn_mu_div_q.
2
3		Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
4
5		THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6		SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7		GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9		Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
10
11		This file is part of the GNU MP Library.
12
13		The GNU MP Library is free software; you can redistribute it and/or modify
14		it under the terms of either:
15
16		* the GNU Lesser General Public License as published by the Free
17		Software Foundation; either version 3 of the License, or (at your
18		option) any later version.
19
20		or
21
22		* the GNU General Public License as published by the Free Software
23		Foundation; either version 2 of the License, or (at your option) any
24		later version.
25
26		or both in parallel, as here.
27
28		The GNU MP Library is distributed in the hope that it will be useful, but
29		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31		for more details.
32
33		You should have received copies of the GNU General Public License and the
34		GNU Lesser General Public License along with the GNU MP Library. If not,
35		see https://www.gnu.org/licenses/. */
36
37
38		/*
39		The idea of the algorithm used herein is to compute a smaller inverted value
40		than used in the standard Barrett algorithm, and thus save time in the
41		Newton iterations, and pay just a small price when using the inverted value
42		for developing quotient bits. This algorithm was presented at ICMS 2006.
43		*/
44
45		/*
46		Things to work on:
47
48		1. This is a rudimentary implementation of mpn_mu_div_q. The algorithm is
49		probably close to optimal, except when mpn_mu_divappr_q fails.
50
51		2. We used to fall back to mpn_mu_div_qr when we detect a possible
52		mpn_mu_divappr_q rounding problem, now we multiply and compare.
53		Unfortunately, since mpn_mu_divappr_q does not return the partial
54		remainder, this also doesn't become optimal. A mpn_mu_divappr_qr could
55		solve that.
56
57		3. The allocations done here should be made from the scratch area, which
58		then would need to be amended.
59		*/
60
61		#include <stdlib.h> /* for NULL */
62		#include "gmp-impl.h"
63
64
65		mp_limb_t
66		mpn_mu_div_q (mp_ptr qp,
67		mp_srcptr np, mp_size_t nn,
68		mp_srcptr dp, mp_size_t dn,
69		mp_ptr scratch)
70	0	{
71	0	mp_ptr tp, rp;
72	0	mp_size_t qn;
73	0	mp_limb_t cy, qh;
74	0	TMP_DECL;
75
76	0	TMP_MARK;
77
78	0	qn = nn - dn;
79
80	0	tp = TMP_BALLOC_LIMBS (qn + 1);
81
82	0	if (qn >= dn) /* nn >= 2dn + 1 /
83	0	{
84		/* \|_______________________\| dividend
85		\|________\| divisor */
86
87	0	rp = TMP_BALLOC_LIMBS (nn + 1);
88	0	MPN_COPY (rp + 1, np, nn);
89	0	rp[0] = 0;
90
91	0	qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
92	0	if (qh != 0)
93	0	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
94
95	0	cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
96
97	0	if (UNLIKELY (cy != 0))
98	0	{
99		/* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
100		canonically reduced, replace the returned value of B^(qn-dn)+eps
101		by the largest possible value. */
102	0	mp_size_t i;
103	0	for (i = 0; i < qn + 1; i++)
104	0	tp[i] = GMP_NUMB_MAX;
105	0	}
106
107		/* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is
108		smaller than the max error, we cannot trust the quotient. */
109	0	if (tp[0] > 4)
110	0	{
111	0	MPN_COPY (qp, tp + 1, qn);
112	0	}
113	0	else
114	0	{
115	0	mp_limb_t cy;
116	0	mp_ptr pp;
117
118	0	pp = rp;
119	0	mpn_mul (pp, tp + 1, qn, dp, dn);
120
121	0	cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
122
123	0	if (cy \|\| mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
124	0	qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
125	0	else /* Same as above */
126	0	MPN_COPY (qp, tp + 1, qn);
127	0	}
128	0	}
129	0	else
130	0	{
131		/* \|_______________________\| dividend
132		\|________________\| divisor */
133
134		/* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
135		here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only
136		the most significant dn-1 limbs will actually be read, but it is not
137		pretty. */
138
139	0	qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
140	0	dp + dn - (qn + 1), qn + 1, scratch);
141
142		/* The max error of mpn_mu_divappr_q is +4, but we get an additional
143		error from the divisor truncation. */
144	0	if (tp[0] > 6)
145	0	{
146	0	MPN_COPY (qp, tp + 1, qn);
147	0	}
148	0	else
149	0	{
150	0	mp_limb_t cy;
151
152		/* FIXME: a shorter product should be enough; we may use already
153		allocated space... */
154	0	rp = TMP_BALLOC_LIMBS (nn);
155	0	mpn_mul (rp, dp, dn, tp + 1, qn);
156
157	0	cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
158
159	0	if (cy \|\| mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
160	0	qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
161	0	else /* Same as above */
162	0	MPN_COPY (qp, tp + 1, qn);
163	0	}
164	0	}
165
166	0	TMP_FREE;
167	0	return qh;
168	0	}
169
170		mp_size_t
171		mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
172	0	{
173	0	mp_size_t qn;
174
175	0	qn = nn - dn;
176	0	if (qn >= dn)
177	0	{
178	0	return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
179	0	}
180	0	else
181	0	{
182	0	return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
183	0	}
184	0	}

Coverage Report

Created: 2023-02-22 06:39