/src/gmp/mpn/hgcd2.c

Source (jump to first uncovered line)
/* hgcd2.c

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/generic/hgcd2-div.h"

#if GMP_NAIL_BITS != 0
#error Nails not implemented
#endif

/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
   matrix M. Returns 1 if we make progress, i.e. can perform at least
   one subtraction. Otherwise returns zero.

   The inputs are two double-limb numbers, a = (ah, al) and b = (bh, bl),
   each given as a high and a low limb.  If either high limb is less than
   2, or if the very first subtraction would leave a high limb less than
   2, zero is returned and M is left untouched.  Otherwise the four
   elements of the matrix are stored in M->u and 1 is returned.  */

/* FIXME: Possible optimizations:

   The div2 function starts with checking the most significant bit of
   the numerator. We could maintain normalized operands here, call
   hgcd with normalized operands only, which should make the code
   simpler and possibly faster.

   Experiment with table lookups on the most significant bits.

   This function is also a candidate for assembler implementation.
*/
int
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
     struct hgcd_matrix1 *M)
{
  /* Elements of the matrix being built; copied to M->u at the done
     label.  */
  mp_limb_t u00, u01, u10, u11;

  /* No progress is possible unless both high limbs are at least 2.  */
  if (ah < 2 || bh < 2)
    return 0;

  /* First step: subtract the smaller number from the larger one.  Give up
     if the high limb of the difference drops below 2; otherwise start M
     as the matrix of that single subtraction.  */
  if (ah > bh || (ah == bh && al > bl))
    {
      sub_ddmmss (ah, al, ah, al, bh, bl);
      if (ah < 2)
  return 0;

      /* a -= b, so M = (1 1 ; 0 1).  */
      u00 = u01 = u11 = 1;
      u10 = 0;
    }
  else
    {
      sub_ddmmss (bh, bl, bh, bl, ah, al);
      if (bh < 2)
  return 0;

      /* b -= a, so M = (1 0 ; 1 1).  */
      u00 = u10 = u11 = 1;
      u01 = 0;
    }

  /* If a now has the smaller high limb, enter the loop below at its
     second half (label subtract_a), which reduces b by a.  */
  if (ah < bh)
    goto subtract_a;

  /* Double precision loop.  Each pass first reduces a by a multiple of b
     and then, from subtract_a on, reduces b by a multiple of a.  */
  for (;;)
    {
      ASSERT (ah >= bh);
      if (ah == bh)
  goto done;

      /* Once ah is below 2^(GMP_LIMB_BITS / 2), shift both numbers up by
         half a limb, keep only the resulting high limb, and continue in
         the single precision loop after this one.  */
      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
  {
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));

    break;
  }

      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
         1), affecting the second column of M. */
      ASSERT (ah > bh);
      sub_ddmmss (ah, al, ah, al, bh, bl);

      /* The difference is too small: stop, leaving this subtraction out
         of M.  */
      if (ah < 2)
  goto done;

      if (ah <= bh)
  {
    /* Use q = 1 */
    u01 += u00;
    u11 += u10;
  }
      else
  {
    /* div2 is not defined in this file (presumably it comes from
       hgcd2-div.h).  Judging by its use here, it returns the quotient
       of (ah, al) by (bh, bl) and stores the two-limb remainder in r,
       low limb first.  */
    mp_limb_t r[2];
    mp_limb_t q = div2 (r, ah, al, bh, bl);
    al = r[0]; ah = r[1];
    if (ah < 2)
      {
        /* A is too small, but q is correct.  One b was already
           subtracted before the division, so updating M with q
           rather than q + 1 records one subtraction less than was
           carried out.  */
        u01 += q * u00;
        u11 += q * u10;
        goto done;
      }
    /* Also count the subtraction made before the division.  */
    q++;
    u01 += q * u00;
    u11 += q * u10;
  }
    subtract_a:
      /* Second half of the pass, with the roles of a and b swapped.  */
      ASSERT (bh >= ah);
      if (ah == bh)
  goto done;

      /* Same switch to single precision as above, but continuing at the
         second half (label subtract_a1) of the single precision loop.  */
      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
  {
    ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
    bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));

    goto subtract_a1;
  }

      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
         1), affecting the first column of M. */
      sub_ddmmss (bh, bl, bh, bl, ah, al);

      /* The difference is too small: stop, leaving this subtraction out
         of M.  */
      if (bh < 2)
  goto done;

      if (bh <= ah)
  {
    /* Use q = 1 */
    u00 += u01;
    u10 += u11;
  }
      else
  {
    /* Same use of div2 as above, dividing (bh, bl) by (ah, al).  */
    mp_limb_t r[2];
    mp_limb_t q = div2 (r, bh, bl, ah, al);
    bl = r[0]; bh = r[1];
    if (bh < 2)
      {
        /* B is too small, but q is correct. */
        u00 += q * u01;
        u10 += q * u11;
        goto done;
      }
    /* Also count the subtraction made before the division.  */
    q++;
    u00 += q * u01;
    u10 += q * u11;
  }
    }

  /* NOTE: Since we discard the least significant half limb, we don't get a
     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
  /* Single precision loop.  Same structure as the loop above, but working
     on the single limbs ah and bh only, and stopping as soon as a
     reduced value falls below 2^(GMP_LIMB_BITS / 2 + 1).  */
  for (;;)
    {
      ASSERT (ah >= bh);

      ah -= bh;
      /* Too small: stop, leaving this subtraction out of M.  */
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
  break;

      if (ah <= bh)
  {
    /* Use q = 1 */
    u01 += u00;
    u11 += u10;
  }
      else
  {
    /* div1 is not defined in this file (presumably it comes from
       hgcd2-div.h).  As used here, field d1 of its result is taken as
       the quotient and field d0 as the remainder.  */
    mp_double_limb_t rq = div1 (ah, bh);
    mp_limb_t q = rq.d1;
    ah = rq.d0;

    if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
      {
        /* A is too small, but q is correct. */
        u01 += q * u00;
        u11 += q * u10;
        break;
      }
    /* Also count the subtraction made before the division.  */
    q++;
    u01 += q * u00;
    u11 += q * u10;
  }
    subtract_a1:
      /* Second half of the pass, with the roles of a and b swapped.  */
      ASSERT (bh >= ah);

      bh -= ah;
      /* Too small: stop, leaving this subtraction out of M.  */
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
  break;

      if (bh <= ah)
  {
    /* Use q = 1 */
    u00 += u01;
    u10 += u11;
  }
      else
  {
    mp_double_limb_t rq = div1 (bh, ah);
    mp_limb_t q = rq.d1;
    bh = rq.d0;

    if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
      {
        /* B is too small, but q is correct. */
        u00 += q * u01;
        u10 += q * u11;
        break;
      }
    /* Also count the subtraction made before the division.  */
    q++;
    u00 += q * u01;
    u10 += q * u11;
  }
    }

  /* Common exit of both loops: hand the matrix to the caller.  At least
     the first subtraction has been recorded, hence the return value 1.  */
 done:
  M->u[0][0] = u00; M->u[0][1] = u01;
  M->u[1][0] = u10; M->u[1][1] = u11;

  return 1;
}

/* Multiplies the vector (a;b) by the matrix M = (u00, u01; u10, u11),
 * storing the result in (r;b):
 *
 *   r <-- u00 * a + u10 * b
 *   b <-- u01 * a + u11 * b
 *
 * The inputs a and b are read as n limbs each.  Limb n of both r and b
 * is written, so each of those two vectors must have space for n + 1
 * limbs.  The first result goes to the separate buffer r rather than
 * back into a, which avoids a copy.
 *
 * Returns n + 1 if either of the two limbs stored at index n is
 * nonzero, and n otherwise. */
mp_size_t
mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
                             mp_ptr rp, mp_srcptr ap, mp_ptr bp,
                             mp_size_t n)
{
  mp_limb_t r_top;  /* limb n of the new r */
  mp_limb_t b_top;  /* limb n of the new b */

  /* r has to be formed first: it needs the original b, which the second
     step overwrites in place. */
#if HAVE_NATIVE_mpn_addaddmul_1msb0
  r_top = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
  b_top = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
#else
  /* r = u00 * a, then r += u10 * b */
  r_top = mpn_mul_1 (rp, ap, n, M->u[0][0]);
  r_top = r_top + mpn_addmul_1 (rp, bp, n, M->u[1][0]);

  /* b *= u11, then b += u01 * a */
  b_top = mpn_mul_1 (bp, bp, n, M->u[1][1]);
  b_top = b_top + mpn_addmul_1 (bp, ap, n, M->u[0][1]);
#endif

  rp[n] = r_top;
  bp[n] = b_top;

  /* The result occupies one more limb when either top limb is nonzero. */
  if ((r_top | b_top) != 0)
    return n + 1;

  return n;
}

Coverage Report

Created: 2024-11-25 06:29

Line	Count	Source (jump to first uncovered line)
1		/* hgcd2.c
2
3		THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4		SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5		GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7		Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
8		Inc.
9
10		This file is part of the GNU MP Library.
11
12		The GNU MP Library is free software; you can redistribute it and/or modify
13		it under the terms of either:
14
15		* the GNU Lesser General Public License as published by the Free
16		Software Foundation; either version 3 of the License, or (at your
17		option) any later version.
18
19		or
20
21		* the GNU General Public License as published by the Free Software
22		Foundation; either version 2 of the License, or (at your option) any
23		later version.
24
25		or both in parallel, as here.
26
27		The GNU MP Library is distributed in the hope that it will be useful, but
28		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
30		for more details.
31
32		You should have received copies of the GNU General Public License and the
33		GNU Lesser General Public License along with the GNU MP Library. If not,
34		see https://www.gnu.org/licenses/. */
35
36		#include "gmp-impl.h"
37		#include "longlong.h"
38
39		#include "mpn/generic/hgcd2-div.h"
40
41		#if GMP_NAIL_BITS != 0
42		#error Nails not implemented
43		#endif
44
45		/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
46		matrix M. Returns 1 if we make progress, i.e. can perform at least
47		one subtraction. Otherwise returns zero. */
48
49		/* FIXME: Possible optimizations:
50
51		The div2 function starts with checking the most significant bit of
52		the numerator. We could maintain normalized operands here, call
53		hgcd with normalized operands only, which should make the code
54		simpler and possibly faster.
55
56		Experiment with table lookups on the most significant bits.
57
58		This function is also a candidate for assembler implementation.
59		*/
60		int
61		mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
62		struct hgcd_matrix1 *M)
63	101k	{
64	101k	mp_limb_t u00, u01, u10, u11;
65
66	101k	if (ah < 2 \|\| bh < 2)
67	0	return 0;
68
69	101k	if (ah > bh \|\| (ah == bh && al > bl))
70	56.9k	{
71	56.9k	sub_ddmmss (ah, al, ah, al, bh, bl);
72	56.9k	if (ah < 2)
73	0	return 0;
74
75	56.9k	u00 = u01 = u11 = 1;
76	56.9k	u10 = 0;
77	56.9k	}
78	44.7k	else
79	44.7k	{
80	44.7k	sub_ddmmss (bh, bl, bh, bl, ah, al);
81	44.7k	if (bh < 2)
82	0	return 0;
83
84	44.7k	u00 = u10 = u11 = 1;
85	44.7k	u01 = 0;
86	44.7k	}
87
88	101k	if (ah < bh)
89	56.9k	goto subtract_a;
90
91	44.7k	for (;;)
92	979k	{
93	979k	ASSERT (ah >= bh);
94	979k	if (ah == bh)
95	0	goto done;
96
97	979k	if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
98	60.9k	{
99	60.9k	ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
100	60.9k	bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
101
102	60.9k	break;
103	60.9k	}
104
105		/* Subtract a -= q b, and multiply M from the right by (1 q ; 0
106		1), affecting the second column of M. */
107	918k	ASSERT (ah > bh);
108	918k	sub_ddmmss (ah, al, ah, al, bh, bl);
109
110	918k	if (ah < 2)
111	0	goto done;
112
113	918k	if (ah <= bh)
114	349k	{
115		/* Use q = 1 */
116	349k	u01 += u00;
117	349k	u11 += u10;
118	349k	}
119	569k	else
120	569k	{
121	569k	mp_limb_t r[2];
122	569k	mp_limb_t q = div2 (r, ah, al, bh, bl);
123	569k	al = r[0]; ah = r[1];
124	569k	if (ah < 2)
125	0	{
126		/* A is too small, but q is correct. */
127	0	u01 += q * u00;
128	0	u11 += q * u10;
129	0	goto done;
130	0	}
131	569k	q++;
132	569k	u01 += q * u00;
133	569k	u11 += q * u10;
134	569k	}
135	975k	subtract_a:
136	975k	ASSERT (bh >= ah);
137	975k	if (ah == bh)
138	0	goto done;
139
140	975k	if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
141	40.6k	{
142	40.6k	ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
143	40.6k	bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
144
145	40.6k	goto subtract_a1;
146	40.6k	}
147
148		/* Subtract b -= q a, and multiply M from the right by (1 0 ; q
149		1), affecting the first column of M. */
150	935k	sub_ddmmss (bh, bl, bh, bl, ah, al);
151
152	935k	if (bh < 2)
153	0	goto done;
154
155	935k	if (bh <= ah)
156	382k	{
157		/* Use q = 1 */
158	382k	u00 += u01;
159	382k	u10 += u11;
160	382k	}
161	552k	else
162	552k	{
163	552k	mp_limb_t r[2];
164	552k	mp_limb_t q = div2 (r, bh, bl, ah, al);
165	552k	bl = r[0]; bh = r[1];
166	552k	if (bh < 2)
167	0	{
168		/* B is too small, but q is correct. */
169	0	u00 += q * u01;
170	0	u10 += q * u11;
171	0	goto done;
172	0	}
173	552k	q++;
174	552k	u00 += q * u01;
175	552k	u10 += q * u11;
176	552k	}
177	935k	}
178
179		/* NOTE: Since we discard the least significant half limb, we don't get a
180		truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
181		/* Single precision loop */
182	60.9k	for (;;)
183	878k	{
184	878k	ASSERT (ah >= bh);
185
186	878k	ah -= bh;
187	878k	if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
188	28.4k	break;
189
190	849k	if (ah <= bh)
191	378k	{
192		/* Use q = 1 */
193	378k	u01 += u00;
194	378k	u11 += u10;
195	378k	}
196	471k	else
197	471k	{
198	471k	mp_double_limb_t rq = div1 (ah, bh);
199	471k	mp_limb_t q = rq.d1;
200	471k	ah = rq.d0;
201
202	471k	if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
203	24.3k	{
204		/* A is too small, but q is correct. */
205	24.3k	u01 += q * u00;
206	24.3k	u11 += q * u10;
207	24.3k	break;
208	24.3k	}
209	447k	q++;
210	447k	u01 += q * u00;
211	447k	u11 += q * u10;
212	447k	}
213	866k	subtract_a1:
214	866k	ASSERT (bh >= ah);
215
216	866k	bh -= ah;
217	866k	if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
218	24.3k	break;
219
220	841k	if (bh <= ah)
221	333k	{
222		/* Use q = 1 */
223	333k	u00 += u01;
224	333k	u10 += u11;
225	333k	}
226	508k	else
227	508k	{
228	508k	mp_double_limb_t rq = div1 (bh, ah);
229	508k	mp_limb_t q = rq.d1;
230	508k	bh = rq.d0;
231
232	508k	if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
233	24.3k	{
234		/* B is too small, but q is correct. */
235	24.3k	u00 += q * u01;
236	24.3k	u10 += q * u11;
237	24.3k	break;
238	24.3k	}
239	483k	q++;
240	483k	u00 += q * u01;
241	483k	u10 += q * u11;
242	483k	}
243	841k	}
244
245	101k	done:
246	101k	M->u[0][0] = u00; M->u[0][1] = u01;
247	101k	M->u[1][0] = u10; M->u[1][1] = u11;
248
249	101k	return 1;
250	60.9k	}
251
252		/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
253		* have space for n + 1 limbs. Uses three buffers to avoid a copy*/
254		mp_size_t
255		mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
256		mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
257	101k	{
258	101k	mp_limb_t ah, bh;
259
260		/* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
261
262		r = u00 * a
263		r += u10 * b
264		b *= u11
265		b += u01 * a
266		*/
267
268		#if HAVE_NATIVE_mpn_addaddmul_1msb0
269		ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
270		bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
271		#else
272	101k	ah = mpn_mul_1 (rp, ap, n, M->u[0][0]);
273	101k	ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
274
275	101k	bh = mpn_mul_1 (bp, bp, n, M->u[1][1]);
276	101k	bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
277	101k	#endif
278	101k	rp[n] = ah;
279	101k	bp[n] = bh;
280
281	101k	n += (ah \| bh) > 0;
282	101k	return n;
283	101k	}