/src/gmp-6.2.1/mpn/toom_interpolate_5pts.c

Source (jump to first uncovered line)
/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.

   Contributed to the GNU project by Robert Harley.
   Improvements by Paul Zimmermann and Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"

void
mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
         mp_size_t k, mp_size_t twor, int sa,
         mp_limb_t vinf0)
{
  mp_limb_t cy, saved;
  mp_size_t twok;
  mp_size_t kk1;
  mp_ptr c1, v1, c3, vinf;

  twok = k + k;
  kk1 = twok + 1;

  c1 = c  + k;
  v1 = c1 + k;
  c3 = v1 + k;
  vinf = c3 + k;

#define v0 (c)
  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
  */
  if (sa)
    ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
  else
    ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */

  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
                  /* (5 3 1 1 0)*/

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */

  /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
     tm1 >= 0                                         (0  1 0  1 0)
     No carry comes out from {v1, kk1} +/- {vm1, kk1},
     and the division by two is exact.
     If (sa!=0) the sign of vm1 is negative */
  if (sa)
    {
#ifdef HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
      ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
#endif
    }
  else
    {
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
      ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
#endif
    }

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */

  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
     t1 >= 0
  */
  vinf[0] -= mpn_sub_n (v1, v1, c, twok);

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */

  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
  */
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
  mpn_rsh1sub_n (v2, v2, v1, kk1);
#else
  ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
  ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
#endif

  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */

  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
     result is v1 >= 0
  */
  ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));

  /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
  cy = mpn_add_n (c1, c1, vm1, kk1);
  MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
  /* Memory allocated for vm1 is now free, it can be recycled ...*/

  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
     result is v2 >= 0 */
  saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
  cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
#else
  /* Overwrite unused vm1 */
  cy = mpn_lshift (vm1, vinf, twor, 1);
  cy += mpn_sub_n (v2, v2, vm1, twor);
#endif
  MPN_DECR_U (v2 + twor, kk1 - twor, cy);

  /* Current matrix is
     [1 0 0 0 0; vinf
      0 1 0 0 0; v2
      1 0 1 0 0; v1
      0 1 0 1 0; vm1
      0 0 0 0 1] v0
     Some values already are in-place (we added vm1 in the correct position)
     | vinf|  v1 |  v0 |
        | vm1 |
     One still is in a separated area
  | +v2 |
     We have to compute v1-=vinf; vm1 -= v2,
     |-vinf|
        | -v2 |
     Carefully reordering operations we can avoid to compute twice the sum
     of the high half of v2 plus the low half of vinf.
  */

  /* Add the high half of t2 in {vinf} */
  if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
    cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
    MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
  } else { /* triggered only by very unbalanced cases like
        (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
    ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
  }
  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
     result is >= 0 */
  /* Side effect: we also subtracted (high half) vm1 -= v2 */
  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
  vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
  vinf[0] = saved;
  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */

  /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
     Operate only on the low half.
  */
  cy = mpn_sub_n (c1, c1, v2, k);
  MPN_DECR_U (v1, kk1, cy);

  /********************* Beginning the final phase **********************/

  /* Most of the recomposition was done */

  /* add t2 in {c+3k, ...}, but only the low half */
  cy = mpn_add_n (c3, c3, v2, k);
  vinf[0] += cy;
  ASSERT(vinf[0] >= cy); /* No carry */
  MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */

#undef v0
}

Line	Count	Source (jump to first uncovered line)
1		/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
2
3		Contributed to the GNU project by Robert Harley.
4		Improvements by Paul Zimmermann and Marco Bodrato.
5
6		THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
7		SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8		GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10		Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
11
12		This file is part of the GNU MP Library.
13
14		The GNU MP Library is free software; you can redistribute it and/or modify
15		it under the terms of either:
16
17		* the GNU Lesser General Public License as published by the Free
18		Software Foundation; either version 3 of the License, or (at your
19		option) any later version.
20
21		or
22
23		* the GNU General Public License as published by the Free Software
24		Foundation; either version 2 of the License, or (at your option) any
25		later version.
26
27		or both in parallel, as here.
28
29		The GNU MP Library is distributed in the hope that it will be useful, but
30		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32		for more details.
33
34		You should have received copies of the GNU General Public License and the
35		GNU Lesser General Public License along with the GNU MP Library. If not,
36		see https://www.gnu.org/licenses/. */
37
38		#include "gmp-impl.h"
39
40		void
41		mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
42		mp_size_t k, mp_size_t twor, int sa,
43		mp_limb_t vinf0)
44	5.07M	{
45	5.07M	mp_limb_t cy, saved;
46	5.07M	mp_size_t twok;
47	5.07M	mp_size_t kk1;
48	5.07M	mp_ptr c1, v1, c3, vinf;
49
50	5.07M	twok = k + k;
51	5.07M	kk1 = twok + 1;
52
53	5.07M	c1 = c + k;
54	5.07M	v1 = c1 + k;
55	5.07M	c3 = v1 + k;
56	5.07M	vinf = c3 + k;
57
58	5.07M	#define v0 (c)
59		/* (1) v2 <- v2-vm1 < v2+\|vm1\|, (16 8 4 2 1) - (1 -1 1 -1 1) =
60		thus 0 <= v2 < 50B^(2k) < 2^6B^(2k) (15 9 3 3 0)
61		*/
62	5.07M	if (sa)
63	5.07M	ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
64	3.78M	else
65	5.07M	ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
66
67		/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
68		v0 v1 hi(vinf) \|vm1\| v2-vm1 EMPTY */
69
70	5.07M	ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */
71		/* (5 3 1 1 0)*/
72
73		/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
74		v0 v1 hi(vinf) \|vm1\| (v2-vm1)/3 EMPTY */
75
76		/* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
77		tm1 >= 0 (0 1 0 1 0)
78		No carry comes out from {v1, kk1} +/- {vm1, kk1},
79		and the division by two is exact.
80		If (sa!=0) the sign of vm1 is negative */
81	5.07M	if (sa)
82	1.28M	{
83	1.28M	#ifdef HAVE_NATIVE_mpn_rsh1add_n
84	1.28M	mpn_rsh1add_n (vm1, v1, vm1, kk1);
85		#else
86		ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
87		ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
88		#endif
89	1.28M	}
90	3.78M	else
91	3.78M	{
92	3.78M	#ifdef HAVE_NATIVE_mpn_rsh1sub_n
93	3.78M	mpn_rsh1sub_n (vm1, v1, vm1, kk1);
94		#else
95		ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
96		ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
97		#endif
98	3.78M	}
99
100		/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
101		v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */
102
103		/* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
104		t1 >= 0
105		*/
106	5.07M	vinf[0] -= mpn_sub_n (v1, v1, c, twok);
107
108		/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
109		v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */
110
111		/* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
112		t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
113		*/
114	5.07M	#ifdef HAVE_NATIVE_mpn_rsh1sub_n
115	5.07M	mpn_rsh1sub_n (v2, v2, v1, kk1);
116		#else
117		ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
118		ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
119		#endif
120
121		/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
122		v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */
123
124		/* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
125		result is v1 >= 0
126		*/
127	5.07M	ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
128
129		/* We do not need to read the value in vm1, so we add it in {c+k, ...} */
130	5.07M	cy = mpn_add_n (c1, c1, vm1, kk1);
131	5.07M	MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
132		/* Memory allocated for vm1 is now free, it can be recycled ...*/
133
134		/* (6) v2 <- v2 - 2vinf, (2 1 0 0 0) - 2(1 0 0 0 0) = (0 1 0 0 0)
135		result is v2 >= 0 */
136	5.07M	saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */
137	5.07M	vinf[0] = vinf0; /* Set the right value for vinf0 */
138	5.07M	#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
139	5.07M	cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
140		#else
141		/* Overwrite unused vm1 */
142		cy = mpn_lshift (vm1, vinf, twor, 1);
143		cy += mpn_sub_n (v2, v2, vm1, twor);
144		#endif
145	5.07M	MPN_DECR_U (v2 + twor, kk1 - twor, cy);
146
147		/* Current matrix is
148		[1 0 0 0 0; vinf
149		0 1 0 0 0; v2
150		1 0 1 0 0; v1
151		0 1 0 1 0; vm1
152		0 0 0 0 1] v0
153		Some values already are in-place (we added vm1 in the correct position)
154		\| vinf\| v1 \| v0 \|
155		\| vm1 \|
156		One still is in a separated area
157		\| +v2 \|
158		We have to compute v1-=vinf; vm1 -= v2,
159		\|-vinf\|
160		\| -v2 \|
161		Carefully reordering operations we can avoid to compute twice the sum
162		of the high half of v2 plus the low half of vinf.
163		*/
164
165		/* Add the high half of t2 in {vinf} */
166	5.07M	if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */
167	5.07M	cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
168	5.07M	MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
169	5.07M	} else { /* triggered only by very unbalanced cases like
170		(k+k+(k-2))x(k+k+1) , should be handled by toom32 */
171	0	ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
172	0	}
173		/* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
174		result is >= 0 */
175		/* Side effect: we also subtracted (high half) vm1 -= v2 */
176	5.07M	cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */
177	5.07M	vinf0 = vinf[0]; /* Save again the right value for vinf0 */
178	5.07M	vinf[0] = saved;
179	5.07M	MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */
180
181		/* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
182		Operate only on the low half.
183		*/
184	5.07M	cy = mpn_sub_n (c1, c1, v2, k);
185	5.07M	MPN_DECR_U (v1, kk1, cy);
186
187		/******************* Beginning the final phase ********************/
188
189		/* Most of the recomposition was done */
190
191		/* add t2 in {c+3k, ...}, but only the low half */
192	5.07M	cy = mpn_add_n (c3, c3, v2, k);
193	5.07M	vinf[0] += cy;
194	5.07M	ASSERT(vinf[0] >= cy); /* No carry */
195	5.07M	MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
196
197	5.07M	#undef v0
198	5.07M	}

Coverage Report

Created: 2024-06-28 06:19