/src/gmp-6.2.1/mpn/toom_interpolate_6pts.c

Source (jump to first uncovered line)
/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"

#define BINVERT_3 MODLIMB_INVERSE_3

/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
#ifndef mpn_divexact_by3
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
#else
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
#endif
#endif

/* Interpolation for Toom-3.5, using the evaluation points: infinity,
   1, -1, 2, -2. More precisely, we want to compute
   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
   six values

     w5 = f(0),
     w4 = f(-1),
     w3 = f(1)
     w2 = f(-2),
     w1 = f(2),
     w0 = limit at infinity of f(x) / x^5,

   The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
   {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
   {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
   significant limbs small). f(-1) and f(-2) may be negative, signs
   determined by the flag bits. All intermediate results are positive.
   Inputs are destroyed.

   Interpolation sequence was taken from the paper: "Integer and
   Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
   Some slight variations were introduced: adaptation to "gmp
   instruction set", and a final saving of an operation by interlacing
   interpolation and recomposition phases.
*/

void
mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
         mp_ptr w4, mp_ptr w2, mp_ptr w1,
         mp_size_t w0n)
{
  mp_limb_t cy;
  /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
  mp_limb_t cy4, cy6, embankment;

  ASSERT( n > 0 );
  ASSERT( 2*n >= w0n && w0n > 0 );

#define w5  pp          /* 2n   */
#define w3  (pp + 2 * n)      /* 2n+1 */
#define w0  (pp + 5 * n)      /* w0n  */

  /* Interpolate with sequence:
     W2 =(W1 - W2)>>2
     W1 =(W1 - W5)>>1
     W1 =(W1 - W2)>>1
     W4 =(W3 - W4)>>1
     W2 =(W2 - W4)/3
     W3 = W3 - W4 - W5
     W1 =(W1 - W3)/3
     // Last steps are mixed with recomposition...
     W2 = W2 - W0<<2
     W4 = W4 - W2
     W3 = W3 - W1
     W2 = W2 - W0
  */

  /* W2 =(W1 - W2)>>2 */
  if (flags & toom6_vm2_neg)
    mpn_add_n (w2, w1, w2, 2 * n + 1);
  else
    mpn_sub_n (w2, w1, w2, 2 * n + 1);
  mpn_rshift (w2, w2, 2 * n + 1, 2);

  /* W1 =(W1 - W5)>>1 */
  w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
  mpn_rshift (w1, w1, 2 * n + 1, 1);

  /* W1 =(W1 - W2)>>1 */
#if HAVE_NATIVE_mpn_rsh1sub_n
  mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
#else
  mpn_sub_n (w1, w1, w2, 2 * n + 1);
  mpn_rshift (w1, w1, 2 * n + 1, 1);
#endif

  /* W4 =(W3 - W4)>>1 */
  if (flags & toom6_vm1_neg)
    {
#if HAVE_NATIVE_mpn_rsh1add_n
      mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
#else
      mpn_add_n (w4, w3, w4, 2 * n + 1);
      mpn_rshift (w4, w4, 2 * n + 1, 1);
#endif
    }
  else
    {
#if HAVE_NATIVE_mpn_rsh1sub_n
      mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
#else
      mpn_sub_n (w4, w3, w4, 2 * n + 1);
      mpn_rshift (w4, w4, 2 * n + 1, 1);
#endif
    }

  /* W2 =(W2 - W4)/3 */
  mpn_sub_n (w2, w2, w4, 2 * n + 1);
  mpn_divexact_by3 (w2, w2, 2 * n + 1);

  /* W3 = W3 - W4 - W5 */
  mpn_sub_n (w3, w3, w4, 2 * n + 1);
  w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);

  /* W1 =(W1 - W3)/3 */
  mpn_sub_n (w1, w1, w3, 2 * n + 1);
  mpn_divexact_by3 (w1, w1, 2 * n + 1);

  /*
    [1 0 0 0 0 0;
     0 1 0 0 0 0;
     1 0 1 0 0 0;
     0 1 0 1 0 0;
     1 0 1 0 1 0;
     0 0 0 0 0 1]

    pp[] prior to operations:
     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|

    summation scheme for remaining operations:
     |______________5|n_____4|n_____3|n_____2|n______|n______|pp
     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
            || H w4  | L w4  |
        || H w2  | L w2  |
      || H w1  | L w1  |
          ||-H w1  |-L w1  |
         |-H w0  |-L w0 ||-H w2  |-L w2  |
  */
  cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
  MPN_INCR_U (pp + 3 * n + 1, n, cy);

  /* W2 -= W0<<2 */
#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
  cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
#else
  cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
#endif
#else
  /* {W4,2*n+1} is now free and can be overwritten. */
  cy = mpn_lshift(w4, w0, w0n, 2);
  cy+= mpn_sub_n(w2, w2, w4, w0n);
#endif
  MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);

  /* W4L = W4L - W2L */
  cy = mpn_sub_n (pp + n, pp + n, w2, n);
  MPN_DECR_U (w3, 2 * n + 1, cy);

  /* W3H = W3H + W2L */
  cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
  /* W1L + W2H */
  cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
  MPN_INCR_U (w1 + n, n + 1, cy);

  /* W0 = W0 + W1H */
  if (LIKELY (w0n > n))
    cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
  else
    cy6 = mpn_add_n (w0, w0, w1 + n, w0n);

  /*
    summation scheme for the next operation:
     |...____5|n_____4|n_____3|n_____2|n______|n______|pp
     |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
         ...-w0___|-w1_w2 |
  */
  /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
  cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);

  /* embankment is a "dirty trick" to avoid carry/borrow propagation
     beyond allocated memory */
  embankment = w0[w0n - 1] - 1;
  w0[w0n - 1] = 1;
  if (LIKELY (w0n > n)) {
    if (cy4 > cy6)
      MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
    else
      MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
    MPN_INCR_U (w0 + n, w0n - n, cy6);
  } else {
    MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
  }
  w0[w0n - 1] += embankment;

#undef w5
#undef w3
#undef w0

}

Coverage Report

Created: 2023-02-22 06:39

Line	Count	Source (jump to first uncovered line)
1		/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
2
3		Contributed to the GNU project by Marco Bodrato.
4
5		THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6		SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7		GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9		Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11		This file is part of the GNU MP Library.
12
13		The GNU MP Library is free software; you can redistribute it and/or modify
14		it under the terms of either:
15
16		* the GNU Lesser General Public License as published by the Free
17		Software Foundation; either version 3 of the License, or (at your
18		option) any later version.
19
20		or
21
22		* the GNU General Public License as published by the Free Software
23		Foundation; either version 2 of the License, or (at your option) any
24		later version.
25
26		or both in parallel, as here.
27
28		The GNU MP Library is distributed in the hope that it will be useful, but
29		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31		for more details.
32
33		You should have received copies of the GNU General Public License and the
34		GNU Lesser General Public License along with the GNU MP Library. If not,
35		see https://www.gnu.org/licenses/. */
36
37		#include "gmp-impl.h"
38
39		#define BINVERT_3 MODLIMB_INVERSE_3
40
41		/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
42		#ifndef mpn_divexact_by3
43		#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
44		#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
45		#else
46		#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
47		#endif
48		#endif
49
50		/* Interpolation for Toom-3.5, using the evaluation points: infinity,
51		1, -1, 2, -2. More precisely, we want to compute
52		f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
53		six values
54
55		w5 = f(0),
56		w4 = f(-1),
57		w3 = f(1)
58		w2 = f(-2),
59		w1 = f(2),
60		w0 = limit at infinity of f(x) / x^5,
61
62		The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
63		{pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
64		{pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
65		significant limbs small). f(-1) and f(-2) may be negative, signs
66		determined by the flag bits. All intermediate results are positive.
67		Inputs are destroyed.
68
69		Interpolation sequence was taken from the paper: "Integer and
70		Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
71		Some slight variations were introduced: adaptation to "gmp
72		instruction set", and a final saving of an operation by interlacing
73		interpolation and recomposition phases.
74		*/
75
76		void
77		mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
78		mp_ptr w4, mp_ptr w2, mp_ptr w1,
79		mp_size_t w0n)
80	295	{
81	295	mp_limb_t cy;
82		/* cy6 can be stored in w1[2n], cy4 in w4[0], embankment in w2[0] /
83	295	mp_limb_t cy4, cy6, embankment;
84
85	295	ASSERT( n > 0 );
86	295	ASSERT( 2*n >= w0n && w0n > 0 );
87
88	590	#define w5 pp /* 2n */
89	2.36k	#define w3 (pp + 2 * n) /* 2n+1 */
90	1.77k	#define w0 (pp + 5 * n) /* w0n */
91
92		/* Interpolate with sequence:
93		W2 =(W1 - W2)>>2
94		W1 =(W1 - W5)>>1
95		W1 =(W1 - W2)>>1
96		W4 =(W3 - W4)>>1
97		W2 =(W2 - W4)/3
98		W3 = W3 - W4 - W5
99		W1 =(W1 - W3)/3
100		// Last steps are mixed with recomposition...
101		W2 = W2 - W0<<2
102		W4 = W4 - W2
103		W3 = W3 - W1
104		W2 = W2 - W0
105		*/
106
107		/* W2 =(W1 - W2)>>2 */
108	295	if (flags & toom6_vm2_neg)
109	198	mpn_add_n (w2, w1, w2, 2 * n + 1);
110	97	else
111	97	mpn_sub_n (w2, w1, w2, 2 * n + 1);
112	295	mpn_rshift (w2, w2, 2 * n + 1, 2);
113
114		/* W1 =(W1 - W5)>>1 */
115	295	w1[2n] -= mpn_sub_n (w1, w1, w5, 2n);
116	295	mpn_rshift (w1, w1, 2 * n + 1, 1);
117
118		/* W1 =(W1 - W2)>>1 */
119	295	#if HAVE_NATIVE_mpn_rsh1sub_n
120	295	mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
121		#else
122		mpn_sub_n (w1, w1, w2, 2 * n + 1);
123		mpn_rshift (w1, w1, 2 * n + 1, 1);
124		#endif
125
126		/* W4 =(W3 - W4)>>1 */
127	295	if (flags & toom6_vm1_neg)
128	144	{
129	144	#if HAVE_NATIVE_mpn_rsh1add_n
130	144	mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
131		#else
132		mpn_add_n (w4, w3, w4, 2 * n + 1);
133		mpn_rshift (w4, w4, 2 * n + 1, 1);
134		#endif
135	144	}
136	151	else
137	151	{
138	151	#if HAVE_NATIVE_mpn_rsh1sub_n
139	151	mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
140		#else
141		mpn_sub_n (w4, w3, w4, 2 * n + 1);
142		mpn_rshift (w4, w4, 2 * n + 1, 1);
143		#endif
144	151	}
145
146		/* W2 =(W2 - W4)/3 */
147	295	mpn_sub_n (w2, w2, w4, 2 * n + 1);
148	295	mpn_divexact_by3 (w2, w2, 2 * n + 1);
149
150		/* W3 = W3 - W4 - W5 */
151	295	mpn_sub_n (w3, w3, w4, 2 * n + 1);
152	295	w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
153
154		/* W1 =(W1 - W3)/3 */
155	295	mpn_sub_n (w1, w1, w3, 2 * n + 1);
156	295	mpn_divexact_by3 (w1, w1, 2 * n + 1);
157
158		/*
159		[1 0 0 0 0 0;
160		0 1 0 0 0 0;
161		1 0 1 0 0 0;
162		0 1 0 1 0 0;
163		1 0 1 0 1 0;
164		0 0 0 0 0 1]
165
166		pp[] prior to operations:
167		\|_H w0__\|_L w0__\|______\|\|_H w3__\|_L w3__\|_H w5__\|_L w5__\|
168
169		summation scheme for remaining operations:
170		\|______________5\|n_____4\|n_____3\|n_____2\|n______\|n______\|pp
171		\|_H w0__\|_L w0__\|______\|\|_H w3__\|_L w3__\|_H w5__\|_L w5__\|
172		\|\| H w4 \| L w4 \|
173		\|\| H w2 \| L w2 \|
174		\|\| H w1 \| L w1 \|
175		\|\|-H w1 \|-L w1 \|
176		\|-H w0 \|-L w0 \|\|-H w2 \|-L w2 \|
177		*/
178	295	cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
179	295	MPN_INCR_U (pp + 3 * n + 1, n, cy);
180
181		/* W2 -= W0<<2 */
182		#if HAVE_NATIVE_mpn_sublsh_n \|\| HAVE_NATIVE_mpn_sublsh2_n_ip1
183		#if HAVE_NATIVE_mpn_sublsh2_n_ip1
184		cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
185		#else
186		cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
187		#endif
188		#else
189		/* {W4,2n+1} is now free and can be overwritten. /
190	295	cy = mpn_lshift(w4, w0, w0n, 2);
191	295	cy+= mpn_sub_n(w2, w2, w4, w0n);
192	295	#endif
193	295	MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
194
195		/* W4L = W4L - W2L */
196	295	cy = mpn_sub_n (pp + n, pp + n, w2, n);
197	295	MPN_DECR_U (w3, 2 * n + 1, cy);
198
199		/* W3H = W3H + W2L */
200	295	cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
201		/* W1L + W2H */
202	295	cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
203	295	MPN_INCR_U (w1 + n, n + 1, cy);
204
205		/* W0 = W0 + W1H */
206	295	if (LIKELY (w0n > n))
207	295	cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
208	0	else
209	0	cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
210
211		/*
212		summation scheme for the next operation:
213		\|...____5\|n_____4\|n_____3\|n_____2\|n______\|n______\|pp
214		\|...w0___\|_w1_w2_\|_H w3__\|_L w3__\|_H w5__\|_L w5__\|
215		...-w0___\|-w1_w2 \|
216		*/
217		/* if(LIKELY(w0n>n)) the two operands below DO overlap! */
218	295	cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
219
220		/* embankment is a "dirty trick" to avoid carry/borrow propagation
221		beyond allocated memory */
222	295	embankment = w0[w0n - 1] - 1;
223	295	w0[w0n - 1] = 1;
224	295	if (LIKELY (w0n > n)) {
225	295	if (cy4 > cy6)
226	159	MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
227	136	else
228	136	MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
229	295	MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
230	295	MPN_INCR_U (w0 + n, w0n - n, cy6);
231	295	} else {
232	0	MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
233	0	MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
234	0	}
235	295	w0[w0n - 1] += embankment;
236
237	295	#undef w5
238	295	#undef w3
239	295	#undef w0
240
241	295	}