/src/gmp/mpn/toom_interpolate_8pts.c

Source (jump to first uncovered line)
/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2011, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"

#define BINVERT_3 MODLIMB_INVERSE_3

#define BINVERT_15 \
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)

#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)

#ifndef mpn_divexact_by3
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
#else
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
#endif
#endif

#ifndef mpn_divexact_by45
#if GMP_NUMB_BITS % 12 == 0
#define mpn_divexact_by45(dst,src,size) \
  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
#else
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
#else
#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
#endif
#endif
#endif

#if HAVE_NATIVE_mpn_sublsh2_n_ip1
#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
#else
#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
#endif

#if HAVE_NATIVE_mpn_sublsh_n
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift (ws,src,n,s);
  return __cy + mpn_sub_n (dst,dst,ws,n);
#endif
}
#endif


#if HAVE_NATIVE_mpn_subrsh
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
#else
/* This is not a correct definition, it assumes no carry */
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)       \
do {                 \
  mp_limb_t __cy;             \
  MPN_DECR_U (dst, nd, src[0] >> s);          \
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);  \
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);       \
} while (0)
#endif

/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
   points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
   degree 7 (or 6), given the 8 (rsp. 7) values:

     r1 = limit at infinity of f(x) / x^7,
     r2 = f(4),
     r3 = f(-4),
     r4 = f(2),
     r5 = f(-2),
     r6 = f(1),
     r7 = f(-1),
     r8 = f(0).

   All couples of the form f(n),f(-n) must be already mixed with
   toom_couple_handling(f(n),...,f(-n),...)

   The result is stored in {pp, spt + 7*n (or 6*n)}.
   At entry, r8 is stored at {pp, 2n},
   r5 is stored at {pp + 3n, 3n + 1}.

   The other values are 2n+... limbs each (with most significant limbs small).

   All intermediate results are positive.
   Inputs are destroyed.
*/

void
mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
         mp_ptr r3, mp_ptr r7,
         mp_size_t spt, mp_ptr ws)
{
  mp_limb_signed_t cy;
  mp_ptr r5, r1;
  r5 = (pp + 3 * n);      /* 3n+1 */
  r1 = (pp + 7 * n);      /* spt */

  /******************************* interpolation *****************************/

  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);

  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);

  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
  cy = mpn_sub_n (r7, r7, r1, spt);
  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));

  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));

  mpn_divexact_by45 (r3, r3, 3 * n + 1);

  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));

  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));

  /* last interpolation steps... */
  /* ... are mixed with recomposition */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp

    summation scheme for remaining operations:
     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
    ||_H r3|_M r3|_L*r3|
          ||_H_r7|_M_r7|_L_r7|
          ||-H r3|-M r3|-L*r3|
          ||-H*r5|-M_r5|-L_r5|
  */

  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
  if (cy > 0) {
    MPN_INCR_U (r7 + n, 2*n + 1, 1);
    cy = 0;
  }

  cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
  MPN_DECR_U (r7 + 2*n, n + 1, cy);

  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
  if (UNLIKELY(0 > cy))
    MPN_DECR_U (r5 + n + 1, 2*n, 1);
  else
    MPN_INCR_U (r5 + n + 1, 2*n, cy);

  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */

  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
  MPN_INCR_U (r3 + 2*n, n + 1, cy);
  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
  if (LIKELY(spt != n))
    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
  else
    ASSERT (r3[3*n] + cy == 0);
}

Coverage Report

Created: 2025-03-18 06:55

Line	Count	Source (jump to first uncovered line)
1		/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2
3		Contributed to the GNU project by Marco Bodrato.
4
5		THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6		SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7		GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9		Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
10
11		This file is part of the GNU MP Library.
12
13		The GNU MP Library is free software; you can redistribute it and/or modify
14		it under the terms of either:
15
16		* the GNU Lesser General Public License as published by the Free
17		Software Foundation; either version 3 of the License, or (at your
18		option) any later version.
19
20		or
21
22		* the GNU General Public License as published by the Free Software
23		Foundation; either version 2 of the License, or (at your option) any
24		later version.
25
26		or both in parallel, as here.
27
28		The GNU MP Library is distributed in the hope that it will be useful, but
29		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31		for more details.
32
33		You should have received copies of the GNU General Public License and the
34		GNU Lesser General Public License along with the GNU MP Library. If not,
35		see https://www.gnu.org/licenses/. */
36
37		#include "gmp-impl.h"
38
39	0	#define BINVERT_3 MODLIMB_INVERSE_3
40
41		#define BINVERT_15 \
42	0	((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
43
44	0	#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
45
46		#ifndef mpn_divexact_by3
47		#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
48		#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
49		#else
50		#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
51		#endif
52		#endif
53
54		#ifndef mpn_divexact_by45
55		#if GMP_NUMB_BITS % 12 == 0
56		#define mpn_divexact_by45(dst,src,size) \
57		(63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
58		#else
59		#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
60	0	#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
61		#else
62		#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
63		#endif
64		#endif
65		#endif
66
67		#if HAVE_NATIVE_mpn_sublsh2_n_ip1
68		#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
69		#else
70		#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
71		#endif
72
73		#if HAVE_NATIVE_mpn_sublsh_n
74		#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
75		#else
76		static mp_limb_t
77		DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
78	0	{
79		#if USE_MUL_1 && 0
80		return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
81		#else
82	0	mp_limb_t __cy;
83	0	__cy = mpn_lshift (ws,src,n,s);
84	0	return __cy + mpn_sub_n (dst,dst,ws,n);
85	0	#endif
86	0	}
87		#endif
88
89
90		#if HAVE_NATIVE_mpn_subrsh
91		#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
92		#else
93		/* This is not a correct definition, it assumes no carry */
94	0	#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
95	0	do { \
96	0	mp_limb_t __cy; \
97	0	MPN_DECR_U (dst, nd, src[0] >> s); \
98	0	__cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
99	0	MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
100	0	} while (0)
101		#endif
102
103		/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
104		points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
105		we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
106		degree 7 (or 6), given the 8 (rsp. 7) values:
107
108		r1 = limit at infinity of f(x) / x^7,
109		r2 = f(4),
110		r3 = f(-4),
111		r4 = f(2),
112		r5 = f(-2),
113		r6 = f(1),
114		r7 = f(-1),
115		r8 = f(0).
116
117		All couples of the form f(n),f(-n) must be already mixed with
118		toom_couple_handling(f(n),...,f(-n),...)
119
120		The result is stored in {pp, spt + 7n (or 6n)}.
121		At entry, r8 is stored at {pp, 2n},
122		r5 is stored at {pp + 3n, 3n + 1}.
123
124		The other values are 2n+... limbs each (with most significant limbs small).
125
126		All intermediate results are positive.
127		Inputs are destroyed.
128		*/
129
130		void
131		mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
132		mp_ptr r3, mp_ptr r7,
133		mp_size_t spt, mp_ptr ws)
134	0	{
135	0	mp_limb_signed_t cy;
136	0	mp_ptr r5, r1;
137	0	r5 = (pp + 3 * n); /* 3n+1 */
138	0	r1 = (pp + 7 * n); /* spt */
139
140		/***************************** interpolation ***************************/
141
142	0	DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
143	0	cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
144	0	MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
145
146	0	DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
147	0	cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
148	0	MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
149
150	0	r7[3n] -= mpn_sub_n (r7+n, r7+n, pp, 2 n);
151	0	cy = mpn_sub_n (r7, r7, r1, spt);
152	0	MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
153
154	0	ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
155	0	ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
156
157	0	ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
158
159	0	ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
160
161	0	mpn_divexact_by45 (r3, r3, 3 * n + 1);
162
163	0	ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
164
165	0	ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
166
167		/* last interpolation steps... */
168		/* ... are mixed with recomposition */
169
170		/*************************** recomposition *****************************/
171		/*
172		pp[] prior to operations:
173		\|_H r1\|_L r1\|____\|\|_H r5\|_M_r5\|_L r5\|_____\|_H r8\|_L r8\|pp
174
175		summation scheme for remaining operations:
176		\|____8\|n___7\|n___6\|n___5\|n___4\|n___3\|n___2\|n____\|n____\|pp
177		\|_H r1\|_L r1\|____\|\|_H*r5\|_M r5\|_L r5\|_____\|_H_r8\|_L r8\|pp
178		\|\|_H r3\|_M r3\|_L*r3\|
179		\|\|_H_r7\|_M_r7\|_L_r7\|
180		\|\|-H r3\|-M r3\|-L*r3\|
181		\|\|-H*r5\|-M_r5\|-L_r5\|
182		*/
183
184	0	cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
185	0	cy-= mpn_sub_n (pp + n, pp + n, r5, n);
186	0	if (cy > 0) {
187	0	MPN_INCR_U (r7 + n, 2*n + 1, 1);
188	0	cy = 0;
189	0	}
190
191	0	cy = mpn_sub_nc (pp + 2n, r7 + n, r5 + n, n, -cy); / Mr7-Mr5 */
192	0	MPN_DECR_U (r7 + 2*n, n + 1, cy);
193
194	0	cy = mpn_add_n (pp + 3n, r5, r7+ 2n, n+1); /* Hr7+Lr5 */
195	0	r5[3n]+= mpn_add_n (r5 + 2n, r5 + 2n, r3, n); / Hr5+Lr3 */
196	0	cy-= mpn_sub_n (pp + 3n, pp + 3n, r5 + 2n, n+1); / Hr7-Hr5+Lr5-Lr3 */
197	0	if (UNLIKELY(0 > cy))
198	0	MPN_DECR_U (r5 + n + 1, 2*n, 1);
199	0	else
200	0	MPN_INCR_U (r5 + n + 1, 2*n, cy);
201
202	0	ASSERT_NOCARRY(mpn_sub_n(pp + 4n, r5 + n, r3 + n, 2n +1)); /* Mr5-Mr3,Hr5-Hr3 */
203
204	0	cy = mpn_add_1 (pp + 6n, r3 + n, n, pp[6n]);
205	0	MPN_INCR_U (r3 + 2*n, n + 1, cy);
206	0	cy = mpn_add_n (pp + 7n, pp + 7n, r3 + 2*n, n);
207	0	if (LIKELY(spt != n))
208	0	MPN_INCR_U (pp + 8n, spt - n, cy + r3[3n]);
209	0	else
210	0	ASSERT (r3[3*n] + cy == 0);
211	0	}