/src/libgmp/mpn/toom_interpolate_8pts.c

Source (jump to first uncovered line)
/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2011, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"

#define BINVERT_3 MODLIMB_INVERSE_3

#define BINVERT_15 \
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)

#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)

#ifndef mpn_divexact_by3
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
#else
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
#endif
#endif

#ifndef mpn_divexact_by45
#if GMP_NUMB_BITS % 12 == 0
#define mpn_divexact_by45(dst,src,size) \
  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
#else
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
#else
#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
#endif
#endif
#endif

#if HAVE_NATIVE_mpn_sublsh2_n_ip1
#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
#else
#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
#endif

#if HAVE_NATIVE_mpn_sublsh_n
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift (ws,src,n,s);
  return __cy + mpn_sub_n (dst,dst,ws,n);
#endif
}
#endif


#if HAVE_NATIVE_mpn_subrsh
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
#else
/* This is not a correct definition, it assumes no carry */
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)       \
do {                 \
  mp_limb_t __cy;             \
  MPN_DECR_U (dst, nd, src[0] >> s);          \
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);  \
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);       \
} while (0)
#endif

/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
   points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
   degree 7 (or 6), given the 8 (rsp. 7) values:

     r1 = limit at infinity of f(x) / x^7,
     r2 = f(4),
     r3 = f(-4),
     r4 = f(2),
     r5 = f(-2),
     r6 = f(1),
     r7 = f(-1),
     r8 = f(0).

   All couples of the form f(n),f(-n) must be already mixed with
   toom_couple_handling(f(n),...,f(-n),...)

   The result is stored in {pp, spt + 7*n (or 6*n)}.
   At entry, r8 is stored at {pp, 2n},
   r5 is stored at {pp + 3n, 3n + 1}.

   The other values are 2n+... limbs each (with most significant limbs small).

   All intermediate results are positive.
   Inputs are destroyed.
*/

void
mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
         mp_ptr r3, mp_ptr r7,
         mp_size_t spt, mp_ptr ws)
{
  mp_limb_signed_t cy;
  mp_ptr r5, r1;
  r5 = (pp + 3 * n);      /* 3n+1 */
  r1 = (pp + 7 * n);      /* spt */

  /******************************* interpolation *****************************/

  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);

  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);

  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
  cy = mpn_sub_n (r7, r7, r1, spt);
  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));

  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));

  mpn_divexact_by45 (r3, r3, 3 * n + 1);

  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));

  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));

  /* last interpolation steps... */
  /* ... are mixed with recomposition */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp

    summation scheme for remaining operations:
     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
    ||_H r3|_M r3|_L*r3|
          ||_H_r7|_M_r7|_L_r7|
          ||-H r3|-M r3|-L*r3|
          ||-H*r5|-M_r5|-L_r5|
  */

  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
  if (cy > 0) {
    MPN_INCR_U (r7 + n, 2*n + 1, 1);
    cy = 0;
  }

  cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
  MPN_DECR_U (r7 + 2*n, n + 1, cy);

  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
  if (UNLIKELY(0 > cy))
    MPN_DECR_U (r5 + n + 1, 2*n, 1);
  else
    MPN_INCR_U (r5 + n + 1, 2*n, cy);

  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */

  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
  MPN_INCR_U (r3 + 2*n, n + 1, cy);
  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
  if (LIKELY(spt != n))
    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
  else
    ASSERT (r3[3*n] + cy == 0);
}

Coverage Report

Created: 2024-11-21 07:03

Line	Count	Source (jump to first uncovered line)
1		/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2
3		Contributed to the GNU project by Marco Bodrato.
4
5		THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6		SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7		GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9		Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
10
11		This file is part of the GNU MP Library.
12
13		The GNU MP Library is free software; you can redistribute it and/or modify
14		it under the terms of either:
15
16		* the GNU Lesser General Public License as published by the Free
17		Software Foundation; either version 3 of the License, or (at your
18		option) any later version.
19
20		or
21
22		* the GNU General Public License as published by the Free Software
23		Foundation; either version 2 of the License, or (at your option) any
24		later version.
25
26		or both in parallel, as here.
27
28		The GNU MP Library is distributed in the hope that it will be useful, but
29		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31		for more details.
32
33		You should have received copies of the GNU General Public License and the
34		GNU Lesser General Public License along with the GNU MP Library. If not,
35		see https://www.gnu.org/licenses/. */
36
37		#include "gmp-impl.h"
38
39	154	#define BINVERT_3 MODLIMB_INVERSE_3
40
41		#define BINVERT_15 \
42	154	((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
43
44	154	#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
45
46		#ifndef mpn_divexact_by3
47		#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
48		#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
49		#else
50		#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
51		#endif
52		#endif
53
54		#ifndef mpn_divexact_by45
55		#if GMP_NUMB_BITS % 12 == 0
56		#define mpn_divexact_by45(dst,src,size) \
57		(63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
58		#else
59		#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
60	154	#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
61		#else
62		#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
63		#endif
64		#endif
65		#endif
66
67		#if HAVE_NATIVE_mpn_sublsh2_n_ip1
68		#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
69		#else
70		#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
71		#endif
72
73		#if HAVE_NATIVE_mpn_sublsh_n
74		#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
75		#else
76		static mp_limb_t
77		DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
78	770	{
79		#if USE_MUL_1 && 0
80		return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
81		#else
82	770	mp_limb_t __cy;
83	770	__cy = mpn_lshift (ws,src,n,s);
84	770	return __cy + mpn_sub_n (dst,dst,ws,n);
85	770	#endif
86	770	}
87		#endif
88
89
90		#if HAVE_NATIVE_mpn_subrsh
91		#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
92		#else
93		/* This is not a correct definition, it assumes no carry */
94	308	#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
95	308	do { \
96	308	mp_limb_t __cy; \
97	308	MPN_DECR_U (dst, nd, src[0] >> s); \
98	308	__cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
99	308	MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
100	308	} while (0)
101		#endif
102
103		/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
104		points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
105		we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
106		degree 7 (or 6), given the 8 (rsp. 7) values:
107
108		r1 = limit at infinity of f(x) / x^7,
109		r2 = f(4),
110		r3 = f(-4),
111		r4 = f(2),
112		r5 = f(-2),
113		r6 = f(1),
114		r7 = f(-1),
115		r8 = f(0).
116
117		All couples of the form f(n),f(-n) must be already mixed with
118		toom_couple_handling(f(n),...,f(-n),...)
119
120		The result is stored in {pp, spt + 7n (or 6n)}.
121		At entry, r8 is stored at {pp, 2n},
122		r5 is stored at {pp + 3n, 3n + 1}.
123
124		The other values are 2n+... limbs each (with most significant limbs small).
125
126		All intermediate results are positive.
127		Inputs are destroyed.
128		*/
129
130		void
131		mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
132		mp_ptr r3, mp_ptr r7,
133		mp_size_t spt, mp_ptr ws)
134	154	{
135	154	mp_limb_signed_t cy;
136	154	mp_ptr r5, r1;
137	154	r5 = (pp + 3 * n); /* 3n+1 */
138	154	r1 = (pp + 7 * n); /* spt */
139
140		/***************************** interpolation ***************************/
141
142	154	DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
143	154	cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
144	154	MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
145
146	154	DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
147	154	cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
148	154	MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
149
150	154	r7[3n] -= mpn_sub_n (r7+n, r7+n, pp, 2 n);
151	154	cy = mpn_sub_n (r7, r7, r1, spt);
152	154	MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
153
154	154	ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
155	154	ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
156
157	154	ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
158
159	154	ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
160
161	154	mpn_divexact_by45 (r3, r3, 3 * n + 1);
162
163	154	ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
164
165	154	ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
166
167		/* last interpolation steps... */
168		/* ... are mixed with recomposition */
169
170		/*************************** recomposition *****************************/
171		/*
172		pp[] prior to operations:
173		\|_H r1\|_L r1\|____\|\|_H r5\|_M_r5\|_L r5\|_____\|_H r8\|_L r8\|pp
174
175		summation scheme for remaining operations:
176		\|____8\|n___7\|n___6\|n___5\|n___4\|n___3\|n___2\|n____\|n____\|pp
177		\|_H r1\|_L r1\|____\|\|_H*r5\|_M r5\|_L r5\|_____\|_H_r8\|_L r8\|pp
178		\|\|_H r3\|_M r3\|_L*r3\|
179		\|\|_H_r7\|_M_r7\|_L_r7\|
180		\|\|-H r3\|-M r3\|-L*r3\|
181		\|\|-H*r5\|-M_r5\|-L_r5\|
182		*/
183
184	154	cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
185	154	cy-= mpn_sub_n (pp + n, pp + n, r5, n);
186	154	if (cy > 0) {
187	11	MPN_INCR_U (r7 + n, 2*n + 1, 1);
188	11	cy = 0;
189	11	}
190
191	154	cy = mpn_sub_nc (pp + 2n, r7 + n, r5 + n, n, -cy); / Mr7-Mr5 */
192	154	MPN_DECR_U (r7 + 2*n, n + 1, cy);
193
194	154	cy = mpn_add_n (pp + 3n, r5, r7+ 2n, n+1); /* Hr7+Lr5 */
195	154	r5[3n]+= mpn_add_n (r5 + 2n, r5 + 2n, r3, n); / Hr5+Lr3 */
196	154	cy-= mpn_sub_n (pp + 3n, pp + 3n, r5 + 2n, n+1); / Hr7-Hr5+Lr5-Lr3 */
197	154	if (UNLIKELY(0 > cy))
198	0	MPN_DECR_U (r5 + n + 1, 2*n, 1);
199	154	else
200	154	MPN_INCR_U (r5 + n + 1, 2*n, cy);
201
202	154	ASSERT_NOCARRY(mpn_sub_n(pp + 4n, r5 + n, r3 + n, 2n +1)); /* Mr5-Mr3,Hr5-Hr3 */
203
204	154	cy = mpn_add_1 (pp + 6n, r3 + n, n, pp[6n]);
205	154	MPN_INCR_U (r3 + 2*n, n + 1, cy);
206	154	cy = mpn_add_n (pp + 7n, pp + 7n, r3 + 2*n, n);
207	154	if (LIKELY(spt != n))
208	154	MPN_INCR_U (pp + 8n, spt - n, cy + r3[3n]);
209	0	else
210	154	ASSERT (r3[3*n] + cy == 0);
211	154	}