Coverage Report

Created: 2023-02-22 06:39

/src/gmp-6.2.1/mpn/toom33_mul.c
 Line   Count  Source
    1          /* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
    2             size.  Or more accurately, bn <= an < (3/2)bn.
    3
    4             Contributed to the GNU project by Torbjorn Granlund.
    5             Additional improvements by Marco Bodrato.
    6
    7             THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    8             SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    9             GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   10
   11          Copyright 2006-2008, 2010, 2012, 2015 Free Software Foundation, Inc.
   12
   13          This file is part of the GNU MP Library.
   14
   15          The GNU MP Library is free software; you can redistribute it and/or modify
   16          it under the terms of either:
   17
   18            * the GNU Lesser General Public License as published by the Free
   19              Software Foundation; either version 3 of the License, or (at your
   20              option) any later version.
   21
   22          or
   23
   24            * the GNU General Public License as published by the Free Software
   25              Foundation; either version 2 of the License, or (at your option) any
   26              later version.
   27
   28          or both in parallel, as here.
   29
   30          The GNU MP Library is distributed in the hope that it will be useful, but
   31          WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   32          or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   33          for more details.
   34
   35          You should have received copies of the GNU General Public License and the
   36          GNU Lesser General Public License along with the GNU MP Library.  If not,
   37          see https://www.gnu.org/licenses/.  */
   38
   39
   40          #include "gmp-impl.h"
   41
   42          /* Evaluate in: -1, 0, +1, +2, +inf
   43
   44            <-s--><--n--><--n-->
   45             ____ ______ ______
   46            |_a2_|___a1_|___a0_|
   47             |b2_|___b1_|___b0_|
   48             <-t-><--n--><--n-->
   49
   50            v0  =  a0         * b0          #   A(0)*B(0)
   51            v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
   52            vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  bh <= 1
   53            v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
   54            vinf=          a2 *         b2  # A(inf)*B(inf)
   55          */
   56
   57          #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
   58          #define MAYBE_mul_basecase 1
   59          #define MAYBE_mul_toom33   1
   60          #else
   61          #define MAYBE_mul_basecase            \
   62   5.11k    (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
   63          #define MAYBE_mul_toom33            \
   64   5.03k    (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
   65          #endif
   66
   67          /* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
   68             multiplication at the infinity point. We may have
   69             MAYBE_mul_basecase == 0, and still get s just below
   70             MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
   71             s == 1 and mpn_toom22_mul will crash.
   72          */
   73
   74          #define TOOM33_MUL_N_REC(p, a, b, n, ws)        \
   75   2.55k    do {                 \
   76   2.55k      if (MAYBE_mul_basecase            \
   77   2.55k    && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))     \
   78   2.55k        mpn_mul_basecase (p, a, n, b, n);         \
   79   2.55k      else if (! MAYBE_mul_toom33            \
   80   2.51k         || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))   \
   81   2.51k        mpn_toom22_mul (p, a, n, b, n, ws);       \
   82   2.51k      else                \
   83   2.51k        mpn_toom33_mul (p, a, n, b, n, ws);       \
   84   2.55k    } while (0)
   85
   86          void
   87          mpn_toom33_mul (mp_ptr pp,
   88              mp_srcptr ap, mp_size_t an,
   89              mp_srcptr bp, mp_size_t bn,
   90              mp_ptr scratch)
   91     594  {
   92     594    const int __gmpn_cpuvec_initialized = 1;
   93     594    mp_size_t n, s, t;
   94     594    int vm1_neg;
   95     594    mp_limb_t cy, vinf0;
   96     594    mp_ptr gp;
   97     594    mp_ptr as1, asm1, as2;
   98     594    mp_ptr bs1, bsm1, bs2;
   99
  100   1.18k  #define a0  ap
  101   1.74k  #define a1  (ap + n)
  102   1.60k  #define a2  (ap + 2*n)
  103   1.18k  #define b0  bp
  104   1.73k  #define b1  (bp + n)
  105   1.60k  #define b2  (bp + 2*n)
  106
  107     594    n = (an + 2) / (size_t) 3;
  108
  109     594    s = an - 2 * n;
  110     594    t = bn - 2 * n;
  111
  112     594    ASSERT (an >= bn);
  113
  114     594    ASSERT (0 < s && s <= n);
  115     594    ASSERT (0 < t && t <= n);
  116
  117     594    as1  = scratch + 4 * n + 4;
  118     594    asm1 = scratch + 2 * n + 2;
  119     594    as2 = pp + n + 1;
  120
  121     594    bs1 = pp;
  122     594    bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
  123     594    bs2 = pp + 2 * n + 2;
  124
  125     594    gp = scratch;
  126
  127     594    vm1_neg = 0;
  128
  129            /* Compute as1 and asm1.  */
  130     594    cy = mpn_add (gp, a0, n, a2, s);
  131          #if HAVE_NATIVE_mpn_add_n_sub_n
  132            if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
  133              {
  134                cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
  135                as1[n] = cy >> 1;
  136                asm1[n] = 0;
  137                vm1_neg = 1;
  138              }
  139            else
  140              {
  141                mp_limb_t cy2;
  142                cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
  143                as1[n] = cy + (cy2 >> 1);
  144                asm1[n] = cy - (cy2 & 1);
  145              }
  146          #else
  147     594    as1[n] = cy + mpn_add_n (as1, gp, a1, n);
  148     594    if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
  149     304      {
  150     304        mpn_sub_n (asm1, a1, gp, n);
  151     304        asm1[n] = 0;
  152     304        vm1_neg = 1;
  153     304      }
  154     290    else
  155     290      {
  156     290        cy -= mpn_sub_n (asm1, gp, a1, n);
  157     290        asm1[n] = cy;
  158     290      }
  159     594  #endif
  160
  161            /* Compute as2.  */
  162     594  #if HAVE_NATIVE_mpn_rsblsh1_n
  163     594    cy = mpn_add_n (as2, a2, as1, s);
  164     594    if (s != n)
  165     526      cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  166     594    cy += as1[n];
  167     594    cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
  168          #else
  169          #if HAVE_NATIVE_mpn_addlsh1_n
  170            cy  = mpn_addlsh1_n (as2, a1, a2, s);
  171            if (s != n)
  172              cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
  173            cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
  174          #else
  175            cy = mpn_add_n (as2, a2, as1, s);
  176            if (s != n)
  177              cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  178            cy += as1[n];
  179            cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  180            cy -= mpn_sub_n (as2, as2, a0, n);
  181          #endif
  182          #endif
  183     594    as2[n] = cy;
  184
  185            /* Compute bs1 and bsm1.  */
  186     594    cy = mpn_add (gp, b0, n, b2, t);
  187          #if HAVE_NATIVE_mpn_add_n_sub_n
  188            if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
  189              {
  190                cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
  191                bs1[n] = cy >> 1;
  192                bsm1[n] = 0;
  193                vm1_neg ^= 1;
  194              }
  195            else
  196              {
  197                mp_limb_t cy2;
  198                cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
  199                bs1[n] = cy + (cy2 >> 1);
  200                bsm1[n] = cy - (cy2 & 1);
  201              }
  202          #else
  203     594    bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
  204     594    if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
  205     288      {
  206     288        mpn_sub_n (bsm1, b1, gp, n);
  207     288        bsm1[n] = 0;
  208     288        vm1_neg ^= 1;
  209     288      }
  210     306    else
  211     306      {
  212     306        cy -= mpn_sub_n (bsm1, gp, b1, n);
  213     306        bsm1[n] = cy;
  214     306      }
  215     594  #endif
  216
  217            /* Compute bs2.  */
  218     594  #if HAVE_NATIVE_mpn_rsblsh1_n
  219     594    cy = mpn_add_n (bs2, b2, bs1, t);
  220     594    if (t != n)
  221     569      cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  222     594    cy += bs1[n];
  223     594    cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
  224          #else
  225          #if HAVE_NATIVE_mpn_addlsh1_n
  226            cy  = mpn_addlsh1_n (bs2, b1, b2, t);
  227            if (t != n)
  228              cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
  229            cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
  230          #else
  231            cy  = mpn_add_n (bs2, bs1, b2, t);
  232            if (t != n)
  233              cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  234            cy += bs1[n];
  235            cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
  236            cy -= mpn_sub_n (bs2, bs2, b0, n);
  237          #endif
  238          #endif
  239     594    bs2[n] = cy;
  240
  241     594    ASSERT (as1[n] <= 2);
  242     594    ASSERT (bs1[n] <= 2);
  243     594    ASSERT (asm1[n] <= 1);
  244     594    ASSERT (bsm1[n] <= 1);
  245     594    ASSERT (as2[n] <= 6);
  246     594    ASSERT (bs2[n] <= 6);
  247
  248     594  #define v0    pp        /* 2n */
  249     594  #define v1    (pp + 2 * n)      /* 2n+1 */
  250   2.19k  #define vinf  (pp + 4 * n)      /* s+t */
  251     594  #define vm1   scratch        /* 2n+1 */
  252     594  #define v2    (scratch + 2 * n + 1)    /* 2n+2 */
  253     594  #define scratch_out  (scratch + 5 * n + 5)
  254
  255            /* vm1, 2n+1 limbs */
  256          #ifdef SMALLER_RECURSION
  257            TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
  258            cy = 0;
  259            if (asm1[n] != 0)
  260              cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
  261            if (bsm1[n] != 0)
  262              cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  263            vm1[2 * n] = cy;
  264          #else
  265     594    TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);
  266     594  #endif
  267
  268     594    TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);  /* v2, 2n+1 limbs */
  269
  270            /* vinf, s+t limbs */
  271     594    if (s > t)  mpn_mul (vinf, a2, s, b2, t);
  272     180    else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
  273
  274     594    vinf0 = vinf[0];        /* v1 overlaps with this */
  275
  276          #ifdef SMALLER_RECURSION
  277            /* v1, 2n+1 limbs */
  278            TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
  279            if (as1[n] == 1)
  280              {
  281                cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
  282              }
  283            else if (as1[n] != 0)
  284              {
  285          #if HAVE_NATIVE_mpn_addlsh1_n_ip1
  286                cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
  287          #else
  288                cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
  289          #endif
  290              }
  291            else
  292              cy = 0;
  293            if (bs1[n] == 1)
  294              {
  295                cy += mpn_add_n (v1 + n, v1 + n, as1, n);
  296              }
  297            else if (bs1[n] != 0)
  298              {
  299          #if HAVE_NATIVE_mpn_addlsh1_n_ip1
  300                cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
  301          #else
  302                cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
  303          #endif
  304              }
  305            v1[2 * n] = cy;
  306          #else
  307     594    cy = vinf[1];
  308     594    TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
  309     594    vinf[1] = cy;
  310     594  #endif
  311
  312     594    TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);  /* v0, 2n limbs */
  313
  314     594    mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
  315     594  }
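
For readers tracing the listing above: the five values v0, v1, vm1, v2, vinf determine the degree-4 product C(x) = A(x)*B(x), which mpn_toom_interpolate_5pts recovers at line 314. The standalone sketch below (plain C, not GMP code; the small coefficient values are arbitrary illustrations) performs the same five evaluations on single-word coefficients, interpolates the product coefficients with one exact-division solution of the 5x5 system, and checks the result against the schoolbook product. GMP's limb-vector routine organizes the arithmetic differently, but the algebra being verified is the same.

/* Standalone illustration of the Toom-3 point evaluation and interpolation.
   Not part of GMP; coefficients are arbitrary small integers. */
#include <assert.h>
#include <stdio.h>

int main (void)
{
  /* A(x) = a0 + a1*x + a2*x^2,  B(x) = b0 + b1*x + b2*x^2 */
  long a0 = 3, a1 = 5, a2 = 2;
  long b0 = 7, b1 = 1, b2 = 4;

  /* Evaluate at 0, +1, -1, +2, +inf, as in the comment at lines 50-54. */
  long v0   =  a0                *  b0;
  long v1   = (a0 + a1 + a2)     * (b0 + b1 + b2);
  long vm1  = (a0 - a1 + a2)     * (b0 - b1 + b2);
  long v2   = (a0 + 2*a1 + 4*a2) * (b0 + 2*b1 + 4*b2);
  long vinf =                 a2 *                 b2;

  /* Interpolate C(x) = c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4.
     All divisions below are exact for any true product. */
  long c0 = v0;
  long c4 = vinf;
  long c2 = (v1 + vm1) / 2 - c0 - c4;
  long c3 = ((v2 - c0 - 16 * c4 - 4 * c2) / 2 - (v1 - vm1) / 2) / 3;
  long c1 = (v1 - vm1) / 2 - c3;

  /* Check against the schoolbook convolution of the coefficients. */
  assert (c0 == a0 * b0);
  assert (c1 == a0 * b1 + a1 * b0);
  assert (c2 == a0 * b2 + a1 * b1 + a2 * b0);
  assert (c3 == a1 * b2 + a2 * b1);
  assert (c4 == a2 * b2);

  printf ("c0..c4 = %ld %ld %ld %ld %ld\n", c0, c1, c2, c3, c4);
  return 0;
}

With these inputs the five values are v0 = 21, v1 = 120, vm1 = 0, v2 = 525, vinf = 8, and the asserts confirm the recovered coefficients 21, 38, 31, 22, 8.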