Coverage Report

Created: 2025-12-31 06:37

/src/gmp/mpn/toom33_mul.c
Line |  Count | Source
   1 |        | /* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
   2 |        |    size.  Or more accurately, bn <= an < (3/2)bn.
   3 |        |
   4 |        |    Contributed to the GNU project by Torbjorn Granlund.
   5 |        |    Additional improvements by Marco Bodrato.
   6 |        |
   7 |        |    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   8 |        |    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   9 |        |    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  10 |        |
  11 |        | Copyright 2006-2008, 2010, 2012, 2015, 2021 Free Software Foundation, Inc.
  12 |        |
  13 |        | This file is part of the GNU MP Library.
  14 |        |
  15 |        | The GNU MP Library is free software; you can redistribute it and/or modify
  16 |        | it under the terms of either:
  17 |        |
  18 |        |   * the GNU Lesser General Public License as published by the Free
  19 |        |     Software Foundation; either version 3 of the License, or (at your
  20 |        |     option) any later version.
  21 |        |
  22 |        | or
  23 |        |
  24 |        |   * the GNU General Public License as published by the Free Software
  25 |        |     Foundation; either version 2 of the License, or (at your option) any
  26 |        |     later version.
  27 |        |
  28 |        | or both in parallel, as here.
  29 |        |
  30 |        | The GNU MP Library is distributed in the hope that it will be useful, but
  31 |        | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  32 |        | or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  33 |        | for more details.
  34 |        |
  35 |        | You should have received copies of the GNU General Public License and the
  36 |        | GNU Lesser General Public License along with the GNU MP Library.  If not,
  37 |        | see https://www.gnu.org/licenses/.  */
  38 |        |
  39 |        |
  40 |        | #include "gmp-impl.h"
  41 |        |
  42 |        | /* Evaluate in: -1, 0, +1, +2, +inf
  43 |        |
  44 |        |   <-s--><--n--><--n-->
  45 |        |    ____ ______ ______
  46 |        |   |_a2_|___a1_|___a0_|
  47 |        |    |b2_|___b1_|___b0_|
  48 |        |    <-t-><--n--><--n-->
  49 |        |
  50 |        |   v0  =  a0         * b0          #   A(0)*B(0)
  51 |        |   v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
  52 |        |   vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  |bh| <= 1
  53 |        |   v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
  54 |        |   vinf=          a2 *         b2  # A(inf)*B(inf)
  55 |        | */
  56 |        |
  57 |        | #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
  58 |        | #define MAYBE_mul_basecase 1
  59 |        | #define MAYBE_mul_toom33   1
  60 |        | #else
  61 |        | #define MAYBE_mul_basecase            \
  62 |  20.4M |   (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
  63 |        | #define MAYBE_mul_toom33            \
  64 |  20.4M |   (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
  65 |        | #endif
  66 |        |
  67 |        | /* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
  68 |        |    multiplication at the infinity point. We may have
  69 |        |    MAYBE_mul_basecase == 0, and still get s just below
  70 |        |    MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
  71 |        |    s == 1 and mpn_toom22_mul will crash.
  72 |        | */
  73 |        |
  74 |        | #define TOOM33_MUL_N_REC(p, a, b, n, ws)        \
  75 |  10.2M |   do {                 \
  76 |  10.2M |     if (MAYBE_mul_basecase            \
  77 |  10.2M |   && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))     \
  78 |  10.2M |       mpn_mul_basecase (p, a, n, b, n);         \
  79 |  10.2M |     else if (! MAYBE_mul_toom33            \
  80 |  10.2M |        || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))   \
  81 |  10.2M |       mpn_toom22_mul (p, a, n, b, n, ws);       \
  82 |  10.2M |     else                \
  83 |  10.2M |       mpn_toom33_mul (p, a, n, b, n, ws);       \
  84 |  10.2M |   } while (0)
  85 |        |
  86 |        | void
  87 |        | mpn_toom33_mul (mp_ptr pp,
  88 |        |     mp_srcptr ap, mp_size_t an,
  89 |        |     mp_srcptr bp, mp_size_t bn,
  90 |        |     mp_ptr scratch)
  91 |  2.04M | {
  92 |  2.04M |   const int __gmpn_cpuvec_initialized = 1;
  93 |  2.04M |   mp_size_t n, s, t;
  94 |  2.04M |   int vm1_neg;
  95 |  2.04M |   mp_limb_t cy, vinf0;
  96 |  2.04M |   mp_ptr gp;
  97 |  2.04M |   mp_ptr as1, asm1, as2;
  98 |  2.04M |   mp_ptr bs1, bsm1, bs2;
  99 |        |
 100 |  4.09M | #define a0  ap
 101 |  5.91M | #define a1  (ap + n)
 102 |  4.09M | #define a2  (ap + 2*n)
 103 |  4.09M | #define b0  bp
 104 |  5.65M | #define b1  (bp + n)
 105 |  4.09M | #define b2  (bp + 2*n)
 106 |        |
 107 |  2.04M |   n = (an + 2) / (size_t) 3;
 108 |        |
 109 |  2.04M |   s = an - 2 * n;
 110 |  2.04M |   t = bn - 2 * n;
 111 |        |
 112 |  2.04M |   ASSERT (an >= bn);
 113 |        |
 114 |  2.04M |   ASSERT (0 < s && s <= n);
 115 |  2.04M |   ASSERT (0 < t && t <= n);
 116 |        |
 117 |  2.04M |   as1  = scratch + 4 * n + 4;
 118 |  2.04M |   asm1 = scratch + 2 * n + 2;
 119 |  2.04M |   as2 = pp + n + 1;
 120 |        |
 121 |  2.04M |   bs1 = pp;
 122 |  2.04M |   bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
 123 |  2.04M |   bs2 = pp + 2 * n + 2;
 124 |        |
 125 |  2.04M |   gp = scratch;
 126 |        |
 127 |  2.04M |   vm1_neg = 0;
 128 |        |
 129 |        |   /* Compute as1 and asm1.  */
 130 |  2.04M |   cy = mpn_add (gp, a0, n, a2, s);
 131 |        | #if HAVE_NATIVE_mpn_add_n_sub_n
 132 |        |   if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
 133 |        |     {
 134 |        |       cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
 135 |        |       as1[n] = cy >> 1;
 136 |        |       asm1[n] = 0;
 137 |        |       vm1_neg = 1;
 138 |        |     }
 139 |        |   else
 140 |        |     {
 141 |        |       mp_limb_t cy2;
 142 |        |       cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
 143 |        |       as1[n] = cy + (cy2 >> 1);
 144 |        |       asm1[n] = cy - (cy2 & 1);
 145 |        |     }
 146 |        | #else
 147 |  2.04M |   as1[n] = cy + mpn_add_n (as1, gp, a1, n);
 148 |  2.04M |   if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
 149 |   850k |     {
 150 |   850k |       mpn_sub_n (asm1, a1, gp, n);
 151 |   850k |       asm1[n] = 0;
 152 |   850k |       vm1_neg = 1;
 153 |   850k |     }
 154 |  1.19M |   else
 155 |  1.19M |     {
 156 |  1.19M |       cy -= mpn_sub_n (asm1, gp, a1, n);
 157 |  1.19M |       asm1[n] = cy;
 158 |  1.19M |     }
 159 |  2.04M | #endif
 160 |        |
 161 |        |   /* Compute as2.  */
 162 |        | #if HAVE_NATIVE_mpn_rsblsh1_n
 163 |        |   cy = mpn_add_n (as2, a2, as1, s);
 164 |        |   if (s != n)
 165 |        |     cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
 166 |        |   cy += as1[n];
 167 |        |   cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
 168 |        | #else
 169 |        | #if HAVE_NATIVE_mpn_addlsh1_n
 170 |        |   cy  = mpn_addlsh1_n (as2, a1, a2, s);
 171 |        |   if (s != n)
 172 |        |     cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
 173 |        |   cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
 174 |        | #else
 175 |  2.04M |   cy = mpn_add_n (as2, a2, as1, s);
 176 |  2.04M |   if (s != n)
 177 |  1.52M |     cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
 178 |  2.04M |   cy += as1[n];
 179 |  2.04M |   cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
 180 |  2.04M |   cy -= mpn_sub_n (as2, as2, a0, n);
 181 |  2.04M | #endif
 182 |  2.04M | #endif
 183 |  2.04M |   as2[n] = cy;
 184 |        |
 185 |        |   /* Compute bs1 and bsm1.  */
 186 |  2.04M |   cy = mpn_add (gp, b0, n, b2, t);
 187 |        | #if HAVE_NATIVE_mpn_add_n_sub_n
 188 |        |   if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
 189 |        |     {
 190 |        |       cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
 191 |        |       bs1[n] = cy >> 1;
 192 |        |       bsm1[n] = 0;
 193 |        |       vm1_neg ^= 1;
 194 |        |     }
 195 |        |   else
 196 |        |     {
 197 |        |       mp_limb_t cy2;
 198 |        |       cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
 199 |        |       bs1[n] = cy + (cy2 >> 1);
 200 |        |       bsm1[n] = cy - (cy2 & 1);
 201 |        |     }
 202 |        | #else
 203 |  2.04M |   bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
 204 |  2.04M |   if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
 205 |   687k |     {
 206 |   687k |       mpn_sub_n (bsm1, b1, gp, n);
 207 |   687k |       bsm1[n] = 0;
 208 |   687k |       vm1_neg ^= 1;
 209 |   687k |     }
 210 |  1.36M |   else
 211 |  1.36M |     {
 212 |  1.36M |       cy -= mpn_sub_n (bsm1, gp, b1, n);
 213 |  1.36M |       bsm1[n] = cy;
 214 |  1.36M |     }
 215 |  2.04M | #endif
 216 |        |
 217 |        |   /* Compute bs2.  */
 218 |        | #if HAVE_NATIVE_mpn_rsblsh1_n
 219 |        |   cy = mpn_add_n (bs2, b2, bs1, t);
 220 |        |   if (t != n)
 221 |        |     cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
 222 |        |   cy += bs1[n];
 223 |        |   cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
 224 |        | #else
 225 |        | #if HAVE_NATIVE_mpn_addlsh1_n
 226 |        |   cy  = mpn_addlsh1_n (bs2, b1, b2, t);
 227 |        |   if (t != n)
 228 |        |     cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
 229 |        |   cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
 230 |        | #else
 231 |  2.04M |   cy  = mpn_add_n (bs2, bs1, b2, t);
 232 |  2.04M |   if (t != n)
 233 |  1.52M |     cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
 234 |  2.04M |   cy += bs1[n];
 235 |  2.04M |   cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
 236 |  2.04M |   cy -= mpn_sub_n (bs2, bs2, b0, n);
 237 |  2.04M | #endif
 238 |  2.04M | #endif
 239 |  2.04M |   bs2[n] = cy;
 240 |        |
 241 |  2.04M |   ASSERT (as1[n] <= 2);
 242 |  2.04M |   ASSERT (bs1[n] <= 2);
 243 |  2.04M |   ASSERT (asm1[n] <= 1);
 244 |  2.04M |   ASSERT (bsm1[n] <= 1);
 245 |  2.04M |   ASSERT (as2[n] <= 6);
 246 |  2.04M |   ASSERT (bs2[n] <= 6);
 247 |        |
 248 |  2.04M | #define v0    pp        /* 2n */
 249 |  2.04M | #define v1    (pp + 2 * n)      /* 2n+1 */
 250 |  6.14M | #define vinf  (pp + 4 * n)      /* s+t */
 251 |  4.09M | #define vm1   scratch        /* 2n+1 */
 252 |  2.04M | #define v2    (scratch + 2 * n + 1)    /* 2n+2 */
 253 |  2.04M | #define scratch_out  (scratch + 5 * n + 5)
 254 |        |
 255 |        |   /* vm1, 2n+1 limbs */
 256 |        | #ifdef SMALLER_RECURSION
 257 |        |   TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
 258 |        |   cy = 0;
 259 |        |   if (asm1[n] != 0)
 260 |        |     cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
 261 |        |   if (bsm1[n] != 0)
 262 |        |     cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
 263 |        |   vm1[2 * n] = cy;
 264 |        | #else
 265 |  2.04M |   vm1[2 * n] = 0;
 266 |  2.04M |   TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + (bsm1[n] | asm1[n]), scratch_out);
 267 |  2.04M | #endif
 268 |        |
 269 |  2.04M |   TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);  /* v2, 2n+1 limbs */
 270 |        |
 271 |        |   /* vinf, s+t limbs */
 272 |  2.04M |   if (s > t)  mpn_mul (vinf, a2, s, b2, t);
 273 |  2.04M |   else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
 274 |        |
 275 |  2.04M |   vinf0 = vinf[0];        /* v1 overlaps with this */
 276 |        |
 277 |        | #ifdef SMALLER_RECURSION
 278 |        |   /* v1, 2n+1 limbs */
 279 |        |   TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
 280 |        |   if (as1[n] == 1)
 281 |        |     {
 282 |        |       cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
 283 |        |     }
 284 |        |   else if (as1[n] != 0)
 285 |        |     {
 286 |        | #if HAVE_NATIVE_mpn_addlsh1_n_ip1
 287 |        |       cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
 288 |        | #else
 289 |        |       cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
 290 |        | #endif
 291 |        |     }
 292 |        |   else
 293 |        |     cy = 0;
 294 |        |   if (bs1[n] == 1)
 295 |        |     {
 296 |        |       cy += mpn_add_n (v1 + n, v1 + n, as1, n);
 297 |        |     }
 298 |        |   else if (bs1[n] != 0)
 299 |        |     {
 300 |        | #if HAVE_NATIVE_mpn_addlsh1_n_ip1
 301 |        |       cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
 302 |        | #else
 303 |        |       cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
 304 |        | #endif
 305 |        |     }
 306 |        |   v1[2 * n] = cy;
 307 |        | #else
 308 |  2.04M |   cy = vinf[1];
 309 |  2.04M |   TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
 310 |  2.04M |   vinf[1] = cy;
 311 |  2.04M | #endif
 312 |        |
 313 |  2.04M |   TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);  /* v0, 2n limbs */
 314 |        |
 315 |  2.04M |   mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
 316 |  2.04M | }
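
The evaluation scheme described in the comment at source lines 42-55 and the final call to mpn_toom_interpolate_5pts can be illustrated with a small stand-alone sketch. The code below is not GMP code: it works on single-word coefficients with long long instead of limb vectors, the helper name toy_toom3 is hypothetical, and the interpolation sequence shown is one standard exact-division variant, not the exact step order used inside mpn_toom_interpolate_5pts.

/* Minimal sketch: Toom-3 on single-word coefficients, showing the five
   evaluation points -1, 0, +1, +2, +inf used in toom33_mul.c.  */
#include <assert.h>
#include <stdio.h>

/* Multiply A(x) = a[0] + a[1]*x + a[2]*x^2 by B(x) of the same shape,
   producing the five coefficients of C(x) = A(x)*B(x).  */
static void
toy_toom3 (long long c[5], const long long a[3], const long long b[3])
{
  /* Pointwise products at the five evaluation points.  */
  long long v0   =  a[0]                    *  b[0];                    /* A(0)*B(0)     */
  long long v1   = (a[0] + a[1] + a[2])     * (b[0] + b[1] + b[2]);     /* A(1)*B(1)     */
  long long vm1  = (a[0] - a[1] + a[2])     * (b[0] - b[1] + b[2]);     /* A(-1)*B(-1)   */
  long long v2   = (a[0] + 2*a[1] + 4*a[2]) * (b[0] + 2*b[1] + 4*b[2]); /* A(2)*B(2)     */
  long long vinf =  a[2]                    *  b[2];                    /* A(inf)*B(inf) */

  /* Recover the degree-4 product; every division below is exact.  */
  long long w1 = (v2 - vm1) / 3;   /* = c1 + c2 + 3*c3 + 5*c4 */
  long long w2 = (v1 - vm1) / 2;   /* = c1 + c3               */
  long long w3 = v1 - v0;          /* = c1 + c2 + c3 + c4     */
  w1 = (w1 - w3) / 2;              /* = c3 + 2*c4             */
  w3 = w3 - w2;                    /* = c2 + c4               */

  c[0] = v0;
  c[4] = vinf;
  c[3] = w1 - 2 * vinf;
  c[2] = w3 - vinf;
  c[1] = w2 - c[3];
}

int
main (void)
{
  long long a[3] = {5, 7, 3}, b[3] = {2, 9, 4}, c[5];
  toy_toom3 (c, a, b);

  /* Cross-check against schoolbook convolution of the coefficients.  */
  for (int k = 0; k <= 4; k++)
    {
      long long ref = 0;
      for (int i = 0; i <= 2; i++)
        for (int j = 0; j <= 2; j++)
          if (i + j == k)
            ref += a[i] * b[j];
      assert (c[k] == ref);
      printf ("c[%d] = %lld\n", k, c[k]);
    }
  return 0;
}

In mpn_toom33_mul the same five values are computed on limb vectors (v0, v1, vm1, v2, vinf in the listing), with as1/asm1/as2 and bs1/bsm1/bs2 holding the evaluated operands and vm1_neg tracking the sign of the A(-1)*B(-1) product; mpn_toom_interpolate_5pts then performs the limb-level analogue of the recovery step.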