/src/gmp/mpn/toom33_mul.c

Source (jump to first uncovered line)
/* mpn_toom33_mul -- Multiply {ap,an} and {p,bn} where an and bn are close in
   size.  Or more accurately, bn <= an < (3/2)bn.

   Contributed to the GNU project by Torbjorn Granlund.
   Additional improvements by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2008, 2010, 2012, 2015, 2021 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp-impl.h"

/* Evaluate in: -1, 0, +1, +2, +inf

  <-s--><--n--><--n-->
   ____ ______ ______
  |_a2_|___a1_|___a0_|
   |b2_|___b1_|___b0_|
   <-t-><--n--><--n-->

  v0  =  a0         * b0          #   A(0)*B(0)
  v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
  vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  bh <= 1
  v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
  vinf=          a2 *         b2  # A(inf)*B(inf)
*/

#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_mul_basecase 1
#define MAYBE_mul_toom33   1
#else
#define MAYBE_mul_basecase            \
  (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
#define MAYBE_mul_toom33            \
  (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
#endif

/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
   multiplication at the infinity point. We may have
   MAYBE_mul_basecase == 0, and still get s just below
   MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
   s == 1 and mpn_toom22_mul will crash.
*/

#define TOOM33_MUL_N_REC(p, a, b, n, ws)        \
  do {                 \
    if (MAYBE_mul_basecase            \
  && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))     \
      mpn_mul_basecase (p, a, n, b, n);         \
    else if (! MAYBE_mul_toom33            \
       || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))   \
      mpn_toom22_mul (p, a, n, b, n, ws);       \
    else                \
      mpn_toom33_mul (p, a, n, b, n, ws);       \
  } while (0)

void
mpn_toom33_mul (mp_ptr pp,
    mp_srcptr ap, mp_size_t an,
    mp_srcptr bp, mp_size_t bn,
    mp_ptr scratch)
{
  const int __gmpn_cpuvec_initialized = 1;
  mp_size_t n, s, t;
  int vm1_neg;
  mp_limb_t cy, vinf0;
  mp_ptr gp;
  mp_ptr as1, asm1, as2;
  mp_ptr bs1, bsm1, bs2;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = (an + 2) / (size_t) 3;

  s = an - 2 * n;
  t = bn - 2 * n;

  ASSERT (an >= bn);

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  as1  = scratch + 4 * n + 4;
  asm1 = scratch + 2 * n + 2;
  as2 = pp + n + 1;

  bs1 = pp;
  bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
  bs2 = pp + 2 * n + 2;

  gp = scratch;

  vm1_neg = 0;

  /* Compute as1 and asm1.  */
  cy = mpn_add (gp, a0, n, a2, s);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
      as1[n] = cy >> 1;
      asm1[n] = 0;
      vm1_neg = 1;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
      as1[n] = cy + (cy2 >> 1);
      asm1[n] = cy - (cy2 & 1);
    }
#else
  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      mpn_sub_n (asm1, a1, gp, n);
      asm1[n] = 0;
      vm1_neg = 1;
    }
  else
    {
      cy -= mpn_sub_n (asm1, gp, a1, n);
      asm1[n] = cy;
    }
#endif

  /* Compute as2.  */
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (as2, a1, a2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  cy -= mpn_sub_n (as2, as2, a0, n);
#endif
#endif
  as2[n] = cy;

  /* Compute bs1 and bsm1.  */
  cy = mpn_add (gp, b0, n, b2, t);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
      bs1[n] = cy >> 1;
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
      bs1[n] = cy + (cy2 >> 1);
      bsm1[n] = cy - (cy2 & 1);
    }
#else
  bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
    {
      mpn_sub_n (bsm1, b1, gp, n);
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
  else
    {
      cy -= mpn_sub_n (bsm1, gp, b1, n);
      bsm1[n] = cy;
    }
#endif

  /* Compute bs2.  */
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (bs2, b2, bs1, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  cy += bs1[n];
  cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (bs2, b1, b2, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
  cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
#else
  cy  = mpn_add_n (bs2, bs1, b2, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  cy += bs1[n];
  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
  cy -= mpn_sub_n (bs2, bs2, b0, n);
#endif
#endif
  bs2[n] = cy;

  ASSERT (as1[n] <= 2);
  ASSERT (bs1[n] <= 2);
  ASSERT (asm1[n] <= 1);
  ASSERT (bsm1[n] <= 1);
  ASSERT (as2[n] <= 6);
  ASSERT (bs2[n] <= 6);

#define v0    pp        /* 2n */
#define v1    (pp + 2 * n)      /* 2n+1 */
#define vinf  (pp + 4 * n)      /* s+t */
#define vm1   scratch        /* 2n+1 */
#define v2    (scratch + 2 * n + 1)    /* 2n+2 */
#define scratch_out  (scratch + 5 * n + 5)

  /* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
  cy = 0;
  if (asm1[n] != 0)
    cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
  if (bsm1[n] != 0)
    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  vm1[2 * n] = cy;
#else
  vm1[2 * n] = 0;
  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + (bsm1[n] | asm1[n]), scratch_out);
#endif

  TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);  /* v2, 2n+1 limbs */

  /* vinf, s+t limbs */
  if (s > t)  mpn_mul (vinf, a2, s, b2, t);
  else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);

  vinf0 = vinf[0];        /* v1 overlaps with this */

#ifdef SMALLER_RECURSION
  /* v1, 2n+1 limbs */
  TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
  if (as1[n] == 1)
    {
      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
    }
  else if (as1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
      cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
#else
      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
#endif
    }
  else
    cy = 0;
  if (bs1[n] == 1)
    {
      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (bs1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
      cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
#else
      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  v1[2 * n] = cy;
#else
  cy = vinf[1];
  TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
  vinf[1] = cy;
#endif

  TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);  /* v0, 2n limbs */

  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
}

Coverage Report

Created: 2024-11-25 06:29

Line	Count	Source (jump to first uncovered line)
1		/* mpn_toom33_mul -- Multiply {ap,an} and {p,bn} where an and bn are close in
2		size. Or more accurately, bn <= an < (3/2)bn.
3
4		Contributed to the GNU project by Torbjorn Granlund.
5		Additional improvements by Marco Bodrato.
6
7		THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
8		SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
9		GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10
11		Copyright 2006-2008, 2010, 2012, 2015, 2021 Free Software Foundation, Inc.
12
13		This file is part of the GNU MP Library.
14
15		The GNU MP Library is free software; you can redistribute it and/or modify
16		it under the terms of either:
17
18		* the GNU Lesser General Public License as published by the Free
19		Software Foundation; either version 3 of the License, or (at your
20		option) any later version.
21
22		or
23
24		* the GNU General Public License as published by the Free Software
25		Foundation; either version 2 of the License, or (at your option) any
26		later version.
27
28		or both in parallel, as here.
29
30		The GNU MP Library is distributed in the hope that it will be useful, but
31		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
32		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
33		for more details.
34
35		You should have received copies of the GNU General Public License and the
36		GNU Lesser General Public License along with the GNU MP Library. If not,
37		see https://www.gnu.org/licenses/. */
38
39
40		#include "gmp-impl.h"
41
42		/* Evaluate in: -1, 0, +1, +2, +inf
43
44		<-s--><--n--><--n-->
45		____ ______ ______
46		\|_a2_\|___a1_\|___a0_\|
47		\|b2_\|___b1_\|___b0_\|
48		<-t-><--n--><--n-->
49
50		v0 = a0 * b0 # A(0)*B(0)
51		v1 = (a0+ a1+ a2)(b0+ b1+ b2) # A(1)B(1) ah <= 2 bh <= 2
52		vm1 = (a0- a1+ a2)(b0- b1+ b2) # A(-1)B(-1) \|ah\| <= 1 bh <= 1
53		v2 = (a0+2a1+4a2)(b0+2b1+4b2) # A(2)B(2) ah <= 6 bh <= 6
54		vinf= a2 * b2 # A(inf)*B(inf)
55		*/
56
57		#if TUNE_PROGRAM_BUILD \|\| WANT_FAT_BINARY
58		#define MAYBE_mul_basecase 1
59		#define MAYBE_mul_toom33 1
60		#else
61		#define MAYBE_mul_basecase \
62	519k	(MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
63		#define MAYBE_mul_toom33 \
64	519k	(MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
65		#endif
66
67		/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
68		multiplication at the infinity point. We may have
69		MAYBE_mul_basecase == 0, and still get s just below
70		MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
71		s == 1 and mpn_toom22_mul will crash.
72		*/
73
74		#define TOOM33_MUL_N_REC(p, a, b, n, ws) \
75	259k	do { \
76	259k	if (MAYBE_mul_basecase \
77	259k	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
78	259k	mpn_mul_basecase (p, a, n, b, n); \
79	259k	else if (! MAYBE_mul_toom33 \
80	259k	\|\| BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
81	259k	mpn_toom22_mul (p, a, n, b, n, ws); \
82	259k	else \
83	259k	mpn_toom33_mul (p, a, n, b, n, ws); \
84	259k	} while (0)
85
86		void
87		mpn_toom33_mul (mp_ptr pp,
88		mp_srcptr ap, mp_size_t an,
89		mp_srcptr bp, mp_size_t bn,
90		mp_ptr scratch)
91	51.9k	{
92	51.9k	const int __gmpn_cpuvec_initialized = 1;
93	51.9k	mp_size_t n, s, t;
94	51.9k	int vm1_neg;
95	51.9k	mp_limb_t cy, vinf0;
96	51.9k	mp_ptr gp;
97	51.9k	mp_ptr as1, asm1, as2;
98	51.9k	mp_ptr bs1, bsm1, bs2;
99
100	103k	#define a0 ap
101	155k	#define a1 (ap + n)
102	103k	#define a2 (ap + 2*n)
103	103k	#define b0 bp
104	155k	#define b1 (bp + n)
105	103k	#define b2 (bp + 2*n)
106
107	51.9k	n = (an + 2) / (size_t) 3;
108
109	51.9k	s = an - 2 * n;
110	51.9k	t = bn - 2 * n;
111
112	51.9k	ASSERT (an >= bn);
113
114	51.9k	ASSERT (0 < s && s <= n);
115	51.9k	ASSERT (0 < t && t <= n);
116
117	51.9k	as1 = scratch + 4 * n + 4;
118	51.9k	asm1 = scratch + 2 * n + 2;
119	51.9k	as2 = pp + n + 1;
120
121	51.9k	bs1 = pp;
122	51.9k	bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
123	51.9k	bs2 = pp + 2 * n + 2;
124
125	51.9k	gp = scratch;
126
127	51.9k	vm1_neg = 0;
128
129		/* Compute as1 and asm1. */
130	51.9k	cy = mpn_add (gp, a0, n, a2, s);
131		#if HAVE_NATIVE_mpn_add_n_sub_n
132		if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
133		{
134		cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
135		as1[n] = cy >> 1;
136		asm1[n] = 0;
137		vm1_neg = 1;
138		}
139		else
140		{
141		mp_limb_t cy2;
142		cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
143		as1[n] = cy + (cy2 >> 1);
144		asm1[n] = cy - (cy2 & 1);
145		}
146		#else
147	51.9k	as1[n] = cy + mpn_add_n (as1, gp, a1, n);
148	51.9k	if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
149	25.9k	{
150	25.9k	mpn_sub_n (asm1, a1, gp, n);
151	25.9k	asm1[n] = 0;
152	25.9k	vm1_neg = 1;
153	25.9k	}
154	25.9k	else
155	25.9k	{
156	25.9k	cy -= mpn_sub_n (asm1, gp, a1, n);
157	25.9k	asm1[n] = cy;
158	25.9k	}
159	51.9k	#endif
160
161		/* Compute as2. */
162		#if HAVE_NATIVE_mpn_rsblsh1_n
163		cy = mpn_add_n (as2, a2, as1, s);
164		if (s != n)
165		cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
166		cy += as1[n];
167		cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
168		#else
169		#if HAVE_NATIVE_mpn_addlsh1_n
170		cy = mpn_addlsh1_n (as2, a1, a2, s);
171		if (s != n)
172		cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
173		cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
174		#else
175	51.9k	cy = mpn_add_n (as2, a2, as1, s);
176	51.9k	if (s != n)
177	51.9k	cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
178	51.9k	cy += as1[n];
179	51.9k	cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
180	51.9k	cy -= mpn_sub_n (as2, as2, a0, n);
181	51.9k	#endif
182	51.9k	#endif
183	51.9k	as2[n] = cy;
184
185		/* Compute bs1 and bsm1. */
186	51.9k	cy = mpn_add (gp, b0, n, b2, t);
187		#if HAVE_NATIVE_mpn_add_n_sub_n
188		if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
189		{
190		cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
191		bs1[n] = cy >> 1;
192		bsm1[n] = 0;
193		vm1_neg ^= 1;
194		}
195		else
196		{
197		mp_limb_t cy2;
198		cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
199		bs1[n] = cy + (cy2 >> 1);
200		bsm1[n] = cy - (cy2 & 1);
201		}
202		#else
203	51.9k	bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
204	51.9k	if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
205	25.0k	{
206	25.0k	mpn_sub_n (bsm1, b1, gp, n);
207	25.0k	bsm1[n] = 0;
208	25.0k	vm1_neg ^= 1;
209	25.0k	}
210	26.8k	else
211	26.8k	{
212	26.8k	cy -= mpn_sub_n (bsm1, gp, b1, n);
213	26.8k	bsm1[n] = cy;
214	26.8k	}
215	51.9k	#endif
216
217		/* Compute bs2. */
218		#if HAVE_NATIVE_mpn_rsblsh1_n
219		cy = mpn_add_n (bs2, b2, bs1, t);
220		if (t != n)
221		cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
222		cy += bs1[n];
223		cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
224		#else
225		#if HAVE_NATIVE_mpn_addlsh1_n
226		cy = mpn_addlsh1_n (bs2, b1, b2, t);
227		if (t != n)
228		cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
229		cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
230		#else
231	51.9k	cy = mpn_add_n (bs2, bs1, b2, t);
232	51.9k	if (t != n)
233	51.9k	cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
234	51.9k	cy += bs1[n];
235	51.9k	cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
236	51.9k	cy -= mpn_sub_n (bs2, bs2, b0, n);
237	51.9k	#endif
238	51.9k	#endif
239	51.9k	bs2[n] = cy;
240
241	51.9k	ASSERT (as1[n] <= 2);
242	51.9k	ASSERT (bs1[n] <= 2);
243	51.9k	ASSERT (asm1[n] <= 1);
244	51.9k	ASSERT (bsm1[n] <= 1);
245	51.9k	ASSERT (as2[n] <= 6);
246	51.9k	ASSERT (bs2[n] <= 6);
247
248	51.9k	#define v0 pp /* 2n */
249	51.9k	#define v1 (pp + 2 * n) /* 2n+1 */
250	155k	#define vinf (pp + 4 * n) /* s+t */
251	103k	#define vm1 scratch /* 2n+1 */
252	51.9k	#define v2 (scratch + 2 * n + 1) /* 2n+2 */
253	51.9k	#define scratch_out (scratch + 5 * n + 5)
254
255		/* vm1, 2n+1 limbs */
256		#ifdef SMALLER_RECURSION
257		TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
258		cy = 0;
259		if (asm1[n] != 0)
260		cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
261		if (bsm1[n] != 0)
262		cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
263		vm1[2 * n] = cy;
264		#else
265	51.9k	vm1[2 * n] = 0;
266	51.9k	TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + (bsm1[n] \| asm1[n]), scratch_out);
267	51.9k	#endif
268
269	51.9k	TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
270
271		/* vinf, s+t limbs */
272	51.9k	if (s > t) mpn_mul (vinf, a2, s, b2, t);
273	51.9k	else TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
274
275	51.9k	vinf0 = vinf[0]; /* v1 overlaps with this */
276
277		#ifdef SMALLER_RECURSION
278		/* v1, 2n+1 limbs */
279		TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
280		if (as1[n] == 1)
281		{
282		cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
283		}
284		else if (as1[n] != 0)
285		{
286		#if HAVE_NATIVE_mpn_addlsh1_n_ip1
287		cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
288		#else
289		cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
290		#endif
291		}
292		else
293		cy = 0;
294		if (bs1[n] == 1)
295		{
296		cy += mpn_add_n (v1 + n, v1 + n, as1, n);
297		}
298		else if (bs1[n] != 0)
299		{
300		#if HAVE_NATIVE_mpn_addlsh1_n_ip1
301		cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
302		#else
303		cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
304		#endif
305		}
306		v1[2 * n] = cy;
307		#else
308	51.9k	cy = vinf[1];
309	51.9k	TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
310	51.9k	vinf[1] = cy;
311	51.9k	#endif
312
313	51.9k	TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
314
315	51.9k	mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
316	51.9k	}