/src/libgmp/mpn/toom33_mul.c

Source (jump to first uncovered line)
/* mpn_toom33_mul -- Multiply {ap,an} and {p,bn} where an and bn are close in
   size.  Or more accurately, bn <= an < (3/2)bn.

   Contributed to the GNU project by Torbjorn Granlund.
   Additional improvements by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2008, 2010, 2012, 2015, 2021 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp-impl.h"

/* Evaluate in: -1, 0, +1, +2, +inf

  <-s--><--n--><--n-->
   ____ ______ ______
  |_a2_|___a1_|___a0_|
   |b2_|___b1_|___b0_|
   <-t-><--n--><--n-->

  v0  =  a0         * b0          #   A(0)*B(0)
  v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
  vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  bh <= 1
  v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
  vinf=          a2 *         b2  # A(inf)*B(inf)
*/

#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_mul_basecase 1
#define MAYBE_mul_toom33   1
#else
#define MAYBE_mul_basecase            \
  (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
#define MAYBE_mul_toom33            \
  (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
#endif

/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
   multiplication at the infinity point. We may have
   MAYBE_mul_basecase == 0, and still get s just below
   MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
   s == 1 and mpn_toom22_mul will crash.
*/

#define TOOM33_MUL_N_REC(p, a, b, n, ws)        \
  do {                 \
    if (MAYBE_mul_basecase            \
  && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))     \
      mpn_mul_basecase (p, a, n, b, n);         \
    else if (! MAYBE_mul_toom33            \
       || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))   \
      mpn_toom22_mul (p, a, n, b, n, ws);       \
    else                \
      mpn_toom33_mul (p, a, n, b, n, ws);       \
  } while (0)

void
mpn_toom33_mul (mp_ptr pp,
    mp_srcptr ap, mp_size_t an,
    mp_srcptr bp, mp_size_t bn,
    mp_ptr scratch)
{
  const int __gmpn_cpuvec_initialized = 1;
  mp_size_t n, s, t;
  int vm1_neg;
  mp_limb_t cy, vinf0;
  mp_ptr gp;
  mp_ptr as1, asm1, as2;
  mp_ptr bs1, bsm1, bs2;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = (an + 2) / (size_t) 3;

  s = an - 2 * n;
  t = bn - 2 * n;

  ASSERT (an >= bn);

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  as1  = scratch + 4 * n + 4;
  asm1 = scratch + 2 * n + 2;
  as2 = pp + n + 1;

  bs1 = pp;
  bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
  bs2 = pp + 2 * n + 2;

  gp = scratch;

  vm1_neg = 0;

  /* Compute as1 and asm1.  */
  cy = mpn_add (gp, a0, n, a2, s);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
      as1[n] = cy >> 1;
      asm1[n] = 0;
      vm1_neg = 1;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
      as1[n] = cy + (cy2 >> 1);
      asm1[n] = cy - (cy2 & 1);
    }
#else
  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      mpn_sub_n (asm1, a1, gp, n);
      asm1[n] = 0;
      vm1_neg = 1;
    }
  else
    {
      cy -= mpn_sub_n (asm1, gp, a1, n);
      asm1[n] = cy;
    }
#endif

  /* Compute as2.  */
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (as2, a1, a2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  cy -= mpn_sub_n (as2, as2, a0, n);
#endif
#endif
  as2[n] = cy;

  /* Compute bs1 and bsm1.  */
  cy = mpn_add (gp, b0, n, b2, t);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
      bs1[n] = cy >> 1;
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
      bs1[n] = cy + (cy2 >> 1);
      bsm1[n] = cy - (cy2 & 1);
    }
#else
  bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
    {
      mpn_sub_n (bsm1, b1, gp, n);
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
  else
    {
      cy -= mpn_sub_n (bsm1, gp, b1, n);
      bsm1[n] = cy;
    }
#endif

  /* Compute bs2.  */
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (bs2, b2, bs1, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  cy += bs1[n];
  cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (bs2, b1, b2, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
  cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
#else
  cy  = mpn_add_n (bs2, bs1, b2, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  cy += bs1[n];
  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
  cy -= mpn_sub_n (bs2, bs2, b0, n);
#endif
#endif
  bs2[n] = cy;

  ASSERT (as1[n] <= 2);
  ASSERT (bs1[n] <= 2);
  ASSERT (asm1[n] <= 1);
  ASSERT (bsm1[n] <= 1);
  ASSERT (as2[n] <= 6);
  ASSERT (bs2[n] <= 6);

#define v0    pp        /* 2n */
#define v1    (pp + 2 * n)      /* 2n+1 */
#define vinf  (pp + 4 * n)      /* s+t */
#define vm1   scratch        /* 2n+1 */
#define v2    (scratch + 2 * n + 1)    /* 2n+2 */
#define scratch_out  (scratch + 5 * n + 5)

  /* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
  cy = 0;
  if (asm1[n] != 0)
    cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
  if (bsm1[n] != 0)
    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  vm1[2 * n] = cy;
#else
  vm1[2 * n] = 0;
  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + (bsm1[n] | asm1[n]), scratch_out);
#endif

  TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);  /* v2, 2n+1 limbs */

  /* vinf, s+t limbs */
  if (s > t)  mpn_mul (vinf, a2, s, b2, t);
  else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);

  vinf0 = vinf[0];        /* v1 overlaps with this */

#ifdef SMALLER_RECURSION
  /* v1, 2n+1 limbs */
  TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
  if (as1[n] == 1)
    {
      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
    }
  else if (as1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
      cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
#else
      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
#endif
    }
  else
    cy = 0;
  if (bs1[n] == 1)
    {
      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (bs1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
      cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
#else
      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  v1[2 * n] = cy;
#else
  cy = vinf[1];
  TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
  vinf[1] = cy;
#endif

  TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);  /* v0, 2n limbs */

  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
}

Coverage Report

Created: 2024-11-21 07:03

Line	Count	Source (jump to first uncovered line)
1		/* mpn_toom33_mul -- Multiply {ap,an} and {p,bn} where an and bn are close in
2		size. Or more accurately, bn <= an < (3/2)bn.
3
4		Contributed to the GNU project by Torbjorn Granlund.
5		Additional improvements by Marco Bodrato.
6
7		THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
8		SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
9		GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10
11		Copyright 2006-2008, 2010, 2012, 2015, 2021 Free Software Foundation, Inc.
12
13		This file is part of the GNU MP Library.
14
15		The GNU MP Library is free software; you can redistribute it and/or modify
16		it under the terms of either:
17
18		* the GNU Lesser General Public License as published by the Free
19		Software Foundation; either version 3 of the License, or (at your
20		option) any later version.
21
22		or
23
24		* the GNU General Public License as published by the Free Software
25		Foundation; either version 2 of the License, or (at your option) any
26		later version.
27
28		or both in parallel, as here.
29
30		The GNU MP Library is distributed in the hope that it will be useful, but
31		WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
32		or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
33		for more details.
34
35		You should have received copies of the GNU General Public License and the
36		GNU Lesser General Public License along with the GNU MP Library. If not,
37		see https://www.gnu.org/licenses/. */
38
39
40		#include "gmp-impl.h"
41
42		/* Evaluate in: -1, 0, +1, +2, +inf
43
44		<-s--><--n--><--n-->
45		____ ______ ______
46		\|_a2_\|___a1_\|___a0_\|
47		\|b2_\|___b1_\|___b0_\|
48		<-t-><--n--><--n-->
49
50		v0 = a0 * b0 # A(0)*B(0)
51		v1 = (a0+ a1+ a2)(b0+ b1+ b2) # A(1)B(1) ah <= 2 bh <= 2
52		vm1 = (a0- a1+ a2)(b0- b1+ b2) # A(-1)B(-1) \|ah\| <= 1 bh <= 1
53		v2 = (a0+2a1+4a2)(b0+2b1+4b2) # A(2)B(2) ah <= 6 bh <= 6
54		vinf= a2 * b2 # A(inf)*B(inf)
55		*/
56
57		#if TUNE_PROGRAM_BUILD \|\| WANT_FAT_BINARY
58		#define MAYBE_mul_basecase 1
59		#define MAYBE_mul_toom33 1
60		#else
61		#define MAYBE_mul_basecase \
62	19.5M	(MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
63		#define MAYBE_mul_toom33 \
64	19.5M	(MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
65		#endif
66
67		/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
68		multiplication at the infinity point. We may have
69		MAYBE_mul_basecase == 0, and still get s just below
70		MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
71		s == 1 and mpn_toom22_mul will crash.
72		*/
73
74		#define TOOM33_MUL_N_REC(p, a, b, n, ws) \
75	9.78M	do { \
76	9.78M	if (MAYBE_mul_basecase \
77	9.78M	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
78	9.78M	mpn_mul_basecase (p, a, n, b, n); \
79	9.78M	else if (! MAYBE_mul_toom33 \
80	9.78M	\|\| BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
81	9.78M	mpn_toom22_mul (p, a, n, b, n, ws); \
82	9.78M	else \
83	9.78M	mpn_toom33_mul (p, a, n, b, n, ws); \
84	9.78M	} while (0)
85
86		void
87		mpn_toom33_mul (mp_ptr pp,
88		mp_srcptr ap, mp_size_t an,
89		mp_srcptr bp, mp_size_t bn,
90		mp_ptr scratch)
91	1.95M	{
92	1.95M	const int __gmpn_cpuvec_initialized = 1;
93	1.95M	mp_size_t n, s, t;
94	1.95M	int vm1_neg;
95	1.95M	mp_limb_t cy, vinf0;
96	1.95M	mp_ptr gp;
97	1.95M	mp_ptr as1, asm1, as2;
98	1.95M	mp_ptr bs1, bsm1, bs2;
99
100	3.91M	#define a0 ap
101	5.60M	#define a1 (ap + n)
102	3.91M	#define a2 (ap + 2*n)
103	3.91M	#define b0 bp
104	5.65M	#define b1 (bp + n)
105	3.91M	#define b2 (bp + 2*n)
106
107	1.95M	n = (an + 2) / (size_t) 3;
108
109	1.95M	s = an - 2 * n;
110	1.95M	t = bn - 2 * n;
111
112	1.95M	ASSERT (an >= bn);
113
114	1.95M	ASSERT (0 < s && s <= n);
115	1.95M	ASSERT (0 < t && t <= n);
116
117	1.95M	as1 = scratch + 4 * n + 4;
118	1.95M	asm1 = scratch + 2 * n + 2;
119	1.95M	as2 = pp + n + 1;
120
121	1.95M	bs1 = pp;
122	1.95M	bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
123	1.95M	bs2 = pp + 2 * n + 2;
124
125	1.95M	gp = scratch;
126
127	1.95M	vm1_neg = 0;
128
129		/* Compute as1 and asm1. */
130	1.95M	cy = mpn_add (gp, a0, n, a2, s);
131		#if HAVE_NATIVE_mpn_add_n_sub_n
132		if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
133		{
134		cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
135		as1[n] = cy >> 1;
136		asm1[n] = 0;
137		vm1_neg = 1;
138		}
139		else
140		{
141		mp_limb_t cy2;
142		cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
143		as1[n] = cy + (cy2 >> 1);
144		asm1[n] = cy - (cy2 & 1);
145		}
146		#else
147	1.95M	as1[n] = cy + mpn_add_n (as1, gp, a1, n);
148	1.95M	if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
149	802k	{
150	802k	mpn_sub_n (asm1, a1, gp, n);
151	802k	asm1[n] = 0;
152	802k	vm1_neg = 1;
153	802k	}
154	1.15M	else
155	1.15M	{
156	1.15M	cy -= mpn_sub_n (asm1, gp, a1, n);
157	1.15M	asm1[n] = cy;
158	1.15M	}
159	1.95M	#endif
160
161		/* Compute as2. */
162	1.95M	#if HAVE_NATIVE_mpn_rsblsh1_n
163	1.95M	cy = mpn_add_n (as2, a2, as1, s);
164	1.95M	if (s != n)
165	1.39M	cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
166	1.95M	cy += as1[n];
167	1.95M	cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
168		#else
169		#if HAVE_NATIVE_mpn_addlsh1_n
170		cy = mpn_addlsh1_n (as2, a1, a2, s);
171		if (s != n)
172		cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
173		cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
174		#else
175		cy = mpn_add_n (as2, a2, as1, s);
176		if (s != n)
177		cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
178		cy += as1[n];
179		cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
180		cy -= mpn_sub_n (as2, as2, a0, n);
181		#endif
182		#endif
183	1.95M	as2[n] = cy;
184
185		/* Compute bs1 and bsm1. */
186	1.95M	cy = mpn_add (gp, b0, n, b2, t);
187		#if HAVE_NATIVE_mpn_add_n_sub_n
188		if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
189		{
190		cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
191		bs1[n] = cy >> 1;
192		bsm1[n] = 0;
193		vm1_neg ^= 1;
194		}
195		else
196		{
197		mp_limb_t cy2;
198		cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
199		bs1[n] = cy + (cy2 >> 1);
200		bsm1[n] = cy - (cy2 & 1);
201		}
202		#else
203	1.95M	bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
204	1.95M	if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
205	842k	{
206	842k	mpn_sub_n (bsm1, b1, gp, n);
207	842k	bsm1[n] = 0;
208	842k	vm1_neg ^= 1;
209	842k	}
210	1.11M	else
211	1.11M	{
212	1.11M	cy -= mpn_sub_n (bsm1, gp, b1, n);
213	1.11M	bsm1[n] = cy;
214	1.11M	}
215	1.95M	#endif
216
217		/* Compute bs2. */
218	1.95M	#if HAVE_NATIVE_mpn_rsblsh1_n
219	1.95M	cy = mpn_add_n (bs2, b2, bs1, t);
220	1.95M	if (t != n)
221	1.39M	cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
222	1.95M	cy += bs1[n];
223	1.95M	cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
224		#else
225		#if HAVE_NATIVE_mpn_addlsh1_n
226		cy = mpn_addlsh1_n (bs2, b1, b2, t);
227		if (t != n)
228		cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
229		cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
230		#else
231		cy = mpn_add_n (bs2, bs1, b2, t);
232		if (t != n)
233		cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
234		cy += bs1[n];
235		cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
236		cy -= mpn_sub_n (bs2, bs2, b0, n);
237		#endif
238		#endif
239	1.95M	bs2[n] = cy;
240
241	1.95M	ASSERT (as1[n] <= 2);
242	1.95M	ASSERT (bs1[n] <= 2);
243	1.95M	ASSERT (asm1[n] <= 1);
244	1.95M	ASSERT (bsm1[n] <= 1);
245	1.95M	ASSERT (as2[n] <= 6);
246	1.95M	ASSERT (bs2[n] <= 6);
247
248	1.95M	#define v0 pp /* 2n */
249	1.95M	#define v1 (pp + 2 * n) /* 2n+1 */
250	5.87M	#define vinf (pp + 4 * n) /* s+t */
251	3.91M	#define vm1 scratch /* 2n+1 */
252	1.95M	#define v2 (scratch + 2 * n + 1) /* 2n+2 */
253	1.95M	#define scratch_out (scratch + 5 * n + 5)
254
255		/* vm1, 2n+1 limbs */
256		#ifdef SMALLER_RECURSION
257		TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
258		cy = 0;
259		if (asm1[n] != 0)
260		cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
261		if (bsm1[n] != 0)
262		cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
263		vm1[2 * n] = cy;
264		#else
265	1.95M	vm1[2 * n] = 0;
266	1.95M	TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + (bsm1[n] \| asm1[n]), scratch_out);
267	1.95M	#endif
268
269	1.95M	TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
270
271		/* vinf, s+t limbs */
272	1.95M	if (s > t) mpn_mul (vinf, a2, s, b2, t);
273	1.95M	else TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
274
275	1.95M	vinf0 = vinf[0]; /* v1 overlaps with this */
276
277		#ifdef SMALLER_RECURSION
278		/* v1, 2n+1 limbs */
279		TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
280		if (as1[n] == 1)
281		{
282		cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
283		}
284		else if (as1[n] != 0)
285		{
286		#if HAVE_NATIVE_mpn_addlsh1_n_ip1
287		cy = 2 * bs1[n] + mpn_addlsh1_n_ip1 (v1 + n, bs1, n);
288		#else
289		cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
290		#endif
291		}
292		else
293		cy = 0;
294		if (bs1[n] == 1)
295		{
296		cy += mpn_add_n (v1 + n, v1 + n, as1, n);
297		}
298		else if (bs1[n] != 0)
299		{
300		#if HAVE_NATIVE_mpn_addlsh1_n_ip1
301		cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
302		#else
303		cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
304		#endif
305		}
306		v1[2 * n] = cy;
307		#else
308	1.95M	cy = vinf[1];
309	1.95M	TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
310	1.95M	vinf[1] = cy;
311	1.95M	#endif
312
313	1.95M	TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
314
315	1.95M	mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
316	1.95M	}