/src/fftw3/dft/rader.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

#include "dft/dft.h"

/*
 * Compute transforms of prime sizes using Rader's trick: turn them
 * into convolutions of size n - 1, which you then perform via a pair
 * of FFTs.
 */

typedef struct {
     solver super;
} S;

typedef struct {
     plan_dft super;

     plan *cld1, *cld2;
     R *omega;
     INT n, g, ginv;
     INT is, os;
     plan *cld_omega;
} P;

static rader_tl *omegas = 0;

static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv)
{
     plan_dft *p = (plan_dft *) p_;
     R *omega;
     INT i, gpower;
     trigreal scale;
     triggen *t;

     if ((omega = X(rader_tl_find)(n, n, ginv, omegas)))
    return omega;

     omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES);

     scale = n - 1.0; /* normalization for convolution */

     t = X(mktriggen)(wakefulness, n);
     for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
    trigreal w[2];
    t->cexpl(t, gpower, w);
    omega[2*i] = w[0] / scale;
    omega[2*i+1] = FFT_SIGN * w[1] / scale;
     }
     X(triggen_destroy)(t);
     A(gpower == 1);

     p->apply(p_, omega, omega + 1, omega, omega + 1);

     X(rader_tl_insert)(n, n, ginv, omega, &omegas);
     return omega;
}

static void free_omega(R *omega)
{
     X(rader_tl_delete)(omega, &omegas);
}


/***************************************************************************/

/* Below, we extensively use the identity that fft(x*)* = ifft(x) in
   order to share data between forward and backward transforms and to
   obviate the necessity of having separate forward and backward
   plans.  (Although we often compute separate plans these days anyway
   due to the differing strides, etcetera.)

   Of course, since the new FFTW gives us separate pointers to
   the real and imaginary parts, we could have instead used the
   fft(r,i) = ifft(i,r) form of this identity, but it was easier to
   reuse the code from our old version. */

static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P *ego = (const P *) ego_;
     INT is, os;
     INT k, gpower, g, r;
     R *buf;
     R r0 = ri[0], i0 = ii[0];

     r = ego->n; is = ego->is; os = ego->os; g = ego->g; 
     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);

     /* First, permute the input, storing in buf: */
     for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
    R rA, iA;
    rA = ri[gpower * is];
    iA = ii[gpower * is];
    buf[2*k] = rA; buf[2*k + 1] = iA;
     }
     /* gpower == g^(r-1) mod r == 1 */;


     /* compute DFT of buf, storing in output (except DC): */
     {
      plan_dft *cld = (plan_dft *) ego->cld1;
      cld->apply(ego->cld1, buf, buf+1, ro+os, io+os);
     }

     /* set output DC component: */
     {
    ro[0] = r0 + ro[os];
    io[0] = i0 + io[os];
     }

     /* now, multiply by omega: */
     {
    const R *omega = ego->omega;
    for (k = 0; k < r - 1; ++k) {
         E rB, iB, rW, iW;
         rW = omega[2*k];
         iW = omega[2*k+1];
         rB = ro[(k+1)*os];
         iB = io[(k+1)*os];
         ro[(k+1)*os] = rW * rB - iW * iB;
         io[(k+1)*os] = -(rW * iB + iW * rB);
    }
     }
     
     /* this will add input[0] to all of the outputs after the ifft */
     ro[os] += r0;
     io[os] -= i0;

     /* inverse FFT: */
     {
      plan_dft *cld = (plan_dft *) ego->cld2;
      cld->apply(ego->cld2, ro+os, io+os, buf, buf+1);
     }
     
     /* finally, do inverse permutation to unshuffle the output: */
     {
    INT ginv = ego->ginv;
    gpower = 1;
    for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
         ro[gpower * os] = buf[2*k];
         io[gpower * os] = -buf[2*k+1];
    }
    A(gpower == 1);
     }


     X(ifree)(buf);
}

/***************************************************************************/

static void awake(plan *ego_, enum wakefulness wakefulness)
{
     P *ego = (P *) ego_;

     X(plan_awake)(ego->cld1, wakefulness);
     X(plan_awake)(ego->cld2, wakefulness);
     X(plan_awake)(ego->cld_omega, wakefulness);

     switch (wakefulness) {
   case SLEEPY:
        free_omega(ego->omega);
        ego->omega = 0;
        break;
   default:
        ego->g = X(find_generator)(ego->n);
        ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n);
        A(MULMOD(ego->g, ego->ginv, ego->n) == 1);

        ego->omega = mkomega(wakefulness,
           ego->cld_omega, ego->n, ego->ginv);
        break;
     }
}

static void destroy(plan *ego_)
{
     P *ego = (P *) ego_;
     X(plan_destroy_internal)(ego->cld_omega);
     X(plan_destroy_internal)(ego->cld2);
     X(plan_destroy_internal)(ego->cld1);
}

static void print(const plan *ego_, printer *p)
{
     const P *ego = (const P *)ego_;
     p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)",
              ego->n, ego->is, ego->os, ego->cld1);
     if (ego->cld2 != ego->cld1)
          p->print(p, "%(%p%)", ego->cld2);
     if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2)
          p->print(p, "%(%p%)", ego->cld_omega);
     p->putchr(p, ')');
}

static int applicable(const solver *ego_, const problem *p_,
          const planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     UNUSED(ego_);
     return (1
       && p->sz->rnk == 1
       && p->vecsz->rnk == 0
       && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW)
       && X(is_prime)(p->sz->dims[0].n)

       /* proclaim the solver SLOW if p-1 is not easily factorizable.
    Bluestein should take care of this case. */
       && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1))
    );
}

static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io,
         planner *plnr)
{
     plan *cld1 = (plan *) 0;
     plan *cld2 = (plan *) 0;
     plan *cld_omega = (plan *) 0;
     R *buf = (R *) 0;

     /* initial allocation for the purpose of planning */
     buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);

     cld1 = X(mkplan_f_d)(plnr, 
        X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
               X(mktensor_1d)(1, 0, 0),
               buf, buf + 1, ro + os, io + os),
        NO_SLOW, 0, 0);
     if (!cld1) goto nada;

     cld2 = X(mkplan_f_d)(plnr, 
        X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
               X(mktensor_1d)(1, 0, 0),
               ro + os, io + os, buf, buf + 1),
        NO_SLOW, 0, 0);

     if (!cld2) goto nada;

     /* plan for omega array */
     cld_omega = X(mkplan_f_d)(plnr, 
             X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
              X(mktensor_1d)(1, 0, 0),
              buf, buf + 1, buf, buf + 1),
             NO_SLOW, ESTIMATE, 0);
     if (!cld_omega) goto nada;

     /* deallocate buffers; let awake() or apply() allocate them for real */
     X(ifree)(buf);
     buf = 0;

     pln->cld1 = cld1;
     pln->cld2 = cld2;
     pln->cld_omega = cld_omega;
     pln->omega = 0;
     pln->n = n;
     pln->is = is;
     pln->os = os;

     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
     pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
     pln->super.super.ops.add += (n - 1) * 2 + 4;
     pln->super.super.ops.mul += (n - 1) * 4;

     return 1;

 nada:
     X(ifree0)(buf);
     X(plan_destroy_internal)(cld_omega);
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cld1);
     return 0;
}

static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     P *pln;
     INT n;
     INT is, os;

     static const plan_adt padt = {
    X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
    return (plan *) 0;

     n = p->sz->dims[0].n;
     is = p->sz->dims[0].is;
     os = p->sz->dims[0].os;

     pln = MKPLAN_DFT(P, &padt, apply);
     if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) {
    X(ifree)(pln);
    return (plan *) 0;
     }
     return &(pln->super.super);
}

static solver *mksolver(void)
{
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
     S *slv = MKSOLVER(S, &sadt);
     return &(slv->super);
}

void X(dft_rader_register)(planner *p)
{
     REGISTER_SOLVER(p, mksolver());
}

Coverage Report

Created: 2025-06-22 06:45

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21		#include "dft/dft.h"
22
23		/*
24		* Compute transforms of prime sizes using Rader's trick: turn them
25		* into convolutions of size n - 1, which you then perform via a pair
26		* of FFTs.
27		*/
28
29		typedef struct {
30		solver super;
31		} S;
32
33		typedef struct {
34		plan_dft super;
35
36		plan cld1, cld2;
37		R *omega;
38		INT n, g, ginv;
39		INT is, os;
40		plan *cld_omega;
41		} P;
42
43		static rader_tl *omegas = 0;
44
45		static R mkomega(enum wakefulness wakefulness, plan p_, INT n, INT ginv)
46	44	{
47	44	plan_dft p = (plan_dft ) p_;
48	44	R *omega;
49	44	INT i, gpower;
50	44	trigreal scale;
51	44	triggen *t;
52
53	44	if ((omega = X(rader_tl_find)(n, n, ginv, omegas)))
54	0	return omega;
55
56	44	omega = (R )MALLOC(sizeof(R) (n - 1) * 2, TWIDDLES);
57
58	44	scale = n - 1.0; /* normalization for convolution */
59
60	44	t = X(mktriggen)(wakefulness, n);
61	3.75k	for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
62	3.71k	trigreal w[2];
63	3.71k	t->cexpl(t, gpower, w);
64	3.71k	omega[2*i] = w[0] / scale;
65	3.71k	omega[2i+1] = FFT_SIGN w[1] / scale;
66	3.71k	}
67	44	X(triggen_destroy)(t);
68	44	A(gpower == 1);
69
70	44	p->apply(p_, omega, omega + 1, omega, omega + 1);
71
72	44	X(rader_tl_insert)(n, n, ginv, omega, &omegas);
73	44	return omega;
74	44	}
75
76		static void free_omega(R *omega)
77	44	{
78	44	X(rader_tl_delete)(omega, &omegas);
79	44	}
80
81
82		/***************************************************************************/
83
84		/* Below, we extensively use the identity that fft(x) = ifft(x) in
85		order to share data between forward and backward transforms and to
86		obviate the necessity of having separate forward and backward
87		plans. (Although we often compute separate plans these days anyway
88		due to the differing strides, etcetera.)
89
90		Of course, since the new FFTW gives us separate pointers to
91		the real and imaginary parts, we could have instead used the
92		fft(r,i) = ifft(i,r) form of this identity, but it was easier to
93		reuse the code from our old version. */
94
95		static void apply(const plan ego_, R ri, R ii, R ro, R *io)
96	93	{
97	93	const P ego = (const P ) ego_;
98	93	INT is, os;
99	93	INT k, gpower, g, r;
100	93	R *buf;
101	93	R r0 = ri[0], i0 = ii[0];
102
103	93	r = ego->n; is = ego->is; os = ego->os; g = ego->g;
104	93	buf = (R ) MALLOC(sizeof(R) (r - 1) * 2, BUFFERS);
105
106		/* First, permute the input, storing in buf: */
107	6.18k	for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
108	6.09k	R rA, iA;
109	6.09k	rA = ri[gpower * is];
110	6.09k	iA = ii[gpower * is];
111	6.09k	buf[2k] = rA; buf[2k + 1] = iA;
112	6.09k	}
113	93	/* gpower == g^(r-1) mod r == 1 */;
114
115
116		/* compute DFT of buf, storing in output (except DC): */
117	93	{
118	93	plan_dft cld = (plan_dft ) ego->cld1;
119	93	cld->apply(ego->cld1, buf, buf+1, ro+os, io+os);
120	93	}
121
122		/* set output DC component: */
123	93	{
124	93	ro[0] = r0 + ro[os];
125	93	io[0] = i0 + io[os];
126	93	}
127
128		/* now, multiply by omega: */
129	93	{
130	93	const R *omega = ego->omega;
131	6.18k	for (k = 0; k < r - 1; ++k) {
132	6.09k	E rB, iB, rW, iW;
133	6.09k	rW = omega[2*k];
134	6.09k	iW = omega[2*k+1];
135	6.09k	rB = ro[(k+1)*os];
136	6.09k	iB = io[(k+1)*os];
137	6.09k	ro[(k+1)os] = rW rB - iW * iB;
138	6.09k	io[(k+1)os] = -(rW iB + iW * rB);
139	6.09k	}
140	93	}
141
142		/* this will add input[0] to all of the outputs after the ifft */
143	93	ro[os] += r0;
144	93	io[os] -= i0;
145
146		/* inverse FFT: */
147	93	{
148	93	plan_dft cld = (plan_dft ) ego->cld2;
149	93	cld->apply(ego->cld2, ro+os, io+os, buf, buf+1);
150	93	}
151
152		/* finally, do inverse permutation to unshuffle the output: */
153	93	{
154	93	INT ginv = ego->ginv;
155	93	gpower = 1;
156	6.18k	for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
157	6.09k	ro[gpower * os] = buf[2*k];
158	6.09k	io[gpower * os] = -buf[2*k+1];
159	6.09k	}
160	93	A(gpower == 1);
161	93	}
162
163
164	93	X(ifree)(buf);
165	93	}
166
167		/***************************************************************************/
168
169		static void awake(plan *ego_, enum wakefulness wakefulness)
170	88	{
171	88	P ego = (P ) ego_;
172
173	88	X(plan_awake)(ego->cld1, wakefulness);
174	88	X(plan_awake)(ego->cld2, wakefulness);
175	88	X(plan_awake)(ego->cld_omega, wakefulness);
176
177	88	switch (wakefulness) {
178	44	case SLEEPY:
179	44	free_omega(ego->omega);
180	44	ego->omega = 0;
181	44	break;
182	44	default:
183	44	ego->g = X(find_generator)(ego->n);
184	44	ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n);
185	44	A(MULMOD(ego->g, ego->ginv, ego->n) == 1);
186
187	44	ego->omega = mkomega(wakefulness,
188	44	ego->cld_omega, ego->n, ego->ginv);
189	44	break;
190	88	}
191	88	}
192
193		static void destroy(plan *ego_)
194	94	{
195	94	P ego = (P ) ego_;
196	94	X(plan_destroy_internal)(ego->cld_omega);
197	94	X(plan_destroy_internal)(ego->cld2);
198	94	X(plan_destroy_internal)(ego->cld1);
199	94	}
200
201		static void print(const plan ego_, printer p)
202	0	{
203	0	const P ego = (const P )ego_;
204	0	p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)",
205	0	ego->n, ego->is, ego->os, ego->cld1);
206	0	if (ego->cld2 != ego->cld1)
207	0	p->print(p, "%(%p%)", ego->cld2);
208	0	if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2)
209	0	p->print(p, "%(%p%)", ego->cld_omega);
210	0	p->putchr(p, ')');
211	0	}
212
213		static int applicable(const solver ego_, const problem p_,
214		const planner *plnr)
215	1.15k	{
216	1.15k	const problem_dft p = (const problem_dft ) p_;
217	1.15k	UNUSED(ego_);
218	1.15k	return (1
219	1.15k	&& p->sz->rnk == 1
220	1.15k	&& p->vecsz->rnk == 0
221	1.15k	&& CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW)
222	1.15k	&& X(is_prime)(p->sz->dims[0].n)
223
224		/* proclaim the solver SLOW if p-1 is not easily factorizable.
225		Bluestein should take care of this case. */
226	1.15k	&& CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1))
227	1.15k	);
228	1.15k	}
229
230		static int mkP(P pln, INT n, INT is, INT os, R ro, R *io,
231		planner *plnr)
232	94	{
233	94	plan cld1 = (plan ) 0;
234	94	plan cld2 = (plan ) 0;
235	94	plan cld_omega = (plan ) 0;
236	94	R buf = (R ) 0;
237
238		/* initial allocation for the purpose of planning */
239	94	buf = (R ) MALLOC(sizeof(R) (n - 1) * 2, BUFFERS);
240
241	94	cld1 = X(mkplan_f_d)(plnr,
242	94	X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
243	94	X(mktensor_1d)(1, 0, 0),
244	94	buf, buf + 1, ro + os, io + os),
245	94	NO_SLOW, 0, 0);
246	94	if (!cld1) goto nada;
247
248	94	cld2 = X(mkplan_f_d)(plnr,
249	94	X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
250	94	X(mktensor_1d)(1, 0, 0),
251	94	ro + os, io + os, buf, buf + 1),
252	94	NO_SLOW, 0, 0);
253
254	94	if (!cld2) goto nada;
255
256		/* plan for omega array */
257	94	cld_omega = X(mkplan_f_d)(plnr,
258	94	X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
259	94	X(mktensor_1d)(1, 0, 0),
260	94	buf, buf + 1, buf, buf + 1),
261	94	NO_SLOW, ESTIMATE, 0);
262	94	if (!cld_omega) goto nada;
263
264		/* deallocate buffers; let awake() or apply() allocate them for real */
265	94	X(ifree)(buf);
266	94	buf = 0;
267
268	94	pln->cld1 = cld1;
269	94	pln->cld2 = cld2;
270	94	pln->cld_omega = cld_omega;
271	94	pln->omega = 0;
272	94	pln->n = n;
273	94	pln->is = is;
274	94	pln->os = os;
275
276	94	X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
277	94	pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
278	94	pln->super.super.ops.add += (n - 1) * 2 + 4;
279	94	pln->super.super.ops.mul += (n - 1) * 4;
280
281	94	return 1;
282
283	0	nada:
284	0	X(ifree0)(buf);
285	0	X(plan_destroy_internal)(cld_omega);
286	0	X(plan_destroy_internal)(cld2);
287	0	X(plan_destroy_internal)(cld1);
288	0	return 0;
289	94	}
290
291		static plan mkplan(const solver ego, const problem p_, planner plnr)
292	1.15k	{
293	1.15k	const problem_dft p = (const problem_dft ) p_;
294	1.15k	P *pln;
295	1.15k	INT n;
296	1.15k	INT is, os;
297
298	1.15k	static const plan_adt padt = {
299	1.15k	X(dft_solve), awake, print, destroy
300	1.15k	};
301
302	1.15k	if (!applicable(ego, p_, plnr))
303	1.06k	return (plan *) 0;
304
305	94	n = p->sz->dims[0].n;
306	94	is = p->sz->dims[0].is;
307	94	os = p->sz->dims[0].os;
308
309	94	pln = MKPLAN_DFT(P, &padt, apply);
310	94	if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) {
311	0	X(ifree)(pln);
312	0	return (plan *) 0;
313	0	}
314	94	return &(pln->super.super);
315	94	}
316
317		static solver *mksolver(void)
318	1	{
319	1	static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
320	1	S *slv = MKSOLVER(S, &sadt);
321	1	return &(slv->super);
322	1	}
323
324		void X(dft_rader_register)(planner *p)
325	1	{
326	1	REGISTER_SOLVER(p, mksolver());
327	1	}