Coverage Report

Created: 2025-06-22 06:45

/src/fftw3/dft/rader.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
#include "dft/dft.h"
22
23
/*
24
 * Compute transforms of prime sizes using Rader's trick: turn them
25
 * into convolutions of size n - 1, which you then perform via a pair
26
 * of FFTs.
27
 */
28
29
typedef struct {
30
     solver super;
31
} S;
32
33
typedef struct {
34
     plan_dft super;
35
36
     plan *cld1, *cld2;
37
     R *omega;
38
     INT n, g, ginv;
39
     INT is, os;
40
     plan *cld_omega;
41
} P;
42
43
static rader_tl *omegas = 0;
44
45
static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv)
46
44
{
47
44
     plan_dft *p = (plan_dft *) p_;
48
44
     R *omega;
49
44
     INT i, gpower;
50
44
     trigreal scale;
51
44
     triggen *t;
52
53
44
     if ((omega = X(rader_tl_find)(n, n, ginv, omegas)))
54
0
    return omega;
55
56
44
     omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES);
57
58
44
     scale = n - 1.0; /* normalization for convolution */
59
60
44
     t = X(mktriggen)(wakefulness, n);
61
3.75k
     for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
62
3.71k
    trigreal w[2];
63
3.71k
    t->cexpl(t, gpower, w);
64
3.71k
    omega[2*i] = w[0] / scale;
65
3.71k
    omega[2*i+1] = FFT_SIGN * w[1] / scale;
66
3.71k
     }
67
44
     X(triggen_destroy)(t);
68
44
     A(gpower == 1);
69
70
44
     p->apply(p_, omega, omega + 1, omega, omega + 1);
71
72
44
     X(rader_tl_insert)(n, n, ginv, omega, &omegas);
73
44
     return omega;
74
44
}
75
76
static void free_omega(R *omega)
77
44
{
78
44
     X(rader_tl_delete)(omega, &omegas);
79
44
}
80
81
82
/***************************************************************************/
83
84
/* Below, we extensively use the identity that fft(x*)* = ifft(x) in
85
   order to share data between forward and backward transforms and to
86
   obviate the necessity of having separate forward and backward
87
   plans.  (Although we often compute separate plans these days anyway
88
   due to the differing strides, etcetera.)
89
90
   Of course, since the new FFTW gives us separate pointers to
91
   the real and imaginary parts, we could have instead used the
92
   fft(r,i) = ifft(i,r) form of this identity, but it was easier to
93
   reuse the code from our old version. */
94
95
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
96
93
{
97
93
     const P *ego = (const P *) ego_;
98
93
     INT is, os;
99
93
     INT k, gpower, g, r;
100
93
     R *buf;
101
93
     R r0 = ri[0], i0 = ii[0];
102
103
93
     r = ego->n; is = ego->is; os = ego->os; g = ego->g; 
104
93
     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);
105
106
     /* First, permute the input, storing in buf: */
107
6.18k
     for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
108
6.09k
    R rA, iA;
109
6.09k
    rA = ri[gpower * is];
110
6.09k
    iA = ii[gpower * is];
111
6.09k
    buf[2*k] = rA; buf[2*k + 1] = iA;
112
6.09k
     }
113
93
     /* gpower == g^(r-1) mod r == 1 */;
114
115
116
     /* compute DFT of buf, storing in output (except DC): */
117
93
     {
118
93
      plan_dft *cld = (plan_dft *) ego->cld1;
119
93
      cld->apply(ego->cld1, buf, buf+1, ro+os, io+os);
120
93
     }
121
122
     /* set output DC component: */
123
93
     {
124
93
    ro[0] = r0 + ro[os];
125
93
    io[0] = i0 + io[os];
126
93
     }
127
128
     /* now, multiply by omega: */
129
93
     {
130
93
    const R *omega = ego->omega;
131
6.18k
    for (k = 0; k < r - 1; ++k) {
132
6.09k
         E rB, iB, rW, iW;
133
6.09k
         rW = omega[2*k];
134
6.09k
         iW = omega[2*k+1];
135
6.09k
         rB = ro[(k+1)*os];
136
6.09k
         iB = io[(k+1)*os];
137
6.09k
         ro[(k+1)*os] = rW * rB - iW * iB;
138
6.09k
         io[(k+1)*os] = -(rW * iB + iW * rB);
139
6.09k
    }
140
93
     }
141
     
142
     /* this will add input[0] to all of the outputs after the ifft */
143
93
     ro[os] += r0;
144
93
     io[os] -= i0;
145
146
     /* inverse FFT: */
147
93
     {
148
93
      plan_dft *cld = (plan_dft *) ego->cld2;
149
93
      cld->apply(ego->cld2, ro+os, io+os, buf, buf+1);
150
93
     }
151
     
152
     /* finally, do inverse permutation to unshuffle the output: */
153
93
     {
154
93
    INT ginv = ego->ginv;
155
93
    gpower = 1;
156
6.18k
    for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
157
6.09k
         ro[gpower * os] = buf[2*k];
158
6.09k
         io[gpower * os] = -buf[2*k+1];
159
6.09k
    }
160
93
    A(gpower == 1);
161
93
     }
162
163
164
93
     X(ifree)(buf);
165
93
}
166
167
/***************************************************************************/
168
169
static void awake(plan *ego_, enum wakefulness wakefulness)
170
88
{
171
88
     P *ego = (P *) ego_;
172
173
88
     X(plan_awake)(ego->cld1, wakefulness);
174
88
     X(plan_awake)(ego->cld2, wakefulness);
175
88
     X(plan_awake)(ego->cld_omega, wakefulness);
176
177
88
     switch (wakefulness) {
178
44
   case SLEEPY:
179
44
        free_omega(ego->omega);
180
44
        ego->omega = 0;
181
44
        break;
182
44
   default:
183
44
        ego->g = X(find_generator)(ego->n);
184
44
        ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n);
185
44
        A(MULMOD(ego->g, ego->ginv, ego->n) == 1);
186
187
44
        ego->omega = mkomega(wakefulness,
188
44
           ego->cld_omega, ego->n, ego->ginv);
189
44
        break;
190
88
     }
191
88
}
192
193
static void destroy(plan *ego_)
194
94
{
195
94
     P *ego = (P *) ego_;
196
94
     X(plan_destroy_internal)(ego->cld_omega);
197
94
     X(plan_destroy_internal)(ego->cld2);
198
94
     X(plan_destroy_internal)(ego->cld1);
199
94
}
200
201
static void print(const plan *ego_, printer *p)
202
0
{
203
0
     const P *ego = (const P *)ego_;
204
0
     p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)",
205
0
              ego->n, ego->is, ego->os, ego->cld1);
206
0
     if (ego->cld2 != ego->cld1)
207
0
          p->print(p, "%(%p%)", ego->cld2);
208
0
     if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2)
209
0
          p->print(p, "%(%p%)", ego->cld_omega);
210
0
     p->putchr(p, ')');
211
0
}
212
213
static int applicable(const solver *ego_, const problem *p_,
214
          const planner *plnr)
215
1.15k
{
216
1.15k
     const problem_dft *p = (const problem_dft *) p_;
217
1.15k
     UNUSED(ego_);
218
1.15k
     return (1
219
1.15k
       && p->sz->rnk == 1
220
1.15k
       && p->vecsz->rnk == 0
221
1.15k
       && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW)
222
1.15k
       && X(is_prime)(p->sz->dims[0].n)
223
224
       /* proclaim the solver SLOW if p-1 is not easily factorizable.
225
    Bluestein should take care of this case. */
226
1.15k
       && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1))
227
1.15k
    );
228
1.15k
}
229
230
static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io,
231
         planner *plnr)
232
94
{
233
94
     plan *cld1 = (plan *) 0;
234
94
     plan *cld2 = (plan *) 0;
235
94
     plan *cld_omega = (plan *) 0;
236
94
     R *buf = (R *) 0;
237
238
     /* initial allocation for the purpose of planning */
239
94
     buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);
240
241
94
     cld1 = X(mkplan_f_d)(plnr, 
242
94
        X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
243
94
               X(mktensor_1d)(1, 0, 0),
244
94
               buf, buf + 1, ro + os, io + os),
245
94
        NO_SLOW, 0, 0);
246
94
     if (!cld1) goto nada;
247
248
94
     cld2 = X(mkplan_f_d)(plnr, 
249
94
        X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
250
94
               X(mktensor_1d)(1, 0, 0),
251
94
               ro + os, io + os, buf, buf + 1),
252
94
        NO_SLOW, 0, 0);
253
254
94
     if (!cld2) goto nada;
255
256
     /* plan for omega array */
257
94
     cld_omega = X(mkplan_f_d)(plnr, 
258
94
             X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
259
94
              X(mktensor_1d)(1, 0, 0),
260
94
              buf, buf + 1, buf, buf + 1),
261
94
             NO_SLOW, ESTIMATE, 0);
262
94
     if (!cld_omega) goto nada;
263
264
     /* deallocate buffers; let awake() or apply() allocate them for real */
265
94
     X(ifree)(buf);
266
94
     buf = 0;
267
268
94
     pln->cld1 = cld1;
269
94
     pln->cld2 = cld2;
270
94
     pln->cld_omega = cld_omega;
271
94
     pln->omega = 0;
272
94
     pln->n = n;
273
94
     pln->is = is;
274
94
     pln->os = os;
275
276
94
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
277
94
     pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
278
94
     pln->super.super.ops.add += (n - 1) * 2 + 4;
279
94
     pln->super.super.ops.mul += (n - 1) * 4;
280
281
94
     return 1;
282
283
0
 nada:
284
0
     X(ifree0)(buf);
285
0
     X(plan_destroy_internal)(cld_omega);
286
0
     X(plan_destroy_internal)(cld2);
287
0
     X(plan_destroy_internal)(cld1);
288
0
     return 0;
289
94
}
290
291
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
292
1.15k
{
293
1.15k
     const problem_dft *p = (const problem_dft *) p_;
294
1.15k
     P *pln;
295
1.15k
     INT n;
296
1.15k
     INT is, os;
297
298
1.15k
     static const plan_adt padt = {
299
1.15k
    X(dft_solve), awake, print, destroy
300
1.15k
     };
301
302
1.15k
     if (!applicable(ego, p_, plnr))
303
1.06k
    return (plan *) 0;
304
305
94
     n = p->sz->dims[0].n;
306
94
     is = p->sz->dims[0].is;
307
94
     os = p->sz->dims[0].os;
308
309
94
     pln = MKPLAN_DFT(P, &padt, apply);
310
94
     if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) {
311
0
    X(ifree)(pln);
312
0
    return (plan *) 0;
313
0
     }
314
94
     return &(pln->super.super);
315
94
}
316
317
static solver *mksolver(void)
318
1
{
319
1
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
320
1
     S *slv = MKSOLVER(S, &sadt);
321
1
     return &(slv->super);
322
1
}
323
324
void X(dft_rader_register)(planner *p)
325
1
{
326
1
     REGISTER_SOLVER(p, mksolver());
327
1
}