Coverage Report

Created: 2026-04-12 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/dft/rader.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
#include "dft/dft.h"
22
23
/*
24
 * Compute transforms of prime sizes using Rader's trick: turn them
25
 * into convolutions of size n - 1, which you then perform via a pair
26
 * of FFTs.
27
 */
28
29
typedef struct {
30
     solver super;
31
} S;
32
33
typedef struct {
34
     plan_dft super;
35
36
     plan *cld1, *cld2;
37
     R *omega;
38
     INT n, g, ginv;
39
     INT is, os;
40
     plan *cld_omega;
41
} P;
42
43
static rader_tl *omegas = 0;
44
45
static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv)
46
42
{
47
42
     plan_dft *p = (plan_dft *) p_;
48
42
     R *omega;
49
42
     INT i, gpower;
50
42
     trigreal scale;
51
42
     triggen *t;
52
53
42
     if ((omega = X(rader_tl_find)(n, n, ginv, omegas)))
54
0
    return omega;
55
56
42
     omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES);
57
58
42
     scale = n - 1.0; /* normalization for convolution */
59
60
42
     t = X(mktriggen)(wakefulness, n);
61
3.42k
     for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
62
3.38k
    trigreal w[2];
63
3.38k
    t->cexpl(t, gpower, w);
64
3.38k
    omega[2*i] = w[0] / scale;
65
3.38k
    omega[2*i+1] = FFT_SIGN * w[1] / scale;
66
3.38k
     }
67
42
     X(triggen_destroy)(t);
68
42
     A(gpower == 1);
69
70
42
     p->apply(p_, omega, omega + 1, omega, omega + 1);
71
72
42
     X(rader_tl_insert)(n, n, ginv, omega, &omegas);
73
42
     return omega;
74
42
}
75
76
static void free_omega(R *omega)
77
42
{
78
42
     X(rader_tl_delete)(omega, &omegas);
79
42
}
80
81
82
/***************************************************************************/
83
84
/* Below, we extensively use the identity that fft(x*)* = ifft(x) in
85
   order to share data between forward and backward transforms and to
86
   obviate the necessity of having separate forward and backward
87
   plans.  (Although we often compute separate plans these days anyway
88
   due to the differing strides, etcetera.)
89
90
   Of course, since the new FFTW gives us separate pointers to
91
   the real and imaginary parts, we could have instead used the
92
   fft(r,i) = ifft(i,r) form of this identity, but it was easier to
93
   reuse the code from our old version. */
94
95
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
96
83
{
97
83
     const P *ego = (const P *) ego_;
98
83
     INT is, os;
99
83
     INT k, gpower, g, r;
100
83
     R *buf;
101
83
     R r0 = ri[0], i0 = ii[0];
102
103
83
     r = ego->n; is = ego->is; os = ego->os; g = ego->g; 
104
83
     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);
105
106
     /* First, permute the input, storing in buf: */
107
5.46k
     for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
108
5.38k
    R rA, iA;
109
5.38k
    rA = ri[gpower * is];
110
5.38k
    iA = ii[gpower * is];
111
5.38k
    buf[2*k] = rA; buf[2*k + 1] = iA;
112
5.38k
     }
113
83
     /* gpower == g^(r-1) mod r == 1 */;
114
115
116
     /* compute DFT of buf, storing in output (except DC): */
117
83
     {
118
83
      plan_dft *cld = (plan_dft *) ego->cld1;
119
83
      cld->apply(ego->cld1, buf, buf+1, ro+os, io+os);
120
83
     }
121
122
     /* set output DC component: */
123
83
     {
124
83
    ro[0] = r0 + ro[os];
125
83
    io[0] = i0 + io[os];
126
83
     }
127
128
     /* now, multiply by omega: */
129
83
     {
130
83
    const R *omega = ego->omega;
131
5.46k
    for (k = 0; k < r - 1; ++k) {
132
5.38k
         E rB, iB, rW, iW;
133
5.38k
         rW = omega[2*k];
134
5.38k
         iW = omega[2*k+1];
135
5.38k
         rB = ro[(k+1)*os];
136
5.38k
         iB = io[(k+1)*os];
137
5.38k
         ro[(k+1)*os] = rW * rB - iW * iB;
138
5.38k
         io[(k+1)*os] = -(rW * iB + iW * rB);
139
5.38k
    }
140
83
     }
141
     
142
     /* this will add input[0] to all of the outputs after the ifft */
143
83
     ro[os] += r0;
144
83
     io[os] -= i0;
145
146
     /* inverse FFT: */
147
83
     {
148
83
      plan_dft *cld = (plan_dft *) ego->cld2;
149
83
      cld->apply(ego->cld2, ro+os, io+os, buf, buf+1);
150
83
     }
151
     
152
     /* finally, do inverse permutation to unshuffle the output: */
153
83
     {
154
83
    INT ginv = ego->ginv;
155
83
    gpower = 1;
156
5.46k
    for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
157
5.38k
         ro[gpower * os] = buf[2*k];
158
5.38k
         io[gpower * os] = -buf[2*k+1];
159
5.38k
    }
160
83
    A(gpower == 1);
161
83
     }
162
163
164
83
     X(ifree)(buf);
165
83
}
166
167
/***************************************************************************/
168
169
static void awake(plan *ego_, enum wakefulness wakefulness)
170
84
{
171
84
     P *ego = (P *) ego_;
172
173
84
     X(plan_awake)(ego->cld1, wakefulness);
174
84
     X(plan_awake)(ego->cld2, wakefulness);
175
84
     X(plan_awake)(ego->cld_omega, wakefulness);
176
177
84
     switch (wakefulness) {
178
42
   case SLEEPY:
179
42
        free_omega(ego->omega);
180
42
        ego->omega = 0;
181
42
        break;
182
42
   default:
183
42
        ego->g = X(find_generator)(ego->n);
184
42
        ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n);
185
42
        A(MULMOD(ego->g, ego->ginv, ego->n) == 1);
186
187
42
        ego->omega = mkomega(wakefulness,
188
42
           ego->cld_omega, ego->n, ego->ginv);
189
42
        break;
190
84
     }
191
84
}
192
193
static void destroy(plan *ego_)
194
90
{
195
90
     P *ego = (P *) ego_;
196
90
     X(plan_destroy_internal)(ego->cld_omega);
197
90
     X(plan_destroy_internal)(ego->cld2);
198
90
     X(plan_destroy_internal)(ego->cld1);
199
90
}
200
201
static void print(const plan *ego_, printer *p)
202
0
{
203
0
     const P *ego = (const P *)ego_;
204
0
     p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)",
205
0
              ego->n, ego->is, ego->os, ego->cld1);
206
0
     if (ego->cld2 != ego->cld1)
207
0
          p->print(p, "%(%p%)", ego->cld2);
208
0
     if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2)
209
0
          p->print(p, "%(%p%)", ego->cld_omega);
210
0
     p->putchr(p, ')');
211
0
}
212
213
static int applicable(const solver *ego_, const problem *p_,
214
          const planner *plnr)
215
1.17k
{
216
1.17k
     const problem_dft *p = (const problem_dft *) p_;
217
1.17k
     UNUSED(ego_);
218
1.17k
     return (1
219
1.17k
       && p->sz->rnk == 1
220
839
       && p->vecsz->rnk == 0
221
318
       && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW)
222
226
       && X(is_prime)(p->sz->dims[0].n)
223
224
       /* proclaim the solver SLOW if p-1 is not easily factorizable.
225
    Bluestein should take care of this case. */
226
130
       && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1))
227
1.17k
    );
228
1.17k
}
229
230
static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io,
231
         planner *plnr)
232
90
{
233
90
     plan *cld1 = (plan *) 0;
234
90
     plan *cld2 = (plan *) 0;
235
90
     plan *cld_omega = (plan *) 0;
236
90
     R *buf = (R *) 0;
237
238
     /* initial allocation for the purpose of planning */
239
90
     buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);
240
241
90
     cld1 = X(mkplan_f_d)(plnr, 
242
90
        X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
243
90
               X(mktensor_1d)(1, 0, 0),
244
90
               buf, buf + 1, ro + os, io + os),
245
90
        NO_SLOW, 0, 0);
246
90
     if (!cld1) goto nada;
247
248
90
     cld2 = X(mkplan_f_d)(plnr, 
249
90
        X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
250
90
               X(mktensor_1d)(1, 0, 0),
251
90
               ro + os, io + os, buf, buf + 1),
252
90
        NO_SLOW, 0, 0);
253
254
90
     if (!cld2) goto nada;
255
256
     /* plan for omega array */
257
90
     cld_omega = X(mkplan_f_d)(plnr, 
258
90
             X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
259
90
              X(mktensor_1d)(1, 0, 0),
260
90
              buf, buf + 1, buf, buf + 1),
261
90
             NO_SLOW, ESTIMATE, 0);
262
90
     if (!cld_omega) goto nada;
263
264
     /* deallocate buffers; let awake() or apply() allocate them for real */
265
90
     X(ifree)(buf);
266
90
     buf = 0;
267
268
90
     pln->cld1 = cld1;
269
90
     pln->cld2 = cld2;
270
90
     pln->cld_omega = cld_omega;
271
90
     pln->omega = 0;
272
90
     pln->n = n;
273
90
     pln->is = is;
274
90
     pln->os = os;
275
276
90
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
277
90
     pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
278
90
     pln->super.super.ops.add += (n - 1) * 2 + 4;
279
90
     pln->super.super.ops.mul += (n - 1) * 4;
280
281
90
     return 1;
282
283
0
 nada:
284
0
     X(ifree0)(buf);
285
0
     X(plan_destroy_internal)(cld_omega);
286
0
     X(plan_destroy_internal)(cld2);
287
0
     X(plan_destroy_internal)(cld1);
288
0
     return 0;
289
90
}
290
291
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
292
1.17k
{
293
1.17k
     const problem_dft *p = (const problem_dft *) p_;
294
1.17k
     P *pln;
295
1.17k
     INT n;
296
1.17k
     INT is, os;
297
298
1.17k
     static const plan_adt padt = {
299
1.17k
    X(dft_solve), awake, print, destroy
300
1.17k
     };
301
302
1.17k
     if (!applicable(ego, p_, plnr))
303
1.08k
    return (plan *) 0;
304
305
90
     n = p->sz->dims[0].n;
306
90
     is = p->sz->dims[0].is;
307
90
     os = p->sz->dims[0].os;
308
309
90
     pln = MKPLAN_DFT(P, &padt, apply);
310
90
     if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) {
311
0
    X(ifree)(pln);
312
0
    return (plan *) 0;
313
0
     }
314
90
     return &(pln->super.super);
315
90
}
316
317
static solver *mksolver(void)
318
1
{
319
1
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
320
1
     S *slv = MKSOLVER(S, &sadt);
321
1
     return &(slv->super);
322
1
}
323
324
void X(dft_rader_register)(planner *p)
325
1
{
326
1
     REGISTER_SOLVER(p, mksolver());
327
1
}