Coverage Report

Created: 2025-10-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/dft/rader.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
#include "dft/dft.h"
22
23
/*
24
 * Compute transforms of prime sizes using Rader's trick: turn them
25
 * into convolutions of size n - 1, which you then perform via a pair
26
 * of FFTs.
27
 */
28
29
typedef struct {
30
     solver super;
31
} S;
32
33
typedef struct {
34
     plan_dft super;
35
36
     plan *cld1, *cld2;
37
     R *omega;
38
     INT n, g, ginv;
39
     INT is, os;
40
     plan *cld_omega;
41
} P;
42
43
static rader_tl *omegas = 0;
44
45
static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv)
46
41
{
47
41
     plan_dft *p = (plan_dft *) p_;
48
41
     R *omega;
49
41
     INT i, gpower;
50
41
     trigreal scale;
51
41
     triggen *t;
52
53
41
     if ((omega = X(rader_tl_find)(n, n, ginv, omegas)))
54
0
    return omega;
55
56
41
     omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES);
57
58
41
     scale = n - 1.0; /* normalization for convolution */
59
60
41
     t = X(mktriggen)(wakefulness, n);
61
3.44k
     for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
62
3.40k
    trigreal w[2];
63
3.40k
    t->cexpl(t, gpower, w);
64
3.40k
    omega[2*i] = w[0] / scale;
65
3.40k
    omega[2*i+1] = FFT_SIGN * w[1] / scale;
66
3.40k
     }
67
41
     X(triggen_destroy)(t);
68
41
     A(gpower == 1);
69
70
41
     p->apply(p_, omega, omega + 1, omega, omega + 1);
71
72
41
     X(rader_tl_insert)(n, n, ginv, omega, &omegas);
73
41
     return omega;
74
41
}
75
76
static void free_omega(R *omega)
77
41
{
78
41
     X(rader_tl_delete)(omega, &omegas);
79
41
}
80
81
82
/***************************************************************************/
83
84
/* Below, we extensively use the identity that fft(x*)* = ifft(x) in
85
   order to share data between forward and backward transforms and to
86
   obviate the necessity of having separate forward and backward
87
   plans.  (Although we often compute separate plans these days anyway
88
   due to the differing strides, etcetera.)
89
90
   Of course, since the new FFTW gives us separate pointers to
91
   the real and imaginary parts, we could have instead used the
92
   fft(r,i) = ifft(i,r) form of this identity, but it was easier to
93
   reuse the code from our old version. */
94
95
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
96
74
{
97
74
     const P *ego = (const P *) ego_;
98
74
     INT is, os;
99
74
     INT k, gpower, g, r;
100
74
     R *buf;
101
74
     R r0 = ri[0], i0 = ii[0];
102
103
74
     r = ego->n; is = ego->is; os = ego->os; g = ego->g; 
104
74
     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);
105
106
     /* First, permute the input, storing in buf: */
107
5.21k
     for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
108
5.13k
    R rA, iA;
109
5.13k
    rA = ri[gpower * is];
110
5.13k
    iA = ii[gpower * is];
111
5.13k
    buf[2*k] = rA; buf[2*k + 1] = iA;
112
5.13k
     }
113
74
     /* gpower == g^(r-1) mod r == 1 */;
114
115
116
     /* compute DFT of buf, storing in output (except DC): */
117
74
     {
118
74
      plan_dft *cld = (plan_dft *) ego->cld1;
119
74
      cld->apply(ego->cld1, buf, buf+1, ro+os, io+os);
120
74
     }
121
122
     /* set output DC component: */
123
74
     {
124
74
    ro[0] = r0 + ro[os];
125
74
    io[0] = i0 + io[os];
126
74
     }
127
128
     /* now, multiply by omega: */
129
74
     {
130
74
    const R *omega = ego->omega;
131
5.21k
    for (k = 0; k < r - 1; ++k) {
132
5.13k
         E rB, iB, rW, iW;
133
5.13k
         rW = omega[2*k];
134
5.13k
         iW = omega[2*k+1];
135
5.13k
         rB = ro[(k+1)*os];
136
5.13k
         iB = io[(k+1)*os];
137
5.13k
         ro[(k+1)*os] = rW * rB - iW * iB;
138
5.13k
         io[(k+1)*os] = -(rW * iB + iW * rB);
139
5.13k
    }
140
74
     }
141
     
142
     /* this will add input[0] to all of the outputs after the ifft */
143
74
     ro[os] += r0;
144
74
     io[os] -= i0;
145
146
     /* inverse FFT: */
147
74
     {
148
74
      plan_dft *cld = (plan_dft *) ego->cld2;
149
74
      cld->apply(ego->cld2, ro+os, io+os, buf, buf+1);
150
74
     }
151
     
152
     /* finally, do inverse permutation to unshuffle the output: */
153
74
     {
154
74
    INT ginv = ego->ginv;
155
74
    gpower = 1;
156
5.21k
    for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
157
5.13k
         ro[gpower * os] = buf[2*k];
158
5.13k
         io[gpower * os] = -buf[2*k+1];
159
5.13k
    }
160
74
    A(gpower == 1);
161
74
     }
162
163
164
74
     X(ifree)(buf);
165
74
}
166
167
/***************************************************************************/
168
169
static void awake(plan *ego_, enum wakefulness wakefulness)
170
82
{
171
82
     P *ego = (P *) ego_;
172
173
82
     X(plan_awake)(ego->cld1, wakefulness);
174
82
     X(plan_awake)(ego->cld2, wakefulness);
175
82
     X(plan_awake)(ego->cld_omega, wakefulness);
176
177
82
     switch (wakefulness) {
178
41
   case SLEEPY:
179
41
        free_omega(ego->omega);
180
41
        ego->omega = 0;
181
41
        break;
182
41
   default:
183
41
        ego->g = X(find_generator)(ego->n);
184
41
        ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n);
185
41
        A(MULMOD(ego->g, ego->ginv, ego->n) == 1);
186
187
41
        ego->omega = mkomega(wakefulness,
188
41
           ego->cld_omega, ego->n, ego->ginv);
189
41
        break;
190
82
     }
191
82
}
192
193
static void destroy(plan *ego_)
194
88
{
195
88
     P *ego = (P *) ego_;
196
88
     X(plan_destroy_internal)(ego->cld_omega);
197
88
     X(plan_destroy_internal)(ego->cld2);
198
88
     X(plan_destroy_internal)(ego->cld1);
199
88
}
200
201
static void print(const plan *ego_, printer *p)
202
0
{
203
0
     const P *ego = (const P *)ego_;
204
0
     p->print(p, "(dft-rader-%D%ois=%oos=%(%p%)",
205
0
              ego->n, ego->is, ego->os, ego->cld1);
206
0
     if (ego->cld2 != ego->cld1)
207
0
          p->print(p, "%(%p%)", ego->cld2);
208
0
     if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2)
209
0
          p->print(p, "%(%p%)", ego->cld_omega);
210
0
     p->putchr(p, ')');
211
0
}
212
213
static int applicable(const solver *ego_, const problem *p_,
214
          const planner *plnr)
215
1.13k
{
216
1.13k
     const problem_dft *p = (const problem_dft *) p_;
217
1.13k
     UNUSED(ego_);
218
1.13k
     return (1
219
1.13k
       && p->sz->rnk == 1
220
812
       && p->vecsz->rnk == 0
221
314
       && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > RADER_MAX_SLOW)
222
222
       && X(is_prime)(p->sz->dims[0].n)
223
224
       /* proclaim the solver SLOW if p-1 is not easily factorizable.
225
    Bluestein should take care of this case. */
226
127
       && CIMPLIES(NO_SLOWP(plnr), X(factors_into_small_primes)(p->sz->dims[0].n - 1))
227
1.13k
    );
228
1.13k
}
229
230
static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io,
231
         planner *plnr)
232
88
{
233
88
     plan *cld1 = (plan *) 0;
234
88
     plan *cld2 = (plan *) 0;
235
88
     plan *cld_omega = (plan *) 0;
236
88
     R *buf = (R *) 0;
237
238
     /* initial allocation for the purpose of planning */
239
88
     buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);
240
241
88
     cld1 = X(mkplan_f_d)(plnr, 
242
88
        X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
243
88
               X(mktensor_1d)(1, 0, 0),
244
88
               buf, buf + 1, ro + os, io + os),
245
88
        NO_SLOW, 0, 0);
246
88
     if (!cld1) goto nada;
247
248
88
     cld2 = X(mkplan_f_d)(plnr, 
249
88
        X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
250
88
               X(mktensor_1d)(1, 0, 0),
251
88
               ro + os, io + os, buf, buf + 1),
252
88
        NO_SLOW, 0, 0);
253
254
88
     if (!cld2) goto nada;
255
256
     /* plan for omega array */
257
88
     cld_omega = X(mkplan_f_d)(plnr, 
258
88
             X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
259
88
              X(mktensor_1d)(1, 0, 0),
260
88
              buf, buf + 1, buf, buf + 1),
261
88
             NO_SLOW, ESTIMATE, 0);
262
88
     if (!cld_omega) goto nada;
263
264
     /* deallocate buffers; let awake() or apply() allocate them for real */
265
88
     X(ifree)(buf);
266
88
     buf = 0;
267
268
88
     pln->cld1 = cld1;
269
88
     pln->cld2 = cld2;
270
88
     pln->cld_omega = cld_omega;
271
88
     pln->omega = 0;
272
88
     pln->n = n;
273
88
     pln->is = is;
274
88
     pln->os = os;
275
276
88
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
277
88
     pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
278
88
     pln->super.super.ops.add += (n - 1) * 2 + 4;
279
88
     pln->super.super.ops.mul += (n - 1) * 4;
280
281
88
     return 1;
282
283
0
 nada:
284
0
     X(ifree0)(buf);
285
0
     X(plan_destroy_internal)(cld_omega);
286
0
     X(plan_destroy_internal)(cld2);
287
0
     X(plan_destroy_internal)(cld1);
288
0
     return 0;
289
88
}
290
291
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
292
1.13k
{
293
1.13k
     const problem_dft *p = (const problem_dft *) p_;
294
1.13k
     P *pln;
295
1.13k
     INT n;
296
1.13k
     INT is, os;
297
298
1.13k
     static const plan_adt padt = {
299
1.13k
    X(dft_solve), awake, print, destroy
300
1.13k
     };
301
302
1.13k
     if (!applicable(ego, p_, plnr))
303
1.04k
    return (plan *) 0;
304
305
88
     n = p->sz->dims[0].n;
306
88
     is = p->sz->dims[0].is;
307
88
     os = p->sz->dims[0].os;
308
309
88
     pln = MKPLAN_DFT(P, &padt, apply);
310
88
     if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) {
311
0
    X(ifree)(pln);
312
0
    return (plan *) 0;
313
0
     }
314
88
     return &(pln->super.super);
315
88
}
316
317
static solver *mksolver(void)
318
1
{
319
1
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
320
1
     S *slv = MKSOLVER(S, &sadt);
321
1
     return &(slv->super);
322
1
}
323
324
void X(dft_rader_register)(planner *p)
325
1
{
326
1
     REGISTER_SOLVER(p, mksolver());
327
1
}