Coverage Report

Created: 2025-10-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/reodft/reodft11e-radix2.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
22
/* Do an R{E,O}DFT11 problem of *even* size by a pair of R2HC problems
23
   of half the size, plus some pre/post-processing.  Use a trick from:
24
25
   Zhongde Wang, "On computing the discrete Fourier and cosine transforms,"
26
   IEEE Trans. Acoust. Speech Sig. Proc. ASSP-33 (4), 1341--1344 (1985).
27
28
   to re-express as a pair of half-size REDFT01 (DCT-III) problems.  Our
29
   implementation looks quite a bit different from the algorithm described
30
   in the paper because we combined the paper's pre/post-processing with
31
   the pre/post-processing used to turn REDFT01 into R2HC.  (Also, the
32
   paper uses a DCT/DST pair, but we turn the DST into a DCT via the
33
   usual reordering/sign-flip trick.  We additionally combined a couple
34
   of the matrices/transformations of the paper into a single pass.)
35
36
   NOTE: We originally used a simpler method by S. C. Chan and K. L. Ho
37
   that turned out to have numerical problems; see reodft11e-r2hc.c.
38
39
   (For odd sizes, see reodft11e-r2hc-odd.c.)
40
*/
41
42
#include "reodft/reodft.h"
43
44
typedef struct {
45
     solver super;
46
} S;
47
48
typedef struct {
49
     plan_rdft super;
50
     plan *cld;
51
     twid *td, *td2;
52
     INT is, os;
53
     INT n;
54
     INT vl;
55
     INT ivs, ovs;
56
     rdft_kind kind;
57
} P;
58
59
static void apply_re11(const plan *ego_, R *I, R *O)
60
0
{
61
0
     const P *ego = (const P *) ego_;
62
0
     INT is = ego->is, os = ego->os;
63
0
     INT i, n = ego->n, n2 = n/2;
64
0
     INT iv, vl = ego->vl;
65
0
     INT ivs = ego->ivs, ovs = ego->ovs;
66
0
     R *W = ego->td->W;
67
0
     R *W2;
68
0
     R *buf;
69
70
0
     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
71
72
0
     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
73
0
    buf[0] = K(2.0) * I[0];
74
0
    buf[n2] = K(2.0) * I[is * (n - 1)];
75
0
    for (i = 1; i + i < n2; ++i) {
76
0
         INT k = i + i;
77
0
         E a, b, a2, b2;
78
0
         {
79
0
        E u, v;
80
0
        u = I[is * (k - 1)];
81
0
        v = I[is * k];
82
0
        a = u + v;
83
0
        b2 = u - v;
84
0
         }
85
0
         {
86
0
        E u, v;
87
0
        u = I[is * (n - k - 1)];
88
0
        v = I[is * (n - k)];
89
0
        b = u + v;
90
0
        a2 = u - v;
91
0
         }
92
0
         {
93
0
        E wa, wb;
94
0
        wa = W[2*i];
95
0
        wb = W[2*i + 1];
96
0
        {
97
0
       E apb, amb;
98
0
       apb = a + b;
99
0
       amb = a - b;
100
0
       buf[i] = wa * amb + wb * apb; 
101
0
       buf[n2 - i] = wa * apb - wb * amb; 
102
0
        }
103
0
        {
104
0
       E apb, amb;
105
0
       apb = a2 + b2;
106
0
       amb = a2 - b2;
107
0
       buf[n2 + i] = wa * amb + wb * apb; 
108
0
       buf[n - i] = wa * apb - wb * amb; 
109
0
        }
110
0
         }
111
0
    }
112
0
    if (i + i == n2) {
113
0
         E u, v;
114
0
         u = I[is * (n2 - 1)];
115
0
         v = I[is * n2];
116
0
         buf[i] = (u + v) * (W[2*i] * K(2.0));
117
0
         buf[n - i] = (u - v) * (W[2*i] * K(2.0));
118
0
    }
119
120
121
    /* child plan: two r2hc's of size n/2 */
122
0
    {
123
0
         plan_rdft *cld = (plan_rdft *) ego->cld;
124
0
         cld->apply((plan *) cld, buf, buf);
125
0
    }
126
    
127
0
    W2 = ego->td2->W;
128
0
    { /* i == 0 case */
129
0
         E wa, wb;
130
0
         E a, b;
131
0
         wa = W2[0]; /* cos */
132
0
         wb = W2[1]; /* sin */
133
0
         a = buf[0];
134
0
         b = buf[n2];
135
0
         O[0] = wa * a + wb * b;
136
0
         O[os * (n - 1)] = wb * a - wa * b;
137
0
    }
138
0
    W2 += 2;
139
0
    for (i = 1; i + i < n2; ++i, W2 += 2) {
140
0
         INT k;
141
0
         E u, v, u2, v2;
142
0
         u = buf[i];
143
0
         v = buf[n2 - i];
144
0
         u2 = buf[n2 + i];
145
0
         v2 = buf[n - i];
146
0
         k = (i + i) - 1;
147
0
         {
148
0
                    E wa, wb;
149
0
                    E a, b;
150
0
                    wa = W2[0]; /* cos */
151
0
                    wb = W2[1]; /* sin */
152
0
                    a = u - v;
153
0
                    b = v2 - u2;
154
0
                    O[os * k] = wa * a + wb * b;
155
0
                    O[os * (n - 1 - k)] = wb * a - wa * b;
156
0
               }
157
0
         ++k;
158
0
         W2 += 2;
159
0
         {
160
0
        E wa, wb;
161
0
        E a, b;
162
0
        wa = W2[0]; /* cos */
163
0
        wb = W2[1]; /* sin */
164
0
        a = u + v;
165
0
        b = u2 + v2;
166
0
        O[os * k] = wa * a + wb * b;
167
0
        O[os * (n - 1 - k)] = wb * a - wa * b;
168
0
         }
169
0
    }
170
0
    if (i + i == n2) {
171
0
         INT k = (i + i) - 1;
172
0
         E wa, wb;
173
0
         E a, b;
174
0
         wa = W2[0]; /* cos */
175
0
         wb = W2[1]; /* sin */
176
0
         a = buf[i];
177
0
         b = buf[n2 + i];
178
0
         O[os * k] = wa * a - wb * b;
179
0
         O[os * (n - 1 - k)] = wb * a + wa * b;
180
0
    }
181
0
     }
182
183
0
     X(ifree)(buf);
184
0
}
185
186
#if 0
187
188
/* This version of apply_re11 uses REDFT01 child plans, more similar
189
   to the original paper by Z. Wang.  We keep it around for reference
190
   (it is simpler) and because it may become more efficient if we
191
   ever implement REDFT01 codelets. */
192
193
static void apply_re11(const plan *ego_, R *I, R *O)
194
{
195
     const P *ego = (const P *) ego_;
196
     INT is = ego->is, os = ego->os;
197
     INT i, n = ego->n;
198
     INT iv, vl = ego->vl;
199
     INT ivs = ego->ivs, ovs = ego->ovs;
200
     R *W;
201
     R *buf;
202
203
     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
204
205
     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
206
    buf[0] = K(2.0) * I[0];
207
    buf[n/2] = K(2.0) * I[is * (n - 1)];
208
    for (i = 1; i + i < n; ++i) {
209
         INT k = i + i;
210
         E a, b;
211
         a = I[is * (k - 1)];
212
         b = I[is * k];
213
         buf[i] = a + b;
214
         buf[n - i] = a - b;
215
    }
216
217
    /* child plan: two redft01's (DCT-III) */
218
    {
219
         plan_rdft *cld = (plan_rdft *) ego->cld;
220
         cld->apply((plan *) cld, buf, buf);
221
    }
222
    
223
    W = ego->td2->W;
224
    for (i = 0; i + 1 < n/2; ++i, W += 2) {
225
         {
226
        E wa, wb;
227
        E a, b;
228
        wa = W[0]; /* cos */
229
        wb = W[1]; /* sin */
230
        a = buf[i];
231
        b = buf[n/2 + i];
232
        O[os * i] = wa * a + wb * b;
233
        O[os * (n - 1 - i)] = wb * a - wa * b;
234
         }
235
         ++i;
236
         W += 2;
237
         {
238
                    E wa, wb;
239
                    E a, b;
240
                    wa = W[0]; /* cos */
241
                    wb = W[1]; /* sin */
242
                    a = buf[i];
243
                    b = buf[n/2 + i];
244
                    O[os * i] = wa * a - wb * b;
245
                    O[os * (n - 1 - i)] = wb * a + wa * b;
246
               }
247
    }
248
    if (i < n/2) {
249
         E wa, wb;
250
         E a, b;
251
         wa = W[0]; /* cos */
252
         wb = W[1]; /* sin */
253
         a = buf[i];
254
         b = buf[n/2 + i];
255
         O[os * i] = wa * a + wb * b;
256
         O[os * (n - 1 - i)] = wb * a - wa * b;
257
    }
258
     }
259
260
     X(ifree)(buf);
261
}
262
263
#endif /* 0 */
264
265
/* like for rodft01, rodft11 is obtained from redft11 by
266
   reversing the input and flipping the sign of every other output. */
267
static void apply_ro11(const plan *ego_, R *I, R *O)
268
0
{
269
0
     const P *ego = (const P *) ego_;
270
0
     INT is = ego->is, os = ego->os;
271
0
     INT i, n = ego->n, n2 = n/2;
272
0
     INT iv, vl = ego->vl;
273
0
     INT ivs = ego->ivs, ovs = ego->ovs;
274
0
     R *W = ego->td->W;
275
0
     R *W2;
276
0
     R *buf;
277
278
0
     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
279
280
0
     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
281
0
    buf[0] = K(2.0) * I[is * (n - 1)];
282
0
    buf[n2] = K(2.0) * I[0];
283
0
    for (i = 1; i + i < n2; ++i) {
284
0
         INT k = i + i;
285
0
         E a, b, a2, b2;
286
0
         {
287
0
        E u, v;
288
0
        u = I[is * (n - k)];
289
0
        v = I[is * (n - 1 - k)];
290
0
        a = u + v;
291
0
        b2 = u - v;
292
0
         }
293
0
         {
294
0
        E u, v;
295
0
        u = I[is * (k)];
296
0
        v = I[is * (k - 1)];
297
0
        b = u + v;
298
0
        a2 = u - v;
299
0
         }
300
0
         {
301
0
        E wa, wb;
302
0
        wa = W[2*i];
303
0
        wb = W[2*i + 1];
304
0
        {
305
0
       E apb, amb;
306
0
       apb = a + b;
307
0
       amb = a - b;
308
0
       buf[i] = wa * amb + wb * apb; 
309
0
       buf[n2 - i] = wa * apb - wb * amb; 
310
0
        }
311
0
        {
312
0
       E apb, amb;
313
0
       apb = a2 + b2;
314
0
       amb = a2 - b2;
315
0
       buf[n2 + i] = wa * amb + wb * apb; 
316
0
       buf[n - i] = wa * apb - wb * amb; 
317
0
        }
318
0
         }
319
0
    }
320
0
    if (i + i == n2) {
321
0
         E u, v;
322
0
         u = I[is * n2];
323
0
         v = I[is * (n2 - 1)];
324
0
         buf[i] = (u + v) * (W[2*i] * K(2.0));
325
0
         buf[n - i] = (u - v) * (W[2*i] * K(2.0));
326
0
    }
327
328
329
    /* child plan: two r2hc's of size n/2 */
330
0
    {
331
0
         plan_rdft *cld = (plan_rdft *) ego->cld;
332
0
         cld->apply((plan *) cld, buf, buf);
333
0
    }
334
    
335
0
    W2 = ego->td2->W;
336
0
    { /* i == 0 case */
337
0
         E wa, wb;
338
0
         E a, b;
339
0
         wa = W2[0]; /* cos */
340
0
         wb = W2[1]; /* sin */
341
0
         a = buf[0];
342
0
         b = buf[n2];
343
0
         O[0] = wa * a + wb * b;
344
0
         O[os * (n - 1)] = wa * b - wb * a;
345
0
    }
346
0
    W2 += 2;
347
0
    for (i = 1; i + i < n2; ++i, W2 += 2) {
348
0
         INT k;
349
0
         E u, v, u2, v2;
350
0
         u = buf[i];
351
0
         v = buf[n2 - i];
352
0
         u2 = buf[n2 + i];
353
0
         v2 = buf[n - i];
354
0
         k = (i + i) - 1;
355
0
         {
356
0
                    E wa, wb;
357
0
                    E a, b;
358
0
                    wa = W2[0]; /* cos */
359
0
                    wb = W2[1]; /* sin */
360
0
                    a = v - u;
361
0
                    b = u2 - v2;
362
0
                    O[os * k] = wa * a + wb * b;
363
0
                    O[os * (n - 1 - k)] = wa * b - wb * a;
364
0
               }
365
0
         ++k;
366
0
         W2 += 2;
367
0
         {
368
0
        E wa, wb;
369
0
        E a, b;
370
0
        wa = W2[0]; /* cos */
371
0
        wb = W2[1]; /* sin */
372
0
        a = u + v;
373
0
        b = u2 + v2;
374
0
        O[os * k] = wa * a + wb * b;
375
0
        O[os * (n - 1 - k)] = wa * b - wb * a;
376
0
         }
377
0
    }
378
0
    if (i + i == n2) {
379
0
         INT k = (i + i) - 1;
380
0
         E wa, wb;
381
0
         E a, b;
382
0
         wa = W2[0]; /* cos */
383
0
         wb = W2[1]; /* sin */
384
0
         a = buf[i];
385
0
         b = buf[n2 + i];
386
0
         O[os * k] = wb * b - wa * a;
387
0
         O[os * (n - 1 - k)] = wa * b + wb * a;
388
0
    }
389
0
     }
390
391
0
     X(ifree)(buf);
392
0
}
393
394
static void awake(plan *ego_, enum wakefulness wakefulness)
395
0
{
396
0
     P *ego = (P *) ego_;
397
0
     static const tw_instr reodft010e_tw[] = {
398
0
          { TW_COS, 0, 1 },
399
0
          { TW_SIN, 0, 1 },
400
0
          { TW_NEXT, 1, 0 }
401
0
     };
402
0
     static const tw_instr reodft11e_tw[] = {
403
0
          { TW_COS, 1, 1 },
404
0
          { TW_SIN, 1, 1 },
405
0
          { TW_NEXT, 2, 0 }
406
0
     };
407
408
0
     X(plan_awake)(ego->cld, wakefulness);
409
410
0
     X(twiddle_awake)(wakefulness, &ego->td, reodft010e_tw, 
411
0
          2*ego->n, 1, ego->n/4+1);
412
0
     X(twiddle_awake)(wakefulness, &ego->td2, reodft11e_tw, 
413
0
          8*ego->n, 1, ego->n);
414
0
}
415
416
static void destroy(plan *ego_)
417
0
{
418
0
     P *ego = (P *) ego_;
419
0
     X(plan_destroy_internal)(ego->cld);
420
0
}
421
422
static void print(const plan *ego_, printer *p)
423
0
{
424
0
     const P *ego = (const P *) ego_;
425
0
     p->print(p, "(%se-radix2-r2hc-%D%v%(%p%))",
426
0
        X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
427
0
}
428
429
static int applicable0(const solver *ego_, const problem *p_)
430
0
{
431
0
     const problem_rdft *p = (const problem_rdft *) p_;
432
0
     UNUSED(ego_);
433
434
0
     return (1
435
0
       && p->sz->rnk == 1
436
0
       && p->vecsz->rnk <= 1
437
0
       && p->sz->dims[0].n % 2 == 0
438
0
       && (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
439
0
    );
440
0
}
441
442
static int applicable(const solver *ego, const problem *p, const planner *plnr)
443
320
{
444
320
     return (!NO_SLOWP(plnr) && applicable0(ego, p));
445
320
}
446
447
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
448
320
{
449
320
     P *pln;
450
320
     const problem_rdft *p;
451
320
     plan *cld;
452
320
     R *buf;
453
320
     INT n;
454
320
     opcnt ops;
455
456
320
     static const plan_adt padt = {
457
320
    X(rdft_solve), awake, print, destroy
458
320
     };
459
460
320
     if (!applicable(ego_, p_, plnr))
461
320
          return (plan *)0;
462
463
0
     p = (const problem_rdft *) p_;
464
465
0
     n = p->sz->dims[0].n;
466
0
     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
467
468
0
     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n/2, 1, 1),
469
0
                                                   X(mktensor_1d)(2, n/2, n/2),
470
0
                                                   buf, buf, R2HC));
471
0
     X(ifree)(buf);
472
0
     if (!cld)
473
0
          return (plan *)0;
474
475
0
     pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
476
0
     pln->n = n;
477
0
     pln->is = p->sz->dims[0].is;
478
0
     pln->os = p->sz->dims[0].os;
479
0
     pln->cld = cld;
480
0
     pln->td = pln->td2 = 0;
481
0
     pln->kind = p->kind[0];
482
     
483
0
     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
484
     
485
0
     X(ops_zero)(&ops);
486
0
     ops.add = 2 + (n/2 - 1)/2 * 20;
487
0
     ops.mul = 6 + (n/2 - 1)/2 * 16;
488
0
     ops.other = 4*n + 2 + (n/2 - 1)/2 * 6;
489
0
     if ((n/2) % 2 == 0) {
490
0
    ops.add += 4;
491
0
    ops.mul += 8;
492
0
    ops.other += 4;
493
0
     }
494
495
0
     X(ops_zero)(&pln->super.super.ops);
496
0
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
497
0
     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
498
499
0
     return &(pln->super.super);
500
0
}
501
502
/* constructor */
503
static solver *mksolver(void)
504
1
{
505
1
     static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
506
1
     S *slv = MKSOLVER(S, &sadt);
507
1
     return &(slv->super);
508
1
}
509
510
void X(reodft11e_radix2_r2hc_register)(planner *p)
511
1
{
512
1
     REGISTER_SOLVER(p, mksolver());
513
1
}