Coverage Report

Created: 2025-08-26 06:35

/src/fftw3/rdft/hc2hc-generic.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* express a hc2hc problem in terms of rdft + multiplication by
22
   twiddle factors */
23
24
#include "rdft/hc2hc.h"
25
26
typedef hc2hc_solver S;
27
28
typedef struct {
29
     plan_hc2hc super;
30
31
     INT r, m, s, vl, vs, mstart1, mcount1;
32
     plan *cld0;
33
     plan *cld;
34
     twid *td;
35
} P;
36
37
38
/**************************************************************/
39
static void mktwiddle(P *ego, enum wakefulness wakefulness)
40
0
{
41
0
     static const tw_instr tw[] = { { TW_HALF, 0, 0 }, { TW_NEXT, 1, 0 } };
42
43
     /* note that R and M are swapped, to allow for sequential
44
  access both to data and twiddles */
45
0
     X(twiddle_awake)(wakefulness, &ego->td, tw, 
46
0
          ego->r * ego->m, ego->m, ego->r);
47
0
}
48
49
static void bytwiddle(const P *ego, R *IO, R sign)
50
0
{
51
0
     INT i, j, k;
52
0
     INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
53
0
     INT ms = m * s;
54
0
     INT mstart1 = ego->mstart1, mcount1 = ego->mcount1;
55
0
     INT wrem = 2 * ((m-1)/2 - mcount1);
56
57
0
     for (i = 0; i < vl; ++i, IO += vs) {
58
0
    const R *W = ego->td->W;
59
60
0
    A(m % 2 == 1);
61
0
    for (k = 1, W += (m - 1) + 2*(mstart1-1); k < r; ++k) {
62
         /* pr := IO + (j + mstart1) * s + k * ms */
63
0
         R *pr = IO + mstart1 * s + k * ms;
64
65
         /* pi := IO + (m - j - mstart1) * s + k * ms */
66
0
         R *pi = IO - mstart1 * s + (k + 1) * ms;
67
68
0
         for (j = 0; j < mcount1; ++j, pr += s, pi -= s) {
69
0
        E xr = *pr;
70
0
        E xi = *pi;
71
0
        E wr = W[0];
72
0
        E wi = sign * W[1];
73
0
        *pr = xr * wr - xi * wi;
74
0
        *pi = xi * wr + xr * wi;
75
0
        W += 2;
76
0
         }
77
0
         W += wrem;
78
0
    }
79
0
     }
80
0
}
81
82
static void swapri(R *IO, INT r, INT m, INT s, INT jstart, INT jend)
83
0
{
84
0
     INT k;
85
0
     INT ms = m * s;
86
0
     INT js = jstart * s;
87
0
     for (k = 0; k + k < r; ++k) {
88
    /* pr := IO + (m - j) * s + k * ms */
89
0
    R *pr = IO + (k + 1) * ms - js;
90
    /* pi := IO + (m - j) * s + (r - 1 - k) * ms */
91
0
    R *pi = IO + (r - k) * ms - js;
92
0
    INT j;
93
0
    for (j = jstart; j < jend; j += 1, pr -= s, pi -= s) {
94
0
         R t = *pr;
95
0
         *pr = *pi;
96
0
         *pi = t;
97
0
    }
98
0
     }
99
0
}
100
101
static void reorder_dit(const P *ego, R *IO)
102
0
{
103
0
     INT i, k;
104
0
     INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
105
0
     INT ms = m * s;
106
0
     INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
107
108
0
     for (i = 0; i < vl; ++i, IO += vs) {
109
0
    for (k = 1; k + k < r; ++k) {
110
0
         R *p0 = IO + k * ms;
111
0
         R *p1 = IO + (r - k) * ms;
112
0
         INT j;
113
114
0
         for (j = mstart1; j < mend1; ++j) {
115
0
        E rp, ip, im, rm;
116
0
        rp = p0[j * s];
117
0
        im = p1[ms - j * s];
118
0
        rm = p1[j * s];
119
0
        ip = p0[ms - j * s];
120
0
        p0[j * s] = rp - im;
121
0
        p1[ms - j * s] = rp + im;
122
0
        p1[j * s] = rm - ip;
123
0
        p0[ms - j * s] = ip + rm;
124
0
         }
125
0
    }
126
127
0
    swapri(IO, r, m, s, mstart1, mend1);
128
0
     }
129
0
}
130
131
static void reorder_dif(const P *ego, R *IO)
132
0
{
133
0
     INT i, k;
134
0
     INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs;
135
0
     INT ms = m * s;
136
0
     INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1;
137
138
0
     for (i = 0; i < vl; ++i, IO += vs) {
139
0
    swapri(IO, r, m, s, mstart1, mend1);
140
141
0
    for (k = 1; k + k < r; ++k) {
142
0
         R *p0 = IO + k * ms;
143
0
         R *p1 = IO + (r - k) * ms;
144
0
         const R half = K(0.5);
145
0
         INT j;
146
147
0
         for (j = mstart1; j < mend1; ++j) {
148
0
        E rp, ip, im, rm;
149
0
        rp = half * p0[j * s];
150
0
        im = half * p1[ms - j * s];
151
0
        rm = half * p1[j * s];
152
0
        ip = half * p0[ms - j * s];
153
0
        p0[j * s] = rp + im;
154
0
        p1[ms - j * s] = im - rp;
155
0
        p1[j * s] = rm + ip;
156
0
        p0[ms - j * s] = ip - rm;
157
0
         }
158
0
    }
159
0
     }
160
0
}
161
162
static int applicable(rdft_kind kind, INT r, INT m, const planner *plnr)
163
0
{
164
0
     return (1 
165
0
       && (kind == R2HC || kind == HC2R)
166
0
       && (m % 2)
167
0
       && (r % 2)
168
0
       && !NO_SLOWP(plnr)
169
0
    );
170
0
}
171
172
/**************************************************************/
173
174
static void apply_dit(const plan *ego_, R *IO)
175
0
{
176
0
     const P *ego = (const P *) ego_;
177
0
     INT start;
178
0
     plan_rdft *cld, *cld0;
179
180
0
     bytwiddle(ego, IO, K(-1.0));
181
182
0
     cld0 = (plan_rdft *) ego->cld0;
183
0
     cld0->apply(ego->cld0, IO, IO);
184
185
0
     start = ego->mstart1 * ego->s;
186
0
     cld = (plan_rdft *) ego->cld;
187
0
     cld->apply(ego->cld, IO + start, IO + start);
188
189
0
     reorder_dit(ego, IO);
190
0
}
191
192
static void apply_dif(const plan *ego_, R *IO)
193
0
{
194
0
     const P *ego = (const P *) ego_;
195
0
     INT start;
196
0
     plan_rdft *cld, *cld0;
197
198
0
     reorder_dif(ego, IO);
199
200
0
     cld0 = (plan_rdft *) ego->cld0;
201
0
     cld0->apply(ego->cld0, IO, IO);
202
203
0
     start = ego->mstart1 * ego->s;
204
0
     cld = (plan_rdft *) ego->cld;
205
0
     cld->apply(ego->cld, IO + start, IO + start);
206
207
0
     bytwiddle(ego, IO, K(1.0));
208
0
}
209
210
211
static void awake(plan *ego_, enum wakefulness wakefulness)
212
0
{
213
0
     P *ego = (P *) ego_;
214
0
     X(plan_awake)(ego->cld0, wakefulness);
215
0
     X(plan_awake)(ego->cld, wakefulness);
216
0
     mktwiddle(ego, wakefulness);
217
0
}
218
219
static void destroy(plan *ego_)
220
0
{
221
0
     P *ego = (P *) ego_;
222
0
     X(plan_destroy_internal)(ego->cld);
223
0
     X(plan_destroy_internal)(ego->cld0);
224
0
}
225
226
static void print(const plan *ego_, printer *p)
227
0
{
228
0
     const P *ego = (const P *) ego_;
229
0
     p->print(p, "(hc2hc-generic-%s-%D-%D%v%(%p%)%(%p%))", 
230
0
        ego->super.apply == apply_dit ? "dit" : "dif",
231
0
        ego->r, ego->m, ego->vl, ego->cld0, ego->cld);
232
0
}
233
234
static plan *mkcldw(const hc2hc_solver *ego_, 
235
        rdft_kind kind, INT r, INT m, INT s, INT vl, INT vs, 
236
        INT mstart, INT mcount,
237
        R *IO, planner *plnr)
238
0
{
239
0
     P *pln;
240
0
     plan *cld0 = 0, *cld = 0;
241
0
     INT mstart1, mcount1, mstride;
242
243
0
     static const plan_adt padt = {
244
0
    0, awake, print, destroy
245
0
     };
246
247
0
     UNUSED(ego_);
248
249
0
     A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m+2)/2);
250
251
0
     if (!applicable(kind, r, m, plnr))
252
0
          return (plan *)0;
253
254
0
     A(m % 2);
255
0
     mstart1 = mstart + (mstart == 0);
256
0
     mcount1 = mcount - (mstart == 0);
257
0
     mstride = m - (mstart + mcount - 1) - mstart1;
258
259
     /* 0th (DC) transform (vl of these), if mstart == 0 */
260
0
     cld0 = X(mkplan_d)(plnr, 
261
0
      X(mkproblem_rdft_1_d)(
262
0
           mstart == 0 ? X(mktensor_1d)(r, m * s, m * s)
263
0
           : X(mktensor_0d)(),
264
0
           X(mktensor_1d)(vl, vs, vs),
265
0
           IO, IO, kind)
266
0
      );
267
0
     if (!cld0) goto nada;
268
269
     /* twiddle transforms: there are 2 x mcount1 x vl of these
270
  (where 2 corresponds to the real and imaginary parts) ...
271
        the 2 x mcount1 loops are combined if mstart=0 and mcount=(m+2)/2. */
272
0
     cld = X(mkplan_d)(plnr, 
273
0
      X(mkproblem_rdft_1_d)(
274
0
           X(mktensor_1d)(r, m * s, m * s),
275
0
           X(mktensor_3d)(2, mstride * s, mstride * s,
276
0
              mcount1, s, s, 
277
0
              vl, vs, vs),
278
0
           IO + s * mstart1, IO + s * mstart1, kind)
279
0
                  );
280
0
     if (!cld) goto nada;
281
     
282
0
     pln = MKPLAN_HC2HC(P, &padt, (kind == R2HC) ? apply_dit : apply_dif);
283
0
     pln->cld = cld;
284
0
     pln->cld0 = cld0;
285
0
     pln->r = r;
286
0
     pln->m = m;
287
0
     pln->s = s;
288
0
     pln->vl = vl;
289
0
     pln->vs = vs;
290
0
     pln->td = 0;
291
0
     pln->mstart1 = mstart1;
292
0
     pln->mcount1 = mcount1;
293
294
0
     {
295
0
    double n0 = 0.5 * (r - 1) * (2 * mcount1) * vl;
296
0
    pln->super.super.ops = cld->ops;
297
0
    pln->super.super.ops.mul += (kind == R2HC ? 5.0 : 7.0) * n0;
298
0
    pln->super.super.ops.add += 4.0 * n0;
299
0
    pln->super.super.ops.other += 11.0 * n0;
300
0
     }
301
0
     return &(pln->super.super);
302
303
0
 nada:
304
0
     X(plan_destroy_internal)(cld);
305
0
     X(plan_destroy_internal)(cld0);
306
0
     return (plan *) 0;
307
0
}
308
309
static void regsolver(planner *plnr, INT r)
310
1
{
311
1
     S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), r, mkcldw);
312
1
     REGISTER_SOLVER(plnr, &(slv->super));
313
1
     if (X(mksolver_hc2hc_hook)) {
314
0
    slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), r, mkcldw);
315
0
    REGISTER_SOLVER(plnr, &(slv->super));
316
0
     }
317
1
}
318
319
void X(hc2hc_generic_register)(planner *p)
320
1
{
321
1
     regsolver(p, 0);
322
1
}