/src/fftw3/reodft/reodft11e-r2hc-odd.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | /* Do an R{E,O}DFT11 problem via an R2HC problem of the same *odd* size, |
23 | | with some permutations and post-processing, as described in: |
24 | | |
25 | | S. C. Chan and K. L. Ho, "Fast algorithms for computing the |
26 | | discrete cosine transform," IEEE Trans. Circuits Systems II: |
27 | | Analog & Digital Sig. Proc. 39 (3), 185--190 (1992). |
28 | | |
29 | | (For even sizes, see reodft11e-radix2.c.) |
30 | | |
31 | | This algorithm is related to the 8 x n prime-factor-algorithm (PFA) |
32 | | decomposition of the size 8n "logical" DFT corresponding to the |
33 | | R{EO}DFT11. |
34 | | |
35 | | Aside from very confusing notation (several symbols are redefined |
36 | | from one line to the next), be aware that this paper has some |
37 | | errors. In particular, the signs are wrong in Eqs. (34-35). Also, |
38 | | Eqs. (36-37) should be simply C(k) = C(2k + 1 mod N), and similarly |
39 | | for S (or, equivalently, the second cases should have 2*N - 2*k - 1 |
40 | | instead of N - k - 1). Note also that in their definition of the |
41 | | DFT, similarly to FFTW's, the exponent's sign is -1, but they |
42 | | forgot to correspondingly multiply S (the sine terms) by -1. |
43 | | */ |
44 | | |
45 | | #include "reodft/reodft.h" |
46 | | |
47 | | typedef struct { |
48 | | solver super; |
49 | | } S; |
50 | | |
51 | | typedef struct { |
52 | | plan_rdft super; |
53 | | plan *cld; |
54 | | INT is, os; |
55 | | INT n; |
56 | | INT vl; |
57 | | INT ivs, ovs; |
58 | | rdft_kind kind; |
59 | | } P; |
60 | | |
61 | | static DK(SQRT2, +1.4142135623730950488016887242096980785696718753769); |
62 | | |
63 | 0 | #define SGN_SET(x, i) ((i) % 2 ? -(x) : (x)) |
64 | | |
65 | | static void apply_re11(const plan *ego_, R *I, R *O) |
66 | 0 | { |
67 | 0 | const P *ego = (const P *) ego_; |
68 | 0 | INT is = ego->is, os = ego->os; |
69 | 0 | INT i, n = ego->n, n2 = n/2; |
70 | 0 | INT iv, vl = ego->vl; |
71 | 0 | INT ivs = ego->ivs, ovs = ego->ovs; |
72 | 0 | R *buf; |
73 | |
|
74 | 0 | buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); |
75 | |
|
76 | 0 | for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { |
77 | 0 | { |
78 | 0 | INT m; |
79 | 0 | for (i = 0, m = n2; m < n; ++i, m += 4) |
80 | 0 | buf[i] = I[is * m]; |
81 | 0 | for (; m < 2 * n; ++i, m += 4) |
82 | 0 | buf[i] = -I[is * (2*n - m - 1)]; |
83 | 0 | for (; m < 3 * n; ++i, m += 4) |
84 | 0 | buf[i] = -I[is * (m - 2*n)]; |
85 | 0 | for (; m < 4 * n; ++i, m += 4) |
86 | 0 | buf[i] = I[is * (4*n - m - 1)]; |
87 | 0 | m -= 4 * n; |
88 | 0 | for (; i < n; ++i, m += 4) |
89 | 0 | buf[i] = I[is * m]; |
90 | 0 | } |
91 | |
|
92 | 0 | { /* child plan: R2HC of size n */ |
93 | 0 | plan_rdft *cld = (plan_rdft *) ego->cld; |
94 | 0 | cld->apply((plan *) cld, buf, buf); |
95 | 0 | } |
96 | | |
97 | | /* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */ |
98 | 0 | for (i = 0; i + i + 1 < n2; ++i) { |
99 | 0 | INT k = i + i + 1; |
100 | 0 | E c1, s1; |
101 | 0 | E c2, s2; |
102 | 0 | c1 = buf[k]; |
103 | 0 | c2 = buf[k + 1]; |
104 | 0 | s2 = buf[n - (k + 1)]; |
105 | 0 | s1 = buf[n - k]; |
106 | | |
107 | 0 | O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2) + |
108 | 0 | SGN_SET(s1, i/2)); |
109 | 0 | O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2) - |
110 | 0 | SGN_SET(s1, (n-(i+1))/2)); |
111 | | |
112 | 0 | O[os * (n2 - (i+1))] = SQRT2 * (SGN_SET(c2, (n2-i)/2) - |
113 | 0 | SGN_SET(s2, (n2-(i+1))/2)); |
114 | 0 | O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2) + |
115 | 0 | SGN_SET(s2, (n2+(i+1))/2)); |
116 | 0 | } |
117 | 0 | if (i + i + 1 == n2) { |
118 | 0 | E c, s; |
119 | 0 | c = buf[n2]; |
120 | 0 | s = buf[n - n2]; |
121 | 0 | O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2) + |
122 | 0 | SGN_SET(s, i/2)); |
123 | 0 | O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2) + |
124 | 0 | SGN_SET(s, (i+1)/2)); |
125 | 0 | } |
126 | 0 | O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2); |
127 | 0 | } |
128 | |
|
129 | 0 | X(ifree)(buf); |
130 | 0 | } |
131 | | |
132 | | /* like for rodft01, rodft11 is obtained from redft11 by |
133 | | reversing the input and flipping the sign of every other output. */ |
134 | | static void apply_ro11(const plan *ego_, R *I, R *O) |
135 | 0 | { |
136 | 0 | const P *ego = (const P *) ego_; |
137 | 0 | INT is = ego->is, os = ego->os; |
138 | 0 | INT i, n = ego->n, n2 = n/2; |
139 | 0 | INT iv, vl = ego->vl; |
140 | 0 | INT ivs = ego->ivs, ovs = ego->ovs; |
141 | 0 | R *buf; |
142 | |
|
143 | 0 | buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); |
144 | |
|
145 | 0 | for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { |
146 | 0 | { |
147 | 0 | INT m; |
148 | 0 | for (i = 0, m = n2; m < n; ++i, m += 4) |
149 | 0 | buf[i] = I[is * (n - 1 - m)]; |
150 | 0 | for (; m < 2 * n; ++i, m += 4) |
151 | 0 | buf[i] = -I[is * (m - n)]; |
152 | 0 | for (; m < 3 * n; ++i, m += 4) |
153 | 0 | buf[i] = -I[is * (3*n - 1 - m)]; |
154 | 0 | for (; m < 4 * n; ++i, m += 4) |
155 | 0 | buf[i] = I[is * (m - 3*n)]; |
156 | 0 | m -= 4 * n; |
157 | 0 | for (; i < n; ++i, m += 4) |
158 | 0 | buf[i] = I[is * (n - 1 - m)]; |
159 | 0 | } |
160 | |
|
161 | 0 | { /* child plan: R2HC of size n */ |
162 | 0 | plan_rdft *cld = (plan_rdft *) ego->cld; |
163 | 0 | cld->apply((plan *) cld, buf, buf); |
164 | 0 | } |
165 | | |
166 | | /* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */ |
167 | 0 | for (i = 0; i + i + 1 < n2; ++i) { |
168 | 0 | INT k = i + i + 1; |
169 | 0 | INT j; |
170 | 0 | E c1, s1; |
171 | 0 | E c2, s2; |
172 | 0 | c1 = buf[k]; |
173 | 0 | c2 = buf[k + 1]; |
174 | 0 | s2 = buf[n - (k + 1)]; |
175 | 0 | s1 = buf[n - k]; |
176 | | |
177 | 0 | O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2 + i) + |
178 | 0 | SGN_SET(s1, i/2 + i)); |
179 | 0 | O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2 + i) - |
180 | 0 | SGN_SET(s1, (n-(i+1))/2 + i)); |
181 | | |
182 | 0 | j = n2 - (i+1); |
183 | 0 | O[os * j] = SQRT2 * (SGN_SET(c2, (n2-i)/2 + j) - |
184 | 0 | SGN_SET(s2, (n2-(i+1))/2 + j)); |
185 | 0 | O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2 + j) + |
186 | 0 | SGN_SET(s2, (n2+(i+1))/2 + j)); |
187 | 0 | } |
188 | 0 | if (i + i + 1 == n2) { |
189 | 0 | E c, s; |
190 | 0 | c = buf[n2]; |
191 | 0 | s = buf[n - n2]; |
192 | 0 | O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2 + i) + |
193 | 0 | SGN_SET(s, i/2 + i)); |
194 | 0 | O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2 + i) + |
195 | 0 | SGN_SET(s, (i+1)/2 + i)); |
196 | 0 | } |
197 | 0 | O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2 + n2); |
198 | 0 | } |
199 | |
|
200 | 0 | X(ifree)(buf); |
201 | 0 | } |
202 | | |
203 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
204 | 0 | { |
205 | 0 | P *ego = (P *) ego_; |
206 | 0 | X(plan_awake)(ego->cld, wakefulness); |
207 | 0 | } |
208 | | |
209 | | static void destroy(plan *ego_) |
210 | 0 | { |
211 | 0 | P *ego = (P *) ego_; |
212 | 0 | X(plan_destroy_internal)(ego->cld); |
213 | 0 | } |
214 | | |
215 | | static void print(const plan *ego_, printer *p) |
216 | 0 | { |
217 | 0 | const P *ego = (const P *) ego_; |
218 | 0 | p->print(p, "(%se-r2hc-odd-%D%v%(%p%))", |
219 | 0 | X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld); |
220 | 0 | } |
221 | | |
222 | | static int applicable0(const solver *ego_, const problem *p_) |
223 | 0 | { |
224 | 0 | const problem_rdft *p = (const problem_rdft *) p_; |
225 | 0 | UNUSED(ego_); |
226 | |
|
227 | 0 | return (1 |
228 | 0 | && p->sz->rnk == 1 |
229 | 0 | && p->vecsz->rnk <= 1 |
230 | 0 | && p->sz->dims[0].n % 2 == 1 |
231 | 0 | && (p->kind[0] == REDFT11 || p->kind[0] == RODFT11) |
232 | 0 | ); |
233 | 0 | } |
234 | | |
235 | | static int applicable(const solver *ego, const problem *p, const planner *plnr) |
236 | 326 | { |
237 | 326 | return (!NO_SLOWP(plnr) && applicable0(ego, p)); |
238 | 326 | } |
239 | | |
240 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
241 | 326 | { |
242 | 326 | P *pln; |
243 | 326 | const problem_rdft *p; |
244 | 326 | plan *cld; |
245 | 326 | R *buf; |
246 | 326 | INT n; |
247 | 326 | opcnt ops; |
248 | | |
249 | 326 | static const plan_adt padt = { |
250 | 326 | X(rdft_solve), awake, print, destroy |
251 | 326 | }; |
252 | | |
253 | 326 | if (!applicable(ego_, p_, plnr)) |
254 | 326 | return (plan *)0; |
255 | | |
256 | 0 | p = (const problem_rdft *) p_; |
257 | |
|
258 | 0 | n = p->sz->dims[0].n; |
259 | 0 | buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); |
260 | |
|
261 | 0 | cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1), |
262 | 0 | X(mktensor_0d)(), |
263 | 0 | buf, buf, R2HC)); |
264 | 0 | X(ifree)(buf); |
265 | 0 | if (!cld) |
266 | 0 | return (plan *)0; |
267 | | |
268 | 0 | pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11); |
269 | 0 | pln->n = n; |
270 | 0 | pln->is = p->sz->dims[0].is; |
271 | 0 | pln->os = p->sz->dims[0].os; |
272 | 0 | pln->cld = cld; |
273 | 0 | pln->kind = p->kind[0]; |
274 | | |
275 | 0 | X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); |
276 | | |
277 | 0 | X(ops_zero)(&ops); |
278 | 0 | ops.add = n - 1; |
279 | 0 | ops.mul = n; |
280 | 0 | ops.other = 4*n; |
281 | |
|
282 | 0 | X(ops_zero)(&pln->super.super.ops); |
283 | 0 | X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); |
284 | 0 | X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); |
285 | |
|
286 | 0 | return &(pln->super.super); |
287 | 0 | } |
288 | | |
289 | | /* constructor */ |
290 | | static solver *mksolver(void) |
291 | 1 | { |
292 | 1 | static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; |
293 | 1 | S *slv = MKSOLVER(S, &sadt); |
294 | 1 | return &(slv->super); |
295 | 1 | } |
296 | | |
297 | | void X(reodft11e_r2hc_odd_register)(planner *p) |
298 | 1 | { |
299 | 1 | REGISTER_SOLVER(p, mksolver()); |
300 | 1 | } |