/src/fftw3/rdft/hc2hc-generic.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | /* express a hc2hc problem in terms of rdft + multiplication by |
22 | | twiddle factors */ |
23 | | |
24 | | #include "rdft/hc2hc.h" |
25 | | |
26 | | typedef hc2hc_solver S; |
27 | | |
28 | | typedef struct { |
29 | | plan_hc2hc super; |
30 | | |
31 | | INT r, m, s, vl, vs, mstart1, mcount1; |
32 | | plan *cld0; |
33 | | plan *cld; |
34 | | twid *td; |
35 | | } P; |
36 | | |
37 | | |
38 | | /**************************************************************/ |
39 | | static void mktwiddle(P *ego, enum wakefulness wakefulness) |
40 | 0 | { |
41 | 0 | static const tw_instr tw[] = { { TW_HALF, 0, 0 }, { TW_NEXT, 1, 0 } }; |
42 | | |
43 | | /* note that R and M are swapped, to allow for sequential |
44 | | access both to data and twiddles */ |
45 | 0 | X(twiddle_awake)(wakefulness, &ego->td, tw, |
46 | 0 | ego->r * ego->m, ego->m, ego->r); |
47 | 0 | } |
48 | | |
49 | | static void bytwiddle(const P *ego, R *IO, R sign) |
50 | 0 | { |
51 | 0 | INT i, j, k; |
52 | 0 | INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs; |
53 | 0 | INT ms = m * s; |
54 | 0 | INT mstart1 = ego->mstart1, mcount1 = ego->mcount1; |
55 | 0 | INT wrem = 2 * ((m-1)/2 - mcount1); |
56 | |
|
57 | 0 | for (i = 0; i < vl; ++i, IO += vs) { |
58 | 0 | const R *W = ego->td->W; |
59 | |
|
60 | 0 | A(m % 2 == 1); |
61 | 0 | for (k = 1, W += (m - 1) + 2*(mstart1-1); k < r; ++k) { |
62 | | /* pr := IO + (j + mstart1) * s + k * ms */ |
63 | 0 | R *pr = IO + mstart1 * s + k * ms; |
64 | | |
65 | | /* pi := IO + (m - j - mstart1) * s + k * ms */ |
66 | 0 | R *pi = IO - mstart1 * s + (k + 1) * ms; |
67 | |
|
68 | 0 | for (j = 0; j < mcount1; ++j, pr += s, pi -= s) { |
69 | 0 | E xr = *pr; |
70 | 0 | E xi = *pi; |
71 | 0 | E wr = W[0]; |
72 | 0 | E wi = sign * W[1]; |
73 | 0 | *pr = xr * wr - xi * wi; |
74 | 0 | *pi = xi * wr + xr * wi; |
75 | 0 | W += 2; |
76 | 0 | } |
77 | 0 | W += wrem; |
78 | 0 | } |
79 | 0 | } |
80 | 0 | } |
81 | | |
82 | | static void swapri(R *IO, INT r, INT m, INT s, INT jstart, INT jend) |
83 | 0 | { |
84 | 0 | INT k; |
85 | 0 | INT ms = m * s; |
86 | 0 | INT js = jstart * s; |
87 | 0 | for (k = 0; k + k < r; ++k) { |
88 | | /* pr := IO + (m - j) * s + k * ms */ |
89 | 0 | R *pr = IO + (k + 1) * ms - js; |
90 | | /* pi := IO + (m - j) * s + (r - 1 - k) * ms */ |
91 | 0 | R *pi = IO + (r - k) * ms - js; |
92 | 0 | INT j; |
93 | 0 | for (j = jstart; j < jend; j += 1, pr -= s, pi -= s) { |
94 | 0 | R t = *pr; |
95 | 0 | *pr = *pi; |
96 | 0 | *pi = t; |
97 | 0 | } |
98 | 0 | } |
99 | 0 | } |
100 | | |
101 | | static void reorder_dit(const P *ego, R *IO) |
102 | 0 | { |
103 | 0 | INT i, k; |
104 | 0 | INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs; |
105 | 0 | INT ms = m * s; |
106 | 0 | INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1; |
107 | |
|
108 | 0 | for (i = 0; i < vl; ++i, IO += vs) { |
109 | 0 | for (k = 1; k + k < r; ++k) { |
110 | 0 | R *p0 = IO + k * ms; |
111 | 0 | R *p1 = IO + (r - k) * ms; |
112 | 0 | INT j; |
113 | |
|
114 | 0 | for (j = mstart1; j < mend1; ++j) { |
115 | 0 | E rp, ip, im, rm; |
116 | 0 | rp = p0[j * s]; |
117 | 0 | im = p1[ms - j * s]; |
118 | 0 | rm = p1[j * s]; |
119 | 0 | ip = p0[ms - j * s]; |
120 | 0 | p0[j * s] = rp - im; |
121 | 0 | p1[ms - j * s] = rp + im; |
122 | 0 | p1[j * s] = rm - ip; |
123 | 0 | p0[ms - j * s] = ip + rm; |
124 | 0 | } |
125 | 0 | } |
126 | |
|
127 | 0 | swapri(IO, r, m, s, mstart1, mend1); |
128 | 0 | } |
129 | 0 | } |
130 | | |
131 | | static void reorder_dif(const P *ego, R *IO) |
132 | 0 | { |
133 | 0 | INT i, k; |
134 | 0 | INT r = ego->r, m = ego->m, s = ego->s, vl = ego->vl, vs = ego->vs; |
135 | 0 | INT ms = m * s; |
136 | 0 | INT mstart1 = ego->mstart1, mend1 = mstart1 + ego->mcount1; |
137 | |
|
138 | 0 | for (i = 0; i < vl; ++i, IO += vs) { |
139 | 0 | swapri(IO, r, m, s, mstart1, mend1); |
140 | |
|
141 | 0 | for (k = 1; k + k < r; ++k) { |
142 | 0 | R *p0 = IO + k * ms; |
143 | 0 | R *p1 = IO + (r - k) * ms; |
144 | 0 | const R half = K(0.5); |
145 | 0 | INT j; |
146 | |
|
147 | 0 | for (j = mstart1; j < mend1; ++j) { |
148 | 0 | E rp, ip, im, rm; |
149 | 0 | rp = half * p0[j * s]; |
150 | 0 | im = half * p1[ms - j * s]; |
151 | 0 | rm = half * p1[j * s]; |
152 | 0 | ip = half * p0[ms - j * s]; |
153 | 0 | p0[j * s] = rp + im; |
154 | 0 | p1[ms - j * s] = im - rp; |
155 | 0 | p1[j * s] = rm + ip; |
156 | 0 | p0[ms - j * s] = ip - rm; |
157 | 0 | } |
158 | 0 | } |
159 | 0 | } |
160 | 0 | } |
161 | | |
162 | | static int applicable(rdft_kind kind, INT r, INT m, const planner *plnr) |
163 | 0 | { |
164 | 0 | return (1 |
165 | 0 | && (kind == R2HC || kind == HC2R) |
166 | 0 | && (m % 2) |
167 | 0 | && (r % 2) |
168 | 0 | && !NO_SLOWP(plnr) |
169 | 0 | ); |
170 | 0 | } |
171 | | |
172 | | /**************************************************************/ |
173 | | |
174 | | static void apply_dit(const plan *ego_, R *IO) |
175 | 0 | { |
176 | 0 | const P *ego = (const P *) ego_; |
177 | 0 | INT start; |
178 | 0 | plan_rdft *cld, *cld0; |
179 | |
|
180 | 0 | bytwiddle(ego, IO, K(-1.0)); |
181 | |
|
182 | 0 | cld0 = (plan_rdft *) ego->cld0; |
183 | 0 | cld0->apply(ego->cld0, IO, IO); |
184 | |
|
185 | 0 | start = ego->mstart1 * ego->s; |
186 | 0 | cld = (plan_rdft *) ego->cld; |
187 | 0 | cld->apply(ego->cld, IO + start, IO + start); |
188 | |
|
189 | 0 | reorder_dit(ego, IO); |
190 | 0 | } |
191 | | |
192 | | static void apply_dif(const plan *ego_, R *IO) |
193 | 0 | { |
194 | 0 | const P *ego = (const P *) ego_; |
195 | 0 | INT start; |
196 | 0 | plan_rdft *cld, *cld0; |
197 | |
|
198 | 0 | reorder_dif(ego, IO); |
199 | |
|
200 | 0 | cld0 = (plan_rdft *) ego->cld0; |
201 | 0 | cld0->apply(ego->cld0, IO, IO); |
202 | |
|
203 | 0 | start = ego->mstart1 * ego->s; |
204 | 0 | cld = (plan_rdft *) ego->cld; |
205 | 0 | cld->apply(ego->cld, IO + start, IO + start); |
206 | |
|
207 | 0 | bytwiddle(ego, IO, K(1.0)); |
208 | 0 | } |
209 | | |
210 | | |
211 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
212 | 0 | { |
213 | 0 | P *ego = (P *) ego_; |
214 | 0 | X(plan_awake)(ego->cld0, wakefulness); |
215 | 0 | X(plan_awake)(ego->cld, wakefulness); |
216 | 0 | mktwiddle(ego, wakefulness); |
217 | 0 | } |
218 | | |
219 | | static void destroy(plan *ego_) |
220 | 0 | { |
221 | 0 | P *ego = (P *) ego_; |
222 | 0 | X(plan_destroy_internal)(ego->cld); |
223 | 0 | X(plan_destroy_internal)(ego->cld0); |
224 | 0 | } |
225 | | |
226 | | static void print(const plan *ego_, printer *p) |
227 | 0 | { |
228 | 0 | const P *ego = (const P *) ego_; |
229 | 0 | p->print(p, "(hc2hc-generic-%s-%D-%D%v%(%p%)%(%p%))", |
230 | 0 | ego->super.apply == apply_dit ? "dit" : "dif", |
231 | 0 | ego->r, ego->m, ego->vl, ego->cld0, ego->cld); |
232 | 0 | } |
233 | | |
234 | | static plan *mkcldw(const hc2hc_solver *ego_, |
235 | | rdft_kind kind, INT r, INT m, INT s, INT vl, INT vs, |
236 | | INT mstart, INT mcount, |
237 | | R *IO, planner *plnr) |
238 | 0 | { |
239 | 0 | P *pln; |
240 | 0 | plan *cld0 = 0, *cld = 0; |
241 | 0 | INT mstart1, mcount1, mstride; |
242 | |
|
243 | 0 | static const plan_adt padt = { |
244 | 0 | 0, awake, print, destroy |
245 | 0 | }; |
246 | |
|
247 | 0 | UNUSED(ego_); |
248 | |
|
249 | 0 | A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m+2)/2); |
250 | |
|
251 | 0 | if (!applicable(kind, r, m, plnr)) |
252 | 0 | return (plan *)0; |
253 | | |
254 | 0 | A(m % 2); |
255 | 0 | mstart1 = mstart + (mstart == 0); |
256 | 0 | mcount1 = mcount - (mstart == 0); |
257 | 0 | mstride = m - (mstart + mcount - 1) - mstart1; |
258 | | |
259 | | /* 0th (DC) transform (vl of these), if mstart == 0 */ |
260 | 0 | cld0 = X(mkplan_d)(plnr, |
261 | 0 | X(mkproblem_rdft_1_d)( |
262 | 0 | mstart == 0 ? X(mktensor_1d)(r, m * s, m * s) |
263 | 0 | : X(mktensor_0d)(), |
264 | 0 | X(mktensor_1d)(vl, vs, vs), |
265 | 0 | IO, IO, kind) |
266 | 0 | ); |
267 | 0 | if (!cld0) goto nada; |
268 | | |
269 | | /* twiddle transforms: there are 2 x mcount1 x vl of these |
270 | | (where 2 corresponds to the real and imaginary parts) ... |
271 | | the 2 x mcount1 loops are combined if mstart=0 and mcount=(m+2)/2. */ |
272 | 0 | cld = X(mkplan_d)(plnr, |
273 | 0 | X(mkproblem_rdft_1_d)( |
274 | 0 | X(mktensor_1d)(r, m * s, m * s), |
275 | 0 | X(mktensor_3d)(2, mstride * s, mstride * s, |
276 | 0 | mcount1, s, s, |
277 | 0 | vl, vs, vs), |
278 | 0 | IO + s * mstart1, IO + s * mstart1, kind) |
279 | 0 | ); |
280 | 0 | if (!cld) goto nada; |
281 | | |
282 | 0 | pln = MKPLAN_HC2HC(P, &padt, (kind == R2HC) ? apply_dit : apply_dif); |
283 | 0 | pln->cld = cld; |
284 | 0 | pln->cld0 = cld0; |
285 | 0 | pln->r = r; |
286 | 0 | pln->m = m; |
287 | 0 | pln->s = s; |
288 | 0 | pln->vl = vl; |
289 | 0 | pln->vs = vs; |
290 | 0 | pln->td = 0; |
291 | 0 | pln->mstart1 = mstart1; |
292 | 0 | pln->mcount1 = mcount1; |
293 | |
|
294 | 0 | { |
295 | 0 | double n0 = 0.5 * (r - 1) * (2 * mcount1) * vl; |
296 | 0 | pln->super.super.ops = cld->ops; |
297 | 0 | pln->super.super.ops.mul += (kind == R2HC ? 5.0 : 7.0) * n0; |
298 | 0 | pln->super.super.ops.add += 4.0 * n0; |
299 | 0 | pln->super.super.ops.other += 11.0 * n0; |
300 | 0 | } |
301 | 0 | return &(pln->super.super); |
302 | | |
303 | 0 | nada: |
304 | 0 | X(plan_destroy_internal)(cld); |
305 | 0 | X(plan_destroy_internal)(cld0); |
306 | 0 | return (plan *) 0; |
307 | 0 | } |
308 | | |
309 | | static void regsolver(planner *plnr, INT r) |
310 | 1 | { |
311 | 1 | S *slv = (S *)X(mksolver_hc2hc)(sizeof(S), r, mkcldw); |
312 | 1 | REGISTER_SOLVER(plnr, &(slv->super)); |
313 | 1 | if (X(mksolver_hc2hc_hook)) { |
314 | 0 | slv = (S *)X(mksolver_hc2hc_hook)(sizeof(S), r, mkcldw); |
315 | 0 | REGISTER_SOLVER(plnr, &(slv->super)); |
316 | 0 | } |
317 | 1 | } |
318 | | |
319 | | void X(hc2hc_generic_register)(planner *p) |
320 | 1 | { |
321 | 1 | regsolver(p, 0); |
322 | 1 | } |