/src/fftw3/rdft/rdft2-rdft.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | #include "rdft/rdft.h" |
23 | | |
24 | | typedef struct { |
25 | | solver super; |
26 | | } S; |
27 | | |
28 | | typedef struct { |
29 | | plan_rdft2 super; |
30 | | |
31 | | plan *cld, *cldrest; |
32 | | INT n, vl, nbuf, bufdist; |
33 | | INT cs, ivs, ovs; |
34 | | } P; |
35 | | |
36 | | /***************************************************************************/ |
37 | | |
38 | | /* FIXME: have alternate copy functions that push a vector loop inside |
39 | | the n loops? */ |
40 | | |
41 | | /* copy halfcomplex array r (contiguous) to complex (strided) array rio/iio. */ |
42 | | static void hc2c(INT n, R *r, R *rio, R *iio, INT os) |
43 | 0 | { |
44 | 0 | INT i; |
45 | |
|
46 | 0 | rio[0] = r[0]; |
47 | 0 | iio[0] = 0; |
48 | |
|
49 | 0 | for (i = 1; i + i < n; ++i) { |
50 | 0 | rio[i * os] = r[i]; |
51 | 0 | iio[i * os] = r[n - i]; |
52 | 0 | } |
53 | |
|
54 | 0 | if (i + i == n) { /* store the Nyquist frequency */ |
55 | 0 | rio[i * os] = r[i]; |
56 | 0 | iio[i * os] = K(0.0); |
57 | 0 | } |
58 | 0 | } |
59 | | |
60 | | /* reverse of hc2c */ |
61 | | static void c2hc(INT n, R *rio, R *iio, INT is, R *r) |
62 | 0 | { |
63 | 0 | INT i; |
64 | |
|
65 | 0 | r[0] = rio[0]; |
66 | |
|
67 | 0 | for (i = 1; i + i < n; ++i) { |
68 | 0 | r[i] = rio[i * is]; |
69 | 0 | r[n - i] = iio[i * is]; |
70 | 0 | } |
71 | |
|
72 | 0 | if (i + i == n) /* store the Nyquist frequency */ |
73 | 0 | r[i] = rio[i * is]; |
74 | 0 | } |
75 | | |
76 | | /***************************************************************************/ |
77 | | |
78 | | static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci) |
79 | 0 | { |
80 | 0 | const P *ego = (const P *) ego_; |
81 | 0 | plan_rdft *cld = (plan_rdft *) ego->cld; |
82 | 0 | INT i, j, vl = ego->vl, nbuf = ego->nbuf, bufdist = ego->bufdist; |
83 | 0 | INT n = ego->n; |
84 | 0 | INT ivs = ego->ivs, ovs = ego->ovs, os = ego->cs; |
85 | 0 | R *bufs = (R *)MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); |
86 | 0 | plan_rdft2 *cldrest; |
87 | |
|
88 | 0 | for (i = nbuf; i <= vl; i += nbuf) { |
89 | | /* transform to bufs: */ |
90 | 0 | cld->apply((plan *) cld, r0, bufs); |
91 | 0 | r0 += ivs * nbuf; r1 += ivs * nbuf; |
92 | | |
93 | | /* copy back */ |
94 | 0 | for (j = 0; j < nbuf; ++j, cr += ovs, ci += ovs) |
95 | 0 | hc2c(n, bufs + j*bufdist, cr, ci, os); |
96 | 0 | } |
97 | |
|
98 | 0 | X(ifree)(bufs); |
99 | | |
100 | | /* Do the remaining transforms, if any: */ |
101 | 0 | cldrest = (plan_rdft2 *) ego->cldrest; |
102 | 0 | cldrest->apply((plan *) cldrest, r0, r1, cr, ci); |
103 | 0 | } |
104 | | |
105 | | static void apply_hc2r(const plan *ego_, R *r0, R *r1, R *cr, R *ci) |
106 | 0 | { |
107 | 0 | const P *ego = (const P *) ego_; |
108 | 0 | plan_rdft *cld = (plan_rdft *) ego->cld; |
109 | 0 | INT i, j, vl = ego->vl, nbuf = ego->nbuf, bufdist = ego->bufdist; |
110 | 0 | INT n = ego->n; |
111 | 0 | INT ivs = ego->ivs, ovs = ego->ovs, is = ego->cs; |
112 | 0 | R *bufs = (R *)MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); |
113 | 0 | plan_rdft2 *cldrest; |
114 | |
|
115 | 0 | for (i = nbuf; i <= vl; i += nbuf) { |
116 | | /* copy to bufs */ |
117 | 0 | for (j = 0; j < nbuf; ++j, cr += ivs, ci += ivs) |
118 | 0 | c2hc(n, cr, ci, is, bufs + j*bufdist); |
119 | | |
120 | | /* transform back: */ |
121 | 0 | cld->apply((plan *) cld, bufs, r0); |
122 | 0 | r0 += ovs * nbuf; r1 += ovs * nbuf; |
123 | 0 | } |
124 | |
|
125 | 0 | X(ifree)(bufs); |
126 | | |
127 | | /* Do the remaining transforms, if any: */ |
128 | 0 | cldrest = (plan_rdft2 *) ego->cldrest; |
129 | 0 | cldrest->apply((plan *) cldrest, r0, r1, cr, ci); |
130 | 0 | } |
131 | | |
132 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
133 | 0 | { |
134 | 0 | P *ego = (P *) ego_; |
135 | |
|
136 | 0 | X(plan_awake)(ego->cld, wakefulness); |
137 | 0 | X(plan_awake)(ego->cldrest, wakefulness); |
138 | 0 | } |
139 | | |
140 | | static void destroy(plan *ego_) |
141 | 0 | { |
142 | 0 | P *ego = (P *) ego_; |
143 | 0 | X(plan_destroy_internal)(ego->cldrest); |
144 | 0 | X(plan_destroy_internal)(ego->cld); |
145 | 0 | } |
146 | | |
147 | | static void print(const plan *ego_, printer *p) |
148 | 0 | { |
149 | 0 | const P *ego = (const P *) ego_; |
150 | 0 | p->print(p, "(rdft2-rdft-%s-%D%v/%D-%D%(%p%)%(%p%))", |
151 | 0 | ego->super.apply == apply_r2hc ? "r2hc" : "hc2r", |
152 | 0 | ego->n, ego->nbuf, |
153 | 0 | ego->vl, ego->bufdist % ego->n, |
154 | 0 | ego->cld, ego->cldrest); |
155 | 0 | } |
156 | | |
157 | | static INT min_nbuf(const problem_rdft2 *p, INT n, INT vl) |
158 | 0 | { |
159 | 0 | INT is, os, ivs, ovs; |
160 | |
|
161 | 0 | if (p->r0 != p->cr) |
162 | 0 | return 1; |
163 | 0 | if (X(rdft2_inplace_strides(p, RNK_MINFTY))) |
164 | 0 | return 1; |
165 | 0 | A(p->vecsz->rnk == 1); /* rank 0 and MINFTY are inplace */ |
166 | |
|
167 | 0 | X(rdft2_strides)(p->kind, p->sz->dims, &is, &os); |
168 | 0 | X(rdft2_strides)(p->kind, p->vecsz->dims, &ivs, &ovs); |
169 | | |
170 | | /* handle one potentially common case: "contiguous" real and |
171 | | complex arrays, which overlap because of the differing sizes. */ |
172 | 0 | if (n * X(iabs)(is) <= X(iabs)(ivs) |
173 | 0 | && (n/2 + 1) * X(iabs)(os) <= X(iabs)(ovs) |
174 | 0 | && ( ((p->cr - p->ci) <= X(iabs)(os)) || |
175 | 0 | ((p->ci - p->cr) <= X(iabs)(os)) ) |
176 | 0 | && ivs > 0 && ovs > 0) { |
177 | 0 | INT vsmin = X(imin)(ivs, ovs); |
178 | 0 | INT vsmax = X(imax)(ivs, ovs); |
179 | 0 | return(((vsmax - vsmin) * vl + vsmin - 1) / vsmin); |
180 | 0 | } |
181 | | |
182 | 0 | return vl; /* punt: just buffer the whole vector */ |
183 | 0 | } |
184 | | |
185 | | static int applicable0(const problem *p_, const S *ego, const planner *plnr) |
186 | 0 | { |
187 | 0 | const problem_rdft2 *p = (const problem_rdft2 *) p_; |
188 | 0 | UNUSED(ego); |
189 | 0 | return(1 |
190 | 0 | && p->vecsz->rnk <= 1 |
191 | 0 | && p->sz->rnk == 1 |
192 | | |
193 | | /* FIXME: does it make sense to do R2HCII ? */ |
194 | 0 | && (p->kind == R2HC || p->kind == HC2R) |
195 | | |
196 | | /* real strides must allow for reduction to rdft */ |
197 | 0 | && (2 * (p->r1 - p->r0) == |
198 | 0 | (((p->kind == R2HC) ? p->sz->dims[0].is : p->sz->dims[0].os))) |
199 | | |
200 | 0 | && !(X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr)) |
201 | 0 | ); |
202 | 0 | } |
203 | | |
204 | | static int applicable(const problem *p_, const S *ego, const planner *plnr) |
205 | 0 | { |
206 | 0 | const problem_rdft2 *p; |
207 | |
|
208 | 0 | if (NO_BUFFERINGP(plnr)) return 0; |
209 | | |
210 | 0 | if (!applicable0(p_, ego, plnr)) return 0; |
211 | | |
212 | 0 | p = (const problem_rdft2 *) p_; |
213 | 0 | if (NO_UGLYP(plnr)) { |
214 | 0 | if (p->r0 != p->cr) return 0; |
215 | 0 | if (X(toobig)(p->sz->dims[0].n)) return 0; |
216 | 0 | } |
217 | 0 | return 1; |
218 | 0 | } |
219 | | |
220 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
221 | 0 | { |
222 | 0 | const S *ego = (const S *) ego_; |
223 | 0 | P *pln; |
224 | 0 | plan *cld = (plan *) 0; |
225 | 0 | plan *cldrest = (plan *) 0; |
226 | 0 | const problem_rdft2 *p = (const problem_rdft2 *) p_; |
227 | 0 | R *bufs = (R *) 0; |
228 | 0 | INT nbuf = 0, bufdist, n, vl; |
229 | 0 | INT ivs, ovs, rs, id, od; |
230 | |
|
231 | 0 | static const plan_adt padt = { |
232 | 0 | X(rdft2_solve), awake, print, destroy |
233 | 0 | }; |
234 | |
|
235 | 0 | if (!applicable(p_, ego, plnr)) |
236 | 0 | goto nada; |
237 | | |
238 | 0 | n = p->sz->dims[0].n; |
239 | 0 | X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); |
240 | |
|
241 | 0 | nbuf = X(imax)(X(nbuf)(n, vl, 0), min_nbuf(p, n, vl)); |
242 | 0 | bufdist = X(bufdist)(n, vl); |
243 | 0 | A(nbuf > 0); |
244 | | |
245 | | /* initial allocation for the purpose of planning */ |
246 | 0 | bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); |
247 | |
|
248 | 0 | id = ivs * (nbuf * (vl / nbuf)); |
249 | 0 | od = ovs * (nbuf * (vl / nbuf)); |
250 | |
|
251 | 0 | if (p->kind == R2HC) { |
252 | 0 | cld = X(mkplan_f_d)( |
253 | 0 | plnr, |
254 | 0 | X(mkproblem_rdft_d)( |
255 | 0 | X(mktensor_1d)(n, p->sz->dims[0].is/2, 1), |
256 | 0 | X(mktensor_1d)(nbuf, ivs, bufdist), |
257 | 0 | TAINT(p->r0, ivs * nbuf), bufs, &p->kind), |
258 | 0 | 0, 0, (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0); |
259 | 0 | if (!cld) goto nada; |
260 | 0 | X(ifree)(bufs); bufs = 0; |
261 | |
|
262 | 0 | cldrest = X(mkplan_d)(plnr, |
263 | 0 | X(mkproblem_rdft2_d)( |
264 | 0 | X(tensor_copy)(p->sz), |
265 | 0 | X(mktensor_1d)(vl % nbuf, ivs, ovs), |
266 | 0 | p->r0 + id, p->r1 + id, |
267 | 0 | p->cr + od, p->ci + od, |
268 | 0 | p->kind)); |
269 | 0 | if (!cldrest) goto nada; |
270 | | |
271 | 0 | pln = MKPLAN_RDFT2(P, &padt, apply_r2hc); |
272 | 0 | } else { |
273 | 0 | A(p->kind == HC2R); |
274 | 0 | cld = X(mkplan_f_d)( |
275 | 0 | plnr, |
276 | 0 | X(mkproblem_rdft_d)( |
277 | 0 | X(mktensor_1d)(n, 1, p->sz->dims[0].os/2), |
278 | 0 | X(mktensor_1d)(nbuf, bufdist, ovs), |
279 | 0 | bufs, TAINT(p->r0, ovs * nbuf), &p->kind), |
280 | 0 | 0, 0, NO_DESTROY_INPUT); /* always ok to destroy bufs */ |
281 | 0 | if (!cld) goto nada; |
282 | 0 | X(ifree)(bufs); bufs = 0; |
283 | |
|
284 | 0 | cldrest = X(mkplan_d)(plnr, |
285 | 0 | X(mkproblem_rdft2_d)( |
286 | 0 | X(tensor_copy)(p->sz), |
287 | 0 | X(mktensor_1d)(vl % nbuf, ivs, ovs), |
288 | 0 | p->r0 + od, p->r1 + od, |
289 | 0 | p->cr + id, p->ci + id, |
290 | 0 | p->kind)); |
291 | 0 | if (!cldrest) goto nada; |
292 | 0 | pln = MKPLAN_RDFT2(P, &padt, apply_hc2r); |
293 | 0 | } |
294 | | |
295 | 0 | pln->cld = cld; |
296 | 0 | pln->cldrest = cldrest; |
297 | 0 | pln->n = n; |
298 | 0 | pln->vl = vl; |
299 | 0 | pln->ivs = ivs; |
300 | 0 | pln->ovs = ovs; |
301 | 0 | X(rdft2_strides)(p->kind, &p->sz->dims[0], &rs, &pln->cs); |
302 | 0 | pln->nbuf = nbuf; |
303 | 0 | pln->bufdist = bufdist; |
304 | |
|
305 | 0 | X(ops_madd)(vl / nbuf, &cld->ops, &cldrest->ops, |
306 | 0 | &pln->super.super.ops); |
307 | 0 | pln->super.super.ops.other += (p->kind == R2HC ? (n + 2) : n) * vl; |
308 | |
|
309 | 0 | return &(pln->super.super); |
310 | | |
311 | 0 | nada: |
312 | 0 | X(ifree0)(bufs); |
313 | 0 | X(plan_destroy_internal)(cldrest); |
314 | 0 | X(plan_destroy_internal)(cld); |
315 | 0 | return (plan *) 0; |
316 | 0 | } |
317 | | |
318 | | static solver *mksolver(void) |
319 | 1 | { |
320 | 1 | static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 }; |
321 | 1 | S *slv = MKSOLVER(S, &sadt); |
322 | 1 | return &(slv->super); |
323 | 1 | } |
324 | | |
325 | | void X(rdft2_rdft_register)(planner *p) |
326 | 1 | { |
327 | 1 | REGISTER_SOLVER(p, mksolver()); |
328 | 1 | } |