/src/fftw3/rdft/buffered.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | #include "rdft/rdft.h" |
23 | | |
24 | | typedef struct { |
25 | | solver super; |
26 | | size_t maxnbuf_ndx; |
27 | | } S; |
28 | | |
29 | | static const INT maxnbufs[] = { 8, 256 }; |
30 | | |
31 | | typedef struct { |
32 | | plan_rdft super; |
33 | | |
34 | | plan *cld, *cldcpy, *cldrest; |
35 | | INT n, vl, nbuf, bufdist; |
36 | | INT ivs_by_nbuf, ovs_by_nbuf; |
37 | | } P; |
38 | | |
39 | | /* transform a vector input with the help of bufs */ |
40 | | static void apply(const plan *ego_, R *I, R *O) |
41 | | { |
42 | | const P *ego = (const P *) ego_; |
43 | | plan_rdft *cld = (plan_rdft *) ego->cld; |
44 | | plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; |
45 | | plan_rdft *cldrest; |
46 | | INT i, vl = ego->vl, nbuf = ego->nbuf; |
47 | | INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf; |
48 | | R *bufs; |
49 | | |
50 | | bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS); |
51 | | |
52 | | for (i = nbuf; i <= vl; i += nbuf) { |
53 | | /* transform to bufs: */ |
54 | | cld->apply((plan *) cld, I, bufs); |
55 | | I += ivs_by_nbuf; |
56 | | |
57 | | /* copy back */ |
58 | | cldcpy->apply((plan *) cldcpy, bufs, O); |
59 | | O += ovs_by_nbuf; |
60 | | } |
61 | | |
62 | | X(ifree)(bufs); |
63 | | |
64 | | /* Do the remaining transforms, if any: */ |
65 | | cldrest = (plan_rdft *) ego->cldrest; |
66 | | cldrest->apply((plan *) cldrest, I, O); |
67 | | } |
68 | | |
69 | | /* for hc2r problems, copy the input into buffer, and then |
70 | | transform buffer->output, which allows for destruction of the |
71 | | buffer */ |
72 | | static void apply_hc2r(const plan *ego_, R *I, R *O) |
73 | 0 | { |
74 | 0 | const P *ego = (const P *) ego_; |
75 | 0 | plan_rdft *cld = (plan_rdft *) ego->cld; |
76 | 0 | plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; |
77 | 0 | plan_rdft *cldrest; |
78 | 0 | INT i, vl = ego->vl, nbuf = ego->nbuf; |
79 | 0 | INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf; |
80 | 0 | R *bufs; |
81 | |
|
82 | 0 | bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS); |
83 | |
|
84 | 0 | for (i = nbuf; i <= vl; i += nbuf) { |
85 | | /* copy input into bufs: */ |
86 | 0 | cldcpy->apply((plan *) cldcpy, I, bufs); |
87 | 0 | I += ivs_by_nbuf; |
88 | | |
89 | | /* transform to output */ |
90 | 0 | cld->apply((plan *) cld, bufs, O); |
91 | 0 | O += ovs_by_nbuf; |
92 | 0 | } |
93 | |
|
94 | 0 | X(ifree)(bufs); |
95 | | |
96 | | /* Do the remaining transforms, if any: */ |
97 | 0 | cldrest = (plan_rdft *) ego->cldrest; |
98 | 0 | cldrest->apply((plan *) cldrest, I, O); |
99 | 0 | } |
100 | | |
101 | | |
102 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
103 | | { |
104 | | P *ego = (P *) ego_; |
105 | | |
106 | | X(plan_awake)(ego->cld, wakefulness); |
107 | | X(plan_awake)(ego->cldcpy, wakefulness); |
108 | | X(plan_awake)(ego->cldrest, wakefulness); |
109 | | } |
110 | | |
111 | | static void destroy(plan *ego_) |
112 | | { |
113 | | P *ego = (P *) ego_; |
114 | | X(plan_destroy_internal)(ego->cldrest); |
115 | | X(plan_destroy_internal)(ego->cldcpy); |
116 | | X(plan_destroy_internal)(ego->cld); |
117 | | } |
118 | | |
119 | | static void print(const plan *ego_, printer *p) |
120 | | { |
121 | | const P *ego = (const P *) ego_; |
122 | | p->print(p, "(rdft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))", |
123 | | ego->n, ego->nbuf, |
124 | | ego->vl, ego->bufdist % ego->n, |
125 | | ego->cld, ego->cldcpy, ego->cldrest); |
126 | | } |
127 | | |
128 | | static int applicable0(const S *ego, const problem *p_, const planner *plnr) |
129 | | { |
130 | | const problem_rdft *p = (const problem_rdft *) p_; |
131 | | iodim *d = p->sz->dims; |
132 | | |
133 | | if (1 |
134 | | && p->vecsz->rnk <= 1 |
135 | | && p->sz->rnk == 1 |
136 | | ) { |
137 | | INT vl, ivs, ovs; |
138 | | X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); |
139 | | |
140 | | if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr)) |
141 | | return 0; |
142 | | |
143 | | /* if this solver is redundant, in the sense that a solver |
144 | | of lower index generates the same plan, then prune this |
145 | | solver */ |
146 | | if (X(nbuf_redundant)(d[0].n, vl, |
147 | | ego->maxnbuf_ndx, |
148 | | maxnbufs, NELEM(maxnbufs))) |
149 | | return 0; |
150 | | |
151 | | if (p->I != p->O) { |
152 | | if (p->kind[0] == HC2R) { |
153 | | /* Allow HC2R problems only if the input is to be |
154 | | preserved. This solver sets NO_DESTROY_INPUT, |
155 | | which prevents infinite loops */ |
156 | | return (NO_DESTROY_INPUTP(plnr)); |
157 | | } else { |
158 | | /* |
159 | | In principle, the buffered transforms might be useful |
160 | | when working out of place. However, in order to |
161 | | prevent infinite loops in the planner, we require |
162 | | that the output stride of the buffered transforms be |
163 | | greater than 1. |
164 | | */ |
165 | | return (d[0].os > 1); |
166 | | } |
167 | | } |
168 | | |
169 | | /* |
170 | | * If the problem is in place, the input/output strides must |
171 | | * be the same or the whole thing must fit in the buffer. |
172 | | */ |
173 | | if (X(tensor_inplace_strides2)(p->sz, p->vecsz)) |
174 | | return 1; |
175 | | |
176 | | if (/* fits into buffer: */ |
177 | | ((p->vecsz->rnk == 0) |
178 | | || |
179 | | (X(nbuf)(d[0].n, p->vecsz->dims[0].n, |
180 | | maxnbufs[ego->maxnbuf_ndx]) |
181 | | == p->vecsz->dims[0].n))) |
182 | | return 1; |
183 | | } |
184 | | |
185 | | return 0; |
186 | | } |
187 | | |
188 | | static int applicable(const S *ego, const problem *p_, const planner *plnr) |
189 | | { |
190 | | const problem_rdft *p; |
191 | | |
192 | | if (NO_BUFFERINGP(plnr)) return 0; |
193 | | |
194 | | if (!applicable0(ego, p_, plnr)) return 0; |
195 | | |
196 | | p = (const problem_rdft *) p_; |
197 | | if (p->kind[0] == HC2R) { |
198 | | if (NO_UGLYP(plnr)) { |
199 | | /* UGLY if in-place and too big, since the problem |
200 | | could be solved via transpositions */ |
201 | | if (p->I == p->O && X(toobig)(p->sz->dims[0].n)) |
202 | | return 0; |
203 | | } |
204 | | } else { |
205 | | if (NO_UGLYP(plnr)) { |
206 | | if (p->I != p->O) return 0; |
207 | | if (X(toobig)(p->sz->dims[0].n)) return 0; |
208 | | } |
209 | | } |
210 | | return 1; |
211 | | } |
212 | | |
213 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
214 | | { |
215 | | P *pln; |
216 | | const S *ego = (const S *)ego_; |
217 | | plan *cld = (plan *) 0; |
218 | | plan *cldcpy = (plan *) 0; |
219 | | plan *cldrest = (plan *) 0; |
220 | | const problem_rdft *p = (const problem_rdft *) p_; |
221 | | R *bufs = (R *) 0; |
222 | | INT nbuf = 0, bufdist, n, vl; |
223 | | INT ivs, ovs; |
224 | | int hc2rp; |
225 | | |
226 | | static const plan_adt padt = { |
227 | | X(rdft_solve), awake, print, destroy |
228 | | }; |
229 | | |
230 | | if (!applicable(ego, p_, plnr)) |
231 | | goto nada; |
232 | | |
233 | | n = X(tensor_sz)(p->sz); |
234 | | X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); |
235 | | hc2rp = (p->kind[0] == HC2R); |
236 | | |
237 | | nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]); |
238 | | bufdist = X(bufdist)(n, vl); |
239 | | A(nbuf > 0); |
240 | | |
241 | | /* initial allocation for the purpose of planning */ |
242 | | bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS); |
243 | | |
244 | | if (hc2rp) { |
245 | | /* allow destruction of buffer */ |
246 | | cld = X(mkplan_f_d)(plnr, |
247 | | X(mkproblem_rdft_d)( |
248 | | X(mktensor_1d)(n, 1, p->sz->dims[0].os), |
249 | | X(mktensor_1d)(nbuf, bufdist, ovs), |
250 | | bufs, TAINT(p->O, ovs * nbuf), p->kind), |
251 | | 0, 0, NO_DESTROY_INPUT); |
252 | | if (!cld) goto nada; |
253 | | |
254 | | /* copying input into buffer buffer is a rank-0 transform: */ |
255 | | cldcpy = X(mkplan_d)(plnr, |
256 | | X(mkproblem_rdft_0_d)( |
257 | | X(mktensor_2d)(nbuf, ivs, bufdist, |
258 | | n, p->sz->dims[0].is, 1), |
259 | | TAINT(p->I, ivs * nbuf), bufs)); |
260 | | if (!cldcpy) goto nada; |
261 | | } else { |
262 | | /* allow destruction of input if problem is in place */ |
263 | | cld = X(mkplan_f_d)(plnr, |
264 | | X(mkproblem_rdft_d)( |
265 | | X(mktensor_1d)(n, p->sz->dims[0].is, 1), |
266 | | X(mktensor_1d)(nbuf, ivs, bufdist), |
267 | | TAINT(p->I, ivs * nbuf), bufs, p->kind), |
268 | | 0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0); |
269 | | if (!cld) goto nada; |
270 | | |
271 | | /* copying back from the buffer is a rank-0 transform: */ |
272 | | cldcpy = X(mkplan_d)(plnr, |
273 | | X(mkproblem_rdft_0_d)( |
274 | | X(mktensor_2d)(nbuf, bufdist, ovs, |
275 | | n, 1, p->sz->dims[0].os), |
276 | | bufs, TAINT(p->O, ovs * nbuf))); |
277 | | if (!cldcpy) goto nada; |
278 | | } |
279 | | |
280 | | /* deallocate buffers, let apply() allocate them for real */ |
281 | | X(ifree)(bufs); |
282 | | bufs = 0; |
283 | | |
284 | | /* plan the leftover transforms (cldrest): */ |
285 | | { |
286 | | INT id = ivs * (nbuf * (vl / nbuf)); |
287 | | INT od = ovs * (nbuf * (vl / nbuf)); |
288 | | cldrest = X(mkplan_d)(plnr, |
289 | | X(mkproblem_rdft_d)( |
290 | | X(tensor_copy)(p->sz), |
291 | | X(mktensor_1d)(vl % nbuf, ivs, ovs), |
292 | | p->I + id, p->O + od, p->kind)); |
293 | | } |
294 | | if (!cldrest) goto nada; |
295 | | |
296 | | pln = MKPLAN_RDFT(P, &padt, hc2rp ? apply_hc2r : apply); |
297 | | pln->cld = cld; |
298 | | pln->cldcpy = cldcpy; |
299 | | pln->cldrest = cldrest; |
300 | | pln->n = n; |
301 | | pln->vl = vl; |
302 | | pln->ivs_by_nbuf = ivs * nbuf; |
303 | | pln->ovs_by_nbuf = ovs * nbuf; |
304 | | |
305 | | pln->nbuf = nbuf; |
306 | | pln->bufdist = bufdist; |
307 | | |
308 | | { |
309 | | opcnt t; |
310 | | X(ops_add)(&cld->ops, &cldcpy->ops, &t); |
311 | | X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops); |
312 | | } |
313 | | |
314 | | return &(pln->super.super); |
315 | | |
316 | | nada: |
317 | | X(ifree0)(bufs); |
318 | | X(plan_destroy_internal)(cldrest); |
319 | | X(plan_destroy_internal)(cldcpy); |
320 | | X(plan_destroy_internal)(cld); |
321 | | return (plan *) 0; |
322 | | } |
323 | | |
324 | | static solver *mksolver(size_t maxnbuf_ndx) |
325 | | { |
326 | | static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; |
327 | | S *slv = MKSOLVER(S, &sadt); |
328 | | slv->maxnbuf_ndx = maxnbuf_ndx; |
329 | | return &(slv->super); |
330 | | } |
331 | | |
332 | | void X(rdft_buffered_register)(planner *p) |
333 | 1 | { |
334 | 1 | size_t i; |
335 | 3 | for (i = 0; i < NELEM(maxnbufs); ++i) |
336 | 2 | REGISTER_SOLVER(p, mksolver(i)); |
337 | 1 | } |