/src/fftw3/dft/indirect.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | |
23 | | /* solvers/plans for vectors of small DFT's that cannot be done |
24 | | in-place directly. Use a rank-0 plan to rearrange the data |
25 | | before or after the transform. Can also change an out-of-place |
26 | | plan into a copy + in-place (where the in-place transform |
27 | | is e.g. unit stride). */ |
28 | | |
29 | | /* FIXME: merge with rank-geq2.c(?), since this is just a special case |
30 | | of a rank split where the first/second transform has rank 0. */ |
31 | | |
32 | | #include "dft/dft.h" |
33 | | |
34 | | typedef problem *(*mkcld_t) (const problem_dft *p); |
35 | | |
36 | | typedef struct { |
37 | | dftapply apply; |
38 | | problem *(*mkcld)(const problem_dft *p); |
39 | | const char *nam; |
40 | | } ndrct_adt; |
41 | | |
42 | | typedef struct { |
43 | | solver super; |
44 | | const ndrct_adt *adt; |
45 | | } S; |
46 | | |
47 | | typedef struct { |
48 | | plan_dft super; |
49 | | plan *cldcpy, *cld; |
50 | | const S *slv; |
51 | | } P; |
52 | | |
53 | | /*-----------------------------------------------------------------------*/ |
54 | | /* first rearrange, then transform */ |
55 | | static void apply_before(const plan *ego_, R *ri, R *ii, R *ro, R *io) |
56 | 0 | { |
57 | 0 | const P *ego = (const P *) ego_; |
58 | |
|
59 | 0 | { |
60 | 0 | plan_dft *cldcpy = (plan_dft *) ego->cldcpy; |
61 | 0 | cldcpy->apply(ego->cldcpy, ri, ii, ro, io); |
62 | 0 | } |
63 | 0 | { |
64 | 0 | plan_dft *cld = (plan_dft *) ego->cld; |
65 | 0 | cld->apply(ego->cld, ro, io, ro, io); |
66 | 0 | } |
67 | 0 | } |
68 | | |
69 | | static problem *mkcld_before(const problem_dft *p) |
70 | 14 | { |
71 | 14 | return X(mkproblem_dft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_OS), |
72 | 14 | X(tensor_copy_inplace)(p->vecsz, INPLACE_OS), |
73 | 14 | p->ro, p->io, p->ro, p->io); |
74 | 14 | } |
75 | | |
76 | | static const ndrct_adt adt_before = |
77 | | { |
78 | | apply_before, mkcld_before, "dft-indirect-before" |
79 | | }; |
80 | | |
81 | | /*-----------------------------------------------------------------------*/ |
82 | | /* first transform, then rearrange */ |
83 | | |
84 | | static void apply_after(const plan *ego_, R *ri, R *ii, R *ro, R *io) |
85 | 0 | { |
86 | 0 | const P *ego = (const P *) ego_; |
87 | |
|
88 | 0 | { |
89 | 0 | plan_dft *cld = (plan_dft *) ego->cld; |
90 | 0 | cld->apply(ego->cld, ri, ii, ri, ii); |
91 | 0 | } |
92 | 0 | { |
93 | 0 | plan_dft *cldcpy = (plan_dft *) ego->cldcpy; |
94 | 0 | cldcpy->apply(ego->cldcpy, ri, ii, ro, io); |
95 | 0 | } |
96 | 0 | } |
97 | | |
98 | | static problem *mkcld_after(const problem_dft *p) |
99 | 22 | { |
100 | 22 | return X(mkproblem_dft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_IS), |
101 | 22 | X(tensor_copy_inplace)(p->vecsz, INPLACE_IS), |
102 | 22 | p->ri, p->ii, p->ri, p->ii); |
103 | 22 | } |
104 | | |
105 | | static const ndrct_adt adt_after = |
106 | | { |
107 | | apply_after, mkcld_after, "dft-indirect-after" |
108 | | }; |
109 | | |
110 | | /*-----------------------------------------------------------------------*/ |
111 | | static void destroy(plan *ego_) |
112 | 36 | { |
113 | 36 | P *ego = (P *) ego_; |
114 | 36 | X(plan_destroy_internal)(ego->cld); |
115 | 36 | X(plan_destroy_internal)(ego->cldcpy); |
116 | 36 | } |
117 | | |
118 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
119 | 0 | { |
120 | 0 | P *ego = (P *) ego_; |
121 | 0 | X(plan_awake)(ego->cldcpy, wakefulness); |
122 | 0 | X(plan_awake)(ego->cld, wakefulness); |
123 | 0 | } |
124 | | |
125 | | static void print(const plan *ego_, printer *p) |
126 | 0 | { |
127 | 0 | const P *ego = (const P *) ego_; |
128 | 0 | const S *s = ego->slv; |
129 | 0 | p->print(p, "(%s%(%p%)%(%p%))", s->adt->nam, ego->cld, ego->cldcpy); |
130 | 0 | } |
131 | | |
132 | | static int applicable0(const solver *ego_, const problem *p_, |
133 | | const planner *plnr) |
134 | 2.18k | { |
135 | 2.18k | const S *ego = (const S *) ego_; |
136 | 2.18k | const problem_dft *p = (const problem_dft *) p_; |
137 | 2.18k | return (1 |
138 | 2.18k | && FINITE_RNK(p->vecsz->rnk) |
139 | | |
140 | | /* problem must be a nontrivial transform, not just a copy */ |
141 | 2.18k | && p->sz->rnk > 0 |
142 | | |
143 | 2.18k | && (0 |
144 | | |
145 | | /* problem must be in-place & require some |
146 | | rearrangement of the data; to prevent |
147 | | infinite loops with indirect-transpose, we |
148 | | further require that at least some transform |
149 | | strides must decrease */ |
150 | 1.21k | || (p->ri == p->ro |
151 | 1.21k | && !X(tensor_inplace_strides2)(p->sz, p->vecsz) |
152 | 1.21k | && X(tensor_strides_decrease)( |
153 | 334 | p->sz, p->vecsz, |
154 | 334 | ego->adt->apply == apply_after ? |
155 | 167 | INPLACE_IS : INPLACE_OS)) |
156 | | |
157 | | /* or problem must be out of place, transforming |
158 | | from stride 1/2 to bigger stride, for apply_after */ |
159 | 1.21k | || (p->ri != p->ro && ego->adt->apply == apply_after |
160 | 1.04k | && !NO_DESTROY_INPUTP(plnr) |
161 | 1.04k | && X(tensor_min_istride)(p->sz) <= 2 |
162 | 1.04k | && X(tensor_min_ostride)(p->sz) > 2) |
163 | | |
164 | | /* or problem must be out of place, transforming |
165 | | to stride 1/2 from bigger stride, for apply_before */ |
166 | 1.21k | || (p->ri != p->ro && ego->adt->apply == apply_before |
167 | 1.04k | && X(tensor_min_ostride)(p->sz) <= 2 |
168 | 1.04k | && X(tensor_min_istride)(p->sz) > 2) |
169 | 1.21k | ) |
170 | 2.18k | ); |
171 | 2.18k | } |
172 | | |
173 | | static int applicable(const solver *ego_, const problem *p_, |
174 | | const planner *plnr) |
175 | 2.18k | { |
176 | 2.18k | if (!applicable0(ego_, p_, plnr)) return 0; |
177 | 462 | { |
178 | 462 | const problem_dft *p = (const problem_dft *) p_; |
179 | 462 | if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0; |
180 | 462 | } |
181 | 167 | return 1; |
182 | 462 | } |
183 | | |
184 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
185 | 2.82k | { |
186 | 2.82k | const problem_dft *p = (const problem_dft *) p_; |
187 | 2.82k | const S *ego = (const S *) ego_; |
188 | 2.82k | P *pln; |
189 | 2.82k | plan *cld = 0, *cldcpy = 0; |
190 | | |
191 | 2.82k | static const plan_adt padt = { |
192 | 2.82k | X(dft_solve), awake, print, destroy |
193 | 2.82k | }; |
194 | | |
195 | 2.82k | if (!applicable(ego_, p_, plnr)) |
196 | 2.66k | return (plan *) 0; |
197 | | |
198 | 167 | cldcpy = |
199 | 167 | X(mkplan_d)(plnr, |
200 | 167 | X(mkproblem_dft_d)(X(mktensor_0d)(), |
201 | 167 | X(tensor_append)(p->vecsz, p->sz), |
202 | 167 | p->ri, p->ii, p->ro, p->io)); |
203 | | |
204 | 167 | if (!cldcpy) goto nada; |
205 | | |
206 | 36 | cld = X(mkplan_f_d)(plnr, ego->adt->mkcld(p), NO_BUFFERING, 0, 0); |
207 | 36 | if (!cld) goto nada; |
208 | | |
209 | 36 | pln = MKPLAN_DFT(P, &padt, ego->adt->apply); |
210 | 36 | pln->cld = cld; |
211 | 36 | pln->cldcpy = cldcpy; |
212 | 36 | pln->slv = ego; |
213 | 36 | X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops); |
214 | | |
215 | 36 | return &(pln->super.super); |
216 | | |
217 | 131 | nada: |
218 | 131 | X(plan_destroy_internal)(cld); |
219 | 131 | X(plan_destroy_internal)(cldcpy); |
220 | 131 | return (plan *)0; |
221 | 36 | } |
222 | | |
223 | | static solver *mksolver(const ndrct_adt *adt) |
224 | 4 | { |
225 | 4 | static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; |
226 | 4 | S *slv = MKSOLVER(S, &sadt); |
227 | 4 | slv->adt = adt; |
228 | 4 | return &(slv->super); |
229 | 4 | } |
230 | | |
231 | | void X(dft_indirect_register)(planner *p) |
232 | 1 | { |
233 | 1 | unsigned i; |
234 | 1 | static const ndrct_adt *const adts[] = { |
235 | 1 | &adt_before, &adt_after |
236 | 1 | }; |
237 | | |
238 | 3 | for (i = 0; i < sizeof(adts) / sizeof(adts[0]); ++i) |
239 | 2 | REGISTER_SOLVER(p, mksolver(adts[i])); |
240 | 1 | } |