/src/fftw3/rdft/indirect.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | |
23 | | /* solvers/plans for vectors of small RDFT's that cannot be done |
24 | | in-place directly. Use a rank-0 plan to rearrange the data |
25 | | before or after the transform. Can also change an out-of-place |
26 | | plan into a copy + in-place (where the in-place transform |
27 | | is e.g. unit stride). */ |
28 | | |
29 | | /* FIXME: merge with rank-geq2.c(?), since this is just a special case |
30 | | of a rank split where the first/second transform has rank 0. */ |
31 | | |
32 | | #include "rdft/rdft.h" |
33 | | |
34 | | typedef problem *(*mkcld_t) (const problem_rdft *p); |
35 | | |
36 | | typedef struct { |
37 | | rdftapply apply; |
38 | | problem *(*mkcld)(const problem_rdft *p); |
39 | | const char *nam; |
40 | | } ndrct_adt; |
41 | | |
42 | | typedef struct { |
43 | | solver super; |
44 | | const ndrct_adt *adt; |
45 | | } S; |
46 | | |
47 | | typedef struct { |
48 | | plan_rdft super; |
49 | | plan *cldcpy, *cld; |
50 | | const S *slv; |
51 | | } P; |
52 | | |
53 | | /*-----------------------------------------------------------------------*/ |
54 | | /* first rearrange, then transform */ |
55 | | static void apply_before(const plan *ego_, R *I, R *O) |
56 | | { |
57 | | const P *ego = (const P *) ego_; |
58 | | |
59 | | { |
60 | | plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; |
61 | | cldcpy->apply(ego->cldcpy, I, O); |
62 | | } |
63 | | { |
64 | | plan_rdft *cld = (plan_rdft *) ego->cld; |
65 | | cld->apply(ego->cld, O, O); |
66 | | } |
67 | | } |
68 | | |
69 | | static problem *mkcld_before(const problem_rdft *p) |
70 | | { |
71 | | return X(mkproblem_rdft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_OS), |
72 | | X(tensor_copy_inplace)(p->vecsz, INPLACE_OS), |
73 | | p->O, p->O, p->kind); |
74 | | } |
75 | | |
76 | | static const ndrct_adt adt_before = |
77 | | { |
78 | | apply_before, mkcld_before, "rdft-indirect-before" |
79 | | }; |
80 | | |
81 | | /*-----------------------------------------------------------------------*/ |
82 | | /* first transform, then rearrange */ |
83 | | |
84 | | static void apply_after(const plan *ego_, R *I, R *O) |
85 | | { |
86 | | const P *ego = (const P *) ego_; |
87 | | |
88 | | { |
89 | | plan_rdft *cld = (plan_rdft *) ego->cld; |
90 | | cld->apply(ego->cld, I, I); |
91 | | } |
92 | | { |
93 | | plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy; |
94 | | cldcpy->apply(ego->cldcpy, I, O); |
95 | | } |
96 | | } |
97 | | |
98 | | static problem *mkcld_after(const problem_rdft *p) |
99 | | { |
100 | | return X(mkproblem_rdft_d)(X(tensor_copy_inplace)(p->sz, INPLACE_IS), |
101 | | X(tensor_copy_inplace)(p->vecsz, INPLACE_IS), |
102 | | p->I, p->I, p->kind); |
103 | | } |
104 | | |
105 | | static const ndrct_adt adt_after = |
106 | | { |
107 | | apply_after, mkcld_after, "rdft-indirect-after" |
108 | | }; |
109 | | |
110 | | /*-----------------------------------------------------------------------*/ |
111 | | static void destroy(plan *ego_) |
112 | | { |
113 | | P *ego = (P *) ego_; |
114 | | X(plan_destroy_internal)(ego->cld); |
115 | | X(plan_destroy_internal)(ego->cldcpy); |
116 | | } |
117 | | |
118 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
119 | | { |
120 | | P *ego = (P *) ego_; |
121 | | X(plan_awake)(ego->cldcpy, wakefulness); |
122 | | X(plan_awake)(ego->cld, wakefulness); |
123 | | } |
124 | | |
125 | | static void print(const plan *ego_, printer *p) |
126 | | { |
127 | | const P *ego = (const P *) ego_; |
128 | | const S *s = ego->slv; |
129 | | p->print(p, "(%s%(%p%)%(%p%))", s->adt->nam, ego->cld, ego->cldcpy); |
130 | | } |
131 | | |
132 | | static int applicable0(const solver *ego_, const problem *p_, |
133 | | const planner *plnr) |
134 | | { |
135 | | const S *ego = (const S *) ego_; |
136 | | const problem_rdft *p = (const problem_rdft *) p_; |
137 | | return (1 |
138 | | && FINITE_RNK(p->vecsz->rnk) |
139 | | |
140 | | /* problem must be a nontrivial transform, not just a copy */ |
141 | | && p->sz->rnk > 0 |
142 | | |
143 | | && (0 |
144 | | |
145 | | /* problem must be in-place & require some |
146 | | rearrangement of the data */ |
147 | | || (p->I == p->O |
148 | | && !(X(tensor_inplace_strides2)(p->sz, p->vecsz))) |
149 | | |
150 | | /* or problem must be out of place, transforming |
151 | | from stride 1/2 to bigger stride, for apply_after */ |
152 | | || (p->I != p->O && ego->adt->apply == apply_after |
153 | | && !NO_DESTROY_INPUTP(plnr) |
154 | | && X(tensor_min_istride)(p->sz) <= 2 |
155 | | && X(tensor_min_ostride)(p->sz) > 2) |
156 | | |
157 | | /* or problem must be out of place, transforming |
158 | | to stride 1/2 from bigger stride, for apply_before */ |
159 | | || (p->I != p->O && ego->adt->apply == apply_before |
160 | | && X(tensor_min_ostride)(p->sz) <= 2 |
161 | | && X(tensor_min_istride)(p->sz) > 2) |
162 | | |
163 | | ) |
164 | | ); |
165 | | } |
166 | | |
167 | | static int applicable(const solver *ego_, const problem *p_, |
168 | | const planner *plnr) |
169 | | { |
170 | | if (!applicable0(ego_, p_, plnr)) return 0; |
171 | | |
172 | | if (NO_INDIRECT_OP_P(plnr)) { |
173 | | const problem_rdft *p = (const problem_rdft *)p_; |
174 | | if (p->I != p->O) return 0; |
175 | | } |
176 | | |
177 | | return 1; |
178 | | } |
179 | | |
180 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
181 | | { |
182 | | const problem_rdft *p = (const problem_rdft *) p_; |
183 | | const S *ego = (const S *) ego_; |
184 | | P *pln; |
185 | | plan *cld = 0, *cldcpy = 0; |
186 | | |
187 | | static const plan_adt padt = { |
188 | | X(rdft_solve), awake, print, destroy |
189 | | }; |
190 | | |
191 | | if (!applicable(ego_, p_, plnr)) |
192 | | return (plan *) 0; |
193 | | |
194 | | cldcpy = X(mkplan_d)(plnr, |
195 | | X(mkproblem_rdft_0_d)( |
196 | | X(tensor_append)(p->vecsz, p->sz), |
197 | | p->I, p->O)); |
198 | | if (!cldcpy) goto nada; |
199 | | |
200 | | cld = X(mkplan_f_d)(plnr, ego->adt->mkcld(p), NO_BUFFERING, 0, 0); |
201 | | if (!cld) goto nada; |
202 | | |
203 | | pln = MKPLAN_RDFT(P, &padt, ego->adt->apply); |
204 | | pln->cld = cld; |
205 | | pln->cldcpy = cldcpy; |
206 | | pln->slv = ego; |
207 | | X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops); |
208 | | |
209 | | return &(pln->super.super); |
210 | | |
211 | | nada: |
212 | | X(plan_destroy_internal)(cld); |
213 | | X(plan_destroy_internal)(cldcpy); |
214 | | return (plan *)0; |
215 | | } |
216 | | |
217 | | static solver *mksolver(const ndrct_adt *adt) |
218 | | { |
219 | | static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; |
220 | | S *slv = MKSOLVER(S, &sadt); |
221 | | slv->adt = adt; |
222 | | return &(slv->super); |
223 | | } |
224 | | |
225 | | void X(rdft_indirect_register)(planner *p) |
226 | 1 | { |
227 | 1 | unsigned i; |
228 | 1 | static const ndrct_adt *const adts[] = { |
229 | 1 | &adt_before, &adt_after |
230 | 1 | }; |
231 | | |
232 | 3 | for (i = 0; i < sizeof(adts) / sizeof(adts[0]); ++i) |
233 | 2 | REGISTER_SOLVER(p, mksolver(adts[i])); |
234 | 1 | } |