Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | #include "dft/ct.h" |
23 | | |
24 | | ct_solver *(*X(mksolver_ct_hook))(size_t, INT, int, |
25 | | ct_mkinferior, ct_force_vrecursion) = 0; |
26 | | |
27 | | typedef struct { |
28 | | plan_dft super; |
29 | | plan *cld; |
30 | | plan *cldw; |
31 | | INT r; |
32 | | } P; |
33 | | |
34 | | static void apply_dit(const plan *ego_, R *ri, R *ii, R *ro, R *io) |
35 | 1.34k | { |
36 | 1.34k | const P *ego = (const P *) ego_; |
37 | 1.34k | plan_dft *cld; |
38 | 1.34k | plan_dftw *cldw; |
39 | | |
40 | 1.34k | cld = (plan_dft *) ego->cld; |
41 | 1.34k | cld->apply(ego->cld, ri, ii, ro, io); |
42 | | |
43 | 1.34k | cldw = (plan_dftw *) ego->cldw; |
44 | 1.34k | cldw->apply(ego->cldw, ro, io); |
45 | 1.34k | } |
46 | | |
47 | | static void apply_dif(const plan *ego_, R *ri, R *ii, R *ro, R *io) |
48 | 14 | { |
49 | 14 | const P *ego = (const P *) ego_; |
50 | 14 | plan_dft *cld; |
51 | 14 | plan_dftw *cldw; |
52 | | |
53 | 14 | cldw = (plan_dftw *) ego->cldw; |
54 | 14 | cldw->apply(ego->cldw, ri, ii); |
55 | | |
56 | 14 | cld = (plan_dft *) ego->cld; |
57 | 14 | cld->apply(ego->cld, ri, ii, ro, io); |
58 | 14 | } |
59 | | |
60 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
61 | 744 | { |
62 | 744 | P *ego = (P *) ego_; |
63 | 744 | X(plan_awake)(ego->cld, wakefulness); |
64 | 744 | X(plan_awake)(ego->cldw, wakefulness); |
65 | 744 | } |
66 | | |
67 | | static void destroy(plan *ego_) |
68 | 1.18k | { |
69 | 1.18k | P *ego = (P *) ego_; |
70 | 1.18k | X(plan_destroy_internal)(ego->cldw); |
71 | 1.18k | X(plan_destroy_internal)(ego->cld); |
72 | 1.18k | } |
73 | | |
74 | | static void print(const plan *ego_, printer *p) |
75 | 0 | { |
76 | 0 | const P *ego = (const P *) ego_; |
77 | 0 | p->print(p, "(dft-ct-%s/%D%(%p%)%(%p%))", |
78 | 0 | ego->super.apply == apply_dit ? "dit" : "dif", |
79 | 0 | ego->r, ego->cldw, ego->cld); |
80 | 0 | } |
81 | | |
82 | | static int applicable0(const ct_solver *ego, const problem *p_, planner *plnr) |
83 | 133k | { |
84 | 133k | const problem_dft *p = (const problem_dft *) p_; |
85 | 133k | INT r; |
86 | | |
87 | 133k | return (1 |
88 | 133k | && p->sz->rnk == 1 |
89 | 133k | && p->vecsz->rnk <= 1 |
90 | | |
91 | | /* DIF destroys the input and we don't like it */ |
92 | 133k | && (ego->dec == DECDIT || |
93 | 84.8k | p->ri == p->ro || |
94 | 84.8k | !NO_DESTROY_INPUTP(plnr)) |
95 | | |
96 | 133k | && ((r = X(choose_radix)(ego->r, p->sz->dims[0].n)) > 1) |
97 | 133k | && p->sz->dims[0].n > r); |
98 | 133k | } |
99 | | |
100 | | |
101 | | int X(ct_applicable)(const ct_solver *ego, const problem *p_, planner *plnr) |
102 | 133k | { |
103 | 133k | const problem_dft *p; |
104 | | |
105 | 133k | if (!applicable0(ego, p_, plnr)) |
106 | 126k | return 0; |
107 | | |
108 | 6.79k | p = (const problem_dft *) p_; |
109 | | |
110 | 6.79k | return (0 |
111 | 6.79k | || ego->dec == DECDIF+TRANSPOSE |
112 | 6.79k | || p->vecsz->rnk == 0 |
113 | 6.79k | || !NO_VRECURSEP(plnr) |
114 | 6.79k | || (ego->force_vrecursionp && ego->force_vrecursionp(ego, p)) |
115 | 6.79k | ); |
116 | 133k | } |
117 | | |
118 | | |
119 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
120 | 133k | { |
121 | 133k | const ct_solver *ego = (const ct_solver *) ego_; |
122 | 133k | const problem_dft *p; |
123 | 133k | P *pln = 0; |
124 | 133k | plan *cld = 0, *cldw = 0; |
125 | 133k | INT n, r, m, v, ivs, ovs; |
126 | 133k | iodim *d; |
127 | | |
128 | 133k | static const plan_adt padt = { |
129 | 133k | X(dft_solve), awake, print, destroy |
130 | 133k | }; |
131 | | |
132 | 133k | if ((NO_NONTHREADEDP(plnr)) || !X(ct_applicable)(ego, p_, plnr)) |
133 | 130k | return (plan *) 0; |
134 | | |
135 | 3.33k | p = (const problem_dft *) p_; |
136 | 3.33k | d = p->sz->dims; |
137 | 3.33k | n = d[0].n; |
138 | 3.33k | r = X(choose_radix)(ego->r, n); |
139 | 3.33k | m = n / r; |
140 | | |
141 | 3.33k | X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs); |
142 | | |
143 | 3.33k | switch (ego->dec) { |
144 | 2.21k | case DECDIT: |
145 | 2.21k | { |
146 | 2.21k | cldw = ego->mkcldw(ego, |
147 | 2.21k | r, m * d[0].os, m * d[0].os, |
148 | 2.21k | m, d[0].os, |
149 | 2.21k | v, ovs, ovs, |
150 | 2.21k | 0, m, |
151 | 2.21k | p->ro, p->io, plnr); |
152 | 2.21k | if (!cldw) goto nada; |
153 | | |
154 | 1.16k | cld = X(mkplan_d)(plnr, |
155 | 1.16k | X(mkproblem_dft_d)( |
156 | 1.16k | X(mktensor_1d)(m, r * d[0].is, d[0].os), |
157 | 1.16k | X(mktensor_2d)(r, d[0].is, m * d[0].os, |
158 | 1.16k | v, ivs, ovs), |
159 | 1.16k | p->ri, p->ii, p->ro, p->io) |
160 | 1.16k | ); |
161 | 1.16k | if (!cld) goto nada; |
162 | | |
163 | 1.16k | pln = MKPLAN_DFT(P, &padt, apply_dit); |
164 | 1.16k | break; |
165 | 1.16k | } |
166 | 27 | case DECDIF: |
167 | 1.12k | case DECDIF+TRANSPOSE: |
168 | 1.12k | { |
169 | 1.12k | INT cors, covs; /* cldw ors, ovs */ |
170 | 1.12k | if (ego->dec == DECDIF+TRANSPOSE) { |
171 | 1.09k | cors = ivs; |
172 | 1.09k | covs = m * d[0].is; |
173 | | /* ensure that we generate well-formed dftw subproblems */ |
174 | | /* FIXME: too conservative */ |
175 | 1.09k | if (!(1 |
176 | 1.09k | && r == v |
177 | 1.09k | && d[0].is == r * cors)) |
178 | 1.03k | goto nada; |
179 | | |
180 | | /* FIXME: allow in-place only for now, like in |
181 | | fftw-3.[01] */ |
182 | 65 | if (!(1 |
183 | 65 | && p->ri == p->ro |
184 | 65 | && d[0].is == r * d[0].os |
185 | 65 | && cors == d[0].os |
186 | 65 | && covs == ovs |
187 | 65 | )) |
188 | 43 | goto nada; |
189 | 65 | } else { |
190 | 27 | cors = m * d[0].is; |
191 | 27 | covs = ivs; |
192 | 27 | } |
193 | | |
194 | 49 | cldw = ego->mkcldw(ego, |
195 | 49 | r, m * d[0].is, cors, |
196 | 49 | m, d[0].is, |
197 | 49 | v, ivs, covs, |
198 | 49 | 0, m, |
199 | 49 | p->ri, p->ii, plnr); |
200 | 49 | if (!cldw) goto nada; |
201 | | |
202 | 22 | cld = X(mkplan_d)(plnr, |
203 | 22 | X(mkproblem_dft_d)( |
204 | 22 | X(mktensor_1d)(m, d[0].is, r * d[0].os), |
205 | 22 | X(mktensor_2d)(r, cors, d[0].os, |
206 | 22 | v, covs, ovs), |
207 | 22 | p->ri, p->ii, p->ro, p->io) |
208 | 22 | ); |
209 | 22 | if (!cld) goto nada; |
210 | | |
211 | 22 | pln = MKPLAN_DFT(P, &padt, apply_dif); |
212 | 22 | break; |
213 | 22 | } |
214 | | |
215 | 0 | default: A(0); |
216 | | |
217 | 3.33k | } |
218 | | |
219 | 1.18k | pln->cld = cld; |
220 | 1.18k | pln->cldw = cldw; |
221 | 1.18k | pln->r = r; |
222 | 1.18k | X(ops_add)(&cld->ops, &cldw->ops, &pln->super.super.ops); |
223 | | |
224 | | /* inherit could_prune_now_p attribute from cldw */ |
225 | 1.18k | pln->super.super.could_prune_now_p = cldw->could_prune_now_p; |
226 | 1.18k | return &(pln->super.super); |
227 | | |
228 | 2.15k | nada: |
229 | 2.15k | X(plan_destroy_internal)(cldw); |
230 | 2.15k | X(plan_destroy_internal)(cld); |
231 | 2.15k | return (plan *) 0; |
232 | 3.33k | } |
233 | | |
234 | | ct_solver *X(mksolver_ct)(size_t size, INT r, int dec, |
235 | | ct_mkinferior mkcldw, |
236 | | ct_force_vrecursion force_vrecursionp) |
237 | 93 | { |
238 | 93 | static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; |
239 | 93 | ct_solver *slv = (ct_solver *)X(mksolver)(size, &sadt); |
240 | 93 | slv->r = r; |
241 | 93 | slv->dec = dec; |
242 | 93 | slv->mkcldw = mkcldw; |
243 | 93 | slv->force_vrecursionp = force_vrecursionp; |
244 | 93 | return slv; |
245 | 93 | } |
246 | | |
247 | | plan *X(mkplan_dftw)(size_t size, const plan_adt *adt, dftwapply apply) |
248 | 1.18k | { |
249 | 1.18k | plan_dftw *ego; |
250 | | |
251 | 1.18k | ego = (plan_dftw *) X(mkplan)(size, adt); |
252 | 1.18k | ego->apply = apply; |
253 | | |
254 | 1.18k | return &(ego->super); |
255 | 1.18k | } |