/src/fftw3/dft/indirect-transpose.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | /* solvers/plans for vectors of DFTs corresponding to the columns |
22 | | of a matrix: first transpose the matrix so that the DFTs are |
23 | | contiguous, then do DFTs with transposed output. In particular, |
24 | | we restrict ourselves to the case of a square transpose (or a |
25 | | sequence thereof). */ |
26 | | |
27 | | #include "dft/dft.h" |
28 | | |
29 | | typedef solver S; |
30 | | |
31 | | typedef struct { |
32 | | plan_dft super; |
33 | | INT vl, ivs, ovs; |
34 | | plan *cldtrans, *cld, *cldrest; |
35 | | } P; |
36 | | |
37 | | /* initial transpose is out-of-place from input to output */ |
38 | | static void apply_op(const plan *ego_, R *ri, R *ii, R *ro, R *io) |
39 | 0 | { |
40 | 0 | const P *ego = (const P *) ego_; |
41 | 0 | INT vl = ego->vl, ivs = ego->ivs, ovs = ego->ovs, i; |
42 | |
|
43 | 0 | for (i = 0; i < vl; ++i) { |
44 | 0 | { |
45 | 0 | plan_dft *cldtrans = (plan_dft *) ego->cldtrans; |
46 | 0 | cldtrans->apply(ego->cldtrans, ri, ii, ro, io); |
47 | 0 | } |
48 | 0 | { |
49 | 0 | plan_dft *cld = (plan_dft *) ego->cld; |
50 | 0 | cld->apply(ego->cld, ro, io, ro, io); |
51 | 0 | } |
52 | 0 | ri += ivs; ii += ivs; |
53 | 0 | ro += ovs; io += ovs; |
54 | 0 | } |
55 | 0 | { |
56 | 0 | plan_dft *cldrest = (plan_dft *) ego->cldrest; |
57 | 0 | cldrest->apply(ego->cldrest, ri, ii, ro, io); |
58 | 0 | } |
59 | 0 | } |
60 | | |
61 | | static void destroy(plan *ego_) |
62 | 12 | { |
63 | 12 | P *ego = (P *) ego_; |
64 | 12 | X(plan_destroy_internal)(ego->cldrest); |
65 | 12 | X(plan_destroy_internal)(ego->cld); |
66 | 12 | X(plan_destroy_internal)(ego->cldtrans); |
67 | 12 | } |
68 | | |
69 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
70 | 0 | { |
71 | 0 | P *ego = (P *) ego_; |
72 | 0 | X(plan_awake)(ego->cldtrans, wakefulness); |
73 | 0 | X(plan_awake)(ego->cld, wakefulness); |
74 | 0 | X(plan_awake)(ego->cldrest, wakefulness); |
75 | 0 | } |
76 | | |
77 | | static void print(const plan *ego_, printer *p) |
78 | 0 | { |
79 | 0 | const P *ego = (const P *) ego_; |
80 | 0 | p->print(p, "(indirect-transpose%v%(%p%)%(%p%)%(%p%))", |
81 | 0 | ego->vl, ego->cldtrans, ego->cld, ego->cldrest); |
82 | 0 | } |
83 | | |
84 | | static int pickdim(const tensor *vs, const tensor *s, int *pdim0, int *pdim1) |
85 | 162 | { |
86 | 162 | int dim0, dim1; |
87 | 162 | *pdim0 = *pdim1 = -1; |
88 | 223 | for (dim0 = 0; dim0 < vs->rnk; ++dim0) |
89 | 105 | for (dim1 = 0; dim1 < s->rnk; ++dim1) |
90 | 44 | if (vs->dims[dim0].n * X(iabs)(vs->dims[dim0].is) <= X(iabs)(s->dims[dim1].is) |
91 | 44 | && vs->dims[dim0].n >= s->dims[dim1].n |
92 | 44 | && (*pdim0 == -1 |
93 | 12 | || (X(iabs)(vs->dims[dim0].is) <= X(iabs)(vs->dims[*pdim0].is) |
94 | 12 | && X(iabs)(s->dims[dim1].is) >= X(iabs)(s->dims[*pdim1].is)))) { |
95 | 12 | *pdim0 = dim0; |
96 | 12 | *pdim1 = dim1; |
97 | 12 | } |
98 | 162 | return (*pdim0 != -1 && *pdim1 != -1); |
99 | 162 | } |
100 | | |
101 | | static int applicable0(const solver *ego_, const problem *p_, |
102 | | const planner *plnr, |
103 | | int *pdim0, int *pdim1) |
104 | 1.09k | { |
105 | 1.09k | const problem_dft *p = (const problem_dft *) p_; |
106 | 1.09k | UNUSED(ego_); UNUSED(plnr); |
107 | | |
108 | 1.09k | return (1 |
109 | 1.09k | && FINITE_RNK(p->vecsz->rnk) && FINITE_RNK(p->sz->rnk) |
110 | | |
111 | | /* FIXME: can/should we relax this constraint? */ |
112 | 1.09k | && X(tensor_inplace_strides2)(p->vecsz, p->sz) |
113 | | |
114 | 1.09k | && pickdim(p->vecsz, p->sz, pdim0, pdim1) |
115 | | |
116 | | /* output should not *already* include the transpose |
117 | | (in which case we duplicate the regular indirect.c) */ |
118 | 1.09k | && (p->sz->dims[*pdim1].os != p->vecsz->dims[*pdim0].is) |
119 | 1.09k | ); |
120 | 1.09k | } |
121 | | |
122 | | static int applicable(const solver *ego_, const problem *p_, |
123 | | const planner *plnr, |
124 | | int *pdim0, int *pdim1) |
125 | 1.09k | { |
126 | 1.09k | if (!applicable0(ego_, p_, plnr, pdim0, pdim1)) return 0; |
127 | 12 | { |
128 | 12 | const problem_dft *p = (const problem_dft *) p_; |
129 | 12 | INT u = p->ri == p->ii + 1 || p->ii == p->ri + 1 ? (INT)2 : (INT)1; |
130 | | |
131 | | /* UGLY if does not result in contiguous transforms or |
132 | | transforms of contiguous vectors (since the latter at |
133 | | least have efficient transpositions) */ |
134 | 12 | if (NO_UGLYP(plnr) |
135 | 12 | && p->vecsz->dims[*pdim0].is != u |
136 | 12 | && !(p->vecsz->rnk == 2 |
137 | 0 | && p->vecsz->dims[1-*pdim0].is == u |
138 | 0 | && p->vecsz->dims[*pdim0].is |
139 | 0 | == u * p->vecsz->dims[1-*pdim0].n)) |
140 | 0 | return 0; |
141 | | |
142 | 12 | if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0; |
143 | 12 | } |
144 | 12 | return 1; |
145 | 12 | } |
146 | | |
147 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
148 | 1.09k | { |
149 | 1.09k | const problem_dft *p = (const problem_dft *) p_; |
150 | 1.09k | P *pln; |
151 | 1.09k | plan *cld = 0, *cldtrans = 0, *cldrest = 0; |
152 | 1.09k | int pdim0, pdim1; |
153 | 1.09k | tensor *ts, *tv; |
154 | 1.09k | INT vl, ivs, ovs; |
155 | 1.09k | R *rit, *iit, *rot, *iot; |
156 | | |
157 | 1.09k | static const plan_adt padt = { |
158 | 1.09k | X(dft_solve), awake, print, destroy |
159 | 1.09k | }; |
160 | | |
161 | 1.09k | if (!applicable(ego_, p_, plnr, &pdim0, &pdim1)) |
162 | 1.08k | return (plan *) 0; |
163 | | |
164 | 12 | vl = p->vecsz->dims[pdim0].n / p->sz->dims[pdim1].n; |
165 | 12 | A(vl >= 1); |
166 | 12 | ivs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].is; |
167 | 12 | ovs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].os; |
168 | 12 | rit = TAINT(p->ri, vl == 1 ? 0 : ivs); |
169 | 12 | iit = TAINT(p->ii, vl == 1 ? 0 : ivs); |
170 | 12 | rot = TAINT(p->ro, vl == 1 ? 0 : ovs); |
171 | 12 | iot = TAINT(p->io, vl == 1 ? 0 : ovs); |
172 | | |
173 | 12 | ts = X(tensor_copy_inplace)(p->sz, INPLACE_IS); |
174 | 12 | ts->dims[pdim1].os = p->vecsz->dims[pdim0].is; |
175 | 12 | tv = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS); |
176 | 12 | tv->dims[pdim0].os = p->sz->dims[pdim1].is; |
177 | 12 | tv->dims[pdim0].n = p->sz->dims[pdim1].n; |
178 | 12 | cldtrans = X(mkplan_d)(plnr, |
179 | 12 | X(mkproblem_dft_d)(X(mktensor_0d)(), |
180 | 12 | X(tensor_append)(tv, ts), |
181 | 12 | rit, iit, |
182 | 12 | rot, iot)); |
183 | 12 | X(tensor_destroy2)(ts, tv); |
184 | 12 | if (!cldtrans) goto nada; |
185 | | |
186 | 12 | ts = X(tensor_copy)(p->sz); |
187 | 12 | ts->dims[pdim1].is = p->vecsz->dims[pdim0].is; |
188 | 12 | tv = X(tensor_copy)(p->vecsz); |
189 | 12 | tv->dims[pdim0].is = p->sz->dims[pdim1].is; |
190 | 12 | tv->dims[pdim0].n = p->sz->dims[pdim1].n; |
191 | 12 | cld = X(mkplan_d)(plnr, X(mkproblem_dft_d)(ts, tv, |
192 | 12 | rot, iot, |
193 | 12 | rot, iot)); |
194 | 12 | if (!cld) goto nada; |
195 | | |
196 | 12 | tv = X(tensor_copy)(p->vecsz); |
197 | 12 | tv->dims[pdim0].n -= vl * p->sz->dims[pdim1].n; |
198 | 12 | cldrest = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(tensor_copy)(p->sz), tv, |
199 | 12 | p->ri + ivs * vl, |
200 | 12 | p->ii + ivs * vl, |
201 | 12 | p->ro + ovs * vl, |
202 | 12 | p->io + ovs * vl)); |
203 | 12 | if (!cldrest) goto nada; |
204 | | |
205 | 12 | pln = MKPLAN_DFT(P, &padt, apply_op); |
206 | 12 | pln->cldtrans = cldtrans; |
207 | 12 | pln->cld = cld; |
208 | 12 | pln->cldrest = cldrest; |
209 | 12 | pln->vl = vl; |
210 | 12 | pln->ivs = ivs; |
211 | 12 | pln->ovs = ovs; |
212 | 12 | X(ops_cpy)(&cldrest->ops, &pln->super.super.ops); |
213 | 12 | X(ops_madd2)(vl, &cld->ops, &pln->super.super.ops); |
214 | 12 | X(ops_madd2)(vl, &cldtrans->ops, &pln->super.super.ops); |
215 | 12 | return &(pln->super.super); |
216 | | |
217 | 0 | nada: |
218 | 0 | X(plan_destroy_internal)(cldrest); |
219 | 0 | X(plan_destroy_internal)(cld); |
220 | 0 | X(plan_destroy_internal)(cldtrans); |
221 | 0 | return (plan *)0; |
222 | 12 | } |
223 | | |
224 | | static solver *mksolver(void) |
225 | 1 | { |
226 | 1 | static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; |
227 | 1 | S *slv = MKSOLVER(S, &sadt); |
228 | 1 | return slv; |
229 | 1 | } |
230 | | |
231 | | void X(dft_indirect_transpose_register)(planner *p) |
232 | 1 | { |
233 | 1 | REGISTER_SOLVER(p, mksolver()); |
234 | 1 | } |