/src/fftw3/dft/vrank-geq1.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | |
23 | | /* Plans for handling vector transform loops. These are *just* the |
24 | | loops, and rely on child plans for the actual DFTs. |
25 | | |
26 | | They form a wrapper around solvers that don't have apply functions |
27 | | for non-null vectors. |
28 | | |
29 | | vrank-geq1 plans also recursively handle the case of multi-dimensional |
30 | | vectors, obviating the need for most solvers to deal with this. We |
31 | | can also play games here, such as reordering the vector loops. |
32 | | |
33 | | Each vrank-geq1 plan reduces the vector rank by 1, picking out a |
34 | | dimension determined by the vecloop_dim field of the solver. */ |
35 | | |
36 | | #include "dft/dft.h" |
37 | | |
38 | | typedef struct { |
39 | | solver super; |
40 | | int vecloop_dim; |
41 | | const int *buddies; |
42 | | size_t nbuddies; |
43 | | } S; |
44 | | |
45 | | typedef struct { |
46 | | plan_dft super; |
47 | | |
48 | | plan *cld; |
49 | | INT vl; |
50 | | INT ivs, ovs; |
51 | | const S *solver; |
52 | | } P; |
53 | | |
54 | | static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io) |
55 | 154 | { |
56 | 154 | const P *ego = (const P *) ego_; |
57 | 154 | INT i, vl = ego->vl; |
58 | 154 | INT ivs = ego->ivs, ovs = ego->ovs; |
59 | 154 | dftapply cldapply = ((plan_dft *) ego->cld)->apply; |
60 | | |
61 | 1.33k | for (i = 0; i < vl; ++i) { |
62 | 1.18k | cldapply(ego->cld, |
63 | 1.18k | ri + i * ivs, ii + i * ivs, ro + i * ovs, io + i * ovs); |
64 | 1.18k | } |
65 | 154 | } |
66 | | |
67 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
68 | 224 | { |
69 | 224 | P *ego = (P *) ego_; |
70 | 224 | X(plan_awake)(ego->cld, wakefulness); |
71 | 224 | } |
72 | | |
73 | | static void destroy(plan *ego_) |
74 | 337 | { |
75 | 337 | P *ego = (P *) ego_; |
76 | 337 | X(plan_destroy_internal)(ego->cld); |
77 | 337 | } |
78 | | |
79 | | static void print(const plan *ego_, printer *p) |
80 | 0 | { |
81 | 0 | const P *ego = (const P *) ego_; |
82 | 0 | const S *s = ego->solver; |
83 | 0 | p->print(p, "(dft-vrank>=1-x%D/%d%(%p%))", |
84 | 0 | ego->vl, s->vecloop_dim, ego->cld); |
85 | 0 | } |
86 | | |
87 | | static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) |
88 | 1.49k | { |
89 | 1.49k | return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies, |
90 | 1.49k | vecsz, oop, dp); |
91 | 1.49k | } |
92 | | |
93 | | static int applicable0(const solver *ego_, const problem *p_, int *dp) |
94 | 2.33k | { |
95 | 2.33k | const S *ego = (const S *) ego_; |
96 | 2.33k | const problem_dft *p = (const problem_dft *) p_; |
97 | | |
98 | 2.33k | return (1 |
99 | 2.33k | && FINITE_RNK(p->vecsz->rnk) |
100 | 2.33k | && p->vecsz->rnk > 0 |
101 | | |
102 | | /* do not bother looping over rank-0 problems, |
103 | | since they are handled via rdft */ |
104 | 2.33k | && p->sz->rnk > 0 |
105 | | |
106 | 2.33k | && pickdim(ego, p->vecsz, p->ri != p->ro, dp) |
107 | 2.33k | ); |
108 | 2.33k | } |
109 | | |
110 | | static int applicable(const solver *ego_, const problem *p_, |
111 | | const planner *plnr, int *dp) |
112 | 2.33k | { |
113 | 2.33k | const S *ego = (const S *)ego_; |
114 | 2.33k | const problem_dft *p; |
115 | | |
116 | 2.33k | if (!applicable0(ego_, p_, dp)) return 0; |
117 | | |
118 | | /* fftw2 behavior */ |
119 | 367 | if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) |
120 | 21 | return 0; |
121 | | |
122 | 346 | p = (const problem_dft *) p_; |
123 | | |
124 | 346 | if (NO_UGLYP(plnr)) { |
125 | | /* Heuristic: if the transform is multi-dimensional, and the |
126 | | vector stride is less than the transform size, then we |
127 | | probably want to use a rank>=2 plan first in order to combine |
128 | | this vector with the transform-dimension vectors. */ |
129 | 346 | { |
130 | 346 | iodim *d = p->vecsz->dims + *dp; |
131 | 346 | if (1 |
132 | 346 | && p->sz->rnk > 1 |
133 | 346 | && X(imin)(X(iabs)(d->is), X(iabs)(d->os)) |
134 | 0 | < X(tensor_max_index)(p->sz) |
135 | 346 | ) |
136 | 0 | return 0; |
137 | 346 | } |
138 | | |
139 | 346 | if (NO_NONTHREADEDP(plnr)) return 0; /* prefer threaded version */ |
140 | 346 | } |
141 | | |
142 | 346 | return 1; |
143 | 346 | } |
144 | | |
145 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
146 | 2.98k | { |
147 | 2.98k | const S *ego = (const S *) ego_; |
148 | 2.98k | const problem_dft *p; |
149 | 2.98k | P *pln; |
150 | 2.98k | plan *cld; |
151 | 2.98k | int vdim; |
152 | 2.98k | iodim *d; |
153 | | |
154 | 2.98k | static const plan_adt padt = { |
155 | 2.98k | X(dft_solve), awake, print, destroy |
156 | 2.98k | }; |
157 | | |
158 | 2.98k | if (!applicable(ego_, p_, plnr, &vdim)) |
159 | 2.63k | return (plan *) 0; |
160 | 346 | p = (const problem_dft *) p_; |
161 | | |
162 | 346 | d = p->vecsz->dims + vdim; |
163 | | |
164 | 346 | A(d->n > 1); |
165 | 346 | cld = X(mkplan_d)(plnr, |
166 | 346 | X(mkproblem_dft_d)( |
167 | 346 | X(tensor_copy)(p->sz), |
168 | 346 | X(tensor_copy_except)(p->vecsz, vdim), |
169 | 346 | TAINT(p->ri, d->is), TAINT(p->ii, d->is), |
170 | 346 | TAINT(p->ro, d->os), TAINT(p->io, d->os))); |
171 | 346 | if (!cld) return (plan *) 0; |
172 | | |
173 | 337 | pln = MKPLAN_DFT(P, &padt, apply); |
174 | | |
175 | 337 | pln->cld = cld; |
176 | 337 | pln->vl = d->n; |
177 | 337 | pln->ivs = d->is; |
178 | 337 | pln->ovs = d->os; |
179 | | |
180 | 337 | pln->solver = ego; |
181 | 337 | X(ops_zero)(&pln->super.super.ops); |
182 | 337 | pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */ |
183 | 337 | X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); |
184 | | |
185 | 337 | if (p->sz->rnk != 1 || (p->sz->dims[0].n > 64)) |
186 | 37 | pln->super.super.pcost = pln->vl * cld->pcost; |
187 | | |
188 | 337 | return &(pln->super.super); |
189 | 346 | } |
190 | | |
191 | | static solver *mksolver(int vecloop_dim, const int *buddies, size_t nbuddies) |
192 | 4 | { |
193 | 4 | static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 }; |
194 | 4 | S *slv = MKSOLVER(S, &sadt); |
195 | 4 | slv->vecloop_dim = vecloop_dim; |
196 | 4 | slv->buddies = buddies; |
197 | 4 | slv->nbuddies = nbuddies; |
198 | 4 | return &(slv->super); |
199 | 4 | } |
200 | | |
201 | | void X(dft_vrank_geq1_register)(planner *p) |
202 | 1 | { |
203 | | /* FIXME: Should we try other vecloop_dim values? */ |
204 | 1 | static const int buddies[] = { 1, -1 }; |
205 | 1 | size_t i; |
206 | | |
207 | 3 | for (i = 0; i < NELEM(buddies); ++i) |
208 | 2 | REGISTER_SOLVER(p, mksolver(buddies[i], buddies, NELEM(buddies))); |
209 | 1 | } |