/src/fftw3/dft/dftw-genericbuf.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | /* express a twiddle problem in terms of dft + multiplication by |
22 | | twiddle factors */ |
23 | | |
24 | | #include "dft/ct.h" |
25 | | |
26 | | typedef struct { |
27 | | ct_solver super; |
28 | | INT batchsz; |
29 | | } S; |
30 | | |
31 | | typedef struct { |
32 | | plan_dftw super; |
33 | | |
34 | | INT r, rs, m, ms, v, vs, mb, me; |
35 | | INT batchsz; |
36 | | plan *cld; |
37 | | |
38 | | triggen *t; |
39 | | const S *slv; |
40 | | } P; |
41 | | |
42 | | |
43 | 0 | #define BATCHDIST(r) ((r) + 16) |
44 | | |
45 | | /**************************************************************/ |
46 | | static void bytwiddle(const P *ego, INT mb, INT me, R *buf, R *rio, R *iio) |
47 | 0 | { |
48 | 0 | INT j, k; |
49 | 0 | INT r = ego->r, rs = ego->rs, ms = ego->ms; |
50 | 0 | triggen *t = ego->t; |
51 | 0 | for (j = 0; j < r; ++j) { |
52 | 0 | for (k = mb; k < me; ++k) |
53 | 0 | t->rotate(t, j * k, |
54 | 0 | rio[j * rs + k * ms], |
55 | 0 | iio[j * rs + k * ms], |
56 | 0 | &buf[j * 2 + 2 * BATCHDIST(r) * (k - mb) + 0]); |
57 | 0 | } |
58 | 0 | } |
59 | | |
60 | | static int applicable0(const S *ego, |
61 | | INT r, INT irs, INT ors, |
62 | | INT m, INT v, |
63 | | INT mcount) |
64 | 235 | { |
65 | 235 | return (1 |
66 | 235 | && v == 1 |
67 | 235 | && irs == ors |
68 | 235 | && mcount >= ego->batchsz |
69 | 235 | && mcount % ego->batchsz == 0 |
70 | 235 | && r >= 64 |
71 | 235 | && m >= r |
72 | 235 | ); |
73 | 235 | } |
74 | | |
75 | | static int applicable(const S *ego, |
76 | | INT r, INT irs, INT ors, |
77 | | INT m, INT v, |
78 | | INT mcount, |
79 | | const planner *plnr) |
80 | 235 | { |
81 | 235 | if (!applicable0(ego, r, irs, ors, m, v, mcount)) |
82 | 235 | return 0; |
83 | 0 | if (NO_UGLYP(plnr) && m * r < 65536) |
84 | 0 | return 0; |
85 | | |
86 | 0 | return 1; |
87 | 0 | } |
88 | | |
89 | | static void dobatch(const P *ego, INT mb, INT me, R *buf, R *rio, R *iio) |
90 | 0 | { |
91 | 0 | plan_dft *cld; |
92 | 0 | INT ms = ego->ms; |
93 | |
|
94 | 0 | bytwiddle(ego, mb, me, buf, rio, iio); |
95 | |
|
96 | 0 | cld = (plan_dft *) ego->cld; |
97 | 0 | cld->apply(ego->cld, buf, buf + 1, buf, buf + 1); |
98 | 0 | X(cpy2d_pair_co)(buf, buf + 1, |
99 | 0 | rio + ms * mb, iio + ms * mb, |
100 | 0 | me-mb, 2 * BATCHDIST(ego->r), ms, |
101 | 0 | ego->r, 2, ego->rs); |
102 | 0 | } |
103 | | |
104 | | static void apply(const plan *ego_, R *rio, R *iio) |
105 | 0 | { |
106 | 0 | const P *ego = (const P *) ego_; |
107 | 0 | R *buf = (R *) MALLOC(sizeof(R) * 2 * BATCHDIST(ego->r) * ego->batchsz, |
108 | 0 | BUFFERS); |
109 | 0 | INT m; |
110 | |
|
111 | 0 | for (m = ego->mb; m < ego->me; m += ego->batchsz) |
112 | 0 | dobatch(ego, m, m + ego->batchsz, buf, rio, iio); |
113 | |
|
114 | 0 | A(m == ego->me); |
115 | |
|
116 | 0 | X(ifree)(buf); |
117 | 0 | } |
118 | | |
119 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
120 | 0 | { |
121 | 0 | P *ego = (P *) ego_; |
122 | 0 | X(plan_awake)(ego->cld, wakefulness); |
123 | |
|
124 | 0 | switch (wakefulness) { |
125 | 0 | case SLEEPY: |
126 | 0 | X(triggen_destroy)(ego->t); ego->t = 0; |
127 | 0 | break; |
128 | 0 | default: |
129 | 0 | ego->t = X(mktriggen)(AWAKE_SQRTN_TABLE, ego->r * ego->m); |
130 | 0 | break; |
131 | 0 | } |
132 | 0 | } |
133 | | |
134 | | static void destroy(plan *ego_) |
135 | 0 | { |
136 | 0 | P *ego = (P *) ego_; |
137 | 0 | X(plan_destroy_internal)(ego->cld); |
138 | 0 | } |
139 | | |
140 | | static void print(const plan *ego_, printer *p) |
141 | 0 | { |
142 | 0 | const P *ego = (const P *) ego_; |
143 | 0 | p->print(p, "(dftw-genericbuf/%D-%D-%D%(%p%))", |
144 | 0 | ego->batchsz, ego->r, ego->m, ego->cld); |
145 | 0 | } |
146 | | |
147 | | static plan *mkcldw(const ct_solver *ego_, |
148 | | INT r, INT irs, INT ors, |
149 | | INT m, INT ms, |
150 | | INT v, INT ivs, INT ovs, |
151 | | INT mstart, INT mcount, |
152 | | R *rio, R *iio, |
153 | | planner *plnr) |
154 | 235 | { |
155 | 235 | const S *ego = (const S *)ego_; |
156 | 235 | P *pln; |
157 | 235 | plan *cld = 0; |
158 | 235 | R *buf; |
159 | | |
160 | 235 | static const plan_adt padt = { |
161 | 235 | 0, awake, print, destroy |
162 | 235 | }; |
163 | | |
164 | 235 | UNUSED(ivs); UNUSED(ovs); UNUSED(rio); UNUSED(iio); |
165 | | |
166 | 235 | A(mstart >= 0 && mstart + mcount <= m); |
167 | 235 | if (!applicable(ego, r, irs, ors, m, v, mcount, plnr)) |
168 | 235 | return (plan *)0; |
169 | | |
170 | 0 | buf = (R *) MALLOC(sizeof(R) * 2 * BATCHDIST(r) * ego->batchsz, BUFFERS); |
171 | 0 | cld = X(mkplan_d)(plnr, |
172 | 0 | X(mkproblem_dft_d)( |
173 | 0 | X(mktensor_1d)(r, 2, 2), |
174 | 0 | X(mktensor_1d)(ego->batchsz, |
175 | 0 | 2 * BATCHDIST(r), |
176 | 0 | 2 * BATCHDIST(r)), |
177 | 0 | buf, buf + 1, buf, buf + 1 |
178 | 0 | ) |
179 | 0 | ); |
180 | 0 | X(ifree)(buf); |
181 | 0 | if (!cld) goto nada; |
182 | | |
183 | 0 | pln = MKPLAN_DFTW(P, &padt, apply); |
184 | 0 | pln->slv = ego; |
185 | 0 | pln->cld = cld; |
186 | 0 | pln->r = r; |
187 | 0 | pln->m = m; |
188 | 0 | pln->ms = ms; |
189 | 0 | pln->rs = irs; |
190 | 0 | pln->batchsz = ego->batchsz; |
191 | 0 | pln->mb = mstart; |
192 | 0 | pln->me = mstart + mcount; |
193 | |
|
194 | 0 | { |
195 | 0 | double n0 = (r - 1) * (mcount - 1); |
196 | 0 | pln->super.super.ops = cld->ops; |
197 | 0 | pln->super.super.ops.mul += 8 * n0; |
198 | 0 | pln->super.super.ops.add += 4 * n0; |
199 | 0 | pln->super.super.ops.other += 8 * n0; |
200 | 0 | } |
201 | 0 | return &(pln->super.super); |
202 | | |
203 | 0 | nada: |
204 | 0 | X(plan_destroy_internal)(cld); |
205 | 0 | return (plan *) 0; |
206 | 0 | } |
207 | | |
208 | | static void regsolver(planner *plnr, INT r, INT batchsz) |
209 | 35 | { |
210 | 35 | S *slv = (S *)X(mksolver_ct)(sizeof(S), r, DECDIT, mkcldw, 0); |
211 | 35 | slv->batchsz = batchsz; |
212 | 35 | REGISTER_SOLVER(plnr, &(slv->super.super)); |
213 | | |
214 | 35 | if (X(mksolver_ct_hook)) { |
215 | 0 | slv = (S *)X(mksolver_ct_hook)(sizeof(S), r, DECDIT, mkcldw, 0); |
216 | 0 | slv->batchsz = batchsz; |
217 | 0 | REGISTER_SOLVER(plnr, &(slv->super.super)); |
218 | 0 | } |
219 | | |
220 | 35 | } |
221 | | |
222 | | void X(ct_genericbuf_register)(planner *p) |
223 | 1 | { |
224 | 1 | static const INT radices[] = { -1, -2, -4, -8, -16, -32, -64 }; |
225 | 1 | static const INT batchsizes[] = { 4, 8, 16, 32, 64 }; |
226 | 1 | unsigned i, j; |
227 | | |
228 | 8 | for (i = 0; i < sizeof(radices) / sizeof(radices[0]); ++i) |
229 | 42 | for (j = 0; j < sizeof(batchsizes) / sizeof(batchsizes[0]); ++j) |
230 | 35 | regsolver(p, radices[i], batchsizes[j]); |
231 | 1 | } |