/src/fftw3/rdft/generic.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | #include "rdft/rdft.h" |
22 | | |
23 | | typedef struct { |
24 | | solver super; |
25 | | rdft_kind kind; |
26 | | } S; |
27 | | |
28 | | typedef struct { |
29 | | plan_rdft super; |
30 | | twid *td; |
31 | | INT n, is, os; |
32 | | rdft_kind kind; |
33 | | } P; |
34 | | |
35 | | /***************************************************************************/ |
36 | | |
37 | | static void cdot_r2hc(INT n, const E *x, const R *w, R *or0, R *oi1) |
38 | 0 | { |
39 | 0 | INT i; |
40 | |
|
41 | 0 | E rr = x[0], ri = 0; |
42 | 0 | x += 1; |
43 | 0 | for (i = 1; i + i < n; ++i) { |
44 | 0 | rr += x[0] * w[0]; |
45 | 0 | ri += x[1] * w[1]; |
46 | 0 | x += 2; w += 2; |
47 | 0 | } |
48 | 0 | *or0 = rr; |
49 | 0 | *oi1 = ri; |
50 | 0 | } |
51 | | |
52 | | static void hartley_r2hc(INT n, const R *xr, INT xs, E *o, R *pr) |
53 | 0 | { |
54 | 0 | INT i; |
55 | 0 | E sr; |
56 | 0 | o[0] = sr = xr[0]; o += 1; |
57 | 0 | for (i = 1; i + i < n; ++i) { |
58 | 0 | R a, b; |
59 | 0 | a = xr[i * xs]; |
60 | 0 | b = xr[(n - i) * xs]; |
61 | 0 | sr += (o[0] = a + b); |
62 | 0 | #if FFT_SIGN == -1 |
63 | 0 | o[1] = b - a; |
64 | | #else |
65 | | o[1] = a - b; |
66 | | #endif |
67 | 0 | o += 2; |
68 | 0 | } |
69 | 0 | *pr = sr; |
70 | 0 | } |
71 | | |
72 | | static void apply_r2hc(const plan *ego_, R *I, R *O) |
73 | 0 | { |
74 | 0 | const P *ego = (const P *) ego_; |
75 | 0 | INT i; |
76 | 0 | INT n = ego->n, is = ego->is, os = ego->os; |
77 | 0 | const R *W = ego->td->W; |
78 | 0 | E *buf; |
79 | 0 | size_t bufsz = n * sizeof(E); |
80 | |
|
81 | 0 | BUF_ALLOC(E *, buf, bufsz); |
82 | 0 | hartley_r2hc(n, I, is, buf, O); |
83 | |
|
84 | 0 | for (i = 1; i + i < n; ++i) { |
85 | 0 | cdot_r2hc(n, buf, W, O + i * os, O + (n - i) * os); |
86 | 0 | W += n - 1; |
87 | 0 | } |
88 | |
|
89 | 0 | BUF_FREE(buf, bufsz); |
90 | 0 | } |
91 | | |
92 | | |
93 | | static void cdot_hc2r(INT n, const E *x, const R *w, R *or0, R *or1) |
94 | 0 | { |
95 | 0 | INT i; |
96 | |
|
97 | 0 | E rr = x[0], ii = 0; |
98 | 0 | x += 1; |
99 | 0 | for (i = 1; i + i < n; ++i) { |
100 | 0 | rr += x[0] * w[0]; |
101 | 0 | ii += x[1] * w[1]; |
102 | 0 | x += 2; w += 2; |
103 | 0 | } |
104 | 0 | #if FFT_SIGN == -1 |
105 | 0 | *or0 = rr - ii; |
106 | 0 | *or1 = rr + ii; |
107 | | #else |
108 | | *or0 = rr + ii; |
109 | | *or1 = rr - ii; |
110 | | #endif |
111 | 0 | } |
112 | | |
113 | | static void hartley_hc2r(INT n, const R *x, INT xs, E *o, R *pr) |
114 | 0 | { |
115 | 0 | INT i; |
116 | 0 | E sr; |
117 | |
|
118 | 0 | o[0] = sr = x[0]; o += 1; |
119 | 0 | for (i = 1; i + i < n; ++i) { |
120 | 0 | sr += (o[0] = x[i * xs] + x[i * xs]); |
121 | 0 | o[1] = x[(n - i) * xs] + x[(n - i) * xs]; |
122 | 0 | o += 2; |
123 | 0 | } |
124 | 0 | *pr = sr; |
125 | 0 | } |
126 | | |
127 | | static void apply_hc2r(const plan *ego_, R *I, R *O) |
128 | 0 | { |
129 | 0 | const P *ego = (const P *) ego_; |
130 | 0 | INT i; |
131 | 0 | INT n = ego->n, is = ego->is, os = ego->os; |
132 | 0 | const R *W = ego->td->W; |
133 | 0 | E *buf; |
134 | 0 | size_t bufsz = n * sizeof(E); |
135 | |
|
136 | 0 | BUF_ALLOC(E *, buf, bufsz); |
137 | 0 | hartley_hc2r(n, I, is, buf, O); |
138 | |
|
139 | 0 | for (i = 1; i + i < n; ++i) { |
140 | 0 | cdot_hc2r(n, buf, W, O + i * os, O + (n - i) * os); |
141 | 0 | W += n - 1; |
142 | 0 | } |
143 | |
|
144 | 0 | BUF_FREE(buf, bufsz); |
145 | 0 | } |
146 | | |
147 | | |
148 | | /***************************************************************************/ |
149 | | |
150 | | static void awake(plan *ego_, enum wakefulness wakefulness) |
151 | | { |
152 | | P *ego = (P *) ego_; |
153 | | static const tw_instr half_tw[] = { |
154 | | { TW_HALF, 1, 0 }, |
155 | | { TW_NEXT, 1, 0 } |
156 | | }; |
157 | | |
158 | | X(twiddle_awake)(wakefulness, &ego->td, half_tw, ego->n, ego->n, |
159 | | (ego->n - 1) / 2); |
160 | | } |
161 | | |
162 | | static void print(const plan *ego_, printer *p) |
163 | | { |
164 | | const P *ego = (const P *) ego_; |
165 | | |
166 | | p->print(p, "(rdft-generic-%s-%D)", |
167 | | ego->kind == R2HC ? "r2hc" : "hc2r", |
168 | | ego->n); |
169 | | } |
170 | | |
171 | | static int applicable(const S *ego, const problem *p_, |
172 | | const planner *plnr) |
173 | | { |
174 | | const problem_rdft *p = (const problem_rdft *) p_; |
175 | | return (1 |
176 | | && p->sz->rnk == 1 |
177 | | && p->vecsz->rnk == 0 |
178 | | && (p->sz->dims[0].n % 2) == 1 |
179 | | && CIMPLIES(NO_LARGE_GENERICP(plnr), p->sz->dims[0].n < GENERIC_MIN_BAD) |
180 | | && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > GENERIC_MAX_SLOW) |
181 | | && X(is_prime)(p->sz->dims[0].n) |
182 | | && p->kind[0] == ego->kind |
183 | | ); |
184 | | } |
185 | | |
186 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) |
187 | | { |
188 | | const S *ego = (const S *)ego_; |
189 | | const problem_rdft *p; |
190 | | P *pln; |
191 | | INT n; |
192 | | |
193 | | static const plan_adt padt = { |
194 | | X(rdft_solve), awake, print, X(plan_null_destroy) |
195 | | }; |
196 | | |
197 | | if (!applicable(ego, p_, plnr)) |
198 | | return (plan *)0; |
199 | | |
200 | | p = (const problem_rdft *) p_; |
201 | | pln = MKPLAN_RDFT(P, &padt, |
202 | | R2HC_KINDP(p->kind[0]) ? apply_r2hc : apply_hc2r); |
203 | | |
204 | | pln->n = n = p->sz->dims[0].n; |
205 | | pln->is = p->sz->dims[0].is; |
206 | | pln->os = p->sz->dims[0].os; |
207 | | pln->td = 0; |
208 | | pln->kind = ego->kind; |
209 | | |
210 | | pln->super.super.ops.add = (n-1) * 2.5; |
211 | | pln->super.super.ops.mul = 0; |
212 | | pln->super.super.ops.fma = 0.5 * (n-1) * (n-1) ; |
213 | | #if 0 /* these are nice pipelined sequential loads and should cost nothing */ |
214 | | pln->super.super.ops.other = (n-1)*(2 + 1 + (n-1)); /* approximate */ |
215 | | #endif |
216 | | |
217 | | return &(pln->super.super); |
218 | | } |
219 | | |
220 | | static solver *mksolver(rdft_kind kind) |
221 | | { |
222 | | static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; |
223 | | S *slv = MKSOLVER(S, &sadt); |
224 | | slv->kind = kind; |
225 | | return &(slv->super); |
226 | | } |
227 | | |
228 | | void X(rdft_generic_register)(planner *p) |
229 | 1 | { |
230 | 1 | REGISTER_SOLVER(p, mksolver(R2HC)); |
231 | 1 | REGISTER_SOLVER(p, mksolver(HC2R)); |
232 | 1 | } |