/src/fftw3/rdft/problem2.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | #include "dft/dft.h" |
23 | | #include "rdft/rdft.h" |
24 | | #include <stddef.h> |
25 | | |
26 | | static void destroy(problem *ego_) |
27 | 0 | { |
28 | 0 | problem_rdft2 *ego = (problem_rdft2 *) ego_; |
29 | 0 | X(tensor_destroy2)(ego->vecsz, ego->sz); |
30 | 0 | X(ifree)(ego_); |
31 | 0 | } |
32 | | |
33 | | static void hash(const problem *p_, md5 *m) |
34 | 0 | { |
35 | 0 | const problem_rdft2 *p = (const problem_rdft2 *) p_; |
36 | 0 | X(md5puts)(m, "rdft2"); |
37 | 0 | X(md5int)(m, p->r0 == p->cr); |
38 | 0 | X(md5INT)(m, p->r1 - p->r0); |
39 | 0 | X(md5INT)(m, p->ci - p->cr); |
40 | 0 | X(md5int)(m, X(ialignment_of)(p->r0)); |
41 | 0 | X(md5int)(m, X(ialignment_of)(p->r1)); |
42 | 0 | X(md5int)(m, X(ialignment_of)(p->cr)); |
43 | 0 | X(md5int)(m, X(ialignment_of)(p->ci)); |
44 | 0 | X(md5int)(m, p->kind); |
45 | 0 | X(tensor_md5)(m, p->sz); |
46 | 0 | X(tensor_md5)(m, p->vecsz); |
47 | 0 | } |
48 | | |
49 | | static void print(const problem *ego_, printer *p) |
50 | 0 | { |
51 | 0 | const problem_rdft2 *ego = (const problem_rdft2 *) ego_; |
52 | 0 | p->print(p, "(rdft2 %d %d %T %T)", |
53 | 0 | (int)(ego->cr == ego->r0), |
54 | 0 | (int)(ego->kind), |
55 | 0 | ego->sz, |
56 | 0 | ego->vecsz); |
57 | 0 | } |
58 | | |
59 | | static void recur(const iodim *dims, int rnk, R *I0, R *I1) |
60 | 0 | { |
61 | 0 | if (rnk == RNK_MINFTY) |
62 | 0 | return; |
63 | 0 | else if (rnk == 0) |
64 | 0 | I0[0] = K(0.0); |
65 | 0 | else if (rnk > 0) { |
66 | 0 | INT i, n = dims[0].n, is = dims[0].is; |
67 | |
|
68 | 0 | if (rnk == 1) { |
69 | 0 | for (i = 0; i < n - 1; i += 2) { |
70 | 0 | *I0 = *I1 = K(0.0); |
71 | 0 | I0 += is; I1 += is; |
72 | 0 | } |
73 | 0 | if (i < n) |
74 | 0 | *I0 = K(0.0); |
75 | 0 | } else { |
76 | 0 | for (i = 0; i < n; ++i) |
77 | 0 | recur(dims + 1, rnk - 1, I0 + i * is, I1 + i * is); |
78 | 0 | } |
79 | 0 | } |
80 | 0 | } |
81 | | |
82 | | static void vrecur(const iodim *vdims, int vrnk, |
83 | | const iodim *dims, int rnk, R *I0, R *I1) |
84 | 0 | { |
85 | 0 | if (vrnk == RNK_MINFTY) |
86 | 0 | return; |
87 | 0 | else if (vrnk == 0) |
88 | 0 | recur(dims, rnk, I0, I1); |
89 | 0 | else if (vrnk > 0) { |
90 | 0 | INT i, n = vdims[0].n, is = vdims[0].is; |
91 | |
|
92 | 0 | for (i = 0; i < n; ++i) |
93 | 0 | vrecur(vdims + 1, vrnk - 1, |
94 | 0 | dims, rnk, I0 + i * is, I1 + i * is); |
95 | 0 | } |
96 | 0 | } |
97 | | |
98 | | INT X(rdft2_complex_n)(INT real_n, rdft_kind kind) |
99 | 0 | { |
100 | 0 | switch (kind) { |
101 | 0 | case R2HC: |
102 | 0 | case HC2R: |
103 | 0 | return (real_n / 2) + 1; |
104 | 0 | case R2HCII: |
105 | 0 | case HC2RIII: |
106 | 0 | return (real_n + 1) / 2; |
107 | 0 | default: |
108 | | /* can't happen */ |
109 | 0 | A(0); |
110 | 0 | return 0; |
111 | 0 | } |
112 | 0 | } |
113 | | |
114 | | static void zero(const problem *ego_) |
115 | 0 | { |
116 | 0 | const problem_rdft2 *ego = (const problem_rdft2 *) ego_; |
117 | 0 | if (R2HC_KINDP(ego->kind)) { |
118 | | /* FIXME: can we avoid the double recursion somehow? */ |
119 | 0 | vrecur(ego->vecsz->dims, ego->vecsz->rnk, |
120 | 0 | ego->sz->dims, ego->sz->rnk, |
121 | 0 | UNTAINT(ego->r0), UNTAINT(ego->r1)); |
122 | 0 | } else { |
123 | 0 | tensor *sz; |
124 | 0 | tensor *sz2 = X(tensor_copy)(ego->sz); |
125 | 0 | int rnk = sz2->rnk; |
126 | 0 | if (rnk > 0) /* ~half as many complex outputs */ |
127 | 0 | sz2->dims[rnk-1].n = |
128 | 0 | X(rdft2_complex_n)(sz2->dims[rnk-1].n, ego->kind); |
129 | 0 | sz = X(tensor_append)(ego->vecsz, sz2); |
130 | 0 | X(tensor_destroy)(sz2); |
131 | 0 | X(dft_zerotens)(sz, UNTAINT(ego->cr), UNTAINT(ego->ci)); |
132 | 0 | X(tensor_destroy)(sz); |
133 | 0 | } |
134 | 0 | } |
135 | | |
136 | | static const problem_adt padt = |
137 | | { |
138 | | PROBLEM_RDFT2, |
139 | | hash, |
140 | | zero, |
141 | | print, |
142 | | destroy |
143 | | }; |
144 | | |
145 | | problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz, |
146 | | R *r0, R *r1, R *cr, R *ci, |
147 | | rdft_kind kind) |
148 | 0 | { |
149 | 0 | problem_rdft2 *ego; |
150 | |
|
151 | 0 | A(kind == R2HC || kind == R2HCII || kind == HC2R || kind == HC2RIII); |
152 | 0 | A(X(tensor_kosherp)(sz)); |
153 | 0 | A(X(tensor_kosherp)(vecsz)); |
154 | 0 | A(FINITE_RNK(sz->rnk)); |
155 | | |
156 | | /* require in-place problems to use r0 == cr */ |
157 | 0 | if (UNTAINT(r0) == UNTAINT(ci)) |
158 | 0 | return X(mkproblem_unsolvable)(); |
159 | | |
160 | | /* FIXME: should check UNTAINT(r1) == UNTAINT(cr) but |
161 | | only if odd elements exist, which requires compressing the |
162 | | tensors first */ |
163 | | |
164 | 0 | if (UNTAINT(r0) == UNTAINT(cr)) |
165 | 0 | r0 = cr = JOIN_TAINT(r0, cr); |
166 | |
|
167 | 0 | ego = (problem_rdft2 *)X(mkproblem)(sizeof(problem_rdft2), &padt); |
168 | |
|
169 | 0 | if (sz->rnk > 1) { /* have to compress rnk-1 dims separately, ugh */ |
170 | 0 | tensor *szc = X(tensor_copy_except)(sz, sz->rnk - 1); |
171 | 0 | tensor *szr = X(tensor_copy_sub)(sz, sz->rnk - 1, 1); |
172 | 0 | tensor *szcc = X(tensor_compress)(szc); |
173 | 0 | if (szcc->rnk > 0) |
174 | 0 | ego->sz = X(tensor_append)(szcc, szr); |
175 | 0 | else |
176 | 0 | ego->sz = X(tensor_compress)(szr); |
177 | 0 | X(tensor_destroy2)(szc, szr); X(tensor_destroy)(szcc); |
178 | 0 | } else { |
179 | 0 | ego->sz = X(tensor_compress)(sz); |
180 | 0 | } |
181 | 0 | ego->vecsz = X(tensor_compress_contiguous)(vecsz); |
182 | 0 | ego->r0 = r0; |
183 | 0 | ego->r1 = r1; |
184 | 0 | ego->cr = cr; |
185 | 0 | ego->ci = ci; |
186 | 0 | ego->kind = kind; |
187 | |
|
188 | 0 | A(FINITE_RNK(ego->sz->rnk)); |
189 | 0 | return &(ego->super); |
190 | |
|
191 | 0 | } |
192 | | |
193 | | /* Same as X(mkproblem_rdft2), but also destroy input tensors. */ |
194 | | problem *X(mkproblem_rdft2_d)(tensor *sz, tensor *vecsz, |
195 | | R *r0, R *r1, R *cr, R *ci, rdft_kind kind) |
196 | 0 | { |
197 | 0 | problem *p = X(mkproblem_rdft2)(sz, vecsz, r0, r1, cr, ci, kind); |
198 | 0 | X(tensor_destroy2)(vecsz, sz); |
199 | 0 | return p; |
200 | 0 | } |
201 | | |
202 | | /* Same as X(mkproblem_rdft2_d), but with only one R pointer. |
203 | | Used by the API. */ |
204 | | problem *X(mkproblem_rdft2_d_3pointers)(tensor *sz, tensor *vecsz, |
205 | | R *r0, R *cr, R *ci, rdft_kind kind) |
206 | 0 | { |
207 | 0 | problem *p; |
208 | 0 | int rnk = sz->rnk; |
209 | 0 | R *r1; |
210 | |
|
211 | 0 | if (rnk == 0) |
212 | 0 | r1 = r0; |
213 | 0 | else if (R2HC_KINDP(kind)) { |
214 | 0 | r1 = r0 + sz->dims[rnk-1].is; |
215 | 0 | sz->dims[rnk-1].is *= 2; |
216 | 0 | } else { |
217 | 0 | r1 = r0 + sz->dims[rnk-1].os; |
218 | 0 | sz->dims[rnk-1].os *= 2; |
219 | 0 | } |
220 | |
|
221 | 0 | p = X(mkproblem_rdft2)(sz, vecsz, r0, r1, cr, ci, kind); |
222 | 0 | X(tensor_destroy2)(vecsz, sz); |
223 | 0 | return p; |
224 | 0 | } |