/src/fftw3/rdft/direct2.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | /* direct RDFT2 R2HC/HC2R solver, if we have a codelet */ |
23 | | |
24 | | #include "rdft/rdft.h" |
25 | | |
/*
 * Solver record for the direct rdft2 solver: the generic solver header
 * followed by the codelet `k` and its descriptor `desc`, both stored by
 * the constructor at the end of this file.
 */
26 | | typedef struct { |
27 | | solver super; |
28 | | const kr2c_desc *desc; |
29 | | kr2c k; |
30 | | } S; |
31 | | |
/*
 * Plan record built by mkplan() and consumed by apply()/apply_r2hc().
 */
32 | | typedef struct { |
33 | | plan_rdft2 super; |
34 | | |
/* stride objects created with mkstride in mkplan(), freed in destroy() */
35 | | stride rs, cs; |
/* vl, ivs, ovs are filled from p->vecsz by tensor_tornk1 in mkplan()
   (presumably vector length and input/output vector strides) */
36 | | INT vl; |
37 | | INT ivs, ovs; |
/* codelet, copied from the solver */
38 | | kr2c k; |
/* solver that produced this plan; print() reads its descriptor */
39 | | const S *slv; |
/* offset (n/2) * os into ci for even n, 0 for odd n; read only by
   apply_r2hc() */
40 | | INT ilast; |
41 | | } P; |
42 | | |
/*
 * Generic apply routine: one call to the stored codelet with the caller's
 * four array pointers, the plan's strides (cs is passed for two
 * consecutive stride arguments) and the plan's vl/ivs/ovs.
 * mkplan() installs this routine for every kind other than R2HC.
 */
43 | | static void apply(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
44 | 0 | {
45 | 0 | const P *ego = (const P *) ego_;
46 | 0 | ASSERT_ALIGNED_DOUBLE;
47 | 0 | ego->k(r0, r1, cr, ci,
48 | 0 | ego->rs, ego->cs, ego->cs,
49 | 0 | ego->vl, ego->ivs, ego->ovs);
50 | 0 | }
51 | | |
/*
 * Apply routine installed by mkplan() when the problem kind is R2HC.
 * Makes the same codelet call as apply(), then walks the vl transforms
 * (advancing ci by ovs each time) and stores 0 into ci[0] and ci[ilast].
 */
52 | | static void apply_r2hc(const plan *ego_, R *r0, R *r1, R *cr, R *ci)
53 | 0 | {
54 | 0 | const P *ego = (const P *) ego_;
55 | 0 | INT i, vl = ego->vl, ovs = ego->ovs;
56 | 0 | ASSERT_ALIGNED_DOUBLE;
57 | 0 | ego->k(r0, r1, cr, ci,
58 | 0 | ego->rs, ego->cs, ego->cs,
59 | 0 | vl, ego->ivs, ovs);
/* mkplan() sets ilast to 0 when n is odd, so in that case both stores
   below hit ci[0] and no second element is touched */
60 | 0 | for (i = 0; i < vl; ++i, ci += ovs)
61 | 0 | ci[0] = ci[ego->ilast] = 0;
62 | 0 | }
63 | | |
/*
 * Plan destructor hook (last entry of the plan_adt in mkplan()):
 * releases the two stride objects that mkplan() created with mkstride.
 */
64 | | static void destroy(plan *ego_)
65 | 0 | {
66 | 0 | P *ego = (P *) ego_;
67 | 0 | X(stride_destroy)(ego->rs);
68 | 0 | X(stride_destroy)(ego->cs);
69 | 0 | }
70 | | |
/*
 * Plan printing hook: emits one parenthesized description through the
 * printer's own print callback. The arguments supplied are, in order,
 * the kind string of the codelet's genus, the codelet size desc->n,
 * the plan's vl, and the descriptor's nam field.
 */
71 | | static void print(const plan *ego_, printer *p)
72 | 0 | {
73 | 0 | const P *ego = (const P *) ego_;
74 | 0 | const S *s = ego->slv;
75 |
|
76 | 0 | p->print(p, "(rdft2-%s-direct-%D%v \"%s\")",
77 | 0 | X(rdft_kind_str)(s->desc->genus->kind), s->desc->n,
78 | 0 | ego->vl, s->desc->nam);
79 | 0 | }
80 | | |
/*
 * Returns nonzero iff this solver's codelet can be used for problem p_.
 * All of the following must hold:
 *   - the transform is rank 1 and its size equals desc->n;
 *   - the vector tensor has rank at most 1;
 *   - the problem kind equals the kind of the codelet's genus;
 *   - tensor_tornk1 accepts p->vecsz;
 *   - one of the three aliasing conditions listed in the body is met.
 */
81 | | static int applicable(const solver *ego_, const problem *p_)
82 | 0 | {
83 | 0 | const S *ego = (const S *) ego_;
84 | 0 | const kr2c_desc *desc = ego->desc;
85 | 0 | const problem_rdft2 *p = (const problem_rdft2 *) p_;
/* vl, ivs, ovs only receive the outputs of tensor_tornk1 below; their
   values are not read in this function */
86 | 0 | INT vl;
87 | 0 | INT ivs, ovs;
88 |
|
89 | 0 | return (
90 | 0 | 1
91 | 0 | && p->sz->rnk == 1
92 | 0 | && p->vecsz->rnk <= 1
93 | 0 | && p->sz->dims[0].n == desc->n
94 | 0 | && p->kind == desc->genus->kind
95 | |
96 | | /* check strides etc */
97 | 0 | && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
98 |
|
99 | 0 | && (0
100 | | /* can operate out-of-place */
101 | 0 | || p->r0 != p->cr
102 | |
103 | | /*
104 | | * can compute one transform in-place, no matter
105 | | * what the strides are.
106 | | */
107 | 0 | || p->vecsz->rnk == 0
108 | |
109 | | /* can operate in-place as long as strides are the same */
110 | 0 | || X(rdft2_inplace_strides)(p, RNK_MINFTY)
111 | 0 | )
112 | 0 | );
113 | 0 | }
114 | | |
/*
 * Solver entry point (second field of the solver_adt in the constructor).
 * Returns a null plan when applicable() rejects the problem; otherwise
 * allocates a P, fills in the codelet, strides, vector parameters and
 * ilast, computes the operation count, and returns the embedded plan.
 * The planner argument is not used.
 */
115 | | static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
116 | 0 | {
117 | 0 | const S *ego = (const S *) ego_;
118 | 0 | P *pln;
119 | 0 | const problem_rdft2 *p;
120 | 0 | iodim *d;
121 | 0 | int r2hc_kindp;
122 |
|
123 | 0 | static const plan_adt padt = {
124 | 0 | X(rdft2_solve), X(null_awake), print, destroy
125 | 0 | };
126 |
|
127 | 0 | UNUSED(plnr);
128 |
|
129 | 0 | if (!applicable(ego_, p_))
130 | 0 | return (plan *)0;
131 | |
132 | 0 | p = (const problem_rdft2 *) p_;
133 |
|
134 | 0 | r2hc_kindp = R2HC_KINDP(p->kind);
135 | 0 | A(r2hc_kindp || HC2R_KINDP(p->kind));
136 |
|
/* only kind == R2HC gets apply_r2hc (which also zeroes ci[0] and
   ci[ilast]); every other kind uses the plain apply */
137 | 0 | pln = MKPLAN_RDFT2(P, &padt, p->kind == R2HC ? apply_r2hc : apply);
138 |
|
139 | 0 | d = p->sz->dims;
140 |
|
141 | 0 | pln->k = ego->k;
142 |
|
/* rs is built from the input stride for R2HC kinds and from the output
   stride otherwise; cs gets the other one (presumably rs = stride of the
   real array, cs = stride of the complex array -- confirm against the
   codelet interface) */
143 | 0 | pln->rs = X(mkstride)(d->n, r2hc_kindp ? d->is : d->os);
144 | 0 | pln->cs = X(mkstride)(d->n, r2hc_kindp ? d->os : d->is);
145 |
|
/* return value ignored: applicable() above already required this same
   call to succeed */
146 | 0 | X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
147 | |
148 | | /* Nyquist freq., if any */
149 | 0 | pln->ilast = (d->n % 2) ? 0 : (d->n/2) * d->os;
150 |
|
151 | 0 | pln->slv = ego;
/* plan cost = codelet ops scaled by vl / genus->vl */
152 | 0 | X(ops_zero)(&pln->super.super.ops);
153 | 0 | X(ops_madd2)(pln->vl / ego->desc->genus->vl,
154 | 0 | &ego->desc->ops,
155 | 0 | &pln->super.super.ops);
156 | 0 | if (p->kind == R2HC)
157 | 0 | pln->super.super.ops.other += 2 * pln->vl; /* + 2 stores */
158 |
|
159 | 0 | pln->super.super.could_prune_now_p = 1;
160 | 0 | return &(pln->super.super);
161 | 0 | }
162 | | |
163 | | /* constructor: allocates an S bound to PROBLEM_RDFT2 with mkplan as its plan maker, stores codelet k and descriptor desc in it, and returns the embedded solver header */ |
164 | | solver *X(mksolver_rdft2_direct)(kr2c k, const kr2c_desc *desc)
165 | 72 | {
166 | 72 | static const solver_adt sadt = { PROBLEM_RDFT2, mkplan, 0 };
167 | 72 | S *slv = MKSOLVER(S, &sadt);
168 | 72 | slv->k = k;
169 | 72 | slv->desc = desc;
170 | 72 | return &(slv->super);
171 | 72 | }