/src/fftw3/kernel/twiddle.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | |
22 | | /* Twiddle manipulation */ |
23 | | |
24 | | #include "kernel/ifftw.h" |
25 | | #include <math.h> |
26 | | |
27 | 1.12k | #define HASHSZ 109 |
28 | | |
29 | | /* hash table of known twiddle factors */ |
30 | | static twid *twlist[HASHSZ]; |
31 | | |
32 | | static INT hash(INT n, INT r) |
33 | 1.12k | { |
34 | 1.12k | INT h = n * 17 + r; |
35 | | |
36 | 1.12k | if (h < 0) h = -h; |
37 | | |
38 | 1.12k | return (h % HASHSZ); |
39 | 1.12k | } |
40 | | |
41 | | static int equal_instr(const tw_instr *p, const tw_instr *q) |
42 | 82 | { |
43 | 82 | if (p == q) |
44 | 82 | return 1; |
45 | | |
46 | 0 | for (;; ++p, ++q) { |
47 | 0 | if (p->op != q->op) |
48 | 0 | return 0; |
49 | | |
50 | 0 | switch (p->op) { |
51 | 0 | case TW_NEXT: |
52 | 0 | return (p->v == q->v); /* p->i is ignored */ |
53 | | |
54 | 0 | case TW_FULL: |
55 | 0 | case TW_HALF: |
56 | 0 | if (p->v != q->v) return 0; /* p->i is ignored */ |
57 | 0 | break; |
58 | | |
59 | 0 | default: |
60 | 0 | if (p->v != q->v || p->i != q->i) return 0; |
61 | 0 | break; |
62 | 0 | } |
63 | 0 | } |
64 | 0 | A(0 /* can't happen */); |
65 | 0 | } |
66 | | |
67 | | static int ok_twid(const twid *t, |
68 | | enum wakefulness wakefulness, |
69 | | const tw_instr *q, INT n, INT r, INT m) |
70 | 82 | { |
71 | 82 | return (wakefulness == t->wakefulness && |
72 | 82 | n == t->n && |
73 | 82 | r == t->r && |
74 | 82 | m <= t->m && |
75 | 82 | equal_instr(t->instr, q)); |
76 | 82 | } |
77 | | |
78 | | static twid *lookup(enum wakefulness wakefulness, |
79 | | const tw_instr *q, INT n, INT r, INT m) |
80 | 430 | { |
81 | 430 | twid *p; |
82 | | |
83 | 430 | for (p = twlist[hash(n,r)]; |
84 | 430 | p && !ok_twid(p, wakefulness, q, n, r, m); |
85 | 430 | p = p->cdr) |
86 | 0 | ; |
87 | 430 | return p; |
88 | 430 | } |
89 | | |
90 | | static INT twlen0(INT r, const tw_instr *p, INT *vl) |
91 | 348 | { |
92 | 348 | INT ntwiddle = 0; |
93 | | |
94 | | /* compute length of bytecode program */ |
95 | 348 | A(r > 0); |
96 | 891 | for ( ; p->op != TW_NEXT; ++p) { |
97 | 543 | switch (p->op) { |
98 | 168 | case TW_FULL: |
99 | 168 | ntwiddle += (r - 1) * 2; |
100 | 168 | break; |
101 | 72 | case TW_HALF: |
102 | 72 | ntwiddle += (r - 1); |
103 | 72 | break; |
104 | 303 | case TW_CEXP: |
105 | 303 | ntwiddle += 2; |
106 | 303 | break; |
107 | 0 | case TW_COS: |
108 | 0 | case TW_SIN: |
109 | 0 | ntwiddle += 1; |
110 | 0 | break; |
111 | 543 | } |
112 | 543 | } |
113 | | |
114 | 348 | *vl = (INT)p->v; |
115 | 348 | return ntwiddle; |
116 | 348 | } |
117 | | |
118 | | INT X(twiddle_length)(INT r, const tw_instr *p) |
119 | 0 | { |
120 | 0 | INT vl; |
121 | 0 | return twlen0(r, p, &vl); |
122 | 0 | } |
123 | | |
124 | | static R *compute(enum wakefulness wakefulness, |
125 | | const tw_instr *instr, INT n, INT r, INT m) |
126 | 348 | { |
127 | 348 | INT ntwiddle, j, vl; |
128 | 348 | R *W, *W0; |
129 | 348 | const tw_instr *p; |
130 | 348 | triggen *t = X(mktriggen)(wakefulness, n); |
131 | | |
132 | 348 | p = instr; |
133 | 348 | ntwiddle = twlen0(r, p, &vl); |
134 | | |
135 | 348 | A(m % vl == 0); |
136 | | |
137 | 348 | W0 = W = (R *)MALLOC((ntwiddle * (m / vl)) * sizeof(R), TWIDDLES); |
138 | | |
139 | 7.58k | for (j = 0; j < m; j += vl) { |
140 | 17.8k | for (p = instr; p->op != TW_NEXT; ++p) { |
141 | 10.5k | switch (p->op) { |
142 | 4.04k | case TW_FULL: { |
143 | 4.04k | INT i; |
144 | 15.7k | for (i = 1; i < r; ++i) { |
145 | 11.7k | A((j + (INT)p->v) * i < n); |
146 | 11.7k | A((j + (INT)p->v) * i > -n); |
147 | 11.7k | t->cexp(t, (j + (INT)p->v) * i, W); |
148 | 11.7k | W += 2; |
149 | 11.7k | } |
150 | 4.04k | break; |
151 | 0 | } |
152 | | |
153 | 1.32k | case TW_HALF: { |
154 | 1.32k | INT i; |
155 | 1.32k | A((r % 2) == 1); |
156 | 33.6k | for (i = 1; i + i < r; ++i) { |
157 | 32.3k | t->cexp(t, MULMOD(i, (j + (INT)p->v), n), W); |
158 | 32.3k | W += 2; |
159 | 32.3k | } |
160 | 1.32k | break; |
161 | 0 | } |
162 | | |
163 | 0 | case TW_COS: { |
164 | 0 | R d[2]; |
165 | |
|
166 | 0 | A((j + (INT)p->v) * p->i < n); |
167 | 0 | A((j + (INT)p->v) * p->i > -n); |
168 | 0 | t->cexp(t, (j + (INT)p->v) * (INT)p->i, d); |
169 | 0 | *W++ = d[0]; |
170 | 0 | break; |
171 | 0 | } |
172 | | |
173 | 0 | case TW_SIN: { |
174 | 0 | R d[2]; |
175 | |
|
176 | 0 | A((j + (INT)p->v) * p->i < n); |
177 | 0 | A((j + (INT)p->v) * p->i > -n); |
178 | 0 | t->cexp(t, (j + (INT)p->v) * (INT)p->i, d); |
179 | 0 | *W++ = d[1]; |
180 | 0 | break; |
181 | 0 | } |
182 | | |
183 | 5.20k | case TW_CEXP: |
184 | 5.20k | A((j + (INT)p->v) * p->i < n); |
185 | 5.20k | A((j + (INT)p->v) * p->i > -n); |
186 | 5.20k | t->cexp(t, (j + (INT)p->v) * (INT)p->i, W); |
187 | 5.20k | W += 2; |
188 | 5.20k | break; |
189 | 10.5k | } |
190 | 10.5k | } |
191 | 7.23k | } |
192 | | |
193 | 348 | X(triggen_destroy)(t); |
194 | 348 | return W0; |
195 | 348 | } |
196 | | |
197 | | static void mktwiddle(enum wakefulness wakefulness, |
198 | | twid **pp, const tw_instr *instr, INT n, INT r, INT m) |
199 | 430 | { |
200 | 430 | twid *p; |
201 | 430 | INT h; |
202 | | |
203 | 430 | if ((p = lookup(wakefulness, instr, n, r, m))) { |
204 | 82 | ++p->refcnt; |
205 | 348 | } else { |
206 | 348 | p = (twid *) MALLOC(sizeof(twid), TWIDDLES); |
207 | 348 | p->n = n; |
208 | 348 | p->r = r; |
209 | 348 | p->m = m; |
210 | 348 | p->instr = instr; |
211 | 348 | p->refcnt = 1; |
212 | 348 | p->wakefulness = wakefulness; |
213 | 348 | p->W = compute(wakefulness, instr, n, r, m); |
214 | | |
215 | | /* cons! onto twlist */ |
216 | 348 | h = hash(n, r); |
217 | 348 | p->cdr = twlist[h]; |
218 | 348 | twlist[h] = p; |
219 | 348 | } |
220 | | |
221 | 430 | *pp = p; |
222 | 430 | } |
223 | | |
224 | | static void twiddle_destroy(twid **pp) |
225 | 430 | { |
226 | 430 | twid *p = *pp; |
227 | 430 | twid **q; |
228 | | |
229 | 430 | if ((--p->refcnt) == 0) { |
230 | | /* remove p from twiddle list */ |
231 | 348 | for (q = &twlist[hash(p->n, p->r)]; *q; q = &((*q)->cdr)) { |
232 | 348 | if (*q == p) { |
233 | 348 | *q = p->cdr; |
234 | 348 | X(ifree)(p->W); |
235 | 348 | X(ifree)(p); |
236 | 348 | *pp = 0; |
237 | 348 | return; |
238 | 348 | } |
239 | 348 | } |
240 | 0 | A(0 /* can't happen */ ); |
241 | 0 | } |
242 | 430 | } |
243 | | |
244 | | |
245 | | void X(twiddle_awake)(enum wakefulness wakefulness, twid **pp, |
246 | | const tw_instr *instr, INT n, INT r, INT m) |
247 | 860 | { |
248 | 860 | switch (wakefulness) { |
249 | 430 | case SLEEPY: |
250 | 430 | twiddle_destroy(pp); |
251 | 430 | break; |
252 | 430 | default: |
253 | 430 | mktwiddle(wakefulness, pp, instr, n, r, m); |
254 | 430 | break; |
255 | 860 | } |
256 | 860 | } |