/src/fftw3/rdft/scalar/r2cf/hc2cfdft_32.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | /* This file was automatically generated --- DO NOT EDIT */ |
22 | | /* Generated on Sun Sep 8 06:41:50 UTC 2024 */ |
23 | | |
24 | | #include "rdft/codelet-rdft.h" |
25 | | |
26 | | #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA) |
27 | | |
28 | | /* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include rdft/scalar/hc2cf.h */ |
29 | | |
30 | | /* |
31 | | * This function contains 498 FP additions, 324 FP multiplications, |
32 | | * (or, 300 additions, 126 multiplications, 198 fused multiply/add), |
33 | | * 113 stack variables, 8 constants, and 128 memory accesses |
34 | | */ |
35 | | #include "rdft/scalar/hc2cf.h" |
36 | | |
37 | | static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) /* FMA-variant codelet (this branch is compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined). For each m in [mb, me) it reads 16 values from each of Rp, Ip, Rm and Im (offsets WS(rs, 0..15)), combines them with the 62 twiddle values W[0..61], and writes 16 results back into each of the same four arrays. */ |
38 | | { |
39 | | DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */ |
40 | | DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */ |
41 | | DK(KP198912367, +0.198912367379658006911597622644676228597850501); /* numerically tan(pi/16) */ |
42 | | DK(KP668178637, +0.668178637919298919997757686523080761552472251); /* numerically tan(3*pi/16) */ |
43 | | DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */ |
44 | | DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* numerically tan(pi/8) */ |
45 | | DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically 1/sqrt(2) */ |
46 | | DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* every stored output is scaled by this 1/2 */ |
47 | | { |
48 | | INT m; |
49 | | for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { /* W starts at offset (mb - 1) * 62; per iteration Rp/Ip step forward by ms, Rm/Im step backward by ms, and W advances by 62 */ |
50 | | E T3B, T89, T61, T8l, T2F, T8t, T4B, T7p, T1n, T7L, T5e, T7I, T4u, T82, T5E; |
51 | | E T7R, T3m, T8k, T5W, T8a, T2r, T8u, T4G, T7q, T12, T7K, T59, T7H, T4h, T81; |
52 | | E T5z, T7Q, Tl, T7D, T4Y, T7A, T3Q, T5o, T7V, T84, T1K, T7t, T4M, T7s, T2V; |
53 | | E T8n, T5L, T8e, T25, T7w, T4R, T7v, T38, T8o, T5Q, T8h, TG, T7E, T53, T7B; |
54 | | E T43, T5t, T7Y, T85; /* these loop-scope temporaries are assigned in the eight load blocks below and consumed by the three output blocks */ |
55 | | { /* load block: inputs at offsets 0 and WS(rs, 8); twiddles W[0..1] and W[30..33] */ |
56 | | E T2E, T3z, T4y, T3y, T5Z, T3t, T3x, T2v, T2A, T3r, T3q, T5X, T3n, T3p, T2w; |
57 | | E T4z, T3s, T3A; |
58 | | { |
59 | | E T2C, T2D, T3u, T3v, T3w; |
60 | | T2C = Ip[0]; |
61 | | T2D = Im[0]; |
62 | | T2E = T2C - T2D; |
63 | | T3z = T2C + T2D; |
64 | | T3u = Rm[0]; |
65 | | T3v = Rp[0]; |
66 | | T3w = T3u - T3v; |
67 | | T4y = T3v + T3u; |
68 | | T3y = W[1]; |
69 | | T5Z = T3y * T3w; |
70 | | T3t = W[0]; |
71 | | T3x = T3t * T3w; |
72 | | { |
73 | | E T2t, T2u, T3o, T2y, T2z, T2s; |
74 | | T2t = Ip[WS(rs, 8)]; |
75 | | T2u = Im[WS(rs, 8)]; |
76 | | T2v = T2t - T2u; |
77 | | T2y = Rp[WS(rs, 8)]; |
78 | | T2z = Rm[WS(rs, 8)]; |
79 | | T2A = T2y + T2z; |
80 | | T3o = T2z - T2y; |
81 | | T3r = T2t + T2u; |
82 | | T3q = W[33]; |
83 | | T5X = T3q * T3o; |
84 | | T3n = W[32]; |
85 | | T3p = T3n * T3o; |
86 | | T2s = W[30]; |
87 | | T2w = T2s * T2v; |
88 | | T4z = T2s * T2A; |
89 | | } |
90 | | } |
91 | | T3s = FNMS(T3q, T3r, T3p); /* NOTE(review): FMA and FNMS are defined outside this file; presumably FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm in the codelet headers */ |
92 | | T3A = FNMS(T3y, T3z, T3x); |
93 | | T3B = T3s + T3A; |
94 | | T89 = T3A - T3s; |
95 | | { |
96 | | E T5Y, T60, T2B, T4A, T2x; |
97 | | T5Y = FMA(T3n, T3r, T5X); |
98 | | T60 = FMA(T3t, T3z, T5Z); |
99 | | T61 = T5Y + T60; |
100 | | T8l = T60 - T5Y; |
101 | | T2x = W[31]; |
102 | | T2B = FNMS(T2x, T2A, T2w); |
103 | | T4A = FMA(T2x, T2v, T4z); |
104 | | T2F = T2B + T2E; |
105 | | T8t = T4y - T4A; |
106 | | T4B = T4y + T4A; |
107 | | T7p = T2E - T2B; |
108 | | } |
109 | | } |
110 | | { /* load block: inputs at offsets WS(rs, 3) and WS(rs, 11); twiddles W[10..13] and W[42..45] */ |
111 | | E T16, T4m, T1b, T4j, T17, T5a, T4k, T5A, T1g, T4s, T1l, T4p, T1h, T5c, T4q; |
112 | | E T5C; |
113 | | { |
114 | | E T13, T4i, T1d, T4o; |
115 | | { |
116 | | E T14, T15, T19, T1a; |
117 | | T14 = Ip[WS(rs, 3)]; |
118 | | T15 = Im[WS(rs, 3)]; |
119 | | T16 = T14 - T15; |
120 | | T4m = T14 + T15; |
121 | | T19 = Rp[WS(rs, 3)]; |
122 | | T1a = Rm[WS(rs, 3)]; |
123 | | T1b = T19 + T1a; |
124 | | T4j = T19 - T1a; |
125 | | } |
126 | | T13 = W[10]; |
127 | | T17 = T13 * T16; |
128 | | T5a = T13 * T1b; |
129 | | T4i = W[12]; |
130 | | T4k = T4i * T4j; |
131 | | T5A = T4i * T4m; |
132 | | { |
133 | | E T1e, T1f, T1j, T1k; |
134 | | T1e = Ip[WS(rs, 11)]; |
135 | | T1f = Im[WS(rs, 11)]; |
136 | | T1g = T1e - T1f; |
137 | | T4s = T1e + T1f; |
138 | | T1j = Rp[WS(rs, 11)]; |
139 | | T1k = Rm[WS(rs, 11)]; |
140 | | T1l = T1j + T1k; |
141 | | T4p = T1j - T1k; |
142 | | } |
143 | | T1d = W[42]; |
144 | | T1h = T1d * T1g; |
145 | | T5c = T1d * T1l; |
146 | | T4o = W[44]; |
147 | | T4q = T4o * T4p; |
148 | | T5C = T4o * T4s; |
149 | | } |
150 | | { |
151 | | E T1c, T5b, T1m, T5d, T18, T1i; |
152 | | T18 = W[11]; |
153 | | T1c = FNMS(T18, T1b, T17); |
154 | | T5b = FMA(T18, T16, T5a); |
155 | | T1i = W[43]; |
156 | | T1m = FNMS(T1i, T1l, T1h); |
157 | | T5d = FMA(T1i, T1g, T5c); |
158 | | T1n = T1c + T1m; |
159 | | T7L = T1c - T1m; |
160 | | T5e = T5b + T5d; |
161 | | T7I = T5b - T5d; |
162 | | } |
163 | | { |
164 | | E T4n, T5B, T4t, T5D, T4l, T4r; |
165 | | T4l = W[13]; |
166 | | T4n = FMA(T4l, T4m, T4k); |
167 | | T5B = FNMS(T4l, T4j, T5A); |
168 | | T4r = W[45]; |
169 | | T4t = FMA(T4r, T4s, T4q); |
170 | | T5D = FNMS(T4r, T4p, T5C); |
171 | | T4u = T4n + T4t; |
172 | | T82 = T4t - T4n; |
173 | | T5E = T5B + T5D; |
174 | | T7R = T5D - T5B; |
175 | | } |
176 | | } |
177 | | { /* load block: inputs at offsets WS(rs, 4) and WS(rs, 12); twiddles W[14..17] and W[46..49] */ |
178 | | E T2a, T2f, T3e, T3d, T5S, T3a, T3c, T2b, T4C, T2k, T2p, T3k, T3j, T5U, T3g; |
179 | | E T3i, T2l, T4E; |
180 | | { |
181 | | E T28, T29, T3b, T2d, T2e, T27; |
182 | | T28 = Ip[WS(rs, 4)]; |
183 | | T29 = Im[WS(rs, 4)]; |
184 | | T2a = T28 - T29; |
185 | | T2d = Rp[WS(rs, 4)]; |
186 | | T2e = Rm[WS(rs, 4)]; |
187 | | T2f = T2d + T2e; |
188 | | T3b = T2e - T2d; |
189 | | T3e = T28 + T29; |
190 | | T3d = W[17]; |
191 | | T5S = T3d * T3b; |
192 | | T3a = W[16]; |
193 | | T3c = T3a * T3b; |
194 | | T27 = W[14]; |
195 | | T2b = T27 * T2a; |
196 | | T4C = T27 * T2f; |
197 | | } |
198 | | { |
199 | | E T2i, T2j, T3h, T2n, T2o, T2h; |
200 | | T2i = Ip[WS(rs, 12)]; |
201 | | T2j = Im[WS(rs, 12)]; |
202 | | T2k = T2i - T2j; |
203 | | T2n = Rp[WS(rs, 12)]; |
204 | | T2o = Rm[WS(rs, 12)]; |
205 | | T2p = T2n + T2o; |
206 | | T3h = T2o - T2n; |
207 | | T3k = T2i + T2j; |
208 | | T3j = W[49]; |
209 | | T5U = T3j * T3h; |
210 | | T3g = W[48]; |
211 | | T3i = T3g * T3h; |
212 | | T2h = W[46]; |
213 | | T2l = T2h * T2k; |
214 | | T4E = T2h * T2p; |
215 | | } |
216 | | { |
217 | | E T3f, T3l, T5T, T5V; |
218 | | T3f = FNMS(T3d, T3e, T3c); |
219 | | T3l = FNMS(T3j, T3k, T3i); |
220 | | T3m = T3f + T3l; |
221 | | T8k = T3f - T3l; |
222 | | T5T = FMA(T3a, T3e, T5S); |
223 | | T5V = FMA(T3g, T3k, T5U); |
224 | | T5W = T5T + T5V; |
225 | | T8a = T5T - T5V; |
226 | | { |
227 | | E T2g, T4D, T2q, T4F, T2c, T2m; |
228 | | T2c = W[15]; |
229 | | T2g = FNMS(T2c, T2f, T2b); |
230 | | T4D = FMA(T2c, T2a, T4C); |
231 | | T2m = W[47]; |
232 | | T2q = FNMS(T2m, T2p, T2l); |
233 | | T4F = FMA(T2m, T2k, T4E); |
234 | | T2r = T2g + T2q; |
235 | | T8u = T2g - T2q; |
236 | | T4G = T4D + T4F; |
237 | | T7q = T4D - T4F; |
238 | | } |
239 | | } |
240 | | } |
241 | | { /* load block: inputs at offsets WS(rs, 15) and WS(rs, 7); twiddles W[58..61] and W[26..29] */ |
242 | | E TL, T49, TQ, T46, TM, T55, T47, T5v, TV, T4f, T10, T4c, TW, T57, T4d; |
243 | | E T5x; |
244 | | { |
245 | | E TI, T45, TS, T4b; |
246 | | { |
247 | | E TJ, TK, TO, TP; |
248 | | TJ = Ip[WS(rs, 15)]; |
249 | | TK = Im[WS(rs, 15)]; |
250 | | TL = TJ - TK; |
251 | | T49 = TJ + TK; |
252 | | TO = Rp[WS(rs, 15)]; |
253 | | TP = Rm[WS(rs, 15)]; |
254 | | TQ = TO + TP; |
255 | | T46 = TO - TP; |
256 | | } |
257 | | TI = W[58]; |
258 | | TM = TI * TL; |
259 | | T55 = TI * TQ; |
260 | | T45 = W[60]; |
261 | | T47 = T45 * T46; |
262 | | T5v = T45 * T49; |
263 | | { |
264 | | E TT, TU, TY, TZ; |
265 | | TT = Ip[WS(rs, 7)]; |
266 | | TU = Im[WS(rs, 7)]; |
267 | | TV = TT - TU; |
268 | | T4f = TT + TU; |
269 | | TY = Rp[WS(rs, 7)]; |
270 | | TZ = Rm[WS(rs, 7)]; |
271 | | T10 = TY + TZ; |
272 | | T4c = TY - TZ; |
273 | | } |
274 | | TS = W[26]; |
275 | | TW = TS * TV; |
276 | | T57 = TS * T10; |
277 | | T4b = W[28]; |
278 | | T4d = T4b * T4c; |
279 | | T5x = T4b * T4f; |
280 | | } |
281 | | { |
282 | | E TR, T56, T11, T58, TN, TX; |
283 | | TN = W[59]; |
284 | | TR = FNMS(TN, TQ, TM); |
285 | | T56 = FMA(TN, TL, T55); |
286 | | TX = W[27]; |
287 | | T11 = FNMS(TX, T10, TW); |
288 | | T58 = FMA(TX, TV, T57); |
289 | | T12 = TR + T11; |
290 | | T7K = T56 - T58; |
291 | | T59 = T56 + T58; |
292 | | T7H = TR - T11; |
293 | | } |
294 | | { |
295 | | E T4a, T5w, T4g, T5y, T48, T4e; |
296 | | T48 = W[61]; |
297 | | T4a = FMA(T48, T49, T47); |
298 | | T5w = FNMS(T48, T46, T5v); |
299 | | T4e = W[29]; |
300 | | T4g = FMA(T4e, T4f, T4d); |
301 | | T5y = FNMS(T4e, T4c, T5x); |
302 | | T4h = T4a + T4g; |
303 | | T81 = T5w - T5y; |
304 | | T5z = T5w + T5y; |
305 | | T7Q = T4g - T4a; |
306 | | } |
307 | | } |
308 | | { /* load block: inputs at offsets WS(rs, 1) and WS(rs, 9); twiddles W[2..5] and W[34..37] */ |
309 | | E T4, T3I, T9, T3F, T5, T4U, T3G, T5k, Te, T3O, Tj, T3L, Tf, T4W, T3M; |
310 | | E T5m; |
311 | | { |
312 | | E T1, T3E, Tb, T3K; |
313 | | { |
314 | | E T2, T3, T7, T8; |
315 | | T2 = Ip[WS(rs, 1)]; |
316 | | T3 = Im[WS(rs, 1)]; |
317 | | T4 = T2 - T3; |
318 | | T3I = T2 + T3; |
319 | | T7 = Rp[WS(rs, 1)]; |
320 | | T8 = Rm[WS(rs, 1)]; |
321 | | T9 = T7 + T8; |
322 | | T3F = T7 - T8; |
323 | | } |
324 | | T1 = W[2]; |
325 | | T5 = T1 * T4; |
326 | | T4U = T1 * T9; |
327 | | T3E = W[4]; |
328 | | T3G = T3E * T3F; |
329 | | T5k = T3E * T3I; |
330 | | { |
331 | | E Tc, Td, Th, Ti; |
332 | | Tc = Ip[WS(rs, 9)]; |
333 | | Td = Im[WS(rs, 9)]; |
334 | | Te = Tc - Td; |
335 | | T3O = Tc + Td; |
336 | | Th = Rp[WS(rs, 9)]; |
337 | | Ti = Rm[WS(rs, 9)]; |
338 | | Tj = Th + Ti; |
339 | | T3L = Th - Ti; |
340 | | } |
341 | | Tb = W[34]; |
342 | | Tf = Tb * Te; |
343 | | T4W = Tb * Tj; |
344 | | T3K = W[36]; |
345 | | T3M = T3K * T3L; |
346 | | T5m = T3K * T3O; |
347 | | } |
348 | | { |
349 | | E Ta, T4V, Tk, T4X, T6, Tg; |
350 | | T6 = W[3]; |
351 | | Ta = FNMS(T6, T9, T5); |
352 | | T4V = FMA(T6, T4, T4U); |
353 | | Tg = W[35]; |
354 | | Tk = FNMS(Tg, Tj, Tf); |
355 | | T4X = FMA(Tg, Te, T4W); |
356 | | Tl = Ta + Tk; |
357 | | T7D = T4V - T4X; |
358 | | T4Y = T4V + T4X; |
359 | | T7A = Ta - Tk; |
360 | | } |
361 | | { |
362 | | E T3J, T5l, T3P, T5n, T3H, T3N, T7T, T7U; |
363 | | T3H = W[5]; |
364 | | T3J = FMA(T3H, T3I, T3G); |
365 | | T5l = FNMS(T3H, T3F, T5k); |
366 | | T3N = W[37]; |
367 | | T3P = FMA(T3N, T3O, T3M); |
368 | | T5n = FNMS(T3N, T3L, T5m); |
369 | | T3Q = T3J + T3P; |
370 | | T5o = T5l + T5n; |
371 | | T7T = T3P - T3J; |
372 | | T7U = T5l - T5n; |
373 | | T7V = T7T - T7U; |
374 | | T84 = T7U + T7T; |
375 | | } |
376 | | } |
377 | | { /* load block: inputs at offsets WS(rs, 2) and WS(rs, 10); twiddles W[6..9] and W[38..41] */ |
378 | | E T1t, T1y, T2N, T2M, T5H, T2J, T2L, T1u, T4I, T1D, T1I, T2T, T2S, T5J, T2P; |
379 | | E T2R, T1E, T4K; |
380 | | { |
381 | | E T1r, T1s, T2K, T1w, T1x, T1q; |
382 | | T1r = Ip[WS(rs, 2)]; |
383 | | T1s = Im[WS(rs, 2)]; |
384 | | T1t = T1r - T1s; |
385 | | T1w = Rp[WS(rs, 2)]; |
386 | | T1x = Rm[WS(rs, 2)]; |
387 | | T1y = T1w + T1x; |
388 | | T2K = T1x - T1w; |
389 | | T2N = T1r + T1s; |
390 | | T2M = W[9]; |
391 | | T5H = T2M * T2K; |
392 | | T2J = W[8]; |
393 | | T2L = T2J * T2K; |
394 | | T1q = W[6]; |
395 | | T1u = T1q * T1t; |
396 | | T4I = T1q * T1y; |
397 | | } |
398 | | { |
399 | | E T1B, T1C, T2Q, T1G, T1H, T1A; |
400 | | T1B = Ip[WS(rs, 10)]; |
401 | | T1C = Im[WS(rs, 10)]; |
402 | | T1D = T1B - T1C; |
403 | | T1G = Rp[WS(rs, 10)]; |
404 | | T1H = Rm[WS(rs, 10)]; |
405 | | T1I = T1G + T1H; |
406 | | T2Q = T1H - T1G; |
407 | | T2T = T1B + T1C; |
408 | | T2S = W[41]; |
409 | | T5J = T2S * T2Q; |
410 | | T2P = W[40]; |
411 | | T2R = T2P * T2Q; |
412 | | T1A = W[38]; |
413 | | T1E = T1A * T1D; |
414 | | T4K = T1A * T1I; |
415 | | } |
416 | | { |
417 | | E T1z, T4J, T1J, T4L, T1v, T1F; |
418 | | T1v = W[7]; |
419 | | T1z = FNMS(T1v, T1y, T1u); |
420 | | T4J = FMA(T1v, T1t, T4I); |
421 | | T1F = W[39]; |
422 | | T1J = FNMS(T1F, T1I, T1E); |
423 | | T4L = FMA(T1F, T1D, T4K); |
424 | | T1K = T1z + T1J; |
425 | | T7t = T4J - T4L; |
426 | | T4M = T4J + T4L; |
427 | | T7s = T1z - T1J; |
428 | | } |
429 | | { |
430 | | E T2O, T2U, T8c, T5I, T5K, T8d; |
431 | | T2O = FNMS(T2M, T2N, T2L); |
432 | | T2U = FNMS(T2S, T2T, T2R); |
433 | | T8c = T2O - T2U; |
434 | | T5I = FMA(T2J, T2N, T5H); |
435 | | T5K = FMA(T2P, T2T, T5J); |
436 | | T8d = T5I - T5K; |
437 | | T2V = T2O + T2U; |
438 | | T8n = T8c + T8d; |
439 | | T5L = T5I + T5K; |
440 | | T8e = T8c - T8d; |
441 | | } |
442 | | } |
443 | | { /* load block: inputs at offsets WS(rs, 14) and WS(rs, 6); twiddles W[54..57] and W[22..25] */ |
444 | | E T1O, T1T, T30, T2Z, T5M, T2W, T2Y, T1P, T4N, T1Y, T23, T36, T35, T5O, T32; |
445 | | E T34, T1Z, T4P; |
446 | | { |
447 | | E T1M, T1N, T2X, T1R, T1S, T1L; |
448 | | T1M = Ip[WS(rs, 14)]; |
449 | | T1N = Im[WS(rs, 14)]; |
450 | | T1O = T1M - T1N; |
451 | | T1R = Rp[WS(rs, 14)]; |
452 | | T1S = Rm[WS(rs, 14)]; |
453 | | T1T = T1R + T1S; |
454 | | T2X = T1S - T1R; |
455 | | T30 = T1M + T1N; |
456 | | T2Z = W[57]; |
457 | | T5M = T2Z * T2X; |
458 | | T2W = W[56]; |
459 | | T2Y = T2W * T2X; |
460 | | T1L = W[54]; |
461 | | T1P = T1L * T1O; |
462 | | T4N = T1L * T1T; |
463 | | } |
464 | | { |
465 | | E T1W, T1X, T33, T21, T22, T1V; |
466 | | T1W = Ip[WS(rs, 6)]; |
467 | | T1X = Im[WS(rs, 6)]; |
468 | | T1Y = T1W - T1X; |
469 | | T21 = Rp[WS(rs, 6)]; |
470 | | T22 = Rm[WS(rs, 6)]; |
471 | | T23 = T21 + T22; |
472 | | T33 = T22 - T21; |
473 | | T36 = T1W + T1X; |
474 | | T35 = W[25]; |
475 | | T5O = T35 * T33; |
476 | | T32 = W[24]; |
477 | | T34 = T32 * T33; |
478 | | T1V = W[22]; |
479 | | T1Z = T1V * T1Y; |
480 | | T4P = T1V * T23; |
481 | | } |
482 | | { |
483 | | E T1U, T4O, T24, T4Q, T1Q, T20; |
484 | | T1Q = W[55]; |
485 | | T1U = FNMS(T1Q, T1T, T1P); |
486 | | T4O = FMA(T1Q, T1O, T4N); |
487 | | T20 = W[23]; |
488 | | T24 = FNMS(T20, T23, T1Z); |
489 | | T4Q = FMA(T20, T1Y, T4P); |
490 | | T25 = T1U + T24; |
491 | | T7w = T1U - T24; |
492 | | T4R = T4O + T4Q; |
493 | | T7v = T4O - T4Q; |
494 | | } |
495 | | { |
496 | | E T31, T37, T8f, T5N, T5P, T8g; |
497 | | T31 = FNMS(T2Z, T30, T2Y); |
498 | | T37 = FNMS(T35, T36, T34); |
499 | | T8f = T31 - T37; |
500 | | T5N = FMA(T2W, T30, T5M); |
501 | | T5P = FMA(T32, T36, T5O); |
502 | | T8g = T5N - T5P; |
503 | | T38 = T31 + T37; |
504 | | T8o = T8g - T8f; |
505 | | T5Q = T5N + T5P; |
506 | | T8h = T8f + T8g; |
507 | | } |
508 | | } |
509 | | { /* load block: inputs at offsets WS(rs, 5) and WS(rs, 13); twiddles W[18..21] and W[50..53] */ |
510 | | E Tp, T3V, Tu, T3S, Tq, T4Z, T3T, T5p, Tz, T41, TE, T3Y, TA, T51, T3Z; |
511 | | E T5r; |
512 | | { |
513 | | E Tm, T3R, Tw, T3X; |
514 | | { |
515 | | E Tn, To, Ts, Tt; |
516 | | Tn = Ip[WS(rs, 5)]; |
517 | | To = Im[WS(rs, 5)]; |
518 | | Tp = Tn - To; |
519 | | T3V = Tn + To; |
520 | | Ts = Rp[WS(rs, 5)]; |
521 | | Tt = Rm[WS(rs, 5)]; |
522 | | Tu = Ts + Tt; |
523 | | T3S = Ts - Tt; |
524 | | } |
525 | | Tm = W[18]; |
526 | | Tq = Tm * Tp; |
527 | | T4Z = Tm * Tu; |
528 | | T3R = W[20]; |
529 | | T3T = T3R * T3S; |
530 | | T5p = T3R * T3V; |
531 | | { |
532 | | E Tx, Ty, TC, TD; |
533 | | Tx = Ip[WS(rs, 13)]; |
534 | | Ty = Im[WS(rs, 13)]; |
535 | | Tz = Tx - Ty; |
536 | | T41 = Tx + Ty; |
537 | | TC = Rp[WS(rs, 13)]; |
538 | | TD = Rm[WS(rs, 13)]; |
539 | | TE = TC + TD; |
540 | | T3Y = TC - TD; |
541 | | } |
542 | | Tw = W[50]; |
543 | | TA = Tw * Tz; |
544 | | T51 = Tw * TE; |
545 | | T3X = W[52]; |
546 | | T3Z = T3X * T3Y; |
547 | | T5r = T3X * T41; |
548 | | } |
549 | | { |
550 | | E Tv, T50, TF, T52, Tr, TB; |
551 | | Tr = W[19]; |
552 | | Tv = FNMS(Tr, Tu, Tq); |
553 | | T50 = FMA(Tr, Tp, T4Z); |
554 | | TB = W[51]; |
555 | | TF = FNMS(TB, TE, TA); |
556 | | T52 = FMA(TB, Tz, T51); |
557 | | TG = Tv + TF; |
558 | | T7E = Tv - TF; |
559 | | T53 = T50 + T52; |
560 | | T7B = T50 - T52; |
561 | | } |
562 | | { |
563 | | E T3W, T5q, T42, T5s, T3U, T40, T7W, T7X; |
564 | | T3U = W[21]; |
565 | | T3W = FMA(T3U, T3V, T3T); |
566 | | T5q = FNMS(T3U, T3S, T5p); |
567 | | T40 = W[53]; |
568 | | T42 = FMA(T40, T41, T3Z); |
569 | | T5s = FNMS(T40, T3Y, T5r); |
570 | | T43 = T3W + T42; |
571 | | T5t = T5q + T5s; |
572 | | T7W = T5s - T5q; |
573 | | T7X = T3W - T42; |
574 | | T7Y = T7W + T7X; |
575 | | T85 = T7W - T7X; |
576 | | } |
577 | | } |
578 | | { /* output block: stores Rp/Ip at offsets 0, 4, 8, 12 and Rm/Im at offsets 3, 7, 11, 15 */ |
579 | | E T1p, T6i, T2H, T68, T5g, T67, T4T, T6h, T4w, T6m, T5G, T6c, T3D, T6n, T63; |
580 | | E T6f; |
581 | | { |
582 | | E TH, T1o, T4H, T4S; |
583 | | TH = Tl + TG; |
584 | | T1o = T12 + T1n; |
585 | | T1p = TH + T1o; |
586 | | T6i = TH - T1o; |
587 | | { |
588 | | E T26, T2G, T54, T5f; |
589 | | T26 = T1K + T25; |
590 | | T2G = T2r + T2F; |
591 | | T2H = T26 + T2G; |
592 | | T68 = T2G - T26; |
593 | | T54 = T4Y + T53; |
594 | | T5f = T59 + T5e; |
595 | | T5g = T54 + T5f; |
596 | | T67 = T5f - T54; |
597 | | } |
598 | | T4H = T4B + T4G; |
599 | | T4S = T4M + T4R; |
600 | | T4T = T4H + T4S; |
601 | | T6h = T4H - T4S; |
602 | | { |
603 | | E T44, T4v, T6b, T5u, T5F, T6a; |
604 | | T44 = T3Q + T43; |
605 | | T4v = T4h + T4u; |
606 | | T6b = T44 - T4v; |
607 | | T5u = T5o + T5t; |
608 | | T5F = T5z + T5E; |
609 | | T6a = T5F - T5u; |
610 | | T4w = T44 + T4v; |
611 | | T6m = T6a - T6b; |
612 | | T5G = T5u + T5F; |
613 | | T6c = T6a + T6b; |
614 | | } |
615 | | { |
616 | | E T39, T3C, T6d, T5R, T62, T6e; |
617 | | T39 = T2V + T38; |
618 | | T3C = T3m + T3B; |
619 | | T6d = T3C - T39; |
620 | | T5R = T5L + T5Q; |
621 | | T62 = T5W + T61; |
622 | | T6e = T62 - T5R; |
623 | | T3D = T39 + T3C; |
624 | | T6n = T6d + T6e; |
625 | | T63 = T5R + T62; |
626 | | T6f = T6d - T6e; |
627 | | } |
628 | | } |
629 | | { |
630 | | E T2I, T4x, T65, T66; |
631 | | T2I = T1p + T2H; |
632 | | T4x = T3D - T4w; |
633 | | Ip[0] = KP500000000 * (T2I + T4x); |
634 | | Im[WS(rs, 15)] = KP500000000 * (T4x - T2I); |
635 | | T65 = T4T + T5g; |
636 | | T66 = T5G + T63; |
637 | | Rm[WS(rs, 15)] = KP500000000 * (T65 - T66); |
638 | | Rp[0] = KP500000000 * (T65 + T66); |
639 | | } |
640 | | { |
641 | | E T5h, T5i, T5j, T64; |
642 | | T5h = T4T - T5g; |
643 | | T5i = T4w + T3D; |
644 | | Rm[WS(rs, 7)] = KP500000000 * (T5h - T5i); |
645 | | Rp[WS(rs, 8)] = KP500000000 * (T5h + T5i); |
646 | | T5j = T2H - T1p; |
647 | | T64 = T5G - T63; |
648 | | Ip[WS(rs, 8)] = KP500000000 * (T5j + T64); |
649 | | Im[WS(rs, 7)] = KP500000000 * (T64 - T5j); |
650 | | } |
651 | | { |
652 | | E T69, T6g, T6p, T6q; |
653 | | T69 = T67 + T68; |
654 | | T6g = T6c + T6f; |
655 | | Ip[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6g, T69)); |
656 | | Im[WS(rs, 11)] = -(KP500000000 * (FNMS(KP707106781, T6g, T69))); |
657 | | T6p = T6h + T6i; |
658 | | T6q = T6m + T6n; |
659 | | Rm[WS(rs, 11)] = KP500000000 * (FNMS(KP707106781, T6q, T6p)); |
660 | | Rp[WS(rs, 4)] = KP500000000 * (FMA(KP707106781, T6q, T6p)); |
661 | | } |
662 | | { |
663 | | E T6j, T6k, T6l, T6o; |
664 | | T6j = T6h - T6i; |
665 | | T6k = T6f - T6c; |
666 | | Rm[WS(rs, 3)] = KP500000000 * (FNMS(KP707106781, T6k, T6j)); |
667 | | Rp[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6k, T6j)); |
668 | | T6l = T68 - T67; |
669 | | T6o = T6m - T6n; |
670 | | Ip[WS(rs, 12)] = KP500000000 * (FMA(KP707106781, T6o, T6l)); |
671 | | Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP707106781, T6o, T6l))); |
672 | | } |
673 | | } |
674 | | { /* output block: stores Rp/Ip at offsets 2, 6, 10, 14 and Rm/Im at offsets 1, 5, 9, 13 */ |
675 | | E T6t, T75, T6T, T7f, T6A, T7g, T6W, T76, T6I, T7k, T70, T7a, T6P, T7l, T71; |
676 | | E T7d; |
677 | | { |
678 | | E T6r, T6s, T6R, T6S; |
679 | | T6r = T4R - T4M; |
680 | | T6s = T2F - T2r; |
681 | | T6t = T6r + T6s; |
682 | | T75 = T6s - T6r; |
683 | | T6R = T4B - T4G; |
684 | | T6S = T1K - T25; |
685 | | T6T = T6R + T6S; |
686 | | T7f = T6R - T6S; |
687 | | } |
688 | | { |
689 | | E T6w, T6U, T6z, T6V; |
690 | | { |
691 | | E T6u, T6v, T6x, T6y; |
692 | | T6u = Tl - TG; |
693 | | T6v = T4Y - T53; |
694 | | T6w = T6u - T6v; |
695 | | T6U = T6v + T6u; |
696 | | T6x = T59 - T5e; |
697 | | T6y = T12 - T1n; |
698 | | T6z = T6x + T6y; |
699 | | T6V = T6x - T6y; |
700 | | } |
701 | | T6A = T6w + T6z; |
702 | | T7g = T6w - T6z; |
703 | | T6W = T6U + T6V; |
704 | | T76 = T6V - T6U; |
705 | | } |
706 | | { |
707 | | E T6E, T78, T6H, T79; |
708 | | { |
709 | | E T6C, T6D, T6F, T6G; |
710 | | T6C = T5t - T5o; |
711 | | T6D = T4u - T4h; |
712 | | T6E = T6C + T6D; |
713 | | T78 = T6C - T6D; |
714 | | T6F = T43 - T3Q; |
715 | | T6G = T5z - T5E; |
716 | | T6H = T6F + T6G; |
717 | | T79 = T6G - T6F; |
718 | | } |
719 | | T6I = FMA(KP414213562, T6H, T6E); |
720 | | T7k = FNMS(KP414213562, T78, T79); |
721 | | T70 = FNMS(KP414213562, T6E, T6H); |
722 | | T7a = FMA(KP414213562, T79, T78); |
723 | | } |
724 | | { |
725 | | E T6L, T7b, T6O, T7c; |
726 | | { |
727 | | E T6J, T6K, T6M, T6N; |
728 | | T6J = T5Q - T5L; |
729 | | T6K = T3B - T3m; |
730 | | T6L = T6J + T6K; |
731 | | T7b = T6K - T6J; |
732 | | T6M = T2V - T38; |
733 | | T6N = T61 - T5W; |
734 | | T6O = T6M + T6N; |
735 | | T7c = T6N - T6M; |
736 | | } |
737 | | T6P = FNMS(KP414213562, T6O, T6L); |
738 | | T7l = FNMS(KP414213562, T7b, T7c); |
739 | | T71 = FMA(KP414213562, T6L, T6O); |
740 | | T7d = FMA(KP414213562, T7c, T7b); |
741 | | } |
742 | | { |
743 | | E T6B, T6Q, T73, T74; |
744 | | T6B = FMA(KP707106781, T6A, T6t); |
745 | | T6Q = T6I + T6P; |
746 | | Ip[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T6Q, T6B)); |
747 | | Im[WS(rs, 13)] = -(KP500000000 * (FNMS(KP923879532, T6Q, T6B))); |
748 | | T73 = FMA(KP707106781, T6W, T6T); |
749 | | T74 = T70 + T71; |
750 | | Rm[WS(rs, 13)] = KP500000000 * (FNMS(KP923879532, T74, T73)); |
751 | | Rp[WS(rs, 2)] = KP500000000 * (FMA(KP923879532, T74, T73)); |
752 | | } |
753 | | { |
754 | | E T6X, T6Y, T6Z, T72; |
755 | | T6X = FNMS(KP707106781, T6W, T6T); |
756 | | T6Y = T6P - T6I; |
757 | | Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP923879532, T6Y, T6X)); |
758 | | Rp[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T6Y, T6X)); |
759 | | T6Z = FNMS(KP707106781, T6A, T6t); |
760 | | T72 = T70 - T71; |
761 | | Ip[WS(rs, 10)] = KP500000000 * (FMA(KP923879532, T72, T6Z)); |
762 | | Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP923879532, T72, T6Z))); |
763 | | } |
764 | | { |
765 | | E T77, T7e, T7n, T7o; |
766 | | T77 = FNMS(KP707106781, T76, T75); |
767 | | T7e = T7a - T7d; |
768 | | Ip[WS(rs, 14)] = KP500000000 * (FMA(KP923879532, T7e, T77)); |
769 | | Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP923879532, T7e, T77))); |
770 | | T7n = FNMS(KP707106781, T7g, T7f); |
771 | | T7o = T7k + T7l; |
772 | | Rp[WS(rs, 14)] = KP500000000 * (FNMS(KP923879532, T7o, T7n)); |
773 | | Rm[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T7o, T7n)); |
774 | | } |
775 | | { |
776 | | E T7h, T7i, T7j, T7m; |
777 | | T7h = FMA(KP707106781, T7g, T7f); |
778 | | T7i = T7a + T7d; |
779 | | Rm[WS(rs, 9)] = KP500000000 * (FNMS(KP923879532, T7i, T7h)); |
780 | | Rp[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7i, T7h)); |
781 | | T7j = FMA(KP707106781, T76, T75); |
782 | | T7m = T7k - T7l; |
783 | | Ip[WS(rs, 6)] = KP500000000 * (FMA(KP923879532, T7m, T7j)); |
784 | | Im[WS(rs, 9)] = -(KP500000000 * (FNMS(KP923879532, T7m, T7j))); |
785 | | } |
786 | | } |
787 | | { /* output block: stores Rp/Ip at the odd offsets 1, 3, ..., 15 and Rm/Im at the even offsets 0, 2, ..., 14 */ |
788 | | E T7z, T9T, T8L, T9x, T8z, T9J, T8V, T97, T7O, T8W, T8C, T8M, T9t, T9Y, T9E; |
789 | | E T9O, T88, T90, T8G, T8Q, T9e, T9U, T9A, T9K, T9m, T9Z, T9F, T9R, T8r, T91; |
790 | | E T8H, T8T; |
791 | | { |
792 | | E T7r, T9v, T7y, T9w, T7u, T7x; |
793 | | T7r = T7p - T7q; |
794 | | T9v = T8t - T8u; |
795 | | T7u = T7s - T7t; |
796 | | T7x = T7v + T7w; |
797 | | T7y = T7u + T7x; |
798 | | T9w = T7u - T7x; |
799 | | T7z = FMA(KP707106781, T7y, T7r); |
800 | | T9T = FNMS(KP707106781, T9w, T9v); |
801 | | T8L = FNMS(KP707106781, T7y, T7r); |
802 | | T9x = FMA(KP707106781, T9w, T9v); |
803 | | } |
804 | | { |
805 | | E T8v, T95, T8y, T96, T8w, T8x; |
806 | | T8v = T8t + T8u; |
807 | | T95 = T7q + T7p; |
808 | | T8w = T7t + T7s; |
809 | | T8x = T7v - T7w; |
810 | | T8y = T8w + T8x; |
811 | | T96 = T8x - T8w; |
812 | | T8z = FMA(KP707106781, T8y, T8v); |
813 | | T9J = FNMS(KP707106781, T96, T95); |
814 | | T8V = FNMS(KP707106781, T8y, T8v); |
815 | | T97 = FMA(KP707106781, T96, T95); |
816 | | } |
817 | | { |
818 | | E T7G, T8A, T7N, T8B; |
819 | | { |
820 | | E T7C, T7F, T7J, T7M; |
821 | | T7C = T7A - T7B; |
822 | | T7F = T7D + T7E; |
823 | | T7G = FNMS(KP414213562, T7F, T7C); |
824 | | T8A = FMA(KP414213562, T7C, T7F); |
825 | | T7J = T7H - T7I; |
826 | | T7M = T7K + T7L; |
827 | | T7N = FMA(KP414213562, T7M, T7J); |
828 | | T8B = FNMS(KP414213562, T7J, T7M); |
829 | | } |
830 | | T7O = T7G + T7N; |
831 | | T8W = T7G - T7N; |
832 | | T8C = T8A + T8B; |
833 | | T8M = T8B - T8A; |
834 | | } |
835 | | { |
836 | | E T9p, T9M, T9s, T9N; |
837 | | { |
838 | | E T9n, T9o, T9q, T9r; |
839 | | T9n = T7R - T7Q; |
840 | | T9o = T85 - T84; |
841 | | T9p = FNMS(KP707106781, T9o, T9n); |
842 | | T9M = FMA(KP707106781, T9o, T9n); |
843 | | T9q = T81 - T82; |
844 | | T9r = T7Y - T7V; |
845 | | T9s = FNMS(KP707106781, T9r, T9q); |
846 | | T9N = FMA(KP707106781, T9r, T9q); |
847 | | } |
848 | | T9t = FNMS(KP668178637, T9s, T9p); |
849 | | T9Y = FNMS(KP198912367, T9M, T9N); |
850 | | T9E = FMA(KP668178637, T9p, T9s); |
851 | | T9O = FMA(KP198912367, T9N, T9M); |
852 | | } |
853 | | { |
854 | | E T80, T8O, T87, T8P; |
855 | | { |
856 | | E T7S, T7Z, T83, T86; |
857 | | T7S = T7Q + T7R; |
858 | | T7Z = T7V + T7Y; |
859 | | T80 = FMA(KP707106781, T7Z, T7S); |
860 | | T8O = FNMS(KP707106781, T7Z, T7S); |
861 | | T83 = T81 + T82; |
862 | | T86 = T84 + T85; |
863 | | T87 = FMA(KP707106781, T86, T83); |
864 | | T8P = FNMS(KP707106781, T86, T83); |
865 | | } |
866 | | T88 = FMA(KP198912367, T87, T80); |
867 | | T90 = FMA(KP668178637, T8O, T8P); |
868 | | T8G = FNMS(KP198912367, T80, T87); |
869 | | T8Q = FNMS(KP668178637, T8P, T8O); |
870 | | } |
871 | | { |
872 | | E T9a, T9z, T9d, T9y; |
873 | | { |
874 | | E T98, T99, T9b, T9c; |
875 | | T98 = T7K - T7L; |
876 | | T99 = T7H + T7I; |
877 | | T9a = FMA(KP414213562, T99, T98); |
878 | | T9z = FNMS(KP414213562, T98, T99); |
879 | | T9b = T7D - T7E; |
880 | | T9c = T7A + T7B; |
881 | | T9d = FNMS(KP414213562, T9c, T9b); |
882 | | T9y = FMA(KP414213562, T9b, T9c); |
883 | | } |
884 | | T9e = T9a - T9d; |
885 | | T9U = T9d + T9a; |
886 | | T9A = T9y - T9z; |
887 | | T9K = T9y + T9z; |
888 | | } |
889 | | { |
890 | | E T9i, T9P, T9l, T9Q; |
891 | | { |
892 | | E T9g, T9h, T9j, T9k; |
893 | | T9g = T8a + T89; |
894 | | T9h = T8n - T8o; |
895 | | T9i = FNMS(KP707106781, T9h, T9g); |
896 | | T9P = FMA(KP707106781, T9h, T9g); |
897 | | T9j = T8l - T8k; |
898 | | T9k = T8h - T8e; |
899 | | T9l = FNMS(KP707106781, T9k, T9j); |
900 | | T9Q = FMA(KP707106781, T9k, T9j); |
901 | | } |
902 | | T9m = FNMS(KP668178637, T9l, T9i); |
903 | | T9Z = FNMS(KP198912367, T9P, T9Q); |
904 | | T9F = FMA(KP668178637, T9i, T9l); |
905 | | T9R = FMA(KP198912367, T9Q, T9P); |
906 | | } |
907 | | { |
908 | | E T8j, T8R, T8q, T8S; |
909 | | { |
910 | | E T8b, T8i, T8m, T8p; |
911 | | T8b = T89 - T8a; |
912 | | T8i = T8e + T8h; |
913 | | T8j = FMA(KP707106781, T8i, T8b); |
914 | | T8R = FNMS(KP707106781, T8i, T8b); |
915 | | T8m = T8k + T8l; |
916 | | T8p = T8n + T8o; |
917 | | T8q = FMA(KP707106781, T8p, T8m); |
918 | | T8S = FNMS(KP707106781, T8p, T8m); |
919 | | } |
920 | | T8r = FNMS(KP198912367, T8q, T8j); |
921 | | T91 = FNMS(KP668178637, T8R, T8S); |
922 | | T8H = FMA(KP198912367, T8j, T8q); |
923 | | T8T = FMA(KP668178637, T8S, T8R); |
924 | | } |
925 | | { |
926 | | E T7P, T8s, T8J, T8K; |
927 | | T7P = FMA(KP923879532, T7O, T7z); |
928 | | T8s = T88 + T8r; |
929 | | Ip[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T8s, T7P)); |
930 | | Im[WS(rs, 14)] = -(KP500000000 * (FNMS(KP980785280, T8s, T7P))); |
931 | | T8J = FMA(KP923879532, T8C, T8z); |
932 | | T8K = T8G + T8H; |
933 | | Rm[WS(rs, 14)] = KP500000000 * (FNMS(KP980785280, T8K, T8J)); |
934 | | Rp[WS(rs, 1)] = KP500000000 * (FMA(KP980785280, T8K, T8J)); |
935 | | } |
936 | | { |
937 | | E T8D, T8E, T8F, T8I; |
938 | | T8D = FNMS(KP923879532, T8C, T8z); |
939 | | T8E = T8r - T88; |
940 | | Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP980785280, T8E, T8D)); |
941 | | Rp[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T8E, T8D)); |
942 | | T8F = FNMS(KP923879532, T7O, T7z); |
943 | | T8I = T8G - T8H; |
944 | | Ip[WS(rs, 9)] = KP500000000 * (FMA(KP980785280, T8I, T8F)); |
945 | | Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP980785280, T8I, T8F))); |
946 | | } |
947 | | { |
948 | | E T8N, T8U, T93, T94; |
949 | | T8N = FNMS(KP923879532, T8M, T8L); |
950 | | T8U = T8Q + T8T; |
951 | | Ip[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T8U, T8N)); |
952 | | Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP831469612, T8U, T8N))); |
953 | | T93 = FNMS(KP923879532, T8W, T8V); |
954 | | T94 = T90 + T91; |
955 | | Rp[WS(rs, 13)] = KP500000000 * (FNMS(KP831469612, T94, T93)); |
956 | | Rm[WS(rs, 2)] = KP500000000 * (FMA(KP831469612, T94, T93)); |
957 | | } |
958 | | { |
959 | | E T8X, T8Y, T8Z, T92; |
960 | | T8X = FMA(KP923879532, T8W, T8V); |
961 | | T8Y = T8T - T8Q; |
962 | | Rm[WS(rs, 10)] = KP500000000 * (FNMS(KP831469612, T8Y, T8X)); |
963 | | Rp[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T8Y, T8X)); |
964 | | T8Z = FMA(KP923879532, T8M, T8L); |
965 | | T92 = T90 - T91; |
966 | | Ip[WS(rs, 5)] = KP500000000 * (FMA(KP831469612, T92, T8Z)); |
967 | | Im[WS(rs, 10)] = -(KP500000000 * (FNMS(KP831469612, T92, T8Z))); |
968 | | } |
969 | | { |
970 | | E T9f, T9u, T9H, T9I; |
971 | | T9f = FMA(KP923879532, T9e, T97); |
972 | | T9u = T9m - T9t; |
973 | | Ip[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, T9u, T9f)); |
974 | | Im[WS(rs, 12)] = -(KP500000000 * (FNMS(KP831469612, T9u, T9f))); |
975 | | T9H = FMA(KP923879532, T9A, T9x); |
976 | | T9I = T9E + T9F; |
977 | | Rm[WS(rs, 12)] = KP500000000 * (FNMS(KP831469612, T9I, T9H)); |
978 | | Rp[WS(rs, 3)] = KP500000000 * (FMA(KP831469612, T9I, T9H)); |
979 | | } |
980 | | { |
981 | | E T9B, T9C, T9D, T9G; |
982 | | T9B = FNMS(KP923879532, T9A, T9x); |
983 | | T9C = T9t + T9m; |
984 | | Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP831469612, T9C, T9B)); |
985 | | Rp[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, T9C, T9B)); |
986 | | T9D = FNMS(KP923879532, T9e, T97); |
987 | | T9G = T9E - T9F; |
988 | | Ip[WS(rs, 11)] = KP500000000 * (FMA(KP831469612, T9G, T9D)); |
989 | | Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP831469612, T9G, T9D))); |
990 | | } |
991 | | { |
992 | | E T9L, T9S, Ta1, Ta2; |
993 | | T9L = FMA(KP923879532, T9K, T9J); |
994 | | T9S = T9O - T9R; |
995 | | Ip[WS(rs, 15)] = KP500000000 * (FMA(KP980785280, T9S, T9L)); |
996 | | Im[0] = -(KP500000000 * (FNMS(KP980785280, T9S, T9L))); |
997 | | Ta1 = FMA(KP923879532, T9U, T9T); |
998 | | Ta2 = T9Y + T9Z; |
999 | | Rp[WS(rs, 15)] = KP500000000 * (FNMS(KP980785280, Ta2, Ta1)); |
1000 | | Rm[0] = KP500000000 * (FMA(KP980785280, Ta2, Ta1)); |
1001 | | } |
1002 | | { |
1003 | | E T9V, T9W, T9X, Ta0; |
1004 | | T9V = FNMS(KP923879532, T9U, T9T); |
1005 | | T9W = T9O + T9R; |
1006 | | Rm[WS(rs, 8)] = KP500000000 * (FNMS(KP980785280, T9W, T9V)); |
1007 | | Rp[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, T9W, T9V)); |
1008 | | T9X = FNMS(KP923879532, T9K, T9J); |
1009 | | Ta0 = T9Y - T9Z; |
1010 | | Ip[WS(rs, 7)] = KP500000000 * (FMA(KP980785280, Ta0, T9X)); |
1011 | | Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP980785280, Ta0, T9X))); |
1012 | | } |
1013 | | } |
1014 | | } |
1015 | | } |
1016 | | } |
1017 | | |
1018 | | static const tw_instr twinstr[] = { /* twiddle-instruction table for the FMA variant; referenced by desc below */ |
1019 | | { TW_FULL, 1, 32 }, /* presumably requests the full twiddle set for n = 32 -- confirm against the tw_instr definition */ |
1020 | | { TW_NEXT, 1, 0 } /* presumably the terminating entry -- confirm against the tw_instr definition */ |
1021 | | }; |
1022 | | |
1023 | | static const hc2c_desc desc = { 32, "hc2cfdft_32", twinstr, &GENUS, { 300, 126, 198, 0 } }; /* descriptor for the FMA variant: 32 matches the generator's -n 32, then codelet name, twiddle table and genus; 300/126/198 repeat the add/mul/fma counts from the generated header comment */ |
1024 | | |
1025 | | void X(codelet_hc2cfdft_32) (planner *p) { /* registration entry point (FMA variant): hands the kernel and its descriptor to planner p */ |
1026 | | X(khc2c_register) (p, hc2cfdft_32, &desc, HC2C_VIA_DFT); /* registered with the HC2C_VIA_DFT flag */ |
1027 | | } |
1028 | | #else |
1029 | | |
1030 | | /* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include rdft/scalar/hc2cf.h */ |
1031 | | |
1032 | | /* |
1033 | | * This function contains 498 FP additions, 228 FP multiplications, |
1034 | | * (or, 404 additions, 134 multiplications, 94 fused multiply/add), |
1035 | | * 106 stack variables, 9 constants, and 128 memory accesses |
1036 | | */ |
1037 | | #include "rdft/scalar/hc2cf.h" |
1038 | | |
1039 | | static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) /* non-FMA build (the #else branch) of the genfft-generated size-32 codelet: for each m in [mb, me) it reads Rp/Ip/Rm/Im at WS(rs, 0..15) plus 62 twiddles from W, then overwrites those same 64 array slots */ |
1040 | 0 | { |
1041 | 0 | DK(KP277785116, +0.277785116509801112371415406974266437187468595); /* numerically 0.5 * sin(3*pi/16) */ |
1042 | 0 | DK(KP415734806, +0.415734806151272618539394188808952878369280406); /* numerically 0.5 * cos(3*pi/16) */ |
1043 | 0 | DK(KP097545161, +0.097545161008064133924142434238511120463845809); /* numerically 0.5 * sin(pi/16) */ |
1044 | 0 | DK(KP490392640, +0.490392640201615224563091118067119518486966865); /* numerically 0.5 * cos(pi/16) */ |
1045 | 0 | DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically cos(pi/4) */ |
1046 | 0 | DK(KP191341716, +0.191341716182544885864229992015199433380672281); /* numerically 0.5 * sin(pi/8) */ |
1047 | 0 | DK(KP461939766, +0.461939766255643378064091594698394143411208313); /* numerically 0.5 * cos(pi/8) */ |
1048 | 0 | DK(KP353553390, +0.353553390593273762200422181052424519642417969); /* numerically 0.5 * cos(pi/4) */ |
1049 | 0 | DK(KP500000000, +0.500000000000000000000000000000000000000000000); |
1050 | 0 | { |
1051 | 0 | INT m; |
1052 | 0 | for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(128, rs)) { /* one pass per m: Rp/Ip advance by ms while Rm/Im retreat by ms; W starts at offset (mb - 1) * 62 and moves on by the 62 values (W[0..61]) read in each pass */ |
1053 | 0 | E T2S, T5K, T52, T5N, T7p, T8r, T7i, T8o, T2q, T7t, T45, T6L, T2d, T7u, T48; |
1054 | 0 | E T6M, T1A, T4c, T4f, T1T, T3f, T5M, T7e, T7l, T6J, T7x, T4V, T5J, T7b, T7k; |
1055 | 0 | E T6G, T7w, Tj, TC, T5r, T4k, T4n, T5s, T3D, T5C, T6V, T72, T4G, T5F, T6u; |
1056 | 0 | E T86, T6S, T71, T6r, T85, TW, T1f, T5v, T4r, T4u, T5u, T40, T5G, T76, T8k; |
1057 | 0 | E T6B, T5D, T4N, T89, T6Z, T8h, T6y, T88; |
1058 | 0 | { /* input stage 1: loads indices 0, 4, 8, 12 and combines them with W[0..1], W[14..17], W[30..33], W[46..49] */ |
1059 | 0 | E T1Y, T22, T2L, T4W, T2p, T43, T2A, T50, T27, T2b, T2Q, T4X, T2h, T2l, T2F; |
1060 | 0 | E T4Z; |
1061 | 0 | { |
1062 | 0 | E T1W, T1X, T2K, T20, T21, T2I, T2H, T2J; |
1063 | 0 | T1W = Ip[WS(rs, 4)]; |
1064 | 0 | T1X = Im[WS(rs, 4)]; |
1065 | 0 | T2K = T1W + T1X; |
1066 | 0 | T20 = Rp[WS(rs, 4)]; |
1067 | 0 | T21 = Rm[WS(rs, 4)]; |
1068 | 0 | T2I = T20 - T21; |
1069 | 0 | T1Y = T1W - T1X; |
1070 | 0 | T22 = T20 + T21; |
1071 | 0 | T2H = W[16]; |
1072 | 0 | T2J = W[17]; |
1073 | 0 | T2L = FMA(T2H, T2I, T2J * T2K); |
1074 | 0 | T4W = FNMS(T2J, T2I, T2H * T2K); |
1075 | 0 | } |
1076 | 0 | { |
1077 | 0 | E T2n, T2o, T2z, T2v, T2w, T2x, T2u, T2y; |
1078 | 0 | T2n = Ip[0]; |
1079 | 0 | T2o = Im[0]; |
1080 | 0 | T2z = T2n + T2o; |
1081 | 0 | T2v = Rm[0]; |
1082 | 0 | T2w = Rp[0]; |
1083 | 0 | T2x = T2v - T2w; |
1084 | 0 | T2p = T2n - T2o; |
1085 | 0 | T43 = T2w + T2v; |
1086 | 0 | T2u = W[0]; |
1087 | 0 | T2y = W[1]; |
1088 | 0 | T2A = FNMS(T2y, T2z, T2u * T2x); |
1089 | 0 | T50 = FMA(T2y, T2x, T2u * T2z); |
1090 | 0 | } |
1091 | 0 | { |
1092 | 0 | E T25, T26, T2P, T29, T2a, T2N, T2M, T2O; |
1093 | 0 | T25 = Ip[WS(rs, 12)]; |
1094 | 0 | T26 = Im[WS(rs, 12)]; |
1095 | 0 | T2P = T25 + T26; |
1096 | 0 | T29 = Rp[WS(rs, 12)]; |
1097 | 0 | T2a = Rm[WS(rs, 12)]; |
1098 | 0 | T2N = T29 - T2a; |
1099 | 0 | T27 = T25 - T26; |
1100 | 0 | T2b = T29 + T2a; |
1101 | 0 | T2M = W[48]; |
1102 | 0 | T2O = W[49]; |
1103 | 0 | T2Q = FMA(T2M, T2N, T2O * T2P); |
1104 | 0 | T4X = FNMS(T2O, T2N, T2M * T2P); |
1105 | 0 | } |
1106 | 0 | { |
1107 | 0 | E T2f, T2g, T2E, T2j, T2k, T2C, T2B, T2D; |
1108 | 0 | T2f = Ip[WS(rs, 8)]; |
1109 | 0 | T2g = Im[WS(rs, 8)]; |
1110 | 0 | T2E = T2f + T2g; |
1111 | 0 | T2j = Rp[WS(rs, 8)]; |
1112 | 0 | T2k = Rm[WS(rs, 8)]; |
1113 | 0 | T2C = T2j - T2k; |
1114 | 0 | T2h = T2f - T2g; |
1115 | 0 | T2l = T2j + T2k; |
1116 | 0 | T2B = W[32]; |
1117 | 0 | T2D = W[33]; |
1118 | 0 | T2F = FMA(T2B, T2C, T2D * T2E); |
1119 | 0 | T4Z = FNMS(T2D, T2C, T2B * T2E); |
1120 | 0 | } |
1121 | 0 | { |
1122 | 0 | E T2G, T2R, T7g, T7h; |
1123 | 0 | T2G = T2A - T2F; |
1124 | 0 | T2R = T2L + T2Q; |
1125 | 0 | T2S = T2G - T2R; |
1126 | 0 | T5K = T2R + T2G; |
1127 | 0 | { |
1128 | 0 | E T4Y, T51, T7n, T7o; |
1129 | 0 | T4Y = T4W + T4X; |
1130 | 0 | T51 = T4Z + T50; |
1131 | 0 | T52 = T4Y + T51; |
1132 | 0 | T5N = T51 - T4Y; |
1133 | 0 | T7n = T2Q - T2L; |
1134 | 0 | T7o = T50 - T4Z; |
1135 | 0 | T7p = T7n + T7o; |
1136 | 0 | T8r = T7o - T7n; |
1137 | 0 | } |
1138 | 0 | T7g = T2F + T2A; |
1139 | 0 | T7h = T4W - T4X; |
1140 | 0 | T7i = T7g - T7h; |
1141 | 0 | T8o = T7h + T7g; |
1142 | 0 | { |
1143 | 0 | E T2m, T44, T2e, T2i; |
1144 | 0 | T2e = W[30]; |
1145 | 0 | T2i = W[31]; |
1146 | 0 | T2m = FNMS(T2i, T2l, T2e * T2h); |
1147 | 0 | T44 = FMA(T2e, T2l, T2i * T2h); |
1148 | 0 | T2q = T2m + T2p; |
1149 | 0 | T7t = T43 - T44; |
1150 | 0 | T45 = T43 + T44; |
1151 | 0 | T6L = T2p - T2m; |
1152 | 0 | } |
1153 | 0 | { |
1154 | 0 | E T23, T46, T2c, T47; |
1155 | 0 | { |
1156 | 0 | E T1V, T1Z, T24, T28; |
1157 | 0 | T1V = W[14]; |
1158 | 0 | T1Z = W[15]; |
1159 | 0 | T23 = FNMS(T1Z, T22, T1V * T1Y); |
1160 | 0 | T46 = FMA(T1V, T22, T1Z * T1Y); |
1161 | 0 | T24 = W[46]; |
1162 | 0 | T28 = W[47]; |
1163 | 0 | T2c = FNMS(T28, T2b, T24 * T27); |
1164 | 0 | T47 = FMA(T24, T2b, T28 * T27); |
1165 | 0 | } |
1166 | 0 | T2d = T23 + T2c; |
1167 | 0 | T7u = T23 - T2c; |
1168 | 0 | T48 = T46 + T47; |
1169 | 0 | T6M = T46 - T47; |
1170 | 0 | } |
1171 | 0 | } |
1172 | 0 | } |
1173 | 0 | { /* input stage 2: loads indices 2, 6, 10, 14 and combines them with W[6..9], W[22..25], W[38..41], W[54..57] */ |
1174 | 0 | E T1q, T4a, T2X, T4P, T1S, T4e, T3d, T4T, T1z, T4b, T32, T4Q, T1J, T4d, T38; |
1175 | 0 | E T4S; |
1176 | 0 | { |
1177 | 0 | E T1l, T2W, T1p, T2U; |
1178 | 0 | { |
1179 | 0 | E T1j, T1k, T1n, T1o; |
1180 | 0 | T1j = Ip[WS(rs, 2)]; |
1181 | 0 | T1k = Im[WS(rs, 2)]; |
1182 | 0 | T1l = T1j - T1k; |
1183 | 0 | T2W = T1j + T1k; |
1184 | 0 | T1n = Rp[WS(rs, 2)]; |
1185 | 0 | T1o = Rm[WS(rs, 2)]; |
1186 | 0 | T1p = T1n + T1o; |
1187 | 0 | T2U = T1n - T1o; |
1188 | 0 | } |
1189 | 0 | { |
1190 | 0 | E T1i, T1m, T2T, T2V; |
1191 | 0 | T1i = W[6]; |
1192 | 0 | T1m = W[7]; |
1193 | 0 | T1q = FNMS(T1m, T1p, T1i * T1l); |
1194 | 0 | T4a = FMA(T1i, T1p, T1m * T1l); |
1195 | 0 | T2T = W[8]; |
1196 | 0 | T2V = W[9]; |
1197 | 0 | T2X = FMA(T2T, T2U, T2V * T2W); |
1198 | 0 | T4P = FNMS(T2V, T2U, T2T * T2W); |
1199 | 0 | } |
1200 | 0 | } |
1201 | 0 | { |
1202 | 0 | E T1N, T3c, T1R, T3a; |
1203 | 0 | { |
1204 | 0 | E T1L, T1M, T1P, T1Q; |
1205 | 0 | T1L = Ip[WS(rs, 6)]; |
1206 | 0 | T1M = Im[WS(rs, 6)]; |
1207 | 0 | T1N = T1L - T1M; |
1208 | 0 | T3c = T1L + T1M; |
1209 | 0 | T1P = Rp[WS(rs, 6)]; |
1210 | 0 | T1Q = Rm[WS(rs, 6)]; |
1211 | 0 | T1R = T1P + T1Q; |
1212 | 0 | T3a = T1P - T1Q; |
1213 | 0 | } |
1214 | 0 | { |
1215 | 0 | E T1K, T1O, T39, T3b; |
1216 | 0 | T1K = W[22]; |
1217 | 0 | T1O = W[23]; |
1218 | 0 | T1S = FNMS(T1O, T1R, T1K * T1N); |
1219 | 0 | T4e = FMA(T1K, T1R, T1O * T1N); |
1220 | 0 | T39 = W[24]; |
1221 | 0 | T3b = W[25]; |
1222 | 0 | T3d = FMA(T39, T3a, T3b * T3c); |
1223 | 0 | T4T = FNMS(T3b, T3a, T39 * T3c); |
1224 | 0 | } |
1225 | 0 | } |
1226 | 0 | { |
1227 | 0 | E T1u, T31, T1y, T2Z; |
1228 | 0 | { |
1229 | 0 | E T1s, T1t, T1w, T1x; |
1230 | 0 | T1s = Ip[WS(rs, 10)]; |
1231 | 0 | T1t = Im[WS(rs, 10)]; |
1232 | 0 | T1u = T1s - T1t; |
1233 | 0 | T31 = T1s + T1t; |
1234 | 0 | T1w = Rp[WS(rs, 10)]; |
1235 | 0 | T1x = Rm[WS(rs, 10)]; |
1236 | 0 | T1y = T1w + T1x; |
1237 | 0 | T2Z = T1w - T1x; |
1238 | 0 | } |
1239 | 0 | { |
1240 | 0 | E T1r, T1v, T2Y, T30; |
1241 | 0 | T1r = W[38]; |
1242 | 0 | T1v = W[39]; |
1243 | 0 | T1z = FNMS(T1v, T1y, T1r * T1u); |
1244 | 0 | T4b = FMA(T1r, T1y, T1v * T1u); |
1245 | 0 | T2Y = W[40]; |
1246 | 0 | T30 = W[41]; |
1247 | 0 | T32 = FMA(T2Y, T2Z, T30 * T31); |
1248 | 0 | T4Q = FNMS(T30, T2Z, T2Y * T31); |
1249 | 0 | } |
1250 | 0 | } |
1251 | 0 | { |
1252 | 0 | E T1E, T37, T1I, T35; |
1253 | 0 | { |
1254 | 0 | E T1C, T1D, T1G, T1H; |
1255 | 0 | T1C = Ip[WS(rs, 14)]; |
1256 | 0 | T1D = Im[WS(rs, 14)]; |
1257 | 0 | T1E = T1C - T1D; |
1258 | 0 | T37 = T1C + T1D; |
1259 | 0 | T1G = Rp[WS(rs, 14)]; |
1260 | 0 | T1H = Rm[WS(rs, 14)]; |
1261 | 0 | T1I = T1G + T1H; |
1262 | 0 | T35 = T1G - T1H; |
1263 | 0 | } |
1264 | 0 | { |
1265 | 0 | E T1B, T1F, T34, T36; |
1266 | 0 | T1B = W[54]; |
1267 | 0 | T1F = W[55]; |
1268 | 0 | T1J = FNMS(T1F, T1I, T1B * T1E); |
1269 | 0 | T4d = FMA(T1B, T1I, T1F * T1E); |
1270 | 0 | T34 = W[56]; |
1271 | 0 | T36 = W[57]; |
1272 | 0 | T38 = FMA(T34, T35, T36 * T37); |
1273 | 0 | T4S = FNMS(T36, T35, T34 * T37); |
1274 | 0 | } |
1275 | 0 | } |
1276 | 0 | { |
1277 | 0 | E T33, T3e, T4R, T4U; |
1278 | 0 | T1A = T1q + T1z; |
1279 | 0 | T4c = T4a + T4b; |
1280 | 0 | T4f = T4d + T4e; |
1281 | 0 | T1T = T1J + T1S; |
1282 | 0 | T33 = T2X + T32; |
1283 | 0 | T3e = T38 + T3d; |
1284 | 0 | T3f = T33 + T3e; |
1285 | 0 | T5M = T3e - T33; |
1286 | 0 | { |
1287 | 0 | E T7c, T7d, T6H, T6I; |
1288 | 0 | T7c = T4S - T4T; |
1289 | 0 | T7d = T3d - T38; |
1290 | 0 | T7e = T7c + T7d; |
1291 | 0 | T7l = T7c - T7d; |
1292 | 0 | T6H = T4d - T4e; |
1293 | 0 | T6I = T1J - T1S; |
1294 | 0 | T6J = T6H + T6I; |
1295 | 0 | T7x = T6H - T6I; |
1296 | 0 | } |
1297 | 0 | T4R = T4P + T4Q; |
1298 | 0 | T4U = T4S + T4T; |
1299 | 0 | T4V = T4R + T4U; |
1300 | 0 | T5J = T4U - T4R; |
1301 | 0 | { |
1302 | 0 | E T79, T7a, T6E, T6F; |
1303 | 0 | T79 = T32 - T2X; |
1304 | 0 | T7a = T4P - T4Q; |
1305 | 0 | T7b = T79 - T7a; |
1306 | 0 | T7k = T7a + T79; |
1307 | 0 | T6E = T1q - T1z; |
1308 | 0 | T6F = T4a - T4b; |
1309 | 0 | T6G = T6E - T6F; |
1310 | 0 | T7w = T6F + T6E; |
1311 | 0 | } |
1312 | 0 | } |
1313 | 0 | } |
1314 | 0 | { /* input stage 3: loads indices 1, 13, 9, 5 and combines them with W[2..5], W[50..53], W[34..37], W[18..21] */ |
1315 | 0 | E T9, T4i, T3l, T4A, TB, T4m, T3B, T4E, Ti, T4j, T3q, T4B, Ts, T4l, T3w; |
1316 | 0 | E T4D; |
1317 | 0 | { |
1318 | 0 | E T4, T3k, T8, T3i; |
1319 | 0 | { |
1320 | 0 | E T2, T3, T6, T7; |
1321 | 0 | T2 = Ip[WS(rs, 1)]; |
1322 | 0 | T3 = Im[WS(rs, 1)]; |
1323 | 0 | T4 = T2 - T3; |
1324 | 0 | T3k = T2 + T3; |
1325 | 0 | T6 = Rp[WS(rs, 1)]; |
1326 | 0 | T7 = Rm[WS(rs, 1)]; |
1327 | 0 | T8 = T6 + T7; |
1328 | 0 | T3i = T6 - T7; |
1329 | 0 | } |
1330 | 0 | { |
1331 | 0 | E T1, T5, T3h, T3j; |
1332 | 0 | T1 = W[2]; |
1333 | 0 | T5 = W[3]; |
1334 | 0 | T9 = FNMS(T5, T8, T1 * T4); |
1335 | 0 | T4i = FMA(T1, T8, T5 * T4); |
1336 | 0 | T3h = W[4]; |
1337 | 0 | T3j = W[5]; |
1338 | 0 | T3l = FMA(T3h, T3i, T3j * T3k); |
1339 | 0 | T4A = FNMS(T3j, T3i, T3h * T3k); |
1340 | 0 | } |
1341 | 0 | } |
1342 | 0 | { |
1343 | 0 | E Tw, T3A, TA, T3y; |
1344 | 0 | { |
1345 | 0 | E Tu, Tv, Ty, Tz; |
1346 | 0 | Tu = Ip[WS(rs, 13)]; |
1347 | 0 | Tv = Im[WS(rs, 13)]; |
1348 | 0 | Tw = Tu - Tv; |
1349 | 0 | T3A = Tu + Tv; |
1350 | 0 | Ty = Rp[WS(rs, 13)]; |
1351 | 0 | Tz = Rm[WS(rs, 13)]; |
1352 | 0 | TA = Ty + Tz; |
1353 | 0 | T3y = Ty - Tz; |
1354 | 0 | } |
1355 | 0 | { |
1356 | 0 | E Tt, Tx, T3x, T3z; |
1357 | 0 | Tt = W[50]; |
1358 | 0 | Tx = W[51]; |
1359 | 0 | TB = FNMS(Tx, TA, Tt * Tw); |
1360 | 0 | T4m = FMA(Tt, TA, Tx * Tw); |
1361 | 0 | T3x = W[52]; |
1362 | 0 | T3z = W[53]; |
1363 | 0 | T3B = FMA(T3x, T3y, T3z * T3A); |
1364 | 0 | T4E = FNMS(T3z, T3y, T3x * T3A); |
1365 | 0 | } |
1366 | 0 | } |
1367 | 0 | { |
1368 | 0 | E Td, T3p, Th, T3n; |
1369 | 0 | { |
1370 | 0 | E Tb, Tc, Tf, Tg; |
1371 | 0 | Tb = Ip[WS(rs, 9)]; |
1372 | 0 | Tc = Im[WS(rs, 9)]; |
1373 | 0 | Td = Tb - Tc; |
1374 | 0 | T3p = Tb + Tc; |
1375 | 0 | Tf = Rp[WS(rs, 9)]; |
1376 | 0 | Tg = Rm[WS(rs, 9)]; |
1377 | 0 | Th = Tf + Tg; |
1378 | 0 | T3n = Tf - Tg; |
1379 | 0 | } |
1380 | 0 | { |
1381 | 0 | E Ta, Te, T3m, T3o; |
1382 | 0 | Ta = W[34]; |
1383 | 0 | Te = W[35]; |
1384 | 0 | Ti = FNMS(Te, Th, Ta * Td); |
1385 | 0 | T4j = FMA(Ta, Th, Te * Td); |
1386 | 0 | T3m = W[36]; |
1387 | 0 | T3o = W[37]; |
1388 | 0 | T3q = FMA(T3m, T3n, T3o * T3p); |
1389 | 0 | T4B = FNMS(T3o, T3n, T3m * T3p); |
1390 | 0 | } |
1391 | 0 | } |
1392 | 0 | { |
1393 | 0 | E Tn, T3v, Tr, T3t; |
1394 | 0 | { |
1395 | 0 | E Tl, Tm, Tp, Tq; |
1396 | 0 | Tl = Ip[WS(rs, 5)]; |
1397 | 0 | Tm = Im[WS(rs, 5)]; |
1398 | 0 | Tn = Tl - Tm; |
1399 | 0 | T3v = Tl + Tm; |
1400 | 0 | Tp = Rp[WS(rs, 5)]; |
1401 | 0 | Tq = Rm[WS(rs, 5)]; |
1402 | 0 | Tr = Tp + Tq; |
1403 | 0 | T3t = Tp - Tq; |
1404 | 0 | } |
1405 | 0 | { |
1406 | 0 | E Tk, To, T3s, T3u; |
1407 | 0 | Tk = W[18]; |
1408 | 0 | To = W[19]; |
1409 | 0 | Ts = FNMS(To, Tr, Tk * Tn); |
1410 | 0 | T4l = FMA(Tk, Tr, To * Tn); |
1411 | 0 | T3s = W[20]; |
1412 | 0 | T3u = W[21]; |
1413 | 0 | T3w = FMA(T3s, T3t, T3u * T3v); |
1414 | 0 | T4D = FNMS(T3u, T3t, T3s * T3v); |
1415 | 0 | } |
1416 | 0 | } |
1417 | 0 | Tj = T9 + Ti; |
1418 | 0 | TC = Ts + TB; |
1419 | 0 | T5r = Tj - TC; |
1420 | 0 | T4k = T4i + T4j; |
1421 | 0 | T4n = T4l + T4m; |
1422 | 0 | T5s = T4k - T4n; |
1423 | 0 | { |
1424 | 0 | E T3r, T3C, T6T, T6U; |
1425 | 0 | T3r = T3l + T3q; |
1426 | 0 | T3C = T3w + T3B; |
1427 | 0 | T3D = T3r + T3C; |
1428 | 0 | T5C = T3C - T3r; |
1429 | 0 | T6T = T4E - T4D; |
1430 | 0 | T6U = T3w - T3B; |
1431 | 0 | T6V = T6T + T6U; |
1432 | 0 | T72 = T6T - T6U; |
1433 | 0 | } |
1434 | 0 | { |
1435 | 0 | E T4C, T4F, T6s, T6t; |
1436 | 0 | T4C = T4A + T4B; |
1437 | 0 | T4F = T4D + T4E; |
1438 | 0 | T4G = T4C + T4F; |
1439 | 0 | T5F = T4F - T4C; |
1440 | 0 | T6s = T4i - T4j; |
1441 | 0 | T6t = Ts - TB; |
1442 | 0 | T6u = T6s + T6t; |
1443 | 0 | T86 = T6s - T6t; |
1444 | 0 | } |
1445 | 0 | { |
1446 | 0 | E T6Q, T6R, T6p, T6q; |
1447 | 0 | T6Q = T3q - T3l; |
1448 | 0 | T6R = T4A - T4B; |
1449 | 0 | T6S = T6Q - T6R; |
1450 | 0 | T71 = T6R + T6Q; |
1451 | 0 | T6p = T9 - Ti; |
1452 | 0 | T6q = T4l - T4m; |
1453 | 0 | T6r = T6p - T6q; |
1454 | 0 | T85 = T6p + T6q; |
1455 | 0 | } |
1456 | 0 | } |
1457 | 0 | { /* input stage 4: loads indices 15, 11, 7, 3 and combines them with W[58..61], W[42..45], W[26..29], W[10..13] */ |
1458 | 0 | E TM, T4p, T3I, T4H, T1e, T4t, T3Y, T4L, TV, T4q, T3N, T4I, T15, T4s, T3T; |
1459 | 0 | E T4K; |
1460 | 0 | { |
1461 | 0 | E TH, T3H, TL, T3F; |
1462 | 0 | { |
1463 | 0 | E TF, TG, TJ, TK; |
1464 | 0 | TF = Ip[WS(rs, 15)]; |
1465 | 0 | TG = Im[WS(rs, 15)]; |
1466 | 0 | TH = TF - TG; |
1467 | 0 | T3H = TF + TG; |
1468 | 0 | TJ = Rp[WS(rs, 15)]; |
1469 | 0 | TK = Rm[WS(rs, 15)]; |
1470 | 0 | TL = TJ + TK; |
1471 | 0 | T3F = TJ - TK; |
1472 | 0 | } |
1473 | 0 | { |
1474 | 0 | E TE, TI, T3E, T3G; |
1475 | 0 | TE = W[58]; |
1476 | 0 | TI = W[59]; |
1477 | 0 | TM = FNMS(TI, TL, TE * TH); |
1478 | 0 | T4p = FMA(TE, TL, TI * TH); |
1479 | 0 | T3E = W[60]; |
1480 | 0 | T3G = W[61]; |
1481 | 0 | T3I = FMA(T3E, T3F, T3G * T3H); |
1482 | 0 | T4H = FNMS(T3G, T3F, T3E * T3H); |
1483 | 0 | } |
1484 | 0 | } |
1485 | 0 | { |
1486 | 0 | E T19, T3X, T1d, T3V; |
1487 | 0 | { |
1488 | 0 | E T17, T18, T1b, T1c; |
1489 | 0 | T17 = Ip[WS(rs, 11)]; |
1490 | 0 | T18 = Im[WS(rs, 11)]; |
1491 | 0 | T19 = T17 - T18; |
1492 | 0 | T3X = T17 + T18; |
1493 | 0 | T1b = Rp[WS(rs, 11)]; |
1494 | 0 | T1c = Rm[WS(rs, 11)]; |
1495 | 0 | T1d = T1b + T1c; |
1496 | 0 | T3V = T1b - T1c; |
1497 | 0 | } |
1498 | 0 | { |
1499 | 0 | E T16, T1a, T3U, T3W; |
1500 | 0 | T16 = W[42]; |
1501 | 0 | T1a = W[43]; |
1502 | 0 | T1e = FNMS(T1a, T1d, T16 * T19); |
1503 | 0 | T4t = FMA(T16, T1d, T1a * T19); |
1504 | 0 | T3U = W[44]; |
1505 | 0 | T3W = W[45]; |
1506 | 0 | T3Y = FMA(T3U, T3V, T3W * T3X); |
1507 | 0 | T4L = FNMS(T3W, T3V, T3U * T3X); |
1508 | 0 | } |
1509 | 0 | } |
1510 | 0 | { |
1511 | 0 | E TQ, T3M, TU, T3K; |
1512 | 0 | { |
1513 | 0 | E TO, TP, TS, TT; |
1514 | 0 | TO = Ip[WS(rs, 7)]; |
1515 | 0 | TP = Im[WS(rs, 7)]; |
1516 | 0 | TQ = TO - TP; |
1517 | 0 | T3M = TO + TP; |
1518 | 0 | TS = Rp[WS(rs, 7)]; |
1519 | 0 | TT = Rm[WS(rs, 7)]; |
1520 | 0 | TU = TS + TT; |
1521 | 0 | T3K = TS - TT; |
1522 | 0 | } |
1523 | 0 | { |
1524 | 0 | E TN, TR, T3J, T3L; |
1525 | 0 | TN = W[26]; |
1526 | 0 | TR = W[27]; |
1527 | 0 | TV = FNMS(TR, TU, TN * TQ); |
1528 | 0 | T4q = FMA(TN, TU, TR * TQ); |
1529 | 0 | T3J = W[28]; |
1530 | 0 | T3L = W[29]; |
1531 | 0 | T3N = FMA(T3J, T3K, T3L * T3M); |
1532 | 0 | T4I = FNMS(T3L, T3K, T3J * T3M); |
1533 | 0 | } |
1534 | 0 | } |
1535 | 0 | { |
1536 | 0 | E T10, T3S, T14, T3Q; |
1537 | 0 | { |
1538 | 0 | E TY, TZ, T12, T13; |
1539 | 0 | TY = Ip[WS(rs, 3)]; |
1540 | 0 | TZ = Im[WS(rs, 3)]; |
1541 | 0 | T10 = TY - TZ; |
1542 | 0 | T3S = TY + TZ; |
1543 | 0 | T12 = Rp[WS(rs, 3)]; |
1544 | 0 | T13 = Rm[WS(rs, 3)]; |
1545 | 0 | T14 = T12 + T13; |
1546 | 0 | T3Q = T12 - T13; |
1547 | 0 | } |
1548 | 0 | { |
1549 | 0 | E TX, T11, T3P, T3R; |
1550 | 0 | TX = W[10]; |
1551 | 0 | T11 = W[11]; |
1552 | 0 | T15 = FNMS(T11, T14, TX * T10); |
1553 | 0 | T4s = FMA(TX, T14, T11 * T10); |
1554 | 0 | T3P = W[12]; |
1555 | 0 | T3R = W[13]; |
1556 | 0 | T3T = FMA(T3P, T3Q, T3R * T3S); |
1557 | 0 | T4K = FNMS(T3R, T3Q, T3P * T3S); |
1558 | 0 | } |
1559 | 0 | } |
1560 | 0 | TW = TM + TV; |
1561 | 0 | T1f = T15 + T1e; |
1562 | 0 | T5v = TW - T1f; |
1563 | 0 | T4r = T4p + T4q; |
1564 | 0 | T4u = T4s + T4t; |
1565 | 0 | T5u = T4r - T4u; |
1566 | 0 | { |
1567 | 0 | E T3O, T3Z, T74, T75; |
1568 | 0 | T3O = T3I + T3N; |
1569 | 0 | T3Z = T3T + T3Y; |
1570 | 0 | T40 = T3O + T3Z; |
1571 | 0 | T5G = T3Z - T3O; |
1572 | 0 | T74 = T4H - T4I; |
1573 | 0 | T75 = T3Y - T3T; |
1574 | 0 | T76 = T74 + T75; |
1575 | 0 | T8k = T74 - T75; |
1576 | 0 | } |
1577 | 0 | { |
1578 | 0 | E T4J, T4M, T6z, T6A; |
1579 | 0 | T4J = T4H + T4I; |
1580 | 0 | T4M = T4K + T4L; |
1581 | 0 | T4N = T4J + T4M; |
1582 | 0 | T5D = T4J - T4M; |
1583 | 0 | T6z = T4p - T4q; |
1584 | 0 | T6A = T15 - T1e; |
1585 | 0 | T6B = T6z + T6A; |
1586 | 0 | T89 = T6z - T6A; |
1587 | 0 | } |
1588 | 0 | { |
1589 | 0 | E T6X, T6Y, T6w, T6x; |
1590 | 0 | T6X = T3N - T3I; |
1591 | 0 | T6Y = T4K - T4L; |
1592 | 0 | T6Z = T6X - T6Y; |
1593 | 0 | T8h = T6X + T6Y; |
1594 | 0 | T6w = TM - TV; |
1595 | 0 | T6x = T4s - T4t; |
1596 | 0 | T6y = T6w - T6x; |
1597 | 0 | T88 = T6w + T6x; |
1598 | 0 | } |
1599 | 0 | } |
1600 | 0 | { /* output stage 1 (all loads finished above): stores Rp/Ip at indices 0, 4, 8, 12 and Rm/Im at indices 3, 7, 11, 15 */ |
1601 | 0 | E T1h, T5i, T5c, T5m, T5f, T5n, T2s, T58, T42, T4y, T4w, T57, T54, T56, T4h; |
1602 | 0 | E T5h; |
1603 | 0 | { |
1604 | 0 | E TD, T1g, T5a, T5b; |
1605 | 0 | TD = Tj + TC; |
1606 | 0 | T1g = TW + T1f; |
1607 | 0 | T1h = TD + T1g; |
1608 | 0 | T5i = TD - T1g; |
1609 | 0 | T5a = T4N - T4G; |
1610 | 0 | T5b = T3D - T40; |
1611 | 0 | T5c = T5a + T5b; |
1612 | 0 | T5m = T5a - T5b; |
1613 | 0 | } |
1614 | 0 | { |
1615 | 0 | E T5d, T5e, T1U, T2r; |
1616 | 0 | T5d = T3f + T2S; |
1617 | 0 | T5e = T52 - T4V; |
1618 | 0 | T5f = T5d - T5e; |
1619 | 0 | T5n = T5d + T5e; |
1620 | 0 | T1U = T1A + T1T; |
1621 | 0 | T2r = T2d + T2q; |
1622 | 0 | T2s = T1U + T2r; |
1623 | 0 | T58 = T2r - T1U; |
1624 | 0 | } |
1625 | 0 | { |
1626 | 0 | E T3g, T41, T4o, T4v; |
1627 | 0 | T3g = T2S - T3f; |
1628 | 0 | T41 = T3D + T40; |
1629 | 0 | T42 = T3g - T41; |
1630 | 0 | T4y = T41 + T3g; |
1631 | 0 | T4o = T4k + T4n; |
1632 | 0 | T4v = T4r + T4u; |
1633 | 0 | T4w = T4o + T4v; |
1634 | 0 | T57 = T4v - T4o; |
1635 | 0 | } |
1636 | 0 | { |
1637 | 0 | E T4O, T53, T49, T4g; |
1638 | 0 | T4O = T4G + T4N; |
1639 | 0 | T53 = T4V + T52; |
1640 | 0 | T54 = T4O - T53; |
1641 | 0 | T56 = T4O + T53; |
1642 | 0 | T49 = T45 + T48; |
1643 | 0 | T4g = T4c + T4f; |
1644 | 0 | T4h = T49 + T4g; |
1645 | 0 | T5h = T49 - T4g; |
1646 | 0 | } |
1647 | 0 | { |
1648 | 0 | E T2t, T55, T4x, T4z; |
1649 | 0 | T2t = T1h + T2s; |
1650 | 0 | Ip[0] = KP500000000 * (T2t + T42); |
1651 | 0 | Im[WS(rs, 15)] = KP500000000 * (T42 - T2t); |
1652 | 0 | T55 = T4h + T4w; |
1653 | 0 | Rm[WS(rs, 15)] = KP500000000 * (T55 - T56); |
1654 | 0 | Rp[0] = KP500000000 * (T55 + T56); |
1655 | 0 | T4x = T4h - T4w; |
1656 | 0 | Rm[WS(rs, 7)] = KP500000000 * (T4x - T4y); |
1657 | 0 | Rp[WS(rs, 8)] = KP500000000 * (T4x + T4y); |
1658 | 0 | T4z = T2s - T1h; |
1659 | 0 | Ip[WS(rs, 8)] = KP500000000 * (T4z + T54); |
1660 | 0 | Im[WS(rs, 7)] = KP500000000 * (T54 - T4z); |
1661 | 0 | } |
1662 | 0 | { |
1663 | 0 | E T59, T5g, T5p, T5q; |
1664 | 0 | T59 = KP500000000 * (T57 + T58); |
1665 | 0 | T5g = KP353553390 * (T5c + T5f); |
1666 | 0 | Ip[WS(rs, 4)] = T59 + T5g; |
1667 | 0 | Im[WS(rs, 11)] = T5g - T59; |
1668 | 0 | T5p = KP500000000 * (T5h + T5i); |
1669 | 0 | T5q = KP353553390 * (T5m + T5n); |
1670 | 0 | Rm[WS(rs, 11)] = T5p - T5q; |
1671 | 0 | Rp[WS(rs, 4)] = T5p + T5q; |
1672 | 0 | } |
1673 | 0 | { |
1674 | 0 | E T5j, T5k, T5l, T5o; |
1675 | 0 | T5j = KP500000000 * (T5h - T5i); |
1676 | 0 | T5k = KP353553390 * (T5f - T5c); |
1677 | 0 | Rm[WS(rs, 3)] = T5j - T5k; |
1678 | 0 | Rp[WS(rs, 12)] = T5j + T5k; |
1679 | 0 | T5l = KP500000000 * (T58 - T57); |
1680 | 0 | T5o = KP353553390 * (T5m - T5n); |
1681 | 0 | Ip[WS(rs, 12)] = T5l + T5o; |
1682 | 0 | Im[WS(rs, 3)] = T5o - T5l; |
1683 | 0 | } |
1684 | 0 | } |
1685 | 0 | { /* output stage 2: stores Rp/Ip at indices 2, 6, 10, 14 and Rm/Im at indices 1, 5, 9, 13 */ |
1686 | 0 | E T5x, T6g, T6a, T6k, T6d, T6l, T5A, T66, T5I, T60, T5T, T6f, T5W, T65, T5P; |
1687 | 0 | E T61; |
1688 | 0 | { |
1689 | 0 | E T5t, T5w, T68, T69; |
1690 | 0 | T5t = T5r - T5s; |
1691 | 0 | T5w = T5u + T5v; |
1692 | 0 | T5x = KP353553390 * (T5t + T5w); |
1693 | 0 | T6g = KP353553390 * (T5t - T5w); |
1694 | 0 | T68 = T5D - T5C; |
1695 | 0 | T69 = T5G - T5F; |
1696 | 0 | T6a = FMA(KP461939766, T68, KP191341716 * T69); |
1697 | 0 | T6k = FNMS(KP461939766, T69, KP191341716 * T68); |
1698 | 0 | } |
1699 | 0 | { |
1700 | 0 | E T6b, T6c, T5y, T5z; |
1701 | 0 | T6b = T5K - T5J; |
1702 | 0 | T6c = T5N - T5M; |
1703 | 0 | T6d = FNMS(KP461939766, T6c, KP191341716 * T6b); |
1704 | 0 | T6l = FMA(KP461939766, T6b, KP191341716 * T6c); |
1705 | 0 | T5y = T4f - T4c; |
1706 | 0 | T5z = T2q - T2d; |
1707 | 0 | T5A = KP500000000 * (T5y + T5z); |
1708 | 0 | T66 = KP500000000 * (T5z - T5y); |
1709 | 0 | } |
1710 | 0 | { |
1711 | 0 | E T5E, T5H, T5R, T5S; |
1712 | 0 | T5E = T5C + T5D; |
1713 | 0 | T5H = T5F + T5G; |
1714 | 0 | T5I = FMA(KP191341716, T5E, KP461939766 * T5H); |
1715 | 0 | T60 = FNMS(KP191341716, T5H, KP461939766 * T5E); |
1716 | 0 | T5R = T45 - T48; |
1717 | 0 | T5S = T1A - T1T; |
1718 | 0 | T5T = KP500000000 * (T5R + T5S); |
1719 | 0 | T6f = KP500000000 * (T5R - T5S); |
1720 | 0 | } |
1721 | 0 | { |
1722 | 0 | E T5U, T5V, T5L, T5O; |
1723 | 0 | T5U = T5s + T5r; |
1724 | 0 | T5V = T5u - T5v; |
1725 | 0 | T5W = KP353553390 * (T5U + T5V); |
1726 | 0 | T65 = KP353553390 * (T5V - T5U); |
1727 | 0 | T5L = T5J + T5K; |
1728 | 0 | T5O = T5M + T5N; |
1729 | 0 | T5P = FNMS(KP191341716, T5O, KP461939766 * T5L); |
1730 | 0 | T61 = FMA(KP191341716, T5L, KP461939766 * T5O); |
1731 | 0 | } |
1732 | 0 | { |
1733 | 0 | E T5B, T5Q, T63, T64; |
1734 | 0 | T5B = T5x + T5A; |
1735 | 0 | T5Q = T5I + T5P; |
1736 | 0 | Ip[WS(rs, 2)] = T5B + T5Q; |
1737 | 0 | Im[WS(rs, 13)] = T5Q - T5B; |
1738 | 0 | T63 = T5T + T5W; |
1739 | 0 | T64 = T60 + T61; |
1740 | 0 | Rm[WS(rs, 13)] = T63 - T64; |
1741 | 0 | Rp[WS(rs, 2)] = T63 + T64; |
1742 | 0 | } |
1743 | 0 | { |
1744 | 0 | E T5X, T5Y, T5Z, T62; |
1745 | 0 | T5X = T5T - T5W; |
1746 | 0 | T5Y = T5P - T5I; |
1747 | 0 | Rm[WS(rs, 5)] = T5X - T5Y; |
1748 | 0 | Rp[WS(rs, 10)] = T5X + T5Y; |
1749 | 0 | T5Z = T5A - T5x; |
1750 | 0 | T62 = T60 - T61; |
1751 | 0 | Ip[WS(rs, 10)] = T5Z + T62; |
1752 | 0 | Im[WS(rs, 5)] = T62 - T5Z; |
1753 | 0 | } |
1754 | 0 | { |
1755 | 0 | E T67, T6e, T6n, T6o; |
1756 | 0 | T67 = T65 + T66; |
1757 | 0 | T6e = T6a + T6d; |
1758 | 0 | Ip[WS(rs, 6)] = T67 + T6e; |
1759 | 0 | Im[WS(rs, 9)] = T6e - T67; |
1760 | 0 | T6n = T6f + T6g; |
1761 | 0 | T6o = T6k + T6l; |
1762 | 0 | Rm[WS(rs, 9)] = T6n - T6o; |
1763 | 0 | Rp[WS(rs, 6)] = T6n + T6o; |
1764 | 0 | } |
1765 | 0 | { |
1766 | 0 | E T6h, T6i, T6j, T6m; |
1767 | 0 | T6h = T6f - T6g; |
1768 | 0 | T6i = T6d - T6a; |
1769 | 0 | Rm[WS(rs, 1)] = T6h - T6i; |
1770 | 0 | Rp[WS(rs, 14)] = T6h + T6i; |
1771 | 0 | T6j = T66 - T65; |
1772 | 0 | T6m = T6k - T6l; |
1773 | 0 | Ip[WS(rs, 14)] = T6j + T6m; |
1774 | 0 | Im[WS(rs, 1)] = T6m - T6j; |
1775 | 0 | } |
1776 | 0 | } |
1777 | 0 | { /* output stage 3: stores Rp/Ip at indices 1, 5, 9, 13 and Rm/Im at indices 2, 6, 10, 14 */ |
1778 | 0 | E T6D, T7W, T6O, T7M, T7C, T7L, T7z, T7V, T7r, T81, T7H, T7T, T78, T80, T7G; |
1779 | 0 | E T7Q; |
1780 | 0 | { |
1781 | 0 | E T6v, T6C, T7v, T7y; |
1782 | 0 | T6v = FNMS(KP191341716, T6u, KP461939766 * T6r); |
1783 | 0 | T6C = FMA(KP461939766, T6y, KP191341716 * T6B); |
1784 | 0 | T6D = T6v + T6C; |
1785 | 0 | T7W = T6v - T6C; |
1786 | 0 | { |
1787 | 0 | E T6K, T6N, T7A, T7B; |
1788 | 0 | T6K = KP353553390 * (T6G + T6J); |
1789 | 0 | T6N = KP500000000 * (T6L - T6M); |
1790 | 0 | T6O = T6K + T6N; |
1791 | 0 | T7M = T6N - T6K; |
1792 | 0 | T7A = FMA(KP191341716, T6r, KP461939766 * T6u); |
1793 | 0 | T7B = FNMS(KP191341716, T6y, KP461939766 * T6B); |
1794 | 0 | T7C = T7A + T7B; |
1795 | 0 | T7L = T7B - T7A; |
1796 | 0 | } |
1797 | 0 | T7v = KP500000000 * (T7t + T7u); |
1798 | 0 | T7y = KP353553390 * (T7w + T7x); |
1799 | 0 | T7z = T7v + T7y; |
1800 | 0 | T7V = T7v - T7y; |
1801 | 0 | { |
1802 | 0 | E T7j, T7R, T7q, T7S, T7f, T7m; |
1803 | 0 | T7f = KP707106781 * (T7b + T7e); |
1804 | 0 | T7j = T7f + T7i; |
1805 | 0 | T7R = T7i - T7f; |
1806 | 0 | T7m = KP707106781 * (T7k + T7l); |
1807 | 0 | T7q = T7m + T7p; |
1808 | 0 | T7S = T7p - T7m; |
1809 | 0 | T7r = FNMS(KP097545161, T7q, KP490392640 * T7j); |
1810 | 0 | T81 = FMA(KP415734806, T7R, KP277785116 * T7S); |
1811 | 0 | T7H = FMA(KP097545161, T7j, KP490392640 * T7q); |
1812 | 0 | T7T = FNMS(KP415734806, T7S, KP277785116 * T7R); |
1813 | 0 | } |
1814 | 0 | { |
1815 | 0 | E T70, T7O, T77, T7P, T6W, T73; |
1816 | 0 | T6W = KP707106781 * (T6S + T6V); |
1817 | 0 | T70 = T6W + T6Z; |
1818 | 0 | T7O = T6Z - T6W; |
1819 | 0 | T73 = KP707106781 * (T71 + T72); |
1820 | 0 | T77 = T73 + T76; |
1821 | 0 | T7P = T76 - T73; |
1822 | 0 | T78 = FMA(KP490392640, T70, KP097545161 * T77); |
1823 | 0 | T80 = FNMS(KP415734806, T7O, KP277785116 * T7P); |
1824 | 0 | T7G = FNMS(KP097545161, T70, KP490392640 * T77); |
1825 | 0 | T7Q = FMA(KP277785116, T7O, KP415734806 * T7P); |
1826 | 0 | } |
1827 | 0 | } |
1828 | 0 | { |
1829 | 0 | E T6P, T7s, T7J, T7K; |
1830 | 0 | T6P = T6D + T6O; |
1831 | 0 | T7s = T78 + T7r; |
1832 | 0 | Ip[WS(rs, 1)] = T6P + T7s; |
1833 | 0 | Im[WS(rs, 14)] = T7s - T6P; |
1834 | 0 | T7J = T7z + T7C; |
1835 | 0 | T7K = T7G + T7H; |
1836 | 0 | Rm[WS(rs, 14)] = T7J - T7K; |
1837 | 0 | Rp[WS(rs, 1)] = T7J + T7K; |
1838 | 0 | } |
1839 | 0 | { |
1840 | 0 | E T7D, T7E, T7F, T7I; |
1841 | 0 | T7D = T7z - T7C; |
1842 | 0 | T7E = T7r - T78; |
1843 | 0 | Rm[WS(rs, 6)] = T7D - T7E; |
1844 | 0 | Rp[WS(rs, 9)] = T7D + T7E; |
1845 | 0 | T7F = T6O - T6D; |
1846 | 0 | T7I = T7G - T7H; |
1847 | 0 | Ip[WS(rs, 9)] = T7F + T7I; |
1848 | 0 | Im[WS(rs, 6)] = T7I - T7F; |
1849 | 0 | } |
1850 | 0 | { |
1851 | 0 | E T7N, T7U, T83, T84; |
1852 | 0 | T7N = T7L + T7M; |
1853 | 0 | T7U = T7Q + T7T; |
1854 | 0 | Ip[WS(rs, 5)] = T7N + T7U; |
1855 | 0 | Im[WS(rs, 10)] = T7U - T7N; |
1856 | 0 | T83 = T7V + T7W; |
1857 | 0 | T84 = T80 + T81; |
1858 | 0 | Rm[WS(rs, 10)] = T83 - T84; |
1859 | 0 | Rp[WS(rs, 5)] = T83 + T84; |
1860 | 0 | } |
1861 | 0 | { |
1862 | 0 | E T7X, T7Y, T7Z, T82; |
1863 | 0 | T7X = T7V - T7W; |
1864 | 0 | T7Y = T7T - T7Q; |
1865 | 0 | Rm[WS(rs, 2)] = T7X - T7Y; |
1866 | 0 | Rp[WS(rs, 13)] = T7X + T7Y; |
1867 | 0 | T7Z = T7M - T7L; |
1868 | 0 | T82 = T80 - T81; |
1869 | 0 | Ip[WS(rs, 13)] = T7Z + T82; |
1870 | 0 | Im[WS(rs, 2)] = T82 - T7Z; |
1871 | 0 | } |
1872 | 0 | } |
1873 | 0 | { /* output stage 4: stores Rp/Ip at indices 3, 7, 11, 15 and Rm/Im at indices 0, 4, 8, 12 */ |
1874 | 0 | E T8b, T8U, T8e, T8K, T8A, T8J, T8x, T8T, T8t, T8Z, T8F, T8R, T8m, T8Y, T8E; |
1875 | 0 | E T8O; |
1876 | 0 | { |
1877 | 0 | E T87, T8a, T8v, T8w; |
1878 | 0 | T87 = FNMS(KP461939766, T86, KP191341716 * T85); |
1879 | 0 | T8a = FMA(KP191341716, T88, KP461939766 * T89); |
1880 | 0 | T8b = T87 + T8a; |
1881 | 0 | T8U = T87 - T8a; |
1882 | 0 | { |
1883 | 0 | E T8c, T8d, T8y, T8z; |
1884 | 0 | T8c = KP353553390 * (T7x - T7w); |
1885 | 0 | T8d = KP500000000 * (T6M + T6L); |
1886 | 0 | T8e = T8c + T8d; |
1887 | 0 | T8K = T8d - T8c; |
1888 | 0 | T8y = FMA(KP461939766, T85, KP191341716 * T86); |
1889 | 0 | T8z = FNMS(KP461939766, T88, KP191341716 * T89); |
1890 | 0 | T8A = T8y + T8z; |
1891 | 0 | T8J = T8z - T8y; |
1892 | 0 | } |
1893 | 0 | T8v = KP500000000 * (T7t - T7u); |
1894 | 0 | T8w = KP353553390 * (T6G - T6J); |
1895 | 0 | T8x = T8v + T8w; |
1896 | 0 | T8T = T8v - T8w; |
1897 | 0 | { |
1898 | 0 | E T8p, T8P, T8s, T8Q, T8n, T8q; |
1899 | 0 | T8n = KP707106781 * (T7l - T7k); |
1900 | 0 | T8p = T8n + T8o; |
1901 | 0 | T8P = T8o - T8n; |
1902 | 0 | T8q = KP707106781 * (T7b - T7e); |
1903 | 0 | T8s = T8q + T8r; |
1904 | 0 | T8Q = T8r - T8q; |
1905 | 0 | T8t = FNMS(KP277785116, T8s, KP415734806 * T8p); |
1906 | 0 | T8Z = FMA(KP490392640, T8P, KP097545161 * T8Q); |
1907 | 0 | T8F = FMA(KP277785116, T8p, KP415734806 * T8s); |
1908 | 0 | T8R = FNMS(KP490392640, T8Q, KP097545161 * T8P); |
1909 | 0 | } |
1910 | 0 | { |
1911 | 0 | E T8i, T8M, T8l, T8N, T8g, T8j; |
1912 | 0 | T8g = KP707106781 * (T72 - T71); |
1913 | 0 | T8i = T8g + T8h; |
1914 | 0 | T8M = T8h - T8g; |
1915 | 0 | T8j = KP707106781 * (T6S - T6V); |
1916 | 0 | T8l = T8j + T8k; |
1917 | 0 | T8N = T8k - T8j; |
1918 | 0 | T8m = FMA(KP415734806, T8i, KP277785116 * T8l); |
1919 | 0 | T8Y = FNMS(KP490392640, T8M, KP097545161 * T8N); |
1920 | 0 | T8E = FNMS(KP277785116, T8i, KP415734806 * T8l); |
1921 | 0 | T8O = FMA(KP097545161, T8M, KP490392640 * T8N); |
1922 | 0 | } |
1923 | 0 | } |
1924 | 0 | { |
1925 | 0 | E T8f, T8u, T8H, T8I; |
1926 | 0 | T8f = T8b + T8e; |
1927 | 0 | T8u = T8m + T8t; |
1928 | 0 | Ip[WS(rs, 3)] = T8f + T8u; |
1929 | 0 | Im[WS(rs, 12)] = T8u - T8f; |
1930 | 0 | T8H = T8x + T8A; |
1931 | 0 | T8I = T8E + T8F; |
1932 | 0 | Rm[WS(rs, 12)] = T8H - T8I; |
1933 | 0 | Rp[WS(rs, 3)] = T8H + T8I; |
1934 | 0 | } |
1935 | 0 | { |
1936 | 0 | E T8B, T8C, T8D, T8G; |
1937 | 0 | T8B = T8x - T8A; |
1938 | 0 | T8C = T8t - T8m; |
1939 | 0 | Rm[WS(rs, 4)] = T8B - T8C; |
1940 | 0 | Rp[WS(rs, 11)] = T8B + T8C; |
1941 | 0 | T8D = T8e - T8b; |
1942 | 0 | T8G = T8E - T8F; |
1943 | 0 | Ip[WS(rs, 11)] = T8D + T8G; |
1944 | 0 | Im[WS(rs, 4)] = T8G - T8D; |
1945 | 0 | } |
1946 | 0 | { |
1947 | 0 | E T8L, T8S, T91, T92; |
1948 | 0 | T8L = T8J + T8K; |
1949 | 0 | T8S = T8O + T8R; |
1950 | 0 | Ip[WS(rs, 7)] = T8L + T8S; |
1951 | 0 | Im[WS(rs, 8)] = T8S - T8L; |
1952 | 0 | T91 = T8T + T8U; |
1953 | 0 | T92 = T8Y + T8Z; |
1954 | 0 | Rm[WS(rs, 8)] = T91 - T92; |
1955 | 0 | Rp[WS(rs, 7)] = T91 + T92; |
1956 | 0 | } |
1957 | 0 | { |
1958 | 0 | E T8V, T8W, T8X, T90; |
1959 | 0 | T8V = T8T - T8U; |
1960 | 0 | T8W = T8R - T8O; |
1961 | 0 | Rm[0] = T8V - T8W; |
1962 | 0 | Rp[WS(rs, 15)] = T8V + T8W; |
1963 | 0 | T8X = T8K - T8J; |
1964 | 0 | T90 = T8Y - T8Z; |
1965 | 0 | Ip[WS(rs, 15)] = T8X + T90; |
1966 | 0 | Im[0] = T90 - T8X; |
1967 | 0 | } |
1968 | 0 | } |
1969 | 0 | } |
1970 | 0 | } |
1971 | 0 | } |
1972 | | |
1973 | | static const tw_instr twinstr[] = { /* twiddle-instruction table for the non-FMA variant; referenced by desc below */ |
1974 | | { TW_FULL, 1, 32 }, /* presumably requests the full twiddle set for n = 32 -- confirm against the tw_instr definition */ |
1975 | | { TW_NEXT, 1, 0 } /* presumably the terminating entry -- confirm against the tw_instr definition */ |
1976 | | }; |
1977 | | |
1978 | | static const hc2c_desc desc = { 32, "hc2cfdft_32", twinstr, &GENUS, { 404, 134, 94, 0 } }; /* descriptor for the non-FMA variant: 32 matches the generator's -n 32, then codelet name, twiddle table and genus; 404/134/94 repeat the add/mul/fma counts from the generated header comment */ |
1979 | | |
1980 | 1 | void X(codelet_hc2cfdft_32) (planner *p) { /* registration entry point (non-FMA variant): hands the kernel and its descriptor to planner p */ |
1981 | 1 | X(khc2c_register) (p, hc2cfdft_32, &desc, HC2C_VIA_DFT); /* registered with the HC2C_VIA_DFT flag */ |
1982 | 1 | } |
1983 | | #endif |