/src/fftw3/rdft/scalar/r2cf/r2cfII_32.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | /* This file was automatically generated --- DO NOT EDIT */ |
22 | | /* Generated on Tue Nov 11 06:18:25 UTC 2025 */ |
23 | | |
24 | | #include "rdft/codelet-rdft.h" |
25 | | |
26 | | #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA) |
27 | | |
28 | | /* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include rdft/scalar/r2cfII.h */ |
29 | | |
30 | | /* |
31 | | * This function contains 174 FP additions, 128 FP multiplications, |
32 | | * (or, 46 additions, 0 multiplications, 128 fused multiply/add), |
33 | | * 62 stack variables, 15 constants, and 64 memory accesses |
34 | | */ |
35 | | #include "rdft/scalar/r2cfII.h" |
36 | | |
/*
 * r2cfII_32 (FMA variant, compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined): kernel emitted by genfft's gen_r2cf
 * with "-fma -n 32 -dft-II" (see the "Generated by" comment in this branch).
 *
 * Each loop iteration reads 32 input values -- R0[WS(rs, k)] and
 * R1[WS(rs, k)] for k = 0..15 -- and writes 32 output values --
 * Cr[WS(csr, k)] and Ci[WS(csi, k)] for k = 0..15.
 *
 * Parameters:
 *   R0, R1         input arrays, indexed through stride rs
 *   Cr, Ci         output arrays, indexed through strides csr and csi
 *   v              number of loop iterations (the loop body is skipped when v <= 0)
 *   ivs            added to R0 and R1 after every iteration
 *   ovs            added to Cr and Ci after every iteration
 *
 * Multiplications appear only inside the FMA/FNMS/FMS macro calls; everything
 * else is plain addition, subtraction or negation. The DK() lines declare the
 * 15 numeric constants used by the kernel.
 *
 * NOTE(review): presumably R0/R1 hold the even/odd-indexed real samples and
 * Cr/Ci the real/imaginary parts of the result -- confirm against
 * rdft/codelet-rdft.h. DK, E, R, INT, WS, stride, FMA/FNMS/FMS and
 * MAKE_VOLATILE_STRIDE are defined in project headers not visible here.
 */
37 | | static void r2cfII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) |
38 | | { |
39 | | DK(KP773010453, +0.773010453362736960810906609758469800971041293); |
40 | | DK(KP820678790, +0.820678790828660330972281985331011598767386482); |
41 | | DK(KP956940335, +0.956940335732208864935797886980269969482849206); |
42 | | DK(KP303346683, +0.303346683607342391675883946941299872384187453); |
43 | | DK(KP995184726, +0.995184726672196886244836953109479921575474869); |
44 | | DK(KP098491403, +0.098491403357164253077197521291327432293052451); |
45 | | DK(KP980785280, +0.980785280403230449126182236134239036973933731); |
46 | | DK(KP881921264, +0.881921264348355029712756863660388349508442621); |
47 | | DK(KP534511135, +0.534511135950791641089685961295362908582039528); |
48 | | DK(KP831469612, +0.831469612302545237078788377617905756738560812); |
49 | | DK(KP923879532, +0.923879532511286756128183189396788286822416626); |
50 | | DK(KP198912367, +0.198912367379658006911597622644676228597850501); |
51 | | DK(KP668178637, +0.668178637919298919997757686523080761552472251); |
52 | | DK(KP414213562, +0.414213562373095048801688724209698078569671875); |
53 | | DK(KP707106781, +0.707106781186547524400844362104849039284835938); |
54 | | { |
55 | | INT i; |
56 | | for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { /* one 32-input transform per iteration, v iterations in total */ |
57 | | E T5, T2B, T1z, T2n, Tc, T2C, T1C, T2o, Tm, T1l, T1J, T27, Tv, T1k, T1G; |
58 | | E T26, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p; |
59 | | E T1O, T2a; |
60 | | { /* stage 1: R0 indices 0, 4, 8, 12 -> T5, T2B, T1z, T2n */ |
61 | | E T1, T2l, T4, T2m, T2, T3; |
62 | | T1 = R0[0]; |
63 | | T2l = R0[WS(rs, 8)]; |
64 | | T2 = R0[WS(rs, 4)]; |
65 | | T3 = R0[WS(rs, 12)]; |
66 | | T4 = T2 - T3; |
67 | | T2m = T2 + T3; |
68 | | T5 = FNMS(KP707106781, T4, T1); |
69 | | T2B = FNMS(KP707106781, T2m, T2l); |
70 | | T1z = FMA(KP707106781, T4, T1); |
71 | | T2n = FMA(KP707106781, T2m, T2l); |
72 | | } |
73 | | { /* stage 1: R0 indices 2, 6, 10, 14 -> Tc, T2C, T1C, T2o */ |
74 | | E T8, T1A, Tb, T1B; |
75 | | { |
76 | | E T6, T7, T9, Ta; |
77 | | T6 = R0[WS(rs, 10)]; |
78 | | T7 = R0[WS(rs, 2)]; |
79 | | T8 = FMA(KP414213562, T7, T6); |
80 | | T1A = FNMS(KP414213562, T6, T7); |
81 | | T9 = R0[WS(rs, 6)]; |
82 | | Ta = R0[WS(rs, 14)]; |
83 | | Tb = FMA(KP414213562, Ta, T9); |
84 | | T1B = FMS(KP414213562, T9, Ta); |
85 | | } |
86 | | Tc = T8 - Tb; |
87 | | T2C = T1B - T1A; |
88 | | T1C = T1A + T1B; |
89 | | T2o = T8 + Tb; |
90 | | } |
91 | | { /* stage 1: R0 indices 3, 7, 11, 15 -> Tm, T1l, T1J, T27 */ |
92 | | E Te, Tj, Th, Tk, Tf, Tg; |
93 | | Te = R0[WS(rs, 7)]; |
94 | | Tj = R0[WS(rs, 15)]; |
95 | | Tf = R0[WS(rs, 3)]; |
96 | | Tg = R0[WS(rs, 11)]; |
97 | | Th = Tf + Tg; |
98 | | Tk = Tg - Tf; |
99 | | { |
100 | | E Ti, Tl, T1H, T1I; |
101 | | Ti = FNMS(KP707106781, Th, Te); |
102 | | Tl = FNMS(KP707106781, Tk, Tj); |
103 | | Tm = FNMS(KP668178637, Tl, Ti); |
104 | | T1l = FMA(KP668178637, Ti, Tl); |
105 | | T1H = FMA(KP707106781, Th, Te); |
106 | | T1I = FMA(KP707106781, Tk, Tj); |
107 | | T1J = FMA(KP198912367, T1I, T1H); |
108 | | T27 = FNMS(KP198912367, T1H, T1I); |
109 | | } |
110 | | } |
111 | | { /* stage 1: R0 indices 1, 5, 9, 13 -> Tv, T1k, T1G, T26 */ |
112 | | E Tn, Ts, Tq, Tt, To, Tp; |
113 | | Tn = R0[WS(rs, 9)]; |
114 | | Ts = R0[WS(rs, 1)]; |
115 | | To = R0[WS(rs, 5)]; |
116 | | Tp = R0[WS(rs, 13)]; |
117 | | Tq = To + Tp; |
118 | | Tt = To - Tp; |
119 | | { |
120 | | E Tr, Tu, T1E, T1F; |
121 | | Tr = FNMS(KP707106781, Tq, Tn); |
122 | | Tu = FNMS(KP707106781, Tt, Ts); |
123 | | Tv = FNMS(KP668178637, Tu, Tr); |
124 | | T1k = FMA(KP668178637, Tr, Tu); |
125 | | T1E = FMA(KP707106781, Tq, Tn); |
126 | | T1F = FMA(KP707106781, Tt, Ts); |
127 | | T1G = FMA(KP198912367, T1F, T1E); |
128 | | T26 = FNMS(KP198912367, T1E, T1F); |
129 | | } |
130 | | } |
131 | | { /* stage 1: R1 odd indices 1, 3, ..., 15 -> T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d */ |
132 | | E TT, T16, TW, T17, T10, T1a, T13, T19, TU, TV; |
133 | | TT = R1[WS(rs, 15)]; |
134 | | T16 = R1[WS(rs, 7)]; |
135 | | TU = R1[WS(rs, 3)]; |
136 | | TV = R1[WS(rs, 11)]; |
137 | | TW = TU - TV; |
138 | | T17 = TU + TV; |
139 | | { |
140 | | E TY, TZ, T11, T12; |
141 | | TY = R1[WS(rs, 9)]; |
142 | | TZ = R1[WS(rs, 1)]; |
143 | | T10 = FMA(KP414213562, TZ, TY); |
144 | | T1a = FNMS(KP414213562, TY, TZ); |
145 | | T11 = R1[WS(rs, 5)]; |
146 | | T12 = R1[WS(rs, 13)]; |
147 | | T13 = FMA(KP414213562, T12, T11); |
148 | | T19 = FMS(KP414213562, T11, T12); |
149 | | } |
150 | | { |
151 | | E TX, T14, T1W, T1X; |
152 | | TX = FMA(KP707106781, TW, TT); |
153 | | T14 = T10 - T13; |
154 | | T15 = FMA(KP923879532, T14, TX); |
155 | | T1r = FNMS(KP923879532, T14, TX); |
156 | | T1W = FMA(KP707106781, T17, T16); |
157 | | T1X = T10 + T13; |
158 | | T1Y = FNMS(KP923879532, T1X, T1W); |
159 | | T2e = FMA(KP923879532, T1X, T1W); |
160 | | } |
161 | | { |
162 | | E T18, T1b, T1T, T1U; |
163 | | T18 = FNMS(KP707106781, T17, T16); |
164 | | T1b = T19 - T1a; |
165 | | T1c = FNMS(KP923879532, T1b, T18); |
166 | | T1s = FMA(KP923879532, T1b, T18); |
167 | | T1T = FMS(KP707106781, TW, TT); |
168 | | T1U = T1a + T19; |
169 | | T1V = FNMS(KP923879532, T1U, T1T); |
170 | | T2d = FMA(KP923879532, T1U, T1T); |
171 | | } |
172 | | } |
173 | | { /* stage 1: R1 even indices 0, 2, ..., 14 -> TK, T1o, T1R, T2b, TR, T1p, T1O, T2a */ |
174 | | E Ty, TL, TB, TM, TF, TP, TI, TO, Tz, TA; |
175 | | Ty = R1[0]; |
176 | | TL = R1[WS(rs, 8)]; |
177 | | Tz = R1[WS(rs, 4)]; |
178 | | TA = R1[WS(rs, 12)]; |
179 | | TB = Tz - TA; |
180 | | TM = Tz + TA; |
181 | | { |
182 | | E TD, TE, TG, TH; |
183 | | TD = R1[WS(rs, 10)]; |
184 | | TE = R1[WS(rs, 2)]; |
185 | | TF = FMA(KP414213562, TE, TD); |
186 | | TP = FNMS(KP414213562, TD, TE); |
187 | | TG = R1[WS(rs, 6)]; |
188 | | TH = R1[WS(rs, 14)]; |
189 | | TI = FMA(KP414213562, TH, TG); |
190 | | TO = FMS(KP414213562, TG, TH); |
191 | | } |
192 | | { |
193 | | E TC, TJ, T1P, T1Q; |
194 | | TC = FNMS(KP707106781, TB, Ty); |
195 | | TJ = TF - TI; |
196 | | TK = FNMS(KP923879532, TJ, TC); |
197 | | T1o = FMA(KP923879532, TJ, TC); |
198 | | T1P = FMA(KP707106781, TM, TL); |
199 | | T1Q = TF + TI; |
200 | | T1R = FNMS(KP923879532, T1Q, T1P); |
201 | | T2b = FMA(KP923879532, T1Q, T1P); |
202 | | } |
203 | | { |
204 | | E TN, TQ, T1M, T1N; |
205 | | TN = FNMS(KP707106781, TM, TL); |
206 | | TQ = TO - TP; |
207 | | TR = FNMS(KP923879532, TQ, TN); |
208 | | T1p = FMA(KP923879532, TQ, TN); |
209 | | T1M = FMA(KP707106781, TB, Ty); |
210 | | T1N = TP + TO; |
211 | | T1O = FNMS(KP923879532, T1N, T1M); |
212 | | T2a = FMA(KP923879532, T1N, T1M); |
213 | | } |
214 | | } |
215 | | { /* stage 2: combine intermediates and store Cr/Ci at indices 2, 5, 10, 13 */ |
216 | | E Tx, T1f, T2L, T2N, T1e, T2O, T1i, T2M; |
217 | | { |
218 | | E Td, Tw, T2J, T2K; |
219 | | Td = FNMS(KP923879532, Tc, T5); |
220 | | Tw = Tm - Tv; |
221 | | Tx = FMA(KP831469612, Tw, Td); |
222 | | T1f = FNMS(KP831469612, Tw, Td); |
223 | | T2J = FNMS(KP923879532, T2C, T2B); |
224 | | T2K = T1k + T1l; |
225 | | T2L = FMA(KP831469612, T2K, T2J); |
226 | | T2N = FNMS(KP831469612, T2K, T2J); |
227 | | } |
228 | | { |
229 | | E TS, T1d, T1g, T1h; |
230 | | TS = FNMS(KP534511135, TR, TK); |
231 | | T1d = FNMS(KP534511135, T1c, T15); |
232 | | T1e = TS - T1d; |
233 | | T2O = TS + T1d; |
234 | | T1g = FMA(KP534511135, TK, TR); |
235 | | T1h = FMA(KP534511135, T15, T1c); |
236 | | T1i = T1g - T1h; |
237 | | T2M = T1g + T1h; |
238 | | } |
239 | | Cr[WS(csr, 13)] = FNMS(KP881921264, T1e, Tx); |
240 | | Ci[WS(csi, 13)] = FNMS(KP881921264, T2M, T2L); |
241 | | Cr[WS(csr, 2)] = FMA(KP881921264, T1e, Tx); |
242 | | Ci[WS(csi, 2)] = -(FMA(KP881921264, T2M, T2L)); |
243 | | Cr[WS(csr, 10)] = FNMS(KP881921264, T1i, T1f); |
244 | | Ci[WS(csi, 10)] = -(FMA(KP881921264, T2O, T2N)); |
245 | | Cr[WS(csr, 5)] = FMA(KP881921264, T1i, T1f); |
246 | | Ci[WS(csi, 5)] = FNMS(KP881921264, T2O, T2N); |
247 | | } |
248 | | { /* stage 2: combine intermediates and store Cr/Ci at indices 0, 7, 8, 15 */ |
249 | | E T29, T2h, T2r, T2t, T2g, T2u, T2k, T2s; |
250 | | { |
251 | | E T25, T28, T2p, T2q; |
252 | | T25 = FMA(KP923879532, T1C, T1z); |
253 | | T28 = T26 - T27; |
254 | | T29 = FMA(KP980785280, T28, T25); |
255 | | T2h = FNMS(KP980785280, T28, T25); |
256 | | T2p = FMA(KP923879532, T2o, T2n); |
257 | | T2q = T1G + T1J; |
258 | | T2r = FMA(KP980785280, T2q, T2p); |
259 | | T2t = FNMS(KP980785280, T2q, T2p); |
260 | | } |
261 | | { |
262 | | E T2c, T2f, T2i, T2j; |
263 | | T2c = FNMS(KP098491403, T2b, T2a); |
264 | | T2f = FMA(KP098491403, T2e, T2d); |
265 | | T2g = T2c + T2f; |
266 | | T2u = T2f - T2c; |
267 | | T2i = FMA(KP098491403, T2a, T2b); |
268 | | T2j = FNMS(KP098491403, T2d, T2e); |
269 | | T2k = T2i - T2j; |
270 | | T2s = T2i + T2j; |
271 | | } |
272 | | Cr[WS(csr, 15)] = FNMS(KP995184726, T2g, T29); |
273 | | Ci[WS(csi, 15)] = FNMS(KP995184726, T2s, T2r); |
274 | | Cr[0] = FMA(KP995184726, T2g, T29); |
275 | | Ci[0] = -(FMA(KP995184726, T2s, T2r)); |
276 | | Cr[WS(csr, 8)] = FNMS(KP995184726, T2k, T2h); |
277 | | Ci[WS(csi, 8)] = FMS(KP995184726, T2u, T2t); |
278 | | Cr[WS(csr, 7)] = FMA(KP995184726, T2k, T2h); |
279 | | Ci[WS(csi, 7)] = FMA(KP995184726, T2u, T2t); |
280 | | } |
281 | | { /* stage 2: combine intermediates and store Cr/Ci at indices 1, 6, 9, 14 */ |
282 | | E T1n, T1v, T2F, T2H, T1u, T2I, T1y, T2G; |
283 | | { |
284 | | E T1j, T1m, T2D, T2E; |
285 | | T1j = FMA(KP923879532, Tc, T5); |
286 | | T1m = T1k - T1l; |
287 | | T1n = FMA(KP831469612, T1m, T1j); |
288 | | T1v = FNMS(KP831469612, T1m, T1j); |
289 | | T2D = FMA(KP923879532, T2C, T2B); |
290 | | T2E = Tv + Tm; |
291 | | T2F = FMA(KP831469612, T2E, T2D); |
292 | | T2H = FNMS(KP831469612, T2E, T2D); |
293 | | } |
294 | | { |
295 | | E T1q, T1t, T1w, T1x; |
296 | | T1q = FMA(KP303346683, T1p, T1o); |
297 | | T1t = FMA(KP303346683, T1s, T1r); |
298 | | T1u = T1q - T1t; |
299 | | T2I = T1q + T1t; |
300 | | T1w = FNMS(KP303346683, T1r, T1s); |
301 | | T1x = FNMS(KP303346683, T1o, T1p); |
302 | | T1y = T1w - T1x; |
303 | | T2G = T1x + T1w; |
304 | | } |
305 | | Cr[WS(csr, 14)] = FNMS(KP956940335, T1u, T1n); |
306 | | Ci[WS(csi, 14)] = FMS(KP956940335, T2G, T2F); |
307 | | Cr[WS(csr, 1)] = FMA(KP956940335, T1u, T1n); |
308 | | Ci[WS(csi, 1)] = FMA(KP956940335, T2G, T2F); |
309 | | Cr[WS(csr, 9)] = FNMS(KP956940335, T1y, T1v); |
310 | | Ci[WS(csi, 9)] = FNMS(KP956940335, T2I, T2H); |
311 | | Cr[WS(csr, 6)] = FMA(KP956940335, T1y, T1v); |
312 | | Ci[WS(csi, 6)] = -(FMA(KP956940335, T2I, T2H)); |
313 | | } |
314 | | { /* stage 2: combine intermediates and store Cr/Ci at indices 3, 4, 11, 12 */ |
315 | | E T1L, T21, T2x, T2z, T20, T2A, T24, T2y; |
316 | | { |
317 | | E T1D, T1K, T2v, T2w; |
318 | | T1D = FNMS(KP923879532, T1C, T1z); |
319 | | T1K = T1G - T1J; |
320 | | T1L = FMA(KP980785280, T1K, T1D); |
321 | | T21 = FNMS(KP980785280, T1K, T1D); |
322 | | T2v = FNMS(KP923879532, T2o, T2n); |
323 | | T2w = T26 + T27; |
324 | | T2x = FNMS(KP980785280, T2w, T2v); |
325 | | T2z = FMA(KP980785280, T2w, T2v); |
326 | | } |
327 | | { |
328 | | E T1S, T1Z, T22, T23; |
329 | | T1S = FMA(KP820678790, T1R, T1O); |
330 | | T1Z = FNMS(KP820678790, T1Y, T1V); |
331 | | T20 = T1S + T1Z; |
332 | | T2A = T1Z - T1S; |
333 | | T22 = FMA(KP820678790, T1V, T1Y); |
334 | | T23 = FNMS(KP820678790, T1O, T1R); |
335 | | T24 = T22 - T23; |
336 | | T2y = T23 + T22; |
337 | | } |
338 | | Cr[WS(csr, 12)] = FNMS(KP773010453, T20, T1L); |
339 | | Ci[WS(csi, 12)] = FMS(KP773010453, T2y, T2x); |
340 | | Cr[WS(csr, 3)] = FMA(KP773010453, T20, T1L); |
341 | | Ci[WS(csi, 3)] = FMA(KP773010453, T2y, T2x); |
342 | | Cr[WS(csr, 11)] = FNMS(KP773010453, T24, T21); |
343 | | Ci[WS(csi, 11)] = FMA(KP773010453, T2A, T2z); |
344 | | Cr[WS(csr, 4)] = FMA(KP773010453, T24, T21); |
345 | | Ci[WS(csi, 4)] = FMS(KP773010453, T2A, T2z); |
346 | | } |
347 | | } |
348 | | } |
349 | | } |
350 | | |
/*
 * Descriptor handed to X(kr2c_register) together with the FMA kernel.
 * 32 matches the generator's "-n 32"; 46, 0 and 128 match the additions,
 * multiplications and fused multiply/adds counted in the generator comment
 * of this branch.
 * NOTE(review): the meaning of the trailing 0 and of GENUS is defined in
 * project headers not visible here -- confirm there.
 */
351 | | static const kr2c_desc desc = { 32, "r2cfII_32", { 46, 0, 128, 0 }, &GENUS }; |
352 | | |
/*
 * Registration entry point (FMA branch): passes the planner p, the kernel
 * r2cfII_32 and its descriptor desc to X(kr2c_register).
 * NOTE(review): X() is a project macro, presumably a symbol-name wrapper --
 * confirm in the FFTW headers.
 */
353 | | void X(codelet_r2cfII_32) (planner *p) { X(kr2c_register) (p, r2cfII_32, &desc); |
354 | | } |
355 | | |
356 | | #else |
357 | | |
358 | | /* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 32 -name r2cfII_32 -dft-II -include rdft/scalar/r2cfII.h */ |
359 | | |
360 | | /* |
361 | | * This function contains 174 FP additions, 82 FP multiplications, |
362 | | * (or, 138 additions, 46 multiplications, 36 fused multiply/add), |
363 | | * 62 stack variables, 15 constants, and 64 memory accesses |
364 | | */ |
365 | | #include "rdft/scalar/r2cfII.h" |
366 | | |
/*
 * r2cfII_32 (non-FMA variant, compiled in the #else branch): kernel emitted
 * by genfft's gen_r2cf with "-n 32 -dft-II" and without "-fma" (see the
 * "Generated by" comment in this branch).
 *
 * Each loop iteration reads 32 input values -- R0[WS(rs, k)] and
 * R1[WS(rs, k)] for k = 0..15 -- and writes 32 output values --
 * Cr[WS(csr, k)] and Ci[WS(csi, k)] for k = 0..15.
 *
 * Parameters:
 *   R0, R1         input arrays, indexed through stride rs
 *   Cr, Ci         output arrays, indexed through strides csr and csi
 *   v              number of loop iterations (the loop body is skipped when v <= 0)
 *   ivs            added to R0 and R1 after every iteration
 *   ovs            added to Cr and Ci after every iteration
 *
 * Unlike the FMA variant, products are written as explicit "constant * value"
 * multiplications, with FMA/FNMS used only where one product is combined
 * with another. The DK() lines declare the 15 numeric constants used.
 *
 * NOTE(review): presumably R0/R1 hold the even/odd-indexed real samples and
 * Cr/Ci the real/imaginary parts of the result -- confirm against
 * rdft/codelet-rdft.h. DK, E, R, INT, WS, stride, FMA/FNMS and
 * MAKE_VOLATILE_STRIDE are defined in project headers not visible here.
 */
367 | | static void r2cfII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) |
368 | 0 | { |
369 | 0 | DK(KP471396736, +0.471396736825997648556387625905254377657460319); |
370 | 0 | DK(KP881921264, +0.881921264348355029712756863660388349508442621); |
371 | 0 | DK(KP634393284, +0.634393284163645498215171613225493370675687095); |
372 | 0 | DK(KP773010453, +0.773010453362736960810906609758469800971041293); |
373 | 0 | DK(KP290284677, +0.290284677254462367636192375817395274691476278); |
374 | 0 | DK(KP956940335, +0.956940335732208864935797886980269969482849206); |
375 | 0 | DK(KP995184726, +0.995184726672196886244836953109479921575474869); |
376 | 0 | DK(KP098017140, +0.098017140329560601994195563888641845861136673); |
377 | 0 | DK(KP555570233, +0.555570233019602224742830813948532874374937191); |
378 | 0 | DK(KP831469612, +0.831469612302545237078788377617905756738560812); |
379 | 0 | DK(KP195090322, +0.195090322016128267848284868477022240927691618); |
380 | 0 | DK(KP980785280, +0.980785280403230449126182236134239036973933731); |
381 | 0 | DK(KP382683432, +0.382683432365089771728459984030398866761344562); |
382 | 0 | DK(KP923879532, +0.923879532511286756128183189396788286822416626); |
383 | 0 | DK(KP707106781, +0.707106781186547524400844362104849039284835938); |
384 | 0 | { |
385 | 0 | INT i; |
386 | 0 | for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { /* one 32-input transform per iteration, v iterations in total */ |
387 | 0 | E T5, T2D, T1z, T2q, Tc, T2C, T1C, T2n, Tm, T1k, T1J, T26, Tv, T1l, T1G; |
388 | 0 | E T27, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p; |
389 | 0 | E T1O, T2a; |
390 | 0 | { /* stage 1: R0 indices 0, 4, 8, 12 -> T5, T2D, T1z, T2q */ |
391 | 0 | E T1, T2p, T4, T2o, T2, T3; |
392 | 0 | T1 = R0[0]; |
393 | 0 | T2p = R0[WS(rs, 8)]; |
394 | 0 | T2 = R0[WS(rs, 4)]; |
395 | 0 | T3 = R0[WS(rs, 12)]; |
396 | 0 | T4 = KP707106781 * (T2 - T3); |
397 | 0 | T2o = KP707106781 * (T2 + T3); |
398 | 0 | T5 = T1 + T4; |
399 | 0 | T2D = T2p - T2o; |
400 | 0 | T1z = T1 - T4; |
401 | 0 | T2q = T2o + T2p; |
402 | 0 | } |
403 | 0 | { /* stage 1: R0 indices 2, 6, 10, 14 -> Tc, T2C, T1C, T2n */ |
404 | 0 | E T8, T1A, Tb, T1B; |
405 | 0 | { |
406 | 0 | E T6, T7, T9, Ta; |
407 | 0 | T6 = R0[WS(rs, 2)]; |
408 | 0 | T7 = R0[WS(rs, 10)]; |
409 | 0 | T8 = FNMS(KP382683432, T7, KP923879532 * T6); |
410 | 0 | T1A = FMA(KP382683432, T6, KP923879532 * T7); |
411 | 0 | T9 = R0[WS(rs, 6)]; |
412 | 0 | Ta = R0[WS(rs, 14)]; |
413 | 0 | Tb = FNMS(KP923879532, Ta, KP382683432 * T9); |
414 | 0 | T1B = FMA(KP923879532, T9, KP382683432 * Ta); |
415 | 0 | } |
416 | 0 | Tc = T8 + Tb; |
417 | 0 | T2C = Tb - T8; |
418 | 0 | T1C = T1A - T1B; |
419 | 0 | T2n = T1A + T1B; |
420 | 0 | } |
421 | 0 | { /* stage 1: R0 indices 1, 5, 9, 13 -> Tm, T1k, T1J, T26 */ |
422 | 0 | E Te, Tk, Th, Tj, Tf, Tg; |
423 | 0 | Te = R0[WS(rs, 1)]; |
424 | 0 | Tk = R0[WS(rs, 9)]; |
425 | 0 | Tf = R0[WS(rs, 5)]; |
426 | 0 | Tg = R0[WS(rs, 13)]; |
427 | 0 | Th = KP707106781 * (Tf - Tg); |
428 | 0 | Tj = KP707106781 * (Tf + Tg); |
429 | 0 | { |
430 | 0 | E Ti, Tl, T1H, T1I; |
431 | 0 | Ti = Te + Th; |
432 | 0 | Tl = Tj + Tk; |
433 | 0 | Tm = FNMS(KP195090322, Tl, KP980785280 * Ti); |
434 | 0 | T1k = FMA(KP195090322, Ti, KP980785280 * Tl); |
435 | 0 | T1H = Tk - Tj; |
436 | 0 | T1I = Te - Th; |
437 | 0 | T1J = FNMS(KP555570233, T1I, KP831469612 * T1H); |
438 | 0 | T26 = FMA(KP831469612, T1I, KP555570233 * T1H); |
439 | 0 | } |
440 | 0 | } |
441 | 0 | { /* stage 1: R0 indices 3, 7, 11, 15 -> Tv, T1l, T1G, T27 */ |
442 | 0 | E Tq, Tt, Tp, Ts, Tn, To; |
443 | 0 | Tq = R0[WS(rs, 15)]; |
444 | 0 | Tt = R0[WS(rs, 7)]; |
445 | 0 | Tn = R0[WS(rs, 3)]; |
446 | 0 | To = R0[WS(rs, 11)]; |
447 | 0 | Tp = KP707106781 * (Tn - To); |
448 | 0 | Ts = KP707106781 * (Tn + To); |
449 | 0 | { |
450 | 0 | E Tr, Tu, T1E, T1F; |
451 | 0 | Tr = Tp - Tq; |
452 | 0 | Tu = Ts + Tt; |
453 | 0 | Tv = FMA(KP980785280, Tr, KP195090322 * Tu); |
454 | 0 | T1l = FNMS(KP980785280, Tu, KP195090322 * Tr); |
455 | 0 | T1E = Tt - Ts; |
456 | 0 | T1F = Tp + Tq; |
457 | 0 | T1G = FNMS(KP555570233, T1F, KP831469612 * T1E); |
458 | 0 | T27 = FMA(KP831469612, T1F, KP555570233 * T1E); |
459 | 0 | } |
460 | 0 | } |
461 | 0 | { /* stage 1: R1 odd indices 1, 3, ..., 15 -> T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d */ |
462 | 0 | E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU; |
463 | 0 | TW = R1[WS(rs, 15)]; |
464 | 0 | T1a = R1[WS(rs, 7)]; |
465 | 0 | TT = R1[WS(rs, 3)]; |
466 | 0 | TU = R1[WS(rs, 11)]; |
467 | 0 | TV = KP707106781 * (TT - TU); |
468 | 0 | T19 = KP707106781 * (TT + TU); |
469 | 0 | { |
470 | 0 | E TY, TZ, T11, T12; |
471 | 0 | TY = R1[WS(rs, 1)]; |
472 | 0 | TZ = R1[WS(rs, 9)]; |
473 | 0 | T10 = FNMS(KP382683432, TZ, KP923879532 * TY); |
474 | 0 | T16 = FMA(KP382683432, TY, KP923879532 * TZ); |
475 | 0 | T11 = R1[WS(rs, 5)]; |
476 | 0 | T12 = R1[WS(rs, 13)]; |
477 | 0 | T13 = FNMS(KP923879532, T12, KP382683432 * T11); |
478 | 0 | T17 = FMA(KP923879532, T11, KP382683432 * T12); |
479 | 0 | } |
480 | 0 | { |
481 | 0 | E TX, T14, T1W, T1X; |
482 | 0 | TX = TV - TW; |
483 | 0 | T14 = T10 + T13; |
484 | 0 | T15 = TX + T14; |
485 | 0 | T1r = TX - T14; |
486 | 0 | T1W = T13 - T10; |
487 | 0 | T1X = T1a - T19; |
488 | 0 | T1Y = T1W - T1X; |
489 | 0 | T2e = T1W + T1X; |
490 | 0 | } |
491 | 0 | { |
492 | 0 | E T18, T1b, T1T, T1U; |
493 | 0 | T18 = T16 + T17; |
494 | 0 | T1b = T19 + T1a; |
495 | 0 | T1c = T18 + T1b; |
496 | 0 | T1s = T1b - T18; |
497 | 0 | T1T = TV + TW; |
498 | 0 | T1U = T16 - T17; |
499 | 0 | T1V = T1T + T1U; |
500 | 0 | T2d = T1U - T1T; |
501 | 0 | } |
502 | 0 | } |
503 | 0 | { /* stage 1: R1 even indices 0, 2, ..., 14 -> TK, T1o, T1R, T2b, TR, T1p, T1O, T2a */ |
504 | 0 | E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA; |
505 | 0 | Ty = R1[0]; |
506 | 0 | TP = R1[WS(rs, 8)]; |
507 | 0 | Tz = R1[WS(rs, 4)]; |
508 | 0 | TA = R1[WS(rs, 12)]; |
509 | 0 | TB = KP707106781 * (Tz - TA); |
510 | 0 | TO = KP707106781 * (Tz + TA); |
511 | 0 | { |
512 | 0 | E TD, TE, TG, TH; |
513 | 0 | TD = R1[WS(rs, 2)]; |
514 | 0 | TE = R1[WS(rs, 10)]; |
515 | 0 | TF = FNMS(KP382683432, TE, KP923879532 * TD); |
516 | 0 | TL = FMA(KP382683432, TD, KP923879532 * TE); |
517 | 0 | TG = R1[WS(rs, 6)]; |
518 | 0 | TH = R1[WS(rs, 14)]; |
519 | 0 | TI = FNMS(KP923879532, TH, KP382683432 * TG); |
520 | 0 | TM = FMA(KP923879532, TG, KP382683432 * TH); |
521 | 0 | } |
522 | 0 | { |
523 | 0 | E TC, TJ, T1P, T1Q; |
524 | 0 | TC = Ty + TB; |
525 | 0 | TJ = TF + TI; |
526 | 0 | TK = TC + TJ; |
527 | 0 | T1o = TC - TJ; |
528 | 0 | T1P = TI - TF; |
529 | 0 | T1Q = TP - TO; |
530 | 0 | T1R = T1P - T1Q; |
531 | 0 | T2b = T1P + T1Q; |
532 | 0 | } |
533 | 0 | { |
534 | 0 | E TN, TQ, T1M, T1N; |
535 | 0 | TN = TL + TM; |
536 | 0 | TQ = TO + TP; |
537 | 0 | TR = TN + TQ; |
538 | 0 | T1p = TQ - TN; |
539 | 0 | T1M = Ty - TB; |
540 | 0 | T1N = TL - TM; |
541 | 0 | T1O = T1M - T1N; |
542 | 0 | T2a = T1M + T1N; |
543 | 0 | } |
544 | 0 | } |
545 | 0 | { /* stage 2: combine intermediates and store Cr/Ci at indices 0, 7, 8, 15 */ |
546 | 0 | E Tx, T1f, T2s, T2u, T1e, T2l, T1i, T2t; |
547 | 0 | { |
548 | 0 | E Td, Tw, T2m, T2r; |
549 | 0 | Td = T5 + Tc; |
550 | 0 | Tw = Tm + Tv; |
551 | 0 | Tx = Td - Tw; |
552 | 0 | T1f = Td + Tw; |
553 | 0 | T2m = T1l - T1k; |
554 | 0 | T2r = T2n + T2q; |
555 | 0 | T2s = T2m - T2r; |
556 | 0 | T2u = T2m + T2r; |
557 | 0 | } |
558 | 0 | { |
559 | 0 | E TS, T1d, T1g, T1h; |
560 | 0 | TS = FMA(KP098017140, TK, KP995184726 * TR); |
561 | 0 | T1d = FNMS(KP995184726, T1c, KP098017140 * T15); |
562 | 0 | T1e = TS + T1d; |
563 | 0 | T2l = T1d - TS; |
564 | 0 | T1g = FNMS(KP098017140, TR, KP995184726 * TK); |
565 | 0 | T1h = FMA(KP995184726, T15, KP098017140 * T1c); |
566 | 0 | T1i = T1g + T1h; |
567 | 0 | T2t = T1h - T1g; |
568 | 0 | } |
569 | 0 | Cr[WS(csr, 8)] = Tx - T1e; |
570 | 0 | Ci[WS(csi, 8)] = T2t - T2u; |
571 | 0 | Cr[WS(csr, 7)] = Tx + T1e; |
572 | 0 | Ci[WS(csi, 7)] = T2t + T2u; |
573 | 0 | Cr[WS(csr, 15)] = T1f - T1i; |
574 | 0 | Ci[WS(csi, 15)] = T2l - T2s; |
575 | 0 | Cr[0] = T1f + T1i; |
576 | 0 | Ci[0] = T2l + T2s; |
577 | 0 | } |
578 | 0 | { /* stage 2: combine intermediates and store Cr/Ci at indices 1, 6, 9, 14 */ |
579 | 0 | E T29, T2h, T2M, T2O, T2g, T2J, T2k, T2N; |
580 | 0 | { |
581 | 0 | E T25, T28, T2K, T2L; |
582 | 0 | T25 = T1z + T1C; |
583 | 0 | T28 = T26 - T27; |
584 | 0 | T29 = T25 + T28; |
585 | 0 | T2h = T25 - T28; |
586 | 0 | T2K = T1J + T1G; |
587 | 0 | T2L = T2C + T2D; |
588 | 0 | T2M = T2K - T2L; |
589 | 0 | T2O = T2K + T2L; |
590 | 0 | } |
591 | 0 | { |
592 | 0 | E T2c, T2f, T2i, T2j; |
593 | 0 | T2c = FMA(KP956940335, T2a, KP290284677 * T2b); |
594 | 0 | T2f = FNMS(KP290284677, T2e, KP956940335 * T2d); |
595 | 0 | T2g = T2c + T2f; |
596 | 0 | T2J = T2f - T2c; |
597 | 0 | T2i = FMA(KP290284677, T2d, KP956940335 * T2e); |
598 | 0 | T2j = FNMS(KP290284677, T2a, KP956940335 * T2b); |
599 | 0 | T2k = T2i - T2j; |
600 | 0 | T2N = T2j + T2i; |
601 | 0 | } |
602 | 0 | Cr[WS(csr, 14)] = T29 - T2g; |
603 | 0 | Ci[WS(csi, 14)] = T2N - T2O; |
604 | 0 | Cr[WS(csr, 1)] = T29 + T2g; |
605 | 0 | Ci[WS(csi, 1)] = T2N + T2O; |
606 | 0 | Cr[WS(csr, 9)] = T2h - T2k; |
607 | 0 | Ci[WS(csi, 9)] = T2J - T2M; |
608 | 0 | Cr[WS(csr, 6)] = T2h + T2k; |
609 | 0 | Ci[WS(csi, 6)] = T2J + T2M; |
610 | 0 | } |
611 | 0 | { /* stage 2: combine intermediates and store Cr/Ci at indices 3, 4, 11, 12 */ |
612 | 0 | E T1n, T1v, T2y, T2A, T1u, T2v, T1y, T2z; |
613 | 0 | { |
614 | 0 | E T1j, T1m, T2w, T2x; |
615 | 0 | T1j = T5 - Tc; |
616 | 0 | T1m = T1k + T1l; |
617 | 0 | T1n = T1j + T1m; |
618 | 0 | T1v = T1j - T1m; |
619 | 0 | T2w = Tv - Tm; |
620 | 0 | T2x = T2q - T2n; |
621 | 0 | T2y = T2w - T2x; |
622 | 0 | T2A = T2w + T2x; |
623 | 0 | } |
624 | 0 | { |
625 | 0 | E T1q, T1t, T1w, T1x; |
626 | 0 | T1q = FMA(KP773010453, T1o, KP634393284 * T1p); |
627 | 0 | T1t = FNMS(KP634393284, T1s, KP773010453 * T1r); |
628 | 0 | T1u = T1q + T1t; |
629 | 0 | T2v = T1t - T1q; |
630 | 0 | T1w = FMA(KP634393284, T1r, KP773010453 * T1s); |
631 | 0 | T1x = FNMS(KP634393284, T1o, KP773010453 * T1p); |
632 | 0 | T1y = T1w - T1x; |
633 | 0 | T2z = T1x + T1w; |
634 | 0 | } |
635 | 0 | Cr[WS(csr, 12)] = T1n - T1u; |
636 | 0 | Ci[WS(csi, 12)] = T2z - T2A; |
637 | 0 | Cr[WS(csr, 3)] = T1n + T1u; |
638 | 0 | Ci[WS(csi, 3)] = T2z + T2A; |
639 | 0 | Cr[WS(csr, 11)] = T1v - T1y; |
640 | 0 | Ci[WS(csi, 11)] = T2v - T2y; |
641 | 0 | Cr[WS(csr, 4)] = T1v + T1y; |
642 | 0 | Ci[WS(csi, 4)] = T2v + T2y; |
643 | 0 | } |
644 | 0 | { /* stage 2: combine intermediates and store Cr/Ci at indices 2, 5, 10, 13 */ |
645 | 0 | E T1L, T21, T2G, T2I, T20, T2H, T24, T2B; |
646 | 0 | { |
647 | 0 | E T1D, T1K, T2E, T2F; |
648 | 0 | T1D = T1z - T1C; |
649 | 0 | T1K = T1G - T1J; |
650 | 0 | T1L = T1D + T1K; |
651 | 0 | T21 = T1D - T1K; |
652 | 0 | T2E = T2C - T2D; |
653 | 0 | T2F = T26 + T27; |
654 | 0 | T2G = T2E - T2F; |
655 | 0 | T2I = T2F + T2E; |
656 | 0 | } |
657 | 0 | { |
658 | 0 | E T1S, T1Z, T22, T23; |
659 | 0 | T1S = FMA(KP881921264, T1O, KP471396736 * T1R); |
660 | 0 | T1Z = FMA(KP881921264, T1V, KP471396736 * T1Y); |
661 | 0 | T20 = T1S - T1Z; |
662 | 0 | T2H = T1S + T1Z; |
663 | 0 | T22 = FNMS(KP471396736, T1V, KP881921264 * T1Y); |
664 | 0 | T23 = FNMS(KP471396736, T1O, KP881921264 * T1R); |
665 | 0 | T24 = T22 - T23; |
666 | 0 | T2B = T23 + T22; |
667 | 0 | } |
668 | 0 | Cr[WS(csr, 13)] = T1L - T20; |
669 | 0 | Ci[WS(csi, 13)] = T2B - T2G; |
670 | 0 | Cr[WS(csr, 2)] = T1L + T20; |
671 | 0 | Ci[WS(csi, 2)] = T2B + T2G; |
672 | 0 | Cr[WS(csr, 10)] = T21 - T24; |
673 | 0 | Ci[WS(csi, 10)] = T2I - T2H; |
674 | 0 | Cr[WS(csr, 5)] = T21 + T24; |
675 | 0 | Ci[WS(csi, 5)] = -(T2H + T2I); |
676 | 0 | } |
677 | 0 | } |
678 | 0 | } |
679 | 0 | } |
680 | | |
/*
 * Descriptor handed to X(kr2c_register) together with the non-FMA kernel.
 * 32 matches the generator's "-n 32"; 138, 46 and 36 match the additions,
 * multiplications and fused multiply/adds counted in the generator comment
 * of this branch.
 * NOTE(review): the meaning of the trailing 0 and of GENUS is defined in
 * project headers not visible here -- confirm there.
 */
681 | | static const kr2c_desc desc = { 32, "r2cfII_32", { 138, 46, 36, 0 }, &GENUS }; |
682 | | |
/*
 * Registration entry point (non-FMA branch): passes the planner p, the
 * kernel r2cfII_32 and its descriptor desc to X(kr2c_register).
 * NOTE(review): X() is a project macro, presumably a symbol-name wrapper --
 * confirm in the FFTW headers.
 */
683 | 1 | void X(codelet_r2cfII_32) (planner *p) { X(kr2c_register) (p, r2cfII_32, &desc); |
684 | 1 | } |
685 | | |
686 | | #endif |