/src/fftw3/rdft/scalar/r2cb/r2cbIII_32.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | | * |
5 | | * This program is free software; you can redistribute it and/or modify |
6 | | * it under the terms of the GNU General Public License as published by |
7 | | * the Free Software Foundation; either version 2 of the License, or |
8 | | * (at your option) any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, |
11 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | * GNU General Public License for more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License |
16 | | * along with this program; if not, write to the Free Software |
17 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | | * |
19 | | */ |
20 | | |
21 | | /* This file was automatically generated --- DO NOT EDIT */ |
22 | | /* Generated on Sat Nov 15 06:11:31 UTC 2025 */ |
23 | | |
24 | | #include "rdft/codelet-rdft.h" |
25 | | |
26 | | #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA) |
27 | | |
28 | | /* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -name r2cbIII_32 -dft-III -include rdft/scalar/r2cbIII.h */ |
29 | | |
30 | | /* |
31 | | * This function contains 174 FP additions, 100 FP multiplications, |
32 | | * (or, 106 additions, 32 multiplications, 68 fused multiply/add), |
33 | | * 65 stack variables, 18 constants, and 64 memory accesses |
34 | | */ |
35 | | #include "rdft/scalar/r2cbIII.h" |
36 | | |
37 | | static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) |
38 | | { /* FMA-oriented variant of the generated size-32 kernel: each pass reads Cr and Ci at the 16 positions 0 and WS(cs*, 1..15), and writes R0 and R1 at the 16 positions 0 and WS(rs, 1..15). NOTE(review): presumably the backward type-III real transform, going by the gen_r2cb / -dft-III generator flags; confirm against the FFTW sources. */ |
39 | | DK(KP303346683, +0.303346683607342391675883946941299872384187453); /* DK introduces a named constant; each name spells the leading digits of its value */ |
40 | | DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); |
41 | | DK(KP534511135, +0.534511135950791641089685961295362908582039528); |
42 | | DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); |
43 | | DK(KP820678790, +0.820678790828660330972281985331011598767386482); |
44 | | DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); |
45 | | DK(KP098491403, +0.098491403357164253077197521291327432293052451); |
46 | | DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); |
47 | | DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */ |
48 | | DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); /* 2*cos(pi/8) */ |
49 | | DK(KP198912367, +0.198912367379658006911597622644676228597850501); |
50 | | DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); |
51 | | DK(KP668178637, +0.668178637919298919997757686523080761552472251); |
52 | | DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); |
53 | | DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */ |
54 | | DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* sqrt(2) */ |
55 | | DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); |
56 | | DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* sqrt(2) - 1 */ |
57 | | { |
58 | | INT i; |
59 | | for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { /* v passes; after each pass the outputs R0/R1 step by ovs and the inputs Cr/Ci step by ivs. MAKE_VOLATILE_STRIDE is defined outside this listing; presumably an optimizer hint, verify. */ |
60 | | E T7, T2i, T2E, Tz, T1e, T1I, T1Z, T1x, Te, T22, T2F, T2j, T1h, T1y, TK; /* intermediates produced by the four load stages below and consumed by the output stages */ |
61 | | E T1J, Tm, T2B, TW, T1k, T1C, T1M, T28, T2m, Tt, T2A, T17, T1j, T1F, T1L; |
62 | | E T2d, T2l; |
63 | | { /* load stage 1: Cr and Ci at positions 0, 15, 8, 7, combined into pairwise sums and differences */ |
64 | | E T3, Tv, T1d, T2g, T6, T1a, Ty, T2h; |
65 | | { |
66 | | E T1, T2, T1b, T1c; |
67 | | T1 = Cr[0]; |
68 | | T2 = Cr[WS(csr, 15)]; |
69 | | T3 = T1 + T2; |
70 | | Tv = T1 - T2; |
71 | | T1b = Ci[0]; |
72 | | T1c = Ci[WS(csi, 15)]; |
73 | | T1d = T1b + T1c; |
74 | | T2g = T1c - T1b; |
75 | | } |
76 | | { |
77 | | E T4, T5, Tw, Tx; |
78 | | T4 = Cr[WS(csr, 8)]; |
79 | | T5 = Cr[WS(csr, 7)]; |
80 | | T6 = T4 + T5; |
81 | | T1a = T4 - T5; |
82 | | Tw = Ci[WS(csi, 8)]; |
83 | | Tx = Ci[WS(csi, 7)]; |
84 | | Ty = Tw + Tx; |
85 | | T2h = Tx - Tw; |
86 | | } |
87 | | T7 = T3 + T6; |
88 | | T2i = T2g - T2h; |
89 | | T2E = T2h + T2g; |
90 | | Tz = Tv - Ty; |
91 | | T1e = T1a + T1d; |
92 | | T1I = T1a - T1d; |
93 | | T1Z = T3 - T6; |
94 | | T1x = Tv + Ty; |
95 | | } |
96 | | { /* load stage 2: Cr and Ci at positions 4, 11, 3, 12 */ |
97 | | E Ta, TA, TD, T20, Td, TF, TI, T21; |
98 | | { |
99 | | E T8, T9, TB, TC; |
100 | | T8 = Cr[WS(csr, 4)]; |
101 | | T9 = Cr[WS(csr, 11)]; |
102 | | Ta = T8 + T9; |
103 | | TA = T8 - T9; |
104 | | TB = Ci[WS(csi, 4)]; |
105 | | TC = Ci[WS(csi, 11)]; |
106 | | TD = TB + TC; |
107 | | T20 = TC - TB; |
108 | | } |
109 | | { |
110 | | E Tb, Tc, TG, TH; |
111 | | Tb = Cr[WS(csr, 3)]; |
112 | | Tc = Cr[WS(csr, 12)]; |
113 | | Td = Tb + Tc; |
114 | | TF = Tb - Tc; |
115 | | TG = Ci[WS(csi, 3)]; |
116 | | TH = Ci[WS(csi, 12)]; |
117 | | TI = TG + TH; |
118 | | T21 = TG - TH; |
119 | | } |
120 | | Te = Ta + Td; |
121 | | T22 = T20 - T21; |
122 | | T2F = T20 + T21; |
123 | | T2j = Ta - Td; |
124 | | { |
125 | | E T1f, T1g, TE, TJ; |
126 | | T1f = TF + TI; |
127 | | T1g = TA + TD; |
128 | | T1h = T1f - T1g; |
129 | | T1y = T1g + T1f; |
130 | | TE = TA - TD; |
131 | | TJ = TF - TI; |
132 | | TK = TE + TJ; |
133 | | T1J = TE - TJ; |
134 | | } |
135 | | } |
136 | | { /* load stage 3: Cr and Ci at positions 2, 13, 10, 5 */ |
137 | | E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV; |
138 | | { |
139 | | E Tg, Th, TS, TT; |
140 | | Tg = Cr[WS(csr, 2)]; |
141 | | Th = Cr[WS(csr, 13)]; |
142 | | Ti = Tg + Th; |
143 | | TM = Tg - Th; |
144 | | TS = Ci[WS(csi, 2)]; |
145 | | TT = Ci[WS(csi, 13)]; |
146 | | TU = TS + TT; |
147 | | T25 = TS - TT; |
148 | | } |
149 | | { |
150 | | E Tj, Tk, TN, TO; |
151 | | Tj = Cr[WS(csr, 10)]; |
152 | | Tk = Cr[WS(csr, 5)]; |
153 | | Tl = Tj + Tk; |
154 | | TR = Tj - Tk; |
155 | | TN = Ci[WS(csi, 10)]; |
156 | | TO = Ci[WS(csi, 5)]; |
157 | | TP = TN + TO; |
158 | | T26 = TN - TO; |
159 | | } |
160 | | Tm = Ti + Tl; |
161 | | T2B = T26 + T25; |
162 | | TQ = TM - TP; |
163 | | TV = TR + TU; |
164 | | TW = FNMS(KP414213562, TV, TQ); /* FNMS and FMA are macros defined outside this listing; presumably c - a*b and a*b + c respectively, confirm in the codelet header */ |
165 | | T1k = FMA(KP414213562, TQ, TV); |
166 | | { |
167 | | E T1A, T1B, T24, T27; |
168 | | T1A = TR - TU; |
169 | | T1B = TM + TP; |
170 | | T1C = FMA(KP414213562, T1B, T1A); |
171 | | T1M = FNMS(KP414213562, T1A, T1B); |
172 | | T24 = Ti - Tl; |
173 | | T27 = T25 - T26; |
174 | | T28 = T24 - T27; |
175 | | T2m = T24 + T27; |
176 | | } |
177 | | } |
178 | | { /* load stage 4: Cr and Ci at positions 1, 14, 6, 9 */ |
179 | | E Tp, TX, T14, T2a, Ts, T15, T10, T2b, T11, T16; |
180 | | { |
181 | | E Tn, To, T12, T13; |
182 | | Tn = Cr[WS(csr, 1)]; |
183 | | To = Cr[WS(csr, 14)]; |
184 | | Tp = Tn + To; |
185 | | TX = Tn - To; |
186 | | T12 = Ci[WS(csi, 1)]; |
187 | | T13 = Ci[WS(csi, 14)]; |
188 | | T14 = T12 + T13; |
189 | | T2a = T13 - T12; |
190 | | } |
191 | | { |
192 | | E Tq, Tr, TY, TZ; |
193 | | Tq = Cr[WS(csr, 6)]; |
194 | | Tr = Cr[WS(csr, 9)]; |
195 | | Ts = Tq + Tr; |
196 | | T15 = Tq - Tr; |
197 | | TY = Ci[WS(csi, 6)]; |
198 | | TZ = Ci[WS(csi, 9)]; |
199 | | T10 = TY + TZ; |
200 | | T2b = TY - TZ; |
201 | | } |
202 | | Tt = Tp + Ts; |
203 | | T2A = T2b + T2a; |
204 | | T11 = TX - T10; |
205 | | T16 = T14 - T15; |
206 | | T17 = FNMS(KP414213562, T16, T11); |
207 | | T1j = FMA(KP414213562, T11, T16); |
208 | | { |
209 | | E T1D, T1E, T29, T2c; |
210 | | T1D = T15 + T14; |
211 | | T1E = TX + T10; |
212 | | T1F = FNMS(KP414213562, T1E, T1D); |
213 | | T1L = FMA(KP414213562, T1D, T1E); |
214 | | T29 = Tp - Ts; |
215 | | T2c = T2a - T2b; |
216 | | T2d = T29 + T2c; |
217 | | T2l = T29 - T2c; |
218 | | } |
219 | | } |
220 | | { /* output stage: R0 at positions 0, 8, 4, 12 */ |
221 | | E Tf, Tu, T2L, T2M, T2N, T2O; |
222 | | Tf = T7 + Te; |
223 | | Tu = Tm + Tt; |
224 | | T2L = Tf - Tu; |
225 | | T2M = T2B + T2A; |
226 | | T2N = T2F + T2E; |
227 | | T2O = T2M + T2N; |
228 | | R0[0] = KP2_000000000 * (Tf + Tu); |
229 | | R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M); |
230 | | R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O); |
231 | | R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L); |
232 | | } |
233 | | { /* output stage: R0 at positions 3, 15, 11, 7 */ |
234 | | E T2t, T2y, T2w, T2x; |
235 | | { |
236 | | E T2r, T2s, T2u, T2v; |
237 | | T2r = T1Z - T22; |
238 | | T2s = T2m + T2l; |
239 | | T2t = FNMS(KP707106781, T2s, T2r); |
240 | | T2y = FMA(KP707106781, T2s, T2r); |
241 | | T2u = T2j + T2i; |
242 | | T2v = T28 - T2d; |
243 | | T2w = FNMS(KP707106781, T2v, T2u); |
244 | | T2x = FMA(KP707106781, T2v, T2u); |
245 | | } |
246 | | R0[WS(rs, 3)] = KP1_662939224 * (FMA(KP668178637, T2w, T2t)); |
247 | | R0[WS(rs, 15)] = -(KP1_961570560 * (FNMS(KP198912367, T2x, T2y))); |
248 | | R0[WS(rs, 11)] = KP1_662939224 * (FNMS(KP668178637, T2t, T2w)); |
249 | | R0[WS(rs, 7)] = KP1_961570560 * (FMA(KP198912367, T2y, T2x)); |
250 | | } |
251 | | { /* output stage: R0 at positions 2, 14, 10, 6 */ |
252 | | E T2D, T2K, T2I, T2J; |
253 | | { |
254 | | E T2z, T2C, T2G, T2H; |
255 | | T2z = T7 - Te; |
256 | | T2C = T2A - T2B; |
257 | | T2D = T2z + T2C; |
258 | | T2K = T2z - T2C; |
259 | | T2G = T2E - T2F; |
260 | | T2H = Tm - Tt; |
261 | | T2I = T2G - T2H; |
262 | | T2J = T2H + T2G; |
263 | | } |
264 | | R0[WS(rs, 2)] = KP1_847759065 * (FMA(KP414213562, T2I, T2D)); |
265 | | R0[WS(rs, 14)] = -(KP1_847759065 * (FNMS(KP414213562, T2J, T2K))); |
266 | | R0[WS(rs, 10)] = KP1_847759065 * (FNMS(KP414213562, T2D, T2I)); |
267 | | R0[WS(rs, 6)] = KP1_847759065 * (FMA(KP414213562, T2K, T2J)); |
268 | | } |
269 | | { /* output stage: R1 at positions 0, 12, 8, 4 */ |
270 | | E T19, T1o, T1m, T1n; |
271 | | { |
272 | | E TL, T18, T1i, T1l; |
273 | | TL = FMA(KP707106781, TK, Tz); |
274 | | T18 = TW + T17; |
275 | | T19 = FMA(KP923879532, T18, TL); |
276 | | T1o = FNMS(KP923879532, T18, TL); |
277 | | T1i = FNMS(KP707106781, T1h, T1e); |
278 | | T1l = T1j - T1k; |
279 | | T1m = FNMS(KP923879532, T1l, T1i); |
280 | | T1n = FMA(KP923879532, T1l, T1i); |
281 | | } |
282 | | R1[0] = KP1_990369453 * (FNMS(KP098491403, T1m, T19)); |
283 | | R1[WS(rs, 12)] = -(KP1_546020906 * (FMA(KP820678790, T1n, T1o))); |
284 | | R1[WS(rs, 8)] = -(KP1_990369453 * (FMA(KP098491403, T19, T1m))); |
285 | | R1[WS(rs, 4)] = -(KP1_546020906 * (FNMS(KP820678790, T1o, T1n))); |
286 | | } |
287 | | { /* output stage: R1 at positions 2, 14, 10, 6 */ |
288 | | E T1r, T1w, T1u, T1v; |
289 | | { |
290 | | E T1p, T1q, T1s, T1t; |
291 | | T1p = FNMS(KP707106781, TK, Tz); |
292 | | T1q = T1k + T1j; |
293 | | T1r = FNMS(KP923879532, T1q, T1p); |
294 | | T1w = FMA(KP923879532, T1q, T1p); |
295 | | T1s = FMA(KP707106781, T1h, T1e); |
296 | | T1t = TW - T17; |
297 | | T1u = FMA(KP923879532, T1t, T1s); |
298 | | T1v = FNMS(KP923879532, T1t, T1s); |
299 | | } |
300 | | R1[WS(rs, 2)] = KP1_763842528 * (FNMS(KP534511135, T1u, T1r)); |
301 | | R1[WS(rs, 14)] = -(KP1_913880671 * (FMA(KP303346683, T1v, T1w))); |
302 | | R1[WS(rs, 10)] = -(KP1_763842528 * (FMA(KP534511135, T1r, T1u))); |
303 | | R1[WS(rs, 6)] = -(KP1_913880671 * (FNMS(KP303346683, T1w, T1v))); |
304 | | } |
305 | | { /* output stage: R1 at positions 3, 15, 11, 7 */ |
306 | | E T1T, T1Y, T1W, T1X; |
307 | | { |
308 | | E T1R, T1S, T1U, T1V; |
309 | | T1R = FMA(KP707106781, T1y, T1x); |
310 | | T1S = T1M + T1L; |
311 | | T1T = FNMS(KP923879532, T1S, T1R); |
312 | | T1Y = FMA(KP923879532, T1S, T1R); |
313 | | T1U = FMA(KP707106781, T1J, T1I); |
314 | | T1V = T1C + T1F; |
315 | | T1W = FNMS(KP923879532, T1V, T1U); |
316 | | T1X = FMA(KP923879532, T1V, T1U); |
317 | | } |
318 | | R1[WS(rs, 3)] = KP1_546020906 * (FMA(KP820678790, T1W, T1T)); |
319 | | R1[WS(rs, 15)] = -(KP1_990369453 * (FNMS(KP098491403, T1X, T1Y))); |
320 | | R1[WS(rs, 11)] = KP1_546020906 * (FNMS(KP820678790, T1T, T1W)); |
321 | | R1[WS(rs, 7)] = KP1_990369453 * (FMA(KP098491403, T1Y, T1X)); |
322 | | } |
323 | | { /* output stage: R0 at positions 1, 13, 9, 5 */ |
324 | | E T2f, T2q, T2o, T2p; |
325 | | { |
326 | | E T23, T2e, T2k, T2n; |
327 | | T23 = T1Z + T22; |
328 | | T2e = T28 + T2d; |
329 | | T2f = FMA(KP707106781, T2e, T23); |
330 | | T2q = FNMS(KP707106781, T2e, T23); |
331 | | T2k = T2i - T2j; |
332 | | T2n = T2l - T2m; |
333 | | T2o = FMA(KP707106781, T2n, T2k); |
334 | | T2p = FNMS(KP707106781, T2n, T2k); |
335 | | } |
336 | | R0[WS(rs, 1)] = KP1_961570560 * (FMA(KP198912367, T2o, T2f)); |
337 | | R0[WS(rs, 13)] = -(KP1_662939224 * (FNMS(KP668178637, T2p, T2q))); |
338 | | R0[WS(rs, 9)] = KP1_961570560 * (FNMS(KP198912367, T2f, T2o)); |
339 | | R0[WS(rs, 5)] = KP1_662939224 * (FMA(KP668178637, T2q, T2p)); |
340 | | } |
341 | | { /* output stage: R1 at positions 1, 13, 9, 5 */ |
342 | | E T1H, T1Q, T1O, T1P; |
343 | | { |
344 | | E T1z, T1G, T1K, T1N; |
345 | | T1z = FNMS(KP707106781, T1y, T1x); |
346 | | T1G = T1C - T1F; |
347 | | T1H = FMA(KP923879532, T1G, T1z); |
348 | | T1Q = FNMS(KP923879532, T1G, T1z); |
349 | | T1K = FNMS(KP707106781, T1J, T1I); |
350 | | T1N = T1L - T1M; |
351 | | T1O = FMA(KP923879532, T1N, T1K); |
352 | | T1P = FNMS(KP923879532, T1N, T1K); |
353 | | } |
354 | | R1[WS(rs, 1)] = KP1_913880671 * (FMA(KP303346683, T1O, T1H)); |
355 | | R1[WS(rs, 13)] = -(KP1_763842528 * (FNMS(KP534511135, T1P, T1Q))); |
356 | | R1[WS(rs, 9)] = KP1_913880671 * (FNMS(KP303346683, T1H, T1O)); |
357 | | R1[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T1Q, T1P)); |
358 | | } |
359 | | } |
360 | | } |
361 | | } |
362 | | |
363 | | static const kr2c_desc desc = { 32, "r2cbIII_32", { 106, 32, 68, 0 }, &GENUS }; /* descriptor for the FMA variant: 32 and the name match the generator's -n and -name options; 106, 32, 68 match the add / multiply / fused multiply-add counts quoted in this variant's header comment. The meaning of the trailing 0 and of GENUS is not visible in this listing. */ |
364 | | |
365 | | void X(codelet_r2cbIII_32) (planner *p) { X(kr2c_register) (p, r2cbIII_32, &desc); /* hands the kernel above and its descriptor to X(kr2c_register) for planner p */ |
366 | | } |
367 | | |
368 | | #else |
369 | | |
370 | | /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -name r2cbIII_32 -dft-III -include rdft/scalar/r2cbIII.h */ |
371 | | |
372 | | /* |
373 | | * This function contains 174 FP additions, 84 FP multiplications, |
374 | | * (or, 138 additions, 48 multiplications, 36 fused multiply/add), |
375 | | * 66 stack variables, 19 constants, and 64 memory accesses |
376 | | */ |
377 | | #include "rdft/scalar/r2cbIII.h" |
378 | | |
379 | | static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) |
380 | 0 | { /* Non-FMA variant of the generated size-32 kernel, used when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined. Each pass reads Cr and Ci at the 16 positions 0 and WS(cs*, 1..15), and writes R0 and R1 at the 16 positions 0 and WS(rs, 1..15). NOTE(review): presumably the backward type-III real transform, going by the gen_r2cb / -dft-III generator flags; confirm against the FFTW sources. */ |
381 | 0 | DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); /* DK introduces a named constant; each name spells the leading digits of its value */ |
382 | 0 | DK(KP580569354, +0.580569354508924735272384751634790549382952557); |
383 | 0 | DK(KP942793473, +0.942793473651995297112775251810508755314920638); |
384 | 0 | DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); |
385 | 0 | DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); |
386 | 0 | DK(KP1_268786568, +1.268786568327290996430343226450986741351374190); |
387 | 0 | DK(KP196034280, +0.196034280659121203988391127777283691722273346); |
388 | 0 | DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); |
389 | 0 | DK(KP765366864, +0.765366864730179543456919968060797733522689125); /* 2*sin(pi/8) */ |
390 | 0 | DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); /* 2*cos(pi/8) */ |
391 | 0 | DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); |
392 | 0 | DK(KP390180644, +0.390180644032256535696569736954044481855383236); |
393 | 0 | DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); |
394 | 0 | DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); |
395 | 0 | DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* sqrt(2) */ |
396 | 0 | DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); |
397 | 0 | DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */ |
398 | 0 | DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */ |
399 | 0 | DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */ |
400 | 0 | { |
401 | 0 | INT i; |
402 | 0 | for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { /* v passes; after each pass the outputs R0/R1 step by ovs and the inputs Cr/Ci step by ivs. MAKE_VOLATILE_STRIDE is defined outside this listing; presumably an optimizer hint, verify. */ |
403 | 0 | E T7, T2i, T2F, Tz, T1k, T1I, T1Z, T1x, Te, T22, T2E, T2j, T1f, T1y, TK; /* intermediates produced by the four load stages below and consumed by the output stages */ |
404 | 0 | E T1J, Tm, T2B, TW, T1a, T1C, T1L, T28, T2l, Tt, T2A, T17, T1b, T1F, T1M; |
405 | 0 | E T2d, T2m; |
406 | 0 | { /* load stage 1: Cr and Ci at positions 0, 15, 8, 7, combined into pairwise sums and differences */ |
407 | 0 | E T3, Tv, T1j, T2h, T6, T1g, Ty, T2g; |
408 | 0 | { |
409 | 0 | E T1, T2, T1h, T1i; |
410 | 0 | T1 = Cr[0]; |
411 | 0 | T2 = Cr[WS(csr, 15)]; |
412 | 0 | T3 = T1 + T2; |
413 | 0 | Tv = T1 - T2; |
414 | 0 | T1h = Ci[0]; |
415 | 0 | T1i = Ci[WS(csi, 15)]; |
416 | 0 | T1j = T1h + T1i; |
417 | 0 | T2h = T1i - T1h; |
418 | 0 | } |
419 | 0 | { |
420 | 0 | E T4, T5, Tw, Tx; |
421 | 0 | T4 = Cr[WS(csr, 8)]; |
422 | 0 | T5 = Cr[WS(csr, 7)]; |
423 | 0 | T6 = T4 + T5; |
424 | 0 | T1g = T4 - T5; |
425 | 0 | Tw = Ci[WS(csi, 8)]; |
426 | 0 | Tx = Ci[WS(csi, 7)]; |
427 | 0 | Ty = Tw + Tx; |
428 | 0 | T2g = Tw - Tx; |
429 | 0 | } |
430 | 0 | T7 = T3 + T6; |
431 | 0 | T2i = T2g + T2h; |
432 | 0 | T2F = T2h - T2g; |
433 | 0 | Tz = Tv - Ty; |
434 | 0 | T1k = T1g + T1j; |
435 | 0 | T1I = T1g - T1j; |
436 | 0 | T1Z = T3 - T6; |
437 | 0 | T1x = Tv + Ty; |
438 | 0 | } |
439 | 0 | { /* load stage 2: Cr and Ci at positions 4, 11, 3, 12 */ |
440 | 0 | E Ta, TA, TD, T21, Td, TF, TI, T20; |
441 | 0 | { |
442 | 0 | E T8, T9, TB, TC; |
443 | 0 | T8 = Cr[WS(csr, 4)]; |
444 | 0 | T9 = Cr[WS(csr, 11)]; |
445 | 0 | Ta = T8 + T9; |
446 | 0 | TA = T8 - T9; |
447 | 0 | TB = Ci[WS(csi, 4)]; |
448 | 0 | TC = Ci[WS(csi, 11)]; |
449 | 0 | TD = TB + TC; |
450 | 0 | T21 = TB - TC; |
451 | 0 | } |
452 | 0 | { |
453 | 0 | E Tb, Tc, TG, TH; |
454 | 0 | Tb = Cr[WS(csr, 3)]; |
455 | 0 | Tc = Cr[WS(csr, 12)]; |
456 | 0 | Td = Tb + Tc; |
457 | 0 | TF = Tb - Tc; |
458 | 0 | TG = Ci[WS(csi, 3)]; |
459 | 0 | TH = Ci[WS(csi, 12)]; |
460 | 0 | TI = TG + TH; |
461 | 0 | T20 = TH - TG; |
462 | 0 | } |
463 | 0 | Te = Ta + Td; |
464 | 0 | T22 = T20 - T21; |
465 | 0 | T2E = T21 + T20; |
466 | 0 | T2j = Ta - Td; |
467 | 0 | { |
468 | 0 | E T1d, T1e, TE, TJ; |
469 | 0 | T1d = TA + TD; |
470 | 0 | T1e = TF + TI; |
471 | 0 | T1f = KP707106781 * (T1d - T1e); /* in this variant the sqrt(2)/2 scaling is applied here with a plain multiply */ |
472 | 0 | T1y = KP707106781 * (T1d + T1e); |
473 | 0 | TE = TA - TD; |
474 | 0 | TJ = TF - TI; |
475 | 0 | TK = KP707106781 * (TE + TJ); |
476 | 0 | T1J = KP707106781 * (TE - TJ); |
477 | 0 | } |
478 | 0 | } |
479 | 0 | { /* load stage 3: Cr and Ci at positions 2, 13, 10, 5 */ |
480 | 0 | E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV; |
481 | 0 | { |
482 | 0 | E Tg, Th, TS, TT; |
483 | 0 | Tg = Cr[WS(csr, 2)]; |
484 | 0 | Th = Cr[WS(csr, 13)]; |
485 | 0 | Ti = Tg + Th; |
486 | 0 | TM = Tg - Th; |
487 | 0 | TS = Ci[WS(csi, 2)]; |
488 | 0 | TT = Ci[WS(csi, 13)]; |
489 | 0 | TU = TS + TT; |
490 | 0 | T25 = TS - TT; |
491 | 0 | } |
492 | 0 | { |
493 | 0 | E Tj, Tk, TN, TO; |
494 | 0 | Tj = Cr[WS(csr, 10)]; |
495 | 0 | Tk = Cr[WS(csr, 5)]; |
496 | 0 | Tl = Tj + Tk; |
497 | 0 | TR = Tj - Tk; |
498 | 0 | TN = Ci[WS(csi, 10)]; |
499 | 0 | TO = Ci[WS(csi, 5)]; |
500 | 0 | TP = TN + TO; |
501 | 0 | T26 = TN - TO; |
502 | 0 | } |
503 | 0 | Tm = Ti + Tl; |
504 | 0 | T2B = T26 + T25; |
505 | 0 | TQ = TM - TP; |
506 | 0 | TV = TR + TU; |
507 | 0 | TW = FNMS(KP382683432, TV, KP923879532 * TQ); /* FNMS and FMA are macros defined outside this listing; presumably c - a*b and a*b + c respectively, confirm in the codelet header */ |
508 | 0 | T1a = FMA(KP382683432, TQ, KP923879532 * TV); |
509 | 0 | { |
510 | 0 | E T1A, T1B, T24, T27; |
511 | 0 | T1A = TM + TP; |
512 | 0 | T1B = TU - TR; |
513 | 0 | T1C = FNMS(KP923879532, T1B, KP382683432 * T1A); |
514 | 0 | T1L = FMA(KP923879532, T1A, KP382683432 * T1B); |
515 | 0 | T24 = Ti - Tl; |
516 | 0 | T27 = T25 - T26; |
517 | 0 | T28 = T24 - T27; |
518 | 0 | T2l = T24 + T27; |
519 | 0 | } |
520 | 0 | } |
521 | 0 | { /* load stage 4: Cr and Ci at positions 1, 14, 6, 9 */ |
522 | 0 | E Tp, TX, T15, T2a, Ts, T12, T10, T2b, T11, T16; |
523 | 0 | { |
524 | 0 | E Tn, To, T13, T14; |
525 | 0 | Tn = Cr[WS(csr, 1)]; |
526 | 0 | To = Cr[WS(csr, 14)]; |
527 | 0 | Tp = Tn + To; |
528 | 0 | TX = Tn - To; |
529 | 0 | T13 = Ci[WS(csi, 1)]; |
530 | 0 | T14 = Ci[WS(csi, 14)]; |
531 | 0 | T15 = T13 + T14; |
532 | 0 | T2a = T14 - T13; |
533 | 0 | } |
534 | 0 | { |
535 | 0 | E Tq, Tr, TY, TZ; |
536 | 0 | Tq = Cr[WS(csr, 6)]; |
537 | 0 | Tr = Cr[WS(csr, 9)]; |
538 | 0 | Ts = Tq + Tr; |
539 | 0 | T12 = Tq - Tr; |
540 | 0 | TY = Ci[WS(csi, 6)]; |
541 | 0 | TZ = Ci[WS(csi, 9)]; |
542 | 0 | T10 = TY + TZ; |
543 | 0 | T2b = TY - TZ; |
544 | 0 | } |
545 | 0 | Tt = Tp + Ts; |
546 | 0 | T2A = T2b + T2a; |
547 | 0 | T11 = TX - T10; |
548 | 0 | T16 = T12 - T15; |
549 | 0 | T17 = FMA(KP923879532, T11, KP382683432 * T16); |
550 | 0 | T1b = FNMS(KP382683432, T11, KP923879532 * T16); |
551 | 0 | { |
552 | 0 | E T1D, T1E, T29, T2c; |
553 | 0 | T1D = TX + T10; |
554 | 0 | T1E = T12 + T15; |
555 | 0 | T1F = FNMS(KP923879532, T1E, KP382683432 * T1D); |
556 | 0 | T1M = FMA(KP923879532, T1D, KP382683432 * T1E); |
557 | 0 | T29 = Tp - Ts; |
558 | 0 | T2c = T2a - T2b; |
559 | 0 | T2d = T29 + T2c; |
560 | 0 | T2m = T2c - T29; |
561 | 0 | } |
562 | 0 | } |
563 | 0 | { /* output stage: R0 at positions 0, 8, 4, 12 */ |
564 | 0 | E Tf, Tu, T2L, T2M, T2N, T2O; |
565 | 0 | Tf = T7 + Te; |
566 | 0 | Tu = Tm + Tt; |
567 | 0 | T2L = Tf - Tu; |
568 | 0 | T2M = T2B + T2A; |
569 | 0 | T2N = T2F - T2E; |
570 | 0 | T2O = T2M + T2N; |
571 | 0 | R0[0] = KP2_000000000 * (Tf + Tu); |
572 | 0 | R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M); |
573 | 0 | R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O); |
574 | 0 | R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L); |
575 | 0 | } |
576 | 0 | { /* output stage: R0 at positions 3, 15, 11, 7 */ |
577 | 0 | E T2t, T2x, T2w, T2y; |
578 | 0 | { |
579 | 0 | E T2r, T2s, T2u, T2v; |
580 | 0 | T2r = T1Z - T22; |
581 | 0 | T2s = KP707106781 * (T2m - T2l); |
582 | 0 | T2t = T2r + T2s; |
583 | 0 | T2x = T2r - T2s; |
584 | 0 | T2u = T2j + T2i; |
585 | 0 | T2v = KP707106781 * (T28 - T2d); |
586 | 0 | T2w = T2u - T2v; |
587 | 0 | T2y = T2v + T2u; |
588 | 0 | } |
589 | 0 | R0[WS(rs, 3)] = FMA(KP1_662939224, T2t, KP1_111140466 * T2w); |
590 | 0 | R0[WS(rs, 15)] = FNMS(KP1_961570560, T2x, KP390180644 * T2y); |
591 | 0 | R0[WS(rs, 11)] = FNMS(KP1_111140466, T2t, KP1_662939224 * T2w); |
592 | 0 | R0[WS(rs, 7)] = FMA(KP390180644, T2x, KP1_961570560 * T2y); |
593 | 0 | } |
594 | 0 | { /* output stage: R0 at positions 2, 14, 10, 6 */ |
595 | 0 | E T2D, T2J, T2I, T2K; |
596 | 0 | { |
597 | 0 | E T2z, T2C, T2G, T2H; |
598 | 0 | T2z = T7 - Te; |
599 | 0 | T2C = T2A - T2B; |
600 | 0 | T2D = T2z + T2C; |
601 | 0 | T2J = T2z - T2C; |
602 | 0 | T2G = T2E + T2F; |
603 | 0 | T2H = Tm - Tt; |
604 | 0 | T2I = T2G - T2H; |
605 | 0 | T2K = T2H + T2G; |
606 | 0 | } |
607 | 0 | R0[WS(rs, 2)] = FMA(KP1_847759065, T2D, KP765366864 * T2I); |
608 | 0 | R0[WS(rs, 14)] = FNMS(KP1_847759065, T2J, KP765366864 * T2K); |
609 | 0 | R0[WS(rs, 10)] = FNMS(KP765366864, T2D, KP1_847759065 * T2I); |
610 | 0 | R0[WS(rs, 6)] = FMA(KP765366864, T2J, KP1_847759065 * T2K); |
611 | 0 | } |
612 | 0 | { /* output stage: R1 at positions 0, 12, 8, 4 */ |
613 | 0 | E T19, T1n, T1m, T1o; |
614 | 0 | { |
615 | 0 | E TL, T18, T1c, T1l; |
616 | 0 | TL = Tz + TK; |
617 | 0 | T18 = TW + T17; |
618 | 0 | T19 = TL + T18; |
619 | 0 | T1n = TL - T18; |
620 | 0 | T1c = T1a + T1b; |
621 | 0 | T1l = T1f + T1k; |
622 | 0 | T1m = T1c + T1l; |
623 | 0 | T1o = T1c - T1l; |
624 | 0 | } |
625 | 0 | R1[0] = FNMS(KP196034280, T1m, KP1_990369453 * T19); |
626 | 0 | R1[WS(rs, 12)] = FNMS(KP1_546020906, T1n, KP1_268786568 * T1o); |
627 | 0 | R1[WS(rs, 8)] = -(FMA(KP196034280, T19, KP1_990369453 * T1m)); |
628 | 0 | R1[WS(rs, 4)] = FMA(KP1_268786568, T1n, KP1_546020906 * T1o); |
629 | 0 | } |
630 | 0 | { /* output stage: R1 at positions 2, 14, 10, 6 */ |
631 | 0 | E T1r, T1v, T1u, T1w; |
632 | 0 | { |
633 | 0 | E T1p, T1q, T1s, T1t; |
634 | 0 | T1p = Tz - TK; |
635 | 0 | T1q = T1b - T1a; |
636 | 0 | T1r = T1p + T1q; |
637 | 0 | T1v = T1p - T1q; |
638 | 0 | T1s = T1f - T1k; |
639 | 0 | T1t = TW - T17; |
640 | 0 | T1u = T1s - T1t; |
641 | 0 | T1w = T1t + T1s; |
642 | 0 | } |
643 | 0 | R1[WS(rs, 2)] = FMA(KP1_763842528, T1r, KP942793473 * T1u); |
644 | 0 | R1[WS(rs, 14)] = FNMS(KP1_913880671, T1v, KP580569354 * T1w); |
645 | 0 | R1[WS(rs, 10)] = FNMS(KP942793473, T1r, KP1_763842528 * T1u); |
646 | 0 | R1[WS(rs, 6)] = FMA(KP580569354, T1v, KP1_913880671 * T1w); |
647 | 0 | } |
648 | 0 | { /* output stage: R1 at positions 3, 15, 11, 7 */ |
649 | 0 | E T1T, T1X, T1W, T1Y; |
650 | 0 | { |
651 | 0 | E T1R, T1S, T1U, T1V; |
652 | 0 | T1R = T1x + T1y; |
653 | 0 | T1S = T1L + T1M; |
654 | 0 | T1T = T1R - T1S; |
655 | 0 | T1X = T1R + T1S; |
656 | 0 | T1U = T1J + T1I; |
657 | 0 | T1V = T1C - T1F; |
658 | 0 | T1W = T1U - T1V; |
659 | 0 | T1Y = T1V + T1U; |
660 | 0 | } |
661 | 0 | R1[WS(rs, 3)] = FMA(KP1_546020906, T1T, KP1_268786568 * T1W); |
662 | 0 | R1[WS(rs, 15)] = FNMS(KP1_990369453, T1X, KP196034280 * T1Y); |
663 | 0 | R1[WS(rs, 11)] = FNMS(KP1_268786568, T1T, KP1_546020906 * T1W); |
664 | 0 | R1[WS(rs, 7)] = FMA(KP196034280, T1X, KP1_990369453 * T1Y); |
665 | 0 | } |
666 | 0 | { /* output stage: R0 at positions 1, 13, 9, 5 */ |
667 | 0 | E T2f, T2p, T2o, T2q; |
668 | 0 | { |
669 | 0 | E T23, T2e, T2k, T2n; |
670 | 0 | T23 = T1Z + T22; |
671 | 0 | T2e = KP707106781 * (T28 + T2d); |
672 | 0 | T2f = T23 + T2e; |
673 | 0 | T2p = T23 - T2e; |
674 | 0 | T2k = T2i - T2j; |
675 | 0 | T2n = KP707106781 * (T2l + T2m); |
676 | 0 | T2o = T2k - T2n; |
677 | 0 | T2q = T2n + T2k; |
678 | 0 | } |
679 | 0 | R0[WS(rs, 1)] = FMA(KP1_961570560, T2f, KP390180644 * T2o); |
680 | 0 | R0[WS(rs, 13)] = FNMS(KP1_662939224, T2p, KP1_111140466 * T2q); |
681 | 0 | R0[WS(rs, 9)] = FNMS(KP390180644, T2f, KP1_961570560 * T2o); |
682 | 0 | R0[WS(rs, 5)] = FMA(KP1_111140466, T2p, KP1_662939224 * T2q); |
683 | 0 | } |
684 | 0 | { /* output stage: R1 at positions 1, 13, 9, 5 */ |
685 | 0 | E T1H, T1P, T1O, T1Q; |
686 | 0 | { |
687 | 0 | E T1z, T1G, T1K, T1N; |
688 | 0 | T1z = T1x - T1y; |
689 | 0 | T1G = T1C + T1F; |
690 | 0 | T1H = T1z + T1G; |
691 | 0 | T1P = T1z - T1G; |
692 | 0 | T1K = T1I - T1J; |
693 | 0 | T1N = T1L - T1M; |
694 | 0 | T1O = T1K - T1N; |
695 | 0 | T1Q = T1N + T1K; |
696 | 0 | } |
697 | 0 | R1[WS(rs, 1)] = FMA(KP1_913880671, T1H, KP580569354 * T1O); |
698 | 0 | R1[WS(rs, 13)] = FNMS(KP1_763842528, T1P, KP942793473 * T1Q); |
699 | 0 | R1[WS(rs, 9)] = FNMS(KP580569354, T1H, KP1_913880671 * T1O); |
700 | 0 | R1[WS(rs, 5)] = FMA(KP942793473, T1P, KP1_763842528 * T1Q); |
701 | 0 | } |
702 | 0 | } |
703 | 0 | } |
704 | 0 | } |
705 | | |
706 | | static const kr2c_desc desc = { 32, "r2cbIII_32", { 138, 48, 36, 0 }, &GENUS }; /* descriptor for the non-FMA variant: 32 and the name match the generator's -n and -name options; 138, 48, 36 match the add / multiply / fused multiply-add counts quoted in this variant's header comment. The meaning of the trailing 0 and of GENUS is not visible in this listing. */ |
707 | | |
708 | 1 | void X(codelet_r2cbIII_32) (planner *p) { X(kr2c_register) (p, r2cbIII_32, &desc); /* hands the kernel above and its descriptor to X(kr2c_register) for planner p */ |
709 | 1 | } |
710 | | |
711 | | #endif |