Coverage Report

Created: 2026-02-14 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/rdft/scalar/r2cb/r2cb_64.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sat Feb 14 07:05:39 UTC 2026 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cb_64 -include rdft/scalar/r2cb.h */
29
30
/*
31
 * This function contains 394 FP additions, 216 FP multiplications,
32
 * (or, 178 additions, 0 multiplications, 216 fused multiply/add),
33
 * 109 stack variables, 18 constants, and 128 memory accesses
34
 */
35
#include "rdft/scalar/r2cb.h"
36
37
/*
 * r2cb_64 -- FMA-flavoured codelet (this copy is the one compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined).
 * NOTE(review): from the name and the generator command line (gen_r2cb,
 * -n 64, -sign 1) this is presumably the size-64 "backward" real-data
 * transform; confirm against the genfft documentation.
 *
 * Parameters, as used by the body:
 *   R0, R1   output arrays; every iteration stores R0[WS(rs, k)] and
 *            R1[WS(rs, k)] for k = 0..31 (64 stores in total).
 *   Cr, Ci   input arrays; every iteration loads Cr[WS(csr, k)] for
 *            k = 0..32 and Ci[WS(csi, k)] for k = 1..31 (64 loads in total).
 *   rs       stride argument passed to WS() for the R0/R1 accesses.
 *   csr, csi stride arguments passed to WS() for the Cr and Ci accesses.
 *   v        number of loop iterations; nothing is done when v <= 0.
 *   ivs      amount added to Cr and Ci after each iteration.
 *   ovs      amount added to R0 and R1 after each iteration.
 *
 * All inputs of one iteration are loaded into temporaries before the first
 * store of that iteration is issued.
 * NOTE(review): FMA(a, b, c) / FNMS(a, b, c) are defined elsewhere;
 * presumably c + a*b and c - a*b respectively -- confirm in the codelet
 * header.
 */
static void r2cb_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
38
{
39
     /* Named numeric constants referenced by the FMA/FNMS expressions below. */
     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
40
     DK(KP820678790, +0.820678790828660330972281985331011598767386482);
41
     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
42
     DK(KP098491403, +0.098491403357164253077197521291327432293052451);
43
     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
44
     DK(KP534511135, +0.534511135950791641089685961295362908582039528);
45
     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
46
     DK(KP303346683, +0.303346683607342391675883946941299872384187453);
47
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
48
     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
49
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
50
     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
51
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
52
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
53
     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
54
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
55
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
56
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
57
     {
58
    INT i;
59
    /* One pass per input vector: counts i down from v, stepping R0/R1 by ovs and Cr/Ci by ivs. */
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
60
         /* Temporaries that survive past the load blocks and feed the store blocks. */
         E T9, T5H, T4p, T5j, T1b, T2T, T3j, T3Z, Tg, T5I, T1m, T2U, T3m, T40, T4u;
61
         E T5k, T1s, T3o, T3r, T1J, To, Tv, T5K, T5L, T5M, T5N, T4A, T5n, T1D, T3s;
62
         E T4F, T5m, T1M, T3p, T1U, T3w, T3H, T2z, TM, T5Q, T5Y, T6f, T4M, T5q, T25;
63
         E T3I, T53, T5t, T2C, T3x, T11, T5V, T4W, T55, T5T, T6g, T2h, T2E, T2s, T2F;
64
         E T3E, T3L, T4R, T54, T3B, T3K;
65
         /* Loads Cr at 0, 8, 16, 24, 32 and Ci at 8, 16, 24; forms sums and differences. */
         {
66
        E T4, T14, T3, T13, T8, T16, T19, T4o, T1, T2, T5, T4n;
67
        T4 = Cr[WS(csr, 16)];
68
        T14 = Ci[WS(csi, 16)];
69
        T1 = Cr[0];
70
        T2 = Cr[WS(csr, 32)];
71
        T3 = T1 + T2;
72
        T13 = T1 - T2;
73
        {
74
       E T6, T7, T17, T18;
75
       T6 = Cr[WS(csr, 8)];
76
       T7 = Cr[WS(csr, 24)];
77
       T8 = T6 + T7;
78
       T16 = T6 - T7;
79
       T17 = Ci[WS(csi, 8)];
80
       T18 = Ci[WS(csi, 24)];
81
       T19 = T17 + T18;
82
       T4o = T17 - T18;
83
        }
84
        T5 = FMA(KP2_000000000, T4, T3);
85
        T9 = FMA(KP2_000000000, T8, T5);
86
        T5H = FNMS(KP2_000000000, T8, T5);
87
        T4n = FNMS(KP2_000000000, T4, T3);
88
        T4p = FNMS(KP2_000000000, T4o, T4n);
89
        T5j = FMA(KP2_000000000, T4o, T4n);
90
        {
91
       E T15, T1a, T3h, T3i;
92
       T15 = FNMS(KP2_000000000, T14, T13);
93
       T1a = T16 - T19;
94
       T1b = FMA(KP1_414213562, T1a, T15);
95
       T2T = FNMS(KP1_414213562, T1a, T15);
96
       T3h = FMA(KP2_000000000, T14, T13);
97
       T3i = T16 + T19;
98
       T3j = FNMS(KP1_414213562, T3i, T3h);
99
       T3Z = FMA(KP1_414213562, T3i, T3h);
100
        }
101
         }
102
         /* Loads Cr and Ci at 4, 12, 20, 28. */
         {
103
        E Tc, T1c, T1k, T4r, Tf, T1h, T1f, T4s, T1g, T1l;
104
        {
105
       E Ta, Tb, T1i, T1j;
106
       Ta = Cr[WS(csr, 4)];
107
       Tb = Cr[WS(csr, 28)];
108
       Tc = Ta + Tb;
109
       T1c = Ta - Tb;
110
       T1i = Ci[WS(csi, 4)];
111
       T1j = Ci[WS(csi, 28)];
112
       T1k = T1i + T1j;
113
       T4r = T1i - T1j;
114
        }
115
        {
116
       E Td, Te, T1d, T1e;
117
       Td = Cr[WS(csr, 20)];
118
       Te = Cr[WS(csr, 12)];
119
       Tf = Td + Te;
120
       T1h = Td - Te;
121
       T1d = Ci[WS(csi, 20)];
122
       T1e = Ci[WS(csi, 12)];
123
       T1f = T1d + T1e;
124
       T4s = T1d - T1e;
125
        }
126
        Tg = Tc + Tf;
127
        T5I = T4s + T4r;
128
        T1g = T1c - T1f;
129
        T1l = T1h + T1k;
130
        T1m = FNMS(KP414213562, T1l, T1g);
131
        T2U = FMA(KP414213562, T1g, T1l);
132
        {
133
       E T3k, T3l, T4q, T4t;
134
       T3k = T1k - T1h;
135
       T3l = T1c + T1f;
136
       T3m = FNMS(KP414213562, T3l, T3k);
137
       T40 = FMA(KP414213562, T3k, T3l);
138
       T4q = Tc - Tf;
139
       T4t = T4r - T4s;
140
       T4u = T4q - T4t;
141
       T5k = T4q + T4t;
142
        }
143
         }
144
         /* Loads Cr and Ci at 2, 6, 10, 14, 18, 22, 26, 30. */
         {
145
        E Tk, T1o, T1I, T4C, Tn, T1F, T1r, T4D, Tr, T1t, T1w, T4y, Tu, T1y, T1B;
146
        E T4x;
147
        {
148
       E Ti, Tj, T1G, T1H;
149
       Ti = Cr[WS(csr, 2)];
150
       Tj = Cr[WS(csr, 30)];
151
       Tk = Ti + Tj;
152
       T1o = Ti - Tj;
153
       T1G = Ci[WS(csi, 2)];
154
       T1H = Ci[WS(csi, 30)];
155
       T1I = T1G + T1H;
156
       T4C = T1G - T1H;
157
        }
158
        {
159
       E Tl, Tm, T1p, T1q;
160
       Tl = Cr[WS(csr, 18)];
161
       Tm = Cr[WS(csr, 14)];
162
       Tn = Tl + Tm;
163
       T1F = Tl - Tm;
164
       T1p = Ci[WS(csi, 18)];
165
       T1q = Ci[WS(csi, 14)];
166
       T1r = T1p + T1q;
167
       T4D = T1p - T1q;
168
        }
169
        {
170
       E Tp, Tq, T1u, T1v;
171
       Tp = Cr[WS(csr, 10)];
172
       Tq = Cr[WS(csr, 22)];
173
       Tr = Tp + Tq;
174
       T1t = Tp - Tq;
175
       T1u = Ci[WS(csi, 10)];
176
       T1v = Ci[WS(csi, 22)];
177
       T1w = T1u + T1v;
178
       T4y = T1u - T1v;
179
        }
180
        {
181
       E Ts, Tt, T1z, T1A;
182
       Ts = Cr[WS(csr, 6)];
183
       Tt = Cr[WS(csr, 26)];
184
       Tu = Ts + Tt;
185
       T1y = Ts - Tt;
186
       T1z = Ci[WS(csi, 6)];
187
       T1A = Ci[WS(csi, 26)];
188
       T1B = T1z + T1A;
189
       T4x = T1A - T1z;
190
        }
191
        T1s = T1o - T1r;
192
        T3o = T1o + T1r;
193
        T3r = T1I - T1F;
194
        T1J = T1F + T1I;
195
        To = Tk + Tn;
196
        Tv = Tr + Tu;
197
        T5K = To - Tv;
198
        {
199
       E T4w, T4z, T1x, T1C;
200
       T5L = T4D + T4C;
201
       T5M = T4y + T4x;
202
       T5N = T5L - T5M;
203
       T4w = Tk - Tn;
204
       T4z = T4x - T4y;
205
       T4A = T4w + T4z;
206
       T5n = T4w - T4z;
207
       T1x = T1t - T1w;
208
       T1C = T1y - T1B;
209
       T1D = T1x + T1C;
210
       T3s = T1C - T1x;
211
       {
212
            E T4B, T4E, T1K, T1L;
213
            T4B = Tr - Tu;
214
            T4E = T4C - T4D;
215
            T4F = T4B + T4E;
216
            T5m = T4E - T4B;
217
            T1K = T1t + T1w;
218
            T1L = T1y + T1B;
219
            T1M = T1K - T1L;
220
            T3p = T1K + T1L;
221
       }
222
        }
223
         }
224
         /* Loads Cr and Ci at 1, 7, 9, 15, 17, 23, 25, 31. */
         {
225
        E TA, T1Q, T2y, T50, TD, T2v, T1T, T51, TH, T1V, T1Y, T4K, TK, T20, T23;
226
        E T4J;
227
        {
228
       E Ty, Tz, T2w, T2x;
229
       Ty = Cr[WS(csr, 1)];
230
       Tz = Cr[WS(csr, 31)];
231
       TA = Ty + Tz;
232
       T1Q = Ty - Tz;
233
       T2w = Ci[WS(csi, 1)];
234
       T2x = Ci[WS(csi, 31)];
235
       T2y = T2w + T2x;
236
       T50 = T2w - T2x;
237
        }
238
        {
239
       E TB, TC, T1R, T1S;
240
       TB = Cr[WS(csr, 17)];
241
       TC = Cr[WS(csr, 15)];
242
       TD = TB + TC;
243
       T2v = TB - TC;
244
       T1R = Ci[WS(csi, 17)];
245
       T1S = Ci[WS(csi, 15)];
246
       T1T = T1R + T1S;
247
       T51 = T1R - T1S;
248
        }
249
        {
250
       E TF, TG, T1W, T1X;
251
       TF = Cr[WS(csr, 9)];
252
       TG = Cr[WS(csr, 23)];
253
       TH = TF + TG;
254
       T1V = TF - TG;
255
       T1W = Ci[WS(csi, 9)];
256
       T1X = Ci[WS(csi, 23)];
257
       T1Y = T1W + T1X;
258
       T4K = T1W - T1X;
259
        }
260
        {
261
       E TI, TJ, T21, T22;
262
       TI = Cr[WS(csr, 7)];
263
       TJ = Cr[WS(csr, 25)];
264
       TK = TI + TJ;
265
       T20 = TI - TJ;
266
       T21 = Ci[WS(csi, 7)];
267
       T22 = Ci[WS(csi, 25)];
268
       T23 = T21 + T22;
269
       T4J = T22 - T21;
270
        }
271
        {
272
       E TE, TL, T1Z, T24;
273
       T1U = T1Q - T1T;
274
       T3w = T1Q + T1T;
275
       T3H = T2y - T2v;
276
       T2z = T2v + T2y;
277
       TE = TA + TD;
278
       TL = TH + TK;
279
       TM = TE + TL;
280
       T5Q = TE - TL;
281
       {
282
            E T5W, T5X, T4I, T4L;
283
            T5W = T51 + T50;
284
            T5X = T4K + T4J;
285
            T5Y = T5W - T5X;
286
            T6f = T5X + T5W;
287
            T4I = TA - TD;
288
            T4L = T4J - T4K;
289
            T4M = T4I + T4L;
290
            T5q = T4I - T4L;
291
       }
292
       T1Z = T1V - T1Y;
293
       T24 = T20 - T23;
294
       T25 = T1Z + T24;
295
       T3I = T24 - T1Z;
296
       {
297
            E T4Z, T52, T2A, T2B;
298
            T4Z = TH - TK;
299
            T52 = T50 - T51;
300
            T53 = T4Z + T52;
301
            T5t = T52 - T4Z;
302
            T2A = T1V + T1Y;
303
            T2B = T20 + T23;
304
            T2C = T2A - T2B;
305
            T3x = T2A + T2B;
306
       }
307
        }
308
         }
309
         /* Loads Cr and Ci at 3, 5, 11, 13, 19, 21, 27, 29 (the last loads of the iteration). */
         {
310
        E TP, T27, T2f, T4O, TS, T2c, T2a, T4P, TW, T2i, T2q, T4T, TZ, T2n, T2l;
311
        E T4U;
312
        {
313
       E TN, TO, T2d, T2e;
314
       TN = Cr[WS(csr, 5)];
315
       TO = Cr[WS(csr, 27)];
316
       TP = TN + TO;
317
       T27 = TN - TO;
318
       T2d = Ci[WS(csi, 5)];
319
       T2e = Ci[WS(csi, 27)];
320
       T2f = T2d + T2e;
321
       T4O = T2d - T2e;
322
        }
323
        {
324
       E TQ, TR, T28, T29;
325
       TQ = Cr[WS(csr, 21)];
326
       TR = Cr[WS(csr, 11)];
327
       TS = TQ + TR;
328
       T2c = TQ - TR;
329
       T28 = Ci[WS(csi, 21)];
330
       T29 = Ci[WS(csi, 11)];
331
       T2a = T28 + T29;
332
       T4P = T28 - T29;
333
        }
334
        {
335
       E TU, TV, T2o, T2p;
336
       TU = Cr[WS(csr, 3)];
337
       TV = Cr[WS(csr, 29)];
338
       TW = TU + TV;
339
       T2i = TU - TV;
340
       T2o = Ci[WS(csi, 3)];
341
       T2p = Ci[WS(csi, 29)];
342
       T2q = T2o + T2p;
343
       T4T = T2p - T2o;
344
        }
345
        {
346
       E TX, TY, T2j, T2k;
347
       TX = Cr[WS(csr, 13)];
348
       TY = Cr[WS(csr, 19)];
349
       TZ = TX + TY;
350
       T2n = TX - TY;
351
       T2j = Ci[WS(csi, 13)];
352
       T2k = Ci[WS(csi, 19)];
353
       T2l = T2j + T2k;
354
       T4U = T2j - T2k;
355
        }
356
        {
357
       E TT, T10, T4S, T4V;
358
       TT = TP + TS;
359
       T10 = TW + TZ;
360
       T11 = TT + T10;
361
       T5V = TT - T10;
362
       T4S = TW - TZ;
363
       T4V = T4T - T4U;
364
       T4W = T4S + T4V;
365
       T55 = T4V - T4S;
366
        }
367
        {
368
       E T5R, T5S, T2b, T2g;
369
       T5R = T4U + T4T;
370
       T5S = T4P + T4O;
371
       T5T = T5R - T5S;
372
       T6g = T5S + T5R;
373
       T2b = T27 - T2a;
374
       T2g = T2c + T2f;
375
       T2h = FNMS(KP414213562, T2g, T2b);
376
       T2E = FMA(KP414213562, T2b, T2g);
377
        }
378
        {
379
       E T2m, T2r, T3C, T3D;
380
       T2m = T2i - T2l;
381
       T2r = T2n - T2q;
382
       T2s = FMA(KP414213562, T2r, T2m);
383
       T2F = FNMS(KP414213562, T2m, T2r);
384
       T3C = T2n + T2q;
385
       T3D = T2i + T2l;
386
       T3E = FNMS(KP414213562, T3D, T3C);
387
       T3L = FMA(KP414213562, T3C, T3D);
388
        }
389
        {
390
       E T4N, T4Q, T3z, T3A;
391
       T4N = TP - TS;
392
       T4Q = T4O - T4P;
393
       T4R = T4N - T4Q;
394
       T54 = T4N + T4Q;
395
       T3z = T2f - T2c;
396
       T3A = T27 + T2a;
397
       T3B = FNMS(KP414213562, T3A, T3z);
398
       T3K = FMA(KP414213562, T3z, T3A);
399
        }
400
         }
401
         /* Stores R0 at 0, 8, 16, 24. */
         {
402
        E T12, T6m, Tx, T6l, Th, Tw;
403
        T12 = TM + T11;
404
        T6m = T6g + T6f;
405
        Th = FMA(KP2_000000000, Tg, T9);
406
        Tw = To + Tv;
407
        Tx = FMA(KP2_000000000, Tw, Th);
408
        T6l = FNMS(KP2_000000000, Tw, Th);
409
        R0[WS(rs, 16)] = FNMS(KP2_000000000, T12, Tx);
410
        R0[WS(rs, 24)] = FMA(KP2_000000000, T6m, T6l);
411
        R0[0] = FMA(KP2_000000000, T12, Tx);
412
        R0[WS(rs, 8)] = FNMS(KP2_000000000, T6m, T6l);
413
         }
414
         /* Stores R0 at 6, 14, 22, 30. */
         {
415
        E T65, T69, T68, T6a;
416
        {
417
       E T63, T64, T66, T67;
418
       T63 = FMA(KP2_000000000, T5I, T5H);
419
       T64 = T5K + T5N;
420
       T65 = FNMS(KP1_414213562, T64, T63);
421
       T69 = FMA(KP1_414213562, T64, T63);
422
       T66 = T5Y - T5V;
423
       T67 = T5Q - T5T;
424
       T68 = FNMS(KP414213562, T67, T66);
425
       T6a = FMA(KP414213562, T66, T67);
426
        }
427
        R0[WS(rs, 6)] = FNMS(KP1_847759065, T68, T65);
428
        R0[WS(rs, 30)] = FMA(KP1_847759065, T6a, T69);
429
        R0[WS(rs, 22)] = FMA(KP1_847759065, T68, T65);
430
        R0[WS(rs, 14)] = FNMS(KP1_847759065, T6a, T69);
431
         }
432
         /* Stores R0 at 4, 12, 20, 28. */
         {
433
        E T6d, T6j, T6i, T6k;
434
        {
435
       E T6b, T6c, T6e, T6h;
436
       T6b = FNMS(KP2_000000000, Tg, T9);
437
       T6c = T5M + T5L;
438
       T6d = FNMS(KP2_000000000, T6c, T6b);
439
       T6j = FMA(KP2_000000000, T6c, T6b);
440
       T6e = TM - T11;
441
       T6h = T6f - T6g;
442
       T6i = T6e - T6h;
443
       T6k = T6e + T6h;
444
        }
445
        R0[WS(rs, 20)] = FNMS(KP1_414213562, T6i, T6d);
446
        R0[WS(rs, 28)] = FMA(KP1_414213562, T6k, T6j);
447
        R0[WS(rs, 4)] = FMA(KP1_414213562, T6i, T6d);
448
        R0[WS(rs, 12)] = FNMS(KP1_414213562, T6k, T6j);
449
         }
450
         /* Stores R0 at 2, 10, 18, 26. */
         {
451
        E T5P, T61, T60, T62;
452
        {
453
       E T5J, T5O, T5U, T5Z;
454
       T5J = FNMS(KP2_000000000, T5I, T5H);
455
       T5O = T5K - T5N;
456
       T5P = FMA(KP1_414213562, T5O, T5J);
457
       T61 = FNMS(KP1_414213562, T5O, T5J);
458
       T5U = T5Q + T5T;
459
       T5Z = T5V + T5Y;
460
       T60 = FNMS(KP414213562, T5Z, T5U);
461
       T62 = FMA(KP414213562, T5U, T5Z);
462
        }
463
        R0[WS(rs, 18)] = FNMS(KP1_847759065, T60, T5P);
464
        R0[WS(rs, 26)] = FMA(KP1_847759065, T62, T61);
465
        R0[WS(rs, 2)] = FMA(KP1_847759065, T60, T5P);
466
        R0[WS(rs, 10)] = FNMS(KP1_847759065, T62, T61);
467
         }
468
         /* Stores R0 at 1, 5, 9, 13, 17, 21, 25, 29. */
         {
469
        E T4Y, T5f, T57, T5e, T4H, T59, T5d, T5h, T4X, T56;
470
        T4X = T4R + T4W;
471
        T4Y = FMA(KP707106781, T4X, T4M);
472
        T5f = FNMS(KP707106781, T4X, T4M);
473
        T56 = T54 + T55;
474
        T57 = FMA(KP707106781, T56, T53);
475
        T5e = FNMS(KP707106781, T56, T53);
476
        {
477
       E T4v, T4G, T5b, T5c;
478
       T4v = FMA(KP1_414213562, T4u, T4p);
479
       T4G = FNMS(KP414213562, T4F, T4A);
480
       T4H = FMA(KP1_847759065, T4G, T4v);
481
       T59 = FNMS(KP1_847759065, T4G, T4v);
482
       T5b = FNMS(KP1_414213562, T4u, T4p);
483
       T5c = FMA(KP414213562, T4A, T4F);
484
       T5d = FNMS(KP1_847759065, T5c, T5b);
485
       T5h = FMA(KP1_847759065, T5c, T5b);
486
        }
487
        {
488
       E T58, T5i, T5a, T5g;
489
       T58 = FNMS(KP198912367, T57, T4Y);
490
       R0[WS(rs, 17)] = FNMS(KP1_961570560, T58, T4H);
491
       R0[WS(rs, 1)] = FMA(KP1_961570560, T58, T4H);
492
       T5i = FMA(KP668178637, T5e, T5f);
493
       R0[WS(rs, 13)] = FNMS(KP1_662939224, T5i, T5h);
494
       R0[WS(rs, 29)] = FMA(KP1_662939224, T5i, T5h);
495
       T5a = FMA(KP198912367, T4Y, T57);
496
       R0[WS(rs, 9)] = FNMS(KP1_961570560, T5a, T59);
497
       R0[WS(rs, 25)] = FMA(KP1_961570560, T5a, T59);
498
       T5g = FNMS(KP668178637, T5f, T5e);
499
       R0[WS(rs, 5)] = FNMS(KP1_662939224, T5g, T5d);
500
       R0[WS(rs, 21)] = FMA(KP1_662939224, T5g, T5d);
501
        }
502
         }
503
         /* Stores R0 at 3, 7, 11, 15, 19, 23, 27, 31. */
         {
504
        E T5s, T5D, T5v, T5C, T5p, T5x, T5B, T5F, T5r, T5u;
505
        T5r = T54 - T55;
506
        T5s = FNMS(KP707106781, T5r, T5q);
507
        T5D = FMA(KP707106781, T5r, T5q);
508
        T5u = T4W - T4R;
509
        T5v = FNMS(KP707106781, T5u, T5t);
510
        T5C = FMA(KP707106781, T5u, T5t);
511
        {
512
       E T5l, T5o, T5z, T5A;
513
       T5l = FNMS(KP1_414213562, T5k, T5j);
514
       T5o = FNMS(KP414213562, T5n, T5m);
515
       T5p = FNMS(KP1_847759065, T5o, T5l);
516
       T5x = FMA(KP1_847759065, T5o, T5l);
517
       T5z = FMA(KP1_414213562, T5k, T5j);
518
       T5A = FMA(KP414213562, T5m, T5n);
519
       T5B = FNMS(KP1_847759065, T5A, T5z);
520
       T5F = FMA(KP1_847759065, T5A, T5z);
521
        }
522
        {
523
       E T5w, T5G, T5y, T5E;
524
       T5w = FNMS(KP668178637, T5v, T5s);
525
       R0[WS(rs, 19)] = FNMS(KP1_662939224, T5w, T5p);
526
       R0[WS(rs, 3)] = FMA(KP1_662939224, T5w, T5p);
527
       T5G = FMA(KP198912367, T5C, T5D);
528
       R0[WS(rs, 15)] = FNMS(KP1_961570560, T5G, T5F);
529
       R0[WS(rs, 31)] = FMA(KP1_961570560, T5G, T5F);
530
       T5y = FMA(KP668178637, T5s, T5v);
531
       R0[WS(rs, 11)] = FNMS(KP1_662939224, T5y, T5x);
532
       R0[WS(rs, 27)] = FMA(KP1_662939224, T5y, T5x);
533
       T5E = FNMS(KP198912367, T5D, T5C);
534
       R0[WS(rs, 7)] = FNMS(KP1_961570560, T5E, T5B);
535
       R0[WS(rs, 23)] = FMA(KP1_961570560, T5E, T5B);
536
        }
537
         }
538
         /* Stores R1 at 1, 5, 9, 13, 17, 21, 25, 29. */
         {
539
        E T3n, T3R, T3u, T3S, T3G, T3V, T3N, T3U, T3q, T3t;
540
        T3n = FNMS(KP1_847759065, T3m, T3j);
541
        T3R = FMA(KP1_847759065, T3m, T3j);
542
        T3q = FNMS(KP707106781, T3p, T3o);
543
        T3t = FNMS(KP707106781, T3s, T3r);
544
        T3u = FNMS(KP668178637, T3t, T3q);
545
        T3S = FMA(KP668178637, T3q, T3t);
546
        {
547
       E T3y, T3F, T3J, T3M;
548
       T3y = FNMS(KP707106781, T3x, T3w);
549
       T3F = T3B + T3E;
550
       T3G = FNMS(KP923879532, T3F, T3y);
551
       T3V = FMA(KP923879532, T3F, T3y);
552
       T3J = FNMS(KP707106781, T3I, T3H);
553
       T3M = T3K - T3L;
554
       T3N = FMA(KP923879532, T3M, T3J);
555
       T3U = FNMS(KP923879532, T3M, T3J);
556
        }
557
        {
558
       E T3v, T3O, T3X, T3Y;
559
       T3v = FMA(KP1_662939224, T3u, T3n);
560
       T3O = FNMS(KP303346683, T3N, T3G);
561
       R1[WS(rs, 17)] = FNMS(KP1_913880671, T3O, T3v);
562
       R1[WS(rs, 1)] = FMA(KP1_913880671, T3O, T3v);
563
       T3X = FMA(KP1_662939224, T3S, T3R);
564
       T3Y = FMA(KP534511135, T3U, T3V);
565
       R1[WS(rs, 13)] = FNMS(KP1_763842528, T3Y, T3X);
566
       R1[WS(rs, 29)] = FMA(KP1_763842528, T3Y, T3X);
567
        }
568
        {
569
       E T3P, T3Q, T3T, T3W;
570
       T3P = FNMS(KP1_662939224, T3u, T3n);
571
       T3Q = FMA(KP303346683, T3G, T3N);
572
       R1[WS(rs, 9)] = FNMS(KP1_913880671, T3Q, T3P);
573
       R1[WS(rs, 25)] = FMA(KP1_913880671, T3Q, T3P);
574
       T3T = FNMS(KP1_662939224, T3S, T3R);
575
       T3W = FNMS(KP534511135, T3V, T3U);
576
       R1[WS(rs, 5)] = FNMS(KP1_763842528, T3W, T3T);
577
       R1[WS(rs, 21)] = FMA(KP1_763842528, T3W, T3T);
578
        }
579
         }
580
         /* Stores R1 at 0, 4, 8, 12, 16, 20, 24, 28. */
         {
581
        E T1n, T2L, T1O, T2M, T2u, T2P, T2H, T2O, T1E, T1N;
582
        T1n = FMA(KP1_847759065, T1m, T1b);
583
        T2L = FNMS(KP1_847759065, T1m, T1b);
584
        T1E = FMA(KP707106781, T1D, T1s);
585
        T1N = FMA(KP707106781, T1M, T1J);
586
        T1O = FNMS(KP198912367, T1N, T1E);
587
        T2M = FMA(KP198912367, T1E, T1N);
588
        {
589
       E T26, T2t, T2D, T2G;
590
       T26 = FMA(KP707106781, T25, T1U);
591
       T2t = T2h + T2s;
592
       T2u = FMA(KP923879532, T2t, T26);
593
       T2P = FNMS(KP923879532, T2t, T26);
594
       T2D = FMA(KP707106781, T2C, T2z);
595
       T2G = T2E + T2F;
596
       T2H = FMA(KP923879532, T2G, T2D);
597
       T2O = FNMS(KP923879532, T2G, T2D);
598
        }
599
        {
600
       E T1P, T2I, T2R, T2S;
601
       T1P = FMA(KP1_961570560, T1O, T1n);
602
       T2I = FNMS(KP098491403, T2H, T2u);
603
       R1[WS(rs, 16)] = FNMS(KP1_990369453, T2I, T1P);
604
       R1[0] = FMA(KP1_990369453, T2I, T1P);
605
       T2R = FMA(KP1_961570560, T2M, T2L);
606
       T2S = FMA(KP820678790, T2O, T2P);
607
       R1[WS(rs, 12)] = FNMS(KP1_546020906, T2S, T2R);
608
       R1[WS(rs, 28)] = FMA(KP1_546020906, T2S, T2R);
609
        }
610
        {
611
       E T2J, T2K, T2N, T2Q;
612
       T2J = FNMS(KP1_961570560, T1O, T1n);
613
       T2K = FMA(KP098491403, T2u, T2H);
614
       R1[WS(rs, 8)] = FNMS(KP1_990369453, T2K, T2J);
615
       R1[WS(rs, 24)] = FMA(KP1_990369453, T2K, T2J);
616
       T2N = FNMS(KP1_961570560, T2M, T2L);
617
       T2Q = FNMS(KP820678790, T2P, T2O);
618
       R1[WS(rs, 4)] = FNMS(KP1_546020906, T2Q, T2N);
619
       R1[WS(rs, 20)] = FMA(KP1_546020906, T2Q, T2N);
620
        }
621
         }
622
         /* Stores R1 at 3, 7, 11, 15, 19, 23, 27, 31. */
         {
623
        E T41, T4f, T44, T4g, T48, T4j, T4b, T4i, T42, T43;
624
        T41 = FNMS(KP1_847759065, T40, T3Z);
625
        T4f = FMA(KP1_847759065, T40, T3Z);
626
        T42 = FMA(KP707106781, T3s, T3r);
627
        T43 = FMA(KP707106781, T3p, T3o);
628
        T44 = FNMS(KP198912367, T43, T42);
629
        T4g = FMA(KP198912367, T42, T43);
630
        {
631
       E T46, T47, T49, T4a;
632
       T46 = FMA(KP707106781, T3x, T3w);
633
       T47 = T3K + T3L;
634
       T48 = FNMS(KP923879532, T47, T46);
635
       T4j = FMA(KP923879532, T47, T46);
636
       T49 = FMA(KP707106781, T3I, T3H);
637
       T4a = T3B - T3E;
638
       T4b = FNMS(KP923879532, T4a, T49);
639
       T4i = FMA(KP923879532, T4a, T49);
640
        }
641
        {
642
       E T45, T4c, T4l, T4m;
643
       T45 = FNMS(KP1_961570560, T44, T41);
644
       T4c = FNMS(KP820678790, T4b, T48);
645
       R1[WS(rs, 19)] = FNMS(KP1_546020906, T4c, T45);
646
       R1[WS(rs, 3)] = FMA(KP1_546020906, T4c, T45);
647
       T4l = FMA(KP1_961570560, T4g, T4f);
648
       T4m = FMA(KP098491403, T4i, T4j);
649
       R1[WS(rs, 15)] = FNMS(KP1_990369453, T4m, T4l);
650
       R1[WS(rs, 31)] = FMA(KP1_990369453, T4m, T4l);
651
        }
652
        {
653
       E T4d, T4e, T4h, T4k;
654
       T4d = FMA(KP1_961570560, T44, T41);
655
       T4e = FMA(KP820678790, T48, T4b);
656
       R1[WS(rs, 11)] = FNMS(KP1_546020906, T4e, T4d);
657
       R1[WS(rs, 27)] = FMA(KP1_546020906, T4e, T4d);
658
       T4h = FNMS(KP1_961570560, T4g, T4f);
659
       T4k = FNMS(KP098491403, T4j, T4i);
660
       R1[WS(rs, 7)] = FNMS(KP1_990369453, T4k, T4h);
661
       R1[WS(rs, 23)] = FMA(KP1_990369453, T4k, T4h);
662
        }
663
         }
664
         /* Stores R1 at 2, 6, 10, 14, 18, 22, 26, 30. */
         {
665
        E T2V, T39, T2Y, T3a, T32, T3d, T35, T3c, T2W, T2X;
666
        T2V = FNMS(KP1_847759065, T2U, T2T);
667
        T39 = FMA(KP1_847759065, T2U, T2T);
668
        T2W = FNMS(KP707106781, T1M, T1J);
669
        T2X = FNMS(KP707106781, T1D, T1s);
670
        T2Y = FNMS(KP668178637, T2X, T2W);
671
        T3a = FMA(KP668178637, T2W, T2X);
672
        {
673
       E T30, T31, T33, T34;
674
       T30 = FNMS(KP707106781, T25, T1U);
675
       T31 = T2E - T2F;
676
       T32 = FNMS(KP923879532, T31, T30);
677
       T3d = FMA(KP923879532, T31, T30);
678
       T33 = FNMS(KP707106781, T2C, T2z);
679
       T34 = T2s - T2h;
680
       T35 = FNMS(KP923879532, T34, T33);
681
       T3c = FMA(KP923879532, T34, T33);
682
        }
683
        {
684
       E T2Z, T36, T3f, T3g;
685
       T2Z = FNMS(KP1_662939224, T2Y, T2V);
686
       T36 = FNMS(KP534511135, T35, T32);
687
       R1[WS(rs, 18)] = FNMS(KP1_763842528, T36, T2Z);
688
       R1[WS(rs, 2)] = FMA(KP1_763842528, T36, T2Z);
689
       T3f = FMA(KP1_662939224, T3a, T39);
690
       T3g = FMA(KP303346683, T3c, T3d);
691
       R1[WS(rs, 14)] = FNMS(KP1_913880671, T3g, T3f);
692
       R1[WS(rs, 30)] = FMA(KP1_913880671, T3g, T3f);
693
        }
694
        {
695
       E T37, T38, T3b, T3e;
696
       T37 = FMA(KP1_662939224, T2Y, T2V);
697
       T38 = FMA(KP534511135, T32, T35);
698
       R1[WS(rs, 10)] = FNMS(KP1_763842528, T38, T37);
699
       R1[WS(rs, 26)] = FMA(KP1_763842528, T38, T37);
700
       T3b = FNMS(KP1_662939224, T3a, T39);
701
       T3e = FNMS(KP303346683, T3d, T3c);
702
       R1[WS(rs, 6)] = FNMS(KP1_913880671, T3e, T3b);
703
       R1[WS(rs, 22)] = FMA(KP1_913880671, T3e, T3b);
704
        }
705
         }
706
    }
707
     }
708
}
709
710
/*
 * Descriptor passed to X(kr2c_register) together with r2cb_64.
 * Initializer fields, in order: 64, the string "r2cb_64", the tuple
 * { 178, 0, 216, 0 }, and the address of GENUS (declared elsewhere).
 * The 178 / 0 / 216 figures equal the "178 additions, 0 multiplications,
 * 216 fused multiply/add" tally in the generator comment of the FMA variant.
 * NOTE(review): presumably 64 is the transform size and the tuple is the
 * operation count -- confirm against the kr2c_desc declaration.
 */
static const kr2c_desc desc = { 64, "r2cb_64", { 178, 0, 216, 0 }, &GENUS };
711
712
/*
 * Registration entry point: forwards planner p, the r2cb_64 function and
 * its descriptor desc to X(kr2c_register). Has no other effect visible here.
 * NOTE(review): X(...) is a macro defined elsewhere; presumably it applies
 * the library's symbol prefix -- confirm in the project headers.
 */
void X(codelet_r2cb_64) (planner *p) { X(kr2c_register) (p, r2cb_64, &desc);
713
}
714
715
#else
716
717
/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cb_64 -include rdft/scalar/r2cb.h */
718
719
/*
720
 * This function contains 394 FP additions, 134 FP multiplications,
721
 * (or, 342 additions, 82 multiplications, 52 fused multiply/add),
722
 * 110 stack variables, 19 constants, and 128 memory accesses
723
 */
724
#include "rdft/scalar/r2cb.h"
725
726
/*
 * r2cb_64, non-FMA variant: generated size-64 codelet (gen_r2cb -n 64),
 * compiled when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is
 * defined.
 *
 * Every loop iteration reads Cr[WS(csr, k)] for k = 0..32 and
 * Ci[WS(csi, k)] for k = 1..31, and writes R0[WS(rs, k)] and
 * R1[WS(rs, k)] for k = 0..31 (128 memory accesses in total).
 *
 *   R0, R1        output arrays; both advance by ovs after each iteration
 *   Cr, Ci        input arrays; both advance by ivs after each iteration
 *   rs, csr, csi  strides passed to WS() for the R0/R1, Cr and Ci indices
 *   v             iteration count (the loop runs i from v down to 1)
 *
 * DK, E, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are macros/types defined
 * outside this file section.
 * NOTE(review): FMA(a, b, c) presumably evaluates a * b + c and
 * FNMS(a, b, c) evaluates c - a * b -- confirm against the codelet header.
 * NOTE(review): the name suggests the backward (half-complex to real)
 * transform, with R0/R1 holding even/odd real outputs -- confirm.
 */
static void r2cb_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
727
0
{
728
0
     /* Numeric constants used by the arithmetic below (declared via DK). */
     DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
729
0
     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
730
0
     DK(KP196034280, +0.196034280659121203988391127777283691722273346);
731
0
     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
732
0
     DK(KP942793473, +0.942793473651995297112775251810508755314920638);
733
0
     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
734
0
     DK(KP580569354, +0.580569354508924735272384751634790549382952557);
735
0
     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
736
0
     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
737
0
     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
738
0
     DK(KP390180644, +0.390180644032256535696569736954044481855383236);
739
0
     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
740
0
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
741
0
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
742
0
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
743
0
     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
744
0
     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
745
0
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
746
0
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
747
0
     {
748
0
    INT i;
749
0
    /* One pass per transform: v passes, stepping outputs by ovs and inputs by ivs. */
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
750
0
         /* Intermediates shared between the input-combining blocks and the output blocks. */
         E Ta, T2S, T18, T2u, T3F, T4V, T5l, T61, Th, T2T, T1h, T2v, T3M, T4W, T5o;
751
0
         E T62, T3Q, T5q, T5u, T44, Tp, Tw, T2V, T2W, T2X, T2Y, T3X, T5t, T1r, T2x;
752
0
         E T41, T5r, T1A, T2y, T4a, T5y, T5N, T4H, TN, T31, T4E, T5z, T39, T3q, T1L;
753
0
         E T2B, T4h, T5M, T2h, T2F, T12, T36, T5D, T5J, T5G, T5K, T1U, T26, T23, T27;
754
0
         E T4p, T4z, T4w, T4A, T34, T3r;
755
0
         /* Inputs: Cr at indices 0, 8, 16, 24, 32 and Ci at indices 8, 16, 24. */
         {
756
0
        E T5, T3A, T3, T3y, T9, T3C, T17, T3D, T6, T14;
757
0
        {
758
0
       E T4, T3z, T1, T2;
759
0
       T4 = Cr[WS(csr, 16)];
760
0
       T5 = KP2_000000000 * T4;
761
0
       T3z = Ci[WS(csi, 16)];
762
0
       T3A = KP2_000000000 * T3z;
763
0
       T1 = Cr[0];
764
0
       T2 = Cr[WS(csr, 32)];
765
0
       T3 = T1 + T2;
766
0
       T3y = T1 - T2;
767
0
       {
768
0
            E T7, T8, T15, T16;
769
0
            T7 = Cr[WS(csr, 8)];
770
0
            T8 = Cr[WS(csr, 24)];
771
0
            T9 = KP2_000000000 * (T7 + T8);
772
0
            T3C = T7 - T8;
773
0
            T15 = Ci[WS(csi, 8)];
774
0
            T16 = Ci[WS(csi, 24)];
775
0
            T17 = KP2_000000000 * (T15 - T16);
776
0
            T3D = T15 + T16;
777
0
       }
778
0
        }
779
0
        T6 = T3 + T5;
780
0
        Ta = T6 + T9;
781
0
        T2S = T6 - T9;
782
0
        T14 = T3 - T5;
783
0
        T18 = T14 - T17;
784
0
        T2u = T14 + T17;
785
0
        {
786
0
       E T3B, T3E, T5j, T5k;
787
0
       T3B = T3y - T3A;
788
0
       T3E = KP1_414213562 * (T3C - T3D);
789
0
       T3F = T3B + T3E;
790
0
       T4V = T3B - T3E;
791
0
       T5j = T3y + T3A;
792
0
       T5k = KP1_414213562 * (T3C + T3D);
793
0
       T5l = T5j - T5k;
794
0
       T61 = T5j + T5k;
795
0
        }
796
0
         }
797
0
         /* Inputs: Cr and Ci at indices 4, 12, 20, 28. */
         {
798
0
        E Td, T3G, T1c, T3K, Tg, T3J, T1f, T3H, T19, T1g;
799
0
        {
800
0
       E Tb, Tc, T1a, T1b;
801
0
       Tb = Cr[WS(csr, 4)];
802
0
       Tc = Cr[WS(csr, 28)];
803
0
       Td = Tb + Tc;
804
0
       T3G = Tb - Tc;
805
0
       T1a = Ci[WS(csi, 4)];
806
0
       T1b = Ci[WS(csi, 28)];
807
0
       T1c = T1a - T1b;
808
0
       T3K = T1a + T1b;
809
0
        }
810
0
        {
811
0
       E Te, Tf, T1d, T1e;
812
0
       Te = Cr[WS(csr, 20)];
813
0
       Tf = Cr[WS(csr, 12)];
814
0
       Tg = Te + Tf;
815
0
       T3J = Te - Tf;
816
0
       T1d = Ci[WS(csi, 20)];
817
0
       T1e = Ci[WS(csi, 12)];
818
0
       T1f = T1d - T1e;
819
0
       T3H = T1d + T1e;
820
0
        }
821
0
        Th = KP2_000000000 * (Td + Tg);
822
0
        T2T = KP2_000000000 * (T1f + T1c);
823
0
        T19 = Td - Tg;
824
0
        T1g = T1c - T1f;
825
0
        T1h = KP1_414213562 * (T19 - T1g);
826
0
        T2v = KP1_414213562 * (T19 + T1g);
827
0
        {
828
0
       E T3I, T3L, T5m, T5n;
829
0
       T3I = T3G - T3H;
830
0
       T3L = T3J + T3K;
831
0
       T3M = FNMS(KP765366864, T3L, KP1_847759065 * T3I);
832
0
       T4W = FMA(KP765366864, T3I, KP1_847759065 * T3L);
833
0
       T5m = T3G + T3H;
834
0
       T5n = T3K - T3J;
835
0
       T5o = FNMS(KP1_847759065, T5n, KP765366864 * T5m);
836
0
       T62 = FMA(KP1_847759065, T5m, KP765366864 * T5n);
837
0
        }
838
0
         }
839
0
         /* Inputs: Cr and Ci at indices 2, 6, 10, 14, 18, 22, 26, 30. */
         {
840
0
        E Tl, T3O, T1v, T43, To, T42, T1y, T3P, Ts, T3R, T1p, T3S, Tv, T3U, T1m;
841
0
        E T3V;
842
0
        {
843
0
       E Tj, Tk, T1t, T1u;
844
0
       Tj = Cr[WS(csr, 2)];
845
0
       Tk = Cr[WS(csr, 30)];
846
0
       Tl = Tj + Tk;
847
0
       T3O = Tj - Tk;
848
0
       T1t = Ci[WS(csi, 2)];
849
0
       T1u = Ci[WS(csi, 30)];
850
0
       T1v = T1t - T1u;
851
0
       T43 = T1t + T1u;
852
0
        }
853
0
        {
854
0
       E Tm, Tn, T1w, T1x;
855
0
       Tm = Cr[WS(csr, 18)];
856
0
       Tn = Cr[WS(csr, 14)];
857
0
       To = Tm + Tn;
858
0
       T42 = Tm - Tn;
859
0
       T1w = Ci[WS(csi, 18)];
860
0
       T1x = Ci[WS(csi, 14)];
861
0
       T1y = T1w - T1x;
862
0
       T3P = T1w + T1x;
863
0
        }
864
0
        {
865
0
       E Tq, Tr, T1n, T1o;
866
0
       Tq = Cr[WS(csr, 10)];
867
0
       Tr = Cr[WS(csr, 22)];
868
0
       Ts = Tq + Tr;
869
0
       T3R = Tq - Tr;
870
0
       T1n = Ci[WS(csi, 10)];
871
0
       T1o = Ci[WS(csi, 22)];
872
0
       T1p = T1n - T1o;
873
0
       T3S = T1n + T1o;
874
0
        }
875
0
        {
876
0
       E Tt, Tu, T1k, T1l;
877
0
       Tt = Cr[WS(csr, 6)];
878
0
       Tu = Cr[WS(csr, 26)];
879
0
       Tv = Tt + Tu;
880
0
       T3U = Tt - Tu;
881
0
       T1k = Ci[WS(csi, 26)];
882
0
       T1l = Ci[WS(csi, 6)];
883
0
       T1m = T1k - T1l;
884
0
       T3V = T1l + T1k;
885
0
        }
886
0
        T3Q = T3O - T3P;
887
0
        T5q = T3O + T3P;
888
0
        T5u = T43 - T42;
889
0
        T44 = T42 + T43;
890
0
        Tp = Tl + To;
891
0
        Tw = Ts + Tv;
892
0
        T2V = Tp - Tw;
893
0
        {
894
0
       E T3T, T3W, T1j, T1q;
895
0
       T2W = T1y + T1v;
896
0
       T2X = T1p + T1m;
897
0
       T2Y = T2W - T2X;
898
0
       T3T = T3R - T3S;
899
0
       T3W = T3U - T3V;
900
0
       T3X = KP707106781 * (T3T + T3W);
901
0
       T5t = KP707106781 * (T3T - T3W);
902
0
       T1j = Tl - To;
903
0
       T1q = T1m - T1p;
904
0
       T1r = T1j + T1q;
905
0
       T2x = T1j - T1q;
906
0
       {
907
0
            E T3Z, T40, T1s, T1z;
908
0
            T3Z = T3R + T3S;
909
0
            T40 = T3U + T3V;
910
0
            T41 = KP707106781 * (T3Z - T40);
911
0
            T5r = KP707106781 * (T3Z + T40);
912
0
            T1s = Ts - Tv;
913
0
            T1z = T1v - T1y;
914
0
            T1A = T1s + T1z;
915
0
            T2y = T1z - T1s;
916
0
       }
917
0
        }
918
0
         }
919
0
         /* Inputs: Cr and Ci at indices 1, 7, 9, 15, 17, 23, 25, 31. */
         {
920
0
        E TB, T48, T2c, T4G, TE, T4F, T2f, T49, TI, T4b, T1J, T4c, TL, T4e, T1G;
921
0
        E T4f;
922
0
        {
923
0
       E Tz, TA, T2a, T2b;
924
0
       Tz = Cr[WS(csr, 1)];
925
0
       TA = Cr[WS(csr, 31)];
926
0
       TB = Tz + TA;
927
0
       T48 = Tz - TA;
928
0
       T2a = Ci[WS(csi, 1)];
929
0
       T2b = Ci[WS(csi, 31)];
930
0
       T2c = T2a - T2b;
931
0
       T4G = T2a + T2b;
932
0
        }
933
0
        {
934
0
       E TC, TD, T2d, T2e;
935
0
       TC = Cr[WS(csr, 17)];
936
0
       TD = Cr[WS(csr, 15)];
937
0
       TE = TC + TD;
938
0
       T4F = TC - TD;
939
0
       T2d = Ci[WS(csi, 17)];
940
0
       T2e = Ci[WS(csi, 15)];
941
0
       T2f = T2d - T2e;
942
0
       T49 = T2d + T2e;
943
0
        }
944
0
        {
945
0
       E TG, TH, T1H, T1I;
946
0
       TG = Cr[WS(csr, 9)];
947
0
       TH = Cr[WS(csr, 23)];
948
0
       TI = TG + TH;
949
0
       T4b = TG - TH;
950
0
       T1H = Ci[WS(csi, 9)];
951
0
       T1I = Ci[WS(csi, 23)];
952
0
       T1J = T1H - T1I;
953
0
       T4c = T1H + T1I;
954
0
        }
955
0
        {
956
0
       E TJ, TK, T1E, T1F;
957
0
       TJ = Cr[WS(csr, 7)];
958
0
       TK = Cr[WS(csr, 25)];
959
0
       TL = TJ + TK;
960
0
       T4e = TJ - TK;
961
0
       T1E = Ci[WS(csi, 25)];
962
0
       T1F = Ci[WS(csi, 7)];
963
0
       T1G = T1E - T1F;
964
0
       T4f = T1F + T1E;
965
0
        }
966
0
        {
967
0
       E TF, TM, T1D, T1K;
968
0
       T4a = T48 - T49;
969
0
       T5y = T48 + T49;
970
0
       T5N = T4G - T4F;
971
0
       T4H = T4F + T4G;
972
0
       TF = TB + TE;
973
0
       TM = TI + TL;
974
0
       TN = TF + TM;
975
0
       T31 = TF - TM;
976
0
       {
977
0
            E T4C, T4D, T37, T38;
978
0
            T4C = T4b + T4c;
979
0
            T4D = T4e + T4f;
980
0
            T4E = KP707106781 * (T4C - T4D);
981
0
            T5z = KP707106781 * (T4C + T4D);
982
0
            T37 = T2f + T2c;
983
0
            T38 = T1J + T1G;
984
0
            T39 = T37 - T38;
985
0
            T3q = T38 + T37;
986
0
       }
987
0
       T1D = TB - TE;
988
0
       T1K = T1G - T1J;
989
0
       T1L = T1D + T1K;
990
0
       T2B = T1D - T1K;
991
0
       {
992
0
            E T4d, T4g, T29, T2g;
993
0
            T4d = T4b - T4c;
994
0
            T4g = T4e - T4f;
995
0
            T4h = KP707106781 * (T4d + T4g);
996
0
            T5M = KP707106781 * (T4d - T4g);
997
0
            T29 = TI - TL;
998
0
            T2g = T2c - T2f;
999
0
            T2h = T29 + T2g;
1000
0
            T2F = T2g - T29;
1001
0
       }
1002
0
        }
1003
0
         }
1004
0
         /* Inputs: Cr and Ci at indices 3, 5, 11, 13, 19, 21, 27, 29. */
         {
1005
0
        E TQ, T4j, T1P, T4n, TT, T4m, T1S, T4k, TX, T4q, T1Y, T4u, T10, T4t, T21;
1006
0
        E T4r;
1007
0
        {
1008
0
       E TO, TP, T1N, T1O;
1009
0
       TO = Cr[WS(csr, 5)];
1010
0
       TP = Cr[WS(csr, 27)];
1011
0
       TQ = TO + TP;
1012
0
       T4j = TO - TP;
1013
0
       T1N = Ci[WS(csi, 5)];
1014
0
       T1O = Ci[WS(csi, 27)];
1015
0
       T1P = T1N - T1O;
1016
0
       T4n = T1N + T1O;
1017
0
        }
1018
0
        {
1019
0
       E TR, TS, T1Q, T1R;
1020
0
       TR = Cr[WS(csr, 21)];
1021
0
       TS = Cr[WS(csr, 11)];
1022
0
       TT = TR + TS;
1023
0
       T4m = TR - TS;
1024
0
       T1Q = Ci[WS(csi, 21)];
1025
0
       T1R = Ci[WS(csi, 11)];
1026
0
       T1S = T1Q - T1R;
1027
0
       T4k = T1Q + T1R;
1028
0
        }
1029
0
        {
1030
0
       E TV, TW, T1W, T1X;
1031
0
       TV = Cr[WS(csr, 3)];
1032
0
       TW = Cr[WS(csr, 29)];
1033
0
       TX = TV + TW;
1034
0
       T4q = TV - TW;
1035
0
       T1W = Ci[WS(csi, 29)];
1036
0
       T1X = Ci[WS(csi, 3)];
1037
0
       T1Y = T1W - T1X;
1038
0
       T4u = T1X + T1W;
1039
0
        }
1040
0
        {
1041
0
       E TY, TZ, T1Z, T20;
1042
0
       TY = Cr[WS(csr, 13)];
1043
0
       TZ = Cr[WS(csr, 19)];
1044
0
       T10 = TY + TZ;
1045
0
       T4t = TY - TZ;
1046
0
       T1Z = Ci[WS(csi, 13)];
1047
0
       T20 = Ci[WS(csi, 19)];
1048
0
       T21 = T1Z - T20;
1049
0
       T4r = T1Z + T20;
1050
0
        }
1051
0
        {
1052
0
       E TU, T11, T5B, T5C;
1053
0
       TU = TQ + TT;
1054
0
       T11 = TX + T10;
1055
0
       T12 = TU + T11;
1056
0
       T36 = TU - T11;
1057
0
       T5B = T4j + T4k;
1058
0
       T5C = T4n - T4m;
1059
0
       T5D = FNMS(KP923879532, T5C, KP382683432 * T5B);
1060
0
       T5J = FMA(KP923879532, T5B, KP382683432 * T5C);
1061
0
        }
1062
0
        {
1063
0
       E T5E, T5F, T1M, T1T;
1064
0
       T5E = T4q + T4r;
1065
0
       T5F = T4t + T4u;
1066
0
       T5G = FNMS(KP923879532, T5F, KP382683432 * T5E);
1067
0
       T5K = FMA(KP923879532, T5E, KP382683432 * T5F);
1068
0
       T1M = TQ - TT;
1069
0
       T1T = T1P - T1S;
1070
0
       T1U = T1M - T1T;
1071
0
       T26 = T1M + T1T;
1072
0
        }
1073
0
        {
1074
0
       E T1V, T22, T4l, T4o;
1075
0
       T1V = TX - T10;
1076
0
       T22 = T1Y - T21;
1077
0
       T23 = T1V + T22;
1078
0
       T27 = T22 - T1V;
1079
0
       T4l = T4j - T4k;
1080
0
       T4o = T4m + T4n;
1081
0
       T4p = FNMS(KP382683432, T4o, KP923879532 * T4l);
1082
0
       T4z = FMA(KP382683432, T4l, KP923879532 * T4o);
1083
0
        }
1084
0
        {
1085
0
       E T4s, T4v, T32, T33;
1086
0
       T4s = T4q - T4r;
1087
0
       T4v = T4t - T4u;
1088
0
       T4w = FMA(KP923879532, T4s, KP382683432 * T4v);
1089
0
       T4A = FNMS(KP382683432, T4s, KP923879532 * T4v);
1090
0
       T32 = T21 + T1Y;
1091
0
       T33 = T1S + T1P;
1092
0
       T34 = T32 - T33;
1093
0
       T3r = T33 + T32;
1094
0
        }
1095
0
         }
1096
0
         /* Outputs: R0 at indices 0, 8, 16, 24. */
         {
1097
0
        E T13, T3x, Ty, T3w, Ti, Tx;
1098
0
        T13 = KP2_000000000 * (TN + T12);
1099
0
        T3x = KP2_000000000 * (T3r + T3q);
1100
0
        Ti = Ta + Th;
1101
0
        Tx = KP2_000000000 * (Tp + Tw);
1102
0
        Ty = Ti + Tx;
1103
0
        T3w = Ti - Tx;
1104
0
        R0[WS(rs, 16)] = Ty - T13;
1105
0
        R0[WS(rs, 24)] = T3w + T3x;
1106
0
        R0[0] = Ty + T13;
1107
0
        R0[WS(rs, 8)] = T3w - T3x;
1108
0
         }
1109
0
         /* Outputs: R0 at indices 6, 14, 22, 30. */
         {
1110
0
        E T3g, T3k, T3j, T3l;
1111
0
        {
1112
0
       E T3e, T3f, T3h, T3i;
1113
0
       T3e = T2S + T2T;
1114
0
       T3f = KP1_414213562 * (T2V + T2Y);
1115
0
       T3g = T3e - T3f;
1116
0
       T3k = T3e + T3f;
1117
0
       T3h = T31 - T34;
1118
0
       T3i = T39 - T36;
1119
0
       T3j = FNMS(KP1_847759065, T3i, KP765366864 * T3h);
1120
0
       T3l = FMA(KP1_847759065, T3h, KP765366864 * T3i);
1121
0
        }
1122
0
        R0[WS(rs, 22)] = T3g - T3j;
1123
0
        R0[WS(rs, 30)] = T3k + T3l;
1124
0
        R0[WS(rs, 6)] = T3g + T3j;
1125
0
        R0[WS(rs, 14)] = T3k - T3l;
1126
0
         }
1127
0
         /* Outputs: R0 at indices 4, 12, 20, 28. */
         {
1128
0
        E T3o, T3u, T3t, T3v;
1129
0
        {
1130
0
       E T3m, T3n, T3p, T3s;
1131
0
       T3m = Ta - Th;
1132
0
       T3n = KP2_000000000 * (T2X + T2W);
1133
0
       T3o = T3m - T3n;
1134
0
       T3u = T3m + T3n;
1135
0
       T3p = TN - T12;
1136
0
       T3s = T3q - T3r;
1137
0
       T3t = KP1_414213562 * (T3p - T3s);
1138
0
       T3v = KP1_414213562 * (T3p + T3s);
1139
0
        }
1140
0
        R0[WS(rs, 20)] = T3o - T3t;
1141
0
        R0[WS(rs, 28)] = T3u + T3v;
1142
0
        R0[WS(rs, 4)] = T3o + T3t;
1143
0
        R0[WS(rs, 12)] = T3u - T3v;
1144
0
         }
1145
0
         /* Outputs: R0 at indices 2, 10, 18, 26. */
         {
1146
0
        E T30, T3c, T3b, T3d;
1147
0
        {
1148
0
       E T2U, T2Z, T35, T3a;
1149
0
       T2U = T2S - T2T;
1150
0
       T2Z = KP1_414213562 * (T2V - T2Y);
1151
0
       T30 = T2U + T2Z;
1152
0
       T3c = T2U - T2Z;
1153
0
       T35 = T31 + T34;
1154
0
       T3a = T36 + T39;
1155
0
       T3b = FNMS(KP765366864, T3a, KP1_847759065 * T35);
1156
0
       T3d = FMA(KP765366864, T35, KP1_847759065 * T3a);
1157
0
        }
1158
0
        R0[WS(rs, 18)] = T30 - T3b;
1159
0
        R0[WS(rs, 26)] = T3c + T3d;
1160
0
        R0[WS(rs, 2)] = T30 + T3b;
1161
0
        R0[WS(rs, 10)] = T3c - T3d;
1162
0
         }
1163
0
         /* Outputs: R0 at indices 1, 5, 9, 13, 17, 21, 25, 29. */
         {
1164
0
        E T25, T2p, T2i, T2q, T1C, T2k, T2o, T2s, T24, T28;
1165
0
        T24 = KP707106781 * (T1U + T23);
1166
0
        T25 = T1L + T24;
1167
0
        T2p = T1L - T24;
1168
0
        T28 = KP707106781 * (T26 + T27);
1169
0
        T2i = T28 + T2h;
1170
0
        T2q = T2h - T28;
1171
0
        {
1172
0
       E T1i, T1B, T2m, T2n;
1173
0
       T1i = T18 + T1h;
1174
0
       T1B = FNMS(KP765366864, T1A, KP1_847759065 * T1r);
1175
0
       T1C = T1i + T1B;
1176
0
       T2k = T1i - T1B;
1177
0
       T2m = T18 - T1h;
1178
0
       T2n = FMA(KP765366864, T1r, KP1_847759065 * T1A);
1179
0
       T2o = T2m - T2n;
1180
0
       T2s = T2m + T2n;
1181
0
        }
1182
0
        {
1183
0
       E T2j, T2t, T2l, T2r;
1184
0
       T2j = FNMS(KP390180644, T2i, KP1_961570560 * T25);
1185
0
       R0[WS(rs, 17)] = T1C - T2j;
1186
0
       R0[WS(rs, 1)] = T1C + T2j;
1187
0
       T2t = FMA(KP1_662939224, T2p, KP1_111140466 * T2q);
1188
0
       R0[WS(rs, 13)] = T2s - T2t;
1189
0
       R0[WS(rs, 29)] = T2s + T2t;
1190
0
       T2l = FMA(KP390180644, T25, KP1_961570560 * T2i);
1191
0
       R0[WS(rs, 9)] = T2k - T2l;
1192
0
       R0[WS(rs, 25)] = T2k + T2l;
1193
0
       T2r = FNMS(KP1_662939224, T2q, KP1_111140466 * T2p);
1194
0
       R0[WS(rs, 21)] = T2o - T2r;
1195
0
       R0[WS(rs, 5)] = T2o + T2r;
1196
0
        }
1197
0
         }
1198
0
         /* Outputs: R0 at indices 3, 7, 11, 15, 19, 23, 27, 31. */
         {
1199
0
        E T2D, T2N, T2G, T2O, T2A, T2I, T2M, T2Q, T2C, T2E;
1200
0
        T2C = KP707106781 * (T27 - T26);
1201
0
        T2D = T2B + T2C;
1202
0
        T2N = T2B - T2C;
1203
0
        T2E = KP707106781 * (T1U - T23);
1204
0
        T2G = T2E + T2F;
1205
0
        T2O = T2F - T2E;
1206
0
        {
1207
0
       E T2w, T2z, T2K, T2L;
1208
0
       T2w = T2u - T2v;
1209
0
       T2z = FNMS(KP1_847759065, T2y, KP765366864 * T2x);
1210
0
       T2A = T2w + T2z;
1211
0
       T2I = T2w - T2z;
1212
0
       T2K = T2u + T2v;
1213
0
       T2L = FMA(KP1_847759065, T2x, KP765366864 * T2y);
1214
0
       T2M = T2K - T2L;
1215
0
       T2Q = T2K + T2L;
1216
0
        }
1217
0
        {
1218
0
       E T2H, T2R, T2J, T2P;
1219
0
       T2H = FNMS(KP1_111140466, T2G, KP1_662939224 * T2D);
1220
0
       R0[WS(rs, 19)] = T2A - T2H;
1221
0
       R0[WS(rs, 3)] = T2A + T2H;
1222
0
       T2R = FMA(KP1_961570560, T2N, KP390180644 * T2O);
1223
0
       R0[WS(rs, 15)] = T2Q - T2R;
1224
0
       R0[WS(rs, 31)] = T2Q + T2R;
1225
0
       T2J = FMA(KP1_111140466, T2D, KP1_662939224 * T2G);
1226
0
       R0[WS(rs, 11)] = T2I - T2J;
1227
0
       R0[WS(rs, 27)] = T2I + T2J;
1228
0
       T2P = FNMS(KP1_961570560, T2O, KP390180644 * T2N);
1229
0
       R0[WS(rs, 23)] = T2M - T2P;
1230
0
       R0[WS(rs, 7)] = T2M + T2P;
1231
0
        }
1232
0
         }
1233
0
         /* Outputs: R1 at indices 1, 5, 9, 13, 17, 21, 25, 29. */
         {
1234
0
        E T5p, T5T, T5w, T5U, T5I, T5W, T5P, T5X, T5s, T5v;
1235
0
        T5p = T5l + T5o;
1236
0
        T5T = T5l - T5o;
1237
0
        T5s = T5q - T5r;
1238
0
        T5v = T5t + T5u;
1239
0
        T5w = FNMS(KP1_111140466, T5v, KP1_662939224 * T5s);
1240
0
        T5U = FMA(KP1_111140466, T5s, KP1_662939224 * T5v);
1241
0
        {
1242
0
       E T5A, T5H, T5L, T5O;
1243
0
       T5A = T5y - T5z;
1244
0
       T5H = T5D + T5G;
1245
0
       T5I = T5A + T5H;
1246
0
       T5W = T5A - T5H;
1247
0
       T5L = T5J - T5K;
1248
0
       T5O = T5M + T5N;
1249
0
       T5P = T5L + T5O;
1250
0
       T5X = T5O - T5L;
1251
0
        }
1252
0
        {
1253
0
       E T5x, T5Q, T5Z, T60;
1254
0
       T5x = T5p + T5w;
1255
0
       T5Q = FNMS(KP580569354, T5P, KP1_913880671 * T5I);
1256
0
       R1[WS(rs, 17)] = T5x - T5Q;
1257
0
       R1[WS(rs, 1)] = T5x + T5Q;
1258
0
       T5Z = T5T + T5U;
1259
0
       T60 = FMA(KP1_763842528, T5W, KP942793473 * T5X);
1260
0
       R1[WS(rs, 13)] = T5Z - T60;
1261
0
       R1[WS(rs, 29)] = T5Z + T60;
1262
0
        }
1263
0
        {
1264
0
       E T5R, T5S, T5V, T5Y;
1265
0
       T5R = T5p - T5w;
1266
0
       T5S = FMA(KP580569354, T5I, KP1_913880671 * T5P);
1267
0
       R1[WS(rs, 9)] = T5R - T5S;
1268
0
       R1[WS(rs, 25)] = T5R + T5S;
1269
0
       T5V = T5T - T5U;
1270
0
       T5Y = FNMS(KP1_763842528, T5X, KP942793473 * T5W);
1271
0
       R1[WS(rs, 21)] = T5V - T5Y;
1272
0
       R1[WS(rs, 5)] = T5V + T5Y;
1273
0
        }
1274
0
         }
1275
0
         /* Outputs: R1 at indices 0, 4, 8, 12, 16, 20, 24, 28. */
         {
1276
0
        E T3N, T4N, T46, T4O, T4y, T4Q, T4J, T4R, T3Y, T45;
1277
0
        T3N = T3F + T3M;
1278
0
        T4N = T3F - T3M;
1279
0
        T3Y = T3Q + T3X;
1280
0
        T45 = T41 + T44;
1281
0
        T46 = FNMS(KP390180644, T45, KP1_961570560 * T3Y);
1282
0
        T4O = FMA(KP390180644, T3Y, KP1_961570560 * T45);
1283
0
        {
1284
0
       E T4i, T4x, T4B, T4I;
1285
0
       T4i = T4a + T4h;
1286
0
       T4x = T4p + T4w;
1287
0
       T4y = T4i + T4x;
1288
0
       T4Q = T4i - T4x;
1289
0
       T4B = T4z + T4A;
1290
0
       T4I = T4E + T4H;
1291
0
       T4J = T4B + T4I;
1292
0
       T4R = T4I - T4B;
1293
0
        }
1294
0
        {
1295
0
       E T47, T4K, T4T, T4U;
1296
0
       T47 = T3N + T46;
1297
0
       T4K = FNMS(KP196034280, T4J, KP1_990369453 * T4y);
1298
0
       R1[WS(rs, 16)] = T47 - T4K;
1299
0
       R1[0] = T47 + T4K;
1300
0
       T4T = T4N + T4O;
1301
0
       T4U = FMA(KP1_546020906, T4Q, KP1_268786568 * T4R);
1302
0
       R1[WS(rs, 12)] = T4T - T4U;
1303
0
       R1[WS(rs, 28)] = T4T + T4U;
1304
0
        }
1305
0
        {
1306
0
       E T4L, T4M, T4P, T4S;
1307
0
       T4L = T3N - T46;
1308
0
       T4M = FMA(KP196034280, T4y, KP1_990369453 * T4J);
1309
0
       R1[WS(rs, 8)] = T4L - T4M;
1310
0
       R1[WS(rs, 24)] = T4L + T4M;
1311
0
       T4P = T4N - T4O;
1312
0
       T4S = FNMS(KP1_546020906, T4R, KP1_268786568 * T4Q);
1313
0
       R1[WS(rs, 20)] = T4P - T4S;
1314
0
       R1[WS(rs, 4)] = T4P + T4S;
1315
0
        }
1316
0
         }
1317
0
         /* Outputs: R1 at indices 3, 7, 11, 15, 19, 23, 27, 31. */
         {
1318
0
        E T63, T6h, T66, T6i, T6a, T6k, T6d, T6l, T64, T65;
1319
0
        T63 = T61 - T62;
1320
0
        T6h = T61 + T62;
1321
0
        T64 = T5q + T5r;
1322
0
        T65 = T5u - T5t;
1323
0
        T66 = FNMS(KP1_961570560, T65, KP390180644 * T64);
1324
0
        T6i = FMA(KP1_961570560, T64, KP390180644 * T65);
1325
0
        {
1326
0
       E T68, T69, T6b, T6c;
1327
0
       T68 = T5y + T5z;
1328
0
       T69 = T5J + T5K;
1329
0
       T6a = T68 - T69;
1330
0
       T6k = T68 + T69;
1331
0
       T6b = T5D - T5G;
1332
0
       T6c = T5N - T5M;
1333
0
       T6d = T6b + T6c;
1334
0
       T6l = T6c - T6b;
1335
0
        }
1336
0
        {
1337
0
       E T67, T6e, T6n, T6o;
1338
0
       T67 = T63 + T66;
1339
0
       T6e = FNMS(KP1_268786568, T6d, KP1_546020906 * T6a);
1340
0
       R1[WS(rs, 19)] = T67 - T6e;
1341
0
       R1[WS(rs, 3)] = T67 + T6e;
1342
0
       T6n = T6h + T6i;
1343
0
       T6o = FMA(KP1_990369453, T6k, KP196034280 * T6l);
1344
0
       R1[WS(rs, 15)] = T6n - T6o;
1345
0
       R1[WS(rs, 31)] = T6n + T6o;
1346
0
        }
1347
0
        {
1348
0
       E T6f, T6g, T6j, T6m;
1349
0
       T6f = T63 - T66;
1350
0
       T6g = FMA(KP1_268786568, T6a, KP1_546020906 * T6d);
1351
0
       R1[WS(rs, 11)] = T6f - T6g;
1352
0
       R1[WS(rs, 27)] = T6f + T6g;
1353
0
       T6j = T6h - T6i;
1354
0
       T6m = FNMS(KP1_990369453, T6l, KP196034280 * T6k);
1355
0
       R1[WS(rs, 23)] = T6j - T6m;
1356
0
       R1[WS(rs, 7)] = T6j + T6m;
1357
0
        }
1358
0
         }
1359
0
         /* Outputs: R1 at indices 2, 6, 10, 14, 18, 22, 26, 30. */
         {
1360
0
        E T4X, T5b, T50, T5c, T54, T5e, T57, T5f, T4Y, T4Z;
1361
0
        T4X = T4V - T4W;
1362
0
        T5b = T4V + T4W;
1363
0
        T4Y = T3Q - T3X;
1364
0
        T4Z = T44 - T41;
1365
0
        T50 = FNMS(KP1_662939224, T4Z, KP1_111140466 * T4Y);
1366
0
        T5c = FMA(KP1_662939224, T4Y, KP1_111140466 * T4Z);
1367
0
        {
1368
0
       E T52, T53, T55, T56;
1369
0
       T52 = T4a - T4h;
1370
0
       T53 = T4A - T4z;
1371
0
       T54 = T52 + T53;
1372
0
       T5e = T52 - T53;
1373
0
       T55 = T4p - T4w;
1374
0
       T56 = T4H - T4E;
1375
0
       T57 = T55 + T56;
1376
0
       T5f = T56 - T55;
1377
0
        }
1378
0
        {
1379
0
       E T51, T58, T5h, T5i;
1380
0
       T51 = T4X + T50;
1381
0
       T58 = FNMS(KP942793473, T57, KP1_763842528 * T54);
1382
0
       R1[WS(rs, 18)] = T51 - T58;
1383
0
       R1[WS(rs, 2)] = T51 + T58;
1384
0
       T5h = T5b + T5c;
1385
0
       T5i = FMA(KP1_913880671, T5e, KP580569354 * T5f);
1386
0
       R1[WS(rs, 14)] = T5h - T5i;
1387
0
       R1[WS(rs, 30)] = T5h + T5i;
1388
0
        }
1389
0
        {
1390
0
       E T59, T5a, T5d, T5g;
1391
0
       T59 = T4X - T50;
1392
0
       T5a = FMA(KP942793473, T54, KP1_763842528 * T57);
1393
0
       R1[WS(rs, 10)] = T59 - T5a;
1394
0
       R1[WS(rs, 26)] = T59 + T5a;
1395
0
       T5d = T5b - T5c;
1396
0
       T5g = FNMS(KP1_913880671, T5f, KP580569354 * T5e);
1397
0
       R1[WS(rs, 22)] = T5d - T5g;
1398
0
       R1[WS(rs, 6)] = T5d + T5g;
1399
0
        }
1400
0
         }
1401
0
    }
1402
0
     }
1403
0
}
1404
1405
/*
 * Descriptor handed to X(kr2c_register) for the non-FMA build of r2cb_64.
 * Fields as written here: 64, the name string "r2cb_64", four counts and
 * the address of GENUS (defined outside this file section).
 * The first three counts (342, 82, 52) equal the "342 additions,
 * 82 multiplications, 52 fused multiply/add" figures in the generator
 * comment for this variant.
 * NOTE(review): 64 presumably is the transform size (generator was run with
 * -n 64) and the fourth count is not explained here -- confirm against the
 * kr2c_desc declaration.
 */
static const kr2c_desc desc = { 64, "r2cb_64", { 342, 82, 52, 0 }, &GENUS };
1406
1407
1
/*
 * Registration entry point for the non-FMA build: passes the planner p, the
 * r2cb_64 function and its descriptor desc to X(kr2c_register).
 * X() is a macro defined outside this file section.
 * NOTE(review): X() presumably applies the library's symbol prefix -- confirm.
 */
void X(codelet_r2cb_64) (planner *p) { X(kr2c_register) (p, r2cb_64, &desc);
1408
1
}
1409
1410
#endif