Coverage Report

Created: 2025-06-22 06:45

/src/fftw3/rdft/scalar/r2cf/hc2cf2_20.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sun Jun 22 06:44:08 UTC 2025 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cf2_20 -include rdft/scalar/hc2cf.h */
29
30
/*
31
 * This function contains 276 FP additions, 198 FP multiplications,
32
 * (or, 136 additions, 58 multiplications, 140 fused multiply/add),
33
 * 95 stack variables, 4 constants, and 80 memory accesses
34
 */
35
#include "rdft/scalar/hc2cf.h"
36
37
/*
 * hc2cf2_20 -- machine-generated size-20 twiddle kernel (see the
 * "Generated by" comment above: -n 20 -dit -twiddle-log3).  This is the
 * variant compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is
 * defined.
 *
 * Parameters, as used by the body:
 *   Rp, Ip, Rm, Im  arrays that are both read and overwritten; elements
 *                   are addressed as X[0] and X[WS(rs, k)] for k = 1..9.
 *   W               read-only twiddle table; offset by (mb - 1) * 8 on
 *                   entry to the loop and advanced by 8 per iteration,
 *                   with W[0]..W[7] read each time.
 *   rs              stride handed to WS() to form element offsets.
 *   mb, me          the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms              per-iteration step: Rp and Ip advance by +ms, Rm and
 *                   Im move by -ms.
 *
 * FMA / FNMS / FMS are project macros not defined in this file;
 * presumably FMA(a, b, c) = a*b + c, FNMS(a, b, c) = c - a*b and
 * FMS(a, b, c) = a*b - c -- confirm against the project headers.
 */
static void hc2cf2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
     /* Four numeric constants declared through the DK macro; they are used
      * only as multipliers in the output-stage FMA/FNMS/FMS expressions. */
39
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
40
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
41
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
42
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
43
     {
44
    INT m;
    /* One pass per m in [mb, me).  The increment clause steps all four
     * data pointers and W, then invokes MAKE_VOLATILE_STRIDE(80, rs). */
45
    for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) {
46
         E T2, Th, Tf, T6, T5, Ti, Tl, T1n, T3, Tt, Tv, T7, T17, T1L, T24;
47
         E Tb, T13, T1P, T21, T1b, T1D, T1A, T1H, T1f, TA, Tw, Tq, Tm, TK, T1S;
48
         E TO, T1p, T1q, T1u, T2n, T2k, T2h, T2d;
         /* Twiddle stage: load W[0]..W[7] and combine them with
          * multiplies and FMA/FNMS into the coefficient pairs applied to
          * the inputs below. */
49
         {
50
        E Tk, Ta, T1e, T4, T1a, Tj, T12, T1G, T16, T1K, Tg, Tz;
51
        T2 = W[0];
52
        Th = W[3];
53
        Tf = W[2];
54
        Tg = T2 * Tf;
55
        Tk = T2 * Th;
56
        T6 = W[5];
57
        Ta = T2 * T6;
58
        T1e = Tf * T6;
59
        T5 = W[1];
60
        Ti = FNMS(T5, Th, Tg);
61
        Tl = FMA(T5, Tf, Tk);
62
        T1n = FMA(T5, Th, Tg);
63
        T3 = W[4];
64
        T4 = T2 * T3;
65
        T1a = Tf * T3;
66
        Tj = Ti * T3;
67
        Tt = W[6];
68
        T12 = Tf * Tt;
69
        T1G = T2 * Tt;
70
        Tv = W[7];
71
        T16 = Tf * Tv;
72
        T1K = T2 * Tv;
73
        T7 = FNMS(T5, T6, T4);
74
        T17 = FNMS(Th, Tt, T16);
75
        T1L = FNMS(T5, Tt, T1K);
76
        T24 = FMA(Th, T3, T1e);
77
        Tb = FMA(T5, T3, Ta);
78
        T13 = FMA(Th, Tv, T12);
79
        T1P = FNMS(Tl, T6, Tj);
80
        T21 = FNMS(Th, T6, T1a);
81
        T1b = FMA(Th, T6, T1a);
82
        T1D = FNMS(T5, T3, Ta);
83
        T1A = FMA(T5, T6, T4);
84
        T1H = FMA(T5, Tv, T1G);
85
        T1f = FNMS(Th, T3, T1e);
86
        Tz = Ti * Tv;
87
        TA = FNMS(Tl, Tt, Tz);
88
        {
89
       E Tu, Tp, TJ, TN;
90
       Tu = Ti * Tt;
91
       Tw = FMA(Tl, Tv, Tu);
92
       Tp = Ti * T6;
93
       Tq = FNMS(Tl, T3, Tp);
94
       Tm = FMA(Tl, T6, Tj);
95
       TJ = Tm * Tt;
96
       TN = Tm * Tv;
97
       TK = FMA(Tq, Tv, TJ);
98
       T1S = FMA(Tl, T3, Tp);
99
       TO = FNMS(Tq, Tt, TN);
100
       {
101
            E T1o, T2g, T1t, T2c;
102
            T1o = T1n * T3;
103
            T2g = T1n * Tv;
104
            T1t = T1n * T6;
105
            T2c = T1n * Tt;
106
            T1p = FNMS(T5, Tf, Tk);
107
            T1q = FNMS(T1p, T6, T1o);
108
            T1u = FMA(T1p, T3, T1t);
109
            T2n = FNMS(T1p, T3, T1t);
110
            T2k = FMA(T1p, T6, T1o);
111
            T2h = FNMS(T1p, Tt, T2g);
112
            T2d = FMA(T1p, Tv, T2c);
113
       }
114
        }
115
         }
         /* Input stage: each nested block reads element pairs from
          * Rp/Rm or Ip/Im, applies one coefficient pair with FMA/FNMS,
          * and forms the sums and differences consumed by the output
          * stage. */
116
         {
117
        E Te, T2C, T4L, T57, TD, T58, T2H, T4H, T11, T2v, T4d, T4z, T2P, T3P, T3J;
118
        E T3Z, T2r, T2z, T4n, T4v, T3b, T3T, T3n, T43, T20, T2y, T4k, T4w, T34, T3S;
119
        E T3u, T42, T1y, T2w, T4g, T4y, T2W, T3Q, T3C, T40;
120
        {
121
       E T1, T4K, T8, T9, Tc, T4I, Td, T4J;
122
       T1 = Rp[0];
123
       T4K = Rm[0];
124
       T8 = Rp[WS(rs, 5)];
125
       T9 = T7 * T8;
126
       Tc = Rm[WS(rs, 5)];
127
       T4I = T7 * Tc;
128
       Td = FMA(Tb, Tc, T9);
129
       Te = T1 + Td;
130
       T2C = T1 - Td;
131
       T4J = FNMS(Tb, T8, T4I);
132
       T4L = T4J + T4K;
133
       T57 = T4K - T4J;
134
        }
135
        {
136
       E Tn, To, Tr, T2D, Tx, Ty, TB, T2F;
137
       Tn = Ip[WS(rs, 2)];
138
       To = Tm * Tn;
139
       Tr = Im[WS(rs, 2)];
140
       T2D = Tm * Tr;
141
       Tx = Ip[WS(rs, 7)];
142
       Ty = Tw * Tx;
143
       TB = Im[WS(rs, 7)];
144
       T2F = Tw * TB;
145
       {
146
            E Ts, TC, T2E, T2G;
147
            Ts = FMA(Tq, Tr, To);
148
            TC = FMA(TA, TB, Ty);
149
            TD = Ts + TC;
150
            T58 = Ts - TC;
151
            T2E = FNMS(Tq, Tn, T2D);
152
            T2G = FNMS(TA, Tx, T2F);
153
            T2H = T2E - T2G;
154
            T4H = T2E + T2G;
155
       }
156
        }
157
        {
158
       E TI, T3F, TZ, T2N, TQ, T3H, TV, T2L;
159
       {
160
            E TF, TG, TH, T3E;
161
            TF = Rp[WS(rs, 2)];
162
            TG = Ti * TF;
163
            TH = Rm[WS(rs, 2)];
164
            T3E = Ti * TH;
165
            TI = FMA(Tl, TH, TG);
166
            T3F = FNMS(Tl, TF, T3E);
167
       }
168
       {
169
            E TW, TX, TY, T2M;
170
            TW = Ip[WS(rs, 9)];
171
            TX = Tt * TW;
172
            TY = Im[WS(rs, 9)];
173
            T2M = Tt * TY;
174
            TZ = FMA(Tv, TY, TX);
175
            T2N = FNMS(Tv, TW, T2M);
176
       }
177
       {
178
            E TL, TM, TP, T3G;
179
            TL = Rp[WS(rs, 7)];
180
            TM = TK * TL;
181
            TP = Rm[WS(rs, 7)];
182
            T3G = TK * TP;
183
            TQ = FMA(TO, TP, TM);
184
            T3H = FNMS(TO, TL, T3G);
185
       }
186
       {
187
            E TS, TT, TU, T2K;
188
            TS = Ip[WS(rs, 4)];
189
            TT = T3 * TS;
190
            TU = Im[WS(rs, 4)];
191
            T2K = T3 * TU;
192
            TV = FMA(T6, TU, TT);
193
            T2L = FNMS(T6, TS, T2K);
194
       }
195
       {
196
            E TR, T10, T4b, T4c;
197
            TR = TI + TQ;
198
            T10 = TV + TZ;
199
            T11 = TR - T10;
200
            T2v = TR + T10;
201
            T4b = T3F + T3H;
202
            T4c = T2L + T2N;
203
            T4d = T4b + T4c;
204
            T4z = T4c - T4b;
205
       }
206
       {
207
            E T2J, T2O, T3D, T3I;
208
            T2J = TI - TQ;
209
            T2O = T2L - T2N;
210
            T2P = T2J - T2O;
211
            T3P = T2J + T2O;
212
            T3D = TZ - TV;
213
            T3I = T3F - T3H;
214
            T3J = T3D - T3I;
215
            T3Z = T3I + T3D;
216
       }
217
        }
218
        {
219
       E T26, T3j, T2p, T39, T2a, T3l, T2j, T37;
220
       {
221
            E T22, T23, T25, T3i;
222
            T22 = Rp[WS(rs, 6)];
223
            T23 = T21 * T22;
224
            T25 = Rm[WS(rs, 6)];
225
            T3i = T21 * T25;
226
            T26 = FMA(T24, T25, T23);
227
            T3j = FNMS(T24, T22, T3i);
228
       }
229
       {
230
            E T2l, T2m, T2o, T38;
231
            T2l = Ip[WS(rs, 3)];
232
            T2m = T2k * T2l;
233
            T2o = Im[WS(rs, 3)];
234
            T38 = T2k * T2o;
235
            T2p = FMA(T2n, T2o, T2m);
236
            T39 = FNMS(T2n, T2l, T38);
237
       }
238
       {
239
            E T27, T28, T29, T3k;
240
            T27 = Rp[WS(rs, 1)];
241
            T28 = T1n * T27;
242
            T29 = Rm[WS(rs, 1)];
243
            T3k = T1n * T29;
244
            T2a = FMA(T1p, T29, T28);
245
            T3l = FNMS(T1p, T27, T3k);
246
       }
247
       {
248
            E T2e, T2f, T2i, T36;
249
            T2e = Ip[WS(rs, 8)];
250
            T2f = T2d * T2e;
251
            T2i = Im[WS(rs, 8)];
252
            T36 = T2d * T2i;
253
            T2j = FMA(T2h, T2i, T2f);
254
            T37 = FNMS(T2h, T2e, T36);
255
       }
256
       {
257
            E T2b, T2q, T4l, T4m;
258
            T2b = T26 + T2a;
259
            T2q = T2j + T2p;
260
            T2r = T2b - T2q;
261
            T2z = T2b + T2q;
262
            T4l = T3j + T3l;
263
            T4m = T37 + T39;
264
            T4n = T4l + T4m;
265
            T4v = T4m - T4l;
266
       }
267
       {
268
            E T35, T3a, T3h, T3m;
269
            T35 = T26 - T2a;
270
            T3a = T37 - T39;
271
            T3b = T35 - T3a;
272
            T3T = T35 + T3a;
273
            T3h = T2p - T2j;
274
            T3m = T3j - T3l;
275
            T3n = T3h - T3m;
276
            T43 = T3m + T3h;
277
       }
278
        }
279
        {
280
       E T1F, T3q, T1Y, T32, T1N, T3s, T1U, T30;
281
       {
282
            E T1B, T1C, T1E, T3p;
283
            T1B = Rp[WS(rs, 4)];
284
            T1C = T1A * T1B;
285
            T1E = Rm[WS(rs, 4)];
286
            T3p = T1A * T1E;
287
            T1F = FMA(T1D, T1E, T1C);
288
            T3q = FNMS(T1D, T1B, T3p);
289
       }
290
       {
291
            E T1V, T1W, T1X, T31;
292
            T1V = Ip[WS(rs, 1)];
293
            T1W = Tf * T1V;
294
            T1X = Im[WS(rs, 1)];
295
            T31 = Tf * T1X;
296
            T1Y = FMA(Th, T1X, T1W);
297
            T32 = FNMS(Th, T1V, T31);
298
       }
299
       {
300
            E T1I, T1J, T1M, T3r;
301
            T1I = Rp[WS(rs, 9)];
302
            T1J = T1H * T1I;
303
            T1M = Rm[WS(rs, 9)];
304
            T3r = T1H * T1M;
305
            T1N = FMA(T1L, T1M, T1J);
306
            T3s = FNMS(T1L, T1I, T3r);
307
       }
308
       {
309
            E T1Q, T1R, T1T, T2Z;
310
            T1Q = Ip[WS(rs, 6)];
311
            T1R = T1P * T1Q;
312
            T1T = Im[WS(rs, 6)];
313
            T2Z = T1P * T1T;
314
            T1U = FMA(T1S, T1T, T1R);
315
            T30 = FNMS(T1S, T1Q, T2Z);
316
       }
317
       {
318
            E T1O, T1Z, T4i, T4j;
319
            T1O = T1F + T1N;
320
            T1Z = T1U + T1Y;
321
            T20 = T1O - T1Z;
322
            T2y = T1O + T1Z;
323
            T4i = T3q + T3s;
324
            T4j = T30 + T32;
325
            T4k = T4i + T4j;
326
            T4w = T4j - T4i;
327
       }
328
       {
329
            E T2Y, T33, T3o, T3t;
330
            T2Y = T1F - T1N;
331
            T33 = T30 - T32;
332
            T34 = T2Y - T33;
333
            T3S = T2Y + T33;
334
            T3o = T1Y - T1U;
335
            T3t = T3q - T3s;
336
            T3u = T3o - T3t;
337
            T42 = T3t + T3o;
338
       }
339
        }
340
        {
341
       E T19, T3y, T1w, T2U, T1h, T3A, T1m, T2S;
342
       {
343
            E T14, T15, T18, T3x;
344
            T14 = Rp[WS(rs, 8)];
345
            T15 = T13 * T14;
346
            T18 = Rm[WS(rs, 8)];
347
            T3x = T13 * T18;
348
            T19 = FMA(T17, T18, T15);
349
            T3y = FNMS(T17, T14, T3x);
350
       }
351
       {
352
            E T1r, T1s, T1v, T2T;
353
            T1r = Ip[WS(rs, 5)];
354
            T1s = T1q * T1r;
355
            T1v = Im[WS(rs, 5)];
356
            T2T = T1q * T1v;
357
            T1w = FMA(T1u, T1v, T1s);
358
            T2U = FNMS(T1u, T1r, T2T);
359
       }
360
       {
361
            E T1c, T1d, T1g, T3z;
362
            T1c = Rp[WS(rs, 3)];
363
            T1d = T1b * T1c;
364
            T1g = Rm[WS(rs, 3)];
365
            T3z = T1b * T1g;
366
            T1h = FMA(T1f, T1g, T1d);
367
            T3A = FNMS(T1f, T1c, T3z);
368
       }
369
       {
370
            E T1j, T1k, T1l, T2R;
371
            T1j = Ip[0];
372
            T1k = T2 * T1j;
373
            T1l = Im[0];
374
            T2R = T2 * T1l;
375
            T1m = FMA(T5, T1l, T1k);
376
            T2S = FNMS(T5, T1j, T2R);
377
       }
378
       {
379
            E T1i, T1x, T4e, T4f;
380
            T1i = T19 + T1h;
381
            T1x = T1m + T1w;
382
            T1y = T1i - T1x;
383
            T2w = T1i + T1x;
384
            T4e = T3y + T3A;
385
            T4f = T2S + T2U;
386
            T4g = T4e + T4f;
387
            T4y = T4f - T4e;
388
       }
389
       {
390
            E T2Q, T2V, T3w, T3B;
391
            T2Q = T19 - T1h;
392
            T2V = T2S - T2U;
393
            T2W = T2Q - T2V;
394
            T3Q = T2Q + T2V;
395
            T3w = T1w - T1m;
396
            T3B = T3y - T3A;
397
            T3C = T3w - T3B;
398
            T40 = T3B + T3w;
399
       }
400
        }
        /* Output stage: each of the remaining blocks combines the
         * intermediates with the KP* constants and stores five results
         * back into the Rp/Rm or Ip/Im arrays that were read above. */
401
        {
402
       E T4B, T4D, TE, T2t, T4s, T4t, T4C, T4u;
403
       {
404
            E T4x, T4A, T1z, T2s;
405
            T4x = T4v - T4w;
406
            T4A = T4y - T4z;
407
            T4B = FNMS(KP618033988, T4A, T4x);
408
            T4D = FMA(KP618033988, T4x, T4A);
409
            TE = Te - TD;
410
            T1z = T11 + T1y;
411
            T2s = T20 + T2r;
412
            T2t = T1z + T2s;
413
            T4s = FNMS(KP250000000, T2t, TE);
414
            T4t = T1z - T2s;
415
       }
416
       Rm[WS(rs, 9)] = TE + T2t;
417
       T4C = FMA(KP559016994, T4t, T4s);
418
       Rm[WS(rs, 5)] = FNMS(KP951056516, T4D, T4C);
419
       Rp[WS(rs, 6)] = FMA(KP951056516, T4D, T4C);
420
       T4u = FNMS(KP559016994, T4t, T4s);
421
       Rp[WS(rs, 2)] = FNMS(KP951056516, T4B, T4u);
422
       Rm[WS(rs, 1)] = FMA(KP951056516, T4B, T4u);
423
        }
424
        {
425
       E T54, T56, T4Y, T4X, T4Z, T50, T55, T51;
426
       {
427
            E T52, T53, T4V, T4W;
428
            T52 = T20 - T2r;
429
            T53 = T1y - T11;
430
            T54 = FMA(KP618033988, T53, T52);
431
            T56 = FNMS(KP618033988, T52, T53);
432
            T4Y = T4L - T4H;
433
            T4V = T4z + T4y;
434
            T4W = T4w + T4v;
435
            T4X = T4V + T4W;
436
            T4Z = FMA(KP250000000, T4X, T4Y);
437
            T50 = T4W - T4V;
438
       }
439
       Im[WS(rs, 9)] = T4X - T4Y;
440
       T55 = FMA(KP559016994, T50, T4Z);
441
       Im[WS(rs, 5)] = FMS(KP951056516, T56, T55);
442
       Ip[WS(rs, 6)] = FMA(KP951056516, T56, T55);
443
       T51 = FNMS(KP559016994, T50, T4Z);
444
       Im[WS(rs, 1)] = FMS(KP951056516, T54, T51);
445
       Ip[WS(rs, 2)] = FMA(KP951056516, T54, T51);
446
        }
447
        {
448
       E T4p, T4r, T2u, T2B, T48, T49, T4q, T4a;
449
       {
450
            E T4h, T4o, T2x, T2A;
451
            T4h = T4d - T4g;
452
            T4o = T4k - T4n;
453
            T4p = FMA(KP618033988, T4o, T4h);
454
            T4r = FNMS(KP618033988, T4h, T4o);
455
            T2u = Te + TD;
456
            T2x = T2v + T2w;
457
            T2A = T2y + T2z;
458
            T2B = T2x + T2A;
459
            T48 = FNMS(KP250000000, T2B, T2u);
460
            T49 = T2x - T2A;
461
       }
462
       Rp[0] = T2u + T2B;
463
       T4q = FNMS(KP559016994, T49, T48);
464
       Rm[WS(rs, 7)] = FNMS(KP951056516, T4r, T4q);
465
       Rp[WS(rs, 8)] = FMA(KP951056516, T4r, T4q);
466
       T4a = FMA(KP559016994, T49, T48);
467
       Rp[WS(rs, 4)] = FNMS(KP951056516, T4p, T4a);
468
       Rm[WS(rs, 3)] = FMA(KP951056516, T4p, T4a);
469
        }
470
        {
471
       E T4S, T4U, T4M, T4G, T4N, T4O, T4T, T4P;
472
       {
473
            E T4Q, T4R, T4E, T4F;
474
            T4Q = T2v - T2w;
475
            T4R = T2z - T2y;
476
            T4S = FNMS(KP618033988, T4R, T4Q);
477
            T4U = FMA(KP618033988, T4Q, T4R);
478
            T4M = T4H + T4L;
479
            T4E = T4d + T4g;
480
            T4F = T4k + T4n;
481
            T4G = T4E + T4F;
482
            T4N = FNMS(KP250000000, T4G, T4M);
483
            T4O = T4E - T4F;
484
       }
485
       Ip[0] = T4G + T4M;
486
       T4T = FNMS(KP559016994, T4O, T4N);
487
       Im[WS(rs, 7)] = FMS(KP951056516, T4U, T4T);
488
       Ip[WS(rs, 8)] = FMA(KP951056516, T4U, T4T);
489
       T4P = FMA(KP559016994, T4O, T4N);
490
       Im[WS(rs, 3)] = FMS(KP951056516, T4S, T4P);
491
       Ip[WS(rs, 4)] = FMA(KP951056516, T4S, T4P);
492
        }
493
        {
494
       E T3L, T3N, T2I, T3d, T3e, T3f, T3M, T3g;
495
       {
496
            E T3v, T3K, T2X, T3c;
497
            T3v = T3n - T3u;
498
            T3K = T3C - T3J;
499
            T3L = FNMS(KP618033988, T3K, T3v);
500
            T3N = FMA(KP618033988, T3v, T3K);
501
            T2I = T2C - T2H;
502
            T2X = T2P + T2W;
503
            T3c = T34 + T3b;
504
            T3d = T2X + T3c;
505
            T3e = FNMS(KP250000000, T3d, T2I);
506
            T3f = T2X - T3c;
507
       }
508
       Rm[WS(rs, 4)] = T2I + T3d;
509
       T3M = FMA(KP559016994, T3f, T3e);
510
       Rm[WS(rs, 8)] = FMA(KP951056516, T3N, T3M);
511
       Rm[0] = FNMS(KP951056516, T3N, T3M);
512
       T3g = FNMS(KP559016994, T3f, T3e);
513
       Rp[WS(rs, 3)] = FMA(KP951056516, T3L, T3g);
514
       Rp[WS(rs, 7)] = FNMS(KP951056516, T3L, T3g);
515
        }
516
        {
517
       E T5u, T5w, T5o, T5n, T5p, T5q, T5v, T5r;
518
       {
519
            E T5s, T5t, T5l, T5m;
520
            T5s = T2P - T2W;
521
            T5t = T34 - T3b;
522
            T5u = FMA(KP618033988, T5t, T5s);
523
            T5w = FNMS(KP618033988, T5s, T5t);
524
            T5o = T58 + T57;
525
            T5l = T3J + T3C;
526
            T5m = T3u + T3n;
527
            T5n = T5l + T5m;
528
            T5p = FMA(KP250000000, T5n, T5o);
529
            T5q = T5l - T5m;
530
       }
531
       Im[WS(rs, 4)] = T5n - T5o;
532
       T5v = FMA(KP559016994, T5q, T5p);
533
       Ip[WS(rs, 3)] = FNMS(KP951056516, T5w, T5v);
534
       Ip[WS(rs, 7)] = FMA(KP951056516, T5w, T5v);
535
       T5r = FNMS(KP559016994, T5q, T5p);
536
       Im[WS(rs, 8)] = FMS(KP951056516, T5u, T5r);
537
       Im[0] = -(FMA(KP951056516, T5u, T5r));
538
        }
539
        {
540
       E T45, T47, T3O, T3V, T3W, T3X, T46, T3Y;
541
       {
542
            E T41, T44, T3R, T3U;
543
            T41 = T3Z - T40;
544
            T44 = T42 - T43;
545
            T45 = FMA(KP618033988, T44, T41);
546
            T47 = FNMS(KP618033988, T41, T44);
547
            T3O = T2C + T2H;
548
            T3R = T3P + T3Q;
549
            T3U = T3S + T3T;
550
            T3V = T3R + T3U;
551
            T3W = FNMS(KP250000000, T3V, T3O);
552
            T3X = T3R - T3U;
553
       }
554
       Rp[WS(rs, 5)] = T3O + T3V;
555
       T46 = FNMS(KP559016994, T3X, T3W);
556
       Rm[WS(rs, 6)] = FMA(KP951056516, T47, T46);
557
       Rm[WS(rs, 2)] = FNMS(KP951056516, T47, T46);
558
       T3Y = FMA(KP559016994, T3X, T3W);
559
       Rp[WS(rs, 1)] = FMA(KP951056516, T45, T3Y);
560
       Rp[WS(rs, 9)] = FNMS(KP951056516, T45, T3Y);
561
        }
562
        {
563
       E T5i, T5k, T59, T5c, T5d, T5e, T5j, T5f;
564
       {
565
            E T5g, T5h, T5a, T5b;
566
            T5g = T3S - T3T;
567
            T5h = T3P - T3Q;
568
            T5i = FNMS(KP618033988, T5h, T5g);
569
            T5k = FMA(KP618033988, T5g, T5h);
570
            T59 = T57 - T58;
571
            T5a = T3Z + T40;
572
            T5b = T42 + T43;
573
            T5c = T5a + T5b;
574
            T5d = FNMS(KP250000000, T5c, T59);
575
            T5e = T5a - T5b;
576
       }
577
       Ip[WS(rs, 5)] = T5c + T59;
578
       T5j = FMA(KP559016994, T5e, T5d);
579
       Ip[WS(rs, 1)] = FNMS(KP951056516, T5k, T5j);
580
       Ip[WS(rs, 9)] = FMA(KP951056516, T5k, T5j);
581
       T5f = FNMS(KP559016994, T5e, T5d);
582
       Im[WS(rs, 6)] = FMS(KP951056516, T5i, T5f);
583
       Im[WS(rs, 2)] = -(FMA(KP951056516, T5i, T5f));
584
        }
585
         }
586
    }
587
     }
588
}
589
590
static const tw_instr twinstr[] = {
591
     { TW_CEXP, 1, 1 },
592
     { TW_CEXP, 1, 3 },
593
     { TW_CEXP, 1, 9 },
594
     { TW_CEXP, 1, 19 },
595
     { TW_NEXT, 1, 0 }
596
};
597
598
/*
 * Descriptor passed to the planner together with the kernel.  Fields are
 * positional; the inner initializer repeats the operation counts quoted
 * in this variant's header comment (136 additions, 58 multiplications,
 * 140 fused multiply/adds), followed by 0.
 */
static const hc2c_desc desc = {
     20,
     "hc2cf2_20",
     twinstr,
     &GENUS,
     { 136, 58, 140, 0 }
};
599
600
/*
 * Registration entry point: hands the hc2cf2_20 kernel and its
 * descriptor to planner `p` via khc2c_register, tagged HC2C_VIA_RDFT.
 */
void X(codelet_hc2cf2_20) (planner *p)
{
     X(khc2c_register) (p, hc2cf2_20, &desc, HC2C_VIA_RDFT);
}
603
#else
604
605
/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cf2_20 -include rdft/scalar/hc2cf.h */
606
607
/*
608
 * This function contains 276 FP additions, 164 FP multiplications,
609
 * (or, 204 additions, 92 multiplications, 72 fused multiply/add),
610
 * 123 stack variables, 4 constants, and 80 memory accesses
611
 */
612
#include "rdft/scalar/hc2cf.h"
613
614
static void hc2cf2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
615
0
{
616
0
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
617
0
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
618
0
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
619
0
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
620
0
     {
621
0
    INT m;
622
0
    for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) {
623
0
         E T2, T5, Tg, Ti, Tk, To, T1h, T1f, T6, T3, T8, T14, T1Q, Tc, T1O;
624
0
         E T1v, T18, T1t, T1n, T24, T1j, T22, Tq, Tu, T1E, T1G, Tx, Ty, Tz, TJ;
625
0
         E T1Z, TB, T1X, T1A, TZ, TL, T1y, TX;
626
0
         {
627
0
        E T7, T16, Ta, T13, T4, T17, Tb, T12;
628
0
        {
629
0
       E Th, Tn, Tj, Tm;
630
0
       T2 = W[0];
631
0
       T5 = W[1];
632
0
       Tg = W[2];
633
0
       Ti = W[3];
634
0
       Th = T2 * Tg;
635
0
       Tn = T5 * Tg;
636
0
       Tj = T5 * Ti;
637
0
       Tm = T2 * Ti;
638
0
       Tk = Th - Tj;
639
0
       To = Tm + Tn;
640
0
       T1h = Tm - Tn;
641
0
       T1f = Th + Tj;
642
0
       T6 = W[5];
643
0
       T7 = T5 * T6;
644
0
       T16 = Tg * T6;
645
0
       Ta = T2 * T6;
646
0
       T13 = Ti * T6;
647
0
       T3 = W[4];
648
0
       T4 = T2 * T3;
649
0
       T17 = Ti * T3;
650
0
       Tb = T5 * T3;
651
0
       T12 = Tg * T3;
652
0
        }
653
0
        T8 = T4 - T7;
654
0
        T14 = T12 + T13;
655
0
        T1Q = T16 + T17;
656
0
        Tc = Ta + Tb;
657
0
        T1O = T12 - T13;
658
0
        T1v = Ta - Tb;
659
0
        T18 = T16 - T17;
660
0
        T1t = T4 + T7;
661
0
        {
662
0
       E T1l, T1m, T1g, T1i;
663
0
       T1l = T1f * T6;
664
0
       T1m = T1h * T3;
665
0
       T1n = T1l + T1m;
666
0
       T24 = T1l - T1m;
667
0
       T1g = T1f * T3;
668
0
       T1i = T1h * T6;
669
0
       T1j = T1g - T1i;
670
0
       T22 = T1g + T1i;
671
0
       {
672
0
            E Tl, Tp, Ts, Tt;
673
0
            Tl = Tk * T3;
674
0
            Tp = To * T6;
675
0
            Tq = Tl + Tp;
676
0
            Ts = Tk * T6;
677
0
            Tt = To * T3;
678
0
            Tu = Ts - Tt;
679
0
            T1E = Tl - Tp;
680
0
            T1G = Ts + Tt;
681
0
            Tx = W[6];
682
0
            Ty = W[7];
683
0
            Tz = FMA(Tk, Tx, To * Ty);
684
0
            TJ = FMA(Tq, Tx, Tu * Ty);
685
0
            T1Z = FNMS(T1h, Tx, T1f * Ty);
686
0
            TB = FNMS(To, Tx, Tk * Ty);
687
0
            T1X = FMA(T1f, Tx, T1h * Ty);
688
0
            T1A = FNMS(T5, Tx, T2 * Ty);
689
0
            TZ = FNMS(Ti, Tx, Tg * Ty);
690
0
            TL = FNMS(Tu, Tx, Tq * Ty);
691
0
            T1y = FMA(T2, Tx, T5 * Ty);
692
0
            TX = FMA(Tg, Tx, Ti * Ty);
693
0
       }
694
0
        }
695
0
         }
696
0
         {
697
0
        E TF, T2b, T4D, T4M, T2K, T3r, T4a, T4m, T1N, T28, T29, T3J, T3M, T44, T3U;
698
0
        E T3V, T4j, T2f, T2g, T2h, T2n, T2s, T4K, T3g, T3h, T4z, T3n, T3o, T3p, T30;
699
0
        E T35, T36, TW, T1r, T1s, T3C, T3F, T43, T3X, T3Y, T4k, T2c, T2d, T2e, T2y;
700
0
        E T2D, T4J, T3d, T3e, T4y, T3k, T3l, T3m, T2P, T2U, T2V;
701
0
        {
702
0
       E T1, T48, Te, T47, Tw, T2H, TD, T2I, T9, Td;
703
0
       T1 = Rp[0];
704
0
       T48 = Rm[0];
705
0
       T9 = Rp[WS(rs, 5)];
706
0
       Td = Rm[WS(rs, 5)];
707
0
       Te = FMA(T8, T9, Tc * Td);
708
0
       T47 = FNMS(Tc, T9, T8 * Td);
709
0
       {
710
0
            E Tr, Tv, TA, TC;
711
0
            Tr = Ip[WS(rs, 2)];
712
0
            Tv = Im[WS(rs, 2)];
713
0
            Tw = FMA(Tq, Tr, Tu * Tv);
714
0
            T2H = FNMS(Tu, Tr, Tq * Tv);
715
0
            TA = Ip[WS(rs, 7)];
716
0
            TC = Im[WS(rs, 7)];
717
0
            TD = FMA(Tz, TA, TB * TC);
718
0
            T2I = FNMS(TB, TA, Tz * TC);
719
0
       }
720
0
       {
721
0
            E Tf, TE, T4B, T4C;
722
0
            Tf = T1 + Te;
723
0
            TE = Tw + TD;
724
0
            TF = Tf - TE;
725
0
            T2b = Tf + TE;
726
0
            T4B = T48 - T47;
727
0
            T4C = Tw - TD;
728
0
            T4D = T4B - T4C;
729
0
            T4M = T4C + T4B;
730
0
       }
731
0
       {
732
0
            E T2G, T2J, T46, T49;
733
0
            T2G = T1 - Te;
734
0
            T2J = T2H - T2I;
735
0
            T2K = T2G - T2J;
736
0
            T3r = T2G + T2J;
737
0
            T46 = T2H + T2I;
738
0
            T49 = T47 + T48;
739
0
            T4a = T46 + T49;
740
0
            T4m = T49 - T46;
741
0
       }
742
0
        }
743
0
        {
744
0
       E T1D, T3H, T2l, T2W, T27, T3L, T2r, T34, T1M, T3I, T2m, T2Z, T1W, T3K, T2q;
745
0
       E T31;
746
0
       {
747
0
            E T1x, T2j, T1C, T2k;
748
0
            {
749
0
           E T1u, T1w, T1z, T1B;
750
0
           T1u = Rp[WS(rs, 4)];
751
0
           T1w = Rm[WS(rs, 4)];
752
0
           T1x = FMA(T1t, T1u, T1v * T1w);
753
0
           T2j = FNMS(T1v, T1u, T1t * T1w);
754
0
           T1z = Rp[WS(rs, 9)];
755
0
           T1B = Rm[WS(rs, 9)];
756
0
           T1C = FMA(T1y, T1z, T1A * T1B);
757
0
           T2k = FNMS(T1A, T1z, T1y * T1B);
758
0
            }
759
0
            T1D = T1x + T1C;
760
0
            T3H = T2j + T2k;
761
0
            T2l = T2j - T2k;
762
0
            T2W = T1x - T1C;
763
0
       }
764
0
       {
765
0
            E T21, T32, T26, T33;
766
0
            {
767
0
           E T1Y, T20, T23, T25;
768
0
           T1Y = Ip[WS(rs, 8)];
769
0
           T20 = Im[WS(rs, 8)];
770
0
           T21 = FMA(T1X, T1Y, T1Z * T20);
771
0
           T32 = FNMS(T1Z, T1Y, T1X * T20);
772
0
           T23 = Ip[WS(rs, 3)];
773
0
           T25 = Im[WS(rs, 3)];
774
0
           T26 = FMA(T22, T23, T24 * T25);
775
0
           T33 = FNMS(T24, T23, T22 * T25);
776
0
            }
777
0
            T27 = T21 + T26;
778
0
            T3L = T32 + T33;
779
0
            T2r = T21 - T26;
780
0
            T34 = T32 - T33;
781
0
       }
782
0
       {
783
0
            E T1I, T2X, T1L, T2Y;
784
0
            {
785
0
           E T1F, T1H, T1J, T1K;
786
0
           T1F = Ip[WS(rs, 6)];
787
0
           T1H = Im[WS(rs, 6)];
788
0
           T1I = FMA(T1E, T1F, T1G * T1H);
789
0
           T2X = FNMS(T1G, T1F, T1E * T1H);
790
0
           T1J = Ip[WS(rs, 1)];
791
0
           T1K = Im[WS(rs, 1)];
792
0
           T1L = FMA(Tg, T1J, Ti * T1K);
793
0
           T2Y = FNMS(Ti, T1J, Tg * T1K);
794
0
            }
795
0
            T1M = T1I + T1L;
796
0
            T3I = T2X + T2Y;
797
0
            T2m = T1I - T1L;
798
0
            T2Z = T2X - T2Y;
799
0
       }
800
0
       {
801
0
            E T1S, T2o, T1V, T2p;
802
0
            {
803
0
           E T1P, T1R, T1T, T1U;
804
0
           T1P = Rp[WS(rs, 6)];
805
0
           T1R = Rm[WS(rs, 6)];
806
0
           T1S = FMA(T1O, T1P, T1Q * T1R);
807
0
           T2o = FNMS(T1Q, T1P, T1O * T1R);
808
0
           T1T = Rp[WS(rs, 1)];
809
0
           T1U = Rm[WS(rs, 1)];
810
0
           T1V = FMA(T1f, T1T, T1h * T1U);
811
0
           T2p = FNMS(T1h, T1T, T1f * T1U);
812
0
            }
813
0
            T1W = T1S + T1V;
814
0
            T3K = T2o + T2p;
815
0
            T2q = T2o - T2p;
816
0
            T31 = T1S - T1V;
817
0
       }
818
0
       T1N = T1D - T1M;
819
0
       T28 = T1W - T27;
820
0
       T29 = T1N + T28;
821
0
       T3J = T3H + T3I;
822
0
       T3M = T3K + T3L;
823
0
       T44 = T3J + T3M;
824
0
       T3U = T3H - T3I;
825
0
       T3V = T3L - T3K;
826
0
       T4j = T3V - T3U;
827
0
       T2f = T1D + T1M;
828
0
       T2g = T1W + T27;
829
0
       T2h = T2f + T2g;
830
0
       T2n = T2l + T2m;
831
0
       T2s = T2q + T2r;
832
0
       T4K = T2n + T2s;
833
0
       T3g = T2l - T2m;
834
0
       T3h = T2q - T2r;
835
0
       T4z = T3g + T3h;
836
0
       T3n = T2W + T2Z;
837
0
       T3o = T31 + T34;
838
0
       T3p = T3n + T3o;
839
0
       T30 = T2W - T2Z;
840
0
       T35 = T31 - T34;
841
0
       T36 = T30 + T35;
842
0
        }
843
0
        {
844
0
       E TO, T3A, T2w, T2L, T1q, T3E, T2z, T2T, TV, T3B, T2x, T2O, T1b, T3D, T2C;
845
0
       E T2Q;
846
0
       {
847
0
            E TI, T2u, TN, T2v;
848
0
            {
849
0
           E TG, TH, TK, TM;
850
0
           TG = Rp[WS(rs, 2)];
851
0
           TH = Rm[WS(rs, 2)];
852
0
           TI = FMA(Tk, TG, To * TH);
853
0
           T2u = FNMS(To, TG, Tk * TH);
854
0
           TK = Rp[WS(rs, 7)];
855
0
           TM = Rm[WS(rs, 7)];
856
0
           TN = FMA(TJ, TK, TL * TM);
857
0
           T2v = FNMS(TL, TK, TJ * TM);
858
0
            }
859
0
            TO = TI + TN;
860
0
            T3A = T2u + T2v;
861
0
            T2w = T2u - T2v;
862
0
            T2L = TI - TN;
863
0
       }
864
0
       {
865
0
            E T1e, T2R, T1p, T2S;
866
0
            {
867
0
           E T1c, T1d, T1k, T1o;
868
0
           T1c = Ip[0];
869
0
           T1d = Im[0];
870
0
           T1e = FMA(T2, T1c, T5 * T1d);
871
0
           T2R = FNMS(T5, T1c, T2 * T1d);
872
0
           T1k = Ip[WS(rs, 5)];
873
0
           T1o = Im[WS(rs, 5)];
874
0
           T1p = FMA(T1j, T1k, T1n * T1o);
875
0
           T2S = FNMS(T1n, T1k, T1j * T1o);
876
0
            }
877
0
            T1q = T1e + T1p;
878
0
            T3E = T2R + T2S;
879
0
            T2z = T1p - T1e;
880
0
            T2T = T2R - T2S;
881
0
       }
882
0
       {
883
0
            E TR, T2M, TU, T2N;
884
0
            {
885
0
           E TP, TQ, TS, TT;
886
0
           TP = Ip[WS(rs, 4)];
887
0
           TQ = Im[WS(rs, 4)];
888
0
           TR = FMA(T3, TP, T6 * TQ);
889
0
           T2M = FNMS(T6, TP, T3 * TQ);
890
0
           TS = Ip[WS(rs, 9)];
891
0
           TT = Im[WS(rs, 9)];
892
0
           TU = FMA(Tx, TS, Ty * TT);
893
0
           T2N = FNMS(Ty, TS, Tx * TT);
894
0
            }
895
0
            TV = TR + TU;
896
0
            T3B = T2M + T2N;
897
0
            T2x = TR - TU;
898
0
            T2O = T2M - T2N;
899
0
       }
900
0
       {
901
0
            E T11, T2A, T1a, T2B;
902
0
            {
903
0
           E TY, T10, T15, T19;
904
0
           TY = Rp[WS(rs, 8)];
905
0
           T10 = Rm[WS(rs, 8)];
906
0
           T11 = FMA(TX, TY, TZ * T10);
907
0
           T2A = FNMS(TZ, TY, TX * T10);
908
0
           T15 = Rp[WS(rs, 3)];
909
0
           T19 = Rm[WS(rs, 3)];
910
0
           T1a = FMA(T14, T15, T18 * T19);
911
0
           T2B = FNMS(T18, T15, T14 * T19);
912
0
            }
913
0
            T1b = T11 + T1a;
914
0
            T3D = T2A + T2B;
915
0
            T2C = T2A - T2B;
916
0
            T2Q = T11 - T1a;
917
0
       }
918
0
       TW = TO - TV;
919
0
       T1r = T1b - T1q;
920
0
       T1s = TW + T1r;
921
0
       T3C = T3A + T3B;
922
0
       T3F = T3D + T3E;
923
0
       T43 = T3C + T3F;
924
0
       T3X = T3A - T3B;
925
0
       T3Y = T3D - T3E;
926
0
       T4k = T3X + T3Y;
927
0
       T2c = TO + TV;
928
0
       T2d = T1b + T1q;
929
0
       T2e = T2c + T2d;
930
0
       T2y = T2w + T2x;
931
0
       T2D = T2z - T2C;
932
0
       T4J = T2D - T2y;
933
0
       T3d = T2w - T2x;
934
0
       T3e = T2C + T2z;
935
0
       T4y = T3d + T3e;
936
0
       T3k = T2L + T2O;
937
0
       T3l = T2Q + T2T;
938
0
       T3m = T3k + T3l;
939
0
       T2P = T2L - T2O;
940
0
       T2U = T2Q - T2T;
941
0
       T2V = T2P + T2U;
942
0
        }
943
0
        {
944
0
       E T3S, T2a, T3R, T40, T42, T3W, T3Z, T41, T3T;
945
0
       T3S = KP559016994 * (T1s - T29);
946
0
       T2a = T1s + T29;
947
0
       T3R = FNMS(KP250000000, T2a, TF);
948
0
       T3W = T3U + T3V;
949
0
       T3Z = T3X - T3Y;
950
0
       T40 = FNMS(KP587785252, T3Z, KP951056516 * T3W);
951
0
       T42 = FMA(KP951056516, T3Z, KP587785252 * T3W);
952
0
       Rm[WS(rs, 9)] = TF + T2a;
953
0
       T41 = T3S + T3R;
954
0
       Rm[WS(rs, 5)] = T41 - T42;
955
0
       Rp[WS(rs, 6)] = T41 + T42;
956
0
       T3T = T3R - T3S;
957
0
       Rp[WS(rs, 2)] = T3T - T40;
958
0
       Rm[WS(rs, 1)] = T3T + T40;
959
0
        }
960
0
        {
961
0
       E T4r, T4l, T4q, T4p, T4t, T4n, T4o, T4u, T4s;
962
0
       T4r = KP559016994 * (T4k + T4j);
963
0
       T4l = T4j - T4k;
964
0
       T4q = FMA(KP250000000, T4l, T4m);
965
0
       T4n = T1r - TW;
966
0
       T4o = T1N - T28;
967
0
       T4p = FMA(KP587785252, T4n, KP951056516 * T4o);
968
0
       T4t = FNMS(KP587785252, T4o, KP951056516 * T4n);
969
0
       Im[WS(rs, 9)] = T4l - T4m;
970
0
       T4u = T4r + T4q;
971
0
       Im[WS(rs, 5)] = T4t - T4u;
972
0
       Ip[WS(rs, 6)] = T4t + T4u;
973
0
       T4s = T4q - T4r;
974
0
       Im[WS(rs, 1)] = T4p - T4s;
975
0
       Ip[WS(rs, 2)] = T4p + T4s;
976
0
        }
977
0
        {
978
0
       E T3x, T2i, T3y, T3O, T3Q, T3G, T3N, T3P, T3z;
979
0
       T3x = KP559016994 * (T2e - T2h);
980
0
       T2i = T2e + T2h;
981
0
       T3y = FNMS(KP250000000, T2i, T2b);
982
0
       T3G = T3C - T3F;
983
0
       T3N = T3J - T3M;
984
0
       T3O = FMA(KP951056516, T3G, KP587785252 * T3N);
985
0
       T3Q = FNMS(KP587785252, T3G, KP951056516 * T3N);
986
0
       Rp[0] = T2b + T2i;
987
0
       T3P = T3y - T3x;
988
0
       Rm[WS(rs, 7)] = T3P - T3Q;
989
0
       Rp[WS(rs, 8)] = T3P + T3Q;
990
0
       T3z = T3x + T3y;
991
0
       Rp[WS(rs, 4)] = T3z - T3O;
992
0
       Rm[WS(rs, 3)] = T3z + T3O;
993
0
        }
994
0
        {
995
0
       E T4e, T45, T4f, T4d, T4h, T4b, T4c, T4i, T4g;
996
0
       T4e = KP559016994 * (T43 - T44);
997
0
       T45 = T43 + T44;
998
0
       T4f = FNMS(KP250000000, T45, T4a);
999
0
       T4b = T2c - T2d;
1000
0
       T4c = T2f - T2g;
1001
0
       T4d = FMA(KP951056516, T4b, KP587785252 * T4c);
1002
0
       T4h = FNMS(KP951056516, T4c, KP587785252 * T4b);
1003
0
       Ip[0] = T45 + T4a;
1004
0
       T4i = T4f - T4e;
1005
0
       Im[WS(rs, 7)] = T4h - T4i;
1006
0
       Ip[WS(rs, 8)] = T4h + T4i;
1007
0
       T4g = T4e + T4f;
1008
0
       Im[WS(rs, 3)] = T4d - T4g;
1009
0
       Ip[WS(rs, 4)] = T4d + T4g;
1010
0
        }
1011
0
        {
1012
0
       E T39, T37, T38, T2F, T3b, T2t, T2E, T3c, T3a;
1013
0
       T39 = KP559016994 * (T2V - T36);
1014
0
       T37 = T2V + T36;
1015
0
       T38 = FNMS(KP250000000, T37, T2K);
1016
0
       T2t = T2n - T2s;
1017
0
       T2E = T2y + T2D;
1018
0
       T2F = FNMS(KP587785252, T2E, KP951056516 * T2t);
1019
0
       T3b = FMA(KP951056516, T2E, KP587785252 * T2t);
1020
0
       Rm[WS(rs, 4)] = T2K + T37;
1021
0
       T3c = T39 + T38;
1022
0
       Rm[WS(rs, 8)] = T3b + T3c;
1023
0
       Rm[0] = T3c - T3b;
1024
0
       T3a = T38 - T39;
1025
0
       Rp[WS(rs, 3)] = T2F + T3a;
1026
0
       Rp[WS(rs, 7)] = T3a - T2F;
1027
0
        }
1028
0
        {
1029
0
       E T4Q, T4L, T4R, T4P, T4U, T4N, T4O, T4T, T4S;
1030
0
       T4Q = KP559016994 * (T4J + T4K);
1031
0
       T4L = T4J - T4K;
1032
0
       T4R = FMA(KP250000000, T4L, T4M);
1033
0
       T4N = T2P - T2U;
1034
0
       T4O = T30 - T35;
1035
0
       T4P = FMA(KP951056516, T4N, KP587785252 * T4O);
1036
0
       T4U = FNMS(KP587785252, T4N, KP951056516 * T4O);
1037
0
       Im[WS(rs, 4)] = T4L - T4M;
1038
0
       T4T = T4Q + T4R;
1039
0
       Ip[WS(rs, 3)] = T4T - T4U;
1040
0
       Ip[WS(rs, 7)] = T4U + T4T;
1041
0
       T4S = T4Q - T4R;
1042
0
       Im[WS(rs, 8)] = T4P + T4S;
1043
0
       Im[0] = T4S - T4P;
1044
0
        }
1045
0
        {
1046
0
       E T3q, T3s, T3t, T3j, T3v, T3f, T3i, T3w, T3u;
1047
0
       T3q = KP559016994 * (T3m - T3p);
1048
0
       T3s = T3m + T3p;
1049
0
       T3t = FNMS(KP250000000, T3s, T3r);
1050
0
       T3f = T3d - T3e;
1051
0
       T3i = T3g - T3h;
1052
0
       T3j = FMA(KP951056516, T3f, KP587785252 * T3i);
1053
0
       T3v = FNMS(KP587785252, T3f, KP951056516 * T3i);
1054
0
       Rp[WS(rs, 5)] = T3r + T3s;
1055
0
       T3w = T3t - T3q;
1056
0
       Rm[WS(rs, 6)] = T3v + T3w;
1057
0
       Rm[WS(rs, 2)] = T3w - T3v;
1058
0
       T3u = T3q + T3t;
1059
0
       Rp[WS(rs, 1)] = T3j + T3u;
1060
0
       Rp[WS(rs, 9)] = T3u - T3j;
1061
0
        }
1062
0
        {
1063
0
       E T4A, T4E, T4F, T4x, T4I, T4v, T4w, T4H, T4G;
1064
0
       T4A = KP559016994 * (T4y - T4z);
1065
0
       T4E = T4y + T4z;
1066
0
       T4F = FNMS(KP250000000, T4E, T4D);
1067
0
       T4v = T3n - T3o;
1068
0
       T4w = T3k - T3l;
1069
0
       T4x = FNMS(KP587785252, T4w, KP951056516 * T4v);
1070
0
       T4I = FMA(KP951056516, T4w, KP587785252 * T4v);
1071
0
       Ip[WS(rs, 5)] = T4E + T4D;
1072
0
       T4H = T4A + T4F;
1073
0
       Ip[WS(rs, 1)] = T4H - T4I;
1074
0
       Ip[WS(rs, 9)] = T4I + T4H;
1075
0
       T4G = T4A - T4F;
1076
0
       Im[WS(rs, 6)] = T4x + T4G;
1077
0
       Im[WS(rs, 2)] = T4G - T4x;
1078
0
        }
1079
0
         }
1080
0
    }
1081
0
     }
1082
0
}
1083
1084
/*
 * Twiddle instruction table for this codelet; it is the third initializer
 * of the hc2c_desc `desc` defined after it.
 *
 * Contents: four TW_CEXP entries whose last field is 1, 3, 9 and 19,
 * followed by a single TW_NEXT entry.
 *
 * NOTE(review): the generator command line quoted near the top of the file
 * uses -twiddle-log3 with -n 20, so these four entries are presumably the
 * base twiddle exponents from which the kernel derives the remaining ones,
 * and TW_NEXT is presumably the end-of-table marker -- confirm against the
 * tw_instr definition in rdft/codelet-rdft.h.
 */
static const tw_instr twinstr[] = {
1085
     { TW_CEXP, 1, 1 },
1086
     { TW_CEXP, 1, 3 },
1087
     { TW_CEXP, 1, 9 },
1088
     { TW_CEXP, 1, 19 },
1089
     { TW_NEXT, 1, 0 }
1090
};
1091
1092
/*
 * Descriptor passed to X(khc2c_register) together with the hc2cf2_20 kernel.
 * Initializers, in order: the integer 20, the codelet name string, the
 * twinstr table, the address of GENUS, and the brace list { 204, 92, 72, 0 }.
 *
 * NOTE(review): 20 presumably is the transform size (-n 20 on the generator
 * command line) and the trailing list presumably is an operation count in
 * { add, mul, fma, other } order -- verify against the hc2c_desc declaration.
 */
static const hc2c_desc desc = { 20, "hc2cf2_20", twinstr, &GENUS, { 204, 92, 72, 0 } };
1093
1094
1
/*
 * Registration entry point for this codelet.
 *
 * p: planner that the codelet is registered with; it is forwarded unchanged
 *    to X(khc2c_register).
 *
 * Makes a single call to X(khc2c_register) with the hc2cf2_20 kernel, the
 * address of its descriptor `desc`, and HC2C_VIA_RDFT. Returns nothing.
 */
void X(codelet_hc2cf2_20) (planner *p) {
1095
1
     X(khc2c_register) (p, hc2cf2_20, &desc, HC2C_VIA_RDFT);
1096
1
}
1097
#endif