Coverage Report

Created: 2023-09-25 07:08

/src/fftw3/dft/scalar/codelets/n1_20.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Mon Sep 25 07:03:53 UTC 2023 */
23
24
#include "dft/codelet-dft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 20 -name n1_20 -include dft/scalar/n.h */
29
30
/*
31
 * This function contains 208 FP additions, 72 FP multiplications,
32
 * (or, 136 additions, 0 multiplications, 72 fused multiply/add),
33
 * 81 stack variables, 4 constants, and 80 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36
37
/*
 * n1_20, FMA variant: straight-line kernel emitted by genfft's gen_notw
 * with "-fma -n 20" (see the "Generated by" comment above).  It is only
 * compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Each pass of the loop reads the 20 real inputs ri[WS(is, k)] and the 20
 * imaginary inputs ii[WS(is, k)], k = 0..19, and stores the 20 real outputs
 * ro[WS(os, k)] and the 20 imaginary outputs io[WS(os, k)].  The loop runs
 * v times; after each pass ri/ii advance by ivs and ro/io advance by ovs.
 *
 * NOTE(review): presumably this is a length-20 complex DFT without twiddle
 * factors -- inferred from the generator command line and the descriptor
 * below; confirm against the FFTW codelet documentation.
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined in the included headers, not here.
 * FMA(a, b, c) and FNMS(a, b, c) are assumed to mean a*b + c and c - a*b;
 * verify in those headers.
 */
static void n1_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
38
{
39
     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
40
     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
41
     DK(KP618033988, +0.618033988749894848204586834365638117720309180); /* numerically (sqrt(5) - 1)/2 */
42
     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
43
     {
44
    INT i;
45
    for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(80, is), MAKE_VOLATILE_STRIDE(80, os)) { /* v passes; pointers step by ivs / ovs */
46
         E T7, T2N, T3b, TD, TP, T1R, T2f, T1d, Tt, TA, TB, T2w, T2z, T2P, T35;
47
         E T36, T3d, TH, TI, TJ, T15, T1a, T1b, T1s, T1x, T1T, T29, T2a, T2h, T1h;
48
         E T1i, T1j, Te, Tl, Tm, T2D, T2G, T2O, T32, T33, T3c, TE, TF, TG, TU;
49
         E TZ, T10, T1D, T1I, T1S, T26, T27, T2g, T1e, T1f, T1g;
50
         { /* load inputs 0, 10, 5, 15 and combine them by sums and differences */
51
        E T3, T1N, TN, T2L, T6, TO, T1Q, T2M;
52
        {
53
       E T1, T2, TL, TM;
54
       T1 = ri[0];
55
       T2 = ri[WS(is, 10)];
56
       T3 = T1 + T2;
57
       T1N = T1 - T2;
58
       TL = ii[0];
59
       TM = ii[WS(is, 10)];
60
       TN = TL - TM;
61
       T2L = TL + TM;
62
        }
63
        {
64
       E T4, T5, T1O, T1P;
65
       T4 = ri[WS(is, 5)];
66
       T5 = ri[WS(is, 15)];
67
       T6 = T4 + T5;
68
       TO = T4 - T5;
69
       T1O = ii[WS(is, 5)];
70
       T1P = ii[WS(is, 15)];
71
       T1Q = T1O - T1P;
72
       T2M = T1O + T1P;
73
        }
74
        T7 = T3 - T6;
75
        T2N = T2L - T2M;
76
        T3b = T2L + T2M;
77
        TD = T3 + T6;
78
        TP = TN - TO;
79
        T1R = T1N - T1Q;
80
        T2f = T1N + T1Q;
81
        T1d = TO + TN;
82
         }
83
         { /* load inputs 8, 18, 13, 3, 12, 2, 17, 7 and combine them */
84
        E Tp, T1o, T13, T2u, Ts, T14, T1r, T2v, Tw, T1t, T18, T2x, Tz, T19, T1w;
85
        E T2y;
86
        {
87
       E Tn, To, T11, T12;
88
       Tn = ri[WS(is, 8)];
89
       To = ri[WS(is, 18)];
90
       Tp = Tn + To;
91
       T1o = Tn - To;
92
       T11 = ii[WS(is, 8)];
93
       T12 = ii[WS(is, 18)];
94
       T13 = T11 - T12;
95
       T2u = T11 + T12;
96
        }
97
        {
98
       E Tq, Tr, T1p, T1q;
99
       Tq = ri[WS(is, 13)];
100
       Tr = ri[WS(is, 3)];
101
       Ts = Tq + Tr;
102
       T14 = Tq - Tr;
103
       T1p = ii[WS(is, 13)];
104
       T1q = ii[WS(is, 3)];
105
       T1r = T1p - T1q;
106
       T2v = T1p + T1q;
107
        }
108
        {
109
       E Tu, Tv, T16, T17;
110
       Tu = ri[WS(is, 12)];
111
       Tv = ri[WS(is, 2)];
112
       Tw = Tu + Tv;
113
       T1t = Tu - Tv;
114
       T16 = ii[WS(is, 12)];
115
       T17 = ii[WS(is, 2)];
116
       T18 = T16 - T17;
117
       T2x = T16 + T17;
118
        }
119
        {
120
       E Tx, Ty, T1u, T1v;
121
       Tx = ri[WS(is, 17)];
122
       Ty = ri[WS(is, 7)];
123
       Tz = Tx + Ty;
124
       T19 = Tx - Ty;
125
       T1u = ii[WS(is, 17)];
126
       T1v = ii[WS(is, 7)];
127
       T1w = T1u - T1v;
128
       T2y = T1u + T1v;
129
        }
130
        Tt = Tp - Ts;
131
        TA = Tw - Tz;
132
        TB = Tt + TA;
133
        T2w = T2u - T2v;
134
        T2z = T2x - T2y;
135
        T2P = T2w + T2z;
136
        T35 = T2u + T2v;
137
        T36 = T2x + T2y;
138
        T3d = T35 + T36;
139
        TH = Tp + Ts;
140
        TI = Tw + Tz;
141
        TJ = TH + TI;
142
        T15 = T13 - T14;
143
        T1a = T18 - T19;
144
        T1b = T15 + T1a;
145
        T1s = T1o - T1r;
146
        T1x = T1t - T1w;
147
        T1T = T1s + T1x;
148
        T29 = T1o + T1r;
149
        T2a = T1t + T1w;
150
        T2h = T29 + T2a;
151
        T1h = T14 + T13;
152
        T1i = T19 + T18;
153
        T1j = T1h + T1i;
154
         }
155
         { /* load inputs 4, 14, 9, 19, 16, 6, 1, 11 and combine them */
156
        E Ta, T1z, TS, T2B, Td, TT, T1C, T2C, Th, T1E, TX, T2E, Tk, TY, T1H;
157
        E T2F;
158
        {
159
       E T8, T9, TQ, TR;
160
       T8 = ri[WS(is, 4)];
161
       T9 = ri[WS(is, 14)];
162
       Ta = T8 + T9;
163
       T1z = T8 - T9;
164
       TQ = ii[WS(is, 4)];
165
       TR = ii[WS(is, 14)];
166
       TS = TQ - TR;
167
       T2B = TQ + TR;
168
        }
169
        {
170
       E Tb, Tc, T1A, T1B;
171
       Tb = ri[WS(is, 9)];
172
       Tc = ri[WS(is, 19)];
173
       Td = Tb + Tc;
174
       TT = Tb - Tc;
175
       T1A = ii[WS(is, 9)];
176
       T1B = ii[WS(is, 19)];
177
       T1C = T1A - T1B;
178
       T2C = T1A + T1B;
179
        }
180
        {
181
       E Tf, Tg, TV, TW;
182
       Tf = ri[WS(is, 16)];
183
       Tg = ri[WS(is, 6)];
184
       Th = Tf + Tg;
185
       T1E = Tf - Tg;
186
       TV = ii[WS(is, 16)];
187
       TW = ii[WS(is, 6)];
188
       TX = TV - TW;
189
       T2E = TV + TW;
190
        }
191
        {
192
       E Ti, Tj, T1F, T1G;
193
       Ti = ri[WS(is, 1)];
194
       Tj = ri[WS(is, 11)];
195
       Tk = Ti + Tj;
196
       TY = Ti - Tj;
197
       T1F = ii[WS(is, 1)];
198
       T1G = ii[WS(is, 11)];
199
       T1H = T1F - T1G;
200
       T2F = T1F + T1G;
201
        }
202
        Te = Ta - Td;
203
        Tl = Th - Tk;
204
        Tm = Te + Tl;
205
        T2D = T2B - T2C;
206
        T2G = T2E - T2F;
207
        T2O = T2D + T2G;
208
        T32 = T2B + T2C;
209
        T33 = T2E + T2F;
210
        T3c = T32 + T33;
211
        TE = Ta + Td;
212
        TF = Th + Tk;
213
        TG = TE + TF;
214
        TU = TS - TT;
215
        TZ = TX - TY;
216
        T10 = TU + TZ;
217
        T1D = T1z - T1C;
218
        T1I = T1E - T1H;
219
        T1S = T1D + T1I;
220
        T26 = T1z + T1C;
221
        T27 = T1E + T1H;
222
        T2g = T26 + T27;
223
        T1e = TT + TS;
224
        T1f = TY + TX;
225
        T1g = T1e + T1f;
226
         }
227
         { /* store real outputs 10, 14, 6, 2, 18 */
228
        E T2s, TC, T2r, T2I, T2K, T2A, T2H, T2J, T2t;
229
        T2s = Tm - TB;
230
        TC = Tm + TB;
231
        T2r = FNMS(KP250000000, TC, T7);
232
        T2A = T2w - T2z;
233
        T2H = T2D - T2G;
234
        T2I = FNMS(KP618033988, T2H, T2A);
235
        T2K = FMA(KP618033988, T2A, T2H);
236
        ro[WS(os, 10)] = T7 + TC;
237
        T2J = FMA(KP559016994, T2s, T2r);
238
        ro[WS(os, 14)] = FNMS(KP951056516, T2K, T2J);
239
        ro[WS(os, 6)] = FMA(KP951056516, T2K, T2J);
240
        T2t = FNMS(KP559016994, T2s, T2r);
241
        ro[WS(os, 2)] = FNMS(KP951056516, T2I, T2t);
242
        ro[WS(os, 18)] = FMA(KP951056516, T2I, T2t);
243
         }
244
         { /* store imaginary outputs 10, 6, 14, 2, 18 */
245
        E T2S, T2Q, T2R, T2W, T2Y, T2U, T2V, T2X, T2T;
246
        T2S = T2O - T2P;
247
        T2Q = T2O + T2P;
248
        T2R = FNMS(KP250000000, T2Q, T2N);
249
        T2U = Tt - TA;
250
        T2V = Te - Tl;
251
        T2W = FNMS(KP618033988, T2V, T2U);
252
        T2Y = FMA(KP618033988, T2U, T2V);
253
        io[WS(os, 10)] = T2N + T2Q;
254
        T2X = FMA(KP559016994, T2S, T2R);
255
        io[WS(os, 6)] = FNMS(KP951056516, T2Y, T2X);
256
        io[WS(os, 14)] = FMA(KP951056516, T2Y, T2X);
257
        T2T = FNMS(KP559016994, T2S, T2R);
258
        io[WS(os, 2)] = FMA(KP951056516, T2W, T2T);
259
        io[WS(os, 18)] = FNMS(KP951056516, T2W, T2T);
260
         }
261
         { /* store real outputs 0, 12, 8, 4, 16 */
262
        E T30, TK, T2Z, T38, T3a, T34, T37, T39, T31;
263
        T30 = TG - TJ;
264
        TK = TG + TJ;
265
        T2Z = FNMS(KP250000000, TK, TD);
266
        T34 = T32 - T33;
267
        T37 = T35 - T36;
268
        T38 = FMA(KP618033988, T37, T34);
269
        T3a = FNMS(KP618033988, T34, T37);
270
        ro[0] = TD + TK;
271
        T39 = FNMS(KP559016994, T30, T2Z);
272
        ro[WS(os, 12)] = FNMS(KP951056516, T3a, T39);
273
        ro[WS(os, 8)] = FMA(KP951056516, T3a, T39);
274
        T31 = FMA(KP559016994, T30, T2Z);
275
        ro[WS(os, 4)] = FNMS(KP951056516, T38, T31);
276
        ro[WS(os, 16)] = FMA(KP951056516, T38, T31);
277
         }
278
         { /* store imaginary outputs 0, 8, 12, 4, 16 */
279
        E T3g, T3e, T3f, T3k, T3m, T3i, T3j, T3l, T3h;
280
        T3g = T3c - T3d;
281
        T3e = T3c + T3d;
282
        T3f = FNMS(KP250000000, T3e, T3b);
283
        T3i = TE - TF;
284
        T3j = TH - TI;
285
        T3k = FMA(KP618033988, T3j, T3i);
286
        T3m = FNMS(KP618033988, T3i, T3j);
287
        io[0] = T3b + T3e;
288
        T3l = FNMS(KP559016994, T3g, T3f);
289
        io[WS(os, 8)] = FNMS(KP951056516, T3m, T3l);
290
        io[WS(os, 12)] = FMA(KP951056516, T3m, T3l);
291
        T3h = FMA(KP559016994, T3g, T3f);
292
        io[WS(os, 4)] = FMA(KP951056516, T3k, T3h);
293
        io[WS(os, 16)] = FNMS(KP951056516, T3k, T3h);
294
         }
295
         { /* store imaginary outputs 5, 13, 17, 1, 9 */
296
        E T24, T1c, T23, T2c, T2e, T28, T2b, T2d, T25;
297
        T24 = T10 - T1b;
298
        T1c = T10 + T1b;
299
        T23 = FNMS(KP250000000, T1c, TP);
300
        T28 = T26 - T27;
301
        T2b = T29 - T2a;
302
        T2c = FMA(KP618033988, T2b, T28);
303
        T2e = FNMS(KP618033988, T28, T2b);
304
        io[WS(os, 5)] = TP + T1c;
305
        T2d = FNMS(KP559016994, T24, T23);
306
        io[WS(os, 13)] = FNMS(KP951056516, T2e, T2d);
307
        io[WS(os, 17)] = FMA(KP951056516, T2e, T2d);
308
        T25 = FMA(KP559016994, T24, T23);
309
        io[WS(os, 1)] = FNMS(KP951056516, T2c, T25);
310
        io[WS(os, 9)] = FMA(KP951056516, T2c, T25);
311
         }
312
         { /* store real outputs 5, 13, 17, 1, 9 */
313
        E T2k, T2i, T2j, T2o, T2q, T2m, T2n, T2p, T2l;
314
        T2k = T2g - T2h;
315
        T2i = T2g + T2h;
316
        T2j = FNMS(KP250000000, T2i, T2f);
317
        T2m = TU - TZ;
318
        T2n = T15 - T1a;
319
        T2o = FMA(KP618033988, T2n, T2m);
320
        T2q = FNMS(KP618033988, T2m, T2n);
321
        ro[WS(os, 5)] = T2f + T2i;
322
        T2p = FNMS(KP559016994, T2k, T2j);
323
        ro[WS(os, 13)] = FMA(KP951056516, T2q, T2p);
324
        ro[WS(os, 17)] = FNMS(KP951056516, T2q, T2p);
325
        T2l = FMA(KP559016994, T2k, T2j);
326
        ro[WS(os, 1)] = FMA(KP951056516, T2o, T2l);
327
        ro[WS(os, 9)] = FNMS(KP951056516, T2o, T2l);
328
         }
329
         { /* store imaginary outputs 15, 11, 19, 3, 7 */
330
        E T1m, T1k, T1l, T1K, T1M, T1y, T1J, T1L, T1n;
331
        T1m = T1g - T1j;
332
        T1k = T1g + T1j;
333
        T1l = FNMS(KP250000000, T1k, T1d);
334
        T1y = T1s - T1x;
335
        T1J = T1D - T1I;
336
        T1K = FNMS(KP618033988, T1J, T1y);
337
        T1M = FMA(KP618033988, T1y, T1J);
338
        io[WS(os, 15)] = T1d + T1k;
339
        T1L = FMA(KP559016994, T1m, T1l);
340
        io[WS(os, 11)] = FNMS(KP951056516, T1M, T1L);
341
        io[WS(os, 19)] = FMA(KP951056516, T1M, T1L);
342
        T1n = FNMS(KP559016994, T1m, T1l);
343
        io[WS(os, 3)] = FNMS(KP951056516, T1K, T1n);
344
        io[WS(os, 7)] = FMA(KP951056516, T1K, T1n);
345
         }
346
         { /* store real outputs 15, 11, 19, 3, 7 */
347
        E T1W, T1U, T1V, T20, T22, T1Y, T1Z, T21, T1X;
348
        T1W = T1S - T1T;
349
        T1U = T1S + T1T;
350
        T1V = FNMS(KP250000000, T1U, T1R);
351
        T1Y = T1h - T1i;
352
        T1Z = T1e - T1f;
353
        T20 = FNMS(KP618033988, T1Z, T1Y);
354
        T22 = FMA(KP618033988, T1Y, T1Z);
355
        ro[WS(os, 15)] = T1R + T1U;
356
        T21 = FMA(KP559016994, T1W, T1V);
357
        ro[WS(os, 11)] = FMA(KP951056516, T22, T21);
358
        ro[WS(os, 19)] = FNMS(KP951056516, T22, T21);
359
        T1X = FNMS(KP559016994, T1W, T1V);
360
        ro[WS(os, 3)] = FMA(KP951056516, T20, T1X);
361
        ro[WS(os, 7)] = FNMS(KP951056516, T20, T1X);
362
         }
363
    }
364
     }
365
}
366
367
/*
 * Descriptor passed to X(kdft_register) together with n1_20 (FMA variant).
 * It carries the number 20 and the name "n1_20"; the { 136, 0, 72, 0 }
 * entry matches the generator summary above (136 additions, 0
 * multiplications, 72 fused multiply/adds).  GENUS is not defined in this
 * listing.
 * NOTE(review): the leading 20 is presumably the transform size, and the
 * meaning of the trailing zero fields is not visible here -- check the
 * kdft_desc declaration.
 */
static const kdft_desc desc = { 20, "n1_20", { 136, 0, 72, 0 }, &GENUS, 0, 0, 0, 0 };
368
369
/* Hands the n1_20 kernel and its descriptor to X(kdft_register) for planner p. */
void X(codelet_n1_20) (planner *p) { X(kdft_register) (p, n1_20, &desc);
369
}
371
372
#else
373
374
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 20 -name n1_20 -include dft/scalar/n.h */
375
376
/*
377
 * This function contains 208 FP additions, 48 FP multiplications,
378
 * (or, 184 additions, 24 multiplications, 24 fused multiply/add),
379
 * 81 stack variables, 4 constants, and 80 memory accesses
380
 */
381
#include "dft/scalar/n.h"
382
383
/*
 * n1_20, non-FMA variant: straight-line kernel emitted by genfft's gen_notw
 * with "-n 20" and without "-fma" (see the "Generated by" comment above).
 * It is compiled in the #else branch, i.e. when neither ARCH_PREFERS_FMA
 * nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Each pass of the loop reads the 20 real inputs ri[WS(is, k)] and the 20
 * imaginary inputs ii[WS(is, k)], k = 0..19, and stores the 20 real outputs
 * ro[WS(os, k)] and the 20 imaginary outputs io[WS(os, k)].  The loop runs
 * v times; after each pass ri/ii advance by ivs and ro/io advance by ovs.
 *
 * NOTE(review): presumably this is a length-20 complex DFT without twiddle
 * factors, the same transform as the FMA variant -- inferred from the
 * generator command line and the descriptor below; confirm against the
 * FFTW codelet documentation.
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined in the included headers, not here.
 * FMA(a, b, c) and FNMS(a, b, c) are assumed to mean a*b + c and c - a*b;
 * verify in those headers.
 */
static void n1_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
384
5
{
385
5
     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
386
5
     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
387
5
     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
388
5
     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
389
5
     {
390
5
    INT i;
391
76
    for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(80, is), MAKE_VOLATILE_STRIDE(80, os)) { /* v passes; pointers step by ivs / ovs */
392
71
         E T7, T2Q, T3h, TD, TP, T1U, T2l, T1d, Tt, TA, TB, T2w, T2z, T2S, T35;
393
71
         E T36, T3f, TH, TI, TJ, T15, T1a, T1b, T1s, T1x, T1W, T29, T2a, T2j, T1h;
394
71
         E T1i, T1j, Te, Tl, Tm, T2D, T2G, T2R, T32, T33, T3e, TE, TF, TG, TU;
395
71
         E TZ, T10, T1D, T1I, T1V, T26, T27, T2i, T1e, T1f, T1g;
396
71
         { /* load inputs 0, 10, 5, 15 and combine them by sums and differences */
397
71
        E T3, T1Q, TN, T2O, T6, TO, T1T, T2P;
398
71
        {
399
71
       E T1, T2, TL, TM;
400
71
       T1 = ri[0];
401
71
       T2 = ri[WS(is, 10)];
402
71
       T3 = T1 + T2;
403
71
       T1Q = T1 - T2;
404
71
       TL = ii[0];
405
71
       TM = ii[WS(is, 10)];
406
71
       TN = TL - TM;
407
71
       T2O = TL + TM;
408
71
        }
409
71
        {
410
71
       E T4, T5, T1R, T1S;
411
71
       T4 = ri[WS(is, 5)];
412
71
       T5 = ri[WS(is, 15)];
413
71
       T6 = T4 + T5;
414
71
       TO = T4 - T5;
415
71
       T1R = ii[WS(is, 5)];
416
71
       T1S = ii[WS(is, 15)];
417
71
       T1T = T1R - T1S;
418
71
       T2P = T1R + T1S;
419
71
        }
420
71
        T7 = T3 - T6;
421
71
        T2Q = T2O - T2P;
422
71
        T3h = T2O + T2P;
423
71
        TD = T3 + T6;
424
71
        TP = TN - TO;
425
71
        T1U = T1Q - T1T;
426
71
        T2l = T1Q + T1T;
427
71
        T1d = TO + TN;
428
71
         }
429
71
         { /* load inputs 8, 18, 13, 3, 12, 2, 17, 7 and combine them */
430
71
        E Tp, T1o, T13, T2u, Ts, T14, T1r, T2v, Tw, T1t, T18, T2x, Tz, T19, T1w;
431
71
        E T2y;
432
71
        {
433
71
       E Tn, To, T11, T12;
434
71
       Tn = ri[WS(is, 8)];
435
71
       To = ri[WS(is, 18)];
436
71
       Tp = Tn + To;
437
71
       T1o = Tn - To;
438
71
       T11 = ii[WS(is, 8)];
439
71
       T12 = ii[WS(is, 18)];
440
71
       T13 = T11 - T12;
441
71
       T2u = T11 + T12;
442
71
        }
443
71
        {
444
71
       E Tq, Tr, T1p, T1q;
445
71
       Tq = ri[WS(is, 13)];
446
71
       Tr = ri[WS(is, 3)];
447
71
       Ts = Tq + Tr;
448
71
       T14 = Tq - Tr;
449
71
       T1p = ii[WS(is, 13)];
450
71
       T1q = ii[WS(is, 3)];
451
71
       T1r = T1p - T1q;
452
71
       T2v = T1p + T1q;
453
71
        }
454
71
        {
455
71
       E Tu, Tv, T16, T17;
456
71
       Tu = ri[WS(is, 12)];
457
71
       Tv = ri[WS(is, 2)];
458
71
       Tw = Tu + Tv;
459
71
       T1t = Tu - Tv;
460
71
       T16 = ii[WS(is, 12)];
461
71
       T17 = ii[WS(is, 2)];
462
71
       T18 = T16 - T17;
463
71
       T2x = T16 + T17;
464
71
        }
465
71
        {
466
71
       E Tx, Ty, T1u, T1v;
467
71
       Tx = ri[WS(is, 17)];
468
71
       Ty = ri[WS(is, 7)];
469
71
       Tz = Tx + Ty;
470
71
       T19 = Tx - Ty;
471
71
       T1u = ii[WS(is, 17)];
472
71
       T1v = ii[WS(is, 7)];
473
71
       T1w = T1u - T1v;
474
71
       T2y = T1u + T1v;
475
71
        }
476
71
        Tt = Tp - Ts;
477
71
        TA = Tw - Tz;
478
71
        TB = Tt + TA;
479
71
        T2w = T2u - T2v;
480
71
        T2z = T2x - T2y;
481
71
        T2S = T2w + T2z;
482
71
        T35 = T2u + T2v;
483
71
        T36 = T2x + T2y;
484
71
        T3f = T35 + T36;
485
71
        TH = Tp + Ts;
486
71
        TI = Tw + Tz;
487
71
        TJ = TH + TI;
488
71
        T15 = T13 - T14;
489
71
        T1a = T18 - T19;
490
71
        T1b = T15 + T1a;
491
71
        T1s = T1o - T1r;
492
71
        T1x = T1t - T1w;
493
71
        T1W = T1s + T1x;
494
71
        T29 = T1o + T1r;
495
71
        T2a = T1t + T1w;
496
71
        T2j = T29 + T2a;
497
71
        T1h = T14 + T13;
498
71
        T1i = T19 + T18;
499
71
        T1j = T1h + T1i;
500
71
         }
501
71
         { /* load inputs 4, 14, 9, 19, 16, 6, 1, 11 and combine them */
502
71
        E Ta, T1z, TS, T2B, Td, TT, T1C, T2C, Th, T1E, TX, T2E, Tk, TY, T1H;
503
71
        E T2F;
504
71
        {
505
71
       E T8, T9, TQ, TR;
506
71
       T8 = ri[WS(is, 4)];
507
71
       T9 = ri[WS(is, 14)];
508
71
       Ta = T8 + T9;
509
71
       T1z = T8 - T9;
510
71
       TQ = ii[WS(is, 4)];
511
71
       TR = ii[WS(is, 14)];
512
71
       TS = TQ - TR;
513
71
       T2B = TQ + TR;
514
71
        }
515
71
        {
516
71
       E Tb, Tc, T1A, T1B;
517
71
       Tb = ri[WS(is, 9)];
518
71
       Tc = ri[WS(is, 19)];
519
71
       Td = Tb + Tc;
520
71
       TT = Tb - Tc;
521
71
       T1A = ii[WS(is, 9)];
522
71
       T1B = ii[WS(is, 19)];
523
71
       T1C = T1A - T1B;
524
71
       T2C = T1A + T1B;
525
71
        }
526
71
        {
527
71
       E Tf, Tg, TV, TW;
528
71
       Tf = ri[WS(is, 16)];
529
71
       Tg = ri[WS(is, 6)];
530
71
       Th = Tf + Tg;
531
71
       T1E = Tf - Tg;
532
71
       TV = ii[WS(is, 16)];
533
71
       TW = ii[WS(is, 6)];
534
71
       TX = TV - TW;
535
71
       T2E = TV + TW;
536
71
        }
537
71
        {
538
71
       E Ti, Tj, T1F, T1G;
539
71
       Ti = ri[WS(is, 1)];
540
71
       Tj = ri[WS(is, 11)];
541
71
       Tk = Ti + Tj;
542
71
       TY = Ti - Tj;
543
71
       T1F = ii[WS(is, 1)];
544
71
       T1G = ii[WS(is, 11)];
545
71
       T1H = T1F - T1G;
546
71
       T2F = T1F + T1G;
547
71
        }
548
71
        Te = Ta - Td;
549
71
        Tl = Th - Tk;
550
71
        Tm = Te + Tl;
551
71
        T2D = T2B - T2C;
552
71
        T2G = T2E - T2F;
553
71
        T2R = T2D + T2G;
554
71
        T32 = T2B + T2C;
555
71
        T33 = T2E + T2F;
556
71
        T3e = T32 + T33;
557
71
        TE = Ta + Td;
558
71
        TF = Th + Tk;
559
71
        TG = TE + TF;
560
71
        TU = TS - TT;
561
71
        TZ = TX - TY;
562
71
        T10 = TU + TZ;
563
71
        T1D = T1z - T1C;
564
71
        T1I = T1E - T1H;
565
71
        T1V = T1D + T1I;
566
71
        T26 = T1z + T1C;
567
71
        T27 = T1E + T1H;
568
71
        T2i = T26 + T27;
569
71
        T1e = TT + TS;
570
71
        T1f = TY + TX;
571
71
        T1g = T1e + T1f;
572
71
         }
573
71
         { /* store real outputs 10, 14, 6, 2, 18 */
574
71
        E T2s, TC, T2r, T2I, T2K, T2A, T2H, T2J, T2t;
575
71
        T2s = KP559016994 * (Tm - TB);
576
71
        TC = Tm + TB;
577
71
        T2r = FNMS(KP250000000, TC, T7);
578
71
        T2A = T2w - T2z;
579
71
        T2H = T2D - T2G;
580
71
        T2I = FNMS(KP587785252, T2H, KP951056516 * T2A);
581
71
        T2K = FMA(KP951056516, T2H, KP587785252 * T2A);
582
71
        ro[WS(os, 10)] = T7 + TC;
583
71
        T2J = T2s + T2r;
584
71
        ro[WS(os, 14)] = T2J - T2K;
585
71
        ro[WS(os, 6)] = T2J + T2K;
586
71
        T2t = T2r - T2s;
587
71
        ro[WS(os, 2)] = T2t - T2I;
588
71
        ro[WS(os, 18)] = T2t + T2I;
589
71
         }
590
71
         { /* store imaginary outputs 10, 6, 14, 2, 18 */
591
71
        E T2V, T2T, T2U, T2N, T2Y, T2L, T2M, T2X, T2W;
592
71
        T2V = KP559016994 * (T2R - T2S);
593
71
        T2T = T2R + T2S;
594
71
        T2U = FNMS(KP250000000, T2T, T2Q);
595
71
        T2L = Tt - TA;
596
71
        T2M = Te - Tl;
597
71
        T2N = FNMS(KP587785252, T2M, KP951056516 * T2L);
598
71
        T2Y = FMA(KP951056516, T2M, KP587785252 * T2L);
599
71
        io[WS(os, 10)] = T2Q + T2T;
600
71
        T2X = T2V + T2U;
601
71
        io[WS(os, 6)] = T2X - T2Y;
602
71
        io[WS(os, 14)] = T2Y + T2X;
603
71
        T2W = T2U - T2V;
604
71
        io[WS(os, 2)] = T2N + T2W;
605
71
        io[WS(os, 18)] = T2W - T2N;
606
71
         }
607
71
         { /* store real outputs 0, 12, 8, 4, 16 */
608
71
        E T2Z, TK, T30, T38, T3a, T34, T37, T39, T31;
609
71
        T2Z = KP559016994 * (TG - TJ);
610
71
        TK = TG + TJ;
611
71
        T30 = FNMS(KP250000000, TK, TD);
612
71
        T34 = T32 - T33;
613
71
        T37 = T35 - T36;
614
71
        T38 = FMA(KP951056516, T34, KP587785252 * T37);
615
71
        T3a = FNMS(KP587785252, T34, KP951056516 * T37);
616
71
        ro[0] = TD + TK;
617
71
        T39 = T30 - T2Z;
618
71
        ro[WS(os, 12)] = T39 - T3a;
619
71
        ro[WS(os, 8)] = T39 + T3a;
620
71
        T31 = T2Z + T30;
621
71
        ro[WS(os, 4)] = T31 - T38;
622
71
        ro[WS(os, 16)] = T31 + T38;
623
71
         }
624
71
         { /* store imaginary outputs 0, 8, 12, 4, 16 */
625
71
        E T3g, T3i, T3j, T3d, T3m, T3b, T3c, T3l, T3k;
626
71
        T3g = KP559016994 * (T3e - T3f);
627
71
        T3i = T3e + T3f;
628
71
        T3j = FNMS(KP250000000, T3i, T3h);
629
71
        T3b = TE - TF;
630
71
        T3c = TH - TI;
631
71
        T3d = FMA(KP951056516, T3b, KP587785252 * T3c);
632
71
        T3m = FNMS(KP587785252, T3b, KP951056516 * T3c);
633
71
        io[0] = T3h + T3i;
634
71
        T3l = T3j - T3g;
635
71
        io[WS(os, 8)] = T3l - T3m;
636
71
        io[WS(os, 12)] = T3m + T3l;
637
71
        T3k = T3g + T3j;
638
71
        io[WS(os, 4)] = T3d + T3k;
639
71
        io[WS(os, 16)] = T3k - T3d;
640
71
         }
641
71
         { /* store imaginary outputs 5, 13, 17, 1, 9 */
642
71
        E T23, T1c, T24, T2c, T2e, T28, T2b, T2d, T25;
643
71
        T23 = KP559016994 * (T10 - T1b);
644
71
        T1c = T10 + T1b;
645
71
        T24 = FNMS(KP250000000, T1c, TP);
646
71
        T28 = T26 - T27;
647
71
        T2b = T29 - T2a;
648
71
        T2c = FMA(KP951056516, T28, KP587785252 * T2b);
649
71
        T2e = FNMS(KP587785252, T28, KP951056516 * T2b);
650
71
        io[WS(os, 5)] = TP + T1c;
651
71
        T2d = T24 - T23;
652
71
        io[WS(os, 13)] = T2d - T2e;
653
71
        io[WS(os, 17)] = T2d + T2e;
654
71
        T25 = T23 + T24;
655
71
        io[WS(os, 1)] = T25 - T2c;
656
71
        io[WS(os, 9)] = T25 + T2c;
657
71
         }
658
71
         { /* store real outputs 5, 13, 17, 1, 9 */
659
71
        E T2k, T2m, T2n, T2h, T2p, T2f, T2g, T2q, T2o;
660
71
        T2k = KP559016994 * (T2i - T2j);
661
71
        T2m = T2i + T2j;
662
71
        T2n = FNMS(KP250000000, T2m, T2l);
663
71
        T2f = TU - TZ;
664
71
        T2g = T15 - T1a;
665
71
        T2h = FMA(KP951056516, T2f, KP587785252 * T2g);
666
71
        T2p = FNMS(KP587785252, T2f, KP951056516 * T2g);
667
71
        ro[WS(os, 5)] = T2l + T2m;
668
71
        T2q = T2n - T2k;
669
71
        ro[WS(os, 13)] = T2p + T2q;
670
71
        ro[WS(os, 17)] = T2q - T2p;
671
71
        T2o = T2k + T2n;
672
71
        ro[WS(os, 1)] = T2h + T2o;
673
71
        ro[WS(os, 9)] = T2o - T2h;
674
71
         }
675
71
         { /* store imaginary outputs 15, 11, 19, 3, 7 */
676
71
        E T1m, T1k, T1l, T1K, T1M, T1y, T1J, T1L, T1n;
677
71
        T1m = KP559016994 * (T1g - T1j);
678
71
        T1k = T1g + T1j;
679
71
        T1l = FNMS(KP250000000, T1k, T1d);
680
71
        T1y = T1s - T1x;
681
71
        T1J = T1D - T1I;
682
71
        T1K = FNMS(KP587785252, T1J, KP951056516 * T1y);
683
71
        T1M = FMA(KP951056516, T1J, KP587785252 * T1y);
684
71
        io[WS(os, 15)] = T1d + T1k;
685
71
        T1L = T1m + T1l;
686
71
        io[WS(os, 11)] = T1L - T1M;
687
71
        io[WS(os, 19)] = T1L + T1M;
688
71
        T1n = T1l - T1m;
689
71
        io[WS(os, 3)] = T1n - T1K;
690
71
        io[WS(os, 7)] = T1n + T1K;
691
71
         }
692
71
         { /* store real outputs 15, 11, 19, 3, 7 */
693
71
        E T1Z, T1X, T1Y, T1P, T21, T1N, T1O, T22, T20;
694
71
        T1Z = KP559016994 * (T1V - T1W);
695
71
        T1X = T1V + T1W;
696
71
        T1Y = FNMS(KP250000000, T1X, T1U);
697
71
        T1N = T1h - T1i;
698
71
        T1O = T1e - T1f;
699
71
        T1P = FNMS(KP587785252, T1O, KP951056516 * T1N);
700
71
        T21 = FMA(KP951056516, T1O, KP587785252 * T1N);
701
71
        ro[WS(os, 15)] = T1U + T1X;
702
71
        T22 = T1Z + T1Y;
703
71
        ro[WS(os, 11)] = T21 + T22;
704
71
        ro[WS(os, 19)] = T22 - T21;
705
71
        T20 = T1Y - T1Z;
706
71
        ro[WS(os, 3)] = T1P + T20;
707
71
        ro[WS(os, 7)] = T20 - T1P;
708
71
         }
709
71
    }
710
5
     }
711
5
}
712
713
/*
 * Descriptor passed to X(kdft_register) together with n1_20 (non-FMA
 * variant).  It carries the number 20 and the name "n1_20"; the
 * { 184, 24, 24, 0 } entry matches the generator summary above (184
 * additions, 24 multiplications, 24 fused multiply/adds).  GENUS is not
 * defined in this listing.
 * NOTE(review): the leading 20 is presumably the transform size, and the
 * meaning of the trailing zero fields is not visible here -- check the
 * kdft_desc declaration.
 */
static const kdft_desc desc = { 20, "n1_20", { 184, 24, 24, 0 }, &GENUS, 0, 0, 0, 0 };
714
715
1
/* Hands the n1_20 kernel and its descriptor to X(kdft_register) for planner p. */
void X(codelet_n1_20) (planner *p) { X(kdft_register) (p, n1_20, &desc);
716
1
}
717
718
#endif