Coverage Report

Created: 2026-01-10 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/dft/scalar/codelets/t2_64.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sat Jan 10 06:09:13 UTC 2026 */
23
24
#include "dft/codelet-dft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -name t2_64 -include dft/scalar/t.h */
29
30
/*
31
 * This function contains 1154 FP additions, 840 FP multiplications,
32
 * (or, 520 additions, 206 multiplications, 634 fused multiply/add),
33
 * 316 stack variables, 15 constants, and 256 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36
37
static void t2_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
40
     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
41
     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
42
     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
43
     DK(KP098491403, +0.098491403357164253077197521291327432293052451);
44
     DK(KP820678790, +0.820678790828660330972281985331011598767386482);
45
     DK(KP303346683, +0.303346683607342391675883946941299872384187453);
46
     DK(KP534511135, +0.534511135950791641089685961295362908582039528);
47
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
48
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
49
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
50
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
51
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
52
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
53
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
54
     {
55
    INT m;
56
    for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(128, rs)) {
57
         E T2, T3, Tc, T8, Te, T5, T6, Tr, T7, TJ, T14, T3d, T3i, TG, T10;
58
         E T3a, T3g, TL, TP, Tb, Td, T17, Tt, Tu, T1i, Ti, T2U, T1t, T7B, T5O;
59
         E T3N, T3U, T1I, T3G, T3R, T79, T1x, T3D, T2l, T3X, T2d, T1M, T4B, T4x, T4T;
60
         E T2h, T29, T5s, T81, T5w, T7X, T7N, T7h, T64, T6a, T6e, T7l, T60, T7R, T5A;
61
         E T6h, T6J, T7o, T5E, T6k, T6N, T7r, T2X, T6t, T6x, TO, TK, TQ, T7c, TU;
62
         E T2x, T2u, T2y, T7E, T2C, T4b, T48, T4c, T5R, T4g, T3m, T3j, T3n, T4W, T3r;
63
         E Tx, Ty, TC, T1Z, T23, T4s, T4p, T70, T6W, T19, T41, T44, T1a, T1e, T35;
64
         E T31, T59, T55, T1k, T1R, T1V, T1l, T1p, T2Q, T2N, T8i, T8e, Th, T4E, T4H;
65
         E Tj, Tn, T3A, T3w, T5n, T5j;
66
         {
67
        E T1H, Tg, Tw, T1s, T2g, TH, T2t, T47, T3h, T28, T4w, T3M, T2c, T4A, T3Q;
68
        E T1w, T2k, T1L, T5r, T80;
69
        {
70
       E TI, T13, TF, TZ, Ta, T4, T9, Ts;
71
       T2 = W[0];
72
       T3 = W[2];
73
       T4 = T2 * T3;
74
       Tc = W[5];
75
       TI = T3 * Tc;
76
       T13 = T2 * Tc;
77
       T8 = W[4];
78
       Te = W[6];
79
       TF = T3 * T8;
80
       T1H = T8 * Te;
81
       TZ = T2 * T8;
82
       T5 = W[1];
83
       T6 = W[3];
84
       Ta = T2 * T6;
85
       Tr = FMA(T5, T6, T4);
86
       T7 = FNMS(T5, T6, T4);
87
       Tg = T7 * Tc;
88
       Tw = Tr * Tc;
89
       T1s = T3 * Te;
90
       T2g = T2 * Te;
91
       TJ = FMA(T6, T8, TI);
92
       T14 = FNMS(T5, T8, T13);
93
       T3d = FMA(T5, T8, T13);
94
       T3i = FNMS(T6, T8, TI);
95
       TG = FNMS(T6, Tc, TF);
96
       TH = TG * Te;
97
       T10 = FMA(T5, Tc, TZ);
98
       T2t = T10 * Te;
99
       T3a = FNMS(T5, Tc, TZ);
100
       T47 = T3a * Te;
101
       T3g = FMA(T6, Tc, TF);
102
       T3h = T3g * Te;
103
       TL = W[8];
104
       T28 = T3 * TL;
105
       T4w = T8 * TL;
106
       T3M = T2 * TL;
107
       TP = W[9];
108
       T2c = T3 * TP;
109
       T4A = T8 * TP;
110
       T3Q = T2 * TP;
111
       T9 = T7 * T8;
112
       Tb = FMA(T5, T3, Ta);
113
       Td = FMA(Tb, Tc, T9);
114
       T17 = FNMS(Tb, Tc, T9);
115
       Ts = Tr * T8;
116
       Tt = FNMS(T5, T3, Ta);
117
       Tu = FNMS(Tt, Tc, Ts);
118
       T1i = FMA(Tt, Tc, Ts);
119
       Ti = W[7];
120
       T1w = T3 * Ti;
121
       T2k = T2 * Ti;
122
       T1L = T8 * Ti;
123
       T2U = FMA(Tc, Ti, T1H);
124
        }
125
        T1t = FMA(T6, Ti, T1s);
126
        T7B = FNMS(T14, Ti, T2t);
127
        T5O = FNMS(T3d, Ti, T47);
128
        T3N = FMA(T5, TP, T3M);
129
        T3U = FNMS(T6, Ti, T1s);
130
        T1I = FNMS(Tc, Ti, T1H);
131
        T3G = FNMS(T5, Te, T2k);
132
        T3R = FNMS(T5, TL, T3Q);
133
        T79 = FNMS(TJ, Ti, TH);
134
        T1x = FNMS(T6, Te, T1w);
135
        T3D = FMA(T5, Ti, T2g);
136
        T2l = FMA(T5, Te, T2k);
137
        T3X = FMA(T6, Te, T1w);
138
        T2d = FNMS(T6, TL, T2c);
139
        T1M = FMA(Tc, Te, T1L);
140
        T4B = FNMS(Tc, TL, T4A);
141
        T4x = FMA(Tc, TP, T4w);
142
        T4T = FNMS(T3i, Ti, T3h);
143
        T2h = FNMS(T5, Ti, T2g);
144
        T29 = FMA(T6, TP, T28);
145
        T5r = T3g * TL;
146
        T5s = FMA(T3i, TP, T5r);
147
        T80 = T7 * TP;
148
        T81 = FNMS(Tb, TL, T80);
149
        {
150
       E T5v, T7W, T7M, T7g, T63;
151
       T5v = T3g * TP;
152
       T5w = FNMS(T3i, TL, T5v);
153
       T7W = T7 * TL;
154
       T7X = FMA(Tb, TP, T7W);
155
       T7M = TG * TL;
156
       T7N = FMA(TJ, TP, T7M);
157
       T7g = T10 * TL;
158
       T7h = FMA(T14, TP, T7g);
159
       T63 = T3a * TP;
160
       T64 = FNMS(T3d, TL, T63);
161
        }
162
        {
163
       E T69, T6d, T7k, T5Z, T7Q, T5z;
164
       T69 = Tr * TL;
165
       T6a = FMA(Tt, TP, T69);
166
       T6d = Tr * TP;
167
       T6e = FNMS(Tt, TL, T6d);
168
       T7k = T10 * TP;
169
       T7l = FNMS(T14, TL, T7k);
170
       T5Z = T3a * TL;
171
       T60 = FMA(T3d, TP, T5Z);
172
       T7Q = TG * TP;
173
       T7R = FNMS(TJ, TL, T7Q);
174
       T5z = Tr * Te;
175
       T5A = FMA(Tt, Ti, T5z);
176
       T6h = FNMS(Tt, Ti, T5z);
177
        }
178
        {
179
       E T6I, T5D, T6M, T6s, T6w;
180
       T6I = T7 * Te;
181
       T6J = FNMS(Tb, Ti, T6I);
182
       T7o = FMA(Tb, Ti, T6I);
183
       T5D = Tr * Ti;
184
       T5E = FNMS(Tt, Te, T5D);
185
       T6k = FMA(Tt, Te, T5D);
186
       T6M = T7 * Ti;
187
       T6N = FMA(Tb, Te, T6M);
188
       T7r = FNMS(Tb, Te, T6M);
189
       T6s = T2U * TL;
190
       T6w = T2U * TP;
191
       T2X = FNMS(Tc, Te, T1L);
192
       T6t = FMA(T2X, TP, T6s);
193
       T6x = FNMS(T2X, TL, T6w);
194
       {
195
            E TN, TM, TT, T2w, T2v, T2B;
196
            TN = TG * Ti;
197
            TO = FNMS(TJ, Te, TN);
198
            TK = FMA(TJ, Ti, TH);
199
            TM = TK * TL;
200
            TT = TK * TP;
201
            TQ = FMA(TO, TP, TM);
202
            T7c = FMA(TJ, Te, TN);
203
            TU = FNMS(TO, TL, TT);
204
            T2w = T10 * Ti;
205
            T2x = FNMS(T14, Te, T2w);
206
            T2u = FMA(T14, Ti, T2t);
207
            T2v = T2u * TL;
208
            T2B = T2u * TP;
209
            T2y = FMA(T2x, TP, T2v);
210
            T7E = FMA(T14, Te, T2w);
211
            T2C = FNMS(T2x, TL, T2B);
212
       }
213
        }
214
        {
215
       E T4a, T49, T4f, T3l, T3k, T3q;
216
       T4a = T3a * Ti;
217
       T4b = FNMS(T3d, Te, T4a);
218
       T48 = FMA(T3d, Ti, T47);
219
       T49 = T48 * TL;
220
       T4f = T48 * TP;
221
       T4c = FMA(T4b, TP, T49);
222
       T5R = FMA(T3d, Te, T4a);
223
       T4g = FNMS(T4b, TL, T4f);
224
       T3l = T3g * Ti;
225
       T3m = FNMS(T3i, Te, T3l);
226
       T3j = FMA(T3i, Ti, T3h);
227
       T3k = T3j * TL;
228
       T3q = T3j * TP;
229
       T3n = FMA(T3m, TP, T3k);
230
       T4W = FMA(T3i, Te, T3l);
231
       T3r = FNMS(T3m, TL, T3q);
232
       {
233
            E T1Y, T22, Tv, TB, T6Z, T6V;
234
            T1Y = Tu * TL;
235
            T22 = Tu * TP;
236
            Tv = Tu * Te;
237
            TB = Tu * Ti;
238
            Tx = FMA(Tt, T8, Tw);
239
            Ty = FMA(Tx, Ti, Tv);
240
            TC = FNMS(Tx, Te, TB);
241
            T1Z = FMA(Tx, TP, T1Y);
242
            T23 = FNMS(Tx, TL, T22);
243
            T4s = FMA(Tx, Te, TB);
244
            T4p = FNMS(Tx, Ti, Tv);
245
            T6Z = Ty * TP;
246
            T70 = FNMS(TC, TL, T6Z);
247
            T6V = Ty * TL;
248
            T6W = FMA(TC, TP, T6V);
249
       }
250
        }
251
        {
252
       E T30, T34, T18, T1d, T58, T54;
253
       T30 = T17 * TL;
254
       T34 = T17 * TP;
255
       T18 = T17 * Te;
256
       T1d = T17 * Ti;
257
       T19 = FMA(Tb, T8, Tg);
258
       T41 = FMA(T19, Ti, T18);
259
       T44 = FNMS(T19, Te, T1d);
260
       T1a = FNMS(T19, Ti, T18);
261
       T1e = FMA(T19, Te, T1d);
262
       T35 = FNMS(T19, TL, T34);
263
       T31 = FMA(T19, TP, T30);
264
       T58 = T41 * TP;
265
       T59 = FNMS(T44, TL, T58);
266
       T54 = T41 * TL;
267
       T55 = FMA(T44, TP, T54);
268
        }
269
        {
270
       E T1j, T1o, T1Q, T1U, T8h, T8d;
271
       T1j = T1i * TL;
272
       T1o = T1i * TP;
273
       T1Q = T1i * Te;
274
       T1U = T1i * Ti;
275
       T1k = FNMS(Tt, T8, Tw);
276
       T1R = FMA(T1k, Ti, T1Q);
277
       T1V = FNMS(T1k, Te, T1U);
278
       T1l = FMA(T1k, TP, T1j);
279
       T1p = FNMS(T1k, TL, T1o);
280
       T2Q = FMA(T1k, Te, T1U);
281
       T2N = FNMS(T1k, Ti, T1Q);
282
       T8h = T1R * TP;
283
       T8i = FNMS(T1V, TL, T8h);
284
       T8d = T1R * TL;
285
       T8e = FMA(T1V, TP, T8d);
286
        }
287
        {
288
       E T3v, T3z, Tf, Tm, T5m, T5i;
289
       T3v = Td * TL;
290
       T3z = Td * TP;
291
       Tf = Td * Te;
292
       Tm = Td * Ti;
293
       Th = FNMS(Tb, T8, Tg);
294
       T4E = FMA(Th, Ti, Tf);
295
       T4H = FNMS(Th, Te, Tm);
296
       Tj = FNMS(Th, Ti, Tf);
297
       Tn = FMA(Th, Te, Tm);
298
       T3A = FNMS(Th, TL, T3z);
299
       T3w = FMA(Th, TP, T3v);
300
       T5m = T4E * TP;
301
       T5n = FNMS(T4H, TL, T5m);
302
       T5i = T4E * TL;
303
       T5j = FMA(T4H, TP, T5i);
304
        }
305
         }
306
         {
307
        E TY, Tg4, Tl9, TlD, T8w, TdS, Tkd, TkE, T2G, Tge, Tgh, TiK, T98, Te1, T9f;
308
        E Te0, T39, Tgq, Tgn, TiN, T9p, Te5, T9M, Te8, T74, Thr, Thc, Tja, TbI, TeE;
309
        E TcB, TeP, T1B, TkD, Tg7, Tk7, T8D, TdT, T8K, TdU, T27, Tg9, Tgc, TiJ, T8T;
310
        E TdY, T90, TdX, T4k, TgB, Tgy, TiT, T9Y, Tec, Tal, Tef, T5d, Th0, TgL, TiZ;
311
        E Taz, Tel, Tbs, Tew, T3K, Tgo, Tgt, TiO, T9E, Te9, T9P, Te6, T4L, Tgz, TgE;
312
        E TiU, Tad, Teg, Tao, Ted, T5I, TgM, Th3, Tj0, TaO, Tex, Tbv, Tem, T7v, Thd;
313
        E Thu, Tjb, TbX, TeQ, TcE, TeF, T68, Tj5, TgS, Th5, Tbj, Tez, Tbx, Teq, T6B;
314
        E Tj6, TgX, Th6, Tb4, TeA, Tby, Tet, T7V, Tjg, Thj, Thw, Tcs, TeS, TcG, TeJ;
315
        E T8m, Tjh, Tho, Thx, Tcd, TeT, TcH, TeM;
316
        {
317
       E T1, Tkb, Tp, Tka, TE, T8s, TW, T8u;
318
       T1 = ri[0];
319
       Tkb = ii[0];
320
       {
321
            E Tk, Tl, To, Tk9;
322
            Tk = ri[WS(rs, 32)];
323
            Tl = Tj * Tk;
324
            To = ii[WS(rs, 32)];
325
            Tk9 = Tj * To;
326
            Tp = FMA(Tn, To, Tl);
327
            Tka = FNMS(Tn, Tk, Tk9);
328
       }
329
       {
330
            E Tz, TA, TD, T8r;
331
            Tz = ri[WS(rs, 16)];
332
            TA = Ty * Tz;
333
            TD = ii[WS(rs, 16)];
334
            T8r = Ty * TD;
335
            TE = FMA(TC, TD, TA);
336
            T8s = FNMS(TC, Tz, T8r);
337
       }
338
       {
339
            E TR, TS, TV, T8t;
340
            TR = ri[WS(rs, 48)];
341
            TS = TQ * TR;
342
            TV = ii[WS(rs, 48)];
343
            T8t = TQ * TV;
344
            TW = FMA(TU, TV, TS);
345
            T8u = FNMS(TU, TR, T8t);
346
       }
347
       {
348
            E Tq, TX, Tl7, Tl8;
349
            Tq = T1 + Tp;
350
            TX = TE + TW;
351
            TY = Tq + TX;
352
            Tg4 = Tq - TX;
353
            Tl7 = Tkb - Tka;
354
            Tl8 = TE - TW;
355
            Tl9 = Tl7 - Tl8;
356
            TlD = Tl8 + Tl7;
357
       }
358
       {
359
            E T8q, T8v, Tk8, Tkc;
360
            T8q = T1 - Tp;
361
            T8v = T8s - T8u;
362
            T8w = T8q - T8v;
363
            TdS = T8q + T8v;
364
            Tk8 = T8s + T8u;
365
            Tkc = Tka + Tkb;
366
            Tkd = Tk8 + Tkc;
367
            TkE = Tkc - Tk8;
368
       }
369
        }
370
        {
371
       E T2f, T93, T2E, T9d, T2n, T95, T2s, T9b;
372
       {
373
            E T2a, T2b, T2e, T92;
374
            T2a = ri[WS(rs, 60)];
375
            T2b = T29 * T2a;
376
            T2e = ii[WS(rs, 60)];
377
            T92 = T29 * T2e;
378
            T2f = FMA(T2d, T2e, T2b);
379
            T93 = FNMS(T2d, T2a, T92);
380
       }
381
       {
382
            E T2z, T2A, T2D, T9c;
383
            T2z = ri[WS(rs, 44)];
384
            T2A = T2y * T2z;
385
            T2D = ii[WS(rs, 44)];
386
            T9c = T2y * T2D;
387
            T2E = FMA(T2C, T2D, T2A);
388
            T9d = FNMS(T2C, T2z, T9c);
389
       }
390
       {
391
            E T2i, T2j, T2m, T94;
392
            T2i = ri[WS(rs, 28)];
393
            T2j = T2h * T2i;
394
            T2m = ii[WS(rs, 28)];
395
            T94 = T2h * T2m;
396
            T2n = FMA(T2l, T2m, T2j);
397
            T95 = FNMS(T2l, T2i, T94);
398
       }
399
       {
400
            E T2p, T2q, T2r, T9a;
401
            T2p = ri[WS(rs, 12)];
402
            T2q = TG * T2p;
403
            T2r = ii[WS(rs, 12)];
404
            T9a = TG * T2r;
405
            T2s = FMA(TJ, T2r, T2q);
406
            T9b = FNMS(TJ, T2p, T9a);
407
       }
408
       {
409
            E T2o, T2F, Tgf, Tgg;
410
            T2o = T2f + T2n;
411
            T2F = T2s + T2E;
412
            T2G = T2o + T2F;
413
            Tge = T2o - T2F;
414
            Tgf = T93 + T95;
415
            Tgg = T9b + T9d;
416
            Tgh = Tgf - Tgg;
417
            TiK = Tgf + Tgg;
418
       }
419
       {
420
            E T96, T97, T99, T9e;
421
            T96 = T93 - T95;
422
            T97 = T2s - T2E;
423
            T98 = T96 + T97;
424
            Te1 = T96 - T97;
425
            T99 = T2f - T2n;
426
            T9e = T9b - T9d;
427
            T9f = T99 - T9e;
428
            Te0 = T99 + T9e;
429
       }
430
        }
431
        {
432
       E T2M, T9k, T37, T9K, T2S, T9m, T2Z, T9I;
433
       {
434
            E T2J, T2K, T2L, T9j;
435
            T2J = ri[WS(rs, 2)];
436
            T2K = Tr * T2J;
437
            T2L = ii[WS(rs, 2)];
438
            T9j = Tr * T2L;
439
            T2M = FMA(Tt, T2L, T2K);
440
            T9k = FNMS(Tt, T2J, T9j);
441
       }
442
       {
443
            E T32, T33, T36, T9J;
444
            T32 = ri[WS(rs, 50)];
445
            T33 = T31 * T32;
446
            T36 = ii[WS(rs, 50)];
447
            T9J = T31 * T36;
448
            T37 = FMA(T35, T36, T33);
449
            T9K = FNMS(T35, T32, T9J);
450
       }
451
       {
452
            E T2O, T2P, T2R, T9l;
453
            T2O = ri[WS(rs, 34)];
454
            T2P = T2N * T2O;
455
            T2R = ii[WS(rs, 34)];
456
            T9l = T2N * T2R;
457
            T2S = FMA(T2Q, T2R, T2P);
458
            T9m = FNMS(T2Q, T2O, T9l);
459
       }
460
       {
461
            E T2V, T2W, T2Y, T9H;
462
            T2V = ri[WS(rs, 18)];
463
            T2W = T2U * T2V;
464
            T2Y = ii[WS(rs, 18)];
465
            T9H = T2U * T2Y;
466
            T2Z = FMA(T2X, T2Y, T2W);
467
            T9I = FNMS(T2X, T2V, T9H);
468
       }
469
       {
470
            E T2T, T38, Tgl, Tgm;
471
            T2T = T2M + T2S;
472
            T38 = T2Z + T37;
473
            T39 = T2T + T38;
474
            Tgq = T2T - T38;
475
            Tgl = T9k + T9m;
476
            Tgm = T9I + T9K;
477
            Tgn = Tgl - Tgm;
478
            TiN = Tgl + Tgm;
479
       }
480
       {
481
            E T9n, T9o, T9G, T9L;
482
            T9n = T9k - T9m;
483
            T9o = T2Z - T37;
484
            T9p = T9n + T9o;
485
            Te5 = T9n - T9o;
486
            T9G = T2M - T2S;
487
            T9L = T9I - T9K;
488
            T9M = T9G - T9L;
489
            Te8 = T9G + T9L;
490
       }
491
        }
492
        {
493
       E T6H, TbD, T72, Tcz, T6P, TbF, T6U, Tcx;
494
       {
495
            E T6E, T6F, T6G, TbC;
496
            T6E = ri[WS(rs, 63)];
497
            T6F = TL * T6E;
498
            T6G = ii[WS(rs, 63)];
499
            TbC = TL * T6G;
500
            T6H = FMA(TP, T6G, T6F);
501
            TbD = FNMS(TP, T6E, TbC);
502
       }
503
       {
504
            E T6X, T6Y, T71, Tcy;
505
            T6X = ri[WS(rs, 47)];
506
            T6Y = T6W * T6X;
507
            T71 = ii[WS(rs, 47)];
508
            Tcy = T6W * T71;
509
            T72 = FMA(T70, T71, T6Y);
510
            Tcz = FNMS(T70, T6X, Tcy);
511
       }
512
       {
513
            E T6K, T6L, T6O, TbE;
514
            T6K = ri[WS(rs, 31)];
515
            T6L = T6J * T6K;
516
            T6O = ii[WS(rs, 31)];
517
            TbE = T6J * T6O;
518
            T6P = FMA(T6N, T6O, T6L);
519
            TbF = FNMS(T6N, T6K, TbE);
520
       }
521
       {
522
            E T6R, T6S, T6T, Tcw;
523
            T6R = ri[WS(rs, 15)];
524
            T6S = TK * T6R;
525
            T6T = ii[WS(rs, 15)];
526
            Tcw = TK * T6T;
527
            T6U = FMA(TO, T6T, T6S);
528
            Tcx = FNMS(TO, T6R, Tcw);
529
       }
530
       {
531
            E T6Q, T73, Tha, Thb;
532
            T6Q = T6H + T6P;
533
            T73 = T6U + T72;
534
            T74 = T6Q + T73;
535
            Thr = T6Q - T73;
536
            Tha = TbD + TbF;
537
            Thb = Tcx + Tcz;
538
            Thc = Tha - Thb;
539
            Tja = Tha + Thb;
540
       }
541
       {
542
            E TbG, TbH, Tcv, TcA;
543
            TbG = TbD - TbF;
544
            TbH = T6U - T72;
545
            TbI = TbG + TbH;
546
            TeE = TbG - TbH;
547
            Tcv = T6H - T6P;
548
            TcA = Tcx - Tcz;
549
            TcB = Tcv - TcA;
550
            TeP = Tcv + TcA;
551
       }
552
        }
553
        {
554
       E T16, T8y, T1z, T8I, T1g, T8A, T1r, T8G;
555
       {
556
            E T11, T12, T15, T8x;
557
            T11 = ri[WS(rs, 8)];
558
            T12 = T10 * T11;
559
            T15 = ii[WS(rs, 8)];
560
            T8x = T10 * T15;
561
            T16 = FMA(T14, T15, T12);
562
            T8y = FNMS(T14, T11, T8x);
563
       }
564
       {
565
            E T1u, T1v, T1y, T8H;
566
            T1u = ri[WS(rs, 24)];
567
            T1v = T1t * T1u;
568
            T1y = ii[WS(rs, 24)];
569
            T8H = T1t * T1y;
570
            T1z = FMA(T1x, T1y, T1v);
571
            T8I = FNMS(T1x, T1u, T8H);
572
       }
573
       {
574
            E T1b, T1c, T1f, T8z;
575
            T1b = ri[WS(rs, 40)];
576
            T1c = T1a * T1b;
577
            T1f = ii[WS(rs, 40)];
578
            T8z = T1a * T1f;
579
            T1g = FMA(T1e, T1f, T1c);
580
            T8A = FNMS(T1e, T1b, T8z);
581
       }
582
       {
583
            E T1m, T1n, T1q, T8F;
584
            T1m = ri[WS(rs, 56)];
585
            T1n = T1l * T1m;
586
            T1q = ii[WS(rs, 56)];
587
            T8F = T1l * T1q;
588
            T1r = FMA(T1p, T1q, T1n);
589
            T8G = FNMS(T1p, T1m, T8F);
590
       }
591
       {
592
            E T1h, T1A, Tg5, Tg6;
593
            T1h = T16 + T1g;
594
            T1A = T1r + T1z;
595
            T1B = T1h + T1A;
596
            TkD = T1A - T1h;
597
            Tg5 = T8y + T8A;
598
            Tg6 = T8G + T8I;
599
            Tg7 = Tg5 - Tg6;
600
            Tk7 = Tg5 + Tg6;
601
       }
602
       {
603
            E T8B, T8C, T8E, T8J;
604
            T8B = T8y - T8A;
605
            T8C = T16 - T1g;
606
            T8D = T8B - T8C;
607
            TdT = T8C + T8B;
608
            T8E = T1r - T1z;
609
            T8J = T8G - T8I;
610
            T8K = T8E + T8J;
611
            TdU = T8E - T8J;
612
       }
613
        }
614
        {
615
       E T1G, T8O, T25, T8Y, T1O, T8Q, T1X, T8W;
616
       {
617
            E T1D, T1E, T1F, T8N;
618
            T1D = ri[WS(rs, 4)];
619
            T1E = T7 * T1D;
620
            T1F = ii[WS(rs, 4)];
621
            T8N = T7 * T1F;
622
            T1G = FMA(Tb, T1F, T1E);
623
            T8O = FNMS(Tb, T1D, T8N);
624
       }
625
       {
626
            E T20, T21, T24, T8X;
627
            T20 = ri[WS(rs, 52)];
628
            T21 = T1Z * T20;
629
            T24 = ii[WS(rs, 52)];
630
            T8X = T1Z * T24;
631
            T25 = FMA(T23, T24, T21);
632
            T8Y = FNMS(T23, T20, T8X);
633
       }
634
       {
635
            E T1J, T1K, T1N, T8P;
636
            T1J = ri[WS(rs, 36)];
637
            T1K = T1I * T1J;
638
            T1N = ii[WS(rs, 36)];
639
            T8P = T1I * T1N;
640
            T1O = FMA(T1M, T1N, T1K);
641
            T8Q = FNMS(T1M, T1J, T8P);
642
       }
643
       {
644
            E T1S, T1T, T1W, T8V;
645
            T1S = ri[WS(rs, 20)];
646
            T1T = T1R * T1S;
647
            T1W = ii[WS(rs, 20)];
648
            T8V = T1R * T1W;
649
            T1X = FMA(T1V, T1W, T1T);
650
            T8W = FNMS(T1V, T1S, T8V);
651
       }
652
       {
653
            E T1P, T26, Tga, Tgb;
654
            T1P = T1G + T1O;
655
            T26 = T1X + T25;
656
            T27 = T1P + T26;
657
            Tg9 = T1P - T26;
658
            Tga = T8O + T8Q;
659
            Tgb = T8W + T8Y;
660
            Tgc = Tga - Tgb;
661
            TiJ = Tga + Tgb;
662
       }
663
       {
664
            E T8R, T8S, T8U, T8Z;
665
            T8R = T8O - T8Q;
666
            T8S = T1X - T25;
667
            T8T = T8R + T8S;
668
            TdY = T8R - T8S;
669
            T8U = T1G - T1O;
670
            T8Z = T8W - T8Y;
671
            T90 = T8U - T8Z;
672
            TdX = T8U + T8Z;
673
       }
674
        }
675
        {
676
       E T3T, T9T, T4i, Taj, T3Z, T9V, T46, Tah;
677
       {
678
            E T3O, T3P, T3S, T9S;
679
            T3O = ri[WS(rs, 62)];
680
            T3P = T3N * T3O;
681
            T3S = ii[WS(rs, 62)];
682
            T9S = T3N * T3S;
683
            T3T = FMA(T3R, T3S, T3P);
684
            T9T = FNMS(T3R, T3O, T9S);
685
       }
686
       {
687
            E T4d, T4e, T4h, Tai;
688
            T4d = ri[WS(rs, 46)];
689
            T4e = T4c * T4d;
690
            T4h = ii[WS(rs, 46)];
691
            Tai = T4c * T4h;
692
            T4i = FMA(T4g, T4h, T4e);
693
            Taj = FNMS(T4g, T4d, Tai);
694
       }
695
       {
696
            E T3V, T3W, T3Y, T9U;
697
            T3V = ri[WS(rs, 30)];
698
            T3W = T3U * T3V;
699
            T3Y = ii[WS(rs, 30)];
700
            T9U = T3U * T3Y;
701
            T3Z = FMA(T3X, T3Y, T3W);
702
            T9V = FNMS(T3X, T3V, T9U);
703
       }
704
       {
705
            E T42, T43, T45, Tag;
706
            T42 = ri[WS(rs, 14)];
707
            T43 = T41 * T42;
708
            T45 = ii[WS(rs, 14)];
709
            Tag = T41 * T45;
710
            T46 = FMA(T44, T45, T43);
711
            Tah = FNMS(T44, T42, Tag);
712
       }
713
       {
714
            E T40, T4j, Tgw, Tgx;
715
            T40 = T3T + T3Z;
716
            T4j = T46 + T4i;
717
            T4k = T40 + T4j;
718
            TgB = T40 - T4j;
719
            Tgw = T9T + T9V;
720
            Tgx = Tah + Taj;
721
            Tgy = Tgw - Tgx;
722
            TiT = Tgw + Tgx;
723
       }
724
       {
725
            E T9W, T9X, Taf, Tak;
726
            T9W = T9T - T9V;
727
            T9X = T46 - T4i;
728
            T9Y = T9W + T9X;
729
            Tec = T9W - T9X;
730
            Taf = T3T - T3Z;
731
            Tak = Tah - Taj;
732
            Tal = Taf - Tak;
733
            Tef = Taf + Tak;
734
       }
735
        }
736
        {
737
       E T4S, Tau, T5b, Tbq, T4Y, Taw, T53, Tbo;
738
       {
739
            E T4P, T4Q, T4R, Tat;
740
            T4P = ri[WS(rs, 1)];
741
            T4Q = T2 * T4P;
742
            T4R = ii[WS(rs, 1)];
743
            Tat = T2 * T4R;
744
            T4S = FMA(T5, T4R, T4Q);
745
            Tau = FNMS(T5, T4P, Tat);
746
       }
747
       {
748
            E T56, T57, T5a, Tbp;
749
            T56 = ri[WS(rs, 49)];
750
            T57 = T55 * T56;
751
            T5a = ii[WS(rs, 49)];
752
            Tbp = T55 * T5a;
753
            T5b = FMA(T59, T5a, T57);
754
            Tbq = FNMS(T59, T56, Tbp);
755
       }
756
       {
757
            E T4U, T4V, T4X, Tav;
758
            T4U = ri[WS(rs, 33)];
759
            T4V = T4T * T4U;
760
            T4X = ii[WS(rs, 33)];
761
            Tav = T4T * T4X;
762
            T4Y = FMA(T4W, T4X, T4V);
763
            Taw = FNMS(T4W, T4U, Tav);
764
       }
765
       {
766
            E T50, T51, T52, Tbn;
767
            T50 = ri[WS(rs, 17)];
768
            T51 = T48 * T50;
769
            T52 = ii[WS(rs, 17)];
770
            Tbn = T48 * T52;
771
            T53 = FMA(T4b, T52, T51);
772
            Tbo = FNMS(T4b, T50, Tbn);
773
       }
774
       {
775
            E T4Z, T5c, TgJ, TgK;
776
            T4Z = T4S + T4Y;
777
            T5c = T53 + T5b;
778
            T5d = T4Z + T5c;
779
            Th0 = T4Z - T5c;
780
            TgJ = Tau + Taw;
781
            TgK = Tbo + Tbq;
782
            TgL = TgJ - TgK;
783
            TiZ = TgJ + TgK;
784
       }
785
       {
786
            E Tax, Tay, Tbm, Tbr;
787
            Tax = Tau - Taw;
788
            Tay = T53 - T5b;
789
            Taz = Tax + Tay;
790
            Tel = Tax - Tay;
791
            Tbm = T4S - T4Y;
792
            Tbr = Tbo - Tbq;
793
            Tbs = Tbm - Tbr;
794
            Tew = Tbm + Tbr;
795
       }
796
        }
797
        {
798
       E T3f, T9s, T3I, T9B, T3t, T9u, T3C, T9z;
799
       {
800
            E T3b, T3c, T3e, T9r;
801
            T3b = ri[WS(rs, 10)];
802
            T3c = T3a * T3b;
803
            T3e = ii[WS(rs, 10)];
804
            T9r = T3a * T3e;
805
            T3f = FMA(T3d, T3e, T3c);
806
            T9s = FNMS(T3d, T3b, T9r);
807
       }
808
       {
809
            E T3E, T3F, T3H, T9A;
810
            T3E = ri[WS(rs, 26)];
811
            T3F = T3D * T3E;
812
            T3H = ii[WS(rs, 26)];
813
            T9A = T3D * T3H;
814
            T3I = FMA(T3G, T3H, T3F);
815
            T9B = FNMS(T3G, T3E, T9A);
816
       }
817
       {
818
            E T3o, T3p, T3s, T9t;
819
            T3o = ri[WS(rs, 42)];
820
            T3p = T3n * T3o;
821
            T3s = ii[WS(rs, 42)];
822
            T9t = T3n * T3s;
823
            T3t = FMA(T3r, T3s, T3p);
824
            T9u = FNMS(T3r, T3o, T9t);
825
       }
826
       {
827
            E T3x, T3y, T3B, T9y;
828
            T3x = ri[WS(rs, 58)];
829
            T3y = T3w * T3x;
830
            T3B = ii[WS(rs, 58)];
831
            T9y = T3w * T3B;
832
            T3C = FMA(T3A, T3B, T3y);
833
            T9z = FNMS(T3A, T3x, T9y);
834
       }
835
       {
836
            E T3u, T3J, Tgr, Tgs;
837
            T3u = T3f + T3t;
838
            T3J = T3C + T3I;
839
            T3K = T3u + T3J;
840
            Tgo = T3J - T3u;
841
            Tgr = T9s + T9u;
842
            Tgs = T9z + T9B;
843
            Tgt = Tgr - Tgs;
844
            TiO = Tgr + Tgs;
845
            {
846
           E T9w, T9O, T9D, T9N;
847
           {
848
          E T9q, T9v, T9x, T9C;
849
          T9q = T3f - T3t;
850
          T9v = T9s - T9u;
851
          T9w = T9q + T9v;
852
          T9O = T9v - T9q;
853
          T9x = T3C - T3I;
854
          T9C = T9z - T9B;
855
          T9D = T9x - T9C;
856
          T9N = T9x + T9C;
857
           }
858
           T9E = T9w - T9D;
859
           Te9 = T9w + T9D;
860
           T9P = T9N - T9O;
861
           Te6 = T9O + T9N;
862
            }
863
       }
864
        }
865
        {
866
       E T4o, Ta1, T4J, Taa, T4u, Ta3, T4D, Ta8;
867
       {
868
            E T4l, T4m, T4n, Ta0;
869
            T4l = ri[WS(rs, 6)];
870
            T4m = T3g * T4l;
871
            T4n = ii[WS(rs, 6)];
872
            Ta0 = T3g * T4n;
873
            T4o = FMA(T3i, T4n, T4m);
874
            Ta1 = FNMS(T3i, T4l, Ta0);
875
       }
876
       {
877
            E T4F, T4G, T4I, Ta9;
878
            T4F = ri[WS(rs, 22)];
879
            T4G = T4E * T4F;
880
            T4I = ii[WS(rs, 22)];
881
            Ta9 = T4E * T4I;
882
            T4J = FMA(T4H, T4I, T4G);
883
            Taa = FNMS(T4H, T4F, Ta9);
884
       }
885
       {
886
            E T4q, T4r, T4t, Ta2;
887
            T4q = ri[WS(rs, 38)];
888
            T4r = T4p * T4q;
889
            T4t = ii[WS(rs, 38)];
890
            Ta2 = T4p * T4t;
891
            T4u = FMA(T4s, T4t, T4r);
892
            Ta3 = FNMS(T4s, T4q, Ta2);
893
       }
894
       {
895
            E T4y, T4z, T4C, Ta7;
896
            T4y = ri[WS(rs, 54)];
897
            T4z = T4x * T4y;
898
            T4C = ii[WS(rs, 54)];
899
            Ta7 = T4x * T4C;
900
            T4D = FMA(T4B, T4C, T4z);
901
            Ta8 = FNMS(T4B, T4y, Ta7);
902
       }
903
       {
904
            E T4v, T4K, TgC, TgD;
905
            T4v = T4o + T4u;
906
            T4K = T4D + T4J;
907
            T4L = T4v + T4K;
908
            Tgz = T4K - T4v;
909
            TgC = Ta1 + Ta3;
910
            TgD = Ta8 + Taa;
911
            TgE = TgC - TgD;
912
            TiU = TgC + TgD;
913
            {
914
           E Ta5, Tan, Tac, Tam;
915
           {
916
          E T9Z, Ta4, Ta6, Tab;
917
          T9Z = T4o - T4u;
918
          Ta4 = Ta1 - Ta3;
919
          Ta5 = T9Z + Ta4;
920
          Tan = Ta4 - T9Z;
921
          Ta6 = T4D - T4J;
922
          Tab = Ta8 - Taa;
923
          Tac = Ta6 - Tab;
924
          Tam = Ta6 + Tab;
925
           }
926
           Tad = Ta5 - Tac;
927
           Teg = Ta5 + Tac;
928
           Tao = Tam - Tan;
929
           Ted = Tan + Tam;
930
            }
931
       }
932
        }
933
        {
934
       E T5h, TaC, T5G, TaL, T5p, TaE, T5y, TaJ;
935
       {
936
            E T5e, T5f, T5g, TaB;
937
            T5e = ri[WS(rs, 9)];
938
            T5f = T8 * T5e;
939
            T5g = ii[WS(rs, 9)];
940
            TaB = T8 * T5g;
941
            T5h = FMA(Tc, T5g, T5f);
942
            TaC = FNMS(Tc, T5e, TaB);
943
       }
944
       {
945
            E T5B, T5C, T5F, TaK;
946
            T5B = ri[WS(rs, 25)];
947
            T5C = T5A * T5B;
948
            T5F = ii[WS(rs, 25)];
949
            TaK = T5A * T5F;
950
            T5G = FMA(T5E, T5F, T5C);
951
            TaL = FNMS(T5E, T5B, TaK);
952
       }
953
       {
954
            E T5k, T5l, T5o, TaD;
955
            T5k = ri[WS(rs, 41)];
956
            T5l = T5j * T5k;
957
            T5o = ii[WS(rs, 41)];
958
            TaD = T5j * T5o;
959
            T5p = FMA(T5n, T5o, T5l);
960
            TaE = FNMS(T5n, T5k, TaD);
961
       }
962
       {
963
            E T5t, T5u, T5x, TaI;
964
            T5t = ri[WS(rs, 57)];
965
            T5u = T5s * T5t;
966
            T5x = ii[WS(rs, 57)];
967
            TaI = T5s * T5x;
968
            T5y = FMA(T5w, T5x, T5u);
969
            TaJ = FNMS(T5w, T5t, TaI);
970
       }
971
       {
972
            E T5q, T5H, Th1, Th2;
973
            T5q = T5h + T5p;
974
            T5H = T5y + T5G;
975
            T5I = T5q + T5H;
976
            TgM = T5H - T5q;
977
            Th1 = TaC + TaE;
978
            Th2 = TaJ + TaL;
979
            Th3 = Th1 - Th2;
980
            Tj0 = Th1 + Th2;
981
            {
982
           E TaG, Tbu, TaN, Tbt;
983
           {
984
          E TaA, TaF, TaH, TaM;
985
          TaA = T5h - T5p;
986
          TaF = TaC - TaE;
987
          TaG = TaA + TaF;
988
          Tbu = TaF - TaA;
989
          TaH = T5y - T5G;
990
          TaM = TaJ - TaL;
991
          TaN = TaH - TaM;
992
          Tbt = TaH + TaM;
993
           }
994
           TaO = TaG - TaN;
995
           Tex = TaG + TaN;
996
           Tbv = Tbt - Tbu;
997
           Tem = Tbu + Tbt;
998
            }
999
       }
1000
        }
1001
        {
1002
       E T78, TbL, T7t, TbU, T7e, TbN, T7n, TbS;
1003
       {
1004
            E T75, T76, T77, TbK;
1005
            T75 = ri[WS(rs, 7)];
1006
            T76 = T1i * T75;
1007
            T77 = ii[WS(rs, 7)];
1008
            TbK = T1i * T77;
1009
            T78 = FMA(T1k, T77, T76);
1010
            TbL = FNMS(T1k, T75, TbK);
1011
       }
1012
       {
1013
            E T7p, T7q, T7s, TbT;
1014
            T7p = ri[WS(rs, 23)];
1015
            T7q = T7o * T7p;
1016
            T7s = ii[WS(rs, 23)];
1017
            TbT = T7o * T7s;
1018
            T7t = FMA(T7r, T7s, T7q);
1019
            TbU = FNMS(T7r, T7p, TbT);
1020
       }
1021
       {
1022
            E T7a, T7b, T7d, TbM;
1023
            T7a = ri[WS(rs, 39)];
1024
            T7b = T79 * T7a;
1025
            T7d = ii[WS(rs, 39)];
1026
            TbM = T79 * T7d;
1027
            T7e = FMA(T7c, T7d, T7b);
1028
            TbN = FNMS(T7c, T7a, TbM);
1029
       }
1030
       {
1031
            E T7i, T7j, T7m, TbR;
1032
            T7i = ri[WS(rs, 55)];
1033
            T7j = T7h * T7i;
1034
            T7m = ii[WS(rs, 55)];
1035
            TbR = T7h * T7m;
1036
            T7n = FMA(T7l, T7m, T7j);
1037
            TbS = FNMS(T7l, T7i, TbR);
1038
       }
1039
       {
1040
            E T7f, T7u, Ths, Tht;
1041
            T7f = T78 + T7e;
1042
            T7u = T7n + T7t;
1043
            T7v = T7f + T7u;
1044
            Thd = T7u - T7f;
1045
            Ths = TbL + TbN;
1046
            Tht = TbS + TbU;
1047
            Thu = Ths - Tht;
1048
            Tjb = Ths + Tht;
1049
            {
1050
           E TbP, TcD, TbW, TcC;
1051
           {
1052
          E TbJ, TbO, TbQ, TbV;
1053
          TbJ = T78 - T7e;
1054
          TbO = TbL - TbN;
1055
          TbP = TbJ + TbO;
1056
          TcD = TbO - TbJ;
1057
          TbQ = T7n - T7t;
1058
          TbV = TbS - TbU;
1059
          TbW = TbQ - TbV;
1060
          TcC = TbQ + TbV;
1061
           }
1062
           TbX = TbP - TbW;
1063
           TeQ = TbP + TbW;
1064
           TcE = TcC - TcD;
1065
           TeF = TcD + TcC;
1066
            }
1067
       }
1068
        }
1069
        {
1070
       E T5N, Tbd, T66, Tb9, T5T, Tbf, T5Y, Tb7;
1071
       {
1072
            E T5K, T5L, T5M, Tbc;
1073
            T5K = ri[WS(rs, 5)];
1074
            T5L = Td * T5K;
1075
            T5M = ii[WS(rs, 5)];
1076
            Tbc = Td * T5M;
1077
            T5N = FMA(Th, T5M, T5L);
1078
            Tbd = FNMS(Th, T5K, Tbc);
1079
       }
1080
       {
1081
            E T61, T62, T65, Tb8;
1082
            T61 = ri[WS(rs, 53)];
1083
            T62 = T60 * T61;
1084
            T65 = ii[WS(rs, 53)];
1085
            Tb8 = T60 * T65;
1086
            T66 = FMA(T64, T65, T62);
1087
            Tb9 = FNMS(T64, T61, Tb8);
1088
       }
1089
       {
1090
            E T5P, T5Q, T5S, Tbe;
1091
            T5P = ri[WS(rs, 37)];
1092
            T5Q = T5O * T5P;
1093
            T5S = ii[WS(rs, 37)];
1094
            Tbe = T5O * T5S;
1095
            T5T = FMA(T5R, T5S, T5Q);
1096
            Tbf = FNMS(T5R, T5P, Tbe);
1097
       }
1098
       {
1099
            E T5V, T5W, T5X, Tb6;
1100
            T5V = ri[WS(rs, 21)];
1101
            T5W = T3j * T5V;
1102
            T5X = ii[WS(rs, 21)];
1103
            Tb6 = T3j * T5X;
1104
            T5Y = FMA(T3m, T5X, T5W);
1105
            Tb7 = FNMS(T3m, T5V, Tb6);
1106
       }
1107
       {
1108
            E T5U, T67, TgR, TgO, TgP, TgQ;
1109
            T5U = T5N + T5T;
1110
            T67 = T5Y + T66;
1111
            TgR = T5U - T67;
1112
            TgO = Tbd + Tbf;
1113
            TgP = Tb7 + Tb9;
1114
            TgQ = TgO - TgP;
1115
            T68 = T5U + T67;
1116
            Tj5 = TgO + TgP;
1117
            TgS = TgQ - TgR;
1118
            Th5 = TgR + TgQ;
1119
       }
1120
       {
1121
            E Tbb, Tep, Tbi, Teo;
1122
            {
1123
           E Tb5, Tba, Tbg, Tbh;
1124
           Tb5 = T5N - T5T;
1125
           Tba = Tb7 - Tb9;
1126
           Tbb = Tb5 - Tba;
1127
           Tep = Tb5 + Tba;
1128
           Tbg = Tbd - Tbf;
1129
           Tbh = T5Y - T66;
1130
           Tbi = Tbg + Tbh;
1131
           Teo = Tbg - Tbh;
1132
            }
1133
            Tbj = FNMS(KP414213562, Tbi, Tbb);
1134
            Tez = FMA(KP414213562, Teo, Tep);
1135
            Tbx = FMA(KP414213562, Tbb, Tbi);
1136
            Teq = FNMS(KP414213562, Tep, Teo);
1137
       }
1138
        }
1139
        {
1140
       E T6g, TaY, T6z, TaU, T6m, Tb0, T6r, TaS;
1141
       {
1142
            E T6b, T6c, T6f, TaX;
1143
            T6b = ri[WS(rs, 61)];
1144
            T6c = T6a * T6b;
1145
            T6f = ii[WS(rs, 61)];
1146
            TaX = T6a * T6f;
1147
            T6g = FMA(T6e, T6f, T6c);
1148
            TaY = FNMS(T6e, T6b, TaX);
1149
       }
1150
       {
1151
            E T6u, T6v, T6y, TaT;
1152
            T6u = ri[WS(rs, 45)];
1153
            T6v = T6t * T6u;
1154
            T6y = ii[WS(rs, 45)];
1155
            TaT = T6t * T6y;
1156
            T6z = FMA(T6x, T6y, T6v);
1157
            TaU = FNMS(T6x, T6u, TaT);
1158
       }
1159
       {
1160
            E T6i, T6j, T6l, TaZ;
1161
            T6i = ri[WS(rs, 29)];
1162
            T6j = T6h * T6i;
1163
            T6l = ii[WS(rs, 29)];
1164
            TaZ = T6h * T6l;
1165
            T6m = FMA(T6k, T6l, T6j);
1166
            Tb0 = FNMS(T6k, T6i, TaZ);
1167
       }
1168
       {
1169
            E T6o, T6p, T6q, TaR;
1170
            T6o = ri[WS(rs, 13)];
1171
            T6p = T17 * T6o;
1172
            T6q = ii[WS(rs, 13)];
1173
            TaR = T17 * T6q;
1174
            T6r = FMA(T19, T6q, T6p);
1175
            TaS = FNMS(T19, T6o, TaR);
1176
       }
1177
       {
1178
            E T6n, T6A, TgT, TgU, TgV, TgW;
1179
            T6n = T6g + T6m;
1180
            T6A = T6r + T6z;
1181
            TgT = T6n - T6A;
1182
            TgU = TaY + Tb0;
1183
            TgV = TaS + TaU;
1184
            TgW = TgU - TgV;
1185
            T6B = T6n + T6A;
1186
            Tj6 = TgU + TgV;
1187
            TgX = TgT + TgW;
1188
            Th6 = TgT - TgW;
1189
       }
1190
       {
1191
            E TaW, Tes, Tb3, Ter;
1192
            {
1193
           E TaQ, TaV, Tb1, Tb2;
1194
           TaQ = T6g - T6m;
1195
           TaV = TaS - TaU;
1196
           TaW = TaQ - TaV;
1197
           Tes = TaQ + TaV;
1198
           Tb1 = TaY - Tb0;
1199
           Tb2 = T6r - T6z;
1200
           Tb3 = Tb1 + Tb2;
1201
           Ter = Tb1 - Tb2;
1202
            }
1203
            Tb4 = FMA(KP414213562, Tb3, TaW);
1204
            TeA = FNMS(KP414213562, Ter, Tes);
1205
            Tby = FNMS(KP414213562, TaW, Tb3);
1206
            Tet = FMA(KP414213562, Tes, Ter);
1207
       }
1208
        }
1209
        {
1210
       E T7A, Tcm, T7T, Tci, T7G, Tco, T7L, Tcg;
1211
       {
1212
            E T7x, T7y, T7z, Tcl;
1213
            T7x = ri[WS(rs, 3)];
1214
            T7y = T3 * T7x;
1215
            T7z = ii[WS(rs, 3)];
1216
            Tcl = T3 * T7z;
1217
            T7A = FMA(T6, T7z, T7y);
1218
            Tcm = FNMS(T6, T7x, Tcl);
1219
       }
1220
       {
1221
            E T7O, T7P, T7S, Tch;
1222
            T7O = ri[WS(rs, 51)];
1223
            T7P = T7N * T7O;
1224
            T7S = ii[WS(rs, 51)];
1225
            Tch = T7N * T7S;
1226
            T7T = FMA(T7R, T7S, T7P);
1227
            Tci = FNMS(T7R, T7O, Tch);
1228
       }
1229
       {
1230
            E T7C, T7D, T7F, Tcn;
1231
            T7C = ri[WS(rs, 35)];
1232
            T7D = T7B * T7C;
1233
            T7F = ii[WS(rs, 35)];
1234
            Tcn = T7B * T7F;
1235
            T7G = FMA(T7E, T7F, T7D);
1236
            Tco = FNMS(T7E, T7C, Tcn);
1237
       }
1238
       {
1239
            E T7I, T7J, T7K, Tcf;
1240
            T7I = ri[WS(rs, 19)];
1241
            T7J = T2u * T7I;
1242
            T7K = ii[WS(rs, 19)];
1243
            Tcf = T2u * T7K;
1244
            T7L = FMA(T2x, T7K, T7J);
1245
            Tcg = FNMS(T2x, T7I, Tcf);
1246
       }
1247
       {
1248
            E T7H, T7U, Thi, Thf, Thg, Thh;
1249
            T7H = T7A + T7G;
1250
            T7U = T7L + T7T;
1251
            Thi = T7H - T7U;
1252
            Thf = Tcm + Tco;
1253
            Thg = Tcg + Tci;
1254
            Thh = Thf - Thg;
1255
            T7V = T7H + T7U;
1256
            Tjg = Thf + Thg;
1257
            Thj = Thh - Thi;
1258
            Thw = Thi + Thh;
1259
       }
1260
       {
1261
            E Tck, TeI, Tcr, TeH;
1262
            {
1263
           E Tce, Tcj, Tcp, Tcq;
1264
           Tce = T7A - T7G;
1265
           Tcj = Tcg - Tci;
1266
           Tck = Tce - Tcj;
1267
           TeI = Tce + Tcj;
1268
           Tcp = Tcm - Tco;
1269
           Tcq = T7L - T7T;
1270
           Tcr = Tcp + Tcq;
1271
           TeH = Tcp - Tcq;
1272
            }
1273
            Tcs = FNMS(KP414213562, Tcr, Tck);
1274
            TeS = FMA(KP414213562, TeH, TeI);
1275
            TcG = FMA(KP414213562, Tck, Tcr);
1276
            TeJ = FNMS(KP414213562, TeI, TeH);
1277
       }
1278
        }
1279
        {
1280
       E T83, Tc7, T8k, Tc3, T87, Tc9, T8c, Tc1;
1281
       {
1282
            E T7Y, T7Z, T82, Tc6;
1283
            T7Y = ri[WS(rs, 59)];
1284
            T7Z = T7X * T7Y;
1285
            T82 = ii[WS(rs, 59)];
1286
            Tc6 = T7X * T82;
1287
            T83 = FMA(T81, T82, T7Z);
1288
            Tc7 = FNMS(T81, T7Y, Tc6);
1289
       }
1290
       {
1291
            E T8f, T8g, T8j, Tc2;
1292
            T8f = ri[WS(rs, 43)];
1293
            T8g = T8e * T8f;
1294
            T8j = ii[WS(rs, 43)];
1295
            Tc2 = T8e * T8j;
1296
            T8k = FMA(T8i, T8j, T8g);
1297
            Tc3 = FNMS(T8i, T8f, Tc2);
1298
       }
1299
       {
1300
            E T84, T85, T86, Tc8;
1301
            T84 = ri[WS(rs, 27)];
1302
            T85 = Te * T84;
1303
            T86 = ii[WS(rs, 27)];
1304
            Tc8 = Te * T86;
1305
            T87 = FMA(Ti, T86, T85);
1306
            Tc9 = FNMS(Ti, T84, Tc8);
1307
       }
1308
       {
1309
            E T89, T8a, T8b, Tc0;
1310
            T89 = ri[WS(rs, 11)];
1311
            T8a = Tu * T89;
1312
            T8b = ii[WS(rs, 11)];
1313
            Tc0 = Tu * T8b;
1314
            T8c = FMA(Tx, T8b, T8a);
1315
            Tc1 = FNMS(Tx, T89, Tc0);
1316
       }
1317
       {
1318
            E T88, T8l, Thk, Thl, Thm, Thn;
1319
            T88 = T83 + T87;
1320
            T8l = T8c + T8k;
1321
            Thk = T88 - T8l;
1322
            Thl = Tc7 + Tc9;
1323
            Thm = Tc1 + Tc3;
1324
            Thn = Thl - Thm;
1325
            T8m = T88 + T8l;
1326
            Tjh = Thl + Thm;
1327
            Tho = Thk + Thn;
1328
            Thx = Thk - Thn;
1329
       }
1330
       {
1331
            E Tc5, TeL, Tcc, TeK;
1332
            {
1333
           E TbZ, Tc4, Tca, Tcb;
1334
           TbZ = T83 - T87;
1335
           Tc4 = Tc1 - Tc3;
1336
           Tc5 = TbZ - Tc4;
1337
           TeL = TbZ + Tc4;
1338
           Tca = Tc7 - Tc9;
1339
           Tcb = T8c - T8k;
1340
           Tcc = Tca + Tcb;
1341
           TeK = Tca - Tcb;
1342
            }
1343
            Tcd = FMA(KP414213562, Tcc, Tc5);
1344
            TeT = FNMS(KP414213562, TeK, TeL);
1345
            TcH = FNMS(KP414213562, Tc5, Tcc);
1346
            TeM = FMA(KP414213562, TeL, TeK);
1347
       }
1348
        }
1349
        {
1350
       E T2I, TjG, T4N, Tkj, Tkf, Tkk, TjJ, Tk5, T8o, Tk2, TjU, TjY, T6D, Tk1, TjP;
1351
       E TjX;
1352
       {
1353
            E T1C, T2H, TjH, TjI;
1354
            T1C = TY + T1B;
1355
            T2H = T27 + T2G;
1356
            T2I = T1C + T2H;
1357
            TjG = T1C - T2H;
1358
            {
1359
           E T3L, T4M, Tk6, Tke;
1360
           T3L = T39 + T3K;
1361
           T4M = T4k + T4L;
1362
           T4N = T3L + T4M;
1363
           Tkj = T4M - T3L;
1364
           Tk6 = TiJ + TiK;
1365
           Tke = Tk7 + Tkd;
1366
           Tkf = Tk6 + Tke;
1367
           Tkk = Tke - Tk6;
1368
            }
1369
            TjH = TiN + TiO;
1370
            TjI = TiT + TiU;
1371
            TjJ = TjH - TjI;
1372
            Tk5 = TjH + TjI;
1373
            {
1374
           E T7w, T8n, TjQ, TjR, TjS, TjT;
1375
           T7w = T74 + T7v;
1376
           T8n = T7V + T8m;
1377
           TjQ = T7w - T8n;
1378
           TjR = Tja + Tjb;
1379
           TjS = Tjg + Tjh;
1380
           TjT = TjR - TjS;
1381
           T8o = T7w + T8n;
1382
           Tk2 = TjR + TjS;
1383
           TjU = TjQ - TjT;
1384
           TjY = TjQ + TjT;
1385
            }
1386
            {
1387
           E T5J, T6C, TjL, TjM, TjN, TjO;
1388
           T5J = T5d + T5I;
1389
           T6C = T68 + T6B;
1390
           TjL = T5J - T6C;
1391
           TjM = TiZ + Tj0;
1392
           TjN = Tj5 + Tj6;
1393
           TjO = TjM - TjN;
1394
           T6D = T5J + T6C;
1395
           Tk1 = TjM + TjN;
1396
           TjP = TjL + TjO;
1397
           TjX = TjO - TjL;
1398
            }
1399
       }
1400
       {
1401
            E T4O, T8p, Tk4, Tkg;
1402
            T4O = T2I + T4N;
1403
            T8p = T6D + T8o;
1404
            ri[WS(rs, 32)] = T4O - T8p;
1405
            ri[0] = T4O + T8p;
1406
            Tk4 = Tk1 + Tk2;
1407
            Tkg = Tk5 + Tkf;
1408
            ii[0] = Tk4 + Tkg;
1409
            ii[WS(rs, 32)] = Tkg - Tk4;
1410
       }
1411
       {
1412
            E TjK, TjV, Tkl, Tkm;
1413
            TjK = TjG + TjJ;
1414
            TjV = TjP + TjU;
1415
            ri[WS(rs, 40)] = FNMS(KP707106781, TjV, TjK);
1416
            ri[WS(rs, 8)] = FMA(KP707106781, TjV, TjK);
1417
            Tkl = Tkj + Tkk;
1418
            Tkm = TjX + TjY;
1419
            ii[WS(rs, 8)] = FMA(KP707106781, Tkm, Tkl);
1420
            ii[WS(rs, 40)] = FNMS(KP707106781, Tkm, Tkl);
1421
       }
1422
       {
1423
            E TjW, TjZ, Tkn, Tko;
1424
            TjW = TjG - TjJ;
1425
            TjZ = TjX - TjY;
1426
            ri[WS(rs, 56)] = FNMS(KP707106781, TjZ, TjW);
1427
            ri[WS(rs, 24)] = FMA(KP707106781, TjZ, TjW);
1428
            Tkn = Tkk - Tkj;
1429
            Tko = TjU - TjP;
1430
            ii[WS(rs, 24)] = FMA(KP707106781, Tko, Tkn);
1431
            ii[WS(rs, 56)] = FNMS(KP707106781, Tko, Tkn);
1432
       }
1433
       {
1434
            E Tk0, Tk3, Tkh, Tki;
1435
            Tk0 = T2I - T4N;
1436
            Tk3 = Tk1 - Tk2;
1437
            ri[WS(rs, 48)] = Tk0 - Tk3;
1438
            ri[WS(rs, 16)] = Tk0 + Tk3;
1439
            Tkh = T8o - T6D;
1440
            Tki = Tkf - Tk5;
1441
            ii[WS(rs, 16)] = Tkh + Tki;
1442
            ii[WS(rs, 48)] = Tki - Tkh;
1443
       }
1444
        }
1445
        {
1446
       E TiM, Tjq, Tkr, Tkx, TiX, Tky, Tjt, Tks, Tj9, TjD, Tjn, Tjx, Tjk, TjE, Tjo;
1447
       E TjA;
1448
       {
1449
            E TiI, TiL, Tkp, Tkq;
1450
            TiI = TY - T1B;
1451
            TiL = TiJ - TiK;
1452
            TiM = TiI - TiL;
1453
            Tjq = TiI + TiL;
1454
            Tkp = T2G - T27;
1455
            Tkq = Tkd - Tk7;
1456
            Tkr = Tkp + Tkq;
1457
            Tkx = Tkq - Tkp;
1458
       }
1459
       {
1460
            E TiR, Tjr, TiW, Tjs;
1461
            {
1462
           E TiP, TiQ, TiS, TiV;
1463
           TiP = TiN - TiO;
1464
           TiQ = T39 - T3K;
1465
           TiR = TiP - TiQ;
1466
           Tjr = TiQ + TiP;
1467
           TiS = T4k - T4L;
1468
           TiV = TiT - TiU;
1469
           TiW = TiS + TiV;
1470
           Tjs = TiS - TiV;
1471
            }
1472
            TiX = TiR - TiW;
1473
            Tky = Tjs - Tjr;
1474
            Tjt = Tjr + Tjs;
1475
            Tks = TiR + TiW;
1476
       }
1477
       {
1478
            E Tj3, Tjw, Tj8, Tjv;
1479
            {
1480
           E Tj1, Tj2, Tj4, Tj7;
1481
           Tj1 = TiZ - Tj0;
1482
           Tj2 = T6B - T68;
1483
           Tj3 = Tj1 - Tj2;
1484
           Tjw = Tj1 + Tj2;
1485
           Tj4 = T5d - T5I;
1486
           Tj7 = Tj5 - Tj6;
1487
           Tj8 = Tj4 - Tj7;
1488
           Tjv = Tj4 + Tj7;
1489
            }
1490
            Tj9 = FMA(KP414213562, Tj8, Tj3);
1491
            TjD = FNMS(KP414213562, Tjv, Tjw);
1492
            Tjn = FNMS(KP414213562, Tj3, Tj8);
1493
            Tjx = FMA(KP414213562, Tjw, Tjv);
1494
       }
1495
       {
1496
            E Tje, Tjz, Tjj, Tjy;
1497
            {
1498
           E Tjc, Tjd, Tjf, Tji;
1499
           Tjc = Tja - Tjb;
1500
           Tjd = T8m - T7V;
1501
           Tje = Tjc - Tjd;
1502
           Tjz = Tjc + Tjd;
1503
           Tjf = T74 - T7v;
1504
           Tji = Tjg - Tjh;
1505
           Tjj = Tjf - Tji;
1506
           Tjy = Tjf + Tji;
1507
            }
1508
            Tjk = FNMS(KP414213562, Tjj, Tje);
1509
            TjE = FMA(KP414213562, Tjy, Tjz);
1510
            Tjo = FMA(KP414213562, Tje, Tjj);
1511
            TjA = FNMS(KP414213562, Tjz, Tjy);
1512
       }
1513
       {
1514
            E TiY, Tjl, Tkz, TkA;
1515
            TiY = FMA(KP707106781, TiX, TiM);
1516
            Tjl = Tj9 - Tjk;
1517
            ri[WS(rs, 44)] = FNMS(KP923879532, Tjl, TiY);
1518
            ri[WS(rs, 12)] = FMA(KP923879532, Tjl, TiY);
1519
            Tkz = FMA(KP707106781, Tky, Tkx);
1520
            TkA = Tjo - Tjn;
1521
            ii[WS(rs, 12)] = FMA(KP923879532, TkA, Tkz);
1522
            ii[WS(rs, 44)] = FNMS(KP923879532, TkA, Tkz);
1523
       }
1524
       {
1525
            E Tjm, Tjp, TkB, TkC;
1526
            Tjm = FNMS(KP707106781, TiX, TiM);
1527
            Tjp = Tjn + Tjo;
1528
            ri[WS(rs, 28)] = FNMS(KP923879532, Tjp, Tjm);
1529
            ri[WS(rs, 60)] = FMA(KP923879532, Tjp, Tjm);
1530
            TkB = FNMS(KP707106781, Tky, Tkx);
1531
            TkC = Tj9 + Tjk;
1532
            ii[WS(rs, 28)] = FNMS(KP923879532, TkC, TkB);
1533
            ii[WS(rs, 60)] = FMA(KP923879532, TkC, TkB);
1534
       }
1535
       {
1536
            E Tju, TjB, Tkt, Tku;
1537
            Tju = FMA(KP707106781, Tjt, Tjq);
1538
            TjB = Tjx + TjA;
1539
            ri[WS(rs, 36)] = FNMS(KP923879532, TjB, Tju);
1540
            ri[WS(rs, 4)] = FMA(KP923879532, TjB, Tju);
1541
            Tkt = FMA(KP707106781, Tks, Tkr);
1542
            Tku = TjD + TjE;
1543
            ii[WS(rs, 4)] = FMA(KP923879532, Tku, Tkt);
1544
            ii[WS(rs, 36)] = FNMS(KP923879532, Tku, Tkt);
1545
       }
1546
       {
1547
            E TjC, TjF, Tkv, Tkw;
1548
            TjC = FNMS(KP707106781, Tjt, Tjq);
1549
            TjF = TjD - TjE;
1550
            ri[WS(rs, 52)] = FNMS(KP923879532, TjF, TjC);
1551
            ri[WS(rs, 20)] = FMA(KP923879532, TjF, TjC);
1552
            Tkv = FNMS(KP707106781, Tks, Tkr);
1553
            Tkw = TjA - Tjx;
1554
            ii[WS(rs, 20)] = FMA(KP923879532, Tkw, Tkv);
1555
            ii[WS(rs, 52)] = FNMS(KP923879532, Tkw, Tkv);
1556
       }
1557
        }
1558
        {
1559
       E Tgk, Tl1, ThG, TkV, Ti0, TkN, Tis, TkH, TgH, TkO, ThJ, TkI, Tim, TiG, Tiq;
1560
       E TiC, Th9, ThT, ThD, ThN, Ti7, Tl2, Tiv, TkW, Tif, TiF, Tip, Tiz, ThA, ThU;
1561
       E ThE, ThQ;
1562
       {
1563
            E Tg8, TkT, Tgj, TkU, Tgd, Tgi;
1564
            Tg8 = Tg4 + Tg7;
1565
            TkT = TkE - TkD;
1566
            Tgd = Tg9 + Tgc;
1567
            Tgi = Tge - Tgh;
1568
            Tgj = Tgd + Tgi;
1569
            TkU = Tgi - Tgd;
1570
            Tgk = FNMS(KP707106781, Tgj, Tg8);
1571
            Tl1 = FNMS(KP707106781, TkU, TkT);
1572
            ThG = FMA(KP707106781, Tgj, Tg8);
1573
            TkV = FMA(KP707106781, TkU, TkT);
1574
       }
1575
       {
1576
            E ThW, TkF, ThZ, TkG, ThX, ThY;
1577
            ThW = Tg4 - Tg7;
1578
            TkF = TkD + TkE;
1579
            ThX = Tgc - Tg9;
1580
            ThY = Tge + Tgh;
1581
            ThZ = ThX - ThY;
1582
            TkG = ThX + ThY;
1583
            Ti0 = FMA(KP707106781, ThZ, ThW);
1584
            TkN = FNMS(KP707106781, TkG, TkF);
1585
            Tis = FNMS(KP707106781, ThZ, ThW);
1586
            TkH = FMA(KP707106781, TkG, TkF);
1587
       }
1588
       {
1589
            E Tgv, ThH, TgG, ThI;
1590
            {
1591
           E Tgp, Tgu, TgA, TgF;
1592
           Tgp = Tgn + Tgo;
1593
           Tgu = Tgq + Tgt;
1594
           Tgv = FNMS(KP414213562, Tgu, Tgp);
1595
           ThH = FMA(KP414213562, Tgp, Tgu);
1596
           TgA = Tgy + Tgz;
1597
           TgF = TgB + TgE;
1598
           TgG = FMA(KP414213562, TgF, TgA);
1599
           ThI = FNMS(KP414213562, TgA, TgF);
1600
            }
1601
            TgH = Tgv - TgG;
1602
            TkO = ThI - ThH;
1603
            ThJ = ThH + ThI;
1604
            TkI = Tgv + TgG;
1605
       }
1606
       {
1607
            E Tii, TiB, Til, TiA;
1608
            {
1609
           E Tig, Tih, Tij, Tik;
1610
           Tig = Thr - Thu;
1611
           Tih = Tho - Thj;
1612
           Tii = FNMS(KP707106781, Tih, Tig);
1613
           TiB = FMA(KP707106781, Tih, Tig);
1614
           Tij = Thc - Thd;
1615
           Tik = Thw - Thx;
1616
           Til = FNMS(KP707106781, Tik, Tij);
1617
           TiA = FMA(KP707106781, Tik, Tij);
1618
            }
1619
            Tim = FNMS(KP668178637, Til, Tii);
1620
            TiG = FMA(KP198912367, TiA, TiB);
1621
            Tiq = FMA(KP668178637, Tii, Til);
1622
            TiC = FNMS(KP198912367, TiB, TiA);
1623
       }
1624
       {
1625
            E TgZ, ThM, Th8, ThL;
1626
            {
1627
           E TgN, TgY, Th4, Th7;
1628
           TgN = TgL + TgM;
1629
           TgY = TgS + TgX;
1630
           TgZ = FNMS(KP707106781, TgY, TgN);
1631
           ThM = FMA(KP707106781, TgY, TgN);
1632
           Th4 = Th0 + Th3;
1633
           Th7 = Th5 + Th6;
1634
           Th8 = FNMS(KP707106781, Th7, Th4);
1635
           ThL = FMA(KP707106781, Th7, Th4);
1636
            }
1637
            Th9 = FMA(KP668178637, Th8, TgZ);
1638
            ThT = FNMS(KP198912367, ThL, ThM);
1639
            ThD = FNMS(KP668178637, TgZ, Th8);
1640
            ThN = FMA(KP198912367, ThM, ThL);
1641
       }
1642
       {
1643
            E Ti3, Tit, Ti6, Tiu;
1644
            {
1645
           E Ti1, Ti2, Ti4, Ti5;
1646
           Ti1 = Tgn - Tgo;
1647
           Ti2 = Tgq - Tgt;
1648
           Ti3 = FMA(KP414213562, Ti2, Ti1);
1649
           Tit = FNMS(KP414213562, Ti1, Ti2);
1650
           Ti4 = Tgy - Tgz;
1651
           Ti5 = TgB - TgE;
1652
           Ti6 = FNMS(KP414213562, Ti5, Ti4);
1653
           Tiu = FMA(KP414213562, Ti4, Ti5);
1654
            }
1655
            Ti7 = Ti3 - Ti6;
1656
            Tl2 = Ti3 + Ti6;
1657
            Tiv = Tit + Tiu;
1658
            TkW = Tiu - Tit;
1659
       }
1660
       {
1661
            E Tib, Tiy, Tie, Tix;
1662
            {
1663
           E Ti9, Tia, Tic, Tid;
1664
           Ti9 = Th0 - Th3;
1665
           Tia = TgX - TgS;
1666
           Tib = FNMS(KP707106781, Tia, Ti9);
1667
           Tiy = FMA(KP707106781, Tia, Ti9);
1668
           Tic = TgL - TgM;
1669
           Tid = Th5 - Th6;
1670
           Tie = FNMS(KP707106781, Tid, Tic);
1671
           Tix = FMA(KP707106781, Tid, Tic);
1672
            }
1673
            Tif = FMA(KP668178637, Tie, Tib);
1674
            TiF = FNMS(KP198912367, Tix, Tiy);
1675
            Tip = FNMS(KP668178637, Tib, Tie);
1676
            Tiz = FMA(KP198912367, Tiy, Tix);
1677
       }
1678
       {
1679
            E Thq, ThP, Thz, ThO;
1680
            {
1681
           E The, Thp, Thv, Thy;
1682
           The = Thc + Thd;
1683
           Thp = Thj + Tho;
1684
           Thq = FNMS(KP707106781, Thp, The);
1685
           ThP = FMA(KP707106781, Thp, The);
1686
           Thv = Thr + Thu;
1687
           Thy = Thw + Thx;
1688
           Thz = FNMS(KP707106781, Thy, Thv);
1689
           ThO = FMA(KP707106781, Thy, Thv);
1690
            }
1691
            ThA = FNMS(KP668178637, Thz, Thq);
1692
            ThU = FMA(KP198912367, ThO, ThP);
1693
            ThE = FMA(KP668178637, Thq, Thz);
1694
            ThQ = FNMS(KP198912367, ThP, ThO);
1695
       }
1696
       {
1697
            E TgI, ThB, TkP, TkQ;
1698
            TgI = FMA(KP923879532, TgH, Tgk);
1699
            ThB = Th9 - ThA;
1700
            ri[WS(rs, 42)] = FNMS(KP831469612, ThB, TgI);
1701
            ri[WS(rs, 10)] = FMA(KP831469612, ThB, TgI);
1702
            TkP = FMA(KP923879532, TkO, TkN);
1703
            TkQ = ThE - ThD;
1704
            ii[WS(rs, 10)] = FMA(KP831469612, TkQ, TkP);
1705
            ii[WS(rs, 42)] = FNMS(KP831469612, TkQ, TkP);
1706
       }
1707
       {
1708
            E ThC, ThF, TkR, TkS;
1709
            ThC = FNMS(KP923879532, TgH, Tgk);
1710
            ThF = ThD + ThE;
1711
            ri[WS(rs, 26)] = FNMS(KP831469612, ThF, ThC);
1712
            ri[WS(rs, 58)] = FMA(KP831469612, ThF, ThC);
1713
            TkR = FNMS(KP923879532, TkO, TkN);
1714
            TkS = Th9 + ThA;
1715
            ii[WS(rs, 26)] = FNMS(KP831469612, TkS, TkR);
1716
            ii[WS(rs, 58)] = FMA(KP831469612, TkS, TkR);
1717
       }
1718
       {
1719
            E ThK, ThR, TkJ, TkK;
1720
            ThK = FMA(KP923879532, ThJ, ThG);
1721
            ThR = ThN + ThQ;
1722
            ri[WS(rs, 34)] = FNMS(KP980785280, ThR, ThK);
1723
            ri[WS(rs, 2)] = FMA(KP980785280, ThR, ThK);
1724
            TkJ = FMA(KP923879532, TkI, TkH);
1725
            TkK = ThT + ThU;
1726
            ii[WS(rs, 2)] = FMA(KP980785280, TkK, TkJ);
1727
            ii[WS(rs, 34)] = FNMS(KP980785280, TkK, TkJ);
1728
       }
1729
       {
1730
            E ThS, ThV, TkL, TkM;
1731
            ThS = FNMS(KP923879532, ThJ, ThG);
1732
            ThV = ThT - ThU;
1733
            ri[WS(rs, 50)] = FNMS(KP980785280, ThV, ThS);
1734
            ri[WS(rs, 18)] = FMA(KP980785280, ThV, ThS);
1735
            TkL = FNMS(KP923879532, TkI, TkH);
1736
            TkM = ThQ - ThN;
1737
            ii[WS(rs, 18)] = FMA(KP980785280, TkM, TkL);
1738
            ii[WS(rs, 50)] = FNMS(KP980785280, TkM, TkL);
1739
       }
1740
       {
1741
            E Ti8, Tin, TkX, TkY;
1742
            Ti8 = FMA(KP923879532, Ti7, Ti0);
1743
            Tin = Tif + Tim;
1744
            ri[WS(rs, 38)] = FNMS(KP831469612, Tin, Ti8);
1745
            ri[WS(rs, 6)] = FMA(KP831469612, Tin, Ti8);
1746
            TkX = FMA(KP923879532, TkW, TkV);
1747
            TkY = Tip + Tiq;
1748
            ii[WS(rs, 6)] = FMA(KP831469612, TkY, TkX);
1749
            ii[WS(rs, 38)] = FNMS(KP831469612, TkY, TkX);
1750
       }
1751
       {
1752
            E Tio, Tir, TkZ, Tl0;
1753
            Tio = FNMS(KP923879532, Ti7, Ti0);
1754
            Tir = Tip - Tiq;
1755
            ri[WS(rs, 54)] = FNMS(KP831469612, Tir, Tio);
1756
            ri[WS(rs, 22)] = FMA(KP831469612, Tir, Tio);
1757
            TkZ = FNMS(KP923879532, TkW, TkV);
1758
            Tl0 = Tim - Tif;
1759
            ii[WS(rs, 22)] = FMA(KP831469612, Tl0, TkZ);
1760
            ii[WS(rs, 54)] = FNMS(KP831469612, Tl0, TkZ);
1761
       }
1762
       {
1763
            E Tiw, TiD, Tl3, Tl4;
1764
            Tiw = FNMS(KP923879532, Tiv, Tis);
1765
            TiD = Tiz - TiC;
1766
            ri[WS(rs, 46)] = FNMS(KP980785280, TiD, Tiw);
1767
            ri[WS(rs, 14)] = FMA(KP980785280, TiD, Tiw);
1768
            Tl3 = FNMS(KP923879532, Tl2, Tl1);
1769
            Tl4 = TiG - TiF;
1770
            ii[WS(rs, 14)] = FMA(KP980785280, Tl4, Tl3);
1771
            ii[WS(rs, 46)] = FNMS(KP980785280, Tl4, Tl3);
1772
       }
1773
       {
1774
            E TiE, TiH, Tl5, Tl6;
1775
            TiE = FMA(KP923879532, Tiv, Tis);
1776
            TiH = TiF + TiG;
1777
            ri[WS(rs, 30)] = FNMS(KP980785280, TiH, TiE);
1778
            ri[WS(rs, 62)] = FMA(KP980785280, TiH, TiE);
1779
            Tl5 = FMA(KP923879532, Tl2, Tl1);
1780
            Tl6 = Tiz + TiC;
1781
            ii[WS(rs, 30)] = FNMS(KP980785280, Tl6, Tl5);
1782
            ii[WS(rs, 62)] = FMA(KP980785280, Tl6, Tl5);
1783
       }
1784
        }
1785
        {
1786
       E Tar, TlO, TcT, TlI, TbB, Td3, TcN, TcX, Tdw, TdQ, TdA, TdM, Tdp, TdP, Tdz;
1787
       E TdJ, Tdh, Tm2, TdF, TlW, TcK, Td4, TcO, Td0, T9i, TlV, Tm1, TcQ, Tda, TlH;
1788
       E TlN, TdC;
1789
       {
1790
            E T9R, TcR, Taq, TcS;
1791
            {
1792
           E T9F, T9Q, Tae, Tap;
1793
           T9F = FNMS(KP707106781, T9E, T9p);
1794
           T9Q = FNMS(KP707106781, T9P, T9M);
1795
           T9R = FNMS(KP668178637, T9Q, T9F);
1796
           TcR = FMA(KP668178637, T9F, T9Q);
1797
           Tae = FNMS(KP707106781, Tad, T9Y);
1798
           Tap = FNMS(KP707106781, Tao, Tal);
1799
           Taq = FMA(KP668178637, Tap, Tae);
1800
           TcS = FNMS(KP668178637, Tae, Tap);
1801
            }
1802
            Tar = T9R - Taq;
1803
            TlO = TcS - TcR;
1804
            TcT = TcR + TcS;
1805
            TlI = T9R + Taq;
1806
       }
1807
       {
1808
            E Tbl, TcW, TbA, TcV;
1809
            {
1810
           E TaP, Tbk, Tbw, Tbz;
1811
           TaP = FNMS(KP707106781, TaO, Taz);
1812
           Tbk = Tb4 - Tbj;
1813
           Tbl = FNMS(KP923879532, Tbk, TaP);
1814
           TcW = FMA(KP923879532, Tbk, TaP);
1815
           Tbw = FNMS(KP707106781, Tbv, Tbs);
1816
           Tbz = Tbx - Tby;
1817
           TbA = FNMS(KP923879532, Tbz, Tbw);
1818
           TcV = FMA(KP923879532, Tbz, Tbw);
1819
            }
1820
            TbB = FMA(KP534511135, TbA, Tbl);
1821
            Td3 = FNMS(KP303346683, TcV, TcW);
1822
            TcN = FNMS(KP534511135, Tbl, TbA);
1823
            TcX = FMA(KP303346683, TcW, TcV);
1824
       }
1825
       {
1826
            E Tds, TdL, Tdv, TdK;
1827
            {
1828
           E Tdq, Tdr, Tdt, Tdu;
1829
           Tdq = FMA(KP707106781, TcE, TcB);
1830
           Tdr = Tcs + Tcd;
1831
           Tds = FNMS(KP923879532, Tdr, Tdq);
1832
           TdL = FMA(KP923879532, Tdr, Tdq);
1833
           Tdt = FMA(KP707106781, TbX, TbI);
1834
           Tdu = TcG + TcH;
1835
           Tdv = FNMS(KP923879532, Tdu, Tdt);
1836
           TdK = FMA(KP923879532, Tdu, Tdt);
1837
            }
1838
            Tdw = FNMS(KP820678790, Tdv, Tds);
1839
            TdQ = FMA(KP098491403, TdK, TdL);
1840
            TdA = FMA(KP820678790, Tds, Tdv);
1841
            TdM = FNMS(KP098491403, TdL, TdK);
1842
       }
1843
       {
1844
            E Tdl, TdI, Tdo, TdH;
1845
            {
1846
           E Tdj, Tdk, Tdm, Tdn;
1847
           Tdj = FMA(KP707106781, Tbv, Tbs);
1848
           Tdk = Tbj + Tb4;
1849
           Tdl = FNMS(KP923879532, Tdk, Tdj);
1850
           TdI = FMA(KP923879532, Tdk, Tdj);
1851
           Tdm = FMA(KP707106781, TaO, Taz);
1852
           Tdn = Tbx + Tby;
1853
           Tdo = FNMS(KP923879532, Tdn, Tdm);
1854
           TdH = FMA(KP923879532, Tdn, Tdm);
1855
            }
1856
            Tdp = FMA(KP820678790, Tdo, Tdl);
1857
            TdP = FNMS(KP098491403, TdH, TdI);
1858
            Tdz = FNMS(KP820678790, Tdl, Tdo);
1859
            TdJ = FMA(KP098491403, TdI, TdH);
1860
       }
1861
       {
1862
            E Tdd, TdD, Tdg, TdE;
1863
            {
1864
           E Tdb, Tdc, Tde, Tdf;
1865
           Tdb = FMA(KP707106781, T9E, T9p);
1866
           Tdc = FMA(KP707106781, T9P, T9M);
1867
           Tdd = FMA(KP198912367, Tdc, Tdb);
1868
           TdD = FNMS(KP198912367, Tdb, Tdc);
1869
           Tde = FMA(KP707106781, Tad, T9Y);
1870
           Tdf = FMA(KP707106781, Tao, Tal);
1871
           Tdg = FNMS(KP198912367, Tdf, Tde);
1872
           TdE = FMA(KP198912367, Tde, Tdf);
1873
            }
1874
            Tdh = Tdd - Tdg;
1875
            Tm2 = Tdd + Tdg;
1876
            TdF = TdD + TdE;
1877
            TlW = TdE - TdD;
1878
       }
1879
       {
1880
            E Tcu, TcZ, TcJ, TcY;
1881
            {
1882
           E TbY, Tct, TcF, TcI;
1883
           TbY = FNMS(KP707106781, TbX, TbI);
1884
           Tct = Tcd - Tcs;
1885
           Tcu = FNMS(KP923879532, Tct, TbY);
1886
           TcZ = FMA(KP923879532, Tct, TbY);
1887
           TcF = FNMS(KP707106781, TcE, TcB);
1888
           TcI = TcG - TcH;
1889
           TcJ = FNMS(KP923879532, TcI, TcF);
1890
           TcY = FMA(KP923879532, TcI, TcF);
1891
            }
1892
            TcK = FNMS(KP534511135, TcJ, Tcu);
1893
            Td4 = FMA(KP303346683, TcY, TcZ);
1894
            TcO = FMA(KP534511135, Tcu, TcJ);
1895
            Td0 = FNMS(KP303346683, TcZ, TcY);
1896
       }
1897
       {
1898
            E T8M, Td6, TlF, TlT, T9h, TlU, Td9, TlG, T8L, TlE;
1899
            T8L = T8D - T8K;
1900
            T8M = FMA(KP707106781, T8L, T8w);
1901
            Td6 = FNMS(KP707106781, T8L, T8w);
1902
            TlE = TdU - TdT;
1903
            TlF = FMA(KP707106781, TlE, TlD);
1904
            TlT = FNMS(KP707106781, TlE, TlD);
1905
            {
1906
           E T91, T9g, Td7, Td8;
1907
           T91 = FMA(KP414213562, T90, T8T);
1908
           T9g = FNMS(KP414213562, T9f, T98);
1909
           T9h = T91 - T9g;
1910
           TlU = T91 + T9g;
1911
           Td7 = FNMS(KP414213562, T8T, T90);
1912
           Td8 = FMA(KP414213562, T98, T9f);
1913
           Td9 = Td7 + Td8;
1914
           TlG = Td8 - Td7;
1915
            }
1916
            T9i = FNMS(KP923879532, T9h, T8M);
1917
            TlV = FNMS(KP923879532, TlU, TlT);
1918
            Tm1 = FMA(KP923879532, TlU, TlT);
1919
            TcQ = FMA(KP923879532, T9h, T8M);
1920
            Tda = FNMS(KP923879532, Td9, Td6);
1921
            TlH = FMA(KP923879532, TlG, TlF);
1922
            TlN = FNMS(KP923879532, TlG, TlF);
1923
            TdC = FMA(KP923879532, Td9, Td6);
1924
       }
1925
       {
1926
            E Tas, TcL, TlP, TlQ;
1927
            Tas = FMA(KP831469612, Tar, T9i);
1928
            TcL = TbB - TcK;
1929
            ri[WS(rs, 43)] = FNMS(KP881921264, TcL, Tas);
1930
            ri[WS(rs, 11)] = FMA(KP881921264, TcL, Tas);
1931
            TlP = FMA(KP831469612, TlO, TlN);
1932
            TlQ = TcO - TcN;
1933
            ii[WS(rs, 11)] = FMA(KP881921264, TlQ, TlP);
1934
            ii[WS(rs, 43)] = FNMS(KP881921264, TlQ, TlP);
1935
       }
1936
       {
1937
            E TcM, TcP, TlR, TlS;
1938
            TcM = FNMS(KP831469612, Tar, T9i);
1939
            TcP = TcN + TcO;
1940
            ri[WS(rs, 27)] = FNMS(KP881921264, TcP, TcM);
1941
            ri[WS(rs, 59)] = FMA(KP881921264, TcP, TcM);
1942
            TlR = FNMS(KP831469612, TlO, TlN);
1943
            TlS = TbB + TcK;
1944
            ii[WS(rs, 27)] = FNMS(KP881921264, TlS, TlR);
1945
            ii[WS(rs, 59)] = FMA(KP881921264, TlS, TlR);
1946
       }
1947
       {
1948
            E TcU, Td1, TlJ, TlK;
1949
            TcU = FMA(KP831469612, TcT, TcQ);
1950
            Td1 = TcX + Td0;
1951
            ri[WS(rs, 35)] = FNMS(KP956940335, Td1, TcU);
1952
            ri[WS(rs, 3)] = FMA(KP956940335, Td1, TcU);
1953
            TlJ = FMA(KP831469612, TlI, TlH);
1954
            TlK = Td3 + Td4;
1955
            ii[WS(rs, 3)] = FMA(KP956940335, TlK, TlJ);
1956
            ii[WS(rs, 35)] = FNMS(KP956940335, TlK, TlJ);
1957
       }
1958
       {
1959
            E Td2, Td5, TlL, TlM;
1960
            Td2 = FNMS(KP831469612, TcT, TcQ);
1961
            Td5 = Td3 - Td4;
1962
            ri[WS(rs, 51)] = FNMS(KP956940335, Td5, Td2);
1963
            ri[WS(rs, 19)] = FMA(KP956940335, Td5, Td2);
1964
            TlL = FNMS(KP831469612, TlI, TlH);
1965
            TlM = Td0 - TcX;
1966
            ii[WS(rs, 19)] = FMA(KP956940335, TlM, TlL);
1967
            ii[WS(rs, 51)] = FNMS(KP956940335, TlM, TlL);
1968
       }
1969
       {
1970
            E Tdi, Tdx, TlX, TlY;
1971
            Tdi = FMA(KP980785280, Tdh, Tda);
1972
            Tdx = Tdp + Tdw;
1973
            ri[WS(rs, 39)] = FNMS(KP773010453, Tdx, Tdi);
1974
            ri[WS(rs, 7)] = FMA(KP773010453, Tdx, Tdi);
1975
            TlX = FMA(KP980785280, TlW, TlV);
1976
            TlY = Tdz + TdA;
1977
            ii[WS(rs, 7)] = FMA(KP773010453, TlY, TlX);
1978
            ii[WS(rs, 39)] = FNMS(KP773010453, TlY, TlX);
1979
       }
1980
       {
1981
            E Tdy, TdB, TlZ, Tm0;
1982
            Tdy = FNMS(KP980785280, Tdh, Tda);
1983
            TdB = Tdz - TdA;
1984
            ri[WS(rs, 55)] = FNMS(KP773010453, TdB, Tdy);
1985
            ri[WS(rs, 23)] = FMA(KP773010453, TdB, Tdy);
1986
            TlZ = FNMS(KP980785280, TlW, TlV);
1987
            Tm0 = Tdw - Tdp;
1988
            ii[WS(rs, 23)] = FMA(KP773010453, Tm0, TlZ);
1989
            ii[WS(rs, 55)] = FNMS(KP773010453, Tm0, TlZ);
1990
       }
1991
       {
1992
            E TdG, TdN, Tm3, Tm4;
1993
            TdG = FNMS(KP980785280, TdF, TdC);
1994
            TdN = TdJ - TdM;
1995
            ri[WS(rs, 47)] = FNMS(KP995184726, TdN, TdG);
1996
            ri[WS(rs, 15)] = FMA(KP995184726, TdN, TdG);
1997
            Tm3 = FNMS(KP980785280, Tm2, Tm1);
1998
            Tm4 = TdQ - TdP;
1999
            ii[WS(rs, 15)] = FMA(KP995184726, Tm4, Tm3);
2000
            ii[WS(rs, 47)] = FNMS(KP995184726, Tm4, Tm3);
2001
       }
2002
       {
2003
            E TdO, TdR, Tm5, Tm6;
2004
            TdO = FMA(KP980785280, TdF, TdC);
2005
            TdR = TdP + TdQ;
2006
            ri[WS(rs, 31)] = FNMS(KP995184726, TdR, TdO);
2007
            ri[WS(rs, 63)] = FMA(KP995184726, TdR, TdO);
2008
            Tm5 = FMA(KP980785280, Tm2, Tm1);
2009
            Tm6 = TdJ + TdM;
2010
            ii[WS(rs, 31)] = FNMS(KP995184726, Tm6, Tm5);
2011
            ii[WS(rs, 63)] = FMA(KP995184726, Tm6, Tm5);
2012
       }
2013
        }
2014
        {
2015
       E Tej, Tlk, Tf5, Tle, TeD, Tff, TeZ, Tf9, TfI, Tg2, TfM, TfY, TfB, Tg1, TfL;
2016
       E TfV, Tft, Tly, TfR, Tls, TeW, Tfg, Tf0, Tfc, Te4, Tlr, Tlx, Tf2, Tfm, Tld;
2017
       E Tlj, TfO;
2018
       {
2019
            E Teb, Tf3, Tei, Tf4;
2020
            {
2021
           E Te7, Tea, Tee, Teh;
2022
           Te7 = FMA(KP707106781, Te6, Te5);
2023
           Tea = FMA(KP707106781, Te9, Te8);
2024
           Teb = FNMS(KP198912367, Tea, Te7);
2025
           Tf3 = FMA(KP198912367, Te7, Tea);
2026
           Tee = FMA(KP707106781, Ted, Tec);
2027
           Teh = FMA(KP707106781, Teg, Tef);
2028
           Tei = FMA(KP198912367, Teh, Tee);
2029
           Tf4 = FNMS(KP198912367, Tee, Teh);
2030
            }
2031
            Tej = Teb - Tei;
2032
            Tlk = Tf4 - Tf3;
2033
            Tf5 = Tf3 + Tf4;
2034
            Tle = Teb + Tei;
2035
       }
2036
       {
2037
            E Tev, Tf8, TeC, Tf7;
2038
            {
2039
           E Ten, Teu, Tey, TeB;
2040
           Ten = FMA(KP707106781, Tem, Tel);
2041
           Teu = Teq + Tet;
2042
           Tev = FNMS(KP923879532, Teu, Ten);
2043
           Tf8 = FMA(KP923879532, Teu, Ten);
2044
           Tey = FMA(KP707106781, Tex, Tew);
2045
           TeB = Tez + TeA;
2046
           TeC = FNMS(KP923879532, TeB, Tey);
2047
           Tf7 = FMA(KP923879532, TeB, Tey);
2048
            }
2049
            TeD = FMA(KP820678790, TeC, Tev);
2050
            Tff = FNMS(KP098491403, Tf7, Tf8);
2051
            TeZ = FNMS(KP820678790, Tev, TeC);
2052
            Tf9 = FMA(KP098491403, Tf8, Tf7);
2053
       }
2054
       {
2055
            E TfE, TfX, TfH, TfW;
2056
            {
2057
           E TfC, TfD, TfF, TfG;
2058
           TfC = FNMS(KP707106781, TeQ, TeP);
2059
           TfD = TeM - TeJ;
2060
           TfE = FNMS(KP923879532, TfD, TfC);
2061
           TfX = FMA(KP923879532, TfD, TfC);
2062
           TfF = FNMS(KP707106781, TeF, TeE);
2063
           TfG = TeS - TeT;
2064
           TfH = FNMS(KP923879532, TfG, TfF);
2065
           TfW = FMA(KP923879532, TfG, TfF);
2066
            }
2067
            TfI = FNMS(KP534511135, TfH, TfE);
2068
            Tg2 = FMA(KP303346683, TfW, TfX);
2069
            TfM = FMA(KP534511135, TfE, TfH);
2070
            TfY = FNMS(KP303346683, TfX, TfW);
2071
       }
2072
       {
2073
            E Tfx, TfU, TfA, TfT;
2074
            {
2075
           E Tfv, Tfw, Tfy, Tfz;
2076
           Tfv = FNMS(KP707106781, Tex, Tew);
2077
           Tfw = Tet - Teq;
2078
           Tfx = FNMS(KP923879532, Tfw, Tfv);
2079
           TfU = FMA(KP923879532, Tfw, Tfv);
2080
           Tfy = FNMS(KP707106781, Tem, Tel);
2081
           Tfz = Tez - TeA;
2082
           TfA = FNMS(KP923879532, Tfz, Tfy);
2083
           TfT = FMA(KP923879532, Tfz, Tfy);
2084
            }
2085
            TfB = FMA(KP534511135, TfA, Tfx);
2086
            Tg1 = FNMS(KP303346683, TfT, TfU);
2087
            TfL = FNMS(KP534511135, Tfx, TfA);
2088
            TfV = FMA(KP303346683, TfU, TfT);
2089
       }
2090
       {
2091
            E Tfp, TfP, Tfs, TfQ;
2092
            {
2093
           E Tfn, Tfo, Tfq, Tfr;
2094
           Tfn = FNMS(KP707106781, Te6, Te5);
2095
           Tfo = FNMS(KP707106781, Te9, Te8);
2096
           Tfp = FMA(KP668178637, Tfo, Tfn);
2097
           TfP = FNMS(KP668178637, Tfn, Tfo);
2098
           Tfq = FNMS(KP707106781, Ted, Tec);
2099
           Tfr = FNMS(KP707106781, Teg, Tef);
2100
           Tfs = FNMS(KP668178637, Tfr, Tfq);
2101
           TfQ = FMA(KP668178637, Tfq, Tfr);
2102
            }
2103
            Tft = Tfp - Tfs;
2104
            Tly = Tfp + Tfs;
2105
            TfR = TfP + TfQ;
2106
            Tls = TfQ - TfP;
2107
       }
2108
       {
2109
            E TeO, Tfb, TeV, Tfa;
2110
            {
2111
           E TeG, TeN, TeR, TeU;
2112
           TeG = FMA(KP707106781, TeF, TeE);
2113
           TeN = TeJ + TeM;
2114
           TeO = FNMS(KP923879532, TeN, TeG);
2115
           Tfb = FMA(KP923879532, TeN, TeG);
2116
           TeR = FMA(KP707106781, TeQ, TeP);
2117
           TeU = TeS + TeT;
2118
           TeV = FNMS(KP923879532, TeU, TeR);
2119
           Tfa = FMA(KP923879532, TeU, TeR);
2120
            }
2121
            TeW = FNMS(KP820678790, TeV, TeO);
2122
            Tfg = FMA(KP098491403, Tfa, Tfb);
2123
            Tf0 = FMA(KP820678790, TeO, TeV);
2124
            Tfc = FNMS(KP098491403, Tfb, Tfa);
2125
       }
2126
       {
2127
            E TdW, Tfi, Tlb, Tlp, Te3, Tlq, Tfl, Tlc, TdV, Tla;
2128
            TdV = TdT + TdU;
2129
            TdW = FMA(KP707106781, TdV, TdS);
2130
            Tfi = FNMS(KP707106781, TdV, TdS);
2131
            Tla = T8D + T8K;
2132
            Tlb = FMA(KP707106781, Tla, Tl9);
2133
            Tlp = FNMS(KP707106781, Tla, Tl9);
2134
            {
2135
           E TdZ, Te2, Tfj, Tfk;
2136
           TdZ = FMA(KP414213562, TdY, TdX);
2137
           Te2 = FNMS(KP414213562, Te1, Te0);
2138
           Te3 = TdZ + Te2;
2139
           Tlq = Te2 - TdZ;
2140
           Tfj = FNMS(KP414213562, TdX, TdY);
2141
           Tfk = FMA(KP414213562, Te0, Te1);
2142
           Tfl = Tfj - Tfk;
2143
           Tlc = Tfj + Tfk;
2144
            }
2145
            Te4 = FNMS(KP923879532, Te3, TdW);
2146
            Tlr = FMA(KP923879532, Tlq, Tlp);
2147
            Tlx = FNMS(KP923879532, Tlq, Tlp);
2148
            Tf2 = FMA(KP923879532, Te3, TdW);
2149
            Tfm = FMA(KP923879532, Tfl, Tfi);
2150
            Tld = FMA(KP923879532, Tlc, Tlb);
2151
            Tlj = FNMS(KP923879532, Tlc, Tlb);
2152
            TfO = FNMS(KP923879532, Tfl, Tfi);
2153
       }
2154
       {
2155
            E Tek, TeX, Tll, Tlm;
2156
            Tek = FMA(KP980785280, Tej, Te4);
2157
            TeX = TeD - TeW;
2158
            ri[WS(rs, 41)] = FNMS(KP773010453, TeX, Tek);
2159
            ri[WS(rs, 9)] = FMA(KP773010453, TeX, Tek);
2160
            Tll = FMA(KP980785280, Tlk, Tlj);
2161
            Tlm = Tf0 - TeZ;
2162
            ii[WS(rs, 9)] = FMA(KP773010453, Tlm, Tll);
2163
            ii[WS(rs, 41)] = FNMS(KP773010453, Tlm, Tll);
2164
       }
2165
       {
2166
            E TeY, Tf1, Tln, Tlo;
2167
            TeY = FNMS(KP980785280, Tej, Te4);
2168
            Tf1 = TeZ + Tf0;
2169
            ri[WS(rs, 25)] = FNMS(KP773010453, Tf1, TeY);
2170
            ri[WS(rs, 57)] = FMA(KP773010453, Tf1, TeY);
2171
            Tln = FNMS(KP980785280, Tlk, Tlj);
2172
            Tlo = TeD + TeW;
2173
            ii[WS(rs, 25)] = FNMS(KP773010453, Tlo, Tln);
2174
            ii[WS(rs, 57)] = FMA(KP773010453, Tlo, Tln);
2175
       }
2176
       {
2177
            E Tf6, Tfd, Tlf, Tlg;
2178
            Tf6 = FMA(KP980785280, Tf5, Tf2);
2179
            Tfd = Tf9 + Tfc;
2180
            ri[WS(rs, 33)] = FNMS(KP995184726, Tfd, Tf6);
2181
            ri[WS(rs, 1)] = FMA(KP995184726, Tfd, Tf6);
2182
            Tlf = FMA(KP980785280, Tle, Tld);
2183
            Tlg = Tff + Tfg;
2184
            ii[WS(rs, 1)] = FMA(KP995184726, Tlg, Tlf);
2185
            ii[WS(rs, 33)] = FNMS(KP995184726, Tlg, Tlf);
2186
       }
2187
       {
2188
            E Tfe, Tfh, Tlh, Tli;
2189
            Tfe = FNMS(KP980785280, Tf5, Tf2);
2190
            Tfh = Tff - Tfg;
2191
            ri[WS(rs, 49)] = FNMS(KP995184726, Tfh, Tfe);
2192
            ri[WS(rs, 17)] = FMA(KP995184726, Tfh, Tfe);
2193
            Tlh = FNMS(KP980785280, Tle, Tld);
2194
            Tli = Tfc - Tf9;
2195
            ii[WS(rs, 17)] = FMA(KP995184726, Tli, Tlh);
2196
            ii[WS(rs, 49)] = FNMS(KP995184726, Tli, Tlh);
2197
       }
2198
       {
2199
            E Tfu, TfJ, Tlt, Tlu;
2200
            Tfu = FMA(KP831469612, Tft, Tfm);
2201
            TfJ = TfB + TfI;
2202
            ri[WS(rs, 37)] = FNMS(KP881921264, TfJ, Tfu);
2203
            ri[WS(rs, 5)] = FMA(KP881921264, TfJ, Tfu);
2204
            Tlt = FMA(KP831469612, Tls, Tlr);
2205
            Tlu = TfL + TfM;
2206
            ii[WS(rs, 5)] = FMA(KP881921264, Tlu, Tlt);
2207
            ii[WS(rs, 37)] = FNMS(KP881921264, Tlu, Tlt);
2208
       }
2209
       {
2210
            E TfK, TfN, Tlv, Tlw;
2211
            TfK = FNMS(KP831469612, Tft, Tfm);
2212
            TfN = TfL - TfM;
2213
            ri[WS(rs, 53)] = FNMS(KP881921264, TfN, TfK);
2214
            ri[WS(rs, 21)] = FMA(KP881921264, TfN, TfK);
2215
            Tlv = FNMS(KP831469612, Tls, Tlr);
2216
            Tlw = TfI - TfB;
2217
            ii[WS(rs, 21)] = FMA(KP881921264, Tlw, Tlv);
2218
            ii[WS(rs, 53)] = FNMS(KP881921264, Tlw, Tlv);
2219
       }
2220
       {
2221
            E TfS, TfZ, Tlz, TlA;
2222
            TfS = FNMS(KP831469612, TfR, TfO);
2223
            TfZ = TfV - TfY;
2224
            ri[WS(rs, 45)] = FNMS(KP956940335, TfZ, TfS);
2225
            ri[WS(rs, 13)] = FMA(KP956940335, TfZ, TfS);
2226
            Tlz = FNMS(KP831469612, Tly, Tlx);
2227
            TlA = Tg2 - Tg1;
2228
            ii[WS(rs, 13)] = FMA(KP956940335, TlA, Tlz);
2229
            ii[WS(rs, 45)] = FNMS(KP956940335, TlA, Tlz);
2230
       }
2231
       {
2232
            E Tg0, Tg3, TlB, TlC;
2233
            Tg0 = FMA(KP831469612, TfR, TfO);
2234
            Tg3 = Tg1 + Tg2;
2235
            ri[WS(rs, 29)] = FNMS(KP956940335, Tg3, Tg0);
2236
            ri[WS(rs, 61)] = FMA(KP956940335, Tg3, Tg0);
2237
            TlB = FMA(KP831469612, Tly, Tlx);
2238
            TlC = TfV + TfY;
2239
            ii[WS(rs, 29)] = FNMS(KP956940335, TlC, TlB);
2240
            ii[WS(rs, 61)] = FMA(KP956940335, TlC, TlB);
2241
       }
2242
        }
2243
         }
2244
    }
2245
     }
2246
}
2247
2248
/*
 * Twiddle-instruction table for the FMA variant of t2_64; it is referenced
 * by the `desc` initializer that follows.
 *
 * Layout: five TW_CEXP entries whose last field is 1, 3, 9, 27 and 63,
 * followed by a single TW_NEXT entry that ends the table.
 *
 * The non-FMA variant of t2_64 later in this file reads W[0]..W[9] and
 * advances W by 10 per loop iteration, which is consistent with two reals
 * per TW_CEXP entry.  NOTE(review): presumably the FMA variant consumes the
 * same layout (both were generated with -twiddle-log3) -- confirm.
 *
 * NOTE(review): the exact meaning of the TW_CEXP / TW_NEXT fields is defined
 * in project headers that are not visible here -- confirm before editing.
 */
static const tw_instr twinstr[] = {
2249
     { TW_CEXP, 0, 1 },
2250
     { TW_CEXP, 0, 3 },
2251
     { TW_CEXP, 0, 9 },
2252
     { TW_CEXP, 0, 27 },
2253
     { TW_CEXP, 0, 63 },
2254
     { TW_NEXT, 1, 0 }
2255
};
2256
2257
/*
 * Codelet descriptor passed to the registration call below.
 *
 * Fields, in order: 64 (matches the generator's "-n 64"), the codelet name
 * "t2_64", the twiddle-instruction table `twinstr`, the address of GENUS,
 * an operation-count initializer { 520, 206, 634, 0 }, and three trailing
 * zeros.  The first three counts equal the "520 additions, 206
 * multiplications, 634 fused multiply/add" figures in this variant's
 * header comment.
 *
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros is fixed by the ct_desc definition, which is not visible here.
 */
static const ct_desc desc = { 64, "t2_64", twinstr, &GENUS, { 520, 206, 634, 0 }, 0, 0, 0 };
2258
2259
/*
 * Registration entry point for this codelet.
 *
 * p: planner that receives the codelet.
 *
 * Forwards the t2_64 kernel together with its descriptor `desc` to
 * X(kdft_dit_register).  Returns nothing.
 */
void X(codelet_t2_64) (planner *p) {
2260
     X(kdft_dit_register) (p, t2_64, &desc);
2261
}
2262
#else
2263
2264
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -name t2_64 -include dft/scalar/t.h */
2265
2266
/*
2267
 * This function contains 1154 FP additions, 660 FP multiplications,
2268
 * (or, 880 additions, 386 multiplications, 274 fused multiply/add),
2269
 * 302 stack variables, 15 constants, and 256 memory accesses
2270
 */
2271
#include "dft/scalar/t.h"
2272
2273
static void t2_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
2274
0
{
2275
0
     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
2276
0
     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
2277
0
     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
2278
0
     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
2279
0
     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
2280
0
     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
2281
0
     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
2282
0
     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
2283
0
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
2284
0
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
2285
0
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
2286
0
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
2287
0
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
2288
0
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
2289
0
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
2290
0
     {
2291
0
    INT m;
2292
0
    for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(128, rs)) {
2293
0
         E T2, T5, T3, T6, Te, T9, TP, T3e, T1e, T39, T3c, TT, T1a, T37, T8;
2294
0
         E Tw, Td, Ty, Tm, Th, T1C, T3K, T1V, T3x, T3I, T1G, T1R, T3v, T2m, T2q;
2295
0
         E T5Y, T6u, T53, T5B, T62, T6w, T57, T5D, T2V, T2X, Tg, TE, T3Y, T3V, T3j;
2296
0
         E Tl, TA, T3g, T1j, T1t, TV, T2C, T2z, T1u, TZ, T1h, To, T1p, T6j, T6H;
2297
0
         E Ts, T1l, T6l, T6F, T2P, T4b, T4x, T5i, T2R, T49, T4z, T5g, TG, T4k, T4m;
2298
0
         E TK, T21, T3O, T3Q, T25, TW, T10, T11, T79, T6X, T5M, T6b, T1v, T30, T69;
2299
0
         E T77, T13, T2F, T2D, T6p, T6O, T1x, T2a, T2f, T6V, T28, T6r, T2h, T6Q, T32;
2300
0
         E T5K, T5w, T4G, T4Q, T3m, T4h, T4I, T5y, T3k, T4f, T41, T4S, T4Y, T3q, T3D;
2301
0
         E T3F, T5r, T3s, T4W, T3Z, T5p;
2302
0
         {
2303
0
        E Ta, Tj, Tx, TC, Tf, Tk, Tz, TD, T1B, T1E, T2o, T2l, T1T, T1Q, T1A;
2304
0
        E T1F, T2p, T2k, T1U, T1P;
2305
0
        {
2306
0
       E T4, T1d, T19, Tb, T1c, T7, Tc, T18, TR, TO, TS, TN;
2307
0
       T2 = W[0];
2308
0
       T5 = W[1];
2309
0
       T3 = W[2];
2310
0
       T6 = W[3];
2311
0
       Te = W[5];
2312
0
       T9 = W[4];
2313
0
       T4 = T2 * T3;
2314
0
       T1d = T5 * T9;
2315
0
       T19 = T5 * Te;
2316
0
       Tb = T2 * T6;
2317
0
       T1c = T2 * Te;
2318
0
       T7 = T5 * T6;
2319
0
       Tc = T5 * T3;
2320
0
       T18 = T2 * T9;
2321
0
       TR = T3 * Te;
2322
0
       TO = T6 * Te;
2323
0
       TS = T6 * T9;
2324
0
       TN = T3 * T9;
2325
0
       TP = TN - TO;
2326
0
       T3e = TR - TS;
2327
0
       T1e = T1c - T1d;
2328
0
       T39 = T1c + T1d;
2329
0
       T3c = TN + TO;
2330
0
       TT = TR + TS;
2331
0
       T1a = T18 + T19;
2332
0
       T37 = T18 - T19;
2333
0
       T8 = T4 - T7;
2334
0
       Ta = T8 * T9;
2335
0
       Tj = T8 * Te;
2336
0
       Tw = T4 + T7;
2337
0
       Tx = Tw * T9;
2338
0
       TC = Tw * Te;
2339
0
       Td = Tb + Tc;
2340
0
       Tf = Td * Te;
2341
0
       Tk = Td * T9;
2342
0
       Ty = Tb - Tc;
2343
0
       Tz = Ty * Te;
2344
0
       TD = Ty * T9;
2345
0
       Tm = W[7];
2346
0
       T1B = T6 * Tm;
2347
0
       T1E = T3 * Tm;
2348
0
       T2o = T2 * Tm;
2349
0
       T2l = T5 * Tm;
2350
0
       T1T = T9 * Tm;
2351
0
       T1Q = Te * Tm;
2352
0
       Th = W[6];
2353
0
       T1A = T3 * Th;
2354
0
       T1F = T6 * Th;
2355
0
       T2p = T5 * Th;
2356
0
       T2k = T2 * Th;
2357
0
       T1U = Te * Th;
2358
0
       T1P = T9 * Th;
2359
0
        }
2360
0
        T1C = T1A + T1B;
2361
0
        T3K = T1E + T1F;
2362
0
        T1V = T1T + T1U;
2363
0
        T3x = T2o - T2p;
2364
0
        T3I = T1A - T1B;
2365
0
        T1G = T1E - T1F;
2366
0
        T1R = T1P - T1Q;
2367
0
        {
2368
0
       E T5W, T5X, T55, T56;
2369
0
       T3v = T2k + T2l;
2370
0
       T2m = T2k - T2l;
2371
0
       T2q = T2o + T2p;
2372
0
       T5W = T8 * Th;
2373
0
       T5X = Td * Tm;
2374
0
       T5Y = T5W - T5X;
2375
0
       T6u = T5W + T5X;
2376
0
       {
2377
0
            E T51, T52, T60, T61;
2378
0
            T51 = Tw * Th;
2379
0
            T52 = Ty * Tm;
2380
0
            T53 = T51 + T52;
2381
0
            T5B = T51 - T52;
2382
0
            T60 = T8 * Tm;
2383
0
            T61 = Td * Th;
2384
0
            T62 = T60 + T61;
2385
0
            T6w = T60 - T61;
2386
0
       }
2387
0
       T55 = Tw * Tm;
2388
0
       T56 = Ty * Th;
2389
0
       T57 = T55 - T56;
2390
0
       T5D = T55 + T56;
2391
0
       {
2392
0
            E Ti, Tq, TF, TJ, T3W, T3X, T3T, T3U, T3h, T3i, Tn, Tr, TB, TI, T3d;
2393
0
            E T3f, T1k, T1o, T1Z, T23, TQ, TU, T2A, T2B, T2x, T2y, T20, T24, TX, TY;
2394
0
            E T1i, T1n;
2395
0
            T2V = T1P + T1Q;
2396
0
            T2X = T1T - T1U;
2397
0
            Tg = Ta + Tf;
2398
0
            Ti = Tg * Th;
2399
0
            Tq = Tg * Tm;
2400
0
            TE = TC + TD;
2401
0
            TF = TE * Tm;
2402
0
            TJ = TE * Th;
2403
0
            T3W = T37 * Tm;
2404
0
            T3X = T39 * Th;
2405
0
            T3Y = T3W - T3X;
2406
0
            T3T = T37 * Th;
2407
0
            T3U = T39 * Tm;
2408
0
            T3V = T3T + T3U;
2409
0
            T3h = T3c * Tm;
2410
0
            T3i = T3e * Th;
2411
0
            T3j = T3h - T3i;
2412
0
            Tl = Tj - Tk;
2413
0
            Tn = Tl * Tm;
2414
0
            Tr = Tl * Th;
2415
0
            TA = Tx - Tz;
2416
0
            TB = TA * Th;
2417
0
            TI = TA * Tm;
2418
0
            T3d = T3c * Th;
2419
0
            T3f = T3e * Tm;
2420
0
            T3g = T3d + T3f;
2421
0
            T1j = Tj + Tk;
2422
0
            T1k = T1j * Tm;
2423
0
            T1o = T1j * Th;
2424
0
            T1t = Tx + Tz;
2425
0
            T1Z = T1t * Th;
2426
0
            T23 = T1t * Tm;
2427
0
            TQ = TP * Th;
2428
0
            TU = TT * Tm;
2429
0
            TV = TQ + TU;
2430
0
            T2A = T1a * Tm;
2431
0
            T2B = T1e * Th;
2432
0
            T2C = T2A - T2B;
2433
0
            T2x = T1a * Th;
2434
0
            T2y = T1e * Tm;
2435
0
            T2z = T2x + T2y;
2436
0
            T1u = TC - TD;
2437
0
            T20 = T1u * Tm;
2438
0
            T24 = T1u * Th;
2439
0
            TX = TP * Tm;
2440
0
            TY = TT * Th;
2441
0
            TZ = TX - TY;
2442
0
            T1h = Ta - Tf;
2443
0
            T1i = T1h * Th;
2444
0
            T1n = T1h * Tm;
2445
0
            To = Ti - Tn;
2446
0
            T1p = T1n + T1o;
2447
0
            T6j = TQ - TU;
2448
0
            T6H = T2A + T2B;
2449
0
            Ts = Tq + Tr;
2450
0
            T1l = T1i - T1k;
2451
0
            T6l = TX + TY;
2452
0
            T6F = T2x - T2y;
2453
0
            T2P = T1Z - T20;
2454
0
            T4b = TI + TJ;
2455
0
            T4x = T3d - T3f;
2456
0
            T5i = T3W + T3X;
2457
0
            T2R = T23 + T24;
2458
0
            T49 = TB - TF;
2459
0
            T4z = T3h + T3i;
2460
0
            T5g = T3T - T3U;
2461
0
            TG = TB + TF;
2462
0
            T4k = Ti + Tn;
2463
0
            T4m = Tq - Tr;
2464
0
            TK = TI - TJ;
2465
0
            T21 = T1Z + T20;
2466
0
            T3O = T1i + T1k;
2467
0
            T3Q = T1n - T1o;
2468
0
            T25 = T23 - T24;
2469
0
            TW = W[8];
2470
0
            T10 = W[9];
2471
0
            T11 = FMA(TV, TW, TZ * T10);
2472
0
            T79 = FNMS(T25, TW, T21 * T10);
2473
0
            T6X = FNMS(Td, TW, T8 * T10);
2474
0
            T5M = FNMS(T2X, TW, T2V * T10);
2475
0
            T6b = FNMS(TK, TW, TG * T10);
2476
0
            T1v = FMA(T1t, TW, T1u * T10);
2477
0
            T30 = FMA(T1h, TW, T1j * T10);
2478
0
            T69 = FMA(TG, TW, TK * T10);
2479
0
            T77 = FMA(T21, TW, T25 * T10);
2480
0
            T13 = FNMS(TZ, TW, TV * T10);
2481
0
            T2F = FNMS(T2C, TW, T2z * T10);
2482
0
            T2D = FMA(T2z, TW, T2C * T10);
2483
0
            T6p = FMA(T1a, TW, T1e * T10);
2484
0
            T6O = FMA(TP, TW, TT * T10);
2485
0
            T1x = FNMS(T1u, TW, T1t * T10);
2486
0
            T2a = FNMS(TE, TW, TA * T10);
2487
0
            T2f = FMA(T3, TW, T6 * T10);
2488
0
            T6V = FMA(T8, TW, Td * T10);
2489
0
            T28 = FMA(TA, TW, TE * T10);
2490
0
            T6r = FNMS(T1e, TW, T1a * T10);
2491
0
            T2h = FNMS(T6, TW, T3 * T10);
2492
0
            T6Q = FNMS(TT, TW, TP * T10);
2493
0
            T32 = FNMS(T1j, TW, T1h * T10);
2494
0
            T5K = FMA(T2V, TW, T2X * T10);
2495
0
            T5w = FMA(Tw, TW, Ty * T10);
2496
0
            T4G = FMA(T3O, TW, T3Q * T10);
2497
0
            T4Q = FMA(T4k, TW, T4m * T10);
2498
0
            T3m = FNMS(T3j, TW, T3g * T10);
2499
0
            T4h = FNMS(Te, TW, T9 * T10);
2500
0
            T4I = FNMS(T3Q, TW, T3O * T10);
2501
0
            T5y = FNMS(Ty, TW, Tw * T10);
2502
0
            T3k = FMA(T3g, TW, T3j * T10);
2503
0
            T4f = FMA(T9, TW, Te * T10);
2504
0
            T41 = FNMS(T3Y, TW, T3V * T10);
2505
0
            T4S = FNMS(T4m, TW, T4k * T10);
2506
0
            T4Y = FNMS(T3e, TW, T3c * T10);
2507
0
            T3q = FMA(Tg, TW, Tl * T10);
2508
0
            T3D = FMA(T2, TW, T5 * T10);
2509
0
            T3F = FNMS(T5, TW, T2 * T10);
2510
0
            T5r = FNMS(T39, TW, T37 * T10);
2511
0
            T3s = FNMS(Tl, TW, Tg * T10);
2512
0
            T4W = FMA(T3c, TW, T3e * T10);
2513
0
            T3Z = FMA(T3V, TW, T3Y * T10);
2514
0
            T5p = FMA(T37, TW, T39 * T10);
2515
0
       }
2516
0
        }
2517
0
         }
2518
0
         {
2519
0
        E T17, TdV, Tj3, Tjx, T7l, TbJ, Ti3, Tix, T1K, Tiw, TdY, ThY, T7w, Tj0, TbM;
2520
0
        E Tjw, T2e, TgA, T7I, TaY, TbQ, Tda, Te4, TfO, T2J, TgB, T7T, TaZ, TbT, Tdb;
2521
0
        E Te9, TfP, T36, T3B, TgH, TgE, TgF, TgG, T80, TbW, Tel, TfT, T8b, Tc0, T8k;
2522
0
        E TbX, Teg, TfS, T8h, TbZ, T45, T4q, TgJ, TgK, TgL, TgM, T8r, Tc6, Tew, TfW;
2523
0
        E T8C, Tc4, T8L, Tc7, Ter, TfV, T8I, Tc3, T6B, Th1, Tfm, Tga, Th8, ThI, T9N;
2524
0
        E Tcv, T9Y, TcH, Tav, Tcw, Tf5, Tg7, Tas, TcG, T5c, TgV, TeV, Tg0, TgS, ThD;
2525
0
        E T8U, Tcc, T95, Tco, T9C, Tcd, TeE, Tg3, T9z, Tcn, T5R, TgT, TeO, TeW, TgY;
2526
0
        E ThE, T9h, T9F, T9s, T9E, Tck, Tcq, TeJ, TeX, Tch, Tcr, T7e, Th9, Tff, Tfn;
2527
0
        E Th4, ThJ, Taa, Tay, Tal, Tax, TcD, TcJ, Tfa, Tfo, TcA, TcK;
2528
0
        {
2529
0
       E T1, Ti1, Tu, Ti0, TM, T7i, T15, T7j, Tp, Tt;
2530
0
       T1 = ri[0];
2531
0
       Ti1 = ii[0];
2532
0
       Tp = ri[WS(rs, 32)];
2533
0
       Tt = ii[WS(rs, 32)];
2534
0
       Tu = FMA(To, Tp, Ts * Tt);
2535
0
       Ti0 = FNMS(Ts, Tp, To * Tt);
2536
0
       {
2537
0
            E TH, TL, T12, T14;
2538
0
            TH = ri[WS(rs, 16)];
2539
0
            TL = ii[WS(rs, 16)];
2540
0
            TM = FMA(TG, TH, TK * TL);
2541
0
            T7i = FNMS(TK, TH, TG * TL);
2542
0
            T12 = ri[WS(rs, 48)];
2543
0
            T14 = ii[WS(rs, 48)];
2544
0
            T15 = FMA(T11, T12, T13 * T14);
2545
0
            T7j = FNMS(T13, T12, T11 * T14);
2546
0
       }
2547
0
       {
2548
0
            E Tv, T16, Tj1, Tj2;
2549
0
            Tv = T1 + Tu;
2550
0
            T16 = TM + T15;
2551
0
            T17 = Tv + T16;
2552
0
            TdV = Tv - T16;
2553
0
            Tj1 = Ti1 - Ti0;
2554
0
            Tj2 = TM - T15;
2555
0
            Tj3 = Tj1 - Tj2;
2556
0
            Tjx = Tj2 + Tj1;
2557
0
       }
2558
0
       {
2559
0
            E T7h, T7k, ThZ, Ti2;
2560
0
            T7h = T1 - Tu;
2561
0
            T7k = T7i - T7j;
2562
0
            T7l = T7h - T7k;
2563
0
            TbJ = T7h + T7k;
2564
0
            ThZ = T7i + T7j;
2565
0
            Ti2 = Ti0 + Ti1;
2566
0
            Ti3 = ThZ + Ti2;
2567
0
            Tix = Ti2 - ThZ;
2568
0
       }
2569
0
        }
2570
0
        {
2571
0
       E T1g, T7m, T1r, T7n, T7o, T7p, T1z, T7s, T1I, T7t, T7r, T7u;
2572
0
       {
2573
0
            E T1b, T1f, T1m, T1q;
2574
0
            T1b = ri[WS(rs, 8)];
2575
0
            T1f = ii[WS(rs, 8)];
2576
0
            T1g = FMA(T1a, T1b, T1e * T1f);
2577
0
            T7m = FNMS(T1e, T1b, T1a * T1f);
2578
0
            T1m = ri[WS(rs, 40)];
2579
0
            T1q = ii[WS(rs, 40)];
2580
0
            T1r = FMA(T1l, T1m, T1p * T1q);
2581
0
            T7n = FNMS(T1p, T1m, T1l * T1q);
2582
0
       }
2583
0
       T7o = T7m - T7n;
2584
0
       T7p = T1g - T1r;
2585
0
       {
2586
0
            E T1w, T1y, T1D, T1H;
2587
0
            T1w = ri[WS(rs, 56)];
2588
0
            T1y = ii[WS(rs, 56)];
2589
0
            T1z = FMA(T1v, T1w, T1x * T1y);
2590
0
            T7s = FNMS(T1x, T1w, T1v * T1y);
2591
0
            T1D = ri[WS(rs, 24)];
2592
0
            T1H = ii[WS(rs, 24)];
2593
0
            T1I = FMA(T1C, T1D, T1G * T1H);
2594
0
            T7t = FNMS(T1G, T1D, T1C * T1H);
2595
0
       }
2596
0
       T7r = T1z - T1I;
2597
0
       T7u = T7s - T7t;
2598
0
       {
2599
0
            E T1s, T1J, TdW, TdX;
2600
0
            T1s = T1g + T1r;
2601
0
            T1J = T1z + T1I;
2602
0
            T1K = T1s + T1J;
2603
0
            Tiw = T1J - T1s;
2604
0
            TdW = T7m + T7n;
2605
0
            TdX = T7s + T7t;
2606
0
            TdY = TdW - TdX;
2607
0
            ThY = TdW + TdX;
2608
0
       }
2609
0
       {
2610
0
            E T7q, T7v, TbK, TbL;
2611
0
            T7q = T7o - T7p;
2612
0
            T7v = T7r + T7u;
2613
0
            T7w = KP707106781 * (T7q - T7v);
2614
0
            Tj0 = KP707106781 * (T7q + T7v);
2615
0
            TbK = T7p + T7o;
2616
0
            TbL = T7r - T7u;
2617
0
            TbM = KP707106781 * (TbK + TbL);
2618
0
            Tjw = KP707106781 * (TbL - TbK);
2619
0
       }
2620
0
        }
2621
0
        {
2622
0
       E T1Y, Te0, T7A, T7D, T2d, Te1, T7B, T7G, T7C, T7H;
2623
0
       {
2624
0
            E T1O, T7y, T1X, T7z;
2625
0
            {
2626
0
           E T1M, T1N, T1S, T1W;
2627
0
           T1M = ri[WS(rs, 4)];
2628
0
           T1N = ii[WS(rs, 4)];
2629
0
           T1O = FMA(T8, T1M, Td * T1N);
2630
0
           T7y = FNMS(Td, T1M, T8 * T1N);
2631
0
           T1S = ri[WS(rs, 36)];
2632
0
           T1W = ii[WS(rs, 36)];
2633
0
           T1X = FMA(T1R, T1S, T1V * T1W);
2634
0
           T7z = FNMS(T1V, T1S, T1R * T1W);
2635
0
            }
2636
0
            T1Y = T1O + T1X;
2637
0
            Te0 = T7y + T7z;
2638
0
            T7A = T7y - T7z;
2639
0
            T7D = T1O - T1X;
2640
0
       }
2641
0
       {
2642
0
            E T27, T7E, T2c, T7F;
2643
0
            {
2644
0
           E T22, T26, T29, T2b;
2645
0
           T22 = ri[WS(rs, 20)];
2646
0
           T26 = ii[WS(rs, 20)];
2647
0
           T27 = FMA(T21, T22, T25 * T26);
2648
0
           T7E = FNMS(T25, T22, T21 * T26);
2649
0
           T29 = ri[WS(rs, 52)];
2650
0
           T2b = ii[WS(rs, 52)];
2651
0
           T2c = FMA(T28, T29, T2a * T2b);
2652
0
           T7F = FNMS(T2a, T29, T28 * T2b);
2653
0
            }
2654
0
            T2d = T27 + T2c;
2655
0
            Te1 = T7E + T7F;
2656
0
            T7B = T27 - T2c;
2657
0
            T7G = T7E - T7F;
2658
0
       }
2659
0
       T2e = T1Y + T2d;
2660
0
       TgA = Te0 + Te1;
2661
0
       T7C = T7A + T7B;
2662
0
       T7H = T7D - T7G;
2663
0
       T7I = FNMS(KP923879532, T7H, KP382683432 * T7C);
2664
0
       TaY = FMA(KP923879532, T7C, KP382683432 * T7H);
2665
0
       {
2666
0
            E TbO, TbP, Te2, Te3;
2667
0
            TbO = T7A - T7B;
2668
0
            TbP = T7D + T7G;
2669
0
            TbQ = FNMS(KP382683432, TbP, KP923879532 * TbO);
2670
0
            Tda = FMA(KP382683432, TbO, KP923879532 * TbP);
2671
0
            Te2 = Te0 - Te1;
2672
0
            Te3 = T1Y - T2d;
2673
0
            Te4 = Te2 - Te3;
2674
0
            TfO = Te3 + Te2;
2675
0
       }
2676
0
        }
2677
0
        {
2678
0
       E T2t, Te6, T7L, T7O, T2I, Te7, T7M, T7R, T7N, T7S;
2679
0
       {
2680
0
            E T2j, T7J, T2s, T7K;
2681
0
            {
2682
0
           E T2g, T2i, T2n, T2r;
2683
0
           T2g = ri[WS(rs, 60)];
2684
0
           T2i = ii[WS(rs, 60)];
2685
0
           T2j = FMA(T2f, T2g, T2h * T2i);
2686
0
           T7J = FNMS(T2h, T2g, T2f * T2i);
2687
0
           T2n = ri[WS(rs, 28)];
2688
0
           T2r = ii[WS(rs, 28)];
2689
0
           T2s = FMA(T2m, T2n, T2q * T2r);
2690
0
           T7K = FNMS(T2q, T2n, T2m * T2r);
2691
0
            }
2692
0
            T2t = T2j + T2s;
2693
0
            Te6 = T7J + T7K;
2694
0
            T7L = T7J - T7K;
2695
0
            T7O = T2j - T2s;
2696
0
       }
2697
0
       {
2698
0
            E T2w, T7P, T2H, T7Q;
2699
0
            {
2700
0
           E T2u, T2v, T2E, T2G;
2701
0
           T2u = ri[WS(rs, 12)];
2702
0
           T2v = ii[WS(rs, 12)];
2703
0
           T2w = FMA(TP, T2u, TT * T2v);
2704
0
           T7P = FNMS(TT, T2u, TP * T2v);
2705
0
           T2E = ri[WS(rs, 44)];
2706
0
           T2G = ii[WS(rs, 44)];
2707
0
           T2H = FMA(T2D, T2E, T2F * T2G);
2708
0
           T7Q = FNMS(T2F, T2E, T2D * T2G);
2709
0
            }
2710
0
            T2I = T2w + T2H;
2711
0
            Te7 = T7P + T7Q;
2712
0
            T7M = T2w - T2H;
2713
0
            T7R = T7P - T7Q;
2714
0
       }
2715
0
       T2J = T2t + T2I;
2716
0
       TgB = Te6 + Te7;
2717
0
       T7N = T7L + T7M;
2718
0
       T7S = T7O - T7R;
2719
0
       T7T = FMA(KP382683432, T7N, KP923879532 * T7S);
2720
0
       TaZ = FNMS(KP923879532, T7N, KP382683432 * T7S);
2721
0
       {
2722
0
            E TbR, TbS, Te5, Te8;
2723
0
            TbR = T7L - T7M;
2724
0
            TbS = T7O + T7R;
2725
0
            TbT = FMA(KP923879532, TbR, KP382683432 * TbS);
2726
0
            Tdb = FNMS(KP382683432, TbR, KP923879532 * TbS);
2727
0
            Te5 = T2t - T2I;
2728
0
            Te8 = Te6 - Te7;
2729
0
            Te9 = Te5 + Te8;
2730
0
            TfP = Te5 - Te8;
2731
0
       }
2732
0
        }
2733
0
        {
2734
0
       E T2O, T7W, T2T, T7X, T2U, Tec, T2Z, T8e, T34, T8f, T35, Ted, T3p, Tei, T86;
2735
0
       E T89, T3A, Tej, T81, T84;
2736
0
       {
2737
0
            E T2M, T2N, T2Q, T2S;
2738
0
            T2M = ri[WS(rs, 2)];
2739
0
            T2N = ii[WS(rs, 2)];
2740
0
            T2O = FMA(Tw, T2M, Ty * T2N);
2741
0
            T7W = FNMS(Ty, T2M, Tw * T2N);
2742
0
            T2Q = ri[WS(rs, 34)];
2743
0
            T2S = ii[WS(rs, 34)];
2744
0
            T2T = FMA(T2P, T2Q, T2R * T2S);
2745
0
            T7X = FNMS(T2R, T2Q, T2P * T2S);
2746
0
       }
2747
0
       T2U = T2O + T2T;
2748
0
       Tec = T7W + T7X;
2749
0
       {
2750
0
            E T2W, T2Y, T31, T33;
2751
0
            T2W = ri[WS(rs, 18)];
2752
0
            T2Y = ii[WS(rs, 18)];
2753
0
            T2Z = FMA(T2V, T2W, T2X * T2Y);
2754
0
            T8e = FNMS(T2X, T2W, T2V * T2Y);
2755
0
            T31 = ri[WS(rs, 50)];
2756
0
            T33 = ii[WS(rs, 50)];
2757
0
            T34 = FMA(T30, T31, T32 * T33);
2758
0
            T8f = FNMS(T32, T31, T30 * T33);
2759
0
       }
2760
0
       T35 = T2Z + T34;
2761
0
       Ted = T8e + T8f;
2762
0
       {
2763
0
            E T3b, T87, T3o, T88;
2764
0
            {
2765
0
           E T38, T3a, T3l, T3n;
2766
0
           T38 = ri[WS(rs, 10)];
2767
0
           T3a = ii[WS(rs, 10)];
2768
0
           T3b = FMA(T37, T38, T39 * T3a);
2769
0
           T87 = FNMS(T39, T38, T37 * T3a);
2770
0
           T3l = ri[WS(rs, 42)];
2771
0
           T3n = ii[WS(rs, 42)];
2772
0
           T3o = FMA(T3k, T3l, T3m * T3n);
2773
0
           T88 = FNMS(T3m, T3l, T3k * T3n);
2774
0
            }
2775
0
            T3p = T3b + T3o;
2776
0
            Tei = T87 + T88;
2777
0
            T86 = T3b - T3o;
2778
0
            T89 = T87 - T88;
2779
0
       }
2780
0
       {
2781
0
            E T3u, T82, T3z, T83;
2782
0
            {
2783
0
           E T3r, T3t, T3w, T3y;
2784
0
           T3r = ri[WS(rs, 58)];
2785
0
           T3t = ii[WS(rs, 58)];
2786
0
           T3u = FMA(T3q, T3r, T3s * T3t);
2787
0
           T82 = FNMS(T3s, T3r, T3q * T3t);
2788
0
           T3w = ri[WS(rs, 26)];
2789
0
           T3y = ii[WS(rs, 26)];
2790
0
           T3z = FMA(T3v, T3w, T3x * T3y);
2791
0
           T83 = FNMS(T3x, T3w, T3v * T3y);
2792
0
            }
2793
0
            T3A = T3u + T3z;
2794
0
            Tej = T82 + T83;
2795
0
            T81 = T3u - T3z;
2796
0
            T84 = T82 - T83;
2797
0
       }
2798
0
       T36 = T2U + T35;
2799
0
       T3B = T3p + T3A;
2800
0
       TgH = T36 - T3B;
2801
0
       TgE = Tec + Ted;
2802
0
       TgF = Tei + Tej;
2803
0
       TgG = TgE - TgF;
2804
0
       {
2805
0
            E T7Y, T7Z, Teh, Tek;
2806
0
            T7Y = T7W - T7X;
2807
0
            T7Z = T2Z - T34;
2808
0
            T80 = T7Y + T7Z;
2809
0
            TbW = T7Y - T7Z;
2810
0
            Teh = T2U - T35;
2811
0
            Tek = Tei - Tej;
2812
0
            Tel = Teh - Tek;
2813
0
            TfT = Teh + Tek;
2814
0
       }
2815
0
       {
2816
0
            E T85, T8a, T8i, T8j;
2817
0
            T85 = T81 - T84;
2818
0
            T8a = T86 + T89;
2819
0
            T8b = KP707106781 * (T85 - T8a);
2820
0
            Tc0 = KP707106781 * (T8a + T85);
2821
0
            T8i = T89 - T86;
2822
0
            T8j = T81 + T84;
2823
0
            T8k = KP707106781 * (T8i - T8j);
2824
0
            TbX = KP707106781 * (T8i + T8j);
2825
0
       }
2826
0
       {
2827
0
            E Tee, Tef, T8d, T8g;
2828
0
            Tee = Tec - Ted;
2829
0
            Tef = T3A - T3p;
2830
0
            Teg = Tee - Tef;
2831
0
            TfS = Tee + Tef;
2832
0
            T8d = T2O - T2T;
2833
0
            T8g = T8e - T8f;
2834
0
            T8h = T8d - T8g;
2835
0
            TbZ = T8d + T8g;
2836
0
       }
2837
0
        }
2838
0
        {
2839
0
       E T3H, T8n, T3M, T8o, T3N, Ten, T3S, T8F, T43, T8G, T44, Teo, T4e, Tet, T8x;
2840
0
       E T8A, T4p, Teu, T8s, T8v;
2841
0
       {
2842
0
            E T3E, T3G, T3J, T3L;
2843
0
            T3E = ri[WS(rs, 62)];
2844
0
            T3G = ii[WS(rs, 62)];
2845
0
            T3H = FMA(T3D, T3E, T3F * T3G);
2846
0
            T8n = FNMS(T3F, T3E, T3D * T3G);
2847
0
            T3J = ri[WS(rs, 30)];
2848
0
            T3L = ii[WS(rs, 30)];
2849
0
            T3M = FMA(T3I, T3J, T3K * T3L);
2850
0
            T8o = FNMS(T3K, T3J, T3I * T3L);
2851
0
       }
2852
0
       T3N = T3H + T3M;
2853
0
       Ten = T8n + T8o;
2854
0
       {
2855
0
            E T3P, T3R, T40, T42;
2856
0
            T3P = ri[WS(rs, 14)];
2857
0
            T3R = ii[WS(rs, 14)];
2858
0
            T3S = FMA(T3O, T3P, T3Q * T3R);
2859
0
            T8F = FNMS(T3Q, T3P, T3O * T3R);
2860
0
            T40 = ri[WS(rs, 46)];
2861
0
            T42 = ii[WS(rs, 46)];
2862
0
            T43 = FMA(T3Z, T40, T41 * T42);
2863
0
            T8G = FNMS(T41, T40, T3Z * T42);
2864
0
       }
2865
0
       T44 = T3S + T43;
2866
0
       Teo = T8F + T8G;
2867
0
       {
2868
0
            E T48, T8y, T4d, T8z;
2869
0
            {
2870
0
           E T46, T47, T4a, T4c;
2871
0
           T46 = ri[WS(rs, 6)];
2872
0
           T47 = ii[WS(rs, 6)];
2873
0
           T48 = FMA(T3c, T46, T3e * T47);
2874
0
           T8y = FNMS(T3e, T46, T3c * T47);
2875
0
           T4a = ri[WS(rs, 38)];
2876
0
           T4c = ii[WS(rs, 38)];
2877
0
           T4d = FMA(T49, T4a, T4b * T4c);
2878
0
           T8z = FNMS(T4b, T4a, T49 * T4c);
2879
0
            }
2880
0
            T4e = T48 + T4d;
2881
0
            Tet = T8y + T8z;
2882
0
            T8x = T48 - T4d;
2883
0
            T8A = T8y - T8z;
2884
0
       }
2885
0
       {
2886
0
            E T4j, T8t, T4o, T8u;
2887
0
            {
2888
0
           E T4g, T4i, T4l, T4n;
2889
0
           T4g = ri[WS(rs, 54)];
2890
0
           T4i = ii[WS(rs, 54)];
2891
0
           T4j = FMA(T4f, T4g, T4h * T4i);
2892
0
           T8t = FNMS(T4h, T4g, T4f * T4i);
2893
0
           T4l = ri[WS(rs, 22)];
2894
0
           T4n = ii[WS(rs, 22)];
2895
0
           T4o = FMA(T4k, T4l, T4m * T4n);
2896
0
           T8u = FNMS(T4m, T4l, T4k * T4n);
2897
0
            }
2898
0
            T4p = T4j + T4o;
2899
0
            Teu = T8t + T8u;
2900
0
            T8s = T4j - T4o;
2901
0
            T8v = T8t - T8u;
2902
0
       }
2903
0
       T45 = T3N + T44;
2904
0
       T4q = T4e + T4p;
2905
0
       TgJ = T45 - T4q;
2906
0
       TgK = Ten + Teo;
2907
0
       TgL = Tet + Teu;
2908
0
       TgM = TgK - TgL;
2909
0
       {
2910
0
            E T8p, T8q, Tes, Tev;
2911
0
            T8p = T8n - T8o;
2912
0
            T8q = T3S - T43;
2913
0
            T8r = T8p + T8q;
2914
0
            Tc6 = T8p - T8q;
2915
0
            Tes = T3N - T44;
2916
0
            Tev = Tet - Teu;
2917
0
            Tew = Tes - Tev;
2918
0
            TfW = Tes + Tev;
2919
0
       }
2920
0
       {
2921
0
            E T8w, T8B, T8J, T8K;
2922
0
            T8w = T8s - T8v;
2923
0
            T8B = T8x + T8A;
2924
0
            T8C = KP707106781 * (T8w - T8B);
2925
0
            Tc4 = KP707106781 * (T8B + T8w);
2926
0
            T8J = T8A - T8x;
2927
0
            T8K = T8s + T8v;
2928
0
            T8L = KP707106781 * (T8J - T8K);
2929
0
            Tc7 = KP707106781 * (T8J + T8K);
2930
0
       }
2931
0
       {
2932
0
            E Tep, Teq, T8E, T8H;
2933
0
            Tep = Ten - Teo;
2934
0
            Teq = T4p - T4e;
2935
0
            Ter = Tep - Teq;
2936
0
            TfV = Tep + Teq;
2937
0
            T8E = T3H - T3M;
2938
0
            T8H = T8F - T8G;
2939
0
            T8I = T8E - T8H;
2940
0
            Tc3 = T8E + T8H;
2941
0
       }
2942
0
        }
2943
0
        {
2944
0
       E T5V, Tao, T64, Tap, T65, Tfi, T68, T9K, T6d, T9L, T6e, Tfj, T6o, Tf2, T9Q;
2945
0
       E T9R, T6z, Tf3, T9T, T9W;
2946
0
       {
2947
0
            E T5T, T5U, T5Z, T63;
2948
0
            T5T = ri[WS(rs, 63)];
2949
0
            T5U = ii[WS(rs, 63)];
2950
0
            T5V = FMA(TW, T5T, T10 * T5U);
2951
0
            Tao = FNMS(T10, T5T, TW * T5U);
2952
0
            T5Z = ri[WS(rs, 31)];
2953
0
            T63 = ii[WS(rs, 31)];
2954
0
            T64 = FMA(T5Y, T5Z, T62 * T63);
2955
0
            Tap = FNMS(T62, T5Z, T5Y * T63);
2956
0
       }
2957
0
       T65 = T5V + T64;
2958
0
       Tfi = Tao + Tap;
2959
0
       {
2960
0
            E T66, T67, T6a, T6c;
2961
0
            T66 = ri[WS(rs, 15)];
2962
0
            T67 = ii[WS(rs, 15)];
2963
0
            T68 = FMA(TV, T66, TZ * T67);
2964
0
            T9K = FNMS(TZ, T66, TV * T67);
2965
0
            T6a = ri[WS(rs, 47)];
2966
0
            T6c = ii[WS(rs, 47)];
2967
0
            T6d = FMA(T69, T6a, T6b * T6c);
2968
0
            T9L = FNMS(T6b, T6a, T69 * T6c);
2969
0
       }
2970
0
       T6e = T68 + T6d;
2971
0
       Tfj = T9K + T9L;
2972
0
       {
2973
0
            E T6i, T9O, T6n, T9P;
2974
0
            {
2975
0
           E T6g, T6h, T6k, T6m;
2976
0
           T6g = ri[WS(rs, 7)];
2977
0
           T6h = ii[WS(rs, 7)];
2978
0
           T6i = FMA(T1t, T6g, T1u * T6h);
2979
0
           T9O = FNMS(T1u, T6g, T1t * T6h);
2980
0
           T6k = ri[WS(rs, 39)];
2981
0
           T6m = ii[WS(rs, 39)];
2982
0
           T6n = FMA(T6j, T6k, T6l * T6m);
2983
0
           T9P = FNMS(T6l, T6k, T6j * T6m);
2984
0
            }
2985
0
            T6o = T6i + T6n;
2986
0
            Tf2 = T9O + T9P;
2987
0
            T9Q = T9O - T9P;
2988
0
            T9R = T6i - T6n;
2989
0
       }
2990
0
       {
2991
0
            E T6t, T9U, T6y, T9V;
2992
0
            {
2993
0
           E T6q, T6s, T6v, T6x;
2994
0
           T6q = ri[WS(rs, 55)];
2995
0
           T6s = ii[WS(rs, 55)];
2996
0
           T6t = FMA(T6p, T6q, T6r * T6s);
2997
0
           T9U = FNMS(T6r, T6q, T6p * T6s);
2998
0
           T6v = ri[WS(rs, 23)];
2999
0
           T6x = ii[WS(rs, 23)];
3000
0
           T6y = FMA(T6u, T6v, T6w * T6x);
3001
0
           T9V = FNMS(T6w, T6v, T6u * T6x);
3002
0
            }
3003
0
            T6z = T6t + T6y;
3004
0
            Tf3 = T9U + T9V;
3005
0
            T9T = T6t - T6y;
3006
0
            T9W = T9U - T9V;
3007
0
       }
3008
0
       {
3009
0
            E T6f, T6A, Tfk, Tfl;
3010
0
            T6f = T65 + T6e;
3011
0
            T6A = T6o + T6z;
3012
0
            T6B = T6f + T6A;
3013
0
            Th1 = T6f - T6A;
3014
0
            Tfk = Tfi - Tfj;
3015
0
            Tfl = T6z - T6o;
3016
0
            Tfm = Tfk - Tfl;
3017
0
            Tga = Tfk + Tfl;
3018
0
       }
3019
0
       {
3020
0
            E Th6, Th7, T9J, T9M;
3021
0
            Th6 = Tfi + Tfj;
3022
0
            Th7 = Tf2 + Tf3;
3023
0
            Th8 = Th6 - Th7;
3024
0
            ThI = Th6 + Th7;
3025
0
            T9J = T5V - T64;
3026
0
            T9M = T9K - T9L;
3027
0
            T9N = T9J - T9M;
3028
0
            Tcv = T9J + T9M;
3029
0
       }
3030
0
       {
3031
0
            E T9S, T9X, Tat, Tau;
3032
0
            T9S = T9Q - T9R;
3033
0
            T9X = T9T + T9W;
3034
0
            T9Y = KP707106781 * (T9S - T9X);
3035
0
            TcH = KP707106781 * (T9S + T9X);
3036
0
            Tat = T9T - T9W;
3037
0
            Tau = T9R + T9Q;
3038
0
            Tav = KP707106781 * (Tat - Tau);
3039
0
            Tcw = KP707106781 * (Tau + Tat);
3040
0
       }
3041
0
       {
3042
0
            E Tf1, Tf4, Taq, Tar;
3043
0
            Tf1 = T65 - T6e;
3044
0
            Tf4 = Tf2 - Tf3;
3045
0
            Tf5 = Tf1 - Tf4;
3046
0
            Tg7 = Tf1 + Tf4;
3047
0
            Taq = Tao - Tap;
3048
0
            Tar = T68 - T6d;
3049
0
            Tas = Taq + Tar;
3050
0
            TcG = Taq - Tar;
3051
0
       }
3052
0
        }
3053
0
        {
3054
0
       E T4w, T8Q, T4B, T8R, T4C, TeA, T4F, T9w, T4K, T9x, T4L, TeB, T4V, TeS, T90;
3055
0
       E T93, T5a, TeT, T8V, T8Y;
3056
0
       {
3057
0
            E T4u, T4v, T4y, T4A;
3058
0
            T4u = ri[WS(rs, 1)];
3059
0
            T4v = ii[WS(rs, 1)];
3060
0
            T4w = FMA(T2, T4u, T5 * T4v);
3061
0
            T8Q = FNMS(T5, T4u, T2 * T4v);
3062
0
            T4y = ri[WS(rs, 33)];
3063
0
            T4A = ii[WS(rs, 33)];
3064
0
            T4B = FMA(T4x, T4y, T4z * T4A);
3065
0
            T8R = FNMS(T4z, T4y, T4x * T4A);
3066
0
       }
3067
0
       T4C = T4w + T4B;
3068
0
       TeA = T8Q + T8R;
3069
0
       {
3070
0
            E T4D, T4E, T4H, T4J;
3071
0
            T4D = ri[WS(rs, 17)];
3072
0
            T4E = ii[WS(rs, 17)];
3073
0
            T4F = FMA(T3V, T4D, T3Y * T4E);
3074
0
            T9w = FNMS(T3Y, T4D, T3V * T4E);
3075
0
            T4H = ri[WS(rs, 49)];
3076
0
            T4J = ii[WS(rs, 49)];
3077
0
            T4K = FMA(T4G, T4H, T4I * T4J);
3078
0
            T9x = FNMS(T4I, T4H, T4G * T4J);
3079
0
       }
3080
0
       T4L = T4F + T4K;
3081
0
       TeB = T9w + T9x;
3082
0
       {
3083
0
            E T4P, T91, T4U, T92;
3084
0
            {
3085
0
           E T4N, T4O, T4R, T4T;
3086
0
           T4N = ri[WS(rs, 9)];
3087
0
           T4O = ii[WS(rs, 9)];
3088
0
           T4P = FMA(T9, T4N, Te * T4O);
3089
0
           T91 = FNMS(Te, T4N, T9 * T4O);
3090
0
           T4R = ri[WS(rs, 41)];
3091
0
           T4T = ii[WS(rs, 41)];
3092
0
           T4U = FMA(T4Q, T4R, T4S * T4T);
3093
0
           T92 = FNMS(T4S, T4R, T4Q * T4T);
3094
0
            }
3095
0
            T4V = T4P + T4U;
3096
0
            TeS = T91 + T92;
3097
0
            T90 = T4P - T4U;
3098
0
            T93 = T91 - T92;
3099
0
       }
3100
0
       {
3101
0
            E T50, T8W, T59, T8X;
3102
0
            {
3103
0
           E T4X, T4Z, T54, T58;
3104
0
           T4X = ri[WS(rs, 57)];
3105
0
           T4Z = ii[WS(rs, 57)];
3106
0
           T50 = FMA(T4W, T4X, T4Y * T4Z);
3107
0
           T8W = FNMS(T4Y, T4X, T4W * T4Z);
3108
0
           T54 = ri[WS(rs, 25)];
3109
0
           T58 = ii[WS(rs, 25)];
3110
0
           T59 = FMA(T53, T54, T57 * T58);
3111
0
           T8X = FNMS(T57, T54, T53 * T58);
3112
0
            }
3113
0
            T5a = T50 + T59;
3114
0
            TeT = T8W + T8X;
3115
0
            T8V = T50 - T59;
3116
0
            T8Y = T8W - T8X;
3117
0
       }
3118
0
       {
3119
0
            E T4M, T5b, TeR, TeU;
3120
0
            T4M = T4C + T4L;
3121
0
            T5b = T4V + T5a;
3122
0
            T5c = T4M + T5b;
3123
0
            TgV = T4M - T5b;
3124
0
            TeR = T4C - T4L;
3125
0
            TeU = TeS - TeT;
3126
0
            TeV = TeR - TeU;
3127
0
            Tg0 = TeR + TeU;
3128
0
       }
3129
0
       {
3130
0
            E TgQ, TgR, T8S, T8T;
3131
0
            TgQ = TeA + TeB;
3132
0
            TgR = TeS + TeT;
3133
0
            TgS = TgQ - TgR;
3134
0
            ThD = TgQ + TgR;
3135
0
            T8S = T8Q - T8R;
3136
0
            T8T = T4F - T4K;
3137
0
            T8U = T8S + T8T;
3138
0
            Tcc = T8S - T8T;
3139
0
       }
3140
0
       {
3141
0
            E T8Z, T94, T9A, T9B;
3142
0
            T8Z = T8V - T8Y;
3143
0
            T94 = T90 + T93;
3144
0
            T95 = KP707106781 * (T8Z - T94);
3145
0
            Tco = KP707106781 * (T94 + T8Z);
3146
0
            T9A = T93 - T90;
3147
0
            T9B = T8V + T8Y;
3148
0
            T9C = KP707106781 * (T9A - T9B);
3149
0
            Tcd = KP707106781 * (T9A + T9B);
3150
0
       }
3151
0
       {
3152
0
            E TeC, TeD, T9v, T9y;
3153
0
            TeC = TeA - TeB;
3154
0
            TeD = T5a - T4V;
3155
0
            TeE = TeC - TeD;
3156
0
            Tg3 = TeC + TeD;
3157
0
            T9v = T4w - T4B;
3158
0
            T9y = T9w - T9x;
3159
0
            T9z = T9v - T9y;
3160
0
            Tcn = T9v + T9y;
3161
0
       }
3162
0
        }
3163
0
        {
3164
0
       E T5l, TeL, T9k, T9n, T5P, TeH, T9a, T9f, T5u, TeM, T9l, T9q, T5G, TeG, T97;
3165
0
       E T9e;
3166
0
       {
3167
0
            E T5f, T9i, T5k, T9j;
3168
0
            {
3169
0
           E T5d, T5e, T5h, T5j;
3170
0
           T5d = ri[WS(rs, 5)];
3171
0
           T5e = ii[WS(rs, 5)];
3172
0
           T5f = FMA(Tg, T5d, Tl * T5e);
3173
0
           T9i = FNMS(Tl, T5d, Tg * T5e);
3174
0
           T5h = ri[WS(rs, 37)];
3175
0
           T5j = ii[WS(rs, 37)];
3176
0
           T5k = FMA(T5g, T5h, T5i * T5j);
3177
0
           T9j = FNMS(T5i, T5h, T5g * T5j);
3178
0
            }
3179
0
            T5l = T5f + T5k;
3180
0
            TeL = T9i + T9j;
3181
0
            T9k = T9i - T9j;
3182
0
            T9n = T5f - T5k;
3183
0
       }
3184
0
       {
3185
0
            E T5J, T98, T5O, T99;
3186
0
            {
3187
0
           E T5H, T5I, T5L, T5N;
3188
0
           T5H = ri[WS(rs, 13)];
3189
0
           T5I = ii[WS(rs, 13)];
3190
0
           T5J = FMA(T1h, T5H, T1j * T5I);
3191
0
           T98 = FNMS(T1j, T5H, T1h * T5I);
3192
0
           T5L = ri[WS(rs, 45)];
3193
0
           T5N = ii[WS(rs, 45)];
3194
0
           T5O = FMA(T5K, T5L, T5M * T5N);
3195
0
           T99 = FNMS(T5M, T5L, T5K * T5N);
3196
0
            }
3197
0
            T5P = T5J + T5O;
3198
0
            TeH = T98 + T99;
3199
0
            T9a = T98 - T99;
3200
0
            T9f = T5J - T5O;
3201
0
       }
3202
0
       {
3203
0
            E T5o, T9o, T5t, T9p;
3204
0
            {
3205
0
           E T5m, T5n, T5q, T5s;
3206
0
           T5m = ri[WS(rs, 21)];
3207
0
           T5n = ii[WS(rs, 21)];
3208
0
           T5o = FMA(T3g, T5m, T3j * T5n);
3209
0
           T9o = FNMS(T3j, T5m, T3g * T5n);
3210
0
           T5q = ri[WS(rs, 53)];
3211
0
           T5s = ii[WS(rs, 53)];
3212
0
           T5t = FMA(T5p, T5q, T5r * T5s);
3213
0
           T9p = FNMS(T5r, T5q, T5p * T5s);
3214
0
            }
3215
0
            T5u = T5o + T5t;
3216
0
            TeM = T9o + T9p;
3217
0
            T9l = T5o - T5t;
3218
0
            T9q = T9o - T9p;
3219
0
       }
3220
0
       {
3221
0
            E T5A, T9c, T5F, T9d;
3222
0
            {
3223
0
           E T5x, T5z, T5C, T5E;
3224
0
           T5x = ri[WS(rs, 61)];
3225
0
           T5z = ii[WS(rs, 61)];
3226
0
           T5A = FMA(T5w, T5x, T5y * T5z);
3227
0
           T9c = FNMS(T5y, T5x, T5w * T5z);
3228
0
           T5C = ri[WS(rs, 29)];
3229
0
           T5E = ii[WS(rs, 29)];
3230
0
           T5F = FMA(T5B, T5C, T5D * T5E);
3231
0
           T9d = FNMS(T5D, T5C, T5B * T5E);
3232
0
            }
3233
0
            T5G = T5A + T5F;
3234
0
            TeG = T9c + T9d;
3235
0
            T97 = T5A - T5F;
3236
0
            T9e = T9c - T9d;
3237
0
       }
3238
0
       {
3239
0
            E T5v, T5Q, TeK, TeN;
3240
0
            T5v = T5l + T5u;
3241
0
            T5Q = T5G + T5P;
3242
0
            T5R = T5v + T5Q;
3243
0
            TgT = T5Q - T5v;
3244
0
            TeK = T5l - T5u;
3245
0
            TeN = TeL - TeM;
3246
0
            TeO = TeK + TeN;
3247
0
            TeW = TeN - TeK;
3248
0
       }
3249
0
       {
3250
0
            E TgW, TgX, T9b, T9g;
3251
0
            TgW = TeL + TeM;
3252
0
            TgX = TeG + TeH;
3253
0
            TgY = TgW - TgX;
3254
0
            ThE = TgW + TgX;
3255
0
            T9b = T97 - T9a;
3256
0
            T9g = T9e + T9f;
3257
0
            T9h = FNMS(KP923879532, T9g, KP382683432 * T9b);
3258
0
            T9F = FMA(KP382683432, T9g, KP923879532 * T9b);
3259
0
       }
3260
0
       {
3261
0
            E T9m, T9r, Tci, Tcj;
3262
0
            T9m = T9k + T9l;
3263
0
            T9r = T9n - T9q;
3264
0
            T9s = FMA(KP923879532, T9m, KP382683432 * T9r);
3265
0
            T9E = FNMS(KP923879532, T9r, KP382683432 * T9m);
3266
0
            Tci = T9k - T9l;
3267
0
            Tcj = T9n + T9q;
3268
0
            Tck = FMA(KP382683432, Tci, KP923879532 * Tcj);
3269
0
            Tcq = FNMS(KP382683432, Tcj, KP923879532 * Tci);
3270
0
       }
3271
0
       {
3272
0
            E TeF, TeI, Tcf, Tcg;
3273
0
            TeF = T5G - T5P;
3274
0
            TeI = TeG - TeH;
3275
0
            TeJ = TeF - TeI;
3276
0
            TeX = TeF + TeI;
3277
0
            Tcf = T97 + T9a;
3278
0
            Tcg = T9e - T9f;
3279
0
            Tch = FNMS(KP382683432, Tcg, KP923879532 * Tcf);
3280
0
            Tcr = FMA(KP923879532, Tcg, KP382683432 * Tcf);
3281
0
       }
3282
0
        }
3283
0
        {
3284
0
       E T6K, Tf6, Ta2, Ta5, T7c, Tfd, Tae, Taj, T6T, Tf7, Ta3, Ta8, T73, Tfc, Tad;
3285
0
       E Tag;
3286
0
       {
3287
0
            E T6E, Ta0, T6J, Ta1;
3288
0
            {
3289
0
           E T6C, T6D, T6G, T6I;
3290
0
           T6C = ri[WS(rs, 3)];
3291
0
           T6D = ii[WS(rs, 3)];
3292
0
           T6E = FMA(T3, T6C, T6 * T6D);
3293
0
           Ta0 = FNMS(T6, T6C, T3 * T6D);
3294
0
           T6G = ri[WS(rs, 35)];
3295
0
           T6I = ii[WS(rs, 35)];
3296
0
           T6J = FMA(T6F, T6G, T6H * T6I);
3297
0
           Ta1 = FNMS(T6H, T6G, T6F * T6I);
3298
0
            }
3299
0
            T6K = T6E + T6J;
3300
0
            Tf6 = Ta0 + Ta1;
3301
0
            Ta2 = Ta0 - Ta1;
3302
0
            Ta5 = T6E - T6J;
3303
0
       }
3304
0
       {
3305
0
            E T76, Tah, T7b, Tai;
3306
0
            {
3307
0
           E T74, T75, T78, T7a;
3308
0
           T74 = ri[WS(rs, 11)];
3309
0
           T75 = ii[WS(rs, 11)];
3310
0
           T76 = FMA(TA, T74, TE * T75);
3311
0
           Tah = FNMS(TE, T74, TA * T75);
3312
0
           T78 = ri[WS(rs, 43)];
3313
0
           T7a = ii[WS(rs, 43)];
3314
0
           T7b = FMA(T77, T78, T79 * T7a);
3315
0
           Tai = FNMS(T79, T78, T77 * T7a);
3316
0
            }
3317
0
            T7c = T76 + T7b;
3318
0
            Tfd = Tah + Tai;
3319
0
            Tae = T76 - T7b;
3320
0
            Taj = Tah - Tai;
3321
0
       }
3322
0
       {
3323
0
            E T6N, Ta6, T6S, Ta7;
3324
0
            {
3325
0
           E T6L, T6M, T6P, T6R;
3326
0
           T6L = ri[WS(rs, 19)];
3327
0
           T6M = ii[WS(rs, 19)];
3328
0
           T6N = FMA(T2z, T6L, T2C * T6M);
3329
0
           Ta6 = FNMS(T2C, T6L, T2z * T6M);
3330
0
           T6P = ri[WS(rs, 51)];
3331
0
           T6R = ii[WS(rs, 51)];
3332
0
           T6S = FMA(T6O, T6P, T6Q * T6R);
3333
0
           Ta7 = FNMS(T6Q, T6P, T6O * T6R);
3334
0
            }
3335
0
            T6T = T6N + T6S;
3336
0
            Tf7 = Ta6 + Ta7;
3337
0
            Ta3 = T6N - T6S;
3338
0
            Ta8 = Ta6 - Ta7;
3339
0
       }
3340
0
       {
3341
0
            E T6Z, Tab, T72, Tac;
3342
0
            {
3343
0
           E T6W, T6Y, T70, T71;
3344
0
           T6W = ri[WS(rs, 59)];
3345
0
           T6Y = ii[WS(rs, 59)];
3346
0
           T6Z = FMA(T6V, T6W, T6X * T6Y);
3347
0
           Tab = FNMS(T6X, T6W, T6V * T6Y);
3348
0
           T70 = ri[WS(rs, 27)];
3349
0
           T71 = ii[WS(rs, 27)];
3350
0
           T72 = FMA(Th, T70, Tm * T71);
3351
0
           Tac = FNMS(Tm, T70, Th * T71);
3352
0
            }
3353
0
            T73 = T6Z + T72;
3354
0
            Tfc = Tab + Tac;
3355
0
            Tad = Tab - Tac;
3356
0
            Tag = T6Z - T72;
3357
0
       }
3358
0
       {
3359
0
            E T6U, T7d, Tfb, Tfe;
3360
0
            T6U = T6K + T6T;
3361
0
            T7d = T73 + T7c;
3362
0
            T7e = T6U + T7d;
3363
0
            Th9 = T7d - T6U;
3364
0
            Tfb = T73 - T7c;
3365
0
            Tfe = Tfc - Tfd;
3366
0
            Tff = Tfb + Tfe;
3367
0
            Tfn = Tfb - Tfe;
3368
0
       }
3369
0
       {
3370
0
            E Th2, Th3, Ta4, Ta9;
3371
0
            Th2 = Tf6 + Tf7;
3372
0
            Th3 = Tfc + Tfd;
3373
0
            Th4 = Th2 - Th3;
3374
0
            ThJ = Th2 + Th3;
3375
0
            Ta4 = Ta2 + Ta3;
3376
0
            Ta9 = Ta5 - Ta8;
3377
0
            Taa = FNMS(KP923879532, Ta9, KP382683432 * Ta4);
3378
0
            Tay = FMA(KP923879532, Ta4, KP382683432 * Ta9);
3379
0
       }
3380
0
       {
3381
0
            E Taf, Tak, TcB, TcC;
3382
0
            Taf = Tad + Tae;
3383
0
            Tak = Tag - Taj;
3384
0
            Tal = FMA(KP382683432, Taf, KP923879532 * Tak);
3385
0
            Tax = FNMS(KP923879532, Taf, KP382683432 * Tak);
3386
0
            TcB = Tad - Tae;
3387
0
            TcC = Tag + Taj;
3388
0
            TcD = FMA(KP923879532, TcB, KP382683432 * TcC);
3389
0
            TcJ = FNMS(KP382683432, TcB, KP923879532 * TcC);
3390
0
       }
3391
0
       {
3392
0
            E Tf8, Tf9, Tcy, Tcz;
3393
0
            Tf8 = Tf6 - Tf7;
3394
0
            Tf9 = T6K - T6T;
3395
0
            Tfa = Tf8 - Tf9;
3396
0
            Tfo = Tf9 + Tf8;
3397
0
            Tcy = Ta2 - Ta3;
3398
0
            Tcz = Ta5 + Ta8;
3399
0
            TcA = FNMS(KP382683432, Tcz, KP923879532 * Tcy);
3400
0
            TcK = FMA(KP382683432, Tcy, KP923879532 * Tcz);
3401
0
       }
3402
0
        }
3403
0
        {
3404
0
       E T2L, Thx, ThU, ThV, Ti5, Tib, T4s, Tia, T7g, Ti7, ThG, ThO, ThL, ThP, ThA;
3405
0
       E ThW;
3406
0
       {
3407
0
            E T1L, T2K, ThS, ThT;
3408
0
            T1L = T17 + T1K;
3409
0
            T2K = T2e + T2J;
3410
0
            T2L = T1L + T2K;
3411
0
            Thx = T1L - T2K;
3412
0
            ThS = ThD + ThE;
3413
0
            ThT = ThI + ThJ;
3414
0
            ThU = ThS - ThT;
3415
0
            ThV = ThS + ThT;
3416
0
       }
3417
0
       {
3418
0
            E ThX, Ti4, T3C, T4r;
3419
0
            ThX = TgA + TgB;
3420
0
            Ti4 = ThY + Ti3;
3421
0
            Ti5 = ThX + Ti4;
3422
0
            Tib = Ti4 - ThX;
3423
0
            T3C = T36 + T3B;
3424
0
            T4r = T45 + T4q;
3425
0
            T4s = T3C + T4r;
3426
0
            Tia = T4r - T3C;
3427
0
       }
3428
0
       {
3429
0
            E T5S, T7f, ThC, ThF;
3430
0
            T5S = T5c + T5R;
3431
0
            T7f = T6B + T7e;
3432
0
            T7g = T5S + T7f;
3433
0
            Ti7 = T7f - T5S;
3434
0
            ThC = T5c - T5R;
3435
0
            ThF = ThD - ThE;
3436
0
            ThG = ThC + ThF;
3437
0
            ThO = ThF - ThC;
3438
0
       }
3439
0
       {
3440
0
            E ThH, ThK, Thy, Thz;
3441
0
            ThH = T6B - T7e;
3442
0
            ThK = ThI - ThJ;
3443
0
            ThL = ThH - ThK;
3444
0
            ThP = ThH + ThK;
3445
0
            Thy = TgE + TgF;
3446
0
            Thz = TgK + TgL;
3447
0
            ThA = Thy - Thz;
3448
0
            ThW = Thy + Thz;
3449
0
       }
3450
0
       {
3451
0
            E T4t, Ti6, ThR, Ti8;
3452
0
            T4t = T2L + T4s;
3453
0
            ri[WS(rs, 32)] = T4t - T7g;
3454
0
            ri[0] = T4t + T7g;
3455
0
            Ti6 = ThW + Ti5;
3456
0
            ii[0] = ThV + Ti6;
3457
0
            ii[WS(rs, 32)] = Ti6 - ThV;
3458
0
            ThR = T2L - T4s;
3459
0
            ri[WS(rs, 48)] = ThR - ThU;
3460
0
            ri[WS(rs, 16)] = ThR + ThU;
3461
0
            Ti8 = Ti5 - ThW;
3462
0
            ii[WS(rs, 16)] = Ti7 + Ti8;
3463
0
            ii[WS(rs, 48)] = Ti8 - Ti7;
3464
0
       }
3465
0
       {
3466
0
            E ThB, ThM, Ti9, Tic;
3467
0
            ThB = Thx + ThA;
3468
0
            ThM = KP707106781 * (ThG + ThL);
3469
0
            ri[WS(rs, 40)] = ThB - ThM;
3470
0
            ri[WS(rs, 8)] = ThB + ThM;
3471
0
            Ti9 = KP707106781 * (ThO + ThP);
3472
0
            Tic = Tia + Tib;
3473
0
            ii[WS(rs, 8)] = Ti9 + Tic;
3474
0
            ii[WS(rs, 40)] = Tic - Ti9;
3475
0
       }
3476
0
       {
3477
0
            E ThN, ThQ, Tid, Tie;
3478
0
            ThN = Thx - ThA;
3479
0
            ThQ = KP707106781 * (ThO - ThP);
3480
0
            ri[WS(rs, 56)] = ThN - ThQ;
3481
0
            ri[WS(rs, 24)] = ThN + ThQ;
3482
0
            Tid = KP707106781 * (ThL - ThG);
3483
0
            Tie = Tib - Tia;
3484
0
            ii[WS(rs, 24)] = Tid + Tie;
3485
0
            ii[WS(rs, 56)] = Tie - Tid;
3486
0
       }
3487
0
        }
3488
0
        {
3489
0
       E TgD, Thh, Thr, Thv, Tij, Tip, TgO, Tig, Th0, The, Thk, Tio, Tho, Thu, Thb;
3490
0
       E Thf;
3491
0
       {
3492
0
            E Tgz, TgC, Thp, Thq;
3493
0
            Tgz = T17 - T1K;
3494
0
            TgC = TgA - TgB;
3495
0
            TgD = Tgz - TgC;
3496
0
            Thh = Tgz + TgC;
3497
0
            Thp = Th1 + Th4;
3498
0
            Thq = Th8 + Th9;
3499
0
            Thr = FNMS(KP382683432, Thq, KP923879532 * Thp);
3500
0
            Thv = FMA(KP923879532, Thq, KP382683432 * Thp);
3501
0
       }
3502
0
       {
3503
0
            E Tih, Tii, TgI, TgN;
3504
0
            Tih = T2J - T2e;
3505
0
            Tii = Ti3 - ThY;
3506
0
            Tij = Tih + Tii;
3507
0
            Tip = Tii - Tih;
3508
0
            TgI = TgG - TgH;
3509
0
            TgN = TgJ + TgM;
3510
0
            TgO = KP707106781 * (TgI - TgN);
3511
0
            Tig = KP707106781 * (TgI + TgN);
3512
0
       }
3513
0
       {
3514
0
            E TgU, TgZ, Thi, Thj;
3515
0
            TgU = TgS - TgT;
3516
0
            TgZ = TgV - TgY;
3517
0
            Th0 = FMA(KP923879532, TgU, KP382683432 * TgZ);
3518
0
            The = FNMS(KP923879532, TgZ, KP382683432 * TgU);
3519
0
            Thi = TgH + TgG;
3520
0
            Thj = TgJ - TgM;
3521
0
            Thk = KP707106781 * (Thi + Thj);
3522
0
            Tio = KP707106781 * (Thj - Thi);
3523
0
       }
3524
0
       {
3525
0
            E Thm, Thn, Th5, Tha;
3526
0
            Thm = TgS + TgT;
3527
0
            Thn = TgV + TgY;
3528
0
            Tho = FMA(KP382683432, Thm, KP923879532 * Thn);
3529
0
            Thu = FNMS(KP382683432, Thn, KP923879532 * Thm);
3530
0
            Th5 = Th1 - Th4;
3531
0
            Tha = Th8 - Th9;
3532
0
            Thb = FNMS(KP923879532, Tha, KP382683432 * Th5);
3533
0
            Thf = FMA(KP382683432, Tha, KP923879532 * Th5);
3534
0
       }
3535
0
       {
3536
0
            E TgP, Thc, Tin, Tiq;
3537
0
            TgP = TgD + TgO;
3538
0
            Thc = Th0 + Thb;
3539
0
            ri[WS(rs, 44)] = TgP - Thc;
3540
0
            ri[WS(rs, 12)] = TgP + Thc;
3541
0
            Tin = The + Thf;
3542
0
            Tiq = Tio + Tip;
3543
0
            ii[WS(rs, 12)] = Tin + Tiq;
3544
0
            ii[WS(rs, 44)] = Tiq - Tin;
3545
0
       }
3546
0
       {
3547
0
            E Thd, Thg, Tir, Tis;
3548
0
            Thd = TgD - TgO;
3549
0
            Thg = The - Thf;
3550
0
            ri[WS(rs, 60)] = Thd - Thg;
3551
0
            ri[WS(rs, 28)] = Thd + Thg;
3552
0
            Tir = Thb - Th0;
3553
0
            Tis = Tip - Tio;
3554
0
            ii[WS(rs, 28)] = Tir + Tis;
3555
0
            ii[WS(rs, 60)] = Tis - Tir;
3556
0
       }
3557
0
       {
3558
0
            E Thl, Ths, Tif, Tik;
3559
0
            Thl = Thh + Thk;
3560
0
            Ths = Tho + Thr;
3561
0
            ri[WS(rs, 36)] = Thl - Ths;
3562
0
            ri[WS(rs, 4)] = Thl + Ths;
3563
0
            Tif = Thu + Thv;
3564
0
            Tik = Tig + Tij;
3565
0
            ii[WS(rs, 4)] = Tif + Tik;
3566
0
            ii[WS(rs, 36)] = Tik - Tif;
3567
0
       }
3568
0
       {
3569
0
            E Tht, Thw, Til, Tim;
3570
0
            Tht = Thh - Thk;
3571
0
            Thw = Thu - Thv;
3572
0
            ri[WS(rs, 52)] = Tht - Thw;
3573
0
            ri[WS(rs, 20)] = Tht + Thw;
3574
0
            Til = Thr - Tho;
3575
0
            Tim = Tij - Tig;
3576
0
            ii[WS(rs, 20)] = Til + Tim;
3577
0
            ii[WS(rs, 52)] = Tim - Til;
3578
0
       }
3579
0
        }
3580
0
        {
3581
0
       E Teb, Tfx, Tey, TiK, TiN, TiT, TfA, TiS, Tfr, TfL, Tfv, TfH, Tf0, TfK, Tfu;
3582
0
       E TfE;
3583
0
       {
3584
0
            E TdZ, Tea, Tfy, Tfz;
3585
0
            TdZ = TdV - TdY;
3586
0
            Tea = KP707106781 * (Te4 - Te9);
3587
0
            Teb = TdZ - Tea;
3588
0
            Tfx = TdZ + Tea;
3589
0
            {
3590
0
           E Tem, Tex, TiL, TiM;
3591
0
           Tem = FNMS(KP923879532, Tel, KP382683432 * Teg);
3592
0
           Tex = FMA(KP382683432, Ter, KP923879532 * Tew);
3593
0
           Tey = Tem - Tex;
3594
0
           TiK = Tem + Tex;
3595
0
           TiL = KP707106781 * (TfP - TfO);
3596
0
           TiM = Tix - Tiw;
3597
0
           TiN = TiL + TiM;
3598
0
           TiT = TiM - TiL;
3599
0
            }
3600
0
            Tfy = FMA(KP923879532, Teg, KP382683432 * Tel);
3601
0
            Tfz = FNMS(KP923879532, Ter, KP382683432 * Tew);
3602
0
            TfA = Tfy + Tfz;
3603
0
            TiS = Tfz - Tfy;
3604
0
            {
3605
0
           E Tfh, TfF, Tfq, TfG, Tfg, Tfp;
3606
0
           Tfg = KP707106781 * (Tfa - Tff);
3607
0
           Tfh = Tf5 - Tfg;
3608
0
           TfF = Tf5 + Tfg;
3609
0
           Tfp = KP707106781 * (Tfn - Tfo);
3610
0
           Tfq = Tfm - Tfp;
3611
0
           TfG = Tfm + Tfp;
3612
0
           Tfr = FNMS(KP980785280, Tfq, KP195090322 * Tfh);
3613
0
           TfL = FMA(KP831469612, TfG, KP555570233 * TfF);
3614
0
           Tfv = FMA(KP195090322, Tfq, KP980785280 * Tfh);
3615
0
           TfH = FNMS(KP555570233, TfG, KP831469612 * TfF);
3616
0
            }
3617
0
            {
3618
0
           E TeQ, TfC, TeZ, TfD, TeP, TeY;
3619
0
           TeP = KP707106781 * (TeJ - TeO);
3620
0
           TeQ = TeE - TeP;
3621
0
           TfC = TeE + TeP;
3622
0
           TeY = KP707106781 * (TeW - TeX);
3623
0
           TeZ = TeV - TeY;
3624
0
           TfD = TeV + TeY;
3625
0
           Tf0 = FMA(KP980785280, TeQ, KP195090322 * TeZ);
3626
0
           TfK = FNMS(KP555570233, TfD, KP831469612 * TfC);
3627
0
           Tfu = FNMS(KP980785280, TeZ, KP195090322 * TeQ);
3628
0
           TfE = FMA(KP555570233, TfC, KP831469612 * TfD);
3629
0
            }
3630
0
       }
3631
0
       {
3632
0
            E Tez, Tfs, TiR, TiU;
3633
0
            Tez = Teb + Tey;
3634
0
            Tfs = Tf0 + Tfr;
3635
0
            ri[WS(rs, 46)] = Tez - Tfs;
3636
0
            ri[WS(rs, 14)] = Tez + Tfs;
3637
0
            TiR = Tfu + Tfv;
3638
0
            TiU = TiS + TiT;
3639
0
            ii[WS(rs, 14)] = TiR + TiU;
3640
0
            ii[WS(rs, 46)] = TiU - TiR;
3641
0
       }
3642
0
       {
3643
0
            E Tft, Tfw, TiV, TiW;
3644
0
            Tft = Teb - Tey;
3645
0
            Tfw = Tfu - Tfv;
3646
0
            ri[WS(rs, 62)] = Tft - Tfw;
3647
0
            ri[WS(rs, 30)] = Tft + Tfw;
3648
0
            TiV = Tfr - Tf0;
3649
0
            TiW = TiT - TiS;
3650
0
            ii[WS(rs, 30)] = TiV + TiW;
3651
0
            ii[WS(rs, 62)] = TiW - TiV;
3652
0
       }
3653
0
       {
3654
0
            E TfB, TfI, TiJ, TiO;
3655
0
            TfB = Tfx + TfA;
3656
0
            TfI = TfE + TfH;
3657
0
            ri[WS(rs, 38)] = TfB - TfI;
3658
0
            ri[WS(rs, 6)] = TfB + TfI;
3659
0
            TiJ = TfK + TfL;
3660
0
            TiO = TiK + TiN;
3661
0
            ii[WS(rs, 6)] = TiJ + TiO;
3662
0
            ii[WS(rs, 38)] = TiO - TiJ;
3663
0
       }
3664
0
       {
3665
0
            E TfJ, TfM, TiP, TiQ;
3666
0
            TfJ = Tfx - TfA;
3667
0
            TfM = TfK - TfL;
3668
0
            ri[WS(rs, 54)] = TfJ - TfM;
3669
0
            ri[WS(rs, 22)] = TfJ + TfM;
3670
0
            TiP = TfH - TfE;
3671
0
            TiQ = TiN - TiK;
3672
0
            ii[WS(rs, 22)] = TiP + TiQ;
3673
0
            ii[WS(rs, 54)] = TiQ - TiP;
3674
0
       }
3675
0
        }
3676
0
        {
3677
0
       E TfR, Tgj, TfY, Tiu, Tiz, TiF, Tgm, TiE, Tgd, Tgx, Tgh, Tgt, Tg6, Tgw, Tgg;
3678
0
       E Tgq;
3679
0
       {
3680
0
            E TfN, TfQ, Tgk, Tgl;
3681
0
            TfN = TdV + TdY;
3682
0
            TfQ = KP707106781 * (TfO + TfP);
3683
0
            TfR = TfN - TfQ;
3684
0
            Tgj = TfN + TfQ;
3685
0
            {
3686
0
           E TfU, TfX, Tiv, Tiy;
3687
0
           TfU = FNMS(KP382683432, TfT, KP923879532 * TfS);
3688
0
           TfX = FMA(KP923879532, TfV, KP382683432 * TfW);
3689
0
           TfY = TfU - TfX;
3690
0
           Tiu = TfU + TfX;
3691
0
           Tiv = KP707106781 * (Te4 + Te9);
3692
0
           Tiy = Tiw + Tix;
3693
0
           Tiz = Tiv + Tiy;
3694
0
           TiF = Tiy - Tiv;
3695
0
            }
3696
0
            Tgk = FMA(KP382683432, TfS, KP923879532 * TfT);
3697
0
            Tgl = FNMS(KP382683432, TfV, KP923879532 * TfW);
3698
0
            Tgm = Tgk + Tgl;
3699
0
            TiE = Tgl - Tgk;
3700
0
            {
3701
0
           E Tg9, Tgr, Tgc, Tgs, Tg8, Tgb;
3702
0
           Tg8 = KP707106781 * (Tfo + Tfn);
3703
0
           Tg9 = Tg7 - Tg8;
3704
0
           Tgr = Tg7 + Tg8;
3705
0
           Tgb = KP707106781 * (Tfa + Tff);
3706
0
           Tgc = Tga - Tgb;
3707
0
           Tgs = Tga + Tgb;
3708
0
           Tgd = FNMS(KP831469612, Tgc, KP555570233 * Tg9);
3709
0
           Tgx = FMA(KP195090322, Tgr, KP980785280 * Tgs);
3710
0
           Tgh = FMA(KP831469612, Tg9, KP555570233 * Tgc);
3711
0
           Tgt = FNMS(KP195090322, Tgs, KP980785280 * Tgr);
3712
0
            }
3713
0
            {
3714
0
           E Tg2, Tgo, Tg5, Tgp, Tg1, Tg4;
3715
0
           Tg1 = KP707106781 * (TeO + TeJ);
3716
0
           Tg2 = Tg0 - Tg1;
3717
0
           Tgo = Tg0 + Tg1;
3718
0
           Tg4 = KP707106781 * (TeW + TeX);
3719
0
           Tg5 = Tg3 - Tg4;
3720
0
           Tgp = Tg3 + Tg4;
3721
0
           Tg6 = FMA(KP555570233, Tg2, KP831469612 * Tg5);
3722
0
           Tgw = FNMS(KP195090322, Tgo, KP980785280 * Tgp);
3723
0
           Tgg = FNMS(KP831469612, Tg2, KP555570233 * Tg5);
3724
0
           Tgq = FMA(KP980785280, Tgo, KP195090322 * Tgp);
3725
0
            }
3726
0
       }
3727
0
       {
3728
0
            E TfZ, Tge, TiD, TiG;
3729
0
            TfZ = TfR + TfY;
3730
0
            Tge = Tg6 + Tgd;
3731
0
            ri[WS(rs, 42)] = TfZ - Tge;
3732
0
            ri[WS(rs, 10)] = TfZ + Tge;
3733
0
            TiD = Tgg + Tgh;
3734
0
            TiG = TiE + TiF;
3735
0
            ii[WS(rs, 10)] = TiD + TiG;
3736
0
            ii[WS(rs, 42)] = TiG - TiD;
3737
0
       }
3738
0
       {
3739
0
            E Tgf, Tgi, TiH, TiI;
3740
0
            Tgf = TfR - TfY;
3741
0
            Tgi = Tgg - Tgh;
3742
0
            ri[WS(rs, 58)] = Tgf - Tgi;
3743
0
            ri[WS(rs, 26)] = Tgf + Tgi;
3744
0
            TiH = Tgd - Tg6;
3745
0
            TiI = TiF - TiE;
3746
0
            ii[WS(rs, 26)] = TiH + TiI;
3747
0
            ii[WS(rs, 58)] = TiI - TiH;
3748
0
       }
3749
0
       {
3750
0
            E Tgn, Tgu, Tit, TiA;
3751
0
            Tgn = Tgj + Tgm;
3752
0
            Tgu = Tgq + Tgt;
3753
0
            ri[WS(rs, 34)] = Tgn - Tgu;
3754
0
            ri[WS(rs, 2)] = Tgn + Tgu;
3755
0
            Tit = Tgw + Tgx;
3756
0
            TiA = Tiu + Tiz;
3757
0
            ii[WS(rs, 2)] = Tit + TiA;
3758
0
            ii[WS(rs, 34)] = TiA - Tit;
3759
0
       }
3760
0
       {
3761
0
            E Tgv, Tgy, TiB, TiC;
3762
0
            Tgv = Tgj - Tgm;
3763
0
            Tgy = Tgw - Tgx;
3764
0
            ri[WS(rs, 50)] = Tgv - Tgy;
3765
0
            ri[WS(rs, 18)] = Tgv + Tgy;
3766
0
            TiB = Tgt - Tgq;
3767
0
            TiC = Tiz - Tiu;
3768
0
            ii[WS(rs, 18)] = TiB + TiC;
3769
0
            ii[WS(rs, 50)] = TiC - TiB;
3770
0
       }
3771
0
        }
3772
0
        {
3773
0
       E T7V, TaH, TjN, TjT, T8O, TjS, TaK, TjK, T9I, TaU, TaE, TaO, TaB, TaV, TaF;
3774
0
       E TaR;
3775
0
       {
3776
0
            E T7x, T7U, TjL, TjM;
3777
0
            T7x = T7l - T7w;
3778
0
            T7U = T7I - T7T;
3779
0
            T7V = T7x - T7U;
3780
0
            TaH = T7x + T7U;
3781
0
            TjL = TaZ - TaY;
3782
0
            TjM = Tjx - Tjw;
3783
0
            TjN = TjL + TjM;
3784
0
            TjT = TjM - TjL;
3785
0
       }
3786
0
       {
3787
0
            E T8m, TaI, T8N, TaJ;
3788
0
            {
3789
0
           E T8c, T8l, T8D, T8M;
3790
0
           T8c = T80 - T8b;
3791
0
           T8l = T8h - T8k;
3792
0
           T8m = FNMS(KP980785280, T8l, KP195090322 * T8c);
3793
0
           TaI = FMA(KP980785280, T8c, KP195090322 * T8l);
3794
0
           T8D = T8r - T8C;
3795
0
           T8M = T8I - T8L;
3796
0
           T8N = FMA(KP195090322, T8D, KP980785280 * T8M);
3797
0
           TaJ = FNMS(KP980785280, T8D, KP195090322 * T8M);
3798
0
            }
3799
0
            T8O = T8m - T8N;
3800
0
            TjS = TaJ - TaI;
3801
0
            TaK = TaI + TaJ;
3802
0
            TjK = T8m + T8N;
3803
0
       }
3804
0
       {
3805
0
            E T9u, TaM, T9H, TaN;
3806
0
            {
3807
0
           E T96, T9t, T9D, T9G;
3808
0
           T96 = T8U - T95;
3809
0
           T9t = T9h - T9s;
3810
0
           T9u = T96 - T9t;
3811
0
           TaM = T96 + T9t;
3812
0
           T9D = T9z - T9C;
3813
0
           T9G = T9E - T9F;
3814
0
           T9H = T9D - T9G;
3815
0
           TaN = T9D + T9G;
3816
0
            }
3817
0
            T9I = FMA(KP995184726, T9u, KP098017140 * T9H);
3818
0
            TaU = FNMS(KP634393284, TaN, KP773010453 * TaM);
3819
0
            TaE = FNMS(KP995184726, T9H, KP098017140 * T9u);
3820
0
            TaO = FMA(KP634393284, TaM, KP773010453 * TaN);
3821
0
       }
3822
0
       {
3823
0
            E Tan, TaP, TaA, TaQ;
3824
0
            {
3825
0
           E T9Z, Tam, Taw, Taz;
3826
0
           T9Z = T9N - T9Y;
3827
0
           Tam = Taa - Tal;
3828
0
           Tan = T9Z - Tam;
3829
0
           TaP = T9Z + Tam;
3830
0
           Taw = Tas - Tav;
3831
0
           Taz = Tax - Tay;
3832
0
           TaA = Taw - Taz;
3833
0
           TaQ = Taw + Taz;
3834
0
            }
3835
0
            TaB = FNMS(KP995184726, TaA, KP098017140 * Tan);
3836
0
            TaV = FMA(KP773010453, TaQ, KP634393284 * TaP);
3837
0
            TaF = FMA(KP098017140, TaA, KP995184726 * Tan);
3838
0
            TaR = FNMS(KP634393284, TaQ, KP773010453 * TaP);
3839
0
       }
3840
0
       {
3841
0
            E T8P, TaC, TjR, TjU;
3842
0
            T8P = T7V + T8O;
3843
0
            TaC = T9I + TaB;
3844
0
            ri[WS(rs, 47)] = T8P - TaC;
3845
0
            ri[WS(rs, 15)] = T8P + TaC;
3846
0
            TjR = TaE + TaF;
3847
0
            TjU = TjS + TjT;
3848
0
            ii[WS(rs, 15)] = TjR + TjU;
3849
0
            ii[WS(rs, 47)] = TjU - TjR;
3850
0
       }
3851
0
       {
3852
0
            E TaD, TaG, TjV, TjW;
3853
0
            TaD = T7V - T8O;
3854
0
            TaG = TaE - TaF;
3855
0
            ri[WS(rs, 63)] = TaD - TaG;
3856
0
            ri[WS(rs, 31)] = TaD + TaG;
3857
0
            TjV = TaB - T9I;
3858
0
            TjW = TjT - TjS;
3859
0
            ii[WS(rs, 31)] = TjV + TjW;
3860
0
            ii[WS(rs, 63)] = TjW - TjV;
3861
0
       }
3862
0
       {
3863
0
            E TaL, TaS, TjJ, TjO;
3864
0
            TaL = TaH + TaK;
3865
0
            TaS = TaO + TaR;
3866
0
            ri[WS(rs, 39)] = TaL - TaS;
3867
0
            ri[WS(rs, 7)] = TaL + TaS;
3868
0
            TjJ = TaU + TaV;
3869
0
            TjO = TjK + TjN;
3870
0
            ii[WS(rs, 7)] = TjJ + TjO;
3871
0
            ii[WS(rs, 39)] = TjO - TjJ;
3872
0
       }
3873
0
       {
3874
0
            E TaT, TaW, TjP, TjQ;
3875
0
            TaT = TaH - TaK;
3876
0
            TaW = TaU - TaV;
3877
0
            ri[WS(rs, 55)] = TaT - TaW;
3878
0
            ri[WS(rs, 23)] = TaT + TaW;
3879
0
            TjP = TaR - TaO;
3880
0
            TjQ = TjN - TjK;
3881
0
            ii[WS(rs, 23)] = TjP + TjQ;
3882
0
            ii[WS(rs, 55)] = TjQ - TjP;
3883
0
       }
3884
0
        }
3885
0
        {
3886
0
       E TbV, TcT, Tjj, Tjp, Tca, Tjo, TcW, Tjg, Tcu, Td6, TcQ, Td0, TcN, Td7, TcR;
3887
0
       E Td3;
3888
0
       {
3889
0
            E TbN, TbU, Tjh, Tji;
3890
0
            TbN = TbJ - TbM;
3891
0
            TbU = TbQ - TbT;
3892
0
            TbV = TbN - TbU;
3893
0
            TcT = TbN + TbU;
3894
0
            Tjh = Tdb - Tda;
3895
0
            Tji = Tj3 - Tj0;
3896
0
            Tjj = Tjh + Tji;
3897
0
            Tjp = Tji - Tjh;
3898
0
       }
3899
0
       {
3900
0
            E Tc2, TcU, Tc9, TcV;
3901
0
            {
3902
0
           E TbY, Tc1, Tc5, Tc8;
3903
0
           TbY = TbW - TbX;
3904
0
           Tc1 = TbZ - Tc0;
3905
0
           Tc2 = FNMS(KP831469612, Tc1, KP555570233 * TbY);
3906
0
           TcU = FMA(KP555570233, Tc1, KP831469612 * TbY);
3907
0
           Tc5 = Tc3 - Tc4;
3908
0
           Tc8 = Tc6 - Tc7;
3909
0
           Tc9 = FMA(KP831469612, Tc5, KP555570233 * Tc8);
3910
0
           TcV = FNMS(KP831469612, Tc8, KP555570233 * Tc5);
3911
0
            }
3912
0
            Tca = Tc2 - Tc9;
3913
0
            Tjo = TcV - TcU;
3914
0
            TcW = TcU + TcV;
3915
0
            Tjg = Tc2 + Tc9;
3916
0
       }
3917
0
       {
3918
0
            E Tcm, TcY, Tct, TcZ;
3919
0
            {
3920
0
           E Tce, Tcl, Tcp, Tcs;
3921
0
           Tce = Tcc - Tcd;
3922
0
           Tcl = Tch - Tck;
3923
0
           Tcm = Tce - Tcl;
3924
0
           TcY = Tce + Tcl;
3925
0
           Tcp = Tcn - Tco;
3926
0
           Tcs = Tcq - Tcr;
3927
0
           Tct = Tcp - Tcs;
3928
0
           TcZ = Tcp + Tcs;
3929
0
            }
3930
0
            Tcu = FMA(KP956940335, Tcm, KP290284677 * Tct);
3931
0
            Td6 = FNMS(KP471396736, TcZ, KP881921264 * TcY);
3932
0
            TcQ = FNMS(KP956940335, Tct, KP290284677 * Tcm);
3933
0
            Td0 = FMA(KP471396736, TcY, KP881921264 * TcZ);
3934
0
       }
3935
0
       {
3936
0
            E TcF, Td1, TcM, Td2;
3937
0
            {
3938
0
           E Tcx, TcE, TcI, TcL;
3939
0
           Tcx = Tcv - Tcw;
3940
0
           TcE = TcA - TcD;
3941
0
           TcF = Tcx - TcE;
3942
0
           Td1 = Tcx + TcE;
3943
0
           TcI = TcG - TcH;
3944
0
           TcL = TcJ - TcK;
3945
0
           TcM = TcI - TcL;
3946
0
           Td2 = TcI + TcL;
3947
0
            }
3948
0
            TcN = FNMS(KP956940335, TcM, KP290284677 * TcF);
3949
0
            Td7 = FMA(KP881921264, Td2, KP471396736 * Td1);
3950
0
            TcR = FMA(KP290284677, TcM, KP956940335 * TcF);
3951
0
            Td3 = FNMS(KP471396736, Td2, KP881921264 * Td1);
3952
0
       }
3953
0
       {
3954
0
            E Tcb, TcO, Tjn, Tjq;
3955
0
            Tcb = TbV + Tca;
3956
0
            TcO = Tcu + TcN;
3957
0
            ri[WS(rs, 45)] = Tcb - TcO;
3958
0
            ri[WS(rs, 13)] = Tcb + TcO;
3959
0
            Tjn = TcQ + TcR;
3960
0
            Tjq = Tjo + Tjp;
3961
0
            ii[WS(rs, 13)] = Tjn + Tjq;
3962
0
            ii[WS(rs, 45)] = Tjq - Tjn;
3963
0
       }
3964
0
       {
3965
0
            E TcP, TcS, Tjr, Tjs;
3966
0
            TcP = TbV - Tca;
3967
0
            TcS = TcQ - TcR;
3968
0
            ri[WS(rs, 61)] = TcP - TcS;
3969
0
            ri[WS(rs, 29)] = TcP + TcS;
3970
0
            Tjr = TcN - Tcu;
3971
0
            Tjs = Tjp - Tjo;
3972
0
            ii[WS(rs, 29)] = Tjr + Tjs;
3973
0
            ii[WS(rs, 61)] = Tjs - Tjr;
3974
0
       }
3975
0
       {
3976
0
            E TcX, Td4, Tjf, Tjk;
3977
0
            TcX = TcT + TcW;
3978
0
            Td4 = Td0 + Td3;
3979
0
            ri[WS(rs, 37)] = TcX - Td4;
3980
0
            ri[WS(rs, 5)] = TcX + Td4;
3981
0
            Tjf = Td6 + Td7;
3982
0
            Tjk = Tjg + Tjj;
3983
0
            ii[WS(rs, 5)] = Tjf + Tjk;
3984
0
            ii[WS(rs, 37)] = Tjk - Tjf;
3985
0
       }
3986
0
       {
3987
0
            E Td5, Td8, Tjl, Tjm;
3988
0
            Td5 = TcT - TcW;
3989
0
            Td8 = Td6 - Td7;
3990
0
            ri[WS(rs, 53)] = Td5 - Td8;
3991
0
            ri[WS(rs, 21)] = Td5 + Td8;
3992
0
            Tjl = Td3 - Td0;
3993
0
            Tjm = Tjj - Tjg;
3994
0
            ii[WS(rs, 21)] = Tjl + Tjm;
3995
0
            ii[WS(rs, 53)] = Tjm - Tjl;
3996
0
       }
3997
0
        }
3998
0
        {
3999
0
       E Tdd, TdF, Tj5, Tjb, Tdk, Tja, TdI, TiY, Tds, TdS, TdC, TdM, Tdz, TdT, TdD;
4000
0
       E TdP;
4001
0
       {
4002
0
            E Td9, Tdc, TiZ, Tj4;
4003
0
            Td9 = TbJ + TbM;
4004
0
            Tdc = Tda + Tdb;
4005
0
            Tdd = Td9 - Tdc;
4006
0
            TdF = Td9 + Tdc;
4007
0
            TiZ = TbQ + TbT;
4008
0
            Tj4 = Tj0 + Tj3;
4009
0
            Tj5 = TiZ + Tj4;
4010
0
            Tjb = Tj4 - TiZ;
4011
0
       }
4012
0
       {
4013
0
            E Tdg, TdG, Tdj, TdH;
4014
0
            {
4015
0
           E Tde, Tdf, Tdh, Tdi;
4016
0
           Tde = TbW + TbX;
4017
0
           Tdf = TbZ + Tc0;
4018
0
           Tdg = FNMS(KP195090322, Tdf, KP980785280 * Tde);
4019
0
           TdG = FMA(KP980785280, Tdf, KP195090322 * Tde);
4020
0
           Tdh = Tc3 + Tc4;
4021
0
           Tdi = Tc6 + Tc7;
4022
0
           Tdj = FMA(KP195090322, Tdh, KP980785280 * Tdi);
4023
0
           TdH = FNMS(KP195090322, Tdi, KP980785280 * Tdh);
4024
0
            }
4025
0
            Tdk = Tdg - Tdj;
4026
0
            Tja = TdH - TdG;
4027
0
            TdI = TdG + TdH;
4028
0
            TiY = Tdg + Tdj;
4029
0
       }
4030
0
       {
4031
0
            E Tdo, TdK, Tdr, TdL;
4032
0
            {
4033
0
           E Tdm, Tdn, Tdp, Tdq;
4034
0
           Tdm = Tcn + Tco;
4035
0
           Tdn = Tck + Tch;
4036
0
           Tdo = Tdm - Tdn;
4037
0
           TdK = Tdm + Tdn;
4038
0
           Tdp = Tcc + Tcd;
4039
0
           Tdq = Tcq + Tcr;
4040
0
           Tdr = Tdp - Tdq;
4041
0
           TdL = Tdp + Tdq;
4042
0
            }
4043
0
            Tds = FMA(KP634393284, Tdo, KP773010453 * Tdr);
4044
0
            TdS = FNMS(KP098017140, TdK, KP995184726 * TdL);
4045
0
            TdC = FNMS(KP773010453, Tdo, KP634393284 * Tdr);
4046
0
            TdM = FMA(KP995184726, TdK, KP098017140 * TdL);
4047
0
       }
4048
0
       {
4049
0
            E Tdv, TdN, Tdy, TdO;
4050
0
            {
4051
0
           E Tdt, Tdu, Tdw, Tdx;
4052
0
           Tdt = Tcv + Tcw;
4053
0
           Tdu = TcK + TcJ;
4054
0
           Tdv = Tdt - Tdu;
4055
0
           TdN = Tdt + Tdu;
4056
0
           Tdw = TcG + TcH;
4057
0
           Tdx = TcA + TcD;
4058
0
           Tdy = Tdw - Tdx;
4059
0
           TdO = Tdw + Tdx;
4060
0
            }
4061
0
            Tdz = FNMS(KP773010453, Tdy, KP634393284 * Tdv);
4062
0
            TdT = FMA(KP098017140, TdN, KP995184726 * TdO);
4063
0
            TdD = FMA(KP773010453, Tdv, KP634393284 * Tdy);
4064
0
            TdP = FNMS(KP098017140, TdO, KP995184726 * TdN);
4065
0
       }
4066
0
       {
4067
0
            E Tdl, TdA, Tj9, Tjc;
4068
0
            Tdl = Tdd + Tdk;
4069
0
            TdA = Tds + Tdz;
4070
0
            ri[WS(rs, 41)] = Tdl - TdA;
4071
0
            ri[WS(rs, 9)] = Tdl + TdA;
4072
0
            Tj9 = TdC + TdD;
4073
0
            Tjc = Tja + Tjb;
4074
0
            ii[WS(rs, 9)] = Tj9 + Tjc;
4075
0
            ii[WS(rs, 41)] = Tjc - Tj9;
4076
0
       }
4077
0
       {
4078
0
            E TdB, TdE, Tjd, Tje;
4079
0
            TdB = Tdd - Tdk;
4080
0
            TdE = TdC - TdD;
4081
0
            ri[WS(rs, 57)] = TdB - TdE;
4082
0
            ri[WS(rs, 25)] = TdB + TdE;
4083
0
            Tjd = Tdz - Tds;
4084
0
            Tje = Tjb - Tja;
4085
0
            ii[WS(rs, 25)] = Tjd + Tje;
4086
0
            ii[WS(rs, 57)] = Tje - Tjd;
4087
0
       }
4088
0
       {
4089
0
            E TdJ, TdQ, TiX, Tj6;
4090
0
            TdJ = TdF + TdI;
4091
0
            TdQ = TdM + TdP;
4092
0
            ri[WS(rs, 33)] = TdJ - TdQ;
4093
0
            ri[WS(rs, 1)] = TdJ + TdQ;
4094
0
            TiX = TdS + TdT;
4095
0
            Tj6 = TiY + Tj5;
4096
0
            ii[WS(rs, 1)] = TiX + Tj6;
4097
0
            ii[WS(rs, 33)] = Tj6 - TiX;
4098
0
       }
4099
0
       {
4100
0
            E TdR, TdU, Tj7, Tj8;
4101
0
            TdR = TdF - TdI;
4102
0
            TdU = TdS - TdT;
4103
0
            ri[WS(rs, 49)] = TdR - TdU;
4104
0
            ri[WS(rs, 17)] = TdR + TdU;
4105
0
            Tj7 = TdP - TdM;
4106
0
            Tj8 = Tj5 - TiY;
4107
0
            ii[WS(rs, 17)] = Tj7 + Tj8;
4108
0
            ii[WS(rs, 49)] = Tj8 - Tj7;
4109
0
       }
4110
0
        }
4111
0
        {
4112
0
       E Tb1, Tbt, Tjz, TjF, Tb8, TjE, Tbw, Tju, Tbg, TbG, Tbq, TbA, Tbn, TbH, Tbr;
4113
0
       E TbD;
4114
0
       {
4115
0
            E TaX, Tb0, Tjv, Tjy;
4116
0
            TaX = T7l + T7w;
4117
0
            Tb0 = TaY + TaZ;
4118
0
            Tb1 = TaX - Tb0;
4119
0
            Tbt = TaX + Tb0;
4120
0
            Tjv = T7I + T7T;
4121
0
            Tjy = Tjw + Tjx;
4122
0
            Tjz = Tjv + Tjy;
4123
0
            TjF = Tjy - Tjv;
4124
0
       }
4125
0
       {
4126
0
            E Tb4, Tbu, Tb7, Tbv;
4127
0
            {
4128
0
           E Tb2, Tb3, Tb5, Tb6;
4129
0
           Tb2 = T80 + T8b;
4130
0
           Tb3 = T8h + T8k;
4131
0
           Tb4 = FNMS(KP555570233, Tb3, KP831469612 * Tb2);
4132
0
           Tbu = FMA(KP555570233, Tb2, KP831469612 * Tb3);
4133
0
           Tb5 = T8r + T8C;
4134
0
           Tb6 = T8I + T8L;
4135
0
           Tb7 = FMA(KP831469612, Tb5, KP555570233 * Tb6);
4136
0
           Tbv = FNMS(KP555570233, Tb5, KP831469612 * Tb6);
4137
0
            }
4138
0
            Tb8 = Tb4 - Tb7;
4139
0
            TjE = Tbv - Tbu;
4140
0
            Tbw = Tbu + Tbv;
4141
0
            Tju = Tb4 + Tb7;
4142
0
       }
4143
0
       {
4144
0
            E Tbc, Tby, Tbf, Tbz;
4145
0
            {
4146
0
           E Tba, Tbb, Tbd, Tbe;
4147
0
           Tba = T9z + T9C;
4148
0
           Tbb = T9s + T9h;
4149
0
           Tbc = Tba - Tbb;
4150
0
           Tby = Tba + Tbb;
4151
0
           Tbd = T8U + T95;
4152
0
           Tbe = T9E + T9F;
4153
0
           Tbf = Tbd - Tbe;
4154
0
           Tbz = Tbd + Tbe;
4155
0
            }
4156
0
            Tbg = FMA(KP471396736, Tbc, KP881921264 * Tbf);
4157
0
            TbG = FNMS(KP290284677, Tby, KP956940335 * Tbz);
4158
0
            Tbq = FNMS(KP881921264, Tbc, KP471396736 * Tbf);
4159
0
            TbA = FMA(KP956940335, Tby, KP290284677 * Tbz);
4160
0
       }
4161
0
       {
4162
0
            E Tbj, TbB, Tbm, TbC;
4163
0
            {
4164
0
           E Tbh, Tbi, Tbk, Tbl;
4165
0
           Tbh = T9N + T9Y;
4166
0
           Tbi = Tay + Tax;
4167
0
           Tbj = Tbh - Tbi;
4168
0
           TbB = Tbh + Tbi;
4169
0
           Tbk = Tas + Tav;
4170
0
           Tbl = Taa + Tal;
4171
0
           Tbm = Tbk - Tbl;
4172
0
           TbC = Tbk + Tbl;
4173
0
            }
4174
0
            Tbn = FNMS(KP881921264, Tbm, KP471396736 * Tbj);
4175
0
            TbH = FMA(KP290284677, TbB, KP956940335 * TbC);
4176
0
            Tbr = FMA(KP881921264, Tbj, KP471396736 * Tbm);
4177
0
            TbD = FNMS(KP290284677, TbC, KP956940335 * TbB);
4178
0
       }
4179
0
       {
4180
0
            E Tb9, Tbo, TjD, TjG;
4181
0
            Tb9 = Tb1 + Tb8;
4182
0
            Tbo = Tbg + Tbn;
4183
0
            ri[WS(rs, 43)] = Tb9 - Tbo;
4184
0
            ri[WS(rs, 11)] = Tb9 + Tbo;
4185
0
            TjD = Tbq + Tbr;
4186
0
            TjG = TjE + TjF;
4187
0
            ii[WS(rs, 11)] = TjD + TjG;
4188
0
            ii[WS(rs, 43)] = TjG - TjD;
4189
0
       }
4190
0
       {
4191
0
            E Tbp, Tbs, TjH, TjI;
4192
0
            Tbp = Tb1 - Tb8;
4193
0
            Tbs = Tbq - Tbr;
4194
0
            ri[WS(rs, 59)] = Tbp - Tbs;
4195
0
            ri[WS(rs, 27)] = Tbp + Tbs;
4196
0
            TjH = Tbn - Tbg;
4197
0
            TjI = TjF - TjE;
4198
0
            ii[WS(rs, 27)] = TjH + TjI;
4199
0
            ii[WS(rs, 59)] = TjI - TjH;
4200
0
       }
4201
0
       {
4202
0
            E Tbx, TbE, Tjt, TjA;
4203
0
            Tbx = Tbt + Tbw;
4204
0
            TbE = TbA + TbD;
4205
0
            ri[WS(rs, 35)] = Tbx - TbE;
4206
0
            ri[WS(rs, 3)] = Tbx + TbE;
4207
0
            Tjt = TbG + TbH;
4208
0
            TjA = Tju + Tjz;
4209
0
            ii[WS(rs, 3)] = Tjt + TjA;
4210
0
            ii[WS(rs, 35)] = TjA - Tjt;
4211
0
       }
4212
0
       {
4213
0
            E TbF, TbI, TjB, TjC;
4214
0
            TbF = Tbt - Tbw;
4215
0
            TbI = TbG - TbH;
4216
0
            ri[WS(rs, 51)] = TbF - TbI;
4217
0
            ri[WS(rs, 19)] = TbF + TbI;
4218
0
            TjB = TbD - TbA;
4219
0
            TjC = Tjz - Tju;
4220
0
            ii[WS(rs, 19)] = TjB + TjC;
4221
0
            ii[WS(rs, 51)] = TjC - TjB;
4222
0
       }
4223
0
        }
4224
0
         }
4225
0
    }
4226
0
     }
4227
0
}
4228
4229
/*
 * Twiddle instruction table for this codelet; `desc` below stores a pointer
 * to it.  It holds five TW_CEXP entries whose last fields are 1, 3, 9, 27
 * and 63, with a single TW_NEXT entry last.
 * NOTE(review): tw_instr, TW_CEXP and TW_NEXT are declared outside this
 * file; the three fields are presumably (opcode, vector index, twiddle
 * exponent) -- confirm against the tw_instr definition.  The 1/3/9/27/63
 * sequence looks like the -twiddle-log3 scheme named on the generator
 * command line in this file's header comment -- confirm.
 */
static const tw_instr twinstr[] = {
4230
     { TW_CEXP, 0, 1 },
4231
     { TW_CEXP, 0, 3 },
4232
     { TW_CEXP, 0, 9 },
4233
     { TW_CEXP, 0, 27 },
4234
     { TW_CEXP, 0, 63 },
4235
     { TW_NEXT, 1, 0 }
4236
};
4237
4238
/*
 * Descriptor for this codelet, passed by address to X(kdft_dit_register)
 * together with the t2_64 kernel.  In order it holds: 64, the name string
 * "t2_64", the twinstr table, &GENUS, a four-number group
 * { 880, 386, 274, 0 }, and three trailing zeros.
 * NOTE(review): ct_desc and GENUS are declared outside this file.  The
 * leading 64 is presumably the radix (matches -n 64), and the four-number
 * group presumably the operation counts (additions, multiplications, fused
 * multiply/adds, other) -- confirm against the ct_desc definition.
 */
static const ct_desc desc = { 64, "t2_64", twinstr, &GENUS, { 880, 386, 274, 0 }, 0, 0, 0 };
4239
4240
1
/*
 * Registration hook for this codelet: passes the t2_64 kernel and the
 * address of its descriptor `desc` to X(kdft_dit_register) for planner `p`.
 * Returns nothing.
 * NOTE(review): X() is a macro defined outside this file (presumably the
 * library's symbol-prefix macro) -- confirm.
 */
void X(codelet_t2_64) (planner *p) {
4241
1
     X(kdft_dit_register) (p, t2_64, &desc);
4242
1
}
4243
#endif