Coverage Report

Created: 2025-07-18 06:52

/src/fftw3/rdft/scalar/r2cb/r2cbIII_20.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Fri Jul 18 06:51:45 UTC 2025 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -name r2cbIII_20 -dft-III -include rdft/scalar/r2cbIII.h */
29
30
/*
31
 * This function contains 94 FP additions, 56 FP multiplications,
32
 * (or, 58 additions, 20 multiplications, 36 fused multiply/add),
33
 * 43 stack variables, 6 constants, and 40 memory accesses
34
 */
35
#include "rdft/scalar/r2cbIII.h"
36
37
/*
 * r2cbIII_20, FMA variant: this definition is compiled only when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Per the generator command line above, this is the size-20 kernel emitted
 * by genfft's gen_r2cb with -dft-III and -sign 1.
 *
 * Each loop iteration reads Cr[0..9] and Ci[0..9] (indexed through the csr
 * and csi strides) and writes R0[0..9] and R1[0..9] (indexed through rs).
 * Every R0 store is scaled by KP2_000000000 and every R1 store by
 * KP1_414213562; some stores are additionally negated.
 *
 * v   - number of iterations of the outer loop
 * ivs - per-iteration increment applied to Cr and Ci
 * ovs - per-iteration increment applied to R0 and R1
 *
 * NOTE(review): WS, DK, E, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined
 * outside this file; WS(s, k) is presumably the offset of element k under
 * stride s, and FMA/FNMS presumably expand to fused multiply-add forms --
 * confirm against rdft/codelet-rdft.h.
 */
static void r2cbIII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
38
{
39
     /* Numeric constants: 0.951... equals sin(2*pi/5), 0.559... equals
        sqrt(5)/4, 1.414... equals sqrt(2), 0.618... equals (sqrt(5)-1)/2. */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
40
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
41
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
42
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
43
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
44
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
45
     {
46
    INT i;
47
    /* One pass per i, counting down from v; the pointers advance in the
       loop increment expression. */
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) {
48
         E T1, Tk, T1l, TZ, T8, Tj, TQ, Ts, TV, TI, TT, TU, Ta, Tv, T1i;
49
         E T1a, Th, Tu, T11, TD, T16, TL, T14, T15;
50
         /* Load Cr[2] and combine sums/differences of Cr[9], Cr[5], Cr[6], Cr[1]. */
         {
51
        E T7, TY, T4, TX;
52
        T1 = Cr[WS(csr, 2)];
53
        {
54
       E T5, T6, T2, T3;
55
       T5 = Cr[WS(csr, 9)];
56
       T6 = Cr[WS(csr, 5)];
57
       T7 = T5 + T6;
58
       TY = T5 - T6;
59
       T2 = Cr[WS(csr, 6)];
60
       T3 = Cr[WS(csr, 1)];
61
       T4 = T2 + T3;
62
       TX = T2 - T3;
63
        }
64
        Tk = T4 - T7;
65
        T1l = FNMS(KP618033988, TX, TY);
66
        TZ = FMA(KP618033988, TY, TX);
67
        T8 = T4 + T7;
68
        Tj = FNMS(KP250000000, T8, T1);
69
         }
70
         /* Load Ci[2] and combine sums/differences of Ci[5], Ci[9], Ci[6], Ci[1]. */
         {
71
        E Tr, TS, To, TR;
72
        TQ = Ci[WS(csi, 2)];
73
        {
74
       E Tp, Tq, Tm, Tn;
75
       Tp = Ci[WS(csi, 5)];
76
       Tq = Ci[WS(csi, 9)];
77
       Tr = Tp - Tq;
78
       TS = Tp + Tq;
79
       Tm = Ci[WS(csi, 6)];
80
       Tn = Ci[WS(csi, 1)];
81
       To = Tm + Tn;
82
       TR = Tm - Tn;
83
        }
84
        Ts = FMA(KP618033988, Tr, To);
85
        TV = TR + TS;
86
        TI = FNMS(KP618033988, To, Tr);
87
        TT = TR - TS;
88
        TU = FNMS(KP250000000, TT, TQ);
89
         }
90
         /* Load Cr[7] and combine sums/differences of Cr[0], Cr[4], Cr[3], Cr[8]. */
         {
91
        E Tg, T19, Td, T18;
92
        Ta = Cr[WS(csr, 7)];
93
        {
94
       E Te, Tf, Tb, Tc;
95
       Te = Cr[0];
96
       Tf = Cr[WS(csr, 4)];
97
       Tg = Te + Tf;
98
       T19 = Te - Tf;
99
       Tb = Cr[WS(csr, 3)];
100
       Tc = Cr[WS(csr, 8)];
101
       Td = Tb + Tc;
102
       T18 = Tb - Tc;
103
        }
104
        Tv = Td - Tg;
105
        T1i = FNMS(KP618033988, T18, T19);
106
        T1a = FMA(KP618033988, T19, T18);
107
        Th = Td + Tg;
108
        Tu = FNMS(KP250000000, Th, Ta);
109
         }
110
         /* Load Ci[7] and combine sums/differences of Ci[4], Ci[0], Ci[3], Ci[8]. */
         {
111
        E TC, T13, Tz, T12;
112
        T11 = Ci[WS(csi, 7)];
113
        {
114
       E TA, TB, Tx, Ty;
115
       TA = Ci[WS(csi, 4)];
116
       TB = Ci[0];
117
       TC = TA - TB;
118
       T13 = TB + TA;
119
       Tx = Ci[WS(csi, 3)];
120
       Ty = Ci[WS(csi, 8)];
121
       Tz = Tx + Ty;
122
       T12 = Tx - Ty;
123
        }
124
        TD = FMA(KP618033988, TC, Tz);
125
        T16 = T12 + T13;
126
        TL = FNMS(KP618033988, Tz, TC);
127
        T14 = T12 - T13;
128
        T15 = FNMS(KP250000000, T14, T11);
129
         }
130
         /* Store R0[0], R0[5], R1[2], R1[7] from the plain sums of each input group. */
         {
131
        E T9, Ti, T1w, T1t, T1u, T1v;
132
        T9 = T1 + T8;
133
        Ti = Ta + Th;
134
        T1w = T9 - Ti;
135
        T1t = TT + TQ;
136
        T1u = T14 + T11;
137
        T1v = T1t + T1u;
138
        R0[0] = KP2_000000000 * (T9 + Ti);
139
        R0[WS(rs, 5)] = KP2_000000000 * (T1u - T1t);
140
        R1[WS(rs, 2)] = KP1_414213562 * (T1v - T1w);
141
        R1[WS(rs, 7)] = KP1_414213562 * (T1w + T1v);
142
         }
143
         /* Store R0[4], R0[6], R0[9], R0[1] and R1[1], R1[6], R1[8], R1[3];
            this group uses the FNMS(KP559016994, ...) combinations. */
         {
144
        E TJ, TN, T1m, T1q, TM, TO, T1j, T1r;
145
        {
146
       E TH, T1k, TK, T1h;
147
       TH = FNMS(KP559016994, Tk, Tj);
148
       TJ = FNMS(KP951056516, TI, TH);
149
       TN = FMA(KP951056516, TI, TH);
150
       T1k = FNMS(KP559016994, TV, TU);
151
       T1m = FNMS(KP951056516, T1l, T1k);
152
       T1q = FMA(KP951056516, T1l, T1k);
153
       TK = FNMS(KP559016994, Tv, Tu);
154
       TM = FMA(KP951056516, TL, TK);
155
       TO = FNMS(KP951056516, TL, TK);
156
       T1h = FNMS(KP559016994, T16, T15);
157
       T1j = FMA(KP951056516, T1i, T1h);
158
       T1r = FNMS(KP951056516, T1i, T1h);
159
        }
160
        R0[WS(rs, 4)] = KP2_000000000 * (TJ + TM);
161
        R0[WS(rs, 6)] = -(KP2_000000000 * (TN + TO));
162
        R0[WS(rs, 9)] = KP2_000000000 * (T1r - T1q);
163
        R0[WS(rs, 1)] = KP2_000000000 * (T1j - T1m);
164
        {
165
       E T1p, T1s, T1n, T1o;
166
       T1p = TM - TJ;
167
       T1s = T1q + T1r;
168
       R1[WS(rs, 1)] = KP1_414213562 * (T1p - T1s);
169
       R1[WS(rs, 6)] = KP1_414213562 * (T1p + T1s);
170
       T1n = TN - TO;
171
       T1o = T1m + T1j;
172
       R1[WS(rs, 8)] = KP1_414213562 * (T1n - T1o);
173
       R1[WS(rs, 3)] = KP1_414213562 * (T1n + T1o);
174
        }
175
         }
176
         /* Store R0[8], R0[2], R0[7], R0[3] and R1[4], R1[9], R1[0], R1[5];
            this group uses the FMA(KP559016994, ...) combinations. */
         {
177
        E Tt, TF, T1b, T1f, TE, TG, T10, T1e;
178
        {
179
       E Tl, T17, Tw, TW;
180
       Tl = FMA(KP559016994, Tk, Tj);
181
       Tt = FNMS(KP951056516, Ts, Tl);
182
       TF = FMA(KP951056516, Ts, Tl);
183
       T17 = FMA(KP559016994, T16, T15);
184
       T1b = FNMS(KP951056516, T1a, T17);
185
       T1f = FMA(KP951056516, T1a, T17);
186
       Tw = FMA(KP559016994, Tv, Tu);
187
       TE = FMA(KP951056516, TD, Tw);
188
       TG = FNMS(KP951056516, TD, Tw);
189
       TW = FMA(KP559016994, TV, TU);
190
       T10 = FMA(KP951056516, TZ, TW);
191
       T1e = FNMS(KP951056516, TZ, TW);
192
        }
193
        R0[WS(rs, 8)] = KP2_000000000 * (Tt + TE);
194
        R0[WS(rs, 2)] = -(KP2_000000000 * (TF + TG));
195
        R0[WS(rs, 7)] = KP2_000000000 * (T1e - T1f);
196
        R0[WS(rs, 3)] = KP2_000000000 * (T10 - T1b);
197
        {
198
       E T1d, T1g, TP, T1c;
199
       T1d = TF - TG;
200
       T1g = T1e + T1f;
201
       R1[WS(rs, 4)] = KP1_414213562 * (T1d - T1g);
202
       R1[WS(rs, 9)] = -(KP1_414213562 * (T1d + T1g));
203
       TP = Tt - TE;
204
       T1c = T10 + T1b;
205
       R1[0] = KP1_414213562 * (TP - T1c);
206
       R1[WS(rs, 5)] = -(KP1_414213562 * (TP + T1c));
207
        }
208
         }
209
    }
210
     }
211
}
212
213
/* Descriptor handed to the registration call below: the value 20, the
   codelet name, the counts { 58, 20, 36, 0 } -- the first three equal the
   additions / multiplications / fused multiply-adds quoted in the generator
   summary for this variant -- and a pointer to GENUS.
   NOTE(review): field meanings are inferred from those matching numbers;
   confirm against the kr2c_desc declaration. */
static const kr2c_desc desc = { 20, "r2cbIII_20", { 58, 20, 36, 0 }, &GENUS };
214
215
/* Registration entry point (FMA build): passes the planner p, the kernel
   r2cbIII_20 and its descriptor desc to X(kr2c_register).
   NOTE(review): X() is a project macro defined elsewhere, presumably a
   name-prefixing wrapper -- confirm. */
void X(codelet_r2cbIII_20) (planner *p) { X(kr2c_register) (p, r2cbIII_20, &desc);
216
}
217
218
#else
219
220
/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 20 -name r2cbIII_20 -dft-III -include rdft/scalar/r2cbIII.h */
221
222
/*
223
 * This function contains 94 FP additions, 44 FP multiplications,
224
 * (or, 82 additions, 32 multiplications, 12 fused multiply/add),
225
 * 43 stack variables, 6 constants, and 40 memory accesses
226
 */
227
#include "rdft/scalar/r2cbIII.h"
228
229
/*
 * r2cbIII_20, non-FMA variant: this definition is compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined (#else branch).
 *
 * Per the generator command line above, this is the size-20 kernel emitted
 * by genfft's gen_r2cb with -dft-III and -sign 1, generated without -fma.
 *
 * Each loop iteration reads Cr[0..9] and Ci[0..9] (indexed through the csr
 * and csi strides) and writes R0[0..9] and R1[0..9] (indexed through rs).
 * Every R0 store is scaled by KP2_000000000 and every R1 store by
 * KP1_414213562.
 *
 * v   - number of iterations of the outer loop
 * ivs - per-iteration increment applied to Cr and Ci
 * ovs - per-iteration increment applied to R0 and R1
 *
 * NOTE(review): WS, DK, E, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined
 * outside this file; WS(s, k) is presumably the offset of element k under
 * stride s -- confirm against rdft/codelet-rdft.h.
 */
static void r2cbIII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
230
0
{
231
0
     /* Numeric constants: 1.414... equals sqrt(2), 0.951... equals
        sin(2*pi/5), 0.587... equals sin(pi/5), 0.559... equals sqrt(5)/4. */
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
232
0
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
233
0
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
234
0
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
235
0
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
236
0
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
237
0
     {
238
0
    INT i;
239
0
    /* One pass per i, counting down from v; the pointers advance in the
       loop increment expression. */
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(80, rs), MAKE_VOLATILE_STRIDE(80, csr), MAKE_VOLATILE_STRIDE(80, csi)) {
240
0
         E T1, Tj, T1k, T13, T8, Tk, T17, Ts, T16, TI, T18, T19, Ta, Tu, T1i;
241
0
         E TS, Th, Tv, TX, TD, TV, TL, TW, TY;
242
0
         /* Load Cr[2] and combine sums/differences of Cr[9], Cr[5], Cr[6], Cr[1]. */
         {
243
0
        E T7, T12, T4, T11;
244
0
        T1 = Cr[WS(csr, 2)];
245
0
        {
246
0
       E T5, T6, T2, T3;
247
0
       T5 = Cr[WS(csr, 9)];
248
0
       T6 = Cr[WS(csr, 5)];
249
0
       T7 = T5 + T6;
250
0
       T12 = T5 - T6;
251
0
       T2 = Cr[WS(csr, 6)];
252
0
       T3 = Cr[WS(csr, 1)];
253
0
       T4 = T2 + T3;
254
0
       T11 = T2 - T3;
255
0
        }
256
0
        Tj = KP559016994 * (T4 - T7);
257
0
        T1k = FNMS(KP951056516, T12, KP587785252 * T11);
258
0
        T13 = FMA(KP951056516, T11, KP587785252 * T12);
259
0
        T8 = T4 + T7;
260
0
        Tk = FNMS(KP250000000, T8, T1);
261
0
         }
262
0
         /* Load Ci[2] and combine sums/differences of Ci[5], Ci[9], Ci[6], Ci[1]. */
         {
263
0
        E Tr, T15, To, T14;
264
0
        T17 = Ci[WS(csi, 2)];
265
0
        {
266
0
       E Tp, Tq, Tm, Tn;
267
0
       Tp = Ci[WS(csi, 5)];
268
0
       Tq = Ci[WS(csi, 9)];
269
0
       Tr = Tp - Tq;
270
0
       T15 = Tp + Tq;
271
0
       Tm = Ci[WS(csi, 6)];
272
0
       Tn = Ci[WS(csi, 1)];
273
0
       To = Tm + Tn;
274
0
       T14 = Tm - Tn;
275
0
        }
276
0
        Ts = FMA(KP951056516, To, KP587785252 * Tr);
277
0
        T16 = KP559016994 * (T14 + T15);
278
0
        TI = FNMS(KP951056516, Tr, KP587785252 * To);
279
0
        T18 = T14 - T15;
280
0
        T19 = FNMS(KP250000000, T18, T17);
281
0
         }
282
0
         /* Load Cr[7] and combine sums/differences of Cr[0], Cr[4], Cr[3], Cr[8]. */
         {
283
0
        E Tg, TR, Td, TQ;
284
0
        Ta = Cr[WS(csr, 7)];
285
0
        {
286
0
       E Te, Tf, Tb, Tc;
287
0
       Te = Cr[0];
288
0
       Tf = Cr[WS(csr, 4)];
289
0
       Tg = Te + Tf;
290
0
       TR = Te - Tf;
291
0
       Tb = Cr[WS(csr, 3)];
292
0
       Tc = Cr[WS(csr, 8)];
293
0
       Td = Tb + Tc;
294
0
       TQ = Tb - Tc;
295
0
        }
296
0
        Tu = KP559016994 * (Td - Tg);
297
0
        T1i = FNMS(KP951056516, TR, KP587785252 * TQ);
298
0
        TS = FMA(KP951056516, TQ, KP587785252 * TR);
299
0
        Th = Td + Tg;
300
0
        Tv = FNMS(KP250000000, Th, Ta);
301
0
         }
302
0
         /* Load Ci[7] and combine sums/differences of Ci[4], Ci[0], Ci[3], Ci[8]. */
         {
303
0
        E TC, TU, Tz, TT;
304
0
        TX = Ci[WS(csi, 7)];
305
0
        {
306
0
       E TA, TB, Tx, Ty;
307
0
       TA = Ci[WS(csi, 4)];
308
0
       TB = Ci[0];
309
0
       TC = TA - TB;
310
0
       TU = TB + TA;
311
0
       Tx = Ci[WS(csi, 3)];
312
0
       Ty = Ci[WS(csi, 8)];
313
0
       Tz = Tx + Ty;
314
0
       TT = Ty - Tx;
315
0
        }
316
0
        TD = FMA(KP951056516, Tz, KP587785252 * TC);
317
0
        TV = KP559016994 * (TT - TU);
318
0
        TL = FNMS(KP587785252, Tz, KP951056516 * TC);
319
0
        TW = TT + TU;
320
0
        TY = FMA(KP250000000, TW, TX);
321
0
         }
322
0
         /* Store R0[0], R0[5], R1[2], R1[7] from the plain sums of each input group. */
         {
323
0
        E T9, Ti, T1w, T1t, T1u, T1v;
324
0
        T9 = T1 + T8;
325
0
        Ti = Ta + Th;
326
0
        T1w = T9 - Ti;
327
0
        T1t = T18 + T17;
328
0
        T1u = TX - TW;
329
0
        T1v = T1t + T1u;
330
0
        R0[0] = KP2_000000000 * (T9 + Ti);
331
0
        R0[WS(rs, 5)] = KP2_000000000 * (T1u - T1t);
332
0
        R1[WS(rs, 2)] = KP1_414213562 * (T1v - T1w);
333
0
        R1[WS(rs, 7)] = KP1_414213562 * (T1w + T1v);
334
0
         }
335
0
         /* Store R0[4], R0[6], R0[9], R0[1] and R1[1], R1[6], R1[8], R1[3]. */
         {
336
0
        E TJ, TO, T1m, T1q, TM, TN, T1j, T1r;
337
0
        {
338
0
       E TH, T1l, TK, T1h;
339
0
       TH = Tk - Tj;
340
0
       TJ = TH + TI;
341
0
       TO = TH - TI;
342
0
       T1l = T19 - T16;
343
0
       T1m = T1k + T1l;
344
0
       T1q = T1l - T1k;
345
0
       TK = Tv - Tu;
346
0
       TM = TK + TL;
347
0
       TN = TL - TK;
348
0
       T1h = TV + TY;
349
0
       T1j = T1h - T1i;
350
0
       T1r = T1i + T1h;
351
0
        }
352
0
        R0[WS(rs, 4)] = KP2_000000000 * (TJ + TM);
353
0
        R0[WS(rs, 6)] = KP2_000000000 * (TN - TO);
354
0
        R0[WS(rs, 9)] = KP2_000000000 * (T1r - T1q);
355
0
        R0[WS(rs, 1)] = KP2_000000000 * (T1j - T1m);
356
0
        {
357
0
       E T1p, T1s, T1n, T1o;
358
0
       T1p = TM - TJ;
359
0
       T1s = T1q + T1r;
360
0
       R1[WS(rs, 1)] = KP1_414213562 * (T1p - T1s);
361
0
       R1[WS(rs, 6)] = KP1_414213562 * (T1p + T1s);
362
0
       T1n = TO + TN;
363
0
       T1o = T1m + T1j;
364
0
       R1[WS(rs, 8)] = KP1_414213562 * (T1n - T1o);
365
0
       R1[WS(rs, 3)] = KP1_414213562 * (T1n + T1o);
366
0
        }
367
0
         }
368
0
         /* Store R0[8], R0[2], R0[7], R0[3] and R1[4], R1[9], R1[0], R1[5]. */
         {
369
0
        E Tt, TG, T1b, T1f, TE, TF, T10, T1e;
370
0
        {
371
0
       E Tl, T1a, Tw, TZ;
372
0
       Tl = Tj + Tk;
373
0
       Tt = Tl - Ts;
374
0
       TG = Tl + Ts;
375
0
       T1a = T16 + T19;
376
0
       T1b = T13 + T1a;
377
0
       T1f = T1a - T13;
378
0
       Tw = Tu + Tv;
379
0
       TE = Tw + TD;
380
0
       TF = TD - Tw;
381
0
       TZ = TV - TY;
382
0
       T10 = TS + TZ;
383
0
       T1e = TZ - TS;
384
0
        }
385
0
        R0[WS(rs, 8)] = KP2_000000000 * (Tt + TE);
386
0
        R0[WS(rs, 2)] = KP2_000000000 * (TF - TG);
387
0
        R0[WS(rs, 7)] = KP2_000000000 * (T1f + T1e);
388
0
        R0[WS(rs, 3)] = KP2_000000000 * (T1b + T10);
389
0
        {
390
0
       E T1d, T1g, TP, T1c;
391
0
       T1d = TG + TF;
392
0
       T1g = T1e - T1f;
393
0
       R1[WS(rs, 4)] = KP1_414213562 * (T1d + T1g);
394
0
       R1[WS(rs, 9)] = KP1_414213562 * (T1g - T1d);
395
0
       TP = Tt - TE;
396
0
       T1c = T10 - T1b;
397
0
       R1[0] = KP1_414213562 * (TP + T1c);
398
0
       R1[WS(rs, 5)] = KP1_414213562 * (T1c - TP);
399
0
        }
400
0
         }
401
0
    }
402
0
     }
403
0
}
404
405
/* Descriptor handed to the registration call below: the value 20, the
   codelet name, the counts { 82, 32, 12, 0 } -- the first three equal the
   additions / multiplications / fused multiply-adds quoted in the generator
   summary for this variant -- and a pointer to GENUS.
   NOTE(review): field meanings are inferred from those matching numbers;
   confirm against the kr2c_desc declaration. */
static const kr2c_desc desc = { 20, "r2cbIII_20", { 82, 32, 12, 0 }, &GENUS };
406
407
1
/* Registration entry point (non-FMA build): passes the planner p, the kernel
   r2cbIII_20 and its descriptor desc to X(kr2c_register).
   NOTE(review): X() is a project macro defined elsewhere, presumably a
   name-prefixing wrapper -- confirm. */
void X(codelet_r2cbIII_20) (planner *p) { X(kr2c_register) (p, r2cbIII_20, &desc);
408
1
}
409
410
#endif