Coverage Report

Created: 2024-09-08 06:43

/src/fftw3/rdft/scalar/r2cb/r2cbIII_15.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sun Sep  8 06:42:26 UTC 2024 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cbIII_15 -dft-III -include rdft/scalar/r2cbIII.h */
29
30
/*
31
 * This function contains 64 FP additions, 43 FP multiplications,
32
 * (or, 21 additions, 0 multiplications, 43 fused multiply/add),
33
 * 42 stack variables, 9 constants, and 30 memory accesses
34
 */
35
#include "rdft/scalar/r2cbIII.h"
36
37
/*
 * r2cbIII_15 -- FMA variant, compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Per the generator command line recorded in the comment preceding this
 * function, the code was emitted by genfft's gen_r2cb with
 * "-n 15 -dft-III -sign 1".
 *
 * Each of the v loop iterations:
 *   - reads eight values from Cr (entries 0..7, entry 0 directly and the
 *     others through WS(csr, k)),
 *   - reads seven values from Ci (entries 0..6, through WS(csi, k)),
 *   - stores eight results to R0 (entries 0..7, through WS(rs, k)) and
 *     seven results to R1 (entries 0..6, through WS(rs, k)).
 * After every iteration R0 and R1 advance by ovs, Cr and Ci by ivs.
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE come
 * from the included project headers and are not visible here.
 * NOTE(review): FMA(a, b, c) is presumably a*b + c, FNMS(a, b, c)
 * presumably c - a*b and FMS(a, b, c) presumably a*b - c -- confirm
 * against the header definitions.
 */
static void r2cbIII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
38
{
39
     /* Named numeric constants used by the arithmetic below. */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
40
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
41
     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
42
     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
43
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
44
     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
45
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
46
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
47
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
48
     {
49
    INT i;
50
    /* Runs v times; the increment clause steps the output pointers by ovs
       and the input pointers by ivs. */
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
51
         E Tk, TA, T5, Th, Tz, T6, Tn, TX, TR, Td, Tm, TI, Tv, TN, TD;
52
         E TL, TM, Ti, Tj, T12, Te, T11;
53
         /* Load Ci entries 4 and 1 and combine them into Tk and TA. */
         Ti = Ci[WS(csi, 4)];
54
         Tj = Ci[WS(csi, 1)];
55
         Tk = FMA(KP618033988, Tj, Ti);
56
         TA = FNMS(KP618033988, Ti, Tj);
57
         {
58
        /* Load Cr entries 7, 4 and 1; produces T5, Th and Tz. */
        E T1, T4, Tg, T2, T3, Tf;
59
        T1 = Cr[WS(csr, 7)];
60
        T2 = Cr[WS(csr, 4)];
61
        T3 = Cr[WS(csr, 1)];
62
        T4 = T2 + T3;
63
        Tg = T2 - T3;
64
        T5 = FMA(KP2_000000000, T4, T1);
65
        Tf = FNMS(KP500000000, T4, T1);
66
        Th = FMA(KP1_118033988, Tg, Tf);
67
        Tz = FNMS(KP1_118033988, Tg, Tf);
68
         }
69
         {
70
        /* Load Cr entries 2, 3, 6, 0 and 5; produces T6, Tn, TX, TR, Td, Tm. */
        E Tc, TP, T9, TQ;
71
        T6 = Cr[WS(csr, 2)];
72
        {
73
       E Ta, Tb, T7, T8;
74
       Ta = Cr[WS(csr, 3)];
75
       Tb = Cr[WS(csr, 6)];
76
       Tc = Ta + Tb;
77
       TP = Ta - Tb;
78
       T7 = Cr[0];
79
       T8 = Cr[WS(csr, 5)];
80
       T9 = T7 + T8;
81
       TQ = T7 - T8;
82
        }
83
        Tn = T9 - Tc;
84
        TX = FMA(KP618033988, TP, TQ);
85
        TR = FNMS(KP618033988, TQ, TP);
86
        Td = T9 + Tc;
87
        Tm = FNMS(KP250000000, Td, T6);
88
         }
89
         {
90
        /* Load Ci entries 2, 3, 6, 0 and 5; produces TI, Tv, TN, TD, TL, TM. */
        E Tu, TK, Tr, TJ;
91
        TI = Ci[WS(csi, 2)];
92
        {
93
       E Ts, Tt, Tp, Tq;
94
       Ts = Ci[WS(csi, 3)];
95
       Tt = Ci[WS(csi, 6)];
96
       Tu = Ts - Tt;
97
       TK = Ts + Tt;
98
       Tp = Ci[0];
99
       Tq = Ci[WS(csi, 5)];
100
       Tr = Tp + Tq;
101
       TJ = Tq - Tp;
102
        }
103
        Tv = FMA(KP618033988, Tu, Tr);
104
        TN = TJ + TK;
105
        TD = FNMS(KP618033988, Tr, Tu);
106
        TL = TJ - TK;
107
        TM = FNMS(KP250000000, TL, TI);
108
         }
109
         /* Store R0 entries 0 and 5 and R1 entry 2. */
         T12 = TL + TI;
110
         Te = T6 + Td;
111
         T11 = Te - T5;
112
         R0[0] = FMA(KP2_000000000, Te, T5);
113
         R0[WS(rs, 5)] = FMS(KP1_732050807, T12, T11);
114
         R1[WS(rs, 2)] = FMA(KP1_732050807, T12, T11);
115
         {
116
        /* Store R0 entries 6, 1, 4 and R1 entries 1, 3, 6 (the R1 stores
           in this group are negated). */
        E TB, TF, TE, TG, TS, TU, TC, TO, TH, TT;
117
        TB = FNMS(KP1_902113032, TA, Tz);
118
        TF = FMA(KP1_902113032, TA, Tz);
119
        TC = FNMS(KP559016994, Tn, Tm);
120
        TE = FMA(KP951056516, TD, TC);
121
        TG = FNMS(KP951056516, TD, TC);
122
        TO = FNMS(KP559016994, TN, TM);
123
        TS = FMA(KP951056516, TR, TO);
124
        TU = FNMS(KP951056516, TR, TO);
125
        R0[WS(rs, 6)] = FMA(KP2_000000000, TE, TB);
126
        R1[WS(rs, 1)] = -(FMA(KP2_000000000, TG, TF));
127
        TH = TB - TE;
128
        R0[WS(rs, 1)] = FNMS(KP1_732050807, TS, TH);
129
        R1[WS(rs, 3)] = -(FMA(KP1_732050807, TS, TH));
130
        TT = TF - TG;
131
        R0[WS(rs, 4)] = FNMS(KP1_732050807, TU, TT);
132
        R1[WS(rs, 6)] = -(FMA(KP1_732050807, TU, TT));
133
         }
134
         {
135
        /* Store R1 entries 4, 0, 5 and R0 entries 3, 7, 2 (only the R1
           entry 4 store is negated). */
        E Tl, Tx, Tw, Ty, TY, T10, To, TW, TV, TZ;
136
        Tl = FNMS(KP1_902113032, Tk, Th);
137
        Tx = FMA(KP1_902113032, Tk, Th);
138
        To = FMA(KP559016994, Tn, Tm);
139
        Tw = FMA(KP951056516, Tv, To);
140
        Ty = FNMS(KP951056516, Tv, To);
141
        TW = FMA(KP559016994, TN, TM);
142
        TY = FNMS(KP951056516, TX, TW);
143
        T10 = FMA(KP951056516, TX, TW);
144
        R1[WS(rs, 4)] = -(FMA(KP2_000000000, Tw, Tl));
145
        R0[WS(rs, 3)] = FMA(KP2_000000000, Ty, Tx);
146
        TV = Ty - Tx;
147
        R1[0] = FNMS(KP1_732050807, TY, TV);
148
        R1[WS(rs, 5)] = FMA(KP1_732050807, TY, TV);
149
        TZ = Tl - Tw;
150
        R0[WS(rs, 7)] = FNMS(KP1_732050807, T10, TZ);
151
        R0[WS(rs, 2)] = FMA(KP1_732050807, T10, TZ);
152
         }
153
    }
154
     }
155
}
156
157
/*
 * Descriptor handed to X(kr2c_register) for the FMA variant.  It carries
 * the value 15, the codelet name string, the tuple { 21, 0, 43, 0 } and
 * the address of GENUS.  The first three tuple entries equal the
 * "21 additions, 0 multiplications, 43 fused multiply/add" counts quoted in
 * the generated header comment of this variant.
 * NOTE(review): 15 is presumably the transform size (generator flag -n 15);
 * the kr2c_desc field layout is declared elsewhere -- confirm there.
 */
static const kr2c_desc desc = { 15, "r2cbIII_15", { 21, 0, 43, 0 }, &GENUS };
158
159
/*
 * Registration entry point for the FMA variant: passes the planner p, the
 * static function r2cbIII_15 and its descriptor desc to X(kr2c_register).
 * X(...) is a project macro defined elsewhere (presumably a symbol-name
 * prefixing macro -- confirm in the project headers).
 */
void X(codelet_r2cbIII_15) (planner *p) { X(kr2c_register) (p, r2cbIII_15, &desc);
160
}
161
162
#else
163
164
/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 15 -name r2cbIII_15 -dft-III -include rdft/scalar/r2cbIII.h */
165
166
/*
167
 * This function contains 64 FP additions, 26 FP multiplications,
168
 * (or, 49 additions, 11 multiplications, 15 fused multiply/add),
169
 * 47 stack variables, 14 constants, and 30 memory accesses
170
 */
171
#include "rdft/scalar/r2cbIII.h"
172
173
/*
 * r2cbIII_15 -- non-FMA variant, compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined (the #else branch).
 *
 * Per the generator command line recorded in the comment preceding this
 * function, the code was emitted by genfft's gen_r2cb with
 * "-n 15 -dft-III -sign 1" (without -fma).
 *
 * Each of the v loop iterations:
 *   - reads eight values from Cr (entries 0..7, entry 0 directly and the
 *     others through WS(csr, k)),
 *   - reads seven values from Ci (entries 0..6, through WS(csi, k)),
 *   - stores eight results to R0 (entries 0..7, through WS(rs, k)) and
 *     seven results to R1 (entries 0..6, through WS(rs, k)).
 * After every iteration R0 and R1 advance by ovs, Cr and Ci by ivs.
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE come
 * from the included project headers and are not visible here.
 * NOTE(review): FMA(a, b, c) is presumably a*b + c, FNMS(a, b, c)
 * presumably c - a*b and FMS(a, b, c) presumably a*b - c -- confirm
 * against the header definitions.
 */
static void r2cbIII_15(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
174
0
{
175
0
     /* Named numeric constants used by the arithmetic below. */
     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
176
0
     DK(KP433012701, +0.433012701892219323381861585376468091735701313);
177
0
     DK(KP968245836, +0.968245836551854221294816349945599902708230426);
178
0
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
179
0
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
180
0
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
181
0
     DK(KP1_647278207, +1.647278207092663851754840078556380006059321028);
182
0
     DK(KP1_018073920, +1.018073920910254366901961726787815297021466329);
183
0
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
184
0
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
185
0
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
186
0
     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
187
0
     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
188
0
     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
189
0
     {
190
0
    INT i;
191
0
    /* Runs v times; the increment clause steps the output pointers by ovs
       and the input pointers by ivs. */
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(60, rs), MAKE_VOLATILE_STRIDE(60, csr), MAKE_VOLATILE_STRIDE(60, csi)) {
192
0
         E Tv, TD, T5, Ts, TC, T6, Tf, TW, TK, Td, Tg, TP, To, TN, TA;
193
0
         E TO, TQ, Tt, Tu, T12, Te, T11;
194
0
         /* Load Ci entries 4 and 1 and combine them into Tv and TD. */
         Tt = Ci[WS(csi, 4)];
195
0
         Tu = Ci[WS(csi, 1)];
196
0
         Tv = FMA(KP1_902113032, Tt, KP1_175570504 * Tu);
197
0
         TD = FNMS(KP1_175570504, Tt, KP1_902113032 * Tu);
198
0
         {
199
0
        /* Load Cr entries 7, 4 and 1; produces T5, Ts and TC. */
        E T1, T4, Tq, T2, T3, Tr;
200
0
        T1 = Cr[WS(csr, 7)];
201
0
        T2 = Cr[WS(csr, 4)];
202
0
        T3 = Cr[WS(csr, 1)];
203
0
        T4 = T2 + T3;
204
0
        Tq = KP1_118033988 * (T2 - T3);
205
0
        T5 = FMA(KP2_000000000, T4, T1);
206
0
        Tr = FNMS(KP500000000, T4, T1);
207
0
        Ts = Tq + Tr;
208
0
        TC = Tr - Tq;
209
0
         }
210
0
         {
211
0
        /* Load Cr entries 2, 3, 6, 0 and 5; produces T6, Tf, TW, TK, Td, Tg. */
        E Tc, TJ, T9, TI;
212
0
        T6 = Cr[WS(csr, 2)];
213
0
        {
214
0
       E Ta, Tb, T7, T8;
215
0
       Ta = Cr[WS(csr, 3)];
216
0
       Tb = Cr[WS(csr, 6)];
217
0
       Tc = Ta + Tb;
218
0
       TJ = Ta - Tb;
219
0
       T7 = Cr[0];
220
0
       T8 = Cr[WS(csr, 5)];
221
0
       T9 = T7 + T8;
222
0
       TI = T7 - T8;
223
0
        }
224
0
        Tf = KP559016994 * (T9 - Tc);
225
0
        TW = FNMS(KP1_647278207, TJ, KP1_018073920 * TI);
226
0
        TK = FMA(KP1_647278207, TI, KP1_018073920 * TJ);
227
0
        Td = T9 + Tc;
228
0
        Tg = FNMS(KP250000000, Td, T6);
229
0
         }
230
0
         {
231
0
        /* Load Ci entries 2, 3, 6, 0 and 5; produces TP, To, TN, TA, TO, TQ. */
        E Tn, TM, Tk, TL;
232
0
        TP = Ci[WS(csi, 2)];
233
0
        {
234
0
       E Tl, Tm, Ti, Tj;
235
0
       Tl = Ci[WS(csi, 3)];
236
0
       Tm = Ci[WS(csi, 6)];
237
0
       Tn = Tl - Tm;
238
0
       TM = Tl + Tm;
239
0
       Ti = Ci[0];
240
0
       Tj = Ci[WS(csi, 5)];
241
0
       Tk = Ti + Tj;
242
0
       TL = Ti - Tj;
243
0
        }
244
0
        To = FMA(KP951056516, Tk, KP587785252 * Tn);
245
0
        TN = KP968245836 * (TL - TM);
246
0
        TA = FNMS(KP587785252, Tk, KP951056516 * Tn);
247
0
        TO = TL + TM;
248
0
        TQ = FMA(KP433012701, TO, KP1_732050807 * TP);
249
0
         }
250
0
         /* Store R0 entries 0 and 5 and R1 entry 2. */
         T12 = KP1_732050807 * (TP - TO);
251
0
         Te = T6 + Td;
252
0
         T11 = Te - T5;
253
0
         R0[0] = FMA(KP2_000000000, Te, T5);
254
0
         R0[WS(rs, 5)] = T12 - T11;
255
0
         R1[WS(rs, 2)] = T11 + T12;
256
0
         {
257
0
        /* Store R0 entries 6, 1, 4 and R1 entries 1, 3, 6. */
        E TE, TG, TB, TF, TY, T10, Tz, TX, TV, TZ;
258
0
        TE = TC - TD;
259
0
        TG = TC + TD;
260
0
        Tz = Tg - Tf;
261
0
        TB = Tz + TA;
262
0
        TF = TA - Tz;
263
0
        TX = TN + TQ;
264
0
        TY = TW - TX;
265
0
        T10 = TW + TX;
266
0
        R0[WS(rs, 6)] = FMA(KP2_000000000, TB, TE);
267
0
        R1[WS(rs, 1)] = FMS(KP2_000000000, TF, TG);
268
0
        TV = TE - TB;
269
0
        R0[WS(rs, 1)] = TV + TY;
270
0
        R1[WS(rs, 3)] = TY - TV;
271
0
        TZ = TF + TG;
272
0
        R0[WS(rs, 4)] = TZ - T10;
273
0
        R1[WS(rs, 6)] = -(TZ + T10);
274
0
         }
275
0
         {
276
0
        /* Store R1 entries 4, 5, 0 and R0 entries 3, 2, 7. */
        E Tw, Ty, Tp, Tx, TS, TU, Th, TR, TH, TT;
277
0
        Tw = Ts - Tv;
278
0
        Ty = Ts + Tv;
279
0
        Th = Tf + Tg;
280
0
        Tp = Th + To;
281
0
        Tx = Th - To;
282
0
        TR = TN - TQ;
283
0
        TS = TK + TR;
284
0
        TU = TR - TK;
285
0
        R1[WS(rs, 4)] = -(FMA(KP2_000000000, Tp, Tw));
286
0
        R0[WS(rs, 3)] = FMA(KP2_000000000, Tx, Ty);
287
0
        TH = Tx - Ty;
288
0
        R1[WS(rs, 5)] = TH - TS;
289
0
        R1[0] = TH + TS;
290
0
        TT = Tw - Tp;
291
0
        R0[WS(rs, 2)] = TT - TU;
292
0
        R0[WS(rs, 7)] = TT + TU;
293
0
         }
294
0
    }
295
0
     }
296
0
}
297
298
/*
 * Descriptor handed to X(kr2c_register) for the non-FMA variant.  It
 * carries the value 15, the codelet name string, the tuple
 * { 49, 11, 15, 0 } and the address of GENUS.  The first three tuple
 * entries equal the "49 additions, 11 multiplications, 15 fused
 * multiply/add" counts quoted in the generated header comment of this
 * variant.
 * NOTE(review): 15 is presumably the transform size (generator flag -n 15);
 * the kr2c_desc field layout is declared elsewhere -- confirm there.
 */
static const kr2c_desc desc = { 15, "r2cbIII_15", { 49, 11, 15, 0 }, &GENUS };
299
300
1
/*
 * Registration entry point for the non-FMA variant: passes the planner p,
 * the static function r2cbIII_15 and its descriptor desc to
 * X(kr2c_register).  X(...) is a project macro defined elsewhere
 * (presumably a symbol-name prefixing macro -- confirm in the project
 * headers).
 */
void X(codelet_r2cbIII_15) (planner *p) { X(kr2c_register) (p, r2cbIII_15, &desc);
301
1
}
302
303
#endif