Coverage Report

Created: 2024-09-08 06:43

/src/fftw3/rdft/scalar/r2cb/r2cb_16.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sun Sep  8 06:42:08 UTC 2024 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -name r2cb_16 -include rdft/scalar/r2cb.h */
29
30
/*
31
 * This function contains 58 FP additions, 32 FP multiplications,
32
 * (or, 26 additions, 0 multiplications, 32 fused multiply/add),
33
 * 31 stack variables, 4 constants, and 32 memory accesses
34
 */
35
#include "rdft/scalar/r2cb.h"
36
37
/*
 * r2cb_16, FMA variant: fully unrolled kernel emitted by genfft
 * (gen_r2cb with -fma -n 16 -sign 1, per the "Generated by" comment).
 *
 * Each loop iteration reads 16 inputs -- Cr[0], Cr[WS(csr, 1..8)] and
 * Ci[WS(csi, 1..7)] -- and writes 16 outputs -- R0[0], R0[WS(rs, 1..7)],
 * R1[0] and R1[WS(rs, 1..7)].
 *
 * Parameters:
 *   R0, R1    output base pointers; both advance by ovs per iteration
 *   Cr, Ci    input base pointers; both advance by ivs per iteration
 *   rs        stride handed to WS() when indexing R0 and R1
 *   csr, csi  strides handed to WS() when indexing Cr and Ci
 *   v         number of iterations; the loop body is skipped when v <= 0
 *   ivs, ovs  per-iteration increments for the input / output pointers
 *
 * NOTE(review): presumably this is the size-16 complex-to-real (backward)
 * transform, with Cr/Ci the real/imaginary input parts and R0/R1 the
 * even/odd real outputs -- inferred from the names only; confirm against
 * rdft/codelet-rdft.h.
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are defined elsewhere;
 * presumably a*b + c and c - a*b respectively -- confirm.
 *
 * The bare numbers interleaved with the statements are the line-number
 * column of the coverage listing, not C source.  No execution counts are
 * listed for this variant.
 */
static void r2cb_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
38
{
39
     /* numerically 2*cos(pi/8) */
     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
40
     /* numerically sqrt(2) - 1 */
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
41
     /* numerically sqrt(2) */
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
42
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
43
     {
44
    INT i;
45
    /* v iterations; the pointer bumps live in the increment clause */
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) {
46
         /* intermediates that survive past the loading sub-blocks */
         E T5, TL, Tj, TD, T8, TM, To, TE, Tc, TP, Tf, TQ, Tu, Tz, TR;
47
         E TO, TH, TG;
48
         /* load Cr[0], Cr[4], Cr[8], Ci[4]; produce T5, TL, Tj, TD */
         {
49
        E T4, Ti, T3, Th, T1, T2;
50
        T4 = Cr[WS(csr, 4)];
51
        Ti = Ci[WS(csi, 4)];
52
        T1 = Cr[0];
53
        T2 = Cr[WS(csr, 8)];
54
        T3 = T1 + T2;
55
        Th = T1 - T2;
56
        T5 = FMA(KP2_000000000, T4, T3);
57
        TL = FNMS(KP2_000000000, T4, T3);
58
        Tj = FNMS(KP2_000000000, Ti, Th);
59
        TD = FMA(KP2_000000000, Ti, Th);
60
         }
61
         /* load Cr[2], Cr[6], Ci[2], Ci[6]; produce T8, TM, To, TE */
         {
62
        E T6, T7, Tk, Tl, Tm, Tn;
63
        T6 = Cr[WS(csr, 2)];
64
        T7 = Cr[WS(csr, 6)];
65
        Tk = T6 - T7;
66
        Tl = Ci[WS(csi, 2)];
67
        Tm = Ci[WS(csi, 6)];
68
        Tn = Tl + Tm;
69
        T8 = T6 + T7;
70
        TM = Tl - Tm;
71
        To = Tk - Tn;
72
        TE = Tk + Tn;
73
         }
74
         /* load the odd-indexed inputs (1, 7, 5, 3) and combine them */
         {
75
        E Tq, Ty, Tv, Tt;
76
        {
77
       E Ta, Tb, Tw, Tx;
78
       Ta = Cr[WS(csr, 1)];
79
       Tb = Cr[WS(csr, 7)];
80
       Tc = Ta + Tb;
81
       Tq = Ta - Tb;
82
       Tw = Ci[WS(csi, 1)];
83
       Tx = Ci[WS(csi, 7)];
84
       Ty = Tw + Tx;
85
       TP = Tw - Tx;
86
        }
87
        {
88
       E Td, Te, Tr, Ts;
89
       Td = Cr[WS(csr, 5)];
90
       Te = Cr[WS(csr, 3)];
91
       Tf = Td + Te;
92
       Tv = Td - Te;
93
       Tr = Ci[WS(csi, 5)];
94
       Ts = Ci[WS(csi, 3)];
95
       Tt = Tr + Ts;
96
       TQ = Tr - Ts;
97
        }
98
        Tu = Tq - Tt;
99
        Tz = Tv + Ty;
100
        TR = TP - TQ;
101
        TO = Tc - Tf;
102
        TH = Tq + Tt;
103
        TG = Ty - Tv;
104
         }
105
         /* store R0[4], R0[0], R0[3], R0[7] */
         {
106
        E T9, Tg, TT, TU;
107
        T9 = FMA(KP2_000000000, T8, T5);
108
        Tg = Tc + Tf;
109
        R0[WS(rs, 4)] = FNMS(KP2_000000000, Tg, T9);
110
        R0[0] = FMA(KP2_000000000, Tg, T9);
111
        TT = FMA(KP2_000000000, TM, TL);
112
        TU = TO + TR;
113
        R0[WS(rs, 3)] = FNMS(KP1_414213562, TU, TT);
114
        R0[WS(rs, 7)] = FMA(KP1_414213562, TU, TT);
115
         }
116
         /* store R0[2], R0[6], R1[4], R1[0] */
         {
117
        E TV, TW, Tp, TA;
118
        TV = FNMS(KP2_000000000, T8, T5);
119
        TW = TQ + TP;
120
        R0[WS(rs, 2)] = FNMS(KP2_000000000, TW, TV);
121
        R0[WS(rs, 6)] = FMA(KP2_000000000, TW, TV);
122
        Tp = FMA(KP1_414213562, To, Tj);
123
        TA = FNMS(KP414213562, Tz, Tu);
124
        R1[WS(rs, 4)] = FNMS(KP1_847759065, TA, Tp);
125
        R1[0] = FMA(KP1_847759065, TA, Tp);
126
         }
127
         /* store R1[2], R1[6], R1[3], R1[7] */
         {
128
        E TB, TC, TJ, TK;
129
        TB = FNMS(KP1_414213562, To, Tj);
130
        TC = FMA(KP414213562, Tu, Tz);
131
        R1[WS(rs, 2)] = FNMS(KP1_847759065, TC, TB);
132
        R1[WS(rs, 6)] = FMA(KP1_847759065, TC, TB);
133
        TJ = FMA(KP1_414213562, TE, TD);
134
        TK = FMA(KP414213562, TG, TH);
135
        R1[WS(rs, 3)] = FNMS(KP1_847759065, TK, TJ);
136
        R1[WS(rs, 7)] = FMA(KP1_847759065, TK, TJ);
137
         }
138
         /* store R0[5], R0[1], R1[1], R1[5] */
         {
139
        E TN, TS, TF, TI;
140
        TN = FNMS(KP2_000000000, TM, TL);
141
        TS = TO - TR;
142
        R0[WS(rs, 5)] = FNMS(KP1_414213562, TS, TN);
143
        R0[WS(rs, 1)] = FMA(KP1_414213562, TS, TN);
144
        TF = FNMS(KP1_414213562, TE, TD);
145
        TI = FNMS(KP414213562, TH, TG);
146
        R1[WS(rs, 1)] = FNMS(KP1_847759065, TI, TF);
147
        R1[WS(rs, 5)] = FMA(KP1_847759065, TI, TF);
148
         }
149
    }
150
     }
151
}
152
153
/*
 * Descriptor passed to X(kr2c_register) together with r2cb_16.  The first
 * field equals the generator's -n 16 and the second is the kernel name.
 * The first three entries of { 26, 0, 32, 0 } match the counts quoted in
 * the generator comment for this branch (26 additions, 0 multiplications,
 * 32 fused multiply/add); the meaning of the fourth entry is not visible
 * here.  GENUS is defined outside this listing.
 */
static const kr2c_desc desc = { 16, "r2cb_16", { 26, 0, 32, 0 }, &GENUS };
154
155
/*
 * Registration entry point for the FMA branch: hands the r2cb_16 kernel
 * and its descriptor to X(kr2c_register) for the given planner.
 * The bare number inside the body is the coverage listing's line-number
 * column, not C source.
 */
void X(codelet_r2cb_16) (planner *p) { X(kr2c_register) (p, r2cb_16, &desc);
156
}
157
158
#else
159
160
/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 16 -name r2cb_16 -include rdft/scalar/r2cb.h */
161
162
/*
163
 * This function contains 58 FP additions, 18 FP multiplications,
164
 * (or, 54 additions, 14 multiplications, 4 fused multiply/add),
165
 * 31 stack variables, 4 constants, and 32 memory accesses
166
 */
167
#include "rdft/scalar/r2cb.h"
168
169
/*
 * r2cb_16, non-FMA variant: fully unrolled kernel emitted by genfft
 * (gen_r2cb with -n 16 -sign 1, per the "Generated by" comment).
 *
 * Each loop iteration reads 16 inputs -- Cr[0], Cr[WS(csr, 1..8)] and
 * Ci[WS(csi, 1..7)] -- and writes 16 outputs -- R0[0], R0[WS(rs, 1..7)],
 * R1[0] and R1[WS(rs, 1..7)].
 *
 * Parameters:
 *   R0, R1    output base pointers; both advance by ovs per iteration
 *   Cr, Ci    input base pointers; both advance by ivs per iteration
 *   rs        stride handed to WS() when indexing R0 and R1
 *   csr, csi  strides handed to WS() when indexing Cr and Ci
 *   v         number of iterations; the loop body is skipped when v <= 0
 *   ivs, ovs  per-iteration increments for the input / output pointers
 *
 * NOTE(review): presumably this is the size-16 complex-to-real (backward)
 * transform, with Cr/Ci the real/imaginary input parts and R0/R1 the
 * even/odd real outputs -- inferred from the names only; confirm against
 * rdft/codelet-rdft.h.
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are defined elsewhere;
 * presumably a*b + c and c - a*b respectively -- confirm.
 *
 * The bare numbers interleaved with the statements are the line-number
 * and execution-count columns of the coverage listing, not C source.
 * Every count shown inside this body is 0, i.e. the kernel was not
 * executed during the measured run.
 */
static void r2cb_16(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
170
0
{
171
0
     /* numerically 2*cos(pi/8) */
     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
172
0
     /* numerically 2*sin(pi/8) */
     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
173
0
     /* numerically sqrt(2) */
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
174
0
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
175
0
     {
176
0
    INT i;
177
0
    /* v iterations; the pointer bumps live in the increment clause */
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(64, rs), MAKE_VOLATILE_STRIDE(64, csr), MAKE_VOLATILE_STRIDE(64, csi)) {
178
0
         /* intermediates that survive past the loading sub-blocks */
         E T9, TS, Tl, TG, T6, TR, Ti, TD, Td, Tq, Tg, Tt, Tn, Tu, TV;
179
0
         E TU, TN, TK;
180
0
         /* load Cr[2], Cr[6], Ci[2], Ci[6]; produce pre-scaled T9, TS, Tl, TG */
         {
181
0
        E T7, T8, TE, Tj, Tk, TF;
182
0
        T7 = Cr[WS(csr, 2)];
183
0
        T8 = Cr[WS(csr, 6)];
184
0
        TE = T7 - T8;
185
0
        Tj = Ci[WS(csi, 2)];
186
0
        Tk = Ci[WS(csi, 6)];
187
0
        TF = Tj + Tk;
188
0
        T9 = KP2_000000000 * (T7 + T8);
189
0
        TS = KP1_414213562 * (TE + TF);
190
0
        Tl = KP2_000000000 * (Tj - Tk);
191
0
        TG = KP1_414213562 * (TE - TF);
192
0
         }
193
0
         /* load Cr[0], Cr[4], Cr[8], Ci[4]; produce T6, TR, Ti, TD */
         {
194
0
        E T5, TC, T3, TA;
195
0
        {
196
0
       E T4, TB, T1, T2;
197
0
       T4 = Cr[WS(csr, 4)];
198
0
       T5 = KP2_000000000 * T4;
199
0
       TB = Ci[WS(csi, 4)];
200
0
       TC = KP2_000000000 * TB;
201
0
       T1 = Cr[0];
202
0
       T2 = Cr[WS(csr, 8)];
203
0
       T3 = T1 + T2;
204
0
       TA = T1 - T2;
205
0
        }
206
0
        T6 = T3 + T5;
207
0
        TR = TA + TC;
208
0
        Ti = T3 - T5;
209
0
        TD = TA - TC;
210
0
         }
211
0
         /* load the odd-indexed inputs (1, 7, 5, 3) and combine them */
         {
212
0
        E TI, TM, TL, TJ;
213
0
        {
214
0
       E Tb, Tc, To, Tp;
215
0
       Tb = Cr[WS(csr, 1)];
216
0
       Tc = Cr[WS(csr, 7)];
217
0
       Td = Tb + Tc;
218
0
       TI = Tb - Tc;
219
0
       To = Ci[WS(csi, 1)];
220
0
       Tp = Ci[WS(csi, 7)];
221
0
       Tq = To - Tp;
222
0
       TM = To + Tp;
223
0
        }
224
0
        {
225
0
       E Te, Tf, Tr, Ts;
226
0
       Te = Cr[WS(csr, 5)];
227
0
       Tf = Cr[WS(csr, 3)];
228
0
       Tg = Te + Tf;
229
0
       TL = Te - Tf;
230
0
       Tr = Ci[WS(csi, 5)];
231
0
       Ts = Ci[WS(csi, 3)];
232
0
       Tt = Tr - Ts;
233
0
       TJ = Tr + Ts;
234
0
        }
235
0
        Tn = Td - Tg;
236
0
        Tu = Tq - Tt;
237
0
        TV = TM - TL;
238
0
        TU = TI + TJ;
239
0
        TN = TL + TM;
240
0
        TK = TI - TJ;
241
0
         }
242
0
         /* store R0[4], R0[0], R1[5], R1[1] */
         {
243
0
        E Ta, Th, TT, TW;
244
0
        Ta = T6 + T9;
245
0
        Th = KP2_000000000 * (Td + Tg);
246
0
        R0[WS(rs, 4)] = Ta - Th;
247
0
        R0[0] = Ta + Th;
248
0
        TT = TR - TS;
249
0
        TW = FNMS(KP1_847759065, TV, KP765366864 * TU);
250
0
        R1[WS(rs, 5)] = TT - TW;
251
0
        R1[WS(rs, 1)] = TT + TW;
252
0
         }
253
0
         /* store R1[3], R1[7], R0[5], R0[1] */
         {
254
0
        E TX, TY, Tm, Tv;
255
0
        TX = TR + TS;
256
0
        TY = FMA(KP1_847759065, TU, KP765366864 * TV);
257
0
        R1[WS(rs, 3)] = TX - TY;
258
0
        R1[WS(rs, 7)] = TX + TY;
259
0
        Tm = Ti - Tl;
260
0
        Tv = KP1_414213562 * (Tn - Tu);
261
0
        R0[WS(rs, 5)] = Tm - Tv;
262
0
        R0[WS(rs, 1)] = Tm + Tv;
263
0
         }
264
0
         /* store R0[3], R0[7], R1[4], R1[0] */
         {
265
0
        E Tw, Tx, TH, TO;
266
0
        Tw = Ti + Tl;
267
0
        Tx = KP1_414213562 * (Tn + Tu);
268
0
        R0[WS(rs, 3)] = Tw - Tx;
269
0
        R0[WS(rs, 7)] = Tw + Tx;
270
0
        TH = TD + TG;
271
0
        TO = FNMS(KP765366864, TN, KP1_847759065 * TK);
272
0
        R1[WS(rs, 4)] = TH - TO;
273
0
        R1[0] = TH + TO;
274
0
         }
275
0
         /* store R1[2], R1[6], R0[2], R0[6] */
         {
276
0
        E TP, TQ, Ty, Tz;
277
0
        TP = TD - TG;
278
0
        TQ = FMA(KP765366864, TK, KP1_847759065 * TN);
279
0
        R1[WS(rs, 2)] = TP - TQ;
280
0
        R1[WS(rs, 6)] = TP + TQ;
281
0
        Ty = T6 - T9;
282
0
        Tz = KP2_000000000 * (Tt + Tq);
283
0
        R0[WS(rs, 2)] = Ty - Tz;
284
0
        R0[WS(rs, 6)] = Ty + Tz;
285
0
         }
286
0
    }
287
0
     }
288
0
}
289
290
/*
 * Descriptor passed to X(kr2c_register) together with r2cb_16.  The first
 * field equals the generator's -n 16 and the second is the kernel name.
 * The first three entries of { 54, 14, 4, 0 } match the counts quoted in
 * the generator comment for this branch (54 additions, 14 multiplications,
 * 4 fused multiply/add); the meaning of the fourth entry is not visible
 * here.  GENUS is defined outside this listing.
 */
static const kr2c_desc desc = { 16, "r2cb_16", { 54, 14, 4, 0 }, &GENUS };
291
292
1
/*
 * Registration entry point for the non-FMA branch: hands the r2cb_16
 * kernel and its descriptor to X(kr2c_register) for the given planner.
 * The bare numbers inside the body are the coverage listing's line-number
 * and execution-count columns, not C source; the count of 1 shows this
 * function ran once in the measured run.
 */
void X(codelet_r2cb_16) (planner *p) { X(kr2c_register) (p, r2cb_16, &desc);
293
1
}
294
295
#endif