Coverage Report

Created: 2023-09-25 07:08

/src/fftw3/rdft/scalar/r2cb/hc2cb_6.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Mon Sep 25 07:07:22 UTC 2023 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hc2cb_6 -include rdft/scalar/hc2cb.h */
29
30
/*
31
 * This function contains 46 FP additions, 32 FP multiplications,
32
 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
33
 * 31 stack variables, 2 constants, and 24 memory accesses
34
 */
35
#include "rdft/scalar/hc2cb.h"
36
37
/*
 * hc2cb_6 (FMA variant): genfft-generated size-6 hc2c kernel (see the
 * "Generated by" line: gen_hc2c -fma -sign 1 -n 6 -dif).  This copy is the
 * one compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Parameters, as used by the code below:
 *   Rp, Ip, Rm, Im - data arrays, updated in place; each iteration first reads
 *                    and then overwrites elements 0, WS(rs, 1) and WS(rs, 2)
 *                    of all four arrays (12 loads, 12 stores).
 *   W              - twiddle table; ten values W[0..9] are consumed per
 *                    iteration, starting at offset (mb - 1) * 10.
 *   rs             - stride handed to WS() to index within one array.
 *   mb, me         - half-open iteration range [mb, me) for the counter m.
 *   ms             - per-iteration step: Rp/Ip advance by +ms, Rm/Im by -ms.
 *
 * NOTE(review): FMA, FNMS, DK, E, R, WS and MAKE_VOLATILE_STRIDE are defined
 * outside this view; FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c)
 * presumably c - a*b -- confirm in the FFTW headers.  Under that reading each
 * twiddled output pair below has the shape of a complex multiplication by
 * (W[2k], W[2k+1]).
 */
static void hc2cb_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);  /* sqrt(3)/2 */
40
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);  /* 1/2 */
41
     {
42
    INT m;
43
    for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) {
44
         E Td, Tn, TO, TJ, TN, Tk, Tr, T3, TC, Ts, TQ, Ta, Tm, TF, TG;
45
         {  /* load Ip/Im inputs and form their sums and differences */
46
        E Tb, Tc, Tj, TI, Tg, TH;
47
        Tb = Ip[0];
48
        Tc = Im[WS(rs, 2)];
49
        Td = Tb - Tc;
50
        {
51
       E Th, Ti, Te, Tf;
52
       Th = Ip[WS(rs, 1)];
53
       Ti = Im[WS(rs, 1)];
54
       Tj = Th - Ti;
55
       TI = Th + Ti;
56
       Te = Ip[WS(rs, 2)];
57
       Tf = Im[0];
58
       Tg = Te - Tf;
59
       TH = Te + Tf;
60
        }
61
        Tn = Tj - Tg;
62
        TO = TH - TI;
63
        TJ = TH + TI;
64
        TN = Tb + Tc;
65
        Tk = Tg + Tj;
66
        Tr = FNMS(KP500000000, Tk, Td);
67
         }
68
         {  /* load Rp/Rm inputs and form their sums and differences */
69
        E T9, TE, T6, TD, T1, T2;
70
        T1 = Rp[0];
71
        T2 = Rm[WS(rs, 2)];
72
        T3 = T1 + T2;
73
        TC = T1 - T2;
74
        {
75
       E T7, T8, T4, T5;
76
       T7 = Rm[WS(rs, 1)];
77
       T8 = Rp[WS(rs, 1)];
78
       T9 = T7 + T8;
79
       TE = T7 - T8;
80
       T4 = Rp[WS(rs, 2)];
81
       T5 = Rm[0];
82
       T6 = T4 + T5;
83
       TD = T4 - T5;
84
        }
85
        Ts = T6 - T9;
86
        TQ = TD - TE;
87
        Ta = T6 + T9;
88
        Tm = FNMS(KP500000000, Ta, T3);
89
        TF = TD + TE;
90
        TG = FNMS(KP500000000, TF, TC);
91
         }
92
         Rp[0] = T3 + Ta;  /* stored without any twiddle multiply */
93
         Rm[0] = Td + Tk;  /* stored without any twiddle multiply */
94
         {  /* outputs Rp/Rm[WS(rs, 1)], combined with twiddles W[2], W[3] */
95
        E To, Tt, Tp, Tu, Tl, Tq;
96
        To = FNMS(KP866025403, Tn, Tm);
97
        Tt = FNMS(KP866025403, Ts, Tr);
98
        Tl = W[2];
99
        Tp = Tl * To;
100
        Tu = Tl * Tt;
101
        Tq = W[3];
102
        Rp[WS(rs, 1)] = FNMS(Tq, Tt, Tp);
103
        Rm[WS(rs, 1)] = FMA(Tq, To, Tu);
104
         }
105
         {  /* outputs Ip/Im[WS(rs, 1)], combined with twiddles W[4], W[5] */
106
        E T13, TZ, T11, T12, T14, T10;
107
        T13 = TN + TO;
108
        T10 = TC + TF;
109
        TZ = W[4];
110
        T11 = TZ * T10;
111
        T12 = W[5];
112
        T14 = T12 * T10;
113
        Ip[WS(rs, 1)] = FNMS(T12, T13, T11);
114
        Im[WS(rs, 1)] = FMA(TZ, T13, T14);
115
         }
116
         {  /* outputs Rp/Rm[WS(rs, 2)], combined with twiddles W[6], W[7] */
117
        E Tw, Tz, Tx, TA, Tv, Ty;
118
        Tw = FMA(KP866025403, Tn, Tm);
119
        Tz = FMA(KP866025403, Ts, Tr);
120
        Tv = W[6];
121
        Tx = Tv * Tw;
122
        TA = Tv * Tz;
123
        Ty = W[7];
124
        Rp[WS(rs, 2)] = FNMS(Ty, Tz, Tx);
125
        Rm[WS(rs, 2)] = FMA(Ty, Tw, TA);
126
         }
127
         {  /* outputs Ip/Im[0] (twiddles W[0], W[1]) and Ip/Im[WS(rs, 2)] (W[8], W[9]) */
128
        E TR, TX, TT, TV, TW, TY, TB, TL, TM, TS, TP, TU, TK;
129
        TP = FNMS(KP500000000, TO, TN);
130
        TR = FMA(KP866025403, TQ, TP);
131
        TX = FNMS(KP866025403, TQ, TP);
132
        TU = FMA(KP866025403, TJ, TG);
133
        TT = W[8];
134
        TV = TT * TU;
135
        TW = W[9];
136
        TY = TW * TU;
137
        TK = FNMS(KP866025403, TJ, TG);
138
        TB = W[0];
139
        TL = TB * TK;
140
        TM = W[1];
141
        TS = TM * TK;
142
        Ip[0] = FNMS(TM, TR, TL);
143
        Im[0] = FMA(TB, TR, TS);
144
        Ip[WS(rs, 2)] = FNMS(TW, TX, TV);
145
        Im[WS(rs, 2)] = FMA(TT, TX, TY);
146
         }
147
    }
148
     }
149
}
150
151
/*
 * Twiddle instruction table referenced by the hc2c_desc below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this view;
 * presumably { TW_FULL, 1, 6 } requests the full twiddle set for radix 6 and
 * { TW_NEXT, 1, 0 } ends the list -- confirm against the tw_instr definition.
 * The kernel above consumes 10 values of W per iteration.
 */
static const tw_instr twinstr[] = {
152
     { TW_FULL, 1, 6 },
153
     { TW_NEXT, 1, 0 }
154
};
155
156
/*
 * Codelet descriptor handed to the registration call: size 6, name string
 * "hc2cb_6", the twinstr table, and &GENUS (defined outside this view).
 * The first three numbers of { 24, 10, 22, 0 } match the operation counts in
 * the generator comment for this variant (24 additions, 10 multiplications,
 * 22 fused multiply/add); the meaning of the fourth field is not visible here.
 */
static const hc2c_desc desc = { 6, "hc2cb_6", twinstr, &GENUS, { 24, 10, 22, 0 } };
157
158
/*
 * Registration entry point: passes the hc2cb_6 kernel, its descriptor and the
 * HC2C_VIA_RDFT flag to X(khc2c_register) for planner p.
 * NOTE(review): X() is a macro defined outside this view (presumably a
 * library name-prefixing macro) -- confirm in the FFTW headers.
 */
void X(codelet_hc2cb_6) (planner *p) {
159
     X(khc2c_register) (p, hc2cb_6, &desc, HC2C_VIA_RDFT);
160
}
161
#else
162
163
/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 6 -dif -name hc2cb_6 -include rdft/scalar/hc2cb.h */
164
165
/*
166
 * This function contains 46 FP additions, 28 FP multiplications,
167
 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
168
 * 25 stack variables, 2 constants, and 24 memory accesses
169
 */
170
#include "rdft/scalar/hc2cb.h"
171
172
/*
 * hc2cb_6 (non-FMA variant): genfft-generated size-6 hc2c kernel (see the
 * "Generated by" line: gen_hc2c -sign 1 -n 6 -dif, without -fma).  This copy
 * sits in the #else branch, i.e. it is compiled when neither ARCH_PREFERS_FMA
 * nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Parameters, as used by the code below:
 *   Rp, Ip, Rm, Im - data arrays, updated in place; each iteration first reads
 *                    and then overwrites elements 0, WS(rs, 1) and WS(rs, 2)
 *                    of all four arrays (12 loads, 12 stores).
 *   W              - twiddle table; ten values W[0..9] are consumed per
 *                    iteration, starting at offset (mb - 1) * 10.
 *   rs             - stride handed to WS() to index within one array.
 *   mb, me         - half-open iteration range [mb, me) for the counter m.
 *   ms             - per-iteration step: Rp/Ip advance by +ms, Rm/Im by -ms.
 *
 * Unlike the FMA variant, the products with KP866025403 are formed up front
 * (TO, Tr, TL, Tn) and then combined by plain additions and subtractions.
 *
 * NOTE(review): FMA, FNMS, DK, E, R, WS and MAKE_VOLATILE_STRIDE are defined
 * outside this view; FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c)
 * presumably c - a*b -- confirm in the FFTW headers.
 */
static void hc2cb_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
173
0
{
174
0
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);  /* 1/2 */
175
0
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);  /* sqrt(3)/2 */
176
0
     {
177
0
    INT m;
178
0
    for (m = mb, W = W + ((mb - 1) * 10); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 10, MAKE_VOLATILE_STRIDE(24, rs)) {
179
0
         E T3, Ty, Td, TE, Ta, TO, Tr, TB, Tk, TL, Tn, TH;
180
0
         {  /* first input pair from each of Rp/Rm and Ip/Im */
181
0
        E T1, T2, Tb, Tc;
182
0
        T1 = Rp[0];
183
0
        T2 = Rm[WS(rs, 2)];
184
0
        T3 = T1 + T2;
185
0
        Ty = T1 - T2;
186
0
        Tb = Ip[0];
187
0
        Tc = Im[WS(rs, 2)];
188
0
        Td = Tb - Tc;
189
0
        TE = Tb + Tc;
190
0
         }
191
0
         {  /* remaining Rp/Rm inputs */
192
0
        E T6, Tz, T9, TA;
193
0
        {
194
0
       E T4, T5, T7, T8;
195
0
       T4 = Rp[WS(rs, 2)];
196
0
       T5 = Rm[0];
197
0
       T6 = T4 + T5;
198
0
       Tz = T4 - T5;
199
0
       T7 = Rm[WS(rs, 1)];
200
0
       T8 = Rp[WS(rs, 1)];
201
0
       T9 = T7 + T8;
202
0
       TA = T7 - T8;
203
0
        }
204
0
        Ta = T6 + T9;
205
0
        TO = KP866025403 * (Tz - TA);
206
0
        Tr = KP866025403 * (T6 - T9);
207
0
        TB = Tz + TA;
208
0
         }
209
0
         {  /* remaining Ip/Im inputs */
210
0
        E Tg, TG, Tj, TF;
211
0
        {
212
0
       E Te, Tf, Th, Ti;
213
0
       Te = Ip[WS(rs, 2)];
214
0
       Tf = Im[0];
215
0
       Tg = Te - Tf;
216
0
       TG = Te + Tf;
217
0
       Th = Ip[WS(rs, 1)];
218
0
       Ti = Im[WS(rs, 1)];
219
0
       Tj = Th - Ti;
220
0
       TF = Th + Ti;
221
0
        }
222
0
        Tk = Tg + Tj;
223
0
        TL = KP866025403 * (TG + TF);
224
0
        Tn = KP866025403 * (Tj - Tg);
225
0
        TH = TF - TG;
226
0
         }
227
0
         Rp[0] = T3 + Ta;  /* stored without any twiddle multiply */
228
0
         Rm[0] = Td + Tk;  /* stored without any twiddle multiply */
229
0
         {  /* outputs Ip/Im[WS(rs, 1)], combined with twiddles W[4], W[5] */
230
0
        E TC, TI, Tx, TD;
231
0
        TC = Ty + TB;
232
0
        TI = TE - TH;
233
0
        Tx = W[4];
234
0
        TD = W[5];
235
0
        Ip[WS(rs, 1)] = FNMS(TD, TI, Tx * TC);
236
0
        Im[WS(rs, 1)] = FMA(TD, TC, Tx * TI);
237
0
         }
238
0
         {  /* outputs Rp/Rm[WS(rs, 1)] (W[2], W[3]) and Rp/Rm[WS(rs, 2)] (W[6], W[7]) */
239
0
        E To, Tu, Ts, Tw, Tm, Tq;
240
0
        Tm = FNMS(KP500000000, Ta, T3);
241
0
        To = Tm - Tn;
242
0
        Tu = Tm + Tn;
243
0
        Tq = FNMS(KP500000000, Tk, Td);
244
0
        Ts = Tq - Tr;
245
0
        Tw = Tr + Tq;
246
0
        {
247
0
       E Tl, Tp, Tt, Tv;
248
0
       Tl = W[2];
249
0
       Tp = W[3];
250
0
       Rp[WS(rs, 1)] = FNMS(Tp, Ts, Tl * To);
251
0
       Rm[WS(rs, 1)] = FMA(Tl, Ts, Tp * To);
252
0
       Tt = W[6];
253
0
       Tv = W[7];
254
0
       Rp[WS(rs, 2)] = FNMS(Tv, Tw, Tt * Tu);
255
0
       Rm[WS(rs, 2)] = FMA(Tt, Tw, Tv * Tu);
256
0
        }
257
0
         }
258
0
         {  /* outputs Ip/Im[0] (W[0], W[1]) and Ip/Im[WS(rs, 2)] (W[8], W[9]) */
259
0
        E TM, TS, TQ, TU, TK, TP;
260
0
        TK = FNMS(KP500000000, TB, Ty);
261
0
        TM = TK - TL;
262
0
        TS = TK + TL;
263
0
        TP = FMA(KP500000000, TH, TE);
264
0
        TQ = TO + TP;
265
0
        TU = TP - TO;
266
0
        {
267
0
       E TJ, TN, TR, TT;
268
0
       TJ = W[0];
269
0
       TN = W[1];
270
0
       Ip[0] = FNMS(TN, TQ, TJ * TM);
271
0
       Im[0] = FMA(TN, TM, TJ * TQ);
272
0
       TR = W[8];
273
0
       TT = W[9];
274
0
       Ip[WS(rs, 2)] = FNMS(TT, TU, TR * TS);
275
0
       Im[WS(rs, 2)] = FMA(TT, TS, TR * TU);
276
0
        }
277
0
         }
278
0
    }
279
0
     }
280
0
}
281
282
/*
 * Twiddle instruction table referenced by the hc2c_desc below (same contents
 * as in the FMA branch of this file).
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are declared outside this view;
 * presumably { TW_FULL, 1, 6 } requests the full twiddle set for radix 6 and
 * { TW_NEXT, 1, 0 } ends the list -- confirm against the tw_instr definition.
 * The kernel above consumes 10 values of W per iteration.
 */
static const tw_instr twinstr[] = {
283
     { TW_FULL, 1, 6 },
284
     { TW_NEXT, 1, 0 }
285
};
286
287
/*
 * Codelet descriptor handed to the registration call: size 6, name string
 * "hc2cb_6", the twinstr table, and &GENUS (defined outside this view).
 * The first three numbers of { 32, 14, 14, 0 } match the operation counts in
 * the generator comment for this variant (32 additions, 14 multiplications,
 * 14 fused multiply/add); the meaning of the fourth field is not visible here.
 */
static const hc2c_desc desc = { 6, "hc2cb_6", twinstr, &GENUS, { 32, 14, 14, 0 } };
288
289
1
/*
 * Registration entry point: passes the hc2cb_6 kernel, its descriptor and the
 * HC2C_VIA_RDFT flag to X(khc2c_register) for planner p.
 * NOTE(review): X() is a macro defined outside this view (presumably a
 * library name-prefixing macro) -- confirm in the FFTW headers.
 */
void X(codelet_hc2cb_6) (planner *p) {
290
1
     X(khc2c_register) (p, hc2cb_6, &desc, HC2C_VIA_RDFT);
291
1
}
292
#endif