Coverage Report

Created: 2025-07-18 06:52

/src/fftw3/rdft/scalar/r2cf/hc2cfdft2_4.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Fri Jul 18 06:51:18 UTC 2025 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_hc2cdft.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include rdft/scalar/hc2cf.h */
29
30
/*
31
 * This function contains 32 FP additions, 24 FP multiplications,
32
 * (or, 24 additions, 16 multiplications, 8 fused multiply/add),
33
 * 37 stack variables, 1 constants, and 16 memory accesses
34
 */
35
#include "rdft/scalar/hc2cf.h"
36
37
/*
 * hc2cfdft2_4 -- variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every m in [mb, me) the loop body loads eight reals (Rp, Ip, Rm and Im
 * at offsets 0 and WS(rs, 1)) plus four twiddle values W[0..3], combines
 * them, and stores eight results back into the same eight locations.  All
 * loads happen before the first store, and every stored value is scaled by
 * KP500000000 (0.5).
 *
 * Rp, Ip  - arrays read and overwritten; advanced by +ms after each iteration
 * Rm, Im  - arrays read and overwritten; advanced by -ms after each iteration
 * W       - twiddle table; offset by (mb - 1) * 4 before the first iteration
 *           and advanced by 4 after each iteration
 * rs      - stride handed to WS(rs, 1) to address the second element
 * mb, me  - half-open range of the loop counter m
 * ms      - per-iteration step applied to the four data pointers
 *
 * NOTE(review): DK, E, FMA, FNMS, WS and MAKE_VOLATILE_STRIDE are defined
 * outside this file.  The arithmetic notes below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm against the
 * codelet headers.
 */
static void hc2cfdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
40
     {
41
    INT m;
42
    for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) {
43
         E T1, T5, T2, T4, T6, Tc, T3, Tb;
44
         T1 = W[0];  /* the four twiddle values stored for this m */
45
         T5 = W[3];
46
         T2 = W[2];
47
         T3 = T1 * T2;
48
         Tb = T1 * T5;
49
         T4 = W[1];
50
         T6 = FMA(T4, T5, T3);  /* W[0]*W[2] + W[1]*W[3] (given the FMA assumption) */
51
         Tc = FNMS(T4, T2, Tb);  /* W[0]*W[3] - W[1]*W[2] (given the FNMS assumption) */
52
         {
53
        E Tj, Tp, To, TE, Tw, T9, Tt, Ta, TC, Tf, Tr, Ts, Tx;
54
        {  /* inputs at offset 0: sums/differences and their products with T1, T4 */
55
       E Th, Ti, Tl, Tm, Tn;
56
       Th = Ip[0];
57
       Ti = Im[0];
58
       Tj = Th - Ti;
59
       Tp = Th + Ti;
60
       Tl = Rm[0];
61
       Tm = Rp[0];
62
       Tn = Tl - Tm;
63
       To = T1 * Tn;
64
       TE = T4 * Tn;
65
       Tw = Tm + Tl;
66
        }
67
        {  /* inputs at offset WS(rs, 1): sums/differences and their products with T2, T6 */
68
       E T7, T8, Td, Te;
69
       T7 = Ip[WS(rs, 1)];
70
       T8 = Im[WS(rs, 1)];
71
       T9 = T7 - T8;
72
       Tt = T7 + T8;
73
       Ta = T6 * T9;
74
       TC = T2 * Tt;
75
       Td = Rp[WS(rs, 1)];
76
       Te = Rm[WS(rs, 1)];
77
       Tf = Td + Te;
78
       Tr = Td - Te;
79
       Ts = T2 * Tr;
80
       Tx = T6 * Tf;
81
        }
82
        {  /* finish the multiply-adds, then write the eight outputs */
83
       E Tk, TB, Tz, TH, Tv, TA, TG, TI, Tg, Ty;
84
       Tg = FNMS(Tc, Tf, Ta);  /* T6*T9 - Tc*Tf */
85
       Tk = Tg + Tj;
86
       TB = Tj - Tg;
87
       Ty = FMA(Tc, T9, Tx);  /* Tc*T9 + T6*Tf */
88
       Tz = Tw - Ty;
89
       TH = Tw + Ty;
90
       {
91
            E Tq, Tu, TD, TF;
92
            Tq = FNMS(T4, Tp, To);  /* T1*Tn - T4*Tp */
93
            Tu = FMA(T5, Tt, Ts);  /* T5*Tt + T2*Tr */
94
            Tv = Tq - Tu;
95
            TA = Tu + Tq;
96
            TD = FNMS(T5, Tr, TC);  /* T2*Tt - T5*Tr */
97
            TF = FMA(T1, Tp, TE);  /* T1*Tp + T4*Tn */
98
            TG = TD - TF;
99
            TI = TD + TF;
100
       }
101
       Ip[0] = KP500000000 * (Tk + Tv);  /* every output is 0.5 * (sum or difference) */
102
       Rp[0] = KP500000000 * (TH + TI);
103
       Im[WS(rs, 1)] = KP500000000 * (Tv - Tk);
104
       Rm[WS(rs, 1)] = KP500000000 * (TH - TI);
105
       Rm[0] = KP500000000 * (Tz - TA);
106
       Im[0] = KP500000000 * (TG - TB);
107
       Rp[WS(rs, 1)] = KP500000000 * (Tz + TA);
108
       Ip[WS(rs, 1)] = KP500000000 * (TB + TG);
109
        }
110
         }
111
    }
112
     }
113
}
114
115
/*
 * Twiddle instruction table referenced by the codelet descriptor: two
 * TW_CEXP entries (last field 1 and 3) followed by a TW_NEXT entry.
 * NOTE(review): field meanings come from tw_instr, declared outside this
 * file; the two TW_CEXP entries presumably correspond to the four W values
 * the codelet reads per iteration -- confirm.
 */
static const tw_instr twinstr[] = {
116
     { TW_CEXP, 1, 1 },
117
     { TW_CEXP, 1, 3 },
118
     { TW_NEXT, 1, 0 }
119
};
120
121
/*
 * Descriptor passed to the planner at registration: the value 4, the
 * codelet name, the twiddle instruction table, &GENUS, and the counts
 * { 24, 16, 8, 0 }.  The first three counts match the "24 additions,
 * 16 multiplications, 8 fused multiply/add" figures in the generated header
 * comment.  NOTE(review): the leading 4 presumably mirrors the generator's
 * "-n 4"; the meaning of the fourth count is not visible here -- confirm
 * against hc2c_desc.
 */
static const hc2c_desc desc = { 4, "hc2cfdft2_4", twinstr, &GENUS, { 24, 16, 8, 0 } };
122
123
/*
 * Registration entry point: hands the codelet function and its descriptor
 * to X(khc2c_register) on planner p, tagged HC2C_VIA_DFT.
 */
void X(codelet_hc2cfdft2_4) (planner *p) {
124
     X(khc2c_register) (p, hc2cfdft2_4, &desc, HC2C_VIA_DFT);
125
}
126
#else
127
128
/* Generated by: ../../../genfft/gen_hc2cdft.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -dit -name hc2cfdft2_4 -include rdft/scalar/hc2cf.h */
129
130
/*
131
 * This function contains 32 FP additions, 24 FP multiplications,
132
 * (or, 24 additions, 16 multiplications, 8 fused multiply/add),
133
 * 24 stack variables, 1 constants, and 16 memory accesses
134
 */
135
#include "rdft/scalar/hc2cf.h"
136
137
/*
 * hc2cfdft2_4 -- variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined (the #else branch).
 *
 * For every m in [mb, me) the loop body loads eight reals (Rp, Ip, Rm and Im
 * at offsets 0 and WS(rs, 1)) plus four twiddle values W[0..3], combines
 * them, and stores eight results back into the same eight locations.  All
 * loads happen before the first store, and every stored value is scaled by
 * KP500000000 (0.5).
 *
 * Rp, Ip  - arrays read and overwritten; advanced by +ms after each iteration
 * Rm, Im  - arrays read and overwritten; advanced by -ms after each iteration
 * W       - twiddle table; offset by (mb - 1) * 4 before the first iteration
 *           and advanced by 4 after each iteration
 * rs      - stride handed to WS(rs, 1) to address the second element
 * mb, me  - half-open range of the loop counter m
 * ms      - per-iteration step applied to the four data pointers
 *
 * This variant still goes through the FMA/FNMS macros, but always with an
 * explicit product as the third argument.
 * NOTE(review): DK, E, FMA, FNMS, WS and MAKE_VOLATILE_STRIDE are defined
 * outside this file.  The arithmetic notes below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm against the
 * codelet headers.
 */
static void hc2cfdft2_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
138
0
{
139
0
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
140
0
     {
141
0
    INT m;
142
0
    for (m = mb, W = W + ((mb - 1) * 4); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 4, MAKE_VOLATILE_STRIDE(16, rs)) {
143
0
         E T1, T3, T2, T4, T5, T9;
144
0
         T1 = W[0];  /* the four twiddle values stored for this m */
145
0
         T3 = W[1];
146
0
         T2 = W[2];
147
0
         T4 = W[3];
148
0
         T5 = FMA(T1, T2, T3 * T4);  /* W[0]*W[2] + W[1]*W[3] (given the FMA assumption) */
149
0
         T9 = FNMS(T3, T2, T1 * T4);  /* W[0]*W[3] - W[1]*W[2] (given the FNMS assumption) */
150
0
         {
151
0
        E Tg, Tr, Tm, Tx, Td, Tw, Tp, Ts;
152
0
        {  /* inputs at offset 0, combined with T1 and T3 */
153
0
       E Te, Tf, Tl, Ti, Tj, Tk;
154
0
       Te = Ip[0];
155
0
       Tf = Im[0];
156
0
       Tl = Te + Tf;
157
0
       Ti = Rm[0];
158
0
       Tj = Rp[0];
159
0
       Tk = Ti - Tj;
160
0
       Tg = Te - Tf;
161
0
       Tr = Tj + Ti;
162
0
       Tm = FNMS(T3, Tl, T1 * Tk);  /* T1*Tk - T3*Tl */
163
0
       Tx = FMA(T3, Tk, T1 * Tl);  /* T3*Tk + T1*Tl */
164
0
        }
165
0
        {  /* inputs at offset WS(rs, 1), combined with T2, T4, T5 and T9 */
166
0
       E T8, To, Tc, Tn;
167
0
       {
168
0
            E T6, T7, Ta, Tb;
169
0
            T6 = Ip[WS(rs, 1)];
170
0
            T7 = Im[WS(rs, 1)];
171
0
            T8 = T6 - T7;
172
0
            To = T6 + T7;
173
0
            Ta = Rp[WS(rs, 1)];
174
0
            Tb = Rm[WS(rs, 1)];
175
0
            Tc = Ta + Tb;
176
0
            Tn = Ta - Tb;
177
0
       }
178
0
       Td = FNMS(T9, Tc, T5 * T8);  /* T5*T8 - T9*Tc */
179
0
       Tw = FNMS(T4, Tn, T2 * To);  /* T2*To - T4*Tn */
180
0
       Tp = FMA(T2, Tn, T4 * To);  /* T2*Tn + T4*To */
181
0
       Ts = FMA(T5, Tc, T9 * T8);  /* T5*Tc + T9*T8 */
182
0
        }
183
0
        {  /* first four outputs; every output is 0.5 * (sum or difference) */
184
0
       E Th, Tq, Tz, TA;
185
0
       Th = Td + Tg;
186
0
       Tq = Tm - Tp;
187
0
       Ip[0] = KP500000000 * (Th + Tq);
188
0
       Im[WS(rs, 1)] = KP500000000 * (Tq - Th);
189
0
       Tz = Tr + Ts;
190
0
       TA = Tw + Tx;
191
0
       Rm[WS(rs, 1)] = KP500000000 * (Tz - TA);
192
0
       Rp[0] = KP500000000 * (Tz + TA);
193
0
        }
194
0
        {  /* remaining four outputs */
195
0
       E Tt, Tu, Tv, Ty;
196
0
       Tt = Tr - Ts;
197
0
       Tu = Tp + Tm;
198
0
       Rm[0] = KP500000000 * (Tt - Tu);
199
0
       Rp[WS(rs, 1)] = KP500000000 * (Tt + Tu);
200
0
       Tv = Tg - Td;
201
0
       Ty = Tw - Tx;
202
0
       Ip[WS(rs, 1)] = KP500000000 * (Tv + Ty);
203
0
       Im[0] = KP500000000 * (Ty - Tv);
204
0
        }
205
0
         }
206
0
    }
207
0
     }
208
0
}
209
210
/*
 * Twiddle instruction table referenced by the codelet descriptor: two
 * TW_CEXP entries (last field 1 and 3) followed by a TW_NEXT entry.
 * NOTE(review): field meanings come from tw_instr, declared outside this
 * file; the two TW_CEXP entries presumably correspond to the four W values
 * the codelet reads per iteration -- confirm.
 */
static const tw_instr twinstr[] = {
211
     { TW_CEXP, 1, 1 },
212
     { TW_CEXP, 1, 3 },
213
     { TW_NEXT, 1, 0 }
214
};
215
216
/*
 * Descriptor passed to the planner at registration: the value 4, the
 * codelet name, the twiddle instruction table, &GENUS, and the counts
 * { 24, 16, 8, 0 }.  The first three counts match the "24 additions,
 * 16 multiplications, 8 fused multiply/add" figures in the generated header
 * comment.  NOTE(review): the leading 4 presumably mirrors the generator's
 * "-n 4"; the meaning of the fourth count is not visible here -- confirm
 * against hc2c_desc.
 */
static const hc2c_desc desc = { 4, "hc2cfdft2_4", twinstr, &GENUS, { 24, 16, 8, 0 } };
217
218
1
/*
 * Registration entry point: hands the codelet function and its descriptor
 * to X(khc2c_register) on planner p, tagged HC2C_VIA_DFT.
 */
void X(codelet_hc2cfdft2_4) (planner *p) {
219
1
     X(khc2c_register) (p, hc2cfdft2_4, &desc, HC2C_VIA_DFT);
220
1
}
221
#endif