Coverage Report

Created: 2025-08-26 06:35

/src/fftw3/dft/scalar/codelets/n1_8.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Tue Aug 26 06:31:28 UTC 2025 */
23
24
#include "dft/codelet-dft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 8 -name n1_8 -include dft/scalar/n.h */
29
30
/*
31
 * This function contains 52 FP additions, 8 FP multiplications,
32
 * (or, 44 additions, 0 multiplications, 8 fused multiply/add),
33
 * 28 stack variables, 1 constants, and 32 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36
37
/*
 * n1_8 (FMA variant): straight-line kernel emitted by genfft's gen_notw
 * for "-n 8" (see the "Generated by" comment above).
 *
 * Each loop iteration reads eight values from ri and eight from ii, at
 * offsets 0 and WS(is, 1) .. WS(is, 7), and writes eight values to ro and
 * eight to io, at offsets 0 and WS(os, 1) .. WS(os, 7).
 *
 *   ri, ii   - read-only inputs (presumably the real and imaginary parts
 *              of the data -- confirm against dft/codelet-dft.h)
 *   ro, io   - outputs, paired the same way
 *   is, os   - strides handed to WS() to index inside one 8-element group
 *   v        - number of loop iterations; the body never runs if v <= 0
 *   ivs, ovs - amounts added to the input / output pointers after every
 *              iteration
 *
 * KP707106781 is 0.7071..., i.e. sqrt(2)/2; it is the only constant used.
 *
 * NOTE(review): FMA/FNMS are defined outside this file. Judging by the
 * non-FMA variant in the #else branch, FMA(k, x, y) stands for y + k * x
 * and FNMS(k, x, y) for y - k * x -- confirm against the macro definitions.
 */
static void n1_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
38
{
39
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
40
     {
41
    INT i;
42
    for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) {
43
         E T3, Tn, Ti, TC, T6, TB, Tl, To, Td, TN, Tz, TH, Ta, TM, Tu;
44
         E TG;
45
         /* Load stage: read all 16 inputs and form sums and differences of
          * the element pairs 0/4, 2/6, 7/3 and 1/5. */
         {
46
        E T1, T2, Tj, Tk;
47
        T1 = ri[0];
48
        T2 = ri[WS(is, 4)];
49
        T3 = T1 + T2;
50
        Tn = T1 - T2;
51
        {
52
       E Tg, Th, T4, T5;
53
       Tg = ii[0];
54
       Th = ii[WS(is, 4)];
55
       Ti = Tg + Th;
56
       TC = Tg - Th;
57
       T4 = ri[WS(is, 2)];
58
       T5 = ri[WS(is, 6)];
59
       T6 = T4 + T5;
60
       TB = T4 - T5;
61
        }
62
        Tj = ii[WS(is, 2)];
63
        Tk = ii[WS(is, 6)];
64
        Tl = Tj + Tk;
65
        To = Tj - Tk;
66
        {
67
       E Tb, Tc, Tv, Tw, Tx, Ty;
68
       Tb = ri[WS(is, 7)];
69
       Tc = ri[WS(is, 3)];
70
       Tv = Tb - Tc;
71
       Tw = ii[WS(is, 7)];
72
       Tx = ii[WS(is, 3)];
73
       Ty = Tw - Tx;
74
       Td = Tb + Tc;
75
       TN = Tw + Tx;
76
       Tz = Tv - Ty;
77
       TH = Tv + Ty;
78
        }
79
        {
80
       E T8, T9, Tq, Tr, Ts, Tt;
81
       T8 = ri[WS(is, 1)];
82
       T9 = ri[WS(is, 5)];
83
       Tq = T8 - T9;
84
       Tr = ii[WS(is, 1)];
85
       Ts = ii[WS(is, 5)];
86
       Tt = Tr - Ts;
87
       Ta = T8 + T9;
88
       TM = Tr + Ts;
89
       Tu = Tq + Tt;
90
       TG = Tt - Tq;
91
        }
92
         }
93
         /* Outputs 0 and 4: combine the pairwise sums only. */
         {
94
        E T7, Te, TP, TQ;
95
        T7 = T3 + T6;
96
        Te = Ta + Td;
97
        ro[WS(os, 4)] = T7 - Te;
98
        ro[0] = T7 + Te;
99
        TP = Ti + Tl;
100
        TQ = TM + TN;
101
        io[WS(os, 4)] = TP - TQ;
102
        io[0] = TP + TQ;
103
         }
104
         /* Outputs 2 and 6: plain additions/subtractions, no scaling. */
         {
105
        E Tf, Tm, TL, TO;
106
        Tf = Td - Ta;
107
        Tm = Ti - Tl;
108
        io[WS(os, 2)] = Tf + Tm;
109
        io[WS(os, 6)] = Tm - Tf;
110
        TL = T3 - T6;
111
        TO = TM - TN;
112
        ro[WS(os, 6)] = TL - TO;
113
        ro[WS(os, 2)] = TL + TO;
114
         }
115
         /* Outputs 1 and 5: the KP707106781 scaling is folded into the
          * FMA/FNMS that produces each output. */
         {
116
        E Tp, TA, TJ, TK;
117
        Tp = Tn + To;
118
        TA = Tu + Tz;
119
        ro[WS(os, 5)] = FNMS(KP707106781, TA, Tp);
120
        ro[WS(os, 1)] = FMA(KP707106781, TA, Tp);
121
        TJ = TC - TB;
122
        TK = TG + TH;
123
        io[WS(os, 5)] = FNMS(KP707106781, TK, TJ);
124
        io[WS(os, 1)] = FMA(KP707106781, TK, TJ);
125
         }
126
         /* Outputs 3 and 7: same pattern as outputs 1 and 5. */
         {
127
        E TD, TE, TF, TI;
128
        TD = TB + TC;
129
        TE = Tz - Tu;
130
        io[WS(os, 7)] = FNMS(KP707106781, TE, TD);
131
        io[WS(os, 3)] = FMA(KP707106781, TE, TD);
132
        TF = Tn - To;
133
        TI = TG - TH;
134
        ro[WS(os, 7)] = FNMS(KP707106781, TI, TF);
135
        ro[WS(os, 3)] = FMA(KP707106781, TI, TF);
136
         }
137
    }
138
     }
139
}
140
141
/*
 * Descriptor passed to X(kdft_register) for the FMA variant of n1_8.
 * 8 matches the generator's "-n 8" and "n1_8" is the codelet name.
 * { 44, 0, 8, 0 } starts with the 44 additions, 0 multiplications and
 * 8 fused multiply/adds quoted in the operation-count comment above.
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zero fields is not visible here, and GENUS is presumably supplied by
 * dft/scalar/n.h -- confirm against the kdft_desc declaration.
 */
static const kdft_desc desc = { 8, "n1_8", { 44, 0, 8, 0 }, &GENUS, 0, 0, 0, 0 };
142
143
/*
 * Registration entry point: hands the n1_8 kernel and its descriptor to
 * X(kdft_register) for planner p.
 * NOTE(review): X(...) is a macro defined elsewhere, presumably the
 * library's symbol-name wrapper -- confirm.
 */
void X(codelet_n1_8) (planner *p) { X(kdft_register) (p, n1_8, &desc);
144
}
145
146
#else
147
148
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 8 -name n1_8 -include dft/scalar/n.h */
149
150
/*
151
 * This function contains 52 FP additions, 4 FP multiplications,
152
 * (or, 52 additions, 4 multiplications, 0 fused multiply/add),
153
 * 28 stack variables, 1 constants, and 32 memory accesses
154
 */
155
#include "dft/scalar/n.h"
156
157
/*
 * n1_8 (non-FMA variant): straight-line kernel emitted by genfft's
 * gen_notw for "-n 8" (see the "Generated by" comment above).
 *
 * Each loop iteration reads eight values from ri and eight from ii, at
 * offsets 0 and WS(is, 1) .. WS(is, 7), and writes eight values to ro and
 * eight to io, at offsets 0 and WS(os, 1) .. WS(os, 7).
 *
 *   ri, ii   - read-only inputs (presumably the real and imaginary parts
 *              of the data -- confirm against dft/codelet-dft.h)
 *   ro, io   - outputs, paired the same way
 *   is, os   - strides handed to WS() to index inside one 8-element group
 *   v        - number of loop iterations; the body never runs if v <= 0
 *   ivs, ovs - amounts added to the input / output pointers after every
 *              iteration
 *
 * KP707106781 is 0.7071..., i.e. sqrt(2)/2; it is the only constant used.
 * It is multiplied in exactly four places (TA, TK, TE, TI); each product
 * is then reused for two outputs.
 */
static void n1_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
158
28
{
159
28
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
160
28
     {
161
28
    INT i;
162
173
    for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) {
163
145
         E T3, Tn, Ti, TC, T6, TB, Tl, To, Td, TN, Tz, TH, Ta, TM, Tu;
164
145
         E TG;
165
145
         /* Load stage: read all 16 inputs and form sums and differences of
          * the element pairs 0/4, 2/6, 7/3 and 1/5. */
         {
166
145
        E T1, T2, Tj, Tk;
167
145
        T1 = ri[0];
168
145
        T2 = ri[WS(is, 4)];
169
145
        T3 = T1 + T2;
170
145
        Tn = T1 - T2;
171
145
        {
172
145
       E Tg, Th, T4, T5;
173
145
       Tg = ii[0];
174
145
       Th = ii[WS(is, 4)];
175
145
       Ti = Tg + Th;
176
145
       TC = Tg - Th;
177
145
       T4 = ri[WS(is, 2)];
178
145
       T5 = ri[WS(is, 6)];
179
145
       T6 = T4 + T5;
180
145
       TB = T4 - T5;
181
145
        }
182
145
        Tj = ii[WS(is, 2)];
183
145
        Tk = ii[WS(is, 6)];
184
145
        Tl = Tj + Tk;
185
145
        To = Tj - Tk;
186
145
        {
187
145
       E Tb, Tc, Tv, Tw, Tx, Ty;
188
145
       Tb = ri[WS(is, 7)];
189
145
       Tc = ri[WS(is, 3)];
190
145
       Tv = Tb - Tc;
191
145
       Tw = ii[WS(is, 7)];
192
145
       Tx = ii[WS(is, 3)];
193
145
       Ty = Tw - Tx;
194
145
       Td = Tb + Tc;
195
145
       TN = Tw + Tx;
196
145
       Tz = Tv - Ty;
197
145
       TH = Tv + Ty;
198
145
        }
199
145
        {
200
145
       E T8, T9, Tq, Tr, Ts, Tt;
201
145
       T8 = ri[WS(is, 1)];
202
145
       T9 = ri[WS(is, 5)];
203
145
       Tq = T8 - T9;
204
145
       Tr = ii[WS(is, 1)];
205
145
       Ts = ii[WS(is, 5)];
206
145
       Tt = Tr - Ts;
207
145
       Ta = T8 + T9;
208
145
       TM = Tr + Ts;
209
145
       Tu = Tq + Tt;
210
145
       TG = Tt - Tq;
211
145
        }
212
145
         }
213
145
         /* Outputs 0 and 4: combine the pairwise sums only. */
         {
214
145
        E T7, Te, TP, TQ;
215
145
        T7 = T3 + T6;
216
145
        Te = Ta + Td;
217
145
        ro[WS(os, 4)] = T7 - Te;
218
145
        ro[0] = T7 + Te;
219
145
        TP = Ti + Tl;
220
145
        TQ = TM + TN;
221
145
        io[WS(os, 4)] = TP - TQ;
222
145
        io[0] = TP + TQ;
223
145
         }
224
145
         /* Outputs 2 and 6: plain additions/subtractions, no scaling. */
         {
225
145
        E Tf, Tm, TL, TO;
226
145
        Tf = Td - Ta;
227
145
        Tm = Ti - Tl;
228
145
        io[WS(os, 2)] = Tf + Tm;
229
145
        io[WS(os, 6)] = Tm - Tf;
230
145
        TL = T3 - T6;
231
145
        TO = TM - TN;
232
145
        ro[WS(os, 6)] = TL - TO;
233
145
        ro[WS(os, 2)] = TL + TO;
234
145
         }
235
145
         /* Outputs 1 and 5: each scaled term (TA, TK) is computed once and
          * then both added to and subtracted from its partner. */
         {
236
145
        E Tp, TA, TJ, TK;
237
145
        Tp = Tn + To;
238
145
        TA = KP707106781 * (Tu + Tz);
239
145
        ro[WS(os, 5)] = Tp - TA;
240
145
        ro[WS(os, 1)] = Tp + TA;
241
145
        TJ = TC - TB;
242
145
        TK = KP707106781 * (TG + TH);
243
145
        io[WS(os, 5)] = TJ - TK;
244
145
        io[WS(os, 1)] = TJ + TK;
245
145
         }
246
145
         /* Outputs 3 and 7: same pattern as outputs 1 and 5. */
         {
247
145
        E TD, TE, TF, TI;
248
145
        TD = TB + TC;
249
145
        TE = KP707106781 * (Tz - Tu);
250
145
        io[WS(os, 7)] = TD - TE;
251
145
        io[WS(os, 3)] = TD + TE;
252
145
        TF = Tn - To;
253
145
        TI = KP707106781 * (TG - TH);
254
145
        ro[WS(os, 7)] = TF - TI;
255
145
        ro[WS(os, 3)] = TF + TI;
256
145
         }
257
145
    }
258
28
     }
259
28
}
260
261
/*
 * Descriptor passed to X(kdft_register) for the non-FMA variant of n1_8.
 * 8 matches the generator's "-n 8" and "n1_8" is the codelet name.
 * { 52, 4, 0, 0 } starts with the 52 additions, 4 multiplications and
 * 0 fused multiply/adds quoted in the operation-count comment above.
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zero fields is not visible here, and GENUS is presumably supplied by
 * dft/scalar/n.h -- confirm against the kdft_desc declaration.
 */
static const kdft_desc desc = { 8, "n1_8", { 52, 4, 0, 0 }, &GENUS, 0, 0, 0, 0 };
262
263
1
/*
 * Registration entry point: hands the n1_8 kernel and its descriptor to
 * X(kdft_register) for planner p.
 * NOTE(review): X(...) is a macro defined elsewhere, presumably the
 * library's symbol-name wrapper -- confirm.
 */
void X(codelet_n1_8) (planner *p) { X(kdft_register) (p, n1_8, &desc);
264
1
}
265
266
#endif