Coverage Report

Created: 2026-02-14 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/dft/scalar/codelets/q1_3.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Sat Feb 14 07:02:28 UTC 2026 */
23
24
#include "dft/codelet-dft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_twidsq.native -fma -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 3 -name q1_3 -include dft/scalar/q.h */
29
30
/*
31
 * This function contains 48 FP additions, 42 FP multiplications,
32
 * (or, 18 additions, 12 multiplications, 30 fused multiply/add),
33
 * 35 stack variables, 2 constants, and 36 memory accesses
34
 */
35
#include "dft/scalar/q.h"
36
37
static void q1_3(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
38
{
39
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
40
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
41
     {
42
    INT m;
43
    for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
44
         E T1, T4, T6, Tg, Td, Te, T9, Tf, Tp, Ts, Tu, TE, TB, TC, Tx;
45
         E TD, TZ, T10, TV, T11, TN, TQ, TS, T12;
46
         {
47
        E T2, T3, Tv, Tw;
48
        T1 = rio[0];
49
        T2 = rio[WS(rs, 1)];
50
        T3 = rio[WS(rs, 2)];
51
        T4 = T2 + T3;
52
        T6 = FNMS(KP500000000, T4, T1);
53
        Tg = T3 - T2;
54
        {
55
       E T7, T8, Tq, Tr;
56
       Td = iio[0];
57
       T7 = iio[WS(rs, 1)];
58
       T8 = iio[WS(rs, 2)];
59
       Te = T7 + T8;
60
       T9 = T7 - T8;
61
       Tf = FNMS(KP500000000, Te, Td);
62
       Tp = rio[WS(vs, 1)];
63
       Tq = rio[WS(vs, 1) + WS(rs, 1)];
64
       Tr = rio[WS(vs, 1) + WS(rs, 2)];
65
       Ts = Tq + Tr;
66
       Tu = FNMS(KP500000000, Ts, Tp);
67
       TE = Tr - Tq;
68
        }
69
        TB = iio[WS(vs, 1)];
70
        Tv = iio[WS(vs, 1) + WS(rs, 1)];
71
        Tw = iio[WS(vs, 1) + WS(rs, 2)];
72
        TC = Tv + Tw;
73
        Tx = Tv - Tw;
74
        TD = FNMS(KP500000000, TC, TB);
75
        {
76
       E TT, TU, TO, TP;
77
       TZ = iio[WS(vs, 2)];
78
       TT = iio[WS(vs, 2) + WS(rs, 1)];
79
       TU = iio[WS(vs, 2) + WS(rs, 2)];
80
       T10 = TT + TU;
81
       TV = TT - TU;
82
       T11 = FNMS(KP500000000, T10, TZ);
83
       TN = rio[WS(vs, 2)];
84
       TO = rio[WS(vs, 2) + WS(rs, 1)];
85
       TP = rio[WS(vs, 2) + WS(rs, 2)];
86
       TQ = TO + TP;
87
       TS = FNMS(KP500000000, TQ, TN);
88
       T12 = TP - TO;
89
        }
90
         }
91
         rio[0] = T1 + T4;
92
         iio[0] = Td + Te;
93
         rio[WS(rs, 1)] = Tp + Ts;
94
         iio[WS(rs, 1)] = TB + TC;
95
         iio[WS(rs, 2)] = TZ + T10;
96
         rio[WS(rs, 2)] = TN + TQ;
97
         {
98
        E Ta, Th, Tb, Ti, T5, Tc;
99
        Ta = FMA(KP866025403, T9, T6);
100
        Th = FMA(KP866025403, Tg, Tf);
101
        T5 = W[0];
102
        Tb = T5 * Ta;
103
        Ti = T5 * Th;
104
        Tc = W[1];
105
        rio[WS(vs, 1)] = FMA(Tc, Th, Tb);
106
        iio[WS(vs, 1)] = FNMS(Tc, Ta, Ti);
107
         }
108
         {
109
        E T16, T19, T17, T1a, T15, T18;
110
        T16 = FNMS(KP866025403, TV, TS);
111
        T19 = FNMS(KP866025403, T12, T11);
112
        T15 = W[2];
113
        T17 = T15 * T16;
114
        T1a = T15 * T19;
115
        T18 = W[3];
116
        rio[WS(vs, 2) + WS(rs, 2)] = FMA(T18, T19, T17);
117
        iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T18, T16, T1a);
118
         }
119
         {
120
        E TI, TL, TJ, TM, TH, TK;
121
        TI = FNMS(KP866025403, Tx, Tu);
122
        TL = FNMS(KP866025403, TE, TD);
123
        TH = W[2];
124
        TJ = TH * TI;
125
        TM = TH * TL;
126
        TK = W[3];
127
        rio[WS(vs, 2) + WS(rs, 1)] = FMA(TK, TL, TJ);
128
        iio[WS(vs, 2) + WS(rs, 1)] = FNMS(TK, TI, TM);
129
         }
130
         {
131
        E Ty, TF, Tz, TG, Tt, TA;
132
        Ty = FMA(KP866025403, Tx, Tu);
133
        TF = FMA(KP866025403, TE, TD);
134
        Tt = W[0];
135
        Tz = Tt * Ty;
136
        TG = Tt * TF;
137
        TA = W[1];
138
        rio[WS(vs, 1) + WS(rs, 1)] = FMA(TA, TF, Tz);
139
        iio[WS(vs, 1) + WS(rs, 1)] = FNMS(TA, Ty, TG);
140
         }
141
         {
142
        E TW, T13, TX, T14, TR, TY;
143
        TW = FMA(KP866025403, TV, TS);
144
        T13 = FMA(KP866025403, T12, T11);
145
        TR = W[0];
146
        TX = TR * TW;
147
        T14 = TR * T13;
148
        TY = W[1];
149
        rio[WS(vs, 1) + WS(rs, 2)] = FMA(TY, T13, TX);
150
        iio[WS(vs, 1) + WS(rs, 2)] = FNMS(TY, TW, T14);
151
         }
152
         {
153
        E Tk, Tn, Tl, To, Tj, Tm;
154
        Tk = FNMS(KP866025403, T9, T6);
155
        Tn = FNMS(KP866025403, Tg, Tf);
156
        Tj = W[2];
157
        Tl = Tj * Tk;
158
        To = Tj * Tn;
159
        Tm = W[3];
160
        rio[WS(vs, 2)] = FMA(Tm, Tn, Tl);
161
        iio[WS(vs, 2)] = FNMS(Tm, Tk, To);
162
         }
163
    }
164
     }
165
}
166
167
static const tw_instr twinstr[] = {
168
     { TW_FULL, 0, 3 },
169
     { TW_NEXT, 1, 0 }
170
};
171
172
static const ct_desc desc = { 3, "q1_3", twinstr, &GENUS, { 18, 12, 30, 0 }, 0, 0, 0 };
173
174
void X(codelet_q1_3) (planner *p) {
175
     X(kdft_difsq_register) (p, q1_3, &desc);
176
}
177
#else
178
179
/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 3 -name q1_3 -include dft/scalar/q.h */
180
181
/*
182
 * This function contains 48 FP additions, 36 FP multiplications,
183
 * (or, 30 additions, 18 multiplications, 18 fused multiply/add),
184
 * 35 stack variables, 2 constants, and 36 memory accesses
185
 */
186
#include "dft/scalar/q.h"
187
188
static void q1_3(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
189
0
{
190
0
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
191
0
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
192
0
     {
193
0
    INT m;
194
0
    for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
195
0
         E T1, T4, T6, Tc, Td, Te, T9, Tf, Tl, To, Tq, Tw, Tx, Ty, Tt;
196
0
         E Tz, TR, TS, TN, TT, TF, TI, TK, TQ;
197
0
         {
198
0
        E T2, T3, Tr, Ts;
199
0
        T1 = rio[0];
200
0
        T2 = rio[WS(rs, 1)];
201
0
        T3 = rio[WS(rs, 2)];
202
0
        T4 = T2 + T3;
203
0
        T6 = FNMS(KP500000000, T4, T1);
204
0
        Tc = KP866025403 * (T3 - T2);
205
0
        {
206
0
       E T7, T8, Tm, Tn;
207
0
       Td = iio[0];
208
0
       T7 = iio[WS(rs, 1)];
209
0
       T8 = iio[WS(rs, 2)];
210
0
       Te = T7 + T8;
211
0
       T9 = KP866025403 * (T7 - T8);
212
0
       Tf = FNMS(KP500000000, Te, Td);
213
0
       Tl = rio[WS(vs, 1)];
214
0
       Tm = rio[WS(vs, 1) + WS(rs, 1)];
215
0
       Tn = rio[WS(vs, 1) + WS(rs, 2)];
216
0
       To = Tm + Tn;
217
0
       Tq = FNMS(KP500000000, To, Tl);
218
0
       Tw = KP866025403 * (Tn - Tm);
219
0
        }
220
0
        Tx = iio[WS(vs, 1)];
221
0
        Tr = iio[WS(vs, 1) + WS(rs, 1)];
222
0
        Ts = iio[WS(vs, 1) + WS(rs, 2)];
223
0
        Ty = Tr + Ts;
224
0
        Tt = KP866025403 * (Tr - Ts);
225
0
        Tz = FNMS(KP500000000, Ty, Tx);
226
0
        {
227
0
       E TL, TM, TG, TH;
228
0
       TR = iio[WS(vs, 2)];
229
0
       TL = iio[WS(vs, 2) + WS(rs, 1)];
230
0
       TM = iio[WS(vs, 2) + WS(rs, 2)];
231
0
       TS = TL + TM;
232
0
       TN = KP866025403 * (TL - TM);
233
0
       TT = FNMS(KP500000000, TS, TR);
234
0
       TF = rio[WS(vs, 2)];
235
0
       TG = rio[WS(vs, 2) + WS(rs, 1)];
236
0
       TH = rio[WS(vs, 2) + WS(rs, 2)];
237
0
       TI = TG + TH;
238
0
       TK = FNMS(KP500000000, TI, TF);
239
0
       TQ = KP866025403 * (TH - TG);
240
0
        }
241
0
         }
242
0
         rio[0] = T1 + T4;
243
0
         iio[0] = Td + Te;
244
0
         rio[WS(rs, 1)] = Tl + To;
245
0
         iio[WS(rs, 1)] = Tx + Ty;
246
0
         iio[WS(rs, 2)] = TR + TS;
247
0
         rio[WS(rs, 2)] = TF + TI;
248
0
         {
249
0
        E Ta, Tg, T5, Tb;
250
0
        Ta = T6 + T9;
251
0
        Tg = Tc + Tf;
252
0
        T5 = W[0];
253
0
        Tb = W[1];
254
0
        rio[WS(vs, 1)] = FMA(T5, Ta, Tb * Tg);
255
0
        iio[WS(vs, 1)] = FNMS(Tb, Ta, T5 * Tg);
256
0
         }
257
0
         {
258
0
        E TW, TY, TV, TX;
259
0
        TW = TK - TN;
260
0
        TY = TT - TQ;
261
0
        TV = W[2];
262
0
        TX = W[3];
263
0
        rio[WS(vs, 2) + WS(rs, 2)] = FMA(TV, TW, TX * TY);
264
0
        iio[WS(vs, 2) + WS(rs, 2)] = FNMS(TX, TW, TV * TY);
265
0
         }
266
0
         {
267
0
        E TC, TE, TB, TD;
268
0
        TC = Tq - Tt;
269
0
        TE = Tz - Tw;
270
0
        TB = W[2];
271
0
        TD = W[3];
272
0
        rio[WS(vs, 2) + WS(rs, 1)] = FMA(TB, TC, TD * TE);
273
0
        iio[WS(vs, 2) + WS(rs, 1)] = FNMS(TD, TC, TB * TE);
274
0
         }
275
0
         {
276
0
        E Tu, TA, Tp, Tv;
277
0
        Tu = Tq + Tt;
278
0
        TA = Tw + Tz;
279
0
        Tp = W[0];
280
0
        Tv = W[1];
281
0
        rio[WS(vs, 1) + WS(rs, 1)] = FMA(Tp, Tu, Tv * TA);
282
0
        iio[WS(vs, 1) + WS(rs, 1)] = FNMS(Tv, Tu, Tp * TA);
283
0
         }
284
0
         {
285
0
        E TO, TU, TJ, TP;
286
0
        TO = TK + TN;
287
0
        TU = TQ + TT;
288
0
        TJ = W[0];
289
0
        TP = W[1];
290
0
        rio[WS(vs, 1) + WS(rs, 2)] = FMA(TJ, TO, TP * TU);
291
0
        iio[WS(vs, 1) + WS(rs, 2)] = FNMS(TP, TO, TJ * TU);
292
0
         }
293
0
         {
294
0
        E Ti, Tk, Th, Tj;
295
0
        Ti = T6 - T9;
296
0
        Tk = Tf - Tc;
297
0
        Th = W[2];
298
0
        Tj = W[3];
299
0
        rio[WS(vs, 2)] = FMA(Th, Ti, Tj * Tk);
300
0
        iio[WS(vs, 2)] = FNMS(Tj, Ti, Th * Tk);
301
0
         }
302
0
    }
303
0
     }
304
0
}
305
306
static const tw_instr twinstr[] = {
307
     { TW_FULL, 0, 3 },
308
     { TW_NEXT, 1, 0 }
309
};
310
311
static const ct_desc desc = { 3, "q1_3", twinstr, &GENUS, { 30, 18, 18, 0 }, 0, 0, 0 };
312
313
1
void X(codelet_q1_3) (planner *p) {
314
1
     X(kdft_difsq_register) (p, q1_3, &desc);
315
1
}
316
#endif