Coverage Report

Created: 2025-07-11 06:55

/src/fftw3/dft/scalar/codelets/n1_10.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Fri Jul 11 06:51:30 UTC 2025 */
23
24
#include "dft/codelet-dft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 10 -name n1_10 -include dft/scalar/n.h */
29
30
/*
31
 * This function contains 84 FP additions, 36 FP multiplications,
32
 * (or, 48 additions, 0 multiplications, 36 fused multiply/add),
33
 * 41 stack variables, 4 constants, and 40 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36
37
/*
 * n1_10 (variant built when an FMA-preferring macro is defined):
 * straight-line kernel for a 10-point complex transform, per the "-n 10"
 * generator command line and the size field of the descriptor in this file.
 *
 *   ri, ii    real / imaginary input arrays (only read)
 *   ro, io    real / imaginary output arrays (only written)
 *   is, os    input / output strides, applied via WS() to indices 0..9
 *   v         number of transforms performed by the loop
 *   ivs, ovs  amounts added to the input / output pointers after each
 *             transform
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * supplied by the included project headers.
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are presumably a*b + c and
 * c - a*b respectively -- confirm against the header that defines them.
 */
static void n1_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
38
{
39
     /* Numeric constants: 0.951... ~ sin(2*pi/5), 0.559... ~ sqrt(5)/4,
      * 0.25, and 0.618... ~ (sqrt(5) - 1)/2. */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
40
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
41
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
42
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
43
     {
44
    INT i;
45
    /* One transform per iteration, counting i down from v; the increment
     * clause advances ri/ii by ivs and ro/io by ovs. */
    for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) {
46
         E T3, Tj, TN, T1b, TU, TV, T1j, T1i, Tm, Tp, Tq, Ta, Th, Ti, TA;
47
         E TH, T17, T14, T1c, T1d, T1e, TO, TP, TQ;
48
         /* Inputs 0 and 5: difference and sum of the real parts (T3, Tj)
          * and of the imaginary parts (TN, T1b). */
         {
49
        E T1, T2, TL, TM;
50
        T1 = ri[0];
51
        T2 = ri[WS(is, 5)];
52
        T3 = T1 - T2;
53
        Tj = T1 + T2;
54
        TL = ii[0];
55
        TM = ii[WS(is, 5)];
56
        TN = TL - TM;
57
        T1b = TL + TM;
58
         }
59
         /* Real parts of the remaining inputs, taken in pairs whose indices
          * differ by 5: (2,7), (6,1), (8,3), (4,9).  Each pair yields a
          * difference and a sum, which are then combined further. */
         {
60
        E T6, Tk, Tg, To, T9, Tl, Td, Tn;
61
        {
62
       E T4, T5, Te, Tf;
63
       T4 = ri[WS(is, 2)];
64
       T5 = ri[WS(is, 7)];
65
       T6 = T4 - T5;
66
       Tk = T4 + T5;
67
       Te = ri[WS(is, 6)];
68
       Tf = ri[WS(is, 1)];
69
       Tg = Te - Tf;
70
       To = Te + Tf;
71
        }
72
        {
73
       E T7, T8, Tb, Tc;
74
       T7 = ri[WS(is, 8)];
75
       T8 = ri[WS(is, 3)];
76
       T9 = T7 - T8;
77
       Tl = T7 + T8;
78
       Tb = ri[WS(is, 4)];
79
       Tc = ri[WS(is, 9)];
80
       Td = Tb - Tc;
81
       Tn = Tb + Tc;
82
        }
83
        TU = T6 - T9;
84
        TV = Td - Tg;
85
        T1j = Tk - Tl;
86
        T1i = Tn - To;
87
        Tm = Tk + Tl;
88
        Tp = Tn + To;
89
        /* Tq: total of the four pair sums. */
        Tq = Tm + Tp;
90
        Ta = T6 + T9;
91
        Th = Td + Tg;
92
        /* Ti: total of the four pair differences. */
        Ti = Ta + Th;
93
         }
94
         /* Same pairing and combinations, applied to the imaginary parts. */
         {
95
        E Tw, T15, TG, T13, Tz, T16, TD, T12;
96
        {
97
       E Tu, Tv, TE, TF;
98
       Tu = ii[WS(is, 2)];
99
       Tv = ii[WS(is, 7)];
100
       Tw = Tu - Tv;
101
       T15 = Tu + Tv;
102
       TE = ii[WS(is, 6)];
103
       TF = ii[WS(is, 1)];
104
       TG = TE - TF;
105
       T13 = TE + TF;
106
        }
107
        {
108
       E Tx, Ty, TB, TC;
109
       Tx = ii[WS(is, 8)];
110
       Ty = ii[WS(is, 3)];
111
       Tz = Tx - Ty;
112
       T16 = Tx + Ty;
113
       TB = ii[WS(is, 4)];
114
       TC = ii[WS(is, 9)];
115
       TD = TB - TC;
116
       T12 = TB + TC;
117
        }
118
        TA = Tw - Tz;
119
        TH = TD - TG;
120
        T17 = T15 - T16;
121
        T14 = T12 - T13;
122
        T1c = T15 + T16;
123
        T1d = T12 + T13;
124
        T1e = T1c + T1d;
125
        TO = Tw + Tz;
126
        TP = TD + TG;
127
        TQ = TO + TP;
128
         }
129
         /* Output 5 is the sum of all pair differences (even-indexed inputs
          * minus odd-indexed inputs); output 0 is the sum of all ten inputs. */
         ro[WS(os, 5)] = T3 + Ti;
130
         io[WS(os, 5)] = TN + TQ;
131
         ro[0] = Tj + Tq;
132
         io[0] = T1b + T1e;
133
         /* Real parts of outputs 9, 3, 1, 7: built from the real pair
          * differences (T3, Ta, Th, Ti) and the imaginary ones (TA, TH). */
         {
134
        E TI, TK, Tt, TJ, Tr, Ts;
135
        TI = FMA(KP618033988, TH, TA);
136
        TK = FNMS(KP618033988, TA, TH);
137
        Tr = FNMS(KP250000000, Ti, T3);
138
        Ts = Ta - Th;
139
        Tt = FMA(KP559016994, Ts, Tr);
140
        TJ = FNMS(KP559016994, Ts, Tr);
141
        ro[WS(os, 9)] = FNMS(KP951056516, TI, Tt);
142
        ro[WS(os, 3)] = FMA(KP951056516, TK, TJ);
143
        ro[WS(os, 1)] = FMA(KP951056516, TI, Tt);
144
        ro[WS(os, 7)] = FNMS(KP951056516, TK, TJ);
145
         }
146
         /* Imaginary parts of outputs 1, 7, 9, 3: built from the imaginary
          * pair differences (TN, TO, TP, TQ) and the real ones (TU, TV). */
         {
147
        E TW, TY, TT, TX, TR, TS;
148
        TW = FMA(KP618033988, TV, TU);
149
        TY = FNMS(KP618033988, TU, TV);
150
        TR = FNMS(KP250000000, TQ, TN);
151
        TS = TO - TP;
152
        TT = FMA(KP559016994, TS, TR);
153
        TX = FNMS(KP559016994, TS, TR);
154
        io[WS(os, 1)] = FNMS(KP951056516, TW, TT);
155
        io[WS(os, 7)] = FMA(KP951056516, TY, TX);
156
        io[WS(os, 9)] = FMA(KP951056516, TW, TT);
157
        io[WS(os, 3)] = FNMS(KP951056516, TY, TX);
158
         }
159
         /* Real parts of outputs 2, 6, 8, 4: built from the real pair sums
          * (Tj, Tm, Tp, Tq) and differences of imaginary pair sums (T14, T17). */
         {
160
        E T18, T1a, T11, T19, TZ, T10;
161
        T18 = FNMS(KP618033988, T17, T14);
162
        T1a = FMA(KP618033988, T14, T17);
163
        TZ = FNMS(KP250000000, Tq, Tj);
164
        T10 = Tm - Tp;
165
        T11 = FNMS(KP559016994, T10, TZ);
166
        T19 = FMA(KP559016994, T10, TZ);
167
        ro[WS(os, 2)] = FNMS(KP951056516, T18, T11);
168
        ro[WS(os, 6)] = FMA(KP951056516, T1a, T19);
169
        ro[WS(os, 8)] = FMA(KP951056516, T18, T11);
170
        ro[WS(os, 4)] = FNMS(KP951056516, T1a, T19);
171
         }
172
         /* Imaginary parts of outputs 2, 6, 8, 4: built from the imaginary
          * pair sums (T1b, T1c, T1d, T1e) and differences of real pair sums
          * (T1i, T1j). */
         {
173
        E T1k, T1m, T1h, T1l, T1f, T1g;
174
        T1k = FNMS(KP618033988, T1j, T1i);
175
        T1m = FMA(KP618033988, T1i, T1j);
176
        T1f = FNMS(KP250000000, T1e, T1b);
177
        T1g = T1c - T1d;
178
        T1h = FNMS(KP559016994, T1g, T1f);
179
        T1l = FMA(KP559016994, T1g, T1f);
180
        io[WS(os, 2)] = FMA(KP951056516, T1k, T1h);
181
        io[WS(os, 6)] = FNMS(KP951056516, T1m, T1l);
182
        io[WS(os, 8)] = FNMS(KP951056516, T1k, T1h);
183
        io[WS(os, 4)] = FMA(KP951056516, T1m, T1l);
184
         }
185
    }
186
     }
187
}
188
189
/* Descriptor handed to X(kdft_register) together with n1_10: size 10, the
 * kernel name, and the counts { 48, 0, 36, 0 }, whose first three entries
 * match the "48 additions, 0 multiplications, 36 fused multiply/add" figures
 * quoted in this branch's header comment.  The meaning of the remaining
 * fields is fixed by the kdft_desc declaration, which is not in this file. */
static const kdft_desc desc = { 10, "n1_10", { 48, 0, 36, 0 }, &GENUS, 0, 0, 0, 0 };
190
191
/* Registration entry point: passes the n1_10 kernel and its descriptor to
 * X(kdft_register) for planner p.  X() is a project macro applied to external
 * names; NOTE(review): presumably it adds a precision-specific prefix --
 * confirm in the project headers. */
void X(codelet_n1_10) (planner *p) { X(kdft_register) (p, n1_10, &desc);
192
}
193
194
#else
195
196
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 10 -name n1_10 -include dft/scalar/n.h */
197
198
/*
199
 * This function contains 84 FP additions, 24 FP multiplications,
200
 * (or, 72 additions, 12 multiplications, 12 fused multiply/add),
201
 * 41 stack variables, 4 constants, and 40 memory accesses
202
 */
203
#include "dft/scalar/n.h"
204
205
/*
 * n1_10 (variant built when no FMA-preferring macro is defined):
 * straight-line kernel for a 10-point complex transform, per the "-n 10"
 * generator command line and the size field of the descriptor in this file.
 *
 *   ri, ii    real / imaginary input arrays (only read)
 *   ro, io    real / imaginary output arrays (only written)
 *   is, os    input / output strides, applied via WS() to indices 0..9
 *   v         number of transforms performed by the loop
 *   ivs, ovs  amounts added to the input / output pointers after each
 *             transform
 *
 * R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * supplied by the included project headers.
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are presumably a*b + c and
 * c - a*b respectively -- confirm against the header that defines them.
 */
static void n1_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
206
16
{
207
16
     /* Numeric constants: 0.25, 0.559... ~ sqrt(5)/4, 0.587... ~ sin(pi/5),
      * and 0.951... ~ sin(2*pi/5). */
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
208
16
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
209
16
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
210
16
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
211
16
     {
212
16
    INT i;
213
144
    /* One transform per iteration, counting i down from v; the increment
     * clause advances ri/ii by ivs and ro/io by ovs. */
    for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) {
214
128
         E T3, Tj, TQ, T1e, TU, TV, T1c, T1b, Tm, Tp, Tq, Ta, Th, Ti, TA;
215
128
         E TH, T17, T14, T1f, T1g, T1h, TL, TM, TR;
216
128
         /* Inputs 0 and 5: difference and sum of the real parts (T3, Tj)
          * and of the imaginary parts (TQ, T1e). */
         {
217
128
        E T1, T2, TO, TP;
218
128
        T1 = ri[0];
219
128
        T2 = ri[WS(is, 5)];
220
128
        T3 = T1 - T2;
221
128
        Tj = T1 + T2;
222
128
        TO = ii[0];
223
128
        TP = ii[WS(is, 5)];
224
128
        TQ = TO - TP;
225
128
        T1e = TO + TP;
226
128
         }
227
128
         /* Real parts of the remaining inputs, taken in pairs whose indices
          * differ by 5: (2,7), (6,1), (8,3), (4,9).  Each pair yields a
          * difference and a sum, which are then combined further. */
         {
228
128
        E T6, Tk, Tg, To, T9, Tl, Td, Tn;
229
128
        {
230
128
       E T4, T5, Te, Tf;
231
128
       T4 = ri[WS(is, 2)];
232
128
       T5 = ri[WS(is, 7)];
233
128
       T6 = T4 - T5;
234
128
       Tk = T4 + T5;
235
128
       Te = ri[WS(is, 6)];
236
128
       Tf = ri[WS(is, 1)];
237
128
       Tg = Te - Tf;
238
128
       To = Te + Tf;
239
128
        }
240
128
        {
241
128
       E T7, T8, Tb, Tc;
242
128
       T7 = ri[WS(is, 8)];
243
128
       T8 = ri[WS(is, 3)];
244
128
       T9 = T7 - T8;
245
128
       Tl = T7 + T8;
246
128
       Tb = ri[WS(is, 4)];
247
128
       Tc = ri[WS(is, 9)];
248
128
       Td = Tb - Tc;
249
128
       Tn = Tb + Tc;
250
128
        }
251
128
        TU = T6 - T9;
252
128
        TV = Td - Tg;
253
128
        T1c = Tk - Tl;
254
128
        T1b = Tn - To;
255
128
        Tm = Tk + Tl;
256
128
        Tp = Tn + To;
257
128
        /* Tq: total of the four pair sums. */
        Tq = Tm + Tp;
258
128
        Ta = T6 + T9;
259
128
        Th = Td + Tg;
260
128
        /* Ti: total of the four pair differences. */
        Ti = Ta + Th;
261
128
         }
262
128
         /* Same pairing and combinations, applied to the imaginary parts. */
         {
263
128
        E Tw, T15, TG, T13, Tz, T16, TD, T12;
264
128
        {
265
128
       E Tu, Tv, TE, TF;
266
128
       Tu = ii[WS(is, 2)];
267
128
       Tv = ii[WS(is, 7)];
268
128
       Tw = Tu - Tv;
269
128
       T15 = Tu + Tv;
270
128
       TE = ii[WS(is, 6)];
271
128
       TF = ii[WS(is, 1)];
272
128
       TG = TE - TF;
273
128
       T13 = TE + TF;
274
128
        }
275
128
        {
276
128
       E Tx, Ty, TB, TC;
277
128
       Tx = ii[WS(is, 8)];
278
128
       Ty = ii[WS(is, 3)];
279
128
       Tz = Tx - Ty;
280
128
       T16 = Tx + Ty;
281
128
       TB = ii[WS(is, 4)];
282
128
       TC = ii[WS(is, 9)];
283
128
       TD = TB - TC;
284
128
       T12 = TB + TC;
285
128
        }
286
128
        TA = Tw - Tz;
287
128
        TH = TD - TG;
288
128
        T17 = T15 - T16;
289
128
        T14 = T12 - T13;
290
128
        T1f = T15 + T16;
291
128
        T1g = T12 + T13;
292
128
        T1h = T1f + T1g;
293
128
        TL = Tw + Tz;
294
128
        TM = TD + TG;
295
128
        TR = TL + TM;
296
128
         }
297
128
         /* Output 5 is the sum of all pair differences (even-indexed inputs
          * minus odd-indexed inputs); output 0 is the sum of all ten inputs. */
         ro[WS(os, 5)] = T3 + Ti;
298
128
         io[WS(os, 5)] = TQ + TR;
299
128
         ro[0] = Tj + Tq;
300
128
         io[0] = T1e + T1h;
301
128
         /* Real parts of outputs 9, 3, 1, 7: built from the real pair
          * differences (T3, Ta, Th, Ti) and the imaginary ones (TA, TH). */
         {
302
128
        E TI, TK, Tt, TJ, Tr, Ts;
303
128
        TI = FMA(KP951056516, TA, KP587785252 * TH);
304
128
        TK = FNMS(KP587785252, TA, KP951056516 * TH);
305
128
        Tr = KP559016994 * (Ta - Th);
306
128
        Ts = FNMS(KP250000000, Ti, T3);
307
128
        Tt = Tr + Ts;
308
128
        TJ = Ts - Tr;
309
128
        ro[WS(os, 9)] = Tt - TI;
310
128
        ro[WS(os, 3)] = TJ + TK;
311
128
        ro[WS(os, 1)] = Tt + TI;
312
128
        ro[WS(os, 7)] = TJ - TK;
313
128
         }
314
128
         /* Imaginary parts of outputs 1, 7, 9, 3: built from the imaginary
          * pair differences (TQ, TL, TM, TR) and the real ones (TU, TV). */
         {
315
128
        E TW, TY, TT, TX, TN, TS;
316
128
        TW = FMA(KP951056516, TU, KP587785252 * TV);
317
128
        TY = FNMS(KP587785252, TU, KP951056516 * TV);
318
128
        TN = KP559016994 * (TL - TM);
319
128
        TS = FNMS(KP250000000, TR, TQ);
320
128
        TT = TN + TS;
321
128
        TX = TS - TN;
322
128
        io[WS(os, 1)] = TT - TW;
323
128
        io[WS(os, 7)] = TY + TX;
324
128
        io[WS(os, 9)] = TW + TT;
325
128
        io[WS(os, 3)] = TX - TY;
326
128
         }
327
128
         /* Real parts of outputs 2, 6, 8, 4: built from the real pair sums
          * (Tj, Tm, Tp, Tq) and differences of imaginary pair sums (T14, T17). */
         {
328
128
        E T18, T1a, T11, T19, TZ, T10;
329
128
        T18 = FNMS(KP587785252, T17, KP951056516 * T14);
330
128
        T1a = FMA(KP951056516, T17, KP587785252 * T14);
331
128
        TZ = FNMS(KP250000000, Tq, Tj);
332
128
        T10 = KP559016994 * (Tm - Tp);
333
128
        T11 = TZ - T10;
334
128
        T19 = T10 + TZ;
335
128
        ro[WS(os, 2)] = T11 - T18;
336
128
        ro[WS(os, 6)] = T19 + T1a;
337
128
        ro[WS(os, 8)] = T11 + T18;
338
128
        ro[WS(os, 4)] = T19 - T1a;
339
128
         }
340
128
         /* Imaginary parts of outputs 2, 6, 8, 4: built from the imaginary
          * pair sums (T1e, T1f, T1g, T1h) and differences of real pair sums
          * (T1b, T1c). */
         {
341
128
        E T1d, T1l, T1k, T1m, T1i, T1j;
342
128
        T1d = FNMS(KP587785252, T1c, KP951056516 * T1b);
343
128
        T1l = FMA(KP951056516, T1c, KP587785252 * T1b);
344
128
        T1i = FNMS(KP250000000, T1h, T1e);
345
128
        T1j = KP559016994 * (T1f - T1g);
346
128
        T1k = T1i - T1j;
347
128
        T1m = T1j + T1i;
348
128
        io[WS(os, 2)] = T1d + T1k;
349
128
        io[WS(os, 6)] = T1m - T1l;
350
128
        io[WS(os, 8)] = T1k - T1d;
351
128
        io[WS(os, 4)] = T1l + T1m;
352
128
         }
353
128
    }
354
16
     }
355
16
}
356
357
/* Descriptor handed to X(kdft_register) together with n1_10: size 10, the
 * kernel name, and the counts { 72, 12, 12, 0 }, whose first three entries
 * match the "72 additions, 12 multiplications, 12 fused multiply/add" figures
 * quoted in this branch's header comment.  The meaning of the remaining
 * fields is fixed by the kdft_desc declaration, which is not in this file. */
static const kdft_desc desc = { 10, "n1_10", { 72, 12, 12, 0 }, &GENUS, 0, 0, 0, 0 };
358
359
1
/* Registration entry point: passes the n1_10 kernel and its descriptor to
 * X(kdft_register) for planner p.  X() is a project macro applied to external
 * names; NOTE(review): presumably it adds a precision-specific prefix --
 * confirm in the project headers. */
void X(codelet_n1_10) (planner *p) { X(kdft_register) (p, n1_10, &desc);
360
1
}
361
362
#endif