Coverage Report

Created: 2025-07-23 07:03

/src/fftw3/rdft/scalar/r2cb/hc2cb2_8.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Wed Jul 23 07:02:56 UTC 2025 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_hc2c.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 8 -dif -name hc2cb2_8 -include rdft/scalar/hc2cb.h */
29
30
/*
31
 * This function contains 74 FP additions, 50 FP multiplications,
32
 * (or, 44 additions, 20 multiplications, 30 fused multiply/add),
33
 * 47 stack variables, 1 constants, and 32 memory accesses
34
 */
35
#include "rdft/scalar/hc2cb.h"
36
37
/*
 * hc2cb2_8 (FMA variant, compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined): size-8 twiddle kernel.
 *
 * For each m in [mb, me) the loop body reads {Rp, Ip, Rm, Im}[WS(rs, k)]
 * for k = 0..3 (16 loads), combines them with the six twiddle reals
 * W[0..5], and stores 16 results back to those same 16 locations.
 *
 * Rp, Ip : advanced by +ms after every iteration.
 * Rm, Im : advanced by -ms after every iteration.
 * W      : twiddle table; offset by (mb - 1) * 6 before the first
 *          iteration and advanced by 6 reals per iteration.
 * rs     : stride handed to WS() to address the four slots of each array.
 * mb, me : half-open iteration range for m.
 * ms     : per-iteration pointer step (see Rp/Ip/Rm/Im above).
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are project macros,
 * presumably a*b + c and c - a*b; confirm in the codelet headers.
 */
static void hc2cb2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
40
     {
41
    INT m;
42
    for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(32, rs)) {
43
         E Tf, Tg, Tl, Tp, Ti, Tj, Tk, T1b, T1u, T1e, T1o, To, Tq, TK;
44
         { /* twiddles: load W[0..5] and form the derived products reused by the stores below */
45
        E Th, T1n, T1t, Tn, Tm, TJ;
46
        Tf = W[0];
47
        Tg = W[2];
48
        Th = Tf * Tg;
49
        Tl = W[4];
50
        T1n = Tf * Tl;
51
        Tp = W[5];
52
        T1t = Tf * Tp;
53
        Ti = W[1];
54
        Tj = W[3];
55
        Tn = Tf * Tj;
56
        Tk = FMA(Ti, Tj, Th);
57
        T1b = FNMS(Ti, Tj, Th);
58
        T1u = FNMS(Ti, Tl, T1t);
59
        T1e = FMA(Ti, Tg, Tn);
60
        T1o = FMA(Ti, Tp, T1n);
61
        Tm = Tk * Tl;
62
        TJ = Tk * Tp;
63
        To = FNMS(Ti, Tg, Tn);
64
        Tq = FMA(To, Tp, Tm);
65
        TK = FNMS(To, Tl, TJ);
66
         }
67
         { /* data path for this m: load, add/subtract, apply twiddles, store */
68
        E T7, T1p, T1v, Tv, TP, T13, T1h, TZ, Te, T1k, T1w, T1q, TQ, TR, T10;
69
        E TG, T14;
70
        { /* slots 0 and 2 of Rp/Ip paired with slots 3 and 1 of Rm/Im */
71
       E T3, Tr, TO, T1f, T6, TL, Tu, T1g;
72
       {
73
            E T1, T2, TM, TN;
74
            T1 = Rp[0];
75
            T2 = Rm[WS(rs, 3)];
76
            T3 = T1 + T2;
77
            Tr = T1 - T2;
78
            TM = Ip[0];
79
            TN = Im[WS(rs, 3)];
80
            TO = TM + TN;
81
            T1f = TM - TN;
82
       }
83
       {
84
            E T4, T5, Ts, Tt;
85
            T4 = Rp[WS(rs, 2)];
86
            T5 = Rm[WS(rs, 1)];
87
            T6 = T4 + T5;
88
            TL = T4 - T5;
89
            Ts = Ip[WS(rs, 2)];
90
            Tt = Im[WS(rs, 1)];
91
            Tu = Ts + Tt;
92
            T1g = Ts - Tt;
93
       }
94
       T7 = T3 + T6;
95
       T1p = T3 - T6;
96
       T1v = T1f - T1g;
97
       Tv = Tr - Tu;
98
       TP = TL + TO;
99
       T13 = TO - TL;
100
       T1h = T1f + T1g;
101
       TZ = Tr + Tu;
102
        }
103
        { /* slots 1 and 3 of Rp/Ip paired with slots 2 and 0 of Rm/Im */
104
       E Ta, Tw, Tz, T1i, Td, TB, TE, T1j, TA, TF;
105
       {
106
            E T8, T9, Tx, Ty;
107
            T8 = Rp[WS(rs, 1)];
108
            T9 = Rm[WS(rs, 2)];
109
            Ta = T8 + T9;
110
            Tw = T8 - T9;
111
            Tx = Ip[WS(rs, 1)];
112
            Ty = Im[WS(rs, 2)];
113
            Tz = Tx + Ty;
114
            T1i = Tx - Ty;
115
       }
116
       {
117
            E Tb, Tc, TC, TD;
118
            Tb = Rm[0];
119
            Tc = Rp[WS(rs, 3)];
120
            Td = Tb + Tc;
121
            TB = Tb - Tc;
122
            TC = Ip[WS(rs, 3)];
123
            TD = Im[0];
124
            TE = TC + TD;
125
            T1j = TC - TD;
126
       }
127
       Te = Ta + Td;
128
       T1k = T1i + T1j;
129
       T1w = Ta - Td;
130
       T1q = T1j - T1i;
131
       TQ = Tw + Tz;
132
       TR = TB + TE;
133
       T10 = TQ + TR;
134
       TA = Tw - Tz;
135
       TF = TB - TE;
136
       TG = TA + TF;
137
       T14 = TA - TF;
138
        }
139
        Rp[0] = T7 + Te; /* slot 0 of Rp and Rm: plain sums, no twiddle factor applied */
140
        Rm[0] = T1h + T1k;
141
        { /* Ip/Im slot 1: uses the twiddle pair (Tg, Tj) = (W[2], W[3]) */
142
       E T11, T12, T15, T16;
143
       T11 = FNMS(KP707106781, T10, TZ);
144
       T12 = Tg * T11;
145
       T15 = FMA(KP707106781, T14, T13);
146
       T16 = Tg * T15;
147
       Ip[WS(rs, 1)] = FNMS(Tj, T15, T12);
148
       Im[WS(rs, 1)] = FMA(Tj, T11, T16);
149
        }
150
        { /* Rp/Rm slot 1: uses the derived pair (Tk, To) */
151
       E T1z, T1A, T1B, T1C;
152
       T1z = T1p + T1q;
153
       T1A = Tk * T1z;
154
       T1B = T1w + T1v;
155
       T1C = Tk * T1B;
156
       Rp[WS(rs, 1)] = FNMS(To, T1B, T1A);
157
       Rm[WS(rs, 1)] = FMA(To, T1z, T1C);
158
        }
159
        { /* Ip/Im slot 3: uses the twiddle pair (Tl, Tp) = (W[4], W[5]) */
160
       E T17, T18, T19, T1a;
161
       T17 = FMA(KP707106781, T10, TZ);
162
       T18 = Tl * T17;
163
       T19 = FNMS(KP707106781, T14, T13);
164
       T1a = Tl * T19;
165
       Ip[WS(rs, 3)] = FNMS(Tp, T19, T18);
166
       Im[WS(rs, 3)] = FMA(Tp, T17, T1a);
167
        }
168
        { /* Rp/Rm slot 2: uses the derived pair (T1b, T1e) */
169
       E T1l, T1d, T1m, T1c;
170
       T1l = T1h - T1k;
171
       T1c = T7 - Te;
172
       T1d = T1b * T1c;
173
       T1m = T1e * T1c;
174
       Rp[WS(rs, 2)] = FNMS(T1e, T1l, T1d);
175
       Rm[WS(rs, 2)] = FMA(T1b, T1l, T1m);
176
        }
177
        { /* Rp/Rm slot 3: uses the derived pair (T1o, T1u) */
178
       E T1r, T1s, T1x, T1y;
179
       T1r = T1p - T1q;
180
       T1s = T1o * T1r;
181
       T1x = T1v - T1w;
182
       T1y = T1o * T1x;
183
       Rp[WS(rs, 3)] = FNMS(T1u, T1x, T1s);
184
       Rm[WS(rs, 3)] = FMA(T1u, T1r, T1y);
185
        }
186
        { /* Ip/Im slot 2 uses (Tq, TK); Ip/Im slot 0 uses (Tf, Ti) = (W[0], W[1]) */
187
       E TT, TX, TW, TY, TI, TU, TS, TV, TH;
188
       TS = TQ - TR;
189
       TT = FNMS(KP707106781, TS, TP);
190
       TX = FMA(KP707106781, TS, TP);
191
       TV = FMA(KP707106781, TG, Tv);
192
       TW = Tf * TV;
193
       TY = Ti * TV;
194
       TH = FNMS(KP707106781, TG, Tv);
195
       TI = Tq * TH;
196
       TU = TK * TH;
197
       Ip[WS(rs, 2)] = FNMS(TK, TT, TI);
198
       Im[WS(rs, 2)] = FMA(Tq, TT, TU);
199
       Ip[0] = FNMS(Ti, TX, TW);
200
       Im[0] = FMA(Tf, TX, TY);
201
        }
202
         }
203
    }
204
     }
205
}
206
207
/*
 * Twiddle instruction list handed to the descriptor below: three TW_CEXP
 * entries whose last field is 1, 3 and 7, closed by a TW_NEXT entry.
 * NOTE(review): the three entries presumably correspond to the three
 * complex twiddles (six reals, W[0..5]) the kernel consumes per
 * iteration -- confirm against the tw_instr definition.
 */
static const tw_instr twinstr[] = {
208
     { TW_CEXP, 1, 1 },
209
     { TW_CEXP, 1, 3 },
210
     { TW_CEXP, 1, 7 },
211
     { TW_NEXT, 1, 0 }
212
};
213
214
/*
 * Descriptor for the FMA variant: 8 (matches "-n 8" in the generator
 * command), the kernel name, its twiddle instruction list, GENUS, and the
 * figures { 44, 20, 30, 0 }, which match the additions / multiplications /
 * fused multiply-adds quoted in the generator's comment for this variant.
 */
static const hc2c_desc desc = { 8, "hc2cb2_8", twinstr, &GENUS, { 44, 20, 30, 0 } };
215
216
/*
 * Registration entry point: passes the hc2cb2_8 kernel and its descriptor
 * to X(khc2c_register) for planner p, tagged HC2C_VIA_RDFT.
 */
void X(codelet_hc2cb2_8) (planner *p) {
217
     X(khc2c_register) (p, hc2cb2_8, &desc, HC2C_VIA_RDFT);
218
}
219
#else
220
221
/* Generated by: ../../../genfft/gen_hc2c.native -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 8 -dif -name hc2cb2_8 -include rdft/scalar/hc2cb.h */
222
223
/*
224
 * This function contains 74 FP additions, 44 FP multiplications,
225
 * (or, 56 additions, 26 multiplications, 18 fused multiply/add),
226
 * 46 stack variables, 1 constants, and 32 memory accesses
227
 */
228
#include "rdft/scalar/hc2cb.h"
229
230
/*
 * hc2cb2_8 (variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined): size-8 twiddle kernel.
 *
 * For each m in [mb, me) the loop body reads {Rp, Ip, Rm, Im}[WS(rs, k)]
 * for k = 0..3 (16 loads), combines them with the six twiddle reals
 * W[0..5], and stores 16 results back to those same 16 locations.
 *
 * Rp, Ip : advanced by +ms after every iteration.
 * Rm, Im : advanced by -ms after every iteration.
 * W      : twiddle table; offset by (mb - 1) * 6 before the first
 *          iteration and advanced by 6 reals per iteration.
 * rs     : stride handed to WS() to address the four slots of each array.
 * mb, me : half-open iteration range for m.
 * ms     : per-iteration pointer step (see Rp/Ip/Rm/Im above).
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are project macros,
 * presumably a*b + c and c - a*b; confirm in the codelet headers.
 */
static void hc2cb2_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
231
0
{
232
0
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
233
0
     {
234
0
    INT m;
235
0
    for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(32, rs)) {
236
0
         E Tf, Ti, Tg, Tj, Tl, Tp, TP, TR, TF, TG, TH, T15, TL, TT;
237
0
         { /* twiddles: load W[0..5] and form the derived pairs (Tl,Tp), (TP,TR), (TH,TL), (TT,T15) */
238
0
        E Th, To, Tk, Tn;
239
0
        Tf = W[0];
240
0
        Ti = W[1];
241
0
        Tg = W[2];
242
0
        Tj = W[3];
243
0
        Th = Tf * Tg;
244
0
        To = Ti * Tg;
245
0
        Tk = Ti * Tj;
246
0
        Tn = Tf * Tj;
247
0
        Tl = Th - Tk;
248
0
        Tp = Tn + To;
249
0
        TP = Th + Tk;
250
0
        TR = Tn - To;
251
0
        TF = W[4];
252
0
        TG = W[5];
253
0
        TH = FMA(Tf, TF, Ti * TG);
254
0
        T15 = FNMS(TR, TF, TP * TG);
255
0
        TL = FNMS(Ti, TF, Tf * TG);
256
0
        TT = FMA(TP, TF, TR * TG);
257
0
         }
258
0
         { /* data path for this m: load, add/subtract, apply twiddles, store */
259
0
        E T7, T1f, T1i, Tw, TI, TW, T18, TM, Te, T19, T1a, TD, TJ, TZ, T12;
260
0
        E TN, Tm, TE;
261
0
        { /* slots 0 and 2 of Rp/Ip paired with slots 3 and 1 of Rm/Im */
262
0
       E T3, TU, Ts, T17, T6, T16, Tv, TV;
263
0
       {
264
0
            E T1, T2, Tq, Tr;
265
0
            T1 = Rp[0];
266
0
            T2 = Rm[WS(rs, 3)];
267
0
            T3 = T1 + T2;
268
0
            TU = T1 - T2;
269
0
            Tq = Ip[0];
270
0
            Tr = Im[WS(rs, 3)];
271
0
            Ts = Tq - Tr;
272
0
            T17 = Tq + Tr;
273
0
       }
274
0
       {
275
0
            E T4, T5, Tt, Tu;
276
0
            T4 = Rp[WS(rs, 2)];
277
0
            T5 = Rm[WS(rs, 1)];
278
0
            T6 = T4 + T5;
279
0
            T16 = T4 - T5;
280
0
            Tt = Ip[WS(rs, 2)];
281
0
            Tu = Im[WS(rs, 1)];
282
0
            Tv = Tt - Tu;
283
0
            TV = Tt + Tu;
284
0
       }
285
0
       T7 = T3 + T6;
286
0
       T1f = TU + TV;
287
0
       T1i = T17 - T16;
288
0
       Tw = Ts + Tv;
289
0
       TI = T3 - T6;
290
0
       TW = TU - TV;
291
0
       T18 = T16 + T17;
292
0
       TM = Ts - Tv;
293
0
        }
294
0
        { /* slots 1 and 3 of Rp/Ip paired with slots 2 and 0 of Rm/Im */
295
0
       E Ta, TX, Tz, TY, Td, T10, TC, T11;
296
0
       {
297
0
            E T8, T9, Tx, Ty;
298
0
            T8 = Rp[WS(rs, 1)];
299
0
            T9 = Rm[WS(rs, 2)];
300
0
            Ta = T8 + T9;
301
0
            TX = T8 - T9;
302
0
            Tx = Ip[WS(rs, 1)];
303
0
            Ty = Im[WS(rs, 2)];
304
0
            Tz = Tx - Ty;
305
0
            TY = Tx + Ty;
306
0
       }
307
0
       {
308
0
            E Tb, Tc, TA, TB;
309
0
            Tb = Rm[0];
310
0
            Tc = Rp[WS(rs, 3)];
311
0
            Td = Tb + Tc;
312
0
            T10 = Tb - Tc;
313
0
            TA = Ip[WS(rs, 3)];
314
0
            TB = Im[0];
315
0
            TC = TA - TB;
316
0
            T11 = TA + TB;
317
0
       }
318
0
       Te = Ta + Td;
319
0
       T19 = TX + TY;
320
0
       T1a = T10 + T11;
321
0
       TD = Tz + TC;
322
0
       TJ = TC - Tz;
323
0
       TZ = TX - TY;
324
0
       T12 = T10 - T11;
325
0
       TN = Ta - Td;
326
0
        }
327
0
        Rp[0] = T7 + Te; /* slot 0 of Rp and Rm: plain sums, no twiddle factor applied */
328
0
        Rm[0] = Tw + TD;
329
0
        Tm = T7 - Te;
330
0
        TE = Tw - TD;
331
0
        Rp[WS(rs, 2)] = FNMS(Tp, TE, Tl * Tm); /* Rp/Rm slot 2: uses the derived pair (Tl, Tp) */
332
0
        Rm[WS(rs, 2)] = FMA(Tp, Tm, Tl * TE);
333
0
        { /* Rp/Rm slot 1 uses the derived pair (TP, TR); slot 3 uses (TH, TL) */
334
0
       E TQ, TS, TK, TO;
335
0
       TQ = TI + TJ;
336
0
       TS = TN + TM;
337
0
       Rp[WS(rs, 1)] = FNMS(TR, TS, TP * TQ);
338
0
       Rm[WS(rs, 1)] = FMA(TP, TS, TR * TQ);
339
0
       TK = TI - TJ;
340
0
       TO = TM - TN;
341
0
       Rp[WS(rs, 3)] = FNMS(TL, TO, TH * TK);
342
0
       Rm[WS(rs, 3)] = FMA(TH, TO, TL * TK);
343
0
        }
344
0
        { /* Ip/Im slot 1 uses (Tg, Tj) = (W[2], W[3]); slot 3 uses (TF, TG) = (W[4], W[5]) */
345
0
       E T1h, T1l, T1k, T1m, T1g, T1j;
346
0
       T1g = KP707106781 * (T19 + T1a);
347
0
       T1h = T1f - T1g;
348
0
       T1l = T1f + T1g;
349
0
       T1j = KP707106781 * (TZ - T12);
350
0
       T1k = T1i + T1j;
351
0
       T1m = T1i - T1j;
352
0
       Ip[WS(rs, 1)] = FNMS(Tj, T1k, Tg * T1h);
353
0
       Im[WS(rs, 1)] = FMA(Tg, T1k, Tj * T1h);
354
0
       Ip[WS(rs, 3)] = FNMS(TG, T1m, TF * T1l);
355
0
       Im[WS(rs, 3)] = FMA(TF, T1m, TG * T1l);
356
0
        }
357
0
        { /* Ip/Im slot 2 uses the derived pair (TT, T15); slot 0 uses (Tf, Ti) = (W[0], W[1]) */
358
0
       E T14, T1d, T1c, T1e, T13, T1b;
359
0
       T13 = KP707106781 * (TZ + T12);
360
0
       T14 = TW - T13;
361
0
       T1d = TW + T13;
362
0
       T1b = KP707106781 * (T19 - T1a);
363
0
       T1c = T18 - T1b;
364
0
       T1e = T18 + T1b;
365
0
       Ip[WS(rs, 2)] = FNMS(T15, T1c, TT * T14);
366
0
       Im[WS(rs, 2)] = FMA(T15, T14, TT * T1c);
367
0
       Ip[0] = FNMS(Ti, T1e, Tf * T1d);
368
0
       Im[0] = FMA(Ti, T1d, Tf * T1e);
369
0
        }
370
0
         }
371
0
    }
372
0
     }
373
0
}
374
375
/*
 * Twiddle instruction list handed to the descriptor below: three TW_CEXP
 * entries whose last field is 1, 3 and 7, closed by a TW_NEXT entry.
 * NOTE(review): the three entries presumably correspond to the three
 * complex twiddles (six reals, W[0..5]) the kernel consumes per
 * iteration -- confirm against the tw_instr definition.
 */
static const tw_instr twinstr[] = {
376
     { TW_CEXP, 1, 1 },
377
     { TW_CEXP, 1, 3 },
378
     { TW_CEXP, 1, 7 },
379
     { TW_NEXT, 1, 0 }
380
};
381
382
/*
 * Descriptor for the non-FMA variant: 8 (matches "-n 8" in the generator
 * command), the kernel name, its twiddle instruction list, GENUS, and the
 * figures { 56, 26, 18, 0 }, which match the additions / multiplications /
 * fused multiply-adds quoted in the generator's comment for this variant.
 */
static const hc2c_desc desc = { 8, "hc2cb2_8", twinstr, &GENUS, { 56, 26, 18, 0 } };
383
384
1
/*
 * Registration entry point: passes the hc2cb2_8 kernel and its descriptor
 * to X(khc2c_register) for planner p, tagged HC2C_VIA_RDFT.
 */
void X(codelet_hc2cb2_8) (planner *p) {
385
1
     X(khc2c_register) (p, hc2cb2_8, &desc, HC2C_VIA_RDFT);
386
1
}
387
#endif