Coverage Report

Created: 2025-07-23 07:03

/src/fftw3/rdft/scalar/r2cb/r2cbIII_12.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Wed Jul 23 07:02:43 UTC 2025 */
23
24
#include "rdft/codelet-rdft.h"
25
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28
/* Generated by: ../../../genfft/gen_r2cb.native -fma -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cbIII_12 -dft-III -include rdft/scalar/r2cbIII.h */
29
30
/*
31
 * This function contains 42 FP additions, 20 FP multiplications,
32
 * (or, 30 additions, 8 multiplications, 12 fused multiply/add),
33
 * 25 stack variables, 4 constants, and 24 memory accesses
34
 */
35
#include "rdft/scalar/r2cbIII.h"
36
37
/*
 * r2cbIII_12 -- variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * According to the generator command line recorded in this file
 * (gen_r2cb ... -n 12 -dft-III), this is the size-12 "r2cbIII" codelet.
 * Each pass of the loop reads six values from Cr (csr indices 0..5) and
 * six from Ci (csi indices 0..5), and stores six values to R0 and six to
 * R1 (rs indices 0..5).
 *
 * Parameters:
 *   R0, R1    - arrays that are only written; indexed through stride rs
 *               and advanced by ovs after every pass.
 *   Cr, Ci    - arrays that are only read; indexed through strides csr
 *               and csi and advanced by ivs after every pass.
 *   v         - number of passes; the loop body never runs when v <= 0.
 *   ivs, ovs  - per-pass pointer increments for Cr/Ci and R0/R1.
 *
 * NOTE(review): DK, E, R, INT, stride, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file.  The arithmetic
 * notes below assume FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b;
 * confirm against the headers.
 */
static void r2cbIII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
38
{
39
     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* 1/sqrt(2) */
40
     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* sqrt(3) */
41
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* sqrt(2) */
42
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
43
     {
44
    INT i;
45
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { /* one pass per count in v */
46
         E T5, Tx, Tb, Te, Tw, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu;
47
         { /* Cr values at csr indices 1, 5 and 2 */
48
        E T1, T2, T3, T4;
49
        T1 = Cr[WS(csr, 1)];
50
        T2 = Cr[WS(csr, 5)];
51
        T3 = Cr[WS(csr, 2)];
52
        T4 = T2 + T3;
53
        T5 = T1 + T4; /* sum of the three loaded values */
54
        Tx = T2 - T3;
55
        Tb = FNMS(KP2_000000000, T1, T4); /* T4 - 2*T1 under the FNMS assumption */
56
         }
57
         { /* Ci values at csi indices 1, 5 and 2 */
58
        E Tq, Tc, Td, Tr;
59
        Tq = Ci[WS(csi, 1)];
60
        Tc = Ci[WS(csi, 5)];
61
        Td = Ci[WS(csi, 2)];
62
        Tr = Td - Tc;
63
        Te = Tc + Td;
64
        Tw = FMA(KP2_000000000, Tq, Tr); /* 2*Tq + Tr under the FMA assumption */
65
        Ts = Tq - Tr;
66
         }
67
         { /* Cr values at csr indices 4, 0 and 3 */
68
        E T6, T7, T8, T9;
69
        T6 = Cr[WS(csr, 4)];
70
        T7 = Cr[0];
71
        T8 = Cr[WS(csr, 3)];
72
        T9 = T7 + T8;
73
        Ta = T6 + T9; /* sum of the three loaded values */
74
        TA = T7 - T8;
75
        Tg = FNMS(KP2_000000000, T6, T9); /* T9 - 2*T6 under the FNMS assumption */
76
         }
77
         { /* Ci values at csi indices 4, 0 and 3 */
78
        E To, Th, Ti, Tn;
79
        To = Ci[WS(csi, 4)];
80
        Th = Ci[0];
81
        Ti = Ci[WS(csi, 3)];
82
        Tn = Ti - Th;
83
        Tj = Th + Ti;
84
        Tz = FMA(KP2_000000000, To, Tn); /* 2*To + Tn under the FMA assumption */
85
        Tp = Tn - To;
86
         }
87
         R0[0] = KP2_000000000 * (T5 + Ta); /* this and the next three stores use no sqrt(3) term */
88
         R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp);
89
         Tt = Tp - Ts;
90
         Tu = T5 - Ta;
91
         R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu);
92
         R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt);
93
         { /* stores to R0 at rs indices 2, 5 and to R1 at rs indices 0, 3 */
94
        E Tf, Tk, Tv, Ty, TB, TC;
95
        Tf = FMA(KP1_732050807, Te, Tb);
96
        Tk = FNMS(KP1_732050807, Tj, Tg);
97
        Tv = Tk - Tf;
98
        Ty = FMA(KP1_732050807, Tx, Tw);
99
        TB = FNMS(KP1_732050807, TA, Tz);
100
        TC = Ty + TB;
101
        R0[WS(rs, 2)] = Tf + Tk;
102
        R0[WS(rs, 5)] = TB - Ty;
103
        R1[0] = KP707106781 * (Tv - TC);
104
        R1[WS(rs, 3)] = KP707106781 * (Tv + TC);
105
         }
106
         { /* stores to R0 at rs indices 4, 1 and to R1 at rs indices 2, 5; same terms as above with the sqrt(3) signs flipped */
107
        E Tl, Tm, TF, TD, TE, TG;
108
        Tl = FNMS(KP1_732050807, Te, Tb);
109
        Tm = FMA(KP1_732050807, Tj, Tg);
110
        TF = Tl - Tm;
111
        TD = FMA(KP1_732050807, TA, Tz);
112
        TE = FNMS(KP1_732050807, Tx, Tw);
113
        TG = TE + TD;
114
        R0[WS(rs, 4)] = -(Tl + Tm);
115
        R1[WS(rs, 2)] = KP707106781 * (TF + TG);
116
        R0[WS(rs, 1)] = TD - TE;
117
        R1[WS(rs, 5)] = KP707106781 * (TF - TG);
118
         }
119
    }
120
     }
121
}
122
123
/* Descriptor passed to X(kr2c_register) together with the codelet: the
 * value 12 (matching the generator's -n 12), the name "r2cbIII_12", and
 * the counts { 30, 8, 12, 0 }.  The first three counts match the
 * additions / multiplications / fused multiply-adds quoted in the
 * generator comment for this variant.  The meaning of the fourth entry
 * and of GENUS is defined outside this file. */
static const kr2c_desc desc = { 12, "r2cbIII_12", { 30, 8, 12, 0 }, &GENUS };
124
125
/* Registration hook: passes the r2cbIII_12 codelet and its descriptor to
 * X(kr2c_register) for planner p.  X() is a name-wrapping macro defined
 * outside this file (presumably the library's symbol prefix -- confirm). */
void X(codelet_r2cbIII_12) (planner *p) { X(kr2c_register) (p, r2cbIII_12, &desc);
126
}
127
128
#else
129
130
/* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 12 -name r2cbIII_12 -dft-III -include rdft/scalar/r2cbIII.h */
131
132
/*
133
 * This function contains 42 FP additions, 20 FP multiplications,
134
 * (or, 38 additions, 16 multiplications, 4 fused multiply/add),
135
 * 25 stack variables, 4 constants, and 24 memory accesses
136
 */
137
#include "rdft/scalar/r2cbIII.h"
138
139
/*
 * r2cbIII_12 -- variant compiled in the #else branch, i.e. when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * According to the generator command line recorded in this file
 * (gen_r2cb ... -n 12 -dft-III, without -fma), this is the size-12
 * "r2cbIII" codelet.  Each pass of the loop reads six values from Cr
 * (csr indices 0..5) and six from Ci (csi indices 0..5), and stores six
 * values to R0 and six to R1 (rs indices 0..5).
 *
 * Parameters:
 *   R0, R1    - arrays that are only written; indexed through stride rs
 *               and advanced by ovs after every pass.
 *   Cr, Ci    - arrays that are only read; indexed through strides csr
 *               and csi and advanced by ivs after every pass.
 *   v         - number of passes; the loop body never runs when v <= 0.
 *   ivs, ovs  - per-pass pointer increments for Cr/Ci and R0/R1.
 *
 * NOTE(review): DK, E, R, INT, stride, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file.  The arithmetic
 * notes below assume FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b;
 * confirm against the headers.
 */
static void r2cbIII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
140
0
{
141
0
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* sqrt(2) */
142
0
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
143
0
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
144
0
     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
145
0
     {
146
0
    INT i;
147
0
    for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(48, rs), MAKE_VOLATILE_STRIDE(48, csr), MAKE_VOLATILE_STRIDE(48, csi)) { /* one pass per count in v */
148
0
         E T5, Tw, Tb, Te, Tx, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu;
149
0
         { /* Cr values at csr indices 1, 5 and 2 */
150
0
        E T1, T2, T3, T4;
151
0
        T1 = Cr[WS(csr, 1)];
152
0
        T2 = Cr[WS(csr, 5)];
153
0
        T3 = Cr[WS(csr, 2)];
154
0
        T4 = T2 + T3;
155
0
        T5 = T1 + T4; /* sum of the three loaded values */
156
0
        Tw = KP866025403 * (T2 - T3);
157
0
        Tb = FNMS(KP500000000, T4, T1); /* T1 - T4/2 under the FNMS assumption */
158
0
         }
159
0
         { /* Ci values at csi indices 1, 5 and 2 */
160
0
        E Tq, Tc, Td, Tr;
161
0
        Tq = Ci[WS(csi, 1)];
162
0
        Tc = Ci[WS(csi, 5)];
163
0
        Td = Ci[WS(csi, 2)];
164
0
        Tr = Td - Tc;
165
0
        Te = KP866025403 * (Tc + Td);
166
0
        Tx = FMA(KP500000000, Tr, Tq); /* Tq + Tr/2 under the FMA assumption */
167
0
        Ts = Tq - Tr;
168
0
         }
169
0
         { /* Cr values at csr indices 4, 0 and 3 */
170
0
        E T6, T7, T8, T9;
171
0
        T6 = Cr[WS(csr, 4)];
172
0
        T7 = Cr[0];
173
0
        T8 = Cr[WS(csr, 3)];
174
0
        T9 = T7 + T8;
175
0
        Ta = T6 + T9; /* sum of the three loaded values */
176
0
        TA = KP866025403 * (T7 - T8);
177
0
        Tg = FNMS(KP500000000, T9, T6); /* T6 - T9/2 under the FNMS assumption */
178
0
         }
179
0
         { /* Ci values at csi indices 4, 0 and 3 */
180
0
        E To, Th, Ti, Tn;
181
0
        To = Ci[WS(csi, 4)];
182
0
        Th = Ci[0];
183
0
        Ti = Ci[WS(csi, 3)];
184
0
        Tn = Ti - Th;
185
0
        Tj = KP866025403 * (Th + Ti);
186
0
        Tz = FMA(KP500000000, Tn, To); /* To + Tn/2 under the FMA assumption */
187
0
        Tp = Tn - To;
188
0
         }
189
0
         R0[0] = KP2_000000000 * (T5 + Ta); /* this and the next three stores use no sqrt(3)/2 term */
190
0
         R0[WS(rs, 3)] = KP2_000000000 * (Ts + Tp);
191
0
         Tt = Tp - Ts;
192
0
         Tu = T5 - Ta;
193
0
         R1[WS(rs, 1)] = KP1_414213562 * (Tt - Tu);
194
0
         R1[WS(rs, 4)] = KP1_414213562 * (Tu + Tt);
195
0
         { /* stores to R0 at rs indices 2, 5 and to R1 at rs indices 0, 3 */
196
0
        E Tf, Tk, Tv, Ty, TB, TC;
197
0
        Tf = Tb - Te;
198
0
        Tk = Tg + Tj;
199
0
        Tv = Tf - Tk;
200
0
        Ty = Tw + Tx;
201
0
        TB = Tz - TA;
202
0
        TC = Ty + TB;
203
0
        R0[WS(rs, 2)] = -(KP2_000000000 * (Tf + Tk));
204
0
        R0[WS(rs, 5)] = KP2_000000000 * (TB - Ty);
205
0
        R1[0] = KP1_414213562 * (Tv - TC);
206
0
        R1[WS(rs, 3)] = KP1_414213562 * (Tv + TC);
207
0
         }
208
0
         { /* stores to R0 at rs indices 4, 1 and to R1 at rs indices 2, 5; same terms as above with the Te, Tj, TA, Tw signs flipped */
209
0
        E Tl, Tm, TF, TD, TE, TG;
210
0
        Tl = Tb + Te;
211
0
        Tm = Tg - Tj;
212
0
        TF = Tm - Tl;
213
0
        TD = TA + Tz;
214
0
        TE = Tx - Tw;
215
0
        TG = TE + TD;
216
0
        R0[WS(rs, 4)] = KP2_000000000 * (Tl + Tm);
217
0
        R1[WS(rs, 2)] = KP1_414213562 * (TF + TG);
218
0
        R0[WS(rs, 1)] = KP2_000000000 * (TD - TE);
219
0
        R1[WS(rs, 5)] = KP1_414213562 * (TF - TG);
220
0
         }
221
0
    }
222
0
     }
223
0
}
224
225
/* Descriptor passed to X(kr2c_register) together with the codelet: the
 * value 12 (matching the generator's -n 12), the name "r2cbIII_12", and
 * the counts { 38, 16, 4, 0 }.  The first three counts match the
 * additions / multiplications / fused multiply-adds quoted in the
 * generator comment for this variant.  The meaning of the fourth entry
 * and of GENUS is defined outside this file. */
static const kr2c_desc desc = { 12, "r2cbIII_12", { 38, 16, 4, 0 }, &GENUS };
226
227
1
/* Registration hook: passes the r2cbIII_12 codelet and its descriptor to
 * X(kr2c_register) for planner p.  X() is a name-wrapping macro defined
 * outside this file (presumably the library's symbol prefix -- confirm). */
void X(codelet_r2cbIII_12) (planner *p) { X(kr2c_register) (p, r2cbIII_12, &desc);
228
1
}
229
230
#endif