/src/fftw3/rdft/scalar/r2cf/r2cf_8.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Wed Jul 23 07:01:26 UTC 2025 */

#include "rdft/codelet-rdft.h"

#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)

/* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 8 -name r2cf_8 -include rdft/scalar/r2cf.h */

/*
 * This function contains 20 FP additions, 4 FP multiplications,
 * (or, 16 additions, 0 multiplications, 4 fused multiply/add),
 * 14 stack variables, 1 constants, and 16 memory accesses
 */
#include "rdft/scalar/r2cf.h"

/*
 * r2cf_8 (FMA build).
 *
 * Each of the `v` passes reads R0[WS(rs, k)] and R1[WS(rs, k)] for k = 0..3,
 * then writes Cr[WS(csr, k)] for k = 0..4 and Ci[WS(csi, k)] for k = 1..3.
 * Between passes the input pointers advance by `ivs` and the output pointers
 * by `ovs`.
 *
 * NOTE(review): judging by the generator command line (gen_r2cf, -n 8) this
 * is presumably the size-8 real-to-complex forward kernel; the exact layout
 * convention of R0/R1/Cr/Ci is defined outside this file -- confirm there.
 *
 * The file is marked as automatically generated, so regenerating it will
 * overwrite hand changes made here.
 */
static void r2cf_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
          INT i;
          for (i = v; i > 0; --i, R0 += ivs, R1 += ivs, Cr += ovs, Ci += ovs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) {
               /*
                * Every input is read before any output is written.
                * NOTE(review): callers may pass outputs that alias the
                * inputs -- confirm before moving a store above a load.
                */
               E a0 = R0[0];
               E a1 = R0[WS(rs, 1)];
               E a2 = R0[WS(rs, 2)];
               E a3 = R0[WS(rs, 3)];
               E b0 = R1[0];
               E b1 = R1[WS(rs, 1)];
               E b2 = R1[WS(rs, 2)];
               E b3 = R1[WS(rs, 3)];

               /* Pairwise sums and differences of the R0 inputs. */
               E a02_sum = a0 + a2;
               E a02_diff = a0 - a2;
               E a13_sum = a1 + a3;
               E a13_diff = a1 - a3;

               /* Pairwise sums and differences of the R1 inputs. */
               E b31_diff = b3 - b1;
               E b31_sum = b3 + b1;
               E b02_diff = b0 - b2;
               E b02_sum = b0 + b2;

               /* Second-level combinations feeding the outputs. */
               E b_diff_sum = b02_diff + b31_diff;
               E b_diff_gap = b31_diff - b02_diff;
               E a_total = a02_sum + a13_sum;
               E b_total = b02_sum + b31_sum;

               Cr[WS(csr, 2)] = a02_sum - a13_sum;
               Ci[WS(csi, 2)] = b31_sum - b02_sum;

               /* The four outputs scaled by KP707106781 go through the FMA-family macros. */
               Cr[WS(csr, 3)] = FNMS(KP707106781, b_diff_sum, a02_diff);
               Cr[WS(csr, 1)] = FMA(KP707106781, b_diff_sum, a02_diff);
               Ci[WS(csi, 1)] = FMS(KP707106781, b_diff_gap, a13_diff);
               Ci[WS(csi, 3)] = FMA(KP707106781, b_diff_gap, a13_diff);

               Cr[WS(csr, 4)] = a_total - b_total;
               Cr[0] = a_total + b_total;
          }
     }
}

/*
 * Descriptor handed to X(kr2c_register) together with r2cf_8.
 * The leading 8 and the "r2cf_8" string match the generator's -n and -name
 * options, and { 16, 0, 4, 0 } lines up with the operation counts quoted in
 * the generated comment above the function (16 additions, 0 multiplications, 4 fused
 * multiply/adds). NOTE(review): the meaning of the fourth count and of
 * GENUS is defined outside this file -- confirm against kr2c_desc.
 */
static const kr2c_desc desc = { 8, "r2cf_8", { 16, 0, 4, 0 }, &GENUS };

/*
 * Entry point for this codelet: passes the r2cf_8 kernel and its descriptor
 * to X(kr2c_register) for planner `p`. X() is the project's name-wrapping
 * macro, so the exported symbol name depends on how X is defined elsewhere.
 */
void X(codelet_r2cf_8) (planner *p) { X(kr2c_register) (p, r2cf_8, &desc);
}

#else

/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 8 -name r2cf_8 -include rdft/scalar/r2cf.h */

/*
 * This function contains 20 FP additions, 2 FP multiplications,
 * (or, 20 additions, 2 multiplications, 0 fused multiply/add),
 * 14 stack variables, 1 constants, and 16 memory accesses
 */
#include "rdft/scalar/r2cf.h"

/*
 * r2cf_8 (non-FMA build).
 *
 * Each of the `v` passes reads R0[WS(rs, k)] and R1[WS(rs, k)] for k = 0..3,
 * then writes Cr[WS(csr, k)] for k = 0..4 and Ci[WS(csi, k)] for k = 1..3.
 * Between passes the input pointers advance by `ivs` and the output pointers
 * by `ovs`.
 *
 * NOTE(review): judging by the generator command line (gen_r2cf, -n 8) this
 * is presumably the size-8 real-to-complex forward kernel; the exact layout
 * convention of R0/R1/Cr/Ci is defined outside this file -- confirm there.
 *
 * The file is marked as automatically generated, so regenerating it will
 * overwrite hand changes made here.
 */
static void r2cf_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
          INT i;
          for (i = v; i > 0; --i, R0 += ivs, R1 += ivs, Cr += ovs, Ci += ovs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) {
               /*
                * Every input is read before any output is written.
                * NOTE(review): callers may pass outputs that alias the
                * inputs -- confirm before moving a store above a load.
                */
               E a0 = R0[0];
               E a1 = R0[WS(rs, 1)];
               E a2 = R0[WS(rs, 2)];
               E a3 = R0[WS(rs, 3)];
               E b0 = R1[0];
               E b1 = R1[WS(rs, 1)];
               E b2 = R1[WS(rs, 2)];
               E b3 = R1[WS(rs, 3)];

               /* Pairwise sums and differences of the R0 inputs. */
               E a02_sum = a0 + a2;
               E a02_diff = a0 - a2;
               E a13_sum = a1 + a3;
               E a13_diff = a1 - a3;

               /* Pairwise sums and differences of the R1 inputs. */
               E b31_diff = b3 - b1;
               E b31_sum = b3 + b1;
               E b02_diff = b0 - b2;
               E b02_sum = b0 + b2;

               /* The only two multiplications, each kept in its own temporary. */
               E scaled_sum = KP707106781 * (b02_diff + b31_diff);
               E scaled_gap = KP707106781 * (b31_diff - b02_diff);

               E a_total = a02_sum + a13_sum;
               E b_total = b02_sum + b31_sum;

               Cr[WS(csr, 2)] = a02_sum - a13_sum;
               Ci[WS(csi, 2)] = b31_sum - b02_sum;
               Cr[WS(csr, 3)] = a02_diff - scaled_sum;
               Cr[WS(csr, 1)] = a02_diff + scaled_sum;
               Ci[WS(csi, 1)] = scaled_gap - a13_diff;
               Ci[WS(csi, 3)] = a13_diff + scaled_gap;
               Cr[WS(csr, 4)] = a_total - b_total;
               Cr[0] = a_total + b_total;
          }
     }
}

/*
 * Descriptor handed to X(kr2c_register) together with r2cf_8.
 * The leading 8 and the "r2cf_8" string match the generator's -n and -name
 * options, and { 20, 2, 0, 0 } lines up with the operation counts quoted in
 * the generated comment above the function (20 additions, 2 multiplications, 0 fused
 * multiply/adds). NOTE(review): the meaning of the fourth count and of
 * GENUS is defined outside this file -- confirm against kr2c_desc.
 */
static const kr2c_desc desc = { 8, "r2cf_8", { 20, 2, 0, 0 }, &GENUS };

/*
 * Entry point for this codelet: passes the r2cf_8 kernel and its descriptor
 * to X(kr2c_register) for planner `p`. X() is the project's name-wrapping
 * macro, so the exported symbol name depends on how X is defined elsewhere.
 */
void X(codelet_r2cf_8) (planner *p) { X(kr2c_register) (p, r2cf_8, &desc);
}

#endif

Coverage Report

Created: 2025-07-23 07:03

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21		/* This file was automatically generated --- DO NOT EDIT */
22		/* Generated on Wed Jul 23 07:01:26 UTC 2025 */
23
24		#include "rdft/codelet-rdft.h"
25
26		#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28		/* Generated by: ../../../genfft/gen_r2cf.native -fma -compact -variables 4 -pipeline-latency 4 -n 8 -name r2cf_8 -include rdft/scalar/r2cf.h */
29
30		/*
31		* This function contains 20 FP additions, 4 FP multiplications,
32		* (or, 16 additions, 0 multiplications, 4 fused multiply/add),
33		* 14 stack variables, 1 constants, and 16 memory accesses
34		*/
35		#include "rdft/scalar/r2cf.h"
36
37		static void r2cf_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
38		{
39		DK(KP707106781, +0.707106781186547524400844362104849039284835938);
40		{
41		INT i;
42		for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) {
43		E T3, T7, Td, Tj, T6, Tf, Ta, Ti;
44		{
45		E T1, T2, Tb, Tc;
46		T1 = R0[0];
47		T2 = R0[WS(rs, 2)];
48		T3 = T1 + T2;
49		T7 = T1 - T2;
50		Tb = R1[WS(rs, 3)];
51		Tc = R1[WS(rs, 1)];
52		Td = Tb - Tc;
53		Tj = Tb + Tc;
54		}
55		{
56		E T4, T5, T8, T9;
57		T4 = R0[WS(rs, 1)];
58		T5 = R0[WS(rs, 3)];
59		T6 = T4 + T5;
60		Tf = T4 - T5;
61		T8 = R1[0];
62		T9 = R1[WS(rs, 2)];
63		Ta = T8 - T9;
64		Ti = T8 + T9;
65		}
66		Cr[WS(csr, 2)] = T3 - T6;
67		Ci[WS(csi, 2)] = Tj - Ti;
68		{
69		E Te, Tg, Th, Tk;
70		Te = Ta + Td;
71		Cr[WS(csr, 3)] = FNMS(KP707106781, Te, T7);
72		Cr[WS(csr, 1)] = FMA(KP707106781, Te, T7);
73		Tg = Td - Ta;
74		Ci[WS(csi, 1)] = FMS(KP707106781, Tg, Tf);
75		Ci[WS(csi, 3)] = FMA(KP707106781, Tg, Tf);
76		Th = T3 + T6;
77		Tk = Ti + Tj;
78		Cr[WS(csr, 4)] = Th - Tk;
79		Cr[0] = Th + Tk;
80		}
81		}
82		}
83		}
84
85		static const kr2c_desc desc = { 8, "r2cf_8", { 16, 0, 4, 0 }, &GENUS };
86
87		void X(codelet_r2cf_8) (planner *p) { X(kr2c_register) (p, r2cf_8, &desc);
88		}
89
90		#else
91
92		/* Generated by: ../../../genfft/gen_r2cf.native -compact -variables 4 -pipeline-latency 4 -n 8 -name r2cf_8 -include rdft/scalar/r2cf.h */
93
94		/*
95		* This function contains 20 FP additions, 2 FP multiplications,
96		* (or, 20 additions, 2 multiplications, 0 fused multiply/add),
97		* 14 stack variables, 1 constants, and 16 memory accesses
98		*/
99		#include "rdft/scalar/r2cf.h"
100
101		static void r2cf_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
102	0	{
103	0	DK(KP707106781, +0.707106781186547524400844362104849039284835938);
104	0	{
105	0	INT i;
106	0	for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(32, rs), MAKE_VOLATILE_STRIDE(32, csr), MAKE_VOLATILE_STRIDE(32, csi)) {
107	0	E T3, T7, Td, Tj, T6, Tg, Ta, Ti;
108	0	{
109	0	E T1, T2, Tb, Tc;
110	0	T1 = R0[0];
111	0	T2 = R0[WS(rs, 2)];
112	0	T3 = T1 + T2;
113	0	T7 = T1 - T2;
114	0	Tb = R1[WS(rs, 3)];
115	0	Tc = R1[WS(rs, 1)];
116	0	Td = Tb - Tc;
117	0	Tj = Tb + Tc;
118	0	}
119	0	{
120	0	E T4, T5, T8, T9;
121	0	T4 = R0[WS(rs, 1)];
122	0	T5 = R0[WS(rs, 3)];
123	0	T6 = T4 + T5;
124	0	Tg = T4 - T5;
125	0	T8 = R1[0];
126	0	T9 = R1[WS(rs, 2)];
127	0	Ta = T8 - T9;
128	0	Ti = T8 + T9;
129	0	}
130	0	Cr[WS(csr, 2)] = T3 - T6;
131	0	Ci[WS(csi, 2)] = Tj - Ti;
132	0	{
133	0	E Te, Tf, Th, Tk;
134	0	Te = KP707106781 * (Ta + Td);
135	0	Cr[WS(csr, 3)] = T7 - Te;
136	0	Cr[WS(csr, 1)] = T7 + Te;
137	0	Tf = KP707106781 * (Td - Ta);
138	0	Ci[WS(csi, 1)] = Tf - Tg;
139	0	Ci[WS(csi, 3)] = Tg + Tf;
140	0	Th = T3 + T6;
141	0	Tk = Ti + Tj;
142	0	Cr[WS(csr, 4)] = Th - Tk;
143	0	Cr[0] = Th + Tk;
144	0	}
145	0	}
146	0	}
147	0	}
148
149		static const kr2c_desc desc = { 8, "r2cf_8", { 20, 2, 0, 0 }, &GENUS };
150
151	1	void X(codelet_r2cf_8) (planner *p) { X(kr2c_register) (p, r2cf_8, &desc);
152	1	}
153
154		#endif