/src/fftw3/dft/scalar/codelets/n1_4.c

Source
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Sun Jun 22 06:40:50 UTC 2025 */

#include "dft/codelet-dft.h"

#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)

/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 4 -name n1_4 -include dft/scalar/n.h */

/*
 * This function contains 16 FP additions, 0 FP multiplications,
 * (or, 16 additions, 0 multiplications, 0 fused multiply/add),
 * 13 stack variables, 0 constants, and 16 memory accesses
 */
#include "dft/scalar/n.h"

/*
 * Size-4 complex DFT kernel without twiddle factors (FMA-preferring build).
 *
 * ri / ii : real and imaginary parts of the input; point k is read from
 *           ri[WS(is, k)] and ii[WS(is, k)].
 * ro / io : real and imaginary parts of the output; point k is written to
 *           ro[WS(os, k)] and io[WS(os, k)].
 * v       : number of transforms to perform.
 * ivs/ovs : amounts added to the input / output pointers after each transform.
 *
 * With x0..x3 the complex inputs, each pass produces
 *     X0 = (x0 + x2) + (x1 + x3)
 *     X1 = (x0 - x2) - i (x1 - x3)
 *     X2 = (x0 + x2) - (x1 + x3)
 *     X3 = (x0 - x2) + i (x1 - x3)
 * using 16 additions/subtractions and no multiplications.
 */
static void n1_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     INT remaining;

     for (remaining = v; remaining > 0; --remaining) {
          /* Even-indexed points (0 and 2): fetch, then form sum and difference. */
          const E re0 = ri[0];
          const E re2 = ri[WS(is, 2)];
          const E im0 = ii[0];
          const E im2 = ii[WS(is, 2)];

          const E sum_re02 = re0 + re2;
          const E dif_re02 = re0 - re2;
          const E dif_im02 = im0 - im2;
          const E sum_im02 = im0 + im2;

          /* Odd-indexed points (1 and 3): same treatment. */
          const E re1 = ri[WS(is, 1)];
          const E re3 = ri[WS(is, 3)];
          const E im1 = ii[WS(is, 1)];
          const E im3 = ii[WS(is, 3)];

          const E sum_re13 = re1 + re3;
          const E dif_re13 = re1 - re3;
          const E dif_im13 = im1 - im3;
          const E sum_im13 = im1 + im3;

          /* Every input has been read by now; only stores follow. */
          ro[WS(os, 2)] = sum_re02 - sum_re13;
          io[WS(os, 2)] = sum_im02 - sum_im13;
          ro[0] = sum_re02 + sum_re13;
          io[0] = sum_im02 + sum_im13;
          io[WS(os, 1)] = dif_im02 - dif_re13;
          ro[WS(os, 1)] = dif_re02 + dif_im13;
          io[WS(os, 3)] = dif_re13 + dif_im02;
          ro[WS(os, 3)] = dif_re02 - dif_im13;

          /* Step to the next transform in the batch. */
          ri += ivs;
          ii += ivs;
          ro += ovs;
          io += ovs;
          MAKE_VOLATILE_STRIDE(16, is);
          MAKE_VOLATILE_STRIDE(16, os);
     }
}

/*
 * Descriptor handed to X(kdft_register) together with n1_4 (FMA-preferring
 * build).  The leading 4 and "n1_4" match the size and name given to the
 * generator (-n 4 -name n1_4).
 * NOTE(review): { 16, 0, 0, 0 } presumably mirrors the operation counts quoted
 * in the header comment (16 additions, 0 multiplications, 0 fused
 * multiply/adds); confirm the field order against the kdft_desc declaration.
 */
static const kdft_desc desc = { 4, "n1_4", { 16, 0, 0, 0 }, &GENUS, 0, 0, 0, 0 };

/*
 * Entry point for this codelet: passes the n1_4 kernel and its descriptor
 * to X(kdft_register) on behalf of planner p.
 */
void X(codelet_n1_4) (planner *p)
{
     X(kdft_register) (p, n1_4, &desc);
}

#else

/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 4 -name n1_4 -include dft/scalar/n.h */

/*
 * This function contains 16 FP additions, 0 FP multiplications,
 * (or, 16 additions, 0 multiplications, 0 fused multiply/add),
 * 13 stack variables, 0 constants, and 16 memory accesses
 */
#include "dft/scalar/n.h"

/*
 * Size-4 complex DFT kernel without twiddle factors (non-FMA build).
 *
 * ri / ii : real and imaginary parts of the input; point k is read from
 *           ri[WS(is, k)] and ii[WS(is, k)].
 * ro / io : real and imaginary parts of the output; point k is written to
 *           ro[WS(os, k)] and io[WS(os, k)].
 * v       : number of transforms to perform.
 * ivs/ovs : amounts added to the input / output pointers after each transform.
 *
 * With x0..x3 the complex inputs, each pass produces
 *     X0 = (x0 + x2) + (x1 + x3)
 *     X1 = (x0 - x2) - i (x1 - x3)
 *     X2 = (x0 + x2) - (x1 + x3)
 *     X3 = (x0 - x2) + i (x1 - x3)
 * using 16 additions/subtractions and no multiplications.
 */
static void n1_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     INT remaining;

     for (remaining = v; remaining > 0; --remaining) {
          /* Even-indexed points (0 and 2): fetch, then form sum and difference. */
          const E re0 = ri[0];
          const E re2 = ri[WS(is, 2)];
          const E im0 = ii[0];
          const E im2 = ii[WS(is, 2)];

          const E sum_re02 = re0 + re2;
          const E dif_re02 = re0 - re2;
          const E dif_im02 = im0 - im2;
          const E sum_im02 = im0 + im2;

          /* Odd-indexed points (1 and 3): same treatment. */
          const E re1 = ri[WS(is, 1)];
          const E re3 = ri[WS(is, 3)];
          const E im1 = ii[WS(is, 1)];
          const E im3 = ii[WS(is, 3)];

          const E sum_re13 = re1 + re3;
          const E dif_re13 = re1 - re3;
          const E dif_im13 = im1 - im3;
          const E sum_im13 = im1 + im3;

          /* Every input has been read by now; only stores follow. */
          ro[WS(os, 2)] = sum_re02 - sum_re13;
          io[WS(os, 2)] = sum_im02 - sum_im13;
          ro[0] = sum_re02 + sum_re13;
          io[0] = sum_im02 + sum_im13;
          io[WS(os, 1)] = dif_im02 - dif_re13;
          ro[WS(os, 1)] = dif_re02 + dif_im13;
          io[WS(os, 3)] = dif_re13 + dif_im02;
          ro[WS(os, 3)] = dif_re02 - dif_im13;

          /* Step to the next transform in the batch. */
          ri += ivs;
          ii += ivs;
          ro += ovs;
          io += ovs;
          MAKE_VOLATILE_STRIDE(16, is);
          MAKE_VOLATILE_STRIDE(16, os);
     }
}

/*
 * Descriptor handed to X(kdft_register) together with n1_4 (non-FMA build).
 * The leading 4 and "n1_4" match the size and name given to the generator
 * (-n 4 -name n1_4).
 * NOTE(review): { 16, 0, 0, 0 } presumably mirrors the operation counts quoted
 * in the header comment (16 additions, 0 multiplications, 0 fused
 * multiply/adds); confirm the field order against the kdft_desc declaration.
 */
static const kdft_desc desc = { 4, "n1_4", { 16, 0, 0, 0 }, &GENUS, 0, 0, 0, 0 };

/*
 * Entry point for this codelet: passes the n1_4 kernel and its descriptor
 * to X(kdft_register) on behalf of planner p.
 */
void X(codelet_n1_4) (planner *p)
{
     X(kdft_register) (p, n1_4, &desc);
}

#endif

Line	Count	Source
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21		/* This file was automatically generated --- DO NOT EDIT */
22		/* Generated on Sun Jun 22 06:40:50 UTC 2025 */
23
24		#include "dft/codelet-dft.h"
25
26		#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28		/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 4 -name n1_4 -include dft/scalar/n.h */
29
30		/*
31		* This function contains 16 FP additions, 0 FP multiplications,
32		* (or, 16 additions, 0 multiplications, 0 fused multiply/add),
33		* 13 stack variables, 0 constants, and 16 memory accesses
34		*/
35		#include "dft/scalar/n.h"
36
37		static void n1_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
38		{
39		{
40		INT i;
41		for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) {
42		E T3, Tb, T9, Tf, T6, Ta, Te, Tg;
43		{
44		E T1, T2, T7, T8;
45		T1 = ri[0];
46		T2 = ri[WS(is, 2)];
47		T3 = T1 + T2;
48		Tb = T1 - T2;
49		T7 = ii[0];
50		T8 = ii[WS(is, 2)];
51		T9 = T7 - T8;
52		Tf = T7 + T8;
53		}
54		{
55		E T4, T5, Tc, Td;
56		T4 = ri[WS(is, 1)];
57		T5 = ri[WS(is, 3)];
58		T6 = T4 + T5;
59		Ta = T4 - T5;
60		Tc = ii[WS(is, 1)];
61		Td = ii[WS(is, 3)];
62		Te = Tc - Td;
63		Tg = Tc + Td;
64		}
65		ro[WS(os, 2)] = T3 - T6;
66		io[WS(os, 2)] = Tf - Tg;
67		ro[0] = T3 + T6;
68		io[0] = Tf + Tg;
69		io[WS(os, 1)] = T9 - Ta;
70		ro[WS(os, 1)] = Tb + Te;
71		io[WS(os, 3)] = Ta + T9;
72		ro[WS(os, 3)] = Tb - Te;
73		}
74		}
75		}
76
77		static const kdft_desc desc = { 4, "n1_4", { 16, 0, 0, 0 }, &GENUS, 0, 0, 0, 0 };
78
79		void X(codelet_n1_4) (planner *p) { X(kdft_register) (p, n1_4, &desc);
80		}
81
82		#else
83
84		/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 4 -name n1_4 -include dft/scalar/n.h */
85
86		/*
87		* This function contains 16 FP additions, 0 FP multiplications,
88		* (or, 16 additions, 0 multiplications, 0 fused multiply/add),
89		* 13 stack variables, 0 constants, and 16 memory accesses
90		*/
91		#include "dft/scalar/n.h"
92
93		static void n1_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
94	4	{
95	4	{
96	4	INT i;
97	30	for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(16, is), MAKE_VOLATILE_STRIDE(16, os)) {
98	26	E T3, Tb, T9, Tf, T6, Ta, Te, Tg;
99	26	{
100	26	E T1, T2, T7, T8;
101	26	T1 = ri[0];
102	26	T2 = ri[WS(is, 2)];
103	26	T3 = T1 + T2;
104	26	Tb = T1 - T2;
105	26	T7 = ii[0];
106	26	T8 = ii[WS(is, 2)];
107	26	T9 = T7 - T8;
108	26	Tf = T7 + T8;
109	26	}
110	26	{
111	26	E T4, T5, Tc, Td;
112	26	T4 = ri[WS(is, 1)];
113	26	T5 = ri[WS(is, 3)];
114	26	T6 = T4 + T5;
115	26	Ta = T4 - T5;
116	26	Tc = ii[WS(is, 1)];
117	26	Td = ii[WS(is, 3)];
118	26	Te = Tc - Td;
119	26	Tg = Tc + Td;
120	26	}
121	26	ro[WS(os, 2)] = T3 - T6;
122	26	io[WS(os, 2)] = Tf - Tg;
123	26	ro[0] = T3 + T6;
124	26	io[0] = Tf + Tg;
125	26	io[WS(os, 1)] = T9 - Ta;
126	26	ro[WS(os, 1)] = Tb + Te;
127	26	io[WS(os, 3)] = Ta + T9;
128	26	ro[WS(os, 3)] = Tb - Te;
129	26	}
130	4	}
131	4	}
132
133		static const kdft_desc desc = { 4, "n1_4", { 16, 0, 0, 0 }, &GENUS, 0, 0, 0, 0 };
134
135	1	void X(codelet_n1_4) (planner *p) { X(kdft_register) (p, n1_4, &desc);
136	1	}
137
138		#endif

Coverage Report

Created: 2025-06-22 06:45