/src/fftw3/dft/scalar/codelets/q1_2.c

Source
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Sat Jan 10 06:09:25 UTC 2026 */

#include "dft/codelet-dft.h"

#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)

/* Generated by: ../../../genfft/gen_twidsq.native -fma -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include dft/scalar/q.h */

/*
 * This function contains 12 FP additions, 8 FP multiplications,
 * (or, 8 additions, 4 multiplications, 4 fused multiply/add),
 * 17 stack variables, 0 constants, and 16 memory accesses
 */
#include "dft/scalar/q.h"

/*
 * q1_2 (FMA flavour): in-place 2x2 butterfly with twiddle multiplication.
 *
 * For every m in [mb, me) the routine works on four (rio, iio) pairs located
 * at offsets 0, WS(rs, 1), WS(vs, 1) and WS(vs, 1) + WS(rs, 1):
 *
 *   - the sum of the pair at {0, rs} is written back to offset 0;
 *   - the sum of the pair at {vs, vs + rs} is written to offset rs;
 *   - the difference of the pair at {0, rs}, combined with W[0] and W[1] as
 *       re' = W[0] * dre + W[1] * dim,   im' = W[0] * dim - W[1] * dre,
 *     is written to offset vs;
 *   - the difference of the pair at {vs, vs + rs}, combined the same way,
 *     is written to offset vs + rs.
 *
 * rio/iio advance by ms and W advances by 2 after each iteration; W is first
 * moved forward by 2 * mb so that iteration mb uses its own pair of values.
 */
static void q1_2(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     INT m;

     /* Skip the two twiddle values of each iteration that precedes mb. */
     W += mb * 2;

     for (m = mb; m < me; m += 1, rio += ms, iio += ms, W += 2, MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
          /* All eight inputs are read before the first store because the
             outputs overwrite the same locations. */
          E re00 = rio[0];
          E re01 = rio[WS(rs, 1)];
          E dre0 = re00 - re01;
          E im00 = iio[0];
          E im01 = iio[WS(rs, 1)];
          E dim0 = im00 - im01;
          E re10 = rio[WS(vs, 1)];
          E re11 = rio[WS(vs, 1) + WS(rs, 1)];
          E dre1 = re10 - re11;
          E im10 = iio[WS(vs, 1)];
          E im11 = iio[WS(vs, 1) + WS(rs, 1)];
          E dim1 = im10 - im11;

          /* Untwiddled sums; note the second sum lands at offset rs. */
          rio[0] = re00 + re01;
          iio[0] = im00 + im01;
          rio[WS(rs, 1)] = re10 + re11;
          iio[WS(rs, 1)] = im10 + im11;

          /* Twiddled difference of the {vs, vs + rs} pair -> offset vs + rs. */
          {
               E wr = W[0];
               E wr_dre = wr * dre1;
               E wr_dim = wr * dim1;
               E wi = W[1];
               rio[WS(vs, 1) + WS(rs, 1)] = FMA(wi, dim1, wr_dre);
               iio[WS(vs, 1) + WS(rs, 1)] = FNMS(wi, dre1, wr_dim);
          }

          /* Twiddled difference of the {0, rs} pair -> offset vs.
             The twiddle values are deliberately re-read from W here. */
          {
               E wr = W[0];
               E wr_dre = wr * dre0;
               E wr_dim = wr * dim0;
               E wi = W[1];
               rio[WS(vs, 1)] = FMA(wi, dim0, wr_dre);
               iio[WS(vs, 1)] = FNMS(wi, dre0, wr_dim);
          }
     }
}

/*
 * Twiddle-instruction table for this codelet; it is referenced from the
 * codelet descriptor `desc` defined just below.
 * NOTE(review): the meaning of TW_FULL / TW_NEXT and of the two numeric
 * fields is defined in project headers not visible here -- presumably
 * "all twiddles for radix 2" followed by an end-of-list marker; confirm
 * against the tw_instr definition.
 */
static const tw_instr twinstr[] = {
     { TW_FULL, 0, 2 },
     { TW_NEXT, 1, 0 }
};

/*
 * Descriptor handed to X(kdft_difsq_register) together with q1_2.
 * Positional fields as written: 2, the name "q1_2", the twiddle table,
 * &GENUS, and { 8, 4, 4, 0 }.
 * NOTE(review): { 8, 4, 4, 0 } presumably mirrors the operation counts
 * quoted in the generator comment above (8 additions, 4 multiplications,
 * 4 fused multiply/add, 0 other), the leading 2 is presumably the radix,
 * and the three trailing zeros are not explained by anything in this
 * file -- confirm against the ct_desc definition.
 */
static const ct_desc desc = { 2, "q1_2", twinstr, &GENUS, { 8, 4, 4, 0 }, 0, 0, 0 };

/*
 * Public entry point of this file: registers the q1_2 kernel and its
 * descriptor with planner p via X(kdft_difsq_register).
 */
void X(codelet_q1_2) (planner *p)
{
     X(kdft_difsq_register) (p, q1_2, &desc);
}
#else

/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include dft/scalar/q.h */

/*
 * This function contains 12 FP additions, 8 FP multiplications,
 * (or, 8 additions, 4 multiplications, 4 fused multiply/add),
 * 17 stack variables, 0 constants, and 16 memory accesses
 */
#include "dft/scalar/q.h"

/*
 * q1_2 (non-FMA flavour): in-place 2x2 butterfly with twiddle multiplication.
 *
 * For every m in [mb, me) the routine works on four (rio, iio) pairs located
 * at offsets 0, WS(rs, 1), WS(vs, 1) and WS(vs, 1) + WS(rs, 1):
 *
 *   - the sum of the pair at {0, rs} is written back to offset 0;
 *   - the sum of the pair at {vs, vs + rs} is written to offset rs;
 *   - the difference of the pair at {0, rs}, combined with W[0] and W[1] as
 *       re' = W[0] * dre + W[1] * dim,   im' = W[0] * dim - W[1] * dre,
 *     is written to offset vs;
 *   - the difference of the pair at {vs, vs + rs}, combined the same way,
 *     is written to offset vs + rs.
 *
 * rio/iio advance by ms and W advances by 2 after each iteration; W is first
 * moved forward by 2 * mb so that iteration mb uses its own pair of values.
 */
static void q1_2(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     INT m;

     /* Skip the two twiddle values of each iteration that precedes mb. */
     W += mb * 2;

     for (m = mb; m < me; m += 1, rio += ms, iio += ms, W += 2, MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
          /* All eight inputs are read before the first store because the
             outputs overwrite the same locations. */
          E re00 = rio[0];
          E re01 = rio[WS(rs, 1)];
          E dre0 = re00 - re01;
          E im00 = iio[0];
          E im01 = iio[WS(rs, 1)];
          E dim0 = im00 - im01;
          E re10 = rio[WS(vs, 1)];
          E re11 = rio[WS(vs, 1) + WS(rs, 1)];
          E dre1 = re10 - re11;
          E im10 = iio[WS(vs, 1)];
          E im11 = iio[WS(vs, 1) + WS(rs, 1)];
          E dim1 = im10 - im11;

          /* Untwiddled sums; note the second sum lands at offset rs. */
          rio[0] = re00 + re01;
          iio[0] = im00 + im01;
          rio[WS(rs, 1)] = re10 + re11;
          iio[WS(rs, 1)] = im10 + im11;

          /* Twiddled difference of the {vs, vs + rs} pair -> offset vs + rs. */
          {
               E wr = W[0];
               E wi = W[1];
               rio[WS(vs, 1) + WS(rs, 1)] = FMA(wr, dre1, wi * dim1);
               iio[WS(vs, 1) + WS(rs, 1)] = FNMS(wi, dre1, wr * dim1);
          }

          /* Twiddled difference of the {0, rs} pair -> offset vs.
             The twiddle values are deliberately re-read from W here. */
          {
               E wr = W[0];
               E wi = W[1];
               rio[WS(vs, 1)] = FMA(wr, dre0, wi * dim0);
               iio[WS(vs, 1)] = FNMS(wi, dre0, wr * dim0);
          }
     }
}

/*
 * Twiddle-instruction table for this codelet; it is referenced from the
 * codelet descriptor `desc` defined just below.
 * NOTE(review): the meaning of TW_FULL / TW_NEXT and of the two numeric
 * fields is defined in project headers not visible here -- presumably
 * "all twiddles for radix 2" followed by an end-of-list marker; confirm
 * against the tw_instr definition.
 */
static const tw_instr twinstr[] = {
     { TW_FULL, 0, 2 },
     { TW_NEXT, 1, 0 }
};

/*
 * Descriptor handed to X(kdft_difsq_register) together with q1_2.
 * Positional fields as written: 2, the name "q1_2", the twiddle table,
 * &GENUS, and { 8, 4, 4, 0 }.
 * NOTE(review): { 8, 4, 4, 0 } presumably mirrors the operation counts
 * quoted in the generator comment above (8 additions, 4 multiplications,
 * 4 fused multiply/add, 0 other), the leading 2 is presumably the radix,
 * and the three trailing zeros are not explained by anything in this
 * file -- confirm against the ct_desc definition.
 */
static const ct_desc desc = { 2, "q1_2", twinstr, &GENUS, { 8, 4, 4, 0 }, 0, 0, 0 };

/*
 * Public entry point of this file: registers the q1_2 kernel and its
 * descriptor with planner p via X(kdft_difsq_register).
 */
void X(codelet_q1_2) (planner *p)
{
     X(kdft_difsq_register) (p, q1_2, &desc);
}
#endif

Coverage Report

Created: 2026-01-10 06:14

Line	Count	Source
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21		/* This file was automatically generated --- DO NOT EDIT */
22		/* Generated on Sat Jan 10 06:09:25 UTC 2026 */
23
24		#include "dft/codelet-dft.h"
25
26		#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28		/* Generated by: ../../../genfft/gen_twidsq.native -fma -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include dft/scalar/q.h */
29
30		/*
31		* This function contains 12 FP additions, 8 FP multiplications,
32		* (or, 8 additions, 4 multiplications, 4 fused multiply/add),
33		* 17 stack variables, 0 constants, and 16 memory accesses
34		*/
35		#include "dft/scalar/q.h"
36
37		static void q1_2(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
38		{
39		{
40		INT m;
41		for (m = mb, W = W + (mb * 2); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
42		E T1, T2, T4, T7, T8, T9, Tb, Tc, Te, Th, Ti, Tj;
43		T1 = rio[0];
44		T2 = rio[WS(rs, 1)];
45		T4 = T1 - T2;
46		T7 = iio[0];
47		T8 = iio[WS(rs, 1)];
48		T9 = T7 - T8;
49		Tb = rio[WS(vs, 1)];
50		Tc = rio[WS(vs, 1) + WS(rs, 1)];
51		Te = Tb - Tc;
52		Th = iio[WS(vs, 1)];
53		Ti = iio[WS(vs, 1) + WS(rs, 1)];
54		Tj = Th - Ti;
55		rio[0] = T1 + T2;
56		iio[0] = T7 + T8;
57		rio[WS(rs, 1)] = Tb + Tc;
58		iio[WS(rs, 1)] = Th + Ti;
59		{
60		E Tf, Tk, Td, Tg;
61		Td = W[0];
62		Tf = Td * Te;
63		Tk = Td * Tj;
64		Tg = W[1];
65		rio[WS(vs, 1) + WS(rs, 1)] = FMA(Tg, Tj, Tf);
66		iio[WS(vs, 1) + WS(rs, 1)] = FNMS(Tg, Te, Tk);
67		}
68		{
69		E T5, Ta, T3, T6;
70		T3 = W[0];
71		T5 = T3 * T4;
72		Ta = T3 * T9;
73		T6 = W[1];
74		rio[WS(vs, 1)] = FMA(T6, T9, T5);
75		iio[WS(vs, 1)] = FNMS(T6, T4, Ta);
76		}
77		}
78		}
79		}
80
81		static const tw_instr twinstr[] = {
82		{ TW_FULL, 0, 2 },
83		{ TW_NEXT, 1, 0 }
84		};
85
86		static const ct_desc desc = { 2, "q1_2", twinstr, &GENUS, { 8, 4, 4, 0 }, 0, 0, 0 };
87
88		void X(codelet_q1_2) (planner *p) {
89		X(kdft_difsq_register) (p, q1_2, &desc);
90		}
91		#else
92
93		/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include dft/scalar/q.h */
94
95		/*
96		* This function contains 12 FP additions, 8 FP multiplications,
97		* (or, 8 additions, 4 multiplications, 4 fused multiply/add),
98		* 17 stack variables, 0 constants, and 16 memory accesses
99		*/
100		#include "dft/scalar/q.h"
101
102		static void q1_2(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
103	0	{
104	0	{
105	0	INT m;
106	0	for (m = mb, W = W + (mb * 2); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 2, MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
107	0	E T1, T2, T4, T6, T7, T8, T9, Ta, Tc, Te, Tf, Tg;
108	0	T1 = rio[0];
109	0	T2 = rio[WS(rs, 1)];
110	0	T4 = T1 - T2;
111	0	T6 = iio[0];
112	0	T7 = iio[WS(rs, 1)];
113	0	T8 = T6 - T7;
114	0	T9 = rio[WS(vs, 1)];
115	0	Ta = rio[WS(vs, 1) + WS(rs, 1)];
116	0	Tc = T9 - Ta;
117	0	Te = iio[WS(vs, 1)];
118	0	Tf = iio[WS(vs, 1) + WS(rs, 1)];
119	0	Tg = Te - Tf;
120	0	rio[0] = T1 + T2;
121	0	iio[0] = T6 + T7;
122	0	rio[WS(rs, 1)] = T9 + Ta;
123	0	iio[WS(rs, 1)] = Te + Tf;
124	0	{
125	0	E Tb, Td, T3, T5;
126	0	Tb = W[0];
127	0	Td = W[1];
128	0	rio[WS(vs, 1) + WS(rs, 1)] = FMA(Tb, Tc, Td * Tg);
129	0	iio[WS(vs, 1) + WS(rs, 1)] = FNMS(Td, Tc, Tb * Tg);
130	0	T3 = W[0];
131	0	T5 = W[1];
132	0	rio[WS(vs, 1)] = FMA(T3, T4, T5 * T8);
133	0	iio[WS(vs, 1)] = FNMS(T5, T4, T3 * T8);
134	0	}
135	0	}
136	0	}
137	0	}
138
139		static const tw_instr twinstr[] = {
140		{ TW_FULL, 0, 2 },
141		{ TW_NEXT, 1, 0 }
142		};
143
144		static const ct_desc desc = { 2, "q1_2", twinstr, &GENUS, { 8, 4, 4, 0 }, 0, 0, 0 };
145
146	1	void X(codelet_q1_2) (planner *p) {
147	1	X(kdft_difsq_register) (p, q1_2, &desc);
148	1	}
149		#endif