/src/fftw3/dft/indirect-transpose.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/* solvers/plans for vectors of DFTs corresponding to the columns
   of a matrix: first transpose the matrix so that the DFTs are
   contiguous, then do DFTs with transposed output.   In particular,
   we restrict ourselves to the case of a square transpose (or a
   sequence thereof). */

#include "dft/dft.h"

typedef solver S;

typedef struct {
     plan_dft super;
     INT vl, ivs, ovs;
     plan *cldtrans, *cld, *cldrest;
} P;

/* initial transpose is out-of-place from input to output */
static void apply_op(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P *ego = (const P *) ego_;
     INT vl = ego->vl, ivs = ego->ivs, ovs = ego->ovs, i;

     for (i = 0; i < vl; ++i) {
    {
         plan_dft *cldtrans = (plan_dft *) ego->cldtrans;
         cldtrans->apply(ego->cldtrans, ri, ii, ro, io);
    }
    {
         plan_dft *cld = (plan_dft *) ego->cld;
         cld->apply(ego->cld, ro, io, ro, io);
    }
    ri += ivs; ii += ivs;
    ro += ovs; io += ovs;
     }
     {
    plan_dft *cldrest = (plan_dft *) ego->cldrest;
    cldrest->apply(ego->cldrest, ri, ii, ro, io);
     }
}

static void destroy(plan *ego_)
{
     P *ego = (P *) ego_;
     X(plan_destroy_internal)(ego->cldrest);
     X(plan_destroy_internal)(ego->cld);
     X(plan_destroy_internal)(ego->cldtrans);
}

static void awake(plan *ego_, enum wakefulness wakefulness)
{
     P *ego = (P *) ego_;
     X(plan_awake)(ego->cldtrans, wakefulness);
     X(plan_awake)(ego->cld, wakefulness);
     X(plan_awake)(ego->cldrest, wakefulness);
}

static void print(const plan *ego_, printer *p)
{
     const P *ego = (const P *) ego_;
     p->print(p, "(indirect-transpose%v%(%p%)%(%p%)%(%p%))", 
        ego->vl, ego->cldtrans, ego->cld, ego->cldrest);
}

static int pickdim(const tensor *vs, const tensor *s, int *pdim0, int *pdim1)
{
     int dim0, dim1;
     *pdim0 = *pdim1 = -1;
     for (dim0 = 0; dim0 < vs->rnk; ++dim0)
          for (dim1 = 0; dim1 < s->rnk; ++dim1) 
         if (vs->dims[dim0].n * X(iabs)(vs->dims[dim0].is) <= X(iabs)(s->dims[dim1].is)
       && vs->dims[dim0].n >= s->dims[dim1].n
       && (*pdim0 == -1 
           || (X(iabs)(vs->dims[dim0].is) <= X(iabs)(vs->dims[*pdim0].is)
         && X(iabs)(s->dims[dim1].is) >= X(iabs)(s->dims[*pdim1].is)))) {
        *pdim0 = dim0;
        *pdim1 = dim1;
         }
     return (*pdim0 != -1 && *pdim1 != -1);
}

static int applicable0(const solver *ego_, const problem *p_,
           const planner *plnr,
           int *pdim0, int *pdim1)
{
     const problem_dft *p = (const problem_dft *) p_;
     UNUSED(ego_); UNUSED(plnr);

     return (1
       && FINITE_RNK(p->vecsz->rnk) && FINITE_RNK(p->sz->rnk)

       /* FIXME: can/should we relax this constraint? */
       && X(tensor_inplace_strides2)(p->vecsz, p->sz)

       && pickdim(p->vecsz, p->sz, pdim0, pdim1)

       /* output should not *already* include the transpose
    (in which case we duplicate the regular indirect.c) */
       && (p->sz->dims[*pdim1].os != p->vecsz->dims[*pdim0].is)
    );
}

static int applicable(const solver *ego_, const problem *p_,
          const planner *plnr,
          int *pdim0, int *pdim1)
{
     if (!applicable0(ego_, p_, plnr, pdim0, pdim1)) return 0;
     {
          const problem_dft *p = (const problem_dft *) p_;
    INT u = p->ri == p->ii + 1 || p->ii == p->ri + 1 ? (INT)2 : (INT)1;

    /* UGLY if does not result in contiguous transforms or
       transforms of contiguous vectors (since the latter at
       least have efficient transpositions) */
    if (NO_UGLYP(plnr)
        && p->vecsz->dims[*pdim0].is != u
        && !(p->vecsz->rnk == 2
       && p->vecsz->dims[1-*pdim0].is == u
       && p->vecsz->dims[*pdim0].is
          == u * p->vecsz->dims[1-*pdim0].n))
         return 0;

    if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0;
     }
     return 1;
}

static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     P *pln;
     plan *cld = 0, *cldtrans = 0, *cldrest = 0;
     int pdim0, pdim1;
     tensor *ts, *tv;
     INT vl, ivs, ovs;
     R *rit, *iit, *rot, *iot;

     static const plan_adt padt = {
    X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr, &pdim0, &pdim1))
          return (plan *) 0;

     vl = p->vecsz->dims[pdim0].n / p->sz->dims[pdim1].n;
     A(vl >= 1);
     ivs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].is;
     ovs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].os;
     rit = TAINT(p->ri, vl == 1 ? 0 : ivs);
     iit = TAINT(p->ii, vl == 1 ? 0 : ivs);
     rot = TAINT(p->ro, vl == 1 ? 0 : ovs);
     iot = TAINT(p->io, vl == 1 ? 0 : ovs);

     ts = X(tensor_copy_inplace)(p->sz, INPLACE_IS);
     ts->dims[pdim1].os = p->vecsz->dims[pdim0].is;
     tv = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS);
     tv->dims[pdim0].os = p->sz->dims[pdim1].is;
     tv->dims[pdim0].n = p->sz->dims[pdim1].n;
     cldtrans = X(mkplan_d)(plnr, 
          X(mkproblem_dft_d)(X(mktensor_0d)(),
                 X(tensor_append)(tv, ts),
                 rit, iit, 
                 rot, iot));
     X(tensor_destroy2)(ts, tv);
     if (!cldtrans) goto nada;

     ts = X(tensor_copy)(p->sz);
     ts->dims[pdim1].is = p->vecsz->dims[pdim0].is;
     tv = X(tensor_copy)(p->vecsz);
     tv->dims[pdim0].is = p->sz->dims[pdim1].is;
     tv->dims[pdim0].n = p->sz->dims[pdim1].n;
     cld = X(mkplan_d)(plnr, X(mkproblem_dft_d)(ts, tv,
            rot, iot,
            rot, iot));
     if (!cld) goto nada;

     tv = X(tensor_copy)(p->vecsz);
     tv->dims[pdim0].n -= vl * p->sz->dims[pdim1].n;
     cldrest = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(tensor_copy)(p->sz), tv,
                p->ri + ivs * vl,
                p->ii + ivs * vl,
                p->ro + ovs * vl,
                p->io + ovs * vl));
     if (!cldrest) goto nada;

     pln = MKPLAN_DFT(P, &padt, apply_op);
     pln->cldtrans = cldtrans;
     pln->cld = cld;
     pln->cldrest = cldrest;
     pln->vl = vl;
     pln->ivs = ivs;
     pln->ovs = ovs;
     X(ops_cpy)(&cldrest->ops, &pln->super.super.ops);
     X(ops_madd2)(vl, &cld->ops, &pln->super.super.ops);
     X(ops_madd2)(vl, &cldtrans->ops, &pln->super.super.ops);
     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cld);
     X(plan_destroy_internal)(cldtrans);
     return (plan *)0;
}

static solver *mksolver(void)
{
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
     S *slv = MKSOLVER(S, &sadt);
     return slv;
}

void X(dft_indirect_transpose_register)(planner *p)
{
     REGISTER_SOLVER(p, mksolver());
}

Coverage Report

Created: 2025-07-18 06:52

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21		/* solvers/plans for vectors of DFTs corresponding to the columns
22		of a matrix: first transpose the matrix so that the DFTs are
23		contiguous, then do DFTs with transposed output. In particular,
24		we restrict ourselves to the case of a square transpose (or a
25		sequence thereof). */
26
27		#include "dft/dft.h"
28
29		typedef solver S;
30
31		typedef struct {
32		plan_dft super;
33		INT vl, ivs, ovs;
34		plan cldtrans, cld, *cldrest;
35		} P;
36
37		/* initial transpose is out-of-place from input to output */
38		static void apply_op(const plan ego_, R ri, R ii, R ro, R *io)
39	0	{
40	0	const P ego = (const P ) ego_;
41	0	INT vl = ego->vl, ivs = ego->ivs, ovs = ego->ovs, i;
42
43	0	for (i = 0; i < vl; ++i) {
44	0	{
45	0	plan_dft cldtrans = (plan_dft ) ego->cldtrans;
46	0	cldtrans->apply(ego->cldtrans, ri, ii, ro, io);
47	0	}
48	0	{
49	0	plan_dft cld = (plan_dft ) ego->cld;
50	0	cld->apply(ego->cld, ro, io, ro, io);
51	0	}
52	0	ri += ivs; ii += ivs;
53	0	ro += ovs; io += ovs;
54	0	}
55	0	{
56	0	plan_dft cldrest = (plan_dft ) ego->cldrest;
57	0	cldrest->apply(ego->cldrest, ri, ii, ro, io);
58	0	}
59	0	}
60
61		static void destroy(plan *ego_)
62	12	{
63	12	P ego = (P ) ego_;
64	12	X(plan_destroy_internal)(ego->cldrest);
65	12	X(plan_destroy_internal)(ego->cld);
66	12	X(plan_destroy_internal)(ego->cldtrans);
67	12	}
68
69		static void awake(plan *ego_, enum wakefulness wakefulness)
70	0	{
71	0	P ego = (P ) ego_;
72	0	X(plan_awake)(ego->cldtrans, wakefulness);
73	0	X(plan_awake)(ego->cld, wakefulness);
74	0	X(plan_awake)(ego->cldrest, wakefulness);
75	0	}
76
77		static void print(const plan ego_, printer p)
78	0	{
79	0	const P ego = (const P ) ego_;
80	0	p->print(p, "(indirect-transpose%v%(%p%)%(%p%)%(%p%))",
81	0	ego->vl, ego->cldtrans, ego->cld, ego->cldrest);
82	0	}
83
84		static int pickdim(const tensor vs, const tensor s, int pdim0, int pdim1)
85	162	{
86	162	int dim0, dim1;
87	162	pdim0 = pdim1 = -1;
88	223	for (dim0 = 0; dim0 < vs->rnk; ++dim0)
89	105	for (dim1 = 0; dim1 < s->rnk; ++dim1)
90	44	if (vs->dims[dim0].n * X(iabs)(vs->dims[dim0].is) <= X(iabs)(s->dims[dim1].is)
91	44	&& vs->dims[dim0].n >= s->dims[dim1].n
92	44	&& (*pdim0 == -1
93	12	\|\| (X(iabs)(vs->dims[dim0].is) <= X(iabs)(vs->dims[*pdim0].is)
94	12	&& X(iabs)(s->dims[dim1].is) >= X(iabs)(s->dims[*pdim1].is)))) {
95	12	*pdim0 = dim0;
96	12	*pdim1 = dim1;
97	12	}
98	162	return (pdim0 != -1 && pdim1 != -1);
99	162	}
100
101		static int applicable0(const solver ego_, const problem p_,
102		const planner *plnr,
103		int pdim0, int pdim1)
104	1.09k	{
105	1.09k	const problem_dft p = (const problem_dft ) p_;
106	1.09k	UNUSED(ego_); UNUSED(plnr);
107
108	1.09k	return (1
109	1.09k	&& FINITE_RNK(p->vecsz->rnk) && FINITE_RNK(p->sz->rnk)
110
111		/* FIXME: can/should we relax this constraint? */
112	1.09k	&& X(tensor_inplace_strides2)(p->vecsz, p->sz)
113
114	1.09k	&& pickdim(p->vecsz, p->sz, pdim0, pdim1)
115
116		/* output should not already include the transpose
117		(in which case we duplicate the regular indirect.c) */
118	1.09k	&& (p->sz->dims[pdim1].os != p->vecsz->dims[pdim0].is)
119	1.09k	);
120	1.09k	}
121
122		static int applicable(const solver ego_, const problem p_,
123		const planner *plnr,
124		int pdim0, int pdim1)
125	1.09k	{
126	1.09k	if (!applicable0(ego_, p_, plnr, pdim0, pdim1)) return 0;
127	12	{
128	12	const problem_dft p = (const problem_dft ) p_;
129	12	INT u = p->ri == p->ii + 1 \|\| p->ii == p->ri + 1 ? (INT)2 : (INT)1;
130
131		/* UGLY if does not result in contiguous transforms or
132		transforms of contiguous vectors (since the latter at
133		least have efficient transpositions) */
134	12	if (NO_UGLYP(plnr)
135	12	&& p->vecsz->dims[*pdim0].is != u
136	12	&& !(p->vecsz->rnk == 2
137	0	&& p->vecsz->dims[1-*pdim0].is == u
138	0	&& p->vecsz->dims[*pdim0].is
139	0	== u * p->vecsz->dims[1-*pdim0].n))
140	0	return 0;
141
142	12	if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0;
143	12	}
144	12	return 1;
145	12	}
146
147		static plan mkplan(const solver ego_, const problem p_, planner plnr)
148	1.09k	{
149	1.09k	const problem_dft p = (const problem_dft ) p_;
150	1.09k	P *pln;
151	1.09k	plan cld = 0, cldtrans = 0, *cldrest = 0;
152	1.09k	int pdim0, pdim1;
153	1.09k	tensor ts, tv;
154	1.09k	INT vl, ivs, ovs;
155	1.09k	R rit, iit, rot, iot;
156
157	1.09k	static const plan_adt padt = {
158	1.09k	X(dft_solve), awake, print, destroy
159	1.09k	};
160
161	1.09k	if (!applicable(ego_, p_, plnr, &pdim0, &pdim1))
162	1.08k	return (plan *) 0;
163
164	12	vl = p->vecsz->dims[pdim0].n / p->sz->dims[pdim1].n;
165	12	A(vl >= 1);
166	12	ivs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].is;
167	12	ovs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].os;
168	12	rit = TAINT(p->ri, vl == 1 ? 0 : ivs);
169	12	iit = TAINT(p->ii, vl == 1 ? 0 : ivs);
170	12	rot = TAINT(p->ro, vl == 1 ? 0 : ovs);
171	12	iot = TAINT(p->io, vl == 1 ? 0 : ovs);
172
173	12	ts = X(tensor_copy_inplace)(p->sz, INPLACE_IS);
174	12	ts->dims[pdim1].os = p->vecsz->dims[pdim0].is;
175	12	tv = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS);
176	12	tv->dims[pdim0].os = p->sz->dims[pdim1].is;
177	12	tv->dims[pdim0].n = p->sz->dims[pdim1].n;
178	12	cldtrans = X(mkplan_d)(plnr,
179	12	X(mkproblem_dft_d)(X(mktensor_0d)(),
180	12	X(tensor_append)(tv, ts),
181	12	rit, iit,
182	12	rot, iot));
183	12	X(tensor_destroy2)(ts, tv);
184	12	if (!cldtrans) goto nada;
185
186	12	ts = X(tensor_copy)(p->sz);
187	12	ts->dims[pdim1].is = p->vecsz->dims[pdim0].is;
188	12	tv = X(tensor_copy)(p->vecsz);
189	12	tv->dims[pdim0].is = p->sz->dims[pdim1].is;
190	12	tv->dims[pdim0].n = p->sz->dims[pdim1].n;
191	12	cld = X(mkplan_d)(plnr, X(mkproblem_dft_d)(ts, tv,
192	12	rot, iot,
193	12	rot, iot));
194	12	if (!cld) goto nada;
195
196	12	tv = X(tensor_copy)(p->vecsz);
197	12	tv->dims[pdim0].n -= vl * p->sz->dims[pdim1].n;
198	12	cldrest = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(tensor_copy)(p->sz), tv,
199	12	p->ri + ivs * vl,
200	12	p->ii + ivs * vl,
201	12	p->ro + ovs * vl,
202	12	p->io + ovs * vl));
203	12	if (!cldrest) goto nada;
204
205	12	pln = MKPLAN_DFT(P, &padt, apply_op);
206	12	pln->cldtrans = cldtrans;
207	12	pln->cld = cld;
208	12	pln->cldrest = cldrest;
209	12	pln->vl = vl;
210	12	pln->ivs = ivs;
211	12	pln->ovs = ovs;
212	12	X(ops_cpy)(&cldrest->ops, &pln->super.super.ops);
213	12	X(ops_madd2)(vl, &cld->ops, &pln->super.super.ops);
214	12	X(ops_madd2)(vl, &cldtrans->ops, &pln->super.super.ops);
215	12	return &(pln->super.super);
216
217	0	nada:
218	0	X(plan_destroy_internal)(cldrest);
219	0	X(plan_destroy_internal)(cld);
220	0	X(plan_destroy_internal)(cldtrans);
221	0	return (plan *)0;
222	12	}
223
224		static solver *mksolver(void)
225	1	{
226	1	static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
227	1	S *slv = MKSOLVER(S, &sadt);
228	1	return slv;
229	1	}
230
231		void X(dft_indirect_transpose_register)(planner *p)
232	1	{
233	1	REGISTER_SOLVER(p, mksolver());
234	1	}