/src/fftw3/dft/buffered.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */


#include "dft/dft.h"

typedef struct {
     solver super;
     size_t maxnbuf_ndx;
} S;

static const INT maxnbufs[] = { 8, 256 };

typedef struct {
     plan_dft super;

     plan *cld, *cldcpy, *cldrest;
     INT n, vl, nbuf, bufdist;
     INT ivs_by_nbuf, ovs_by_nbuf;
     INT roffset, ioffset;
} P;

/* transform a vector input with the help of bufs */
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P *ego = (const P *) ego_;
     INT nbuf = ego->nbuf;
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);

     plan_dft *cld = (plan_dft *) ego->cld;
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
     plan_dft *cldrest;
     INT i, vl = ego->vl;
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
     INT roffset = ego->roffset, ioffset = ego->ioffset;

     for (i = nbuf; i <= vl; i += nbuf) {
          /* transform to bufs: */
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
    ri += ivs_by_nbuf; ii += ivs_by_nbuf;

          /* copy back */
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
    ro += ovs_by_nbuf; io += ovs_by_nbuf;
     }

     X(ifree)(bufs);

     /* Do the remaining transforms, if any: */
     cldrest = (plan_dft *) ego->cldrest;
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
}


static void awake(plan *ego_, enum wakefulness wakefulness)
{
     P *ego = (P *) ego_;

     X(plan_awake)(ego->cld, wakefulness);
     X(plan_awake)(ego->cldcpy, wakefulness);
     X(plan_awake)(ego->cldrest, wakefulness);
}

static void destroy(plan *ego_)
{
     P *ego = (P *) ego_;
     X(plan_destroy_internal)(ego->cldrest);
     X(plan_destroy_internal)(ego->cldcpy);
     X(plan_destroy_internal)(ego->cld);
}

static void print(const plan *ego_, printer *p)
{
     const P *ego = (const P *) ego_;
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
              ego->n, ego->nbuf,
              ego->vl, ego->bufdist % ego->n,
              ego->cld, ego->cldcpy, ego->cldrest);
}

static int applicable0(const S *ego, const problem *p_, const planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     const iodim *d = p->sz->dims;

     if (1
   && p->vecsz->rnk <= 1
   && p->sz->rnk == 1
    ) {
    INT vl, ivs, ovs;
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

    if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
         return 0;

    /* if this solver is redundant, in the sense that a solver
       of lower index generates the same plan, then prune this
       solver */
    if (X(nbuf_redundant)(d[0].n, vl, 
        ego->maxnbuf_ndx,
        maxnbufs, NELEM(maxnbufs)))
         return 0;

    /*
      In principle, the buffered transforms might be useful
      when working out of place.  However, in order to
      prevent infinite loops in the planner, we require
      that the output stride of the buffered transforms be
      greater than 2.
    */
    if (p->ri != p->ro)
         return (d[0].os > 2);

    /*
     * If the problem is in place, the input/output strides must
     * be the same or the whole thing must fit in the buffer.
     */
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
         return 1;

    if (/* fits into buffer: */
         ((p->vecsz->rnk == 0)
    ||
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
       maxnbufs[ego->maxnbuf_ndx]) 
     == p->vecsz->dims[0].n)))
         return 1;
     }

     return 0;
}

static int applicable(const S *ego, const problem *p_, const planner *plnr)
{
     if (NO_BUFFERINGP(plnr)) return 0;
     if (!applicable0(ego, p_, plnr)) return 0;

     if (NO_UGLYP(plnr)) {
    const problem_dft *p = (const problem_dft *) p_;
    if (p->ri != p->ro) return 0;
    if (X(toobig)(p->sz->dims[0].n)) return 0;
     }
     return 1;
}

static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_dft *p = (const problem_dft *) p_;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, roffset, ioffset;

     static const plan_adt padt = {
    X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);

     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* attempt to keep real and imaginary part in the same order,
  so as to allow optimizations in the the copy plan */
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
     ioffset = 1 - roffset;

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);

     /* allow destruction of input if problem is in place */
     cld = X(mkplan_f_d)(plnr,
       X(mkproblem_dft_d)(
            X(mktensor_1d)(n, p->sz->dims[0].is, 2),
            X(mktensor_1d)(nbuf, ivs, bufdist * 2),
            TAINT(p->ri, ivs * nbuf),
            TAINT(p->ii, ivs * nbuf),
            bufs + roffset, 
            bufs + ioffset),
       0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
     if (!cld)
          goto nada;

     /* copying back from the buffer is a rank-0 transform: */
     cldcpy = X(mkplan_d)(plnr,
        X(mkproblem_dft_d)(
             X(mktensor_0d)(),
             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
                n, 2, p->sz->dims[0].os),
             bufs + roffset, 
             bufs + ioffset, 
             TAINT(p->ro, ovs * nbuf), 
             TAINT(p->io, ovs * nbuf)));
     if (!cldcpy)
          goto nada;

     /* deallocate buffers, let apply() allocate them for real */
     X(ifree)(bufs);
     bufs = 0;

     /* plan the leftover transforms (cldrest): */
     {
    INT id = ivs * (nbuf * (vl / nbuf));
    INT od = ovs * (nbuf * (vl / nbuf));
    cldrest = X(mkplan_d)(plnr, 
        X(mkproblem_dft_d)(
             X(tensor_copy)(p->sz),
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
             p->ri+id, p->ii+id, p->ro+od, p->io+od));
     }
     if (!cldrest)
          goto nada;

     pln = MKPLAN_DFT(P, &padt, apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;
     pln->roffset = roffset;
     pln->ioffset = ioffset;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
    opcnt t;
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cldcpy);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}

static solver *mksolver(size_t maxnbuf_ndx)
{
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
     S *slv = MKSOLVER(S, &sadt);
     slv->maxnbuf_ndx = maxnbuf_ndx;
     return &(slv->super);
}

void X(dft_buffered_register)(planner *p)
{
     size_t i;
     for (i = 0; i < NELEM(maxnbufs); ++i)
    REGISTER_SOLVER(p, mksolver(i));
}

Coverage Report

Created: 2025-07-23 07:03

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21
22		#include "dft/dft.h"
23
24		typedef struct {
25		solver super;
26		size_t maxnbuf_ndx;
27		} S;
28
29		static const INT maxnbufs[] = { 8, 256 };
30
31		typedef struct {
32		plan_dft super;
33
34		plan cld, cldcpy, *cldrest;
35		INT n, vl, nbuf, bufdist;
36		INT ivs_by_nbuf, ovs_by_nbuf;
37		INT roffset, ioffset;
38		} P;
39
40		/* transform a vector input with the help of bufs */
41		static void apply(const plan ego_, R ri, R ii, R ro, R *io)
42	68	{
43	68	const P ego = (const P ) ego_;
44	68	INT nbuf = ego->nbuf;
45	68	R bufs = (R )MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46
47	68	plan_dft cld = (plan_dft ) ego->cld;
48	68	plan_dft cldcpy = (plan_dft ) ego->cldcpy;
49	68	plan_dft *cldrest;
50	68	INT i, vl = ego->vl;
51	68	INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52	68	INT roffset = ego->roffset, ioffset = ego->ioffset;
53
54	136	for (i = nbuf; i <= vl; i += nbuf) {
55		/* transform to bufs: */
56	68	cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57	68	ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58
59		/* copy back */
60	68	cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61	68	ro += ovs_by_nbuf; io += ovs_by_nbuf;
62	68	}
63
64	68	X(ifree)(bufs);
65
66		/* Do the remaining transforms, if any: */
67	68	cldrest = (plan_dft *) ego->cldrest;
68	68	cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69	68	}
70
71
72		static void awake(plan *ego_, enum wakefulness wakefulness)
73	52	{
74	52	P ego = (P ) ego_;
75
76	52	X(plan_awake)(ego->cld, wakefulness);
77	52	X(plan_awake)(ego->cldcpy, wakefulness);
78	52	X(plan_awake)(ego->cldrest, wakefulness);
79	52	}
80
81		static void destroy(plan *ego_)
82	195	{
83	195	P ego = (P ) ego_;
84	195	X(plan_destroy_internal)(ego->cldrest);
85	195	X(plan_destroy_internal)(ego->cldcpy);
86	195	X(plan_destroy_internal)(ego->cld);
87	195	}
88
89		static void print(const plan ego_, printer p)
90	0	{
91	0	const P ego = (const P ) ego_;
92	0	p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93	0	ego->n, ego->nbuf,
94	0	ego->vl, ego->bufdist % ego->n,
95	0	ego->cld, ego->cldcpy, ego->cldrest);
96	0	}
97
98		static int applicable0(const S ego, const problem p_, const planner *plnr)
99	2.19k	{
100	2.19k	const problem_dft p = (const problem_dft ) p_;
101	2.19k	const iodim *d = p->sz->dims;
102
103	2.19k	if (1
104	2.19k	&& p->vecsz->rnk <= 1
105	2.19k	&& p->sz->rnk == 1
106	2.19k	) {
107	1.16k	INT vl, ivs, ovs;
108	1.16k	X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109
110	1.16k	if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111	0	return 0;
112
113		/* if this solver is redundant, in the sense that a solver
114		of lower index generates the same plan, then prune this
115		solver */
116	1.16k	if (X(nbuf_redundant)(d[0].n, vl,
117	1.16k	ego->maxnbuf_ndx,
118	1.16k	maxnbufs, NELEM(maxnbufs)))
119	426	return 0;
120
121		/*
122		In principle, the buffered transforms might be useful
123		when working out of place. However, in order to
124		prevent infinite loops in the planner, we require
125		that the output stride of the buffered transforms be
126		greater than 2.
127		*/
128	735	if (p->ri != p->ro)
129	435	return (d[0].os > 2);
130
131		/*
132		* If the problem is in place, the input/output strides must
133		* be the same or the whole thing must fit in the buffer.
134		*/
135	300	if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136	0	return 1;
137
138	300	if (/* fits into buffer: */
139	300	((p->vecsz->rnk == 0)
140	300	\|\|
141	300	(X(nbuf)(d[0].n, p->vecsz->dims[0].n,
142	300	maxnbufs[ego->maxnbuf_ndx])
143	300	== p->vecsz->dims[0].n)))
144	195	return 1;
145	300	}
146
147	1.13k	return 0;
148	2.19k	}
149
150		static int applicable(const S ego, const problem p_, const planner *plnr)
151	2.19k	{
152	2.19k	if (NO_BUFFERINGP(plnr)) return 0;
153	2.19k	if (!applicable0(ego, p_, plnr)) return 0;
154
155	195	if (NO_UGLYP(plnr)) {
156	195	const problem_dft p = (const problem_dft ) p_;
157	195	if (p->ri != p->ro) return 0;
158	195	if (X(toobig)(p->sz->dims[0].n)) return 0;
159	195	}
160	195	return 1;
161	195	}
162
163		static plan mkplan(const solver ego_, const problem p_, planner plnr)
164	2.19k	{
165	2.19k	P *pln;
166	2.19k	const S ego = (const S )ego_;
167	2.19k	plan cld = (plan ) 0;
168	2.19k	plan cldcpy = (plan ) 0;
169	2.19k	plan cldrest = (plan ) 0;
170	2.19k	const problem_dft p = (const problem_dft ) p_;
171	2.19k	R bufs = (R ) 0;
172	2.19k	INT nbuf = 0, bufdist, n, vl;
173	2.19k	INT ivs, ovs, roffset, ioffset;
174
175	2.19k	static const plan_adt padt = {
176	2.19k	X(dft_solve), awake, print, destroy
177	2.19k	};
178
179	2.19k	if (!applicable(ego, p_, plnr))
180	2.00k	goto nada;
181
182	195	n = X(tensor_sz)(p->sz);
183
184	195	X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185
186	195	nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187	195	bufdist = X(bufdist)(n, vl);
188	195	A(nbuf > 0);
189
190		/* attempt to keep real and imaginary part in the same order,
191		so as to allow optimizations in the the copy plan */
192	195	roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193	195	ioffset = 1 - roffset;
194
195		/* initial allocation for the purpose of planning */
196	195	bufs = (R ) MALLOC(sizeof(R) nbuf * bufdist * 2, BUFFERS);
197
198		/* allow destruction of input if problem is in place */
199	195	cld = X(mkplan_f_d)(plnr,
200	195	X(mkproblem_dft_d)(
201	195	X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202	195	X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203	195	TAINT(p->ri, ivs * nbuf),
204	195	TAINT(p->ii, ivs * nbuf),
205	195	bufs + roffset,
206	195	bufs + ioffset),
207	195	0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208	195	if (!cld)
209	0	goto nada;
210
211		/* copying back from the buffer is a rank-0 transform: */
212	195	cldcpy = X(mkplan_d)(plnr,
213	195	X(mkproblem_dft_d)(
214	195	X(mktensor_0d)(),
215	195	X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216	195	n, 2, p->sz->dims[0].os),
217	195	bufs + roffset,
218	195	bufs + ioffset,
219	195	TAINT(p->ro, ovs * nbuf),
220	195	TAINT(p->io, ovs * nbuf)));
221	195	if (!cldcpy)
222	0	goto nada;
223
224		/* deallocate buffers, let apply() allocate them for real */
225	195	X(ifree)(bufs);
226	195	bufs = 0;
227
228		/* plan the leftover transforms (cldrest): */
229	195	{
230	195	INT id = ivs * (nbuf * (vl / nbuf));
231	195	INT od = ovs * (nbuf * (vl / nbuf));
232	195	cldrest = X(mkplan_d)(plnr,
233	195	X(mkproblem_dft_d)(
234	195	X(tensor_copy)(p->sz),
235	195	X(mktensor_1d)(vl % nbuf, ivs, ovs),
236	195	p->ri+id, p->ii+id, p->ro+od, p->io+od));
237	195	}
238	195	if (!cldrest)
239	0	goto nada;
240
241	195	pln = MKPLAN_DFT(P, &padt, apply);
242	195	pln->cld = cld;
243	195	pln->cldcpy = cldcpy;
244	195	pln->cldrest = cldrest;
245	195	pln->n = n;
246	195	pln->vl = vl;
247	195	pln->ivs_by_nbuf = ivs * nbuf;
248	195	pln->ovs_by_nbuf = ovs * nbuf;
249	195	pln->roffset = roffset;
250	195	pln->ioffset = ioffset;
251
252	195	pln->nbuf = nbuf;
253	195	pln->bufdist = bufdist;
254
255	195	{
256	195	opcnt t;
257	195	X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258	195	X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259	195	}
260
261	195	return &(pln->super.super);
262
263	2.00k	nada:
264	2.00k	X(ifree0)(bufs);
265	2.00k	X(plan_destroy_internal)(cldrest);
266	2.00k	X(plan_destroy_internal)(cldcpy);
267	2.00k	X(plan_destroy_internal)(cld);
268	2.00k	return (plan *) 0;
269	195	}
270
271		static solver *mksolver(size_t maxnbuf_ndx)
272	4	{
273	4	static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274	4	S *slv = MKSOLVER(S, &sadt);
275	4	slv->maxnbuf_ndx = maxnbuf_ndx;
276	4	return &(slv->super);
277	4	}
278
279		void X(dft_buffered_register)(planner *p)
280	1	{
281	1	size_t i;
282	3	for (i = 0; i < NELEM(maxnbufs); ++i)
283	2	REGISTER_SOLVER(p, mksolver(i));
284	1	}