/src/fftw3/dft/buffered.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */


#include "dft/dft.h"

typedef struct {
     solver super;
     size_t maxnbuf_ndx;
} S;

static const INT maxnbufs[] = { 8, 256 };

typedef struct {
     plan_dft super;

     plan *cld, *cldcpy, *cldrest;
     INT n, vl, nbuf, bufdist;
     INT ivs_by_nbuf, ovs_by_nbuf;
     INT roffset, ioffset;
} P;

/* transform a vector input with the help of bufs */
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P *ego = (const P *) ego_;
     INT nbuf = ego->nbuf;
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);

     plan_dft *cld = (plan_dft *) ego->cld;
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
     plan_dft *cldrest;
     INT i, vl = ego->vl;
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
     INT roffset = ego->roffset, ioffset = ego->ioffset;

     for (i = nbuf; i <= vl; i += nbuf) {
          /* transform to bufs: */
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
    ri += ivs_by_nbuf; ii += ivs_by_nbuf;

          /* copy back */
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
    ro += ovs_by_nbuf; io += ovs_by_nbuf;
     }

     X(ifree)(bufs);

     /* Do the remaining transforms, if any: */
     cldrest = (plan_dft *) ego->cldrest;
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
}


static void awake(plan *ego_, enum wakefulness wakefulness)
{
     P *ego = (P *) ego_;

     X(plan_awake)(ego->cld, wakefulness);
     X(plan_awake)(ego->cldcpy, wakefulness);
     X(plan_awake)(ego->cldrest, wakefulness);
}

static void destroy(plan *ego_)
{
     P *ego = (P *) ego_;
     X(plan_destroy_internal)(ego->cldrest);
     X(plan_destroy_internal)(ego->cldcpy);
     X(plan_destroy_internal)(ego->cld);
}

static void print(const plan *ego_, printer *p)
{
     const P *ego = (const P *) ego_;
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
              ego->n, ego->nbuf,
              ego->vl, ego->bufdist % ego->n,
              ego->cld, ego->cldcpy, ego->cldrest);
}

static int applicable0(const S *ego, const problem *p_, const planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     const iodim *d = p->sz->dims;

     if (1
   && p->vecsz->rnk <= 1
   && p->sz->rnk == 1
    ) {
    INT vl, ivs, ovs;
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

    if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
         return 0;

    /* if this solver is redundant, in the sense that a solver
       of lower index generates the same plan, then prune this
       solver */
    if (X(nbuf_redundant)(d[0].n, vl, 
        ego->maxnbuf_ndx,
        maxnbufs, NELEM(maxnbufs)))
         return 0;

    /*
      In principle, the buffered transforms might be useful
      when working out of place.  However, in order to
      prevent infinite loops in the planner, we require
      that the output stride of the buffered transforms be
      greater than 2.
    */
    if (p->ri != p->ro)
         return (d[0].os > 2);

    /*
     * If the problem is in place, the input/output strides must
     * be the same or the whole thing must fit in the buffer.
     */
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
         return 1;

    if (/* fits into buffer: */
         ((p->vecsz->rnk == 0)
    ||
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
       maxnbufs[ego->maxnbuf_ndx]) 
     == p->vecsz->dims[0].n)))
         return 1;
     }

     return 0;
}

static int applicable(const S *ego, const problem *p_, const planner *plnr)
{
     if (NO_BUFFERINGP(plnr)) return 0;
     if (!applicable0(ego, p_, plnr)) return 0;

     if (NO_UGLYP(plnr)) {
    const problem_dft *p = (const problem_dft *) p_;
    if (p->ri != p->ro) return 0;
    if (X(toobig)(p->sz->dims[0].n)) return 0;
     }
     return 1;
}

static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_dft *p = (const problem_dft *) p_;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, roffset, ioffset;

     static const plan_adt padt = {
    X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);

     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* attempt to keep real and imaginary part in the same order,
  so as to allow optimizations in the the copy plan */
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
     ioffset = 1 - roffset;

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);

     /* allow destruction of input if problem is in place */
     cld = X(mkplan_f_d)(plnr,
       X(mkproblem_dft_d)(
            X(mktensor_1d)(n, p->sz->dims[0].is, 2),
            X(mktensor_1d)(nbuf, ivs, bufdist * 2),
            TAINT(p->ri, ivs * nbuf),
            TAINT(p->ii, ivs * nbuf),
            bufs + roffset, 
            bufs + ioffset),
       0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
     if (!cld)
          goto nada;

     /* copying back from the buffer is a rank-0 transform: */
     cldcpy = X(mkplan_d)(plnr,
        X(mkproblem_dft_d)(
             X(mktensor_0d)(),
             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
                n, 2, p->sz->dims[0].os),
             bufs + roffset, 
             bufs + ioffset, 
             TAINT(p->ro, ovs * nbuf), 
             TAINT(p->io, ovs * nbuf)));
     if (!cldcpy)
          goto nada;

     /* deallocate buffers, let apply() allocate them for real */
     X(ifree)(bufs);
     bufs = 0;

     /* plan the leftover transforms (cldrest): */
     {
    INT id = ivs * (nbuf * (vl / nbuf));
    INT od = ovs * (nbuf * (vl / nbuf));
    cldrest = X(mkplan_d)(plnr, 
        X(mkproblem_dft_d)(
             X(tensor_copy)(p->sz),
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
             p->ri+id, p->ii+id, p->ro+od, p->io+od));
     }
     if (!cldrest)
          goto nada;

     pln = MKPLAN_DFT(P, &padt, apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;
     pln->roffset = roffset;
     pln->ioffset = ioffset;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
    opcnt t;
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cldcpy);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}

static solver *mksolver(size_t maxnbuf_ndx)
{
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
     S *slv = MKSOLVER(S, &sadt);
     slv->maxnbuf_ndx = maxnbuf_ndx;
     return &(slv->super);
}

void X(dft_buffered_register)(planner *p)
{
     size_t i;
     for (i = 0; i < NELEM(maxnbufs); ++i)
    REGISTER_SOLVER(p, mksolver(i));
}

Coverage Report

Created: 2025-06-22 06:45

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21
22		#include "dft/dft.h"
23
24		typedef struct {
25		solver super;
26		size_t maxnbuf_ndx;
27		} S;
28
29		static const INT maxnbufs[] = { 8, 256 };
30
31		typedef struct {
32		plan_dft super;
33
34		plan cld, cldcpy, *cldrest;
35		INT n, vl, nbuf, bufdist;
36		INT ivs_by_nbuf, ovs_by_nbuf;
37		INT roffset, ioffset;
38		} P;
39
40		/* transform a vector input with the help of bufs */
41		static void apply(const plan ego_, R ri, R ii, R ro, R *io)
42	61	{
43	61	const P ego = (const P ) ego_;
44	61	INT nbuf = ego->nbuf;
45	61	R bufs = (R )MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46
47	61	plan_dft cld = (plan_dft ) ego->cld;
48	61	plan_dft cldcpy = (plan_dft ) ego->cldcpy;
49	61	plan_dft *cldrest;
50	61	INT i, vl = ego->vl;
51	61	INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52	61	INT roffset = ego->roffset, ioffset = ego->ioffset;
53
54	122	for (i = nbuf; i <= vl; i += nbuf) {
55		/* transform to bufs: */
56	61	cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57	61	ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58
59		/* copy back */
60	61	cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61	61	ro += ovs_by_nbuf; io += ovs_by_nbuf;
62	61	}
63
64	61	X(ifree)(bufs);
65
66		/* Do the remaining transforms, if any: */
67	61	cldrest = (plan_dft *) ego->cldrest;
68	61	cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69	61	}
70
71
72		static void awake(plan *ego_, enum wakefulness wakefulness)
73	50	{
74	50	P ego = (P ) ego_;
75
76	50	X(plan_awake)(ego->cld, wakefulness);
77	50	X(plan_awake)(ego->cldcpy, wakefulness);
78	50	X(plan_awake)(ego->cldrest, wakefulness);
79	50	}
80
81		static void destroy(plan *ego_)
82	196	{
83	196	P ego = (P ) ego_;
84	196	X(plan_destroy_internal)(ego->cldrest);
85	196	X(plan_destroy_internal)(ego->cldcpy);
86	196	X(plan_destroy_internal)(ego->cld);
87	196	}
88
89		static void print(const plan ego_, printer p)
90	0	{
91	0	const P ego = (const P ) ego_;
92	0	p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93	0	ego->n, ego->nbuf,
94	0	ego->vl, ego->bufdist % ego->n,
95	0	ego->cld, ego->cldcpy, ego->cldrest);
96	0	}
97
98		static int applicable0(const S ego, const problem p_, const planner *plnr)
99	2.21k	{
100	2.21k	const problem_dft p = (const problem_dft ) p_;
101	2.21k	const iodim *d = p->sz->dims;
102
103	2.21k	if (1
104	2.21k	&& p->vecsz->rnk <= 1
105	2.21k	&& p->sz->rnk == 1
106	2.21k	) {
107	1.16k	INT vl, ivs, ovs;
108	1.16k	X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109
110	1.16k	if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111	0	return 0;
112
113		/* if this solver is redundant, in the sense that a solver
114		of lower index generates the same plan, then prune this
115		solver */
116	1.16k	if (X(nbuf_redundant)(d[0].n, vl,
117	1.16k	ego->maxnbuf_ndx,
118	1.16k	maxnbufs, NELEM(maxnbufs)))
119	430	return 0;
120
121		/*
122		In principle, the buffered transforms might be useful
123		when working out of place. However, in order to
124		prevent infinite loops in the planner, we require
125		that the output stride of the buffered transforms be
126		greater than 2.
127		*/
128	735	if (p->ri != p->ro)
129	435	return (d[0].os > 2);
130
131		/*
132		* If the problem is in place, the input/output strides must
133		* be the same or the whole thing must fit in the buffer.
134		*/
135	300	if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136	0	return 1;
137
138	300	if (/* fits into buffer: */
139	300	((p->vecsz->rnk == 0)
140	300	\|\|
141	300	(X(nbuf)(d[0].n, p->vecsz->dims[0].n,
142	300	maxnbufs[ego->maxnbuf_ndx])
143	300	== p->vecsz->dims[0].n)))
144	196	return 1;
145	300	}
146
147	1.15k	return 0;
148	2.21k	}
149
150		static int applicable(const S ego, const problem p_, const planner *plnr)
151	2.21k	{
152	2.21k	if (NO_BUFFERINGP(plnr)) return 0;
153	2.21k	if (!applicable0(ego, p_, plnr)) return 0;
154
155	196	if (NO_UGLYP(plnr)) {
156	196	const problem_dft p = (const problem_dft ) p_;
157	196	if (p->ri != p->ro) return 0;
158	196	if (X(toobig)(p->sz->dims[0].n)) return 0;
159	196	}
160	196	return 1;
161	196	}
162
163		static plan mkplan(const solver ego_, const problem p_, planner plnr)
164	2.21k	{
165	2.21k	P *pln;
166	2.21k	const S ego = (const S )ego_;
167	2.21k	plan cld = (plan ) 0;
168	2.21k	plan cldcpy = (plan ) 0;
169	2.21k	plan cldrest = (plan ) 0;
170	2.21k	const problem_dft p = (const problem_dft ) p_;
171	2.21k	R bufs = (R ) 0;
172	2.21k	INT nbuf = 0, bufdist, n, vl;
173	2.21k	INT ivs, ovs, roffset, ioffset;
174
175	2.21k	static const plan_adt padt = {
176	2.21k	X(dft_solve), awake, print, destroy
177	2.21k	};
178
179	2.21k	if (!applicable(ego, p_, plnr))
180	2.01k	goto nada;
181
182	196	n = X(tensor_sz)(p->sz);
183
184	196	X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185
186	196	nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187	196	bufdist = X(bufdist)(n, vl);
188	196	A(nbuf > 0);
189
190		/* attempt to keep real and imaginary part in the same order,
191		so as to allow optimizations in the the copy plan */
192	196	roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193	196	ioffset = 1 - roffset;
194
195		/* initial allocation for the purpose of planning */
196	196	bufs = (R ) MALLOC(sizeof(R) nbuf * bufdist * 2, BUFFERS);
197
198		/* allow destruction of input if problem is in place */
199	196	cld = X(mkplan_f_d)(plnr,
200	196	X(mkproblem_dft_d)(
201	196	X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202	196	X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203	196	TAINT(p->ri, ivs * nbuf),
204	196	TAINT(p->ii, ivs * nbuf),
205	196	bufs + roffset,
206	196	bufs + ioffset),
207	196	0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208	196	if (!cld)
209	0	goto nada;
210
211		/* copying back from the buffer is a rank-0 transform: */
212	196	cldcpy = X(mkplan_d)(plnr,
213	196	X(mkproblem_dft_d)(
214	196	X(mktensor_0d)(),
215	196	X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216	196	n, 2, p->sz->dims[0].os),
217	196	bufs + roffset,
218	196	bufs + ioffset,
219	196	TAINT(p->ro, ovs * nbuf),
220	196	TAINT(p->io, ovs * nbuf)));
221	196	if (!cldcpy)
222	0	goto nada;
223
224		/* deallocate buffers, let apply() allocate them for real */
225	196	X(ifree)(bufs);
226	196	bufs = 0;
227
228		/* plan the leftover transforms (cldrest): */
229	196	{
230	196	INT id = ivs * (nbuf * (vl / nbuf));
231	196	INT od = ovs * (nbuf * (vl / nbuf));
232	196	cldrest = X(mkplan_d)(plnr,
233	196	X(mkproblem_dft_d)(
234	196	X(tensor_copy)(p->sz),
235	196	X(mktensor_1d)(vl % nbuf, ivs, ovs),
236	196	p->ri+id, p->ii+id, p->ro+od, p->io+od));
237	196	}
238	196	if (!cldrest)
239	0	goto nada;
240
241	196	pln = MKPLAN_DFT(P, &padt, apply);
242	196	pln->cld = cld;
243	196	pln->cldcpy = cldcpy;
244	196	pln->cldrest = cldrest;
245	196	pln->n = n;
246	196	pln->vl = vl;
247	196	pln->ivs_by_nbuf = ivs * nbuf;
248	196	pln->ovs_by_nbuf = ovs * nbuf;
249	196	pln->roffset = roffset;
250	196	pln->ioffset = ioffset;
251
252	196	pln->nbuf = nbuf;
253	196	pln->bufdist = bufdist;
254
255	196	{
256	196	opcnt t;
257	196	X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258	196	X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259	196	}
260
261	196	return &(pln->super.super);
262
263	2.01k	nada:
264	2.01k	X(ifree0)(bufs);
265	2.01k	X(plan_destroy_internal)(cldrest);
266	2.01k	X(plan_destroy_internal)(cldcpy);
267	2.01k	X(plan_destroy_internal)(cld);
268	2.01k	return (plan *) 0;
269	196	}
270
271		static solver *mksolver(size_t maxnbuf_ndx)
272	4	{
273	4	static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274	4	S *slv = MKSOLVER(S, &sadt);
275	4	slv->maxnbuf_ndx = maxnbuf_ndx;
276	4	return &(slv->super);
277	4	}
278
279		void X(dft_buffered_register)(planner *p)
280	1	{
281	1	size_t i;
282	3	for (i = 0; i < NELEM(maxnbufs); ++i)
283	2	REGISTER_SOLVER(p, mksolver(i));
284	1	}