/src/fftw3/dft/buffered.c

Source
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */


#include "dft/dft.h"

typedef struct {
     solver super;
     size_t maxnbuf_ndx;
} S;

static const INT maxnbufs[] = { 8, 256 };

typedef struct {
     plan_dft super;

     plan *cld, *cldcpy, *cldrest;
     INT n, vl, nbuf, bufdist;
     INT ivs_by_nbuf, ovs_by_nbuf;
     INT roffset, ioffset;
} P;

/* transform a vector input with the help of bufs */
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P *ego = (const P *) ego_;
     INT nbuf = ego->nbuf;
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);

     plan_dft *cld = (plan_dft *) ego->cld;
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
     plan_dft *cldrest;
     INT i, vl = ego->vl;
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
     INT roffset = ego->roffset, ioffset = ego->ioffset;

     for (i = nbuf; i <= vl; i += nbuf) {
          /* transform to bufs: */
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
    ri += ivs_by_nbuf; ii += ivs_by_nbuf;

          /* copy back */
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
    ro += ovs_by_nbuf; io += ovs_by_nbuf;
     }

     X(ifree)(bufs);

     /* Do the remaining transforms, if any: */
     cldrest = (plan_dft *) ego->cldrest;
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
}


static void awake(plan *ego_, enum wakefulness wakefulness)
{
     P *ego = (P *) ego_;

     X(plan_awake)(ego->cld, wakefulness);
     X(plan_awake)(ego->cldcpy, wakefulness);
     X(plan_awake)(ego->cldrest, wakefulness);
}

static void destroy(plan *ego_)
{
     P *ego = (P *) ego_;
     X(plan_destroy_internal)(ego->cldrest);
     X(plan_destroy_internal)(ego->cldcpy);
     X(plan_destroy_internal)(ego->cld);
}

static void print(const plan *ego_, printer *p)
{
     const P *ego = (const P *) ego_;
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
              ego->n, ego->nbuf,
              ego->vl, ego->bufdist % ego->n,
              ego->cld, ego->cldcpy, ego->cldrest);
}

static int applicable0(const S *ego, const problem *p_, const planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     const iodim *d = p->sz->dims;

     if (1
   && p->vecsz->rnk <= 1
   && p->sz->rnk == 1
    ) {
    INT vl, ivs, ovs;
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

    if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
         return 0;

    /* if this solver is redundant, in the sense that a solver
       of lower index generates the same plan, then prune this
       solver */
    if (X(nbuf_redundant)(d[0].n, vl, 
        ego->maxnbuf_ndx,
        maxnbufs, NELEM(maxnbufs)))
         return 0;

    /*
      In principle, the buffered transforms might be useful
      when working out of place.  However, in order to
      prevent infinite loops in the planner, we require
      that the output stride of the buffered transforms be
      greater than 2.
    */
    if (p->ri != p->ro)
         return (d[0].os > 2);

    /*
     * If the problem is in place, the input/output strides must
     * be the same or the whole thing must fit in the buffer.
     */
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
         return 1;

    if (/* fits into buffer: */
         ((p->vecsz->rnk == 0)
    ||
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
       maxnbufs[ego->maxnbuf_ndx]) 
     == p->vecsz->dims[0].n)))
         return 1;
     }

     return 0;
}

static int applicable(const S *ego, const problem *p_, const planner *plnr)
{
     if (NO_BUFFERINGP(plnr)) return 0;
     if (!applicable0(ego, p_, plnr)) return 0;

     if (NO_UGLYP(plnr)) {
    const problem_dft *p = (const problem_dft *) p_;
    if (p->ri != p->ro) return 0;
    if (X(toobig)(p->sz->dims[0].n)) return 0;
     }
     return 1;
}

static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_dft *p = (const problem_dft *) p_;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, roffset, ioffset;

     static const plan_adt padt = {
    X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);

     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* attempt to keep real and imaginary part in the same order,
  so as to allow optimizations in the the copy plan */
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
     ioffset = 1 - roffset;

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);

     /* allow destruction of input if problem is in place */
     cld = X(mkplan_f_d)(plnr,
       X(mkproblem_dft_d)(
            X(mktensor_1d)(n, p->sz->dims[0].is, 2),
            X(mktensor_1d)(nbuf, ivs, bufdist * 2),
            TAINT(p->ri, ivs * nbuf),
            TAINT(p->ii, ivs * nbuf),
            bufs + roffset, 
            bufs + ioffset),
       0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
     if (!cld)
          goto nada;

     /* copying back from the buffer is a rank-0 transform: */
     cldcpy = X(mkplan_d)(plnr,
        X(mkproblem_dft_d)(
             X(mktensor_0d)(),
             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
                n, 2, p->sz->dims[0].os),
             bufs + roffset, 
             bufs + ioffset, 
             TAINT(p->ro, ovs * nbuf), 
             TAINT(p->io, ovs * nbuf)));
     if (!cldcpy)
          goto nada;

     /* deallocate buffers, let apply() allocate them for real */
     X(ifree)(bufs);
     bufs = 0;

     /* plan the leftover transforms (cldrest): */
     {
    INT id = ivs * (nbuf * (vl / nbuf));
    INT od = ovs * (nbuf * (vl / nbuf));
    cldrest = X(mkplan_d)(plnr, 
        X(mkproblem_dft_d)(
             X(tensor_copy)(p->sz),
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
             p->ri+id, p->ii+id, p->ro+od, p->io+od));
     }
     if (!cldrest)
          goto nada;

     pln = MKPLAN_DFT(P, &padt, apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;
     pln->roffset = roffset;
     pln->ioffset = ioffset;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
    opcnt t;
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cldcpy);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}

static solver *mksolver(size_t maxnbuf_ndx)
{
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
     S *slv = MKSOLVER(S, &sadt);
     slv->maxnbuf_ndx = maxnbuf_ndx;
     return &(slv->super);
}

void X(dft_buffered_register)(planner *p)
{
     size_t i;
     for (i = 0; i < NELEM(maxnbufs); ++i)
    REGISTER_SOLVER(p, mksolver(i));
}

Coverage Report

Created: 2026-02-26 07:12

Line	Count	Source
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21
22		#include "dft/dft.h"
23
24		typedef struct {
25		solver super;
26		size_t maxnbuf_ndx;
27		} S;
28
29		static const INT maxnbufs[] = { 8, 256 };
30
31		typedef struct {
32		plan_dft super;
33
34		plan cld, cldcpy, *cldrest;
35		INT n, vl, nbuf, bufdist;
36		INT ivs_by_nbuf, ovs_by_nbuf;
37		INT roffset, ioffset;
38		} P;
39
40		/* transform a vector input with the help of bufs */
41		static void apply(const plan ego_, R ri, R ii, R ro, R *io)
42	61	{
43	61	const P ego = (const P ) ego_;
44	61	INT nbuf = ego->nbuf;
45	61	R bufs = (R )MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46
47	61	plan_dft cld = (plan_dft ) ego->cld;
48	61	plan_dft cldcpy = (plan_dft ) ego->cldcpy;
49	61	plan_dft *cldrest;
50	61	INT i, vl = ego->vl;
51	61	INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52	61	INT roffset = ego->roffset, ioffset = ego->ioffset;
53
54	122	for (i = nbuf; i <= vl; i += nbuf) {
55		/* transform to bufs: */
56	61	cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57	61	ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58
59		/* copy back */
60	61	cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61	61	ro += ovs_by_nbuf; io += ovs_by_nbuf;
62	61	}
63
64	61	X(ifree)(bufs);
65
66		/* Do the remaining transforms, if any: */
67	61	cldrest = (plan_dft *) ego->cldrest;
68	61	cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69	61	}
70
71
72		static void awake(plan *ego_, enum wakefulness wakefulness)
73	50	{
74	50	P ego = (P ) ego_;
75
76	50	X(plan_awake)(ego->cld, wakefulness);
77	50	X(plan_awake)(ego->cldcpy, wakefulness);
78	50	X(plan_awake)(ego->cldrest, wakefulness);
79	50	}
80
81		static void destroy(plan *ego_)
82	193	{
83	193	P ego = (P ) ego_;
84	193	X(plan_destroy_internal)(ego->cldrest);
85	193	X(plan_destroy_internal)(ego->cldcpy);
86	193	X(plan_destroy_internal)(ego->cld);
87	193	}
88
89		static void print(const plan ego_, printer p)
90	0	{
91	0	const P ego = (const P ) ego_;
92	0	p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93	0	ego->n, ego->nbuf,
94	0	ego->vl, ego->bufdist % ego->n,
95	0	ego->cld, ego->cldcpy, ego->cldrest);
96	0	}
97
98		static int applicable0(const S ego, const problem p_, const planner *plnr)
99	2.17k	{
100	2.17k	const problem_dft p = (const problem_dft ) p_;
101	2.17k	const iodim *d = p->sz->dims;
102
103	2.17k	if (1
104	2.17k	&& p->vecsz->rnk <= 1
105	1.18k	&& p->sz->rnk == 1
106	2.17k	) {
107	1.15k	INT vl, ivs, ovs;
108	1.15k	X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109
110	1.15k	if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111	0	return 0;
112
113		/* if this solver is redundant, in the sense that a solver
114		of lower index generates the same plan, then prune this
115		solver */
116	1.15k	if (X(nbuf_redundant)(d[0].n, vl,
117	1.15k	ego->maxnbuf_ndx,
118	1.15k	maxnbufs, NELEM(maxnbufs)))
119	426	return 0;
120
121		/*
122		In principle, the buffered transforms might be useful
123		when working out of place. However, in order to
124		prevent infinite loops in the planner, we require
125		that the output stride of the buffered transforms be
126		greater than 2.
127		*/
128	729	if (p->ri != p->ro)
129	434	return (d[0].os > 2);
130
131		/*
132		* If the problem is in place, the input/output strides must
133		* be the same or the whole thing must fit in the buffer.
134		*/
135	295	if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136	0	return 1;
137
138	295	if (/* fits into buffer: */
139	295	((p->vecsz->rnk == 0)
140	295	\|\|
141	295	(X(nbuf)(d[0].n, p->vecsz->dims[0].n,
142	295	maxnbufs[ego->maxnbuf_ndx])
143	295	== p->vecsz->dims[0].n)))
144	193	return 1;
145	295	}
146
147	1.12k	return 0;
148	2.17k	}
149
150		static int applicable(const S ego, const problem p_, const planner *plnr)
151	2.17k	{
152	2.17k	if (NO_BUFFERINGP(plnr)) return 0;
153	2.17k	if (!applicable0(ego, p_, plnr)) return 0;
154
155	193	if (NO_UGLYP(plnr)) {
156	193	const problem_dft p = (const problem_dft ) p_;
157	193	if (p->ri != p->ro) return 0;
158	193	if (X(toobig)(p->sz->dims[0].n)) return 0;
159	193	}
160	193	return 1;
161	193	}
162
163		static plan mkplan(const solver ego_, const problem p_, planner plnr)
164	2.17k	{
165	2.17k	P *pln;
166	2.17k	const S ego = (const S )ego_;
167	2.17k	plan cld = (plan ) 0;
168	2.17k	plan cldcpy = (plan ) 0;
169	2.17k	plan cldrest = (plan ) 0;
170	2.17k	const problem_dft p = (const problem_dft ) p_;
171	2.17k	R bufs = (R ) 0;
172	2.17k	INT nbuf = 0, bufdist, n, vl;
173	2.17k	INT ivs, ovs, roffset, ioffset;
174
175	2.17k	static const plan_adt padt = {
176	2.17k	X(dft_solve), awake, print, destroy
177	2.17k	};
178
179	2.17k	if (!applicable(ego, p_, plnr))
180	1.98k	goto nada;
181
182	193	n = X(tensor_sz)(p->sz);
183
184	193	X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185
186	193	nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187	193	bufdist = X(bufdist)(n, vl);
188	193	A(nbuf > 0);
189
190		/* attempt to keep real and imaginary part in the same order,
191		so as to allow optimizations in the the copy plan */
192	193	roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193	193	ioffset = 1 - roffset;
194
195		/* initial allocation for the purpose of planning */
196	193	bufs = (R ) MALLOC(sizeof(R) nbuf * bufdist * 2, BUFFERS);
197
198		/* allow destruction of input if problem is in place */
199	193	cld = X(mkplan_f_d)(plnr,
200	193	X(mkproblem_dft_d)(
201	193	X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202	193	X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203	193	TAINT(p->ri, ivs * nbuf),
204	193	TAINT(p->ii, ivs * nbuf),
205	193	bufs + roffset,
206	193	bufs + ioffset),
207	193	0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208	193	if (!cld)
209	0	goto nada;
210
211		/* copying back from the buffer is a rank-0 transform: */
212	193	cldcpy = X(mkplan_d)(plnr,
213	193	X(mkproblem_dft_d)(
214	193	X(mktensor_0d)(),
215	193	X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216	193	n, 2, p->sz->dims[0].os),
217	193	bufs + roffset,
218	193	bufs + ioffset,
219	193	TAINT(p->ro, ovs * nbuf),
220	193	TAINT(p->io, ovs * nbuf)));
221	193	if (!cldcpy)
222	0	goto nada;
223
224		/* deallocate buffers, let apply() allocate them for real */
225	193	X(ifree)(bufs);
226	193	bufs = 0;
227
228		/* plan the leftover transforms (cldrest): */
229	193	{
230	193	INT id = ivs * (nbuf * (vl / nbuf));
231	193	INT od = ovs * (nbuf * (vl / nbuf));
232	193	cldrest = X(mkplan_d)(plnr,
233	193	X(mkproblem_dft_d)(
234	193	X(tensor_copy)(p->sz),
235	193	X(mktensor_1d)(vl % nbuf, ivs, ovs),
236	193	p->ri+id, p->ii+id, p->ro+od, p->io+od));
237	193	}
238	193	if (!cldrest)
239	0	goto nada;
240
241	193	pln = MKPLAN_DFT(P, &padt, apply);
242	193	pln->cld = cld;
243	193	pln->cldcpy = cldcpy;
244	193	pln->cldrest = cldrest;
245	193	pln->n = n;
246	193	pln->vl = vl;
247	193	pln->ivs_by_nbuf = ivs * nbuf;
248	193	pln->ovs_by_nbuf = ovs * nbuf;
249	193	pln->roffset = roffset;
250	193	pln->ioffset = ioffset;
251
252	193	pln->nbuf = nbuf;
253	193	pln->bufdist = bufdist;
254
255	193	{
256	193	opcnt t;
257	193	X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258	193	X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259	193	}
260
261	193	return &(pln->super.super);
262
263	1.98k	nada:
264	1.98k	X(ifree0)(bufs);
265	1.98k	X(plan_destroy_internal)(cldrest);
266	1.98k	X(plan_destroy_internal)(cldcpy);
267	1.98k	X(plan_destroy_internal)(cld);
268	1.98k	return (plan *) 0;
269	193	}
270
271		static solver *mksolver(size_t maxnbuf_ndx)
272	4	{
273	4	static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274	4	S *slv = MKSOLVER(S, &sadt);
275	4	slv->maxnbuf_ndx = maxnbuf_ndx;
276	4	return &(slv->super);
277	4	}
278
279		void X(dft_buffered_register)(planner *p)
280	1	{
281	1	size_t i;
282	3	for (i = 0; i < NELEM(maxnbufs); ++i)
283	2	REGISTER_SOLVER(p, mksolver(i));
284	1	}