/src/fftw3/rdft/buffered.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */


#include "rdft/rdft.h"

typedef struct {
     solver super;
     size_t maxnbuf_ndx;
} S;

static const INT maxnbufs[] = { 8, 256 };

typedef struct {
     plan_rdft super;

     plan *cld, *cldcpy, *cldrest;
     INT n, vl, nbuf, bufdist;
     INT ivs_by_nbuf, ovs_by_nbuf;
} P;

/* transform a vector input with the help of bufs */
static void apply(const plan *ego_, R *I, R *O)
{
     const P *ego = (const P *) ego_;
     plan_rdft *cld = (plan_rdft *) ego->cld;
     plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
     plan_rdft *cldrest;
     INT i, vl = ego->vl, nbuf = ego->nbuf;
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
     R *bufs;

     bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);

     for (i = nbuf; i <= vl; i += nbuf) {
          /* transform to bufs: */
          cld->apply((plan *) cld, I, bufs);
    I += ivs_by_nbuf;

          /* copy back */
          cldcpy->apply((plan *) cldcpy, bufs, O);
    O += ovs_by_nbuf;
     }

     X(ifree)(bufs);

     /* Do the remaining transforms, if any: */
     cldrest = (plan_rdft *) ego->cldrest;
     cldrest->apply((plan *) cldrest, I, O);
}

/* for hc2r problems, copy the input into buffer, and then
   transform buffer->output, which allows for destruction of the
   buffer */
static void apply_hc2r(const plan *ego_, R *I, R *O)
{
     const P *ego = (const P *) ego_;
     plan_rdft *cld = (plan_rdft *) ego->cld;
     plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
     plan_rdft *cldrest;
     INT i, vl = ego->vl, nbuf = ego->nbuf;
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
     R *bufs;

     bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);

     for (i = nbuf; i <= vl; i += nbuf) {
          /* copy input into bufs: */
          cldcpy->apply((plan *) cldcpy, I, bufs);
    I += ivs_by_nbuf;

          /* transform to output */
          cld->apply((plan *) cld, bufs, O);
    O += ovs_by_nbuf;
     }

     X(ifree)(bufs);

     /* Do the remaining transforms, if any: */
     cldrest = (plan_rdft *) ego->cldrest;
     cldrest->apply((plan *) cldrest, I, O);
}


static void awake(plan *ego_, enum wakefulness wakefulness)
{
     P *ego = (P *) ego_;

     X(plan_awake)(ego->cld, wakefulness);
     X(plan_awake)(ego->cldcpy, wakefulness);
     X(plan_awake)(ego->cldrest, wakefulness);
}

static void destroy(plan *ego_)
{
     P *ego = (P *) ego_;
     X(plan_destroy_internal)(ego->cldrest);
     X(plan_destroy_internal)(ego->cldcpy);
     X(plan_destroy_internal)(ego->cld);
}

static void print(const plan *ego_, printer *p)
{
     const P *ego = (const P *) ego_;
     p->print(p, "(rdft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
              ego->n, ego->nbuf,
              ego->vl, ego->bufdist % ego->n,
              ego->cld, ego->cldcpy, ego->cldrest);
}

static int applicable0(const S *ego, const problem *p_, const planner *plnr)
{
     const problem_rdft *p = (const problem_rdft *) p_;
     iodim *d = p->sz->dims;

     if (1
   && p->vecsz->rnk <= 1
   && p->sz->rnk == 1
    ) {
    INT vl, ivs, ovs;
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

    if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr))
         return 0;

    /* if this solver is redundant, in the sense that a solver
       of lower index generates the same plan, then prune this
       solver */
    if (X(nbuf_redundant)(d[0].n, vl,
        ego->maxnbuf_ndx,
        maxnbufs, NELEM(maxnbufs)))
         return 0;

    if (p->I != p->O) {
         if (p->kind[0] == HC2R) {
        /* Allow HC2R problems only if the input is to be
           preserved.  This solver sets NO_DESTROY_INPUT,
           which prevents infinite loops */
        return (NO_DESTROY_INPUTP(plnr));
         } else {
        /*
          In principle, the buffered transforms might be useful
          when working out of place.  However, in order to
          prevent infinite loops in the planner, we require
          that the output stride of the buffered transforms be
          greater than 1.
        */
        return (d[0].os > 1);
         }
    }

    /*
     * If the problem is in place, the input/output strides must
     * be the same or the whole thing must fit in the buffer.
     */
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
         return 1;

    if (/* fits into buffer: */
         ((p->vecsz->rnk == 0)
    ||
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
       maxnbufs[ego->maxnbuf_ndx]) 
     == p->vecsz->dims[0].n)))
         return 1;
     }

     return 0;
}

static int applicable(const S *ego, const problem *p_, const planner *plnr)
{
     const problem_rdft *p;

     if (NO_BUFFERINGP(plnr)) return 0;

     if (!applicable0(ego, p_, plnr)) return 0;

     p = (const problem_rdft *) p_;
     if (p->kind[0] == HC2R) {
    if (NO_UGLYP(plnr)) {
         /* UGLY if in-place and too big, since the problem
      could be solved via transpositions */
         if (p->I == p->O && X(toobig)(p->sz->dims[0].n)) 
        return 0;
    }
     } else {
    if (NO_UGLYP(plnr)) {
         if (p->I != p->O) return 0;
         if (X(toobig)(p->sz->dims[0].n)) return 0;
    }
     }
     return 1;
}

static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_rdft *p = (const problem_rdft *) p_;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs;
     int hc2rp;

     static const plan_adt padt = {
    X(rdft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
     hc2rp = (p->kind[0] == HC2R);

     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);

     if (hc2rp) {
    /* allow destruction of buffer */
    cld = X(mkplan_f_d)(plnr, 
            X(mkproblem_rdft_d)(
           X(mktensor_1d)(n, 1, p->sz->dims[0].os),
           X(mktensor_1d)(nbuf, bufdist, ovs),
           bufs, TAINT(p->O, ovs * nbuf), p->kind),
            0, 0, NO_DESTROY_INPUT);
    if (!cld) goto nada;

    /* copying input into buffer buffer is a rank-0 transform: */
    cldcpy = X(mkplan_d)(plnr, 
             X(mkproblem_rdft_0_d)(
            X(mktensor_2d)(nbuf, ivs, bufdist,
               n, p->sz->dims[0].is, 1),
            TAINT(p->I, ivs * nbuf), bufs));
    if (!cldcpy) goto nada;
     } else {
    /* allow destruction of input if problem is in place */
    cld = X(mkplan_f_d)(plnr, 
            X(mkproblem_rdft_d)(
           X(mktensor_1d)(n, p->sz->dims[0].is, 1),
           X(mktensor_1d)(nbuf, ivs, bufdist),
           TAINT(p->I, ivs * nbuf), bufs, p->kind),
            0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0);
    if (!cld) goto nada;

    /* copying back from the buffer is a rank-0 transform: */
    cldcpy = X(mkplan_d)(plnr, 
             X(mkproblem_rdft_0_d)(
            X(mktensor_2d)(nbuf, bufdist, ovs,
               n, 1, p->sz->dims[0].os),
            bufs, TAINT(p->O, ovs * nbuf)));
    if (!cldcpy) goto nada;
     }

     /* deallocate buffers, let apply() allocate them for real */
     X(ifree)(bufs);
     bufs = 0;

     /* plan the leftover transforms (cldrest): */
     {
    INT id = ivs * (nbuf * (vl / nbuf));
    INT od = ovs * (nbuf * (vl / nbuf));
    cldrest = X(mkplan_d)(plnr, 
        X(mkproblem_rdft_d)(
             X(tensor_copy)(p->sz),
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
             p->I + id, p->O + od, p->kind));
     }
     if (!cldrest) goto nada;

     pln = MKPLAN_RDFT(P, &padt, hc2rp ? apply_hc2r : apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
    opcnt t;
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cldcpy);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}

static solver *mksolver(size_t maxnbuf_ndx)
{
     static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
     S *slv = MKSOLVER(S, &sadt);
     slv->maxnbuf_ndx = maxnbuf_ndx;
     return &(slv->super);
}

void X(rdft_buffered_register)(planner *p)
{
     size_t i;
     for (i = 0; i < NELEM(maxnbufs); ++i)
    REGISTER_SOLVER(p, mksolver(i));
}

Coverage Report

Created: 2023-09-25 07:08

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2003, 2007-14 Matteo Frigo
3		* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4		*
5		* This program is free software; you can redistribute it and/or modify
6		* it under the terms of the GNU General Public License as published by
7		* the Free Software Foundation; either version 2 of the License, or
8		* (at your option) any later version.
9		*
10		* This program is distributed in the hope that it will be useful,
11		* but WITHOUT ANY WARRANTY; without even the implied warranty of
12		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13		* GNU General Public License for more details.
14		*
15		* You should have received a copy of the GNU General Public License
16		* along with this program; if not, write to the Free Software
17		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18		*
19		*/
20
21
22		#include "rdft/rdft.h"
23
24		typedef struct {
25		solver super;
26		size_t maxnbuf_ndx;
27		} S;
28
29		static const INT maxnbufs[] = { 8, 256 };
30
31		typedef struct {
32		plan_rdft super;
33
34		plan cld, cldcpy, *cldrest;
35		INT n, vl, nbuf, bufdist;
36		INT ivs_by_nbuf, ovs_by_nbuf;
37		} P;
38
39		/* transform a vector input with the help of bufs */
40		static void apply(const plan ego_, R I, R *O)
41		{
42		const P ego = (const P ) ego_;
43		plan_rdft cld = (plan_rdft ) ego->cld;
44		plan_rdft cldcpy = (plan_rdft ) ego->cldcpy;
45		plan_rdft *cldrest;
46		INT i, vl = ego->vl, nbuf = ego->nbuf;
47		INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
48		R *bufs;
49
50		bufs = (R )MALLOC(sizeof(R) nbuf * ego->bufdist, BUFFERS);
51
52		for (i = nbuf; i <= vl; i += nbuf) {
53		/* transform to bufs: */
54		cld->apply((plan *) cld, I, bufs);
55		I += ivs_by_nbuf;
56
57		/* copy back */
58		cldcpy->apply((plan *) cldcpy, bufs, O);
59		O += ovs_by_nbuf;
60		}
61
62		X(ifree)(bufs);
63
64		/* Do the remaining transforms, if any: */
65		cldrest = (plan_rdft *) ego->cldrest;
66		cldrest->apply((plan *) cldrest, I, O);
67		}
68
69		/* for hc2r problems, copy the input into buffer, and then
70		transform buffer->output, which allows for destruction of the
71		buffer */
72		static void apply_hc2r(const plan ego_, R I, R *O)
73	0	{
74	0	const P ego = (const P ) ego_;
75	0	plan_rdft cld = (plan_rdft ) ego->cld;
76	0	plan_rdft cldcpy = (plan_rdft ) ego->cldcpy;
77	0	plan_rdft *cldrest;
78	0	INT i, vl = ego->vl, nbuf = ego->nbuf;
79	0	INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
80	0	R *bufs;
81
82	0	bufs = (R )MALLOC(sizeof(R) nbuf * ego->bufdist, BUFFERS);
83
84	0	for (i = nbuf; i <= vl; i += nbuf) {
85		/* copy input into bufs: */
86	0	cldcpy->apply((plan *) cldcpy, I, bufs);
87	0	I += ivs_by_nbuf;
88
89		/* transform to output */
90	0	cld->apply((plan *) cld, bufs, O);
91	0	O += ovs_by_nbuf;
92	0	}
93
94	0	X(ifree)(bufs);
95
96		/* Do the remaining transforms, if any: */
97	0	cldrest = (plan_rdft *) ego->cldrest;
98	0	cldrest->apply((plan *) cldrest, I, O);
99	0	}
100
101
102		static void awake(plan *ego_, enum wakefulness wakefulness)
103		{
104		P ego = (P ) ego_;
105
106		X(plan_awake)(ego->cld, wakefulness);
107		X(plan_awake)(ego->cldcpy, wakefulness);
108		X(plan_awake)(ego->cldrest, wakefulness);
109		}
110
111		static void destroy(plan *ego_)
112		{
113		P ego = (P ) ego_;
114		X(plan_destroy_internal)(ego->cldrest);
115		X(plan_destroy_internal)(ego->cldcpy);
116		X(plan_destroy_internal)(ego->cld);
117		}
118
119		static void print(const plan ego_, printer p)
120		{
121		const P ego = (const P ) ego_;
122		p->print(p, "(rdft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
123		ego->n, ego->nbuf,
124		ego->vl, ego->bufdist % ego->n,
125		ego->cld, ego->cldcpy, ego->cldrest);
126		}
127
128		static int applicable0(const S ego, const problem p_, const planner *plnr)
129		{
130		const problem_rdft p = (const problem_rdft ) p_;
131		iodim *d = p->sz->dims;
132
133		if (1
134		&& p->vecsz->rnk <= 1
135		&& p->sz->rnk == 1
136		) {
137		INT vl, ivs, ovs;
138		X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
139
140		if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr))
141		return 0;
142
143		/* if this solver is redundant, in the sense that a solver
144		of lower index generates the same plan, then prune this
145		solver */
146		if (X(nbuf_redundant)(d[0].n, vl,
147		ego->maxnbuf_ndx,
148		maxnbufs, NELEM(maxnbufs)))
149		return 0;
150
151		if (p->I != p->O) {
152		if (p->kind[0] == HC2R) {
153		/* Allow HC2R problems only if the input is to be
154		preserved. This solver sets NO_DESTROY_INPUT,
155		which prevents infinite loops */
156		return (NO_DESTROY_INPUTP(plnr));
157		} else {
158		/*
159		In principle, the buffered transforms might be useful
160		when working out of place. However, in order to
161		prevent infinite loops in the planner, we require
162		that the output stride of the buffered transforms be
163		greater than 1.
164		*/
165		return (d[0].os > 1);
166		}
167		}
168
169		/*
170		* If the problem is in place, the input/output strides must
171		* be the same or the whole thing must fit in the buffer.
172		*/
173		if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
174		return 1;
175
176		if (/* fits into buffer: */
177		((p->vecsz->rnk == 0)
178		\|\|
179		(X(nbuf)(d[0].n, p->vecsz->dims[0].n,
180		maxnbufs[ego->maxnbuf_ndx])
181		== p->vecsz->dims[0].n)))
182		return 1;
183		}
184
185		return 0;
186		}
187
188		static int applicable(const S ego, const problem p_, const planner *plnr)
189		{
190		const problem_rdft *p;
191
192		if (NO_BUFFERINGP(plnr)) return 0;
193
194		if (!applicable0(ego, p_, plnr)) return 0;
195
196		p = (const problem_rdft *) p_;
197		if (p->kind[0] == HC2R) {
198		if (NO_UGLYP(plnr)) {
199		/* UGLY if in-place and too big, since the problem
200		could be solved via transpositions */
201		if (p->I == p->O && X(toobig)(p->sz->dims[0].n))
202		return 0;
203		}
204		} else {
205		if (NO_UGLYP(plnr)) {
206		if (p->I != p->O) return 0;
207		if (X(toobig)(p->sz->dims[0].n)) return 0;
208		}
209		}
210		return 1;
211		}
212
213		static plan mkplan(const solver ego_, const problem p_, planner plnr)
214		{
215		P *pln;
216		const S ego = (const S )ego_;
217		plan cld = (plan ) 0;
218		plan cldcpy = (plan ) 0;
219		plan cldrest = (plan ) 0;
220		const problem_rdft p = (const problem_rdft ) p_;
221		R bufs = (R ) 0;
222		INT nbuf = 0, bufdist, n, vl;
223		INT ivs, ovs;
224		int hc2rp;
225
226		static const plan_adt padt = {
227		X(rdft_solve), awake, print, destroy
228		};
229
230		if (!applicable(ego, p_, plnr))
231		goto nada;
232
233		n = X(tensor_sz)(p->sz);
234		X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
235		hc2rp = (p->kind[0] == HC2R);
236
237		nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
238		bufdist = X(bufdist)(n, vl);
239		A(nbuf > 0);
240
241		/* initial allocation for the purpose of planning */
242		bufs = (R ) MALLOC(sizeof(R) nbuf * bufdist, BUFFERS);
243
244		if (hc2rp) {
245		/* allow destruction of buffer */
246		cld = X(mkplan_f_d)(plnr,
247		X(mkproblem_rdft_d)(
248		X(mktensor_1d)(n, 1, p->sz->dims[0].os),
249		X(mktensor_1d)(nbuf, bufdist, ovs),
250		bufs, TAINT(p->O, ovs * nbuf), p->kind),
251		0, 0, NO_DESTROY_INPUT);
252		if (!cld) goto nada;
253
254		/* copying input into buffer buffer is a rank-0 transform: */
255		cldcpy = X(mkplan_d)(plnr,
256		X(mkproblem_rdft_0_d)(
257		X(mktensor_2d)(nbuf, ivs, bufdist,
258		n, p->sz->dims[0].is, 1),
259		TAINT(p->I, ivs * nbuf), bufs));
260		if (!cldcpy) goto nada;
261		} else {
262		/* allow destruction of input if problem is in place */
263		cld = X(mkplan_f_d)(plnr,
264		X(mkproblem_rdft_d)(
265		X(mktensor_1d)(n, p->sz->dims[0].is, 1),
266		X(mktensor_1d)(nbuf, ivs, bufdist),
267		TAINT(p->I, ivs * nbuf), bufs, p->kind),
268		0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0);
269		if (!cld) goto nada;
270
271		/* copying back from the buffer is a rank-0 transform: */
272		cldcpy = X(mkplan_d)(plnr,
273		X(mkproblem_rdft_0_d)(
274		X(mktensor_2d)(nbuf, bufdist, ovs,
275		n, 1, p->sz->dims[0].os),
276		bufs, TAINT(p->O, ovs * nbuf)));
277		if (!cldcpy) goto nada;
278		}
279
280		/* deallocate buffers, let apply() allocate them for real */
281		X(ifree)(bufs);
282		bufs = 0;
283
284		/* plan the leftover transforms (cldrest): */
285		{
286		INT id = ivs * (nbuf * (vl / nbuf));
287		INT od = ovs * (nbuf * (vl / nbuf));
288		cldrest = X(mkplan_d)(plnr,
289		X(mkproblem_rdft_d)(
290		X(tensor_copy)(p->sz),
291		X(mktensor_1d)(vl % nbuf, ivs, ovs),
292		p->I + id, p->O + od, p->kind));
293		}
294		if (!cldrest) goto nada;
295
296		pln = MKPLAN_RDFT(P, &padt, hc2rp ? apply_hc2r : apply);
297		pln->cld = cld;
298		pln->cldcpy = cldcpy;
299		pln->cldrest = cldrest;
300		pln->n = n;
301		pln->vl = vl;
302		pln->ivs_by_nbuf = ivs * nbuf;
303		pln->ovs_by_nbuf = ovs * nbuf;
304
305		pln->nbuf = nbuf;
306		pln->bufdist = bufdist;
307
308		{
309		opcnt t;
310		X(ops_add)(&cld->ops, &cldcpy->ops, &t);
311		X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
312		}
313
314		return &(pln->super.super);
315
316		nada:
317		X(ifree0)(bufs);
318		X(plan_destroy_internal)(cldrest);
319		X(plan_destroy_internal)(cldcpy);
320		X(plan_destroy_internal)(cld);
321		return (plan *) 0;
322		}
323
324		static solver *mksolver(size_t maxnbuf_ndx)
325		{
326		static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
327		S *slv = MKSOLVER(S, &sadt);
328		slv->maxnbuf_ndx = maxnbuf_ndx;
329		return &(slv->super);
330		}
331
332		void X(rdft_buffered_register)(planner *p)
333	1	{
334	1	size_t i;
335	3	for (i = 0; i < NELEM(maxnbufs); ++i)
336	2	REGISTER_SOLVER(p, mksolver(i));
337	1	}