Coverage Report

Created: 2023-09-25 07:08

/src/fftw3/rdft/buffered.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
22
#include "rdft/rdft.h"
23
24
typedef struct {
25
     solver super;
26
     size_t maxnbuf_ndx;
27
} S;
28
29
static const INT maxnbufs[] = { 8, 256 };
30
31
typedef struct {
32
     plan_rdft super;
33
34
     plan *cld, *cldcpy, *cldrest;
35
     INT n, vl, nbuf, bufdist;
36
     INT ivs_by_nbuf, ovs_by_nbuf;
37
} P;
38
39
/* transform a vector input with the help of bufs */
40
static void apply(const plan *ego_, R *I, R *O)
41
{
42
     const P *ego = (const P *) ego_;
43
     plan_rdft *cld = (plan_rdft *) ego->cld;
44
     plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
45
     plan_rdft *cldrest;
46
     INT i, vl = ego->vl, nbuf = ego->nbuf;
47
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
48
     R *bufs;
49
50
     bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
51
52
     for (i = nbuf; i <= vl; i += nbuf) {
53
          /* transform to bufs: */
54
          cld->apply((plan *) cld, I, bufs);
55
    I += ivs_by_nbuf;
56
57
          /* copy back */
58
          cldcpy->apply((plan *) cldcpy, bufs, O);
59
    O += ovs_by_nbuf;
60
     }
61
62
     X(ifree)(bufs);
63
64
     /* Do the remaining transforms, if any: */
65
     cldrest = (plan_rdft *) ego->cldrest;
66
     cldrest->apply((plan *) cldrest, I, O);
67
}
68
69
/* for hc2r problems, copy the input into buffer, and then
70
   transform buffer->output, which allows for destruction of the
71
   buffer */
72
static void apply_hc2r(const plan *ego_, R *I, R *O)
73
0
{
74
0
     const P *ego = (const P *) ego_;
75
0
     plan_rdft *cld = (plan_rdft *) ego->cld;
76
0
     plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
77
0
     plan_rdft *cldrest;
78
0
     INT i, vl = ego->vl, nbuf = ego->nbuf;
79
0
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
80
0
     R *bufs;
81
82
0
     bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist, BUFFERS);
83
84
0
     for (i = nbuf; i <= vl; i += nbuf) {
85
          /* copy input into bufs: */
86
0
          cldcpy->apply((plan *) cldcpy, I, bufs);
87
0
    I += ivs_by_nbuf;
88
89
          /* transform to output */
90
0
          cld->apply((plan *) cld, bufs, O);
91
0
    O += ovs_by_nbuf;
92
0
     }
93
94
0
     X(ifree)(bufs);
95
96
     /* Do the remaining transforms, if any: */
97
0
     cldrest = (plan_rdft *) ego->cldrest;
98
0
     cldrest->apply((plan *) cldrest, I, O);
99
0
}
100
101
102
static void awake(plan *ego_, enum wakefulness wakefulness)
103
{
104
     P *ego = (P *) ego_;
105
106
     X(plan_awake)(ego->cld, wakefulness);
107
     X(plan_awake)(ego->cldcpy, wakefulness);
108
     X(plan_awake)(ego->cldrest, wakefulness);
109
}
110
111
static void destroy(plan *ego_)
112
{
113
     P *ego = (P *) ego_;
114
     X(plan_destroy_internal)(ego->cldrest);
115
     X(plan_destroy_internal)(ego->cldcpy);
116
     X(plan_destroy_internal)(ego->cld);
117
}
118
119
static void print(const plan *ego_, printer *p)
120
{
121
     const P *ego = (const P *) ego_;
122
     p->print(p, "(rdft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
123
              ego->n, ego->nbuf,
124
              ego->vl, ego->bufdist % ego->n,
125
              ego->cld, ego->cldcpy, ego->cldrest);
126
}
127
128
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
129
{
130
     const problem_rdft *p = (const problem_rdft *) p_;
131
     iodim *d = p->sz->dims;
132
133
     if (1
134
   && p->vecsz->rnk <= 1
135
   && p->sz->rnk == 1
136
    ) {
137
    INT vl, ivs, ovs;
138
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
139
140
    if (X(toobig)(d[0].n) && CONSERVE_MEMORYP(plnr))
141
         return 0;
142
143
    /* if this solver is redundant, in the sense that a solver
144
       of lower index generates the same plan, then prune this
145
       solver */
146
    if (X(nbuf_redundant)(d[0].n, vl,
147
        ego->maxnbuf_ndx,
148
        maxnbufs, NELEM(maxnbufs)))
149
         return 0;
150
151
    if (p->I != p->O) {
152
         if (p->kind[0] == HC2R) {
153
        /* Allow HC2R problems only if the input is to be
154
           preserved.  This solver sets NO_DESTROY_INPUT,
155
           which prevents infinite loops */
156
        return (NO_DESTROY_INPUTP(plnr));
157
         } else {
158
        /*
159
          In principle, the buffered transforms might be useful
160
          when working out of place.  However, in order to
161
          prevent infinite loops in the planner, we require
162
          that the output stride of the buffered transforms be
163
          greater than 1.
164
        */
165
        return (d[0].os > 1);
166
         }
167
    }
168
169
    /*
170
     * If the problem is in place, the input/output strides must
171
     * be the same or the whole thing must fit in the buffer.
172
     */
173
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
174
         return 1;
175
176
    if (/* fits into buffer: */
177
         ((p->vecsz->rnk == 0)
178
    ||
179
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
180
       maxnbufs[ego->maxnbuf_ndx]) 
181
     == p->vecsz->dims[0].n)))
182
         return 1;
183
     }
184
185
     return 0;
186
}
187
188
static int applicable(const S *ego, const problem *p_, const planner *plnr)
189
{
190
     const problem_rdft *p;
191
192
     if (NO_BUFFERINGP(plnr)) return 0;
193
194
     if (!applicable0(ego, p_, plnr)) return 0;
195
196
     p = (const problem_rdft *) p_;
197
     if (p->kind[0] == HC2R) {
198
    if (NO_UGLYP(plnr)) {
199
         /* UGLY if in-place and too big, since the problem
200
      could be solved via transpositions */
201
         if (p->I == p->O && X(toobig)(p->sz->dims[0].n)) 
202
        return 0;
203
    }
204
     } else {
205
    if (NO_UGLYP(plnr)) {
206
         if (p->I != p->O) return 0;
207
         if (X(toobig)(p->sz->dims[0].n)) return 0;
208
    }
209
     }
210
     return 1;
211
}
212
213
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
214
{
215
     P *pln;
216
     const S *ego = (const S *)ego_;
217
     plan *cld = (plan *) 0;
218
     plan *cldcpy = (plan *) 0;
219
     plan *cldrest = (plan *) 0;
220
     const problem_rdft *p = (const problem_rdft *) p_;
221
     R *bufs = (R *) 0;
222
     INT nbuf = 0, bufdist, n, vl;
223
     INT ivs, ovs;
224
     int hc2rp;
225
226
     static const plan_adt padt = {
227
    X(rdft_solve), awake, print, destroy
228
     };
229
230
     if (!applicable(ego, p_, plnr))
231
          goto nada;
232
233
     n = X(tensor_sz)(p->sz);
234
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
235
     hc2rp = (p->kind[0] == HC2R);
236
237
     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
238
     bufdist = X(bufdist)(n, vl);
239
     A(nbuf > 0);
240
241
     /* initial allocation for the purpose of planning */
242
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
243
244
     if (hc2rp) {
245
    /* allow destruction of buffer */
246
    cld = X(mkplan_f_d)(plnr, 
247
            X(mkproblem_rdft_d)(
248
           X(mktensor_1d)(n, 1, p->sz->dims[0].os),
249
           X(mktensor_1d)(nbuf, bufdist, ovs),
250
           bufs, TAINT(p->O, ovs * nbuf), p->kind),
251
            0, 0, NO_DESTROY_INPUT);
252
    if (!cld) goto nada;
253
254
    /* copying input into buffer buffer is a rank-0 transform: */
255
    cldcpy = X(mkplan_d)(plnr, 
256
             X(mkproblem_rdft_0_d)(
257
            X(mktensor_2d)(nbuf, ivs, bufdist,
258
               n, p->sz->dims[0].is, 1),
259
            TAINT(p->I, ivs * nbuf), bufs));
260
    if (!cldcpy) goto nada;
261
     } else {
262
    /* allow destruction of input if problem is in place */
263
    cld = X(mkplan_f_d)(plnr, 
264
            X(mkproblem_rdft_d)(
265
           X(mktensor_1d)(n, p->sz->dims[0].is, 1),
266
           X(mktensor_1d)(nbuf, ivs, bufdist),
267
           TAINT(p->I, ivs * nbuf), bufs, p->kind),
268
            0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0);
269
    if (!cld) goto nada;
270
271
    /* copying back from the buffer is a rank-0 transform: */
272
    cldcpy = X(mkplan_d)(plnr, 
273
             X(mkproblem_rdft_0_d)(
274
            X(mktensor_2d)(nbuf, bufdist, ovs,
275
               n, 1, p->sz->dims[0].os),
276
            bufs, TAINT(p->O, ovs * nbuf)));
277
    if (!cldcpy) goto nada;
278
     }
279
280
     /* deallocate buffers, let apply() allocate them for real */
281
     X(ifree)(bufs);
282
     bufs = 0;
283
284
     /* plan the leftover transforms (cldrest): */
285
     {
286
    INT id = ivs * (nbuf * (vl / nbuf));
287
    INT od = ovs * (nbuf * (vl / nbuf));
288
    cldrest = X(mkplan_d)(plnr, 
289
        X(mkproblem_rdft_d)(
290
             X(tensor_copy)(p->sz),
291
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
292
             p->I + id, p->O + od, p->kind));
293
     }
294
     if (!cldrest) goto nada;
295
296
     pln = MKPLAN_RDFT(P, &padt, hc2rp ? apply_hc2r : apply);
297
     pln->cld = cld;
298
     pln->cldcpy = cldcpy;
299
     pln->cldrest = cldrest;
300
     pln->n = n;
301
     pln->vl = vl;
302
     pln->ivs_by_nbuf = ivs * nbuf;
303
     pln->ovs_by_nbuf = ovs * nbuf;
304
305
     pln->nbuf = nbuf;
306
     pln->bufdist = bufdist;
307
308
     {
309
    opcnt t;
310
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
311
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
312
     }
313
314
     return &(pln->super.super);
315
316
 nada:
317
     X(ifree0)(bufs);
318
     X(plan_destroy_internal)(cldrest);
319
     X(plan_destroy_internal)(cldcpy);
320
     X(plan_destroy_internal)(cld);
321
     return (plan *) 0;
322
}
323
324
static solver *mksolver(size_t maxnbuf_ndx)
325
{
326
     static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
327
     S *slv = MKSOLVER(S, &sadt);
328
     slv->maxnbuf_ndx = maxnbuf_ndx;
329
     return &(slv->super);
330
}
331
332
void X(rdft_buffered_register)(planner *p)
333
1
{
334
1
     size_t i;
335
3
     for (i = 0; i < NELEM(maxnbufs); ++i)
336
2
    REGISTER_SOLVER(p, mksolver(i));
337
1
}