Coverage Report

Created: 2025-06-22 06:45

/src/fftw3/dft/buffered.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
22
#include "dft/dft.h"
23
24
typedef struct {
25
     solver super;
26
     size_t maxnbuf_ndx;
27
} S;
28
29
static const INT maxnbufs[] = { 8, 256 };
30
31
typedef struct {
32
     plan_dft super;
33
34
     plan *cld, *cldcpy, *cldrest;
35
     INT n, vl, nbuf, bufdist;
36
     INT ivs_by_nbuf, ovs_by_nbuf;
37
     INT roffset, ioffset;
38
} P;
39
40
/* transform a vector input with the help of bufs */
41
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
42
61
{
43
61
     const P *ego = (const P *) ego_;
44
61
     INT nbuf = ego->nbuf;
45
61
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46
47
61
     plan_dft *cld = (plan_dft *) ego->cld;
48
61
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
49
61
     plan_dft *cldrest;
50
61
     INT i, vl = ego->vl;
51
61
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52
61
     INT roffset = ego->roffset, ioffset = ego->ioffset;
53
54
122
     for (i = nbuf; i <= vl; i += nbuf) {
55
          /* transform to bufs: */
56
61
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57
61
    ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58
59
          /* copy back */
60
61
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61
61
    ro += ovs_by_nbuf; io += ovs_by_nbuf;
62
61
     }
63
64
61
     X(ifree)(bufs);
65
66
     /* Do the remaining transforms, if any: */
67
61
     cldrest = (plan_dft *) ego->cldrest;
68
61
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69
61
}
70
71
72
static void awake(plan *ego_, enum wakefulness wakefulness)
73
50
{
74
50
     P *ego = (P *) ego_;
75
76
50
     X(plan_awake)(ego->cld, wakefulness);
77
50
     X(plan_awake)(ego->cldcpy, wakefulness);
78
50
     X(plan_awake)(ego->cldrest, wakefulness);
79
50
}
80
81
static void destroy(plan *ego_)
82
196
{
83
196
     P *ego = (P *) ego_;
84
196
     X(plan_destroy_internal)(ego->cldrest);
85
196
     X(plan_destroy_internal)(ego->cldcpy);
86
196
     X(plan_destroy_internal)(ego->cld);
87
196
}
88
89
static void print(const plan *ego_, printer *p)
90
0
{
91
0
     const P *ego = (const P *) ego_;
92
0
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93
0
              ego->n, ego->nbuf,
94
0
              ego->vl, ego->bufdist % ego->n,
95
0
              ego->cld, ego->cldcpy, ego->cldrest);
96
0
}
97
98
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
99
2.21k
{
100
2.21k
     const problem_dft *p = (const problem_dft *) p_;
101
2.21k
     const iodim *d = p->sz->dims;
102
103
2.21k
     if (1
104
2.21k
   && p->vecsz->rnk <= 1
105
2.21k
   && p->sz->rnk == 1
106
2.21k
    ) {
107
1.16k
    INT vl, ivs, ovs;
108
1.16k
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109
110
1.16k
    if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111
0
         return 0;
112
113
    /* if this solver is redundant, in the sense that a solver
114
       of lower index generates the same plan, then prune this
115
       solver */
116
1.16k
    if (X(nbuf_redundant)(d[0].n, vl, 
117
1.16k
        ego->maxnbuf_ndx,
118
1.16k
        maxnbufs, NELEM(maxnbufs)))
119
430
         return 0;
120
121
    /*
122
      In principle, the buffered transforms might be useful
123
      when working out of place.  However, in order to
124
      prevent infinite loops in the planner, we require
125
      that the output stride of the buffered transforms be
126
      greater than 2.
127
    */
128
735
    if (p->ri != p->ro)
129
435
         return (d[0].os > 2);
130
131
    /*
132
     * If the problem is in place, the input/output strides must
133
     * be the same or the whole thing must fit in the buffer.
134
     */
135
300
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136
0
         return 1;
137
138
300
    if (/* fits into buffer: */
139
300
         ((p->vecsz->rnk == 0)
140
300
    ||
141
300
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
142
300
       maxnbufs[ego->maxnbuf_ndx]) 
143
300
     == p->vecsz->dims[0].n)))
144
196
         return 1;
145
300
     }
146
147
1.15k
     return 0;
148
2.21k
}
149
150
static int applicable(const S *ego, const problem *p_, const planner *plnr)
151
2.21k
{
152
2.21k
     if (NO_BUFFERINGP(plnr)) return 0;
153
2.21k
     if (!applicable0(ego, p_, plnr)) return 0;
154
155
196
     if (NO_UGLYP(plnr)) {
156
196
    const problem_dft *p = (const problem_dft *) p_;
157
196
    if (p->ri != p->ro) return 0;
158
196
    if (X(toobig)(p->sz->dims[0].n)) return 0;
159
196
     }
160
196
     return 1;
161
196
}
162
163
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
164
2.21k
{
165
2.21k
     P *pln;
166
2.21k
     const S *ego = (const S *)ego_;
167
2.21k
     plan *cld = (plan *) 0;
168
2.21k
     plan *cldcpy = (plan *) 0;
169
2.21k
     plan *cldrest = (plan *) 0;
170
2.21k
     const problem_dft *p = (const problem_dft *) p_;
171
2.21k
     R *bufs = (R *) 0;
172
2.21k
     INT nbuf = 0, bufdist, n, vl;
173
2.21k
     INT ivs, ovs, roffset, ioffset;
174
175
2.21k
     static const plan_adt padt = {
176
2.21k
    X(dft_solve), awake, print, destroy
177
2.21k
     };
178
179
2.21k
     if (!applicable(ego, p_, plnr))
180
2.01k
          goto nada;
181
182
196
     n = X(tensor_sz)(p->sz);
183
184
196
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185
186
196
     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187
196
     bufdist = X(bufdist)(n, vl);
188
196
     A(nbuf > 0);
189
190
     /* attempt to keep real and imaginary part in the same order,
191
  so as to allow optimizations in the the copy plan */
192
196
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193
196
     ioffset = 1 - roffset;
194
195
     /* initial allocation for the purpose of planning */
196
196
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);
197
198
     /* allow destruction of input if problem is in place */
199
196
     cld = X(mkplan_f_d)(plnr,
200
196
       X(mkproblem_dft_d)(
201
196
            X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202
196
            X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203
196
            TAINT(p->ri, ivs * nbuf),
204
196
            TAINT(p->ii, ivs * nbuf),
205
196
            bufs + roffset, 
206
196
            bufs + ioffset),
207
196
       0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208
196
     if (!cld)
209
0
          goto nada;
210
211
     /* copying back from the buffer is a rank-0 transform: */
212
196
     cldcpy = X(mkplan_d)(plnr,
213
196
        X(mkproblem_dft_d)(
214
196
             X(mktensor_0d)(),
215
196
             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216
196
                n, 2, p->sz->dims[0].os),
217
196
             bufs + roffset, 
218
196
             bufs + ioffset, 
219
196
             TAINT(p->ro, ovs * nbuf), 
220
196
             TAINT(p->io, ovs * nbuf)));
221
196
     if (!cldcpy)
222
0
          goto nada;
223
224
     /* deallocate buffers, let apply() allocate them for real */
225
196
     X(ifree)(bufs);
226
196
     bufs = 0;
227
228
     /* plan the leftover transforms (cldrest): */
229
196
     {
230
196
    INT id = ivs * (nbuf * (vl / nbuf));
231
196
    INT od = ovs * (nbuf * (vl / nbuf));
232
196
    cldrest = X(mkplan_d)(plnr, 
233
196
        X(mkproblem_dft_d)(
234
196
             X(tensor_copy)(p->sz),
235
196
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
236
196
             p->ri+id, p->ii+id, p->ro+od, p->io+od));
237
196
     }
238
196
     if (!cldrest)
239
0
          goto nada;
240
241
196
     pln = MKPLAN_DFT(P, &padt, apply);
242
196
     pln->cld = cld;
243
196
     pln->cldcpy = cldcpy;
244
196
     pln->cldrest = cldrest;
245
196
     pln->n = n;
246
196
     pln->vl = vl;
247
196
     pln->ivs_by_nbuf = ivs * nbuf;
248
196
     pln->ovs_by_nbuf = ovs * nbuf;
249
196
     pln->roffset = roffset;
250
196
     pln->ioffset = ioffset;
251
252
196
     pln->nbuf = nbuf;
253
196
     pln->bufdist = bufdist;
254
255
196
     {
256
196
    opcnt t;
257
196
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258
196
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259
196
     }
260
261
196
     return &(pln->super.super);
262
263
2.01k
 nada:
264
2.01k
     X(ifree0)(bufs);
265
2.01k
     X(plan_destroy_internal)(cldrest);
266
2.01k
     X(plan_destroy_internal)(cldcpy);
267
2.01k
     X(plan_destroy_internal)(cld);
268
2.01k
     return (plan *) 0;
269
196
}
270
271
static solver *mksolver(size_t maxnbuf_ndx)
272
4
{
273
4
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274
4
     S *slv = MKSOLVER(S, &sadt);
275
4
     slv->maxnbuf_ndx = maxnbuf_ndx;
276
4
     return &(slv->super);
277
4
}
278
279
void X(dft_buffered_register)(planner *p)
280
1
{
281
1
     size_t i;
282
3
     for (i = 0; i < NELEM(maxnbufs); ++i)
283
2
    REGISTER_SOLVER(p, mksolver(i));
284
1
}