Coverage Report

Created: 2026-05-30 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/dft/buffered.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
22
#include "dft/dft.h"
23
24
typedef struct {
25
     solver super;
26
     size_t maxnbuf_ndx;
27
} S;
28
29
static const INT maxnbufs[] = { 8, 256 };
30
31
typedef struct {
32
     plan_dft super;
33
34
     plan *cld, *cldcpy, *cldrest;
35
     INT n, vl, nbuf, bufdist;
36
     INT ivs_by_nbuf, ovs_by_nbuf;
37
     INT roffset, ioffset;
38
} P;
39
40
/* transform a vector input with the help of bufs */
41
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
42
65
{
43
65
     const P *ego = (const P *) ego_;
44
65
     INT nbuf = ego->nbuf;
45
65
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46
47
65
     plan_dft *cld = (plan_dft *) ego->cld;
48
65
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
49
65
     plan_dft *cldrest;
50
65
     INT i, vl = ego->vl;
51
65
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52
65
     INT roffset = ego->roffset, ioffset = ego->ioffset;
53
54
130
     for (i = nbuf; i <= vl; i += nbuf) {
55
          /* transform to bufs: */
56
65
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57
65
    ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58
59
          /* copy back */
60
65
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61
65
    ro += ovs_by_nbuf; io += ovs_by_nbuf;
62
65
     }
63
64
65
     X(ifree)(bufs);
65
66
     /* Do the remaining transforms, if any: */
67
65
     cldrest = (plan_dft *) ego->cldrest;
68
65
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69
65
}
70
71
72
static void awake(plan *ego_, enum wakefulness wakefulness)
73
50
{
74
50
     P *ego = (P *) ego_;
75
76
50
     X(plan_awake)(ego->cld, wakefulness);
77
50
     X(plan_awake)(ego->cldcpy, wakefulness);
78
50
     X(plan_awake)(ego->cldrest, wakefulness);
79
50
}
80
81
static void destroy(plan *ego_)
82
200
{
83
200
     P *ego = (P *) ego_;
84
200
     X(plan_destroy_internal)(ego->cldrest);
85
200
     X(plan_destroy_internal)(ego->cldcpy);
86
200
     X(plan_destroy_internal)(ego->cld);
87
200
}
88
89
static void print(const plan *ego_, printer *p)
90
0
{
91
0
     const P *ego = (const P *) ego_;
92
0
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93
0
              ego->n, ego->nbuf,
94
0
              ego->vl, ego->bufdist % ego->n,
95
0
              ego->cld, ego->cldcpy, ego->cldrest);
96
0
}
97
98
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
99
2.26k
{
100
2.26k
     const problem_dft *p = (const problem_dft *) p_;
101
2.26k
     const iodim *d = p->sz->dims;
102
103
2.26k
     if (1
104
2.26k
   && p->vecsz->rnk <= 1
105
1.23k
   && p->sz->rnk == 1
106
2.26k
    ) {
107
1.19k
    INT vl, ivs, ovs;
108
1.19k
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109
110
1.19k
    if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111
0
         return 0;
112
113
    /* if this solver is redundant, in the sense that a solver
114
       of lower index generates the same plan, then prune this
115
       solver */
116
1.19k
    if (X(nbuf_redundant)(d[0].n, vl, 
117
1.19k
        ego->maxnbuf_ndx,
118
1.19k
        maxnbufs, NELEM(maxnbufs)))
119
439
         return 0;
120
121
    /*
122
      In principle, the buffered transforms might be useful
123
      when working out of place.  However, in order to
124
      prevent infinite loops in the planner, we require
125
      that the output stride of the buffered transforms be
126
      greater than 2.
127
    */
128
756
    if (p->ri != p->ro)
129
448
         return (d[0].os > 2);
130
131
    /*
132
     * If the problem is in place, the input/output strides must
133
     * be the same or the whole thing must fit in the buffer.
134
     */
135
308
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136
0
         return 1;
137
138
308
    if (/* fits into buffer: */
139
308
         ((p->vecsz->rnk == 0)
140
308
    ||
141
308
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
142
308
       maxnbufs[ego->maxnbuf_ndx]) 
143
308
     == p->vecsz->dims[0].n)))
144
200
         return 1;
145
308
     }
146
147
1.18k
     return 0;
148
2.26k
}
149
150
static int applicable(const S *ego, const problem *p_, const planner *plnr)
151
2.26k
{
152
2.26k
     if (NO_BUFFERINGP(plnr)) return 0;
153
2.26k
     if (!applicable0(ego, p_, plnr)) return 0;
154
155
200
     if (NO_UGLYP(plnr)) {
156
200
    const problem_dft *p = (const problem_dft *) p_;
157
200
    if (p->ri != p->ro) return 0;
158
200
    if (X(toobig)(p->sz->dims[0].n)) return 0;
159
200
     }
160
200
     return 1;
161
200
}
162
163
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
164
2.26k
{
165
2.26k
     P *pln;
166
2.26k
     const S *ego = (const S *)ego_;
167
2.26k
     plan *cld = (plan *) 0;
168
2.26k
     plan *cldcpy = (plan *) 0;
169
2.26k
     plan *cldrest = (plan *) 0;
170
2.26k
     const problem_dft *p = (const problem_dft *) p_;
171
2.26k
     R *bufs = (R *) 0;
172
2.26k
     INT nbuf = 0, bufdist, n, vl;
173
2.26k
     INT ivs, ovs, roffset, ioffset;
174
175
2.26k
     static const plan_adt padt = {
176
2.26k
    X(dft_solve), awake, print, destroy
177
2.26k
     };
178
179
2.26k
     if (!applicable(ego, p_, plnr))
180
2.06k
          goto nada;
181
182
200
     n = X(tensor_sz)(p->sz);
183
184
200
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185
186
200
     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187
200
     bufdist = X(bufdist)(n, vl);
188
200
     A(nbuf > 0);
189
190
     /* attempt to keep real and imaginary part in the same order,
191
  so as to allow optimizations in the the copy plan */
192
200
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193
200
     ioffset = 1 - roffset;
194
195
     /* initial allocation for the purpose of planning */
196
200
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);
197
198
     /* allow destruction of input if problem is in place */
199
200
     cld = X(mkplan_f_d)(plnr,
200
200
       X(mkproblem_dft_d)(
201
200
            X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202
200
            X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203
200
            TAINT(p->ri, ivs * nbuf),
204
200
            TAINT(p->ii, ivs * nbuf),
205
200
            bufs + roffset, 
206
200
            bufs + ioffset),
207
200
       0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208
200
     if (!cld)
209
0
          goto nada;
210
211
     /* copying back from the buffer is a rank-0 transform: */
212
200
     cldcpy = X(mkplan_d)(plnr,
213
200
        X(mkproblem_dft_d)(
214
200
             X(mktensor_0d)(),
215
200
             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216
200
                n, 2, p->sz->dims[0].os),
217
200
             bufs + roffset, 
218
200
             bufs + ioffset, 
219
200
             TAINT(p->ro, ovs * nbuf), 
220
200
             TAINT(p->io, ovs * nbuf)));
221
200
     if (!cldcpy)
222
0
          goto nada;
223
224
     /* deallocate buffers, let apply() allocate them for real */
225
200
     X(ifree)(bufs);
226
200
     bufs = 0;
227
228
     /* plan the leftover transforms (cldrest): */
229
200
     {
230
200
    INT id = ivs * (nbuf * (vl / nbuf));
231
200
    INT od = ovs * (nbuf * (vl / nbuf));
232
200
    cldrest = X(mkplan_d)(plnr, 
233
200
        X(mkproblem_dft_d)(
234
200
             X(tensor_copy)(p->sz),
235
200
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
236
200
             p->ri+id, p->ii+id, p->ro+od, p->io+od));
237
200
     }
238
200
     if (!cldrest)
239
0
          goto nada;
240
241
200
     pln = MKPLAN_DFT(P, &padt, apply);
242
200
     pln->cld = cld;
243
200
     pln->cldcpy = cldcpy;
244
200
     pln->cldrest = cldrest;
245
200
     pln->n = n;
246
200
     pln->vl = vl;
247
200
     pln->ivs_by_nbuf = ivs * nbuf;
248
200
     pln->ovs_by_nbuf = ovs * nbuf;
249
200
     pln->roffset = roffset;
250
200
     pln->ioffset = ioffset;
251
252
200
     pln->nbuf = nbuf;
253
200
     pln->bufdist = bufdist;
254
255
200
     {
256
200
    opcnt t;
257
200
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258
200
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259
200
     }
260
261
200
     return &(pln->super.super);
262
263
2.06k
 nada:
264
2.06k
     X(ifree0)(bufs);
265
2.06k
     X(plan_destroy_internal)(cldrest);
266
2.06k
     X(plan_destroy_internal)(cldcpy);
267
2.06k
     X(plan_destroy_internal)(cld);
268
2.06k
     return (plan *) 0;
269
200
}
270
271
static solver *mksolver(size_t maxnbuf_ndx)
272
4
{
273
4
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274
4
     S *slv = MKSOLVER(S, &sadt);
275
4
     slv->maxnbuf_ndx = maxnbuf_ndx;
276
4
     return &(slv->super);
277
4
}
278
279
void X(dft_buffered_register)(planner *p)
280
1
{
281
1
     size_t i;
282
3
     for (i = 0; i < NELEM(maxnbufs); ++i)
283
2
    REGISTER_SOLVER(p, mksolver(i));
284
1
}