Coverage Report

Created: 2025-07-23 07:03

/src/fftw3/dft/buffered.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
22
#include "dft/dft.h"
23
24
typedef struct {
25
     solver super;
26
     size_t maxnbuf_ndx;
27
} S;
28
29
static const INT maxnbufs[] = { 8, 256 };
30
31
typedef struct {
32
     plan_dft super;
33
34
     plan *cld, *cldcpy, *cldrest;
35
     INT n, vl, nbuf, bufdist;
36
     INT ivs_by_nbuf, ovs_by_nbuf;
37
     INT roffset, ioffset;
38
} P;
39
40
/* transform a vector input with the help of bufs */
41
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
42
68
{
43
68
     const P *ego = (const P *) ego_;
44
68
     INT nbuf = ego->nbuf;
45
68
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46
47
68
     plan_dft *cld = (plan_dft *) ego->cld;
48
68
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
49
68
     plan_dft *cldrest;
50
68
     INT i, vl = ego->vl;
51
68
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52
68
     INT roffset = ego->roffset, ioffset = ego->ioffset;
53
54
136
     for (i = nbuf; i <= vl; i += nbuf) {
55
          /* transform to bufs: */
56
68
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57
68
    ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58
59
          /* copy back */
60
68
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61
68
    ro += ovs_by_nbuf; io += ovs_by_nbuf;
62
68
     }
63
64
68
     X(ifree)(bufs);
65
66
     /* Do the remaining transforms, if any: */
67
68
     cldrest = (plan_dft *) ego->cldrest;
68
68
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69
68
}
70
71
72
static void awake(plan *ego_, enum wakefulness wakefulness)
73
52
{
74
52
     P *ego = (P *) ego_;
75
76
52
     X(plan_awake)(ego->cld, wakefulness);
77
52
     X(plan_awake)(ego->cldcpy, wakefulness);
78
52
     X(plan_awake)(ego->cldrest, wakefulness);
79
52
}
80
81
static void destroy(plan *ego_)
82
195
{
83
195
     P *ego = (P *) ego_;
84
195
     X(plan_destroy_internal)(ego->cldrest);
85
195
     X(plan_destroy_internal)(ego->cldcpy);
86
195
     X(plan_destroy_internal)(ego->cld);
87
195
}
88
89
static void print(const plan *ego_, printer *p)
90
0
{
91
0
     const P *ego = (const P *) ego_;
92
0
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93
0
              ego->n, ego->nbuf,
94
0
              ego->vl, ego->bufdist % ego->n,
95
0
              ego->cld, ego->cldcpy, ego->cldrest);
96
0
}
97
98
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
99
2.19k
{
100
2.19k
     const problem_dft *p = (const problem_dft *) p_;
101
2.19k
     const iodim *d = p->sz->dims;
102
103
2.19k
     if (1
104
2.19k
   && p->vecsz->rnk <= 1
105
2.19k
   && p->sz->rnk == 1
106
2.19k
    ) {
107
1.16k
    INT vl, ivs, ovs;
108
1.16k
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109
110
1.16k
    if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111
0
         return 0;
112
113
    /* if this solver is redundant, in the sense that a solver
114
       of lower index generates the same plan, then prune this
115
       solver */
116
1.16k
    if (X(nbuf_redundant)(d[0].n, vl, 
117
1.16k
        ego->maxnbuf_ndx,
118
1.16k
        maxnbufs, NELEM(maxnbufs)))
119
426
         return 0;
120
121
    /*
122
      In principle, the buffered transforms might be useful
123
      when working out of place.  However, in order to
124
      prevent infinite loops in the planner, we require
125
      that the output stride of the buffered transforms be
126
      greater than 2.
127
    */
128
735
    if (p->ri != p->ro)
129
435
         return (d[0].os > 2);
130
131
    /*
132
     * If the problem is in place, the input/output strides must
133
     * be the same or the whole thing must fit in the buffer.
134
     */
135
300
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136
0
         return 1;
137
138
300
    if (/* fits into buffer: */
139
300
         ((p->vecsz->rnk == 0)
140
300
    ||
141
300
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
142
300
       maxnbufs[ego->maxnbuf_ndx]) 
143
300
     == p->vecsz->dims[0].n)))
144
195
         return 1;
145
300
     }
146
147
1.13k
     return 0;
148
2.19k
}
149
150
static int applicable(const S *ego, const problem *p_, const planner *plnr)
151
2.19k
{
152
2.19k
     if (NO_BUFFERINGP(plnr)) return 0;
153
2.19k
     if (!applicable0(ego, p_, plnr)) return 0;
154
155
195
     if (NO_UGLYP(plnr)) {
156
195
    const problem_dft *p = (const problem_dft *) p_;
157
195
    if (p->ri != p->ro) return 0;
158
195
    if (X(toobig)(p->sz->dims[0].n)) return 0;
159
195
     }
160
195
     return 1;
161
195
}
162
163
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
164
2.19k
{
165
2.19k
     P *pln;
166
2.19k
     const S *ego = (const S *)ego_;
167
2.19k
     plan *cld = (plan *) 0;
168
2.19k
     plan *cldcpy = (plan *) 0;
169
2.19k
     plan *cldrest = (plan *) 0;
170
2.19k
     const problem_dft *p = (const problem_dft *) p_;
171
2.19k
     R *bufs = (R *) 0;
172
2.19k
     INT nbuf = 0, bufdist, n, vl;
173
2.19k
     INT ivs, ovs, roffset, ioffset;
174
175
2.19k
     static const plan_adt padt = {
176
2.19k
    X(dft_solve), awake, print, destroy
177
2.19k
     };
178
179
2.19k
     if (!applicable(ego, p_, plnr))
180
2.00k
          goto nada;
181
182
195
     n = X(tensor_sz)(p->sz);
183
184
195
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185
186
195
     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187
195
     bufdist = X(bufdist)(n, vl);
188
195
     A(nbuf > 0);
189
190
     /* attempt to keep real and imaginary part in the same order,
191
  so as to allow optimizations in the the copy plan */
192
195
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193
195
     ioffset = 1 - roffset;
194
195
     /* initial allocation for the purpose of planning */
196
195
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);
197
198
     /* allow destruction of input if problem is in place */
199
195
     cld = X(mkplan_f_d)(plnr,
200
195
       X(mkproblem_dft_d)(
201
195
            X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202
195
            X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203
195
            TAINT(p->ri, ivs * nbuf),
204
195
            TAINT(p->ii, ivs * nbuf),
205
195
            bufs + roffset, 
206
195
            bufs + ioffset),
207
195
       0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208
195
     if (!cld)
209
0
          goto nada;
210
211
     /* copying back from the buffer is a rank-0 transform: */
212
195
     cldcpy = X(mkplan_d)(plnr,
213
195
        X(mkproblem_dft_d)(
214
195
             X(mktensor_0d)(),
215
195
             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216
195
                n, 2, p->sz->dims[0].os),
217
195
             bufs + roffset, 
218
195
             bufs + ioffset, 
219
195
             TAINT(p->ro, ovs * nbuf), 
220
195
             TAINT(p->io, ovs * nbuf)));
221
195
     if (!cldcpy)
222
0
          goto nada;
223
224
     /* deallocate buffers, let apply() allocate them for real */
225
195
     X(ifree)(bufs);
226
195
     bufs = 0;
227
228
     /* plan the leftover transforms (cldrest): */
229
195
     {
230
195
    INT id = ivs * (nbuf * (vl / nbuf));
231
195
    INT od = ovs * (nbuf * (vl / nbuf));
232
195
    cldrest = X(mkplan_d)(plnr, 
233
195
        X(mkproblem_dft_d)(
234
195
             X(tensor_copy)(p->sz),
235
195
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
236
195
             p->ri+id, p->ii+id, p->ro+od, p->io+od));
237
195
     }
238
195
     if (!cldrest)
239
0
          goto nada;
240
241
195
     pln = MKPLAN_DFT(P, &padt, apply);
242
195
     pln->cld = cld;
243
195
     pln->cldcpy = cldcpy;
244
195
     pln->cldrest = cldrest;
245
195
     pln->n = n;
246
195
     pln->vl = vl;
247
195
     pln->ivs_by_nbuf = ivs * nbuf;
248
195
     pln->ovs_by_nbuf = ovs * nbuf;
249
195
     pln->roffset = roffset;
250
195
     pln->ioffset = ioffset;
251
252
195
     pln->nbuf = nbuf;
253
195
     pln->bufdist = bufdist;
254
255
195
     {
256
195
    opcnt t;
257
195
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258
195
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259
195
     }
260
261
195
     return &(pln->super.super);
262
263
2.00k
 nada:
264
2.00k
     X(ifree0)(bufs);
265
2.00k
     X(plan_destroy_internal)(cldrest);
266
2.00k
     X(plan_destroy_internal)(cldcpy);
267
2.00k
     X(plan_destroy_internal)(cld);
268
2.00k
     return (plan *) 0;
269
195
}
270
271
static solver *mksolver(size_t maxnbuf_ndx)
272
4
{
273
4
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274
4
     S *slv = MKSOLVER(S, &sadt);
275
4
     slv->maxnbuf_ndx = maxnbuf_ndx;
276
4
     return &(slv->super);
277
4
}
278
279
void X(dft_buffered_register)(planner *p)
280
1
{
281
1
     size_t i;
282
3
     for (i = 0; i < NELEM(maxnbufs); ++i)
283
2
    REGISTER_SOLVER(p, mksolver(i));
284
1
}