Coverage Report

Created: 2026-02-26 07:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/dft/buffered.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
22
#include "dft/dft.h"
23
24
typedef struct {
25
     solver super;
26
     size_t maxnbuf_ndx;
27
} S;
28
29
static const INT maxnbufs[] = { 8, 256 };
30
31
typedef struct {
32
     plan_dft super;
33
34
     plan *cld, *cldcpy, *cldrest;
35
     INT n, vl, nbuf, bufdist;
36
     INT ivs_by_nbuf, ovs_by_nbuf;
37
     INT roffset, ioffset;
38
} P;
39
40
/* transform a vector input with the help of bufs */
41
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
42
61
{
43
61
     const P *ego = (const P *) ego_;
44
61
     INT nbuf = ego->nbuf;
45
61
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46
47
61
     plan_dft *cld = (plan_dft *) ego->cld;
48
61
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
49
61
     plan_dft *cldrest;
50
61
     INT i, vl = ego->vl;
51
61
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52
61
     INT roffset = ego->roffset, ioffset = ego->ioffset;
53
54
122
     for (i = nbuf; i <= vl; i += nbuf) {
55
          /* transform to bufs: */
56
61
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57
61
    ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58
59
          /* copy back */
60
61
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61
61
    ro += ovs_by_nbuf; io += ovs_by_nbuf;
62
61
     }
63
64
61
     X(ifree)(bufs);
65
66
     /* Do the remaining transforms, if any: */
67
61
     cldrest = (plan_dft *) ego->cldrest;
68
61
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69
61
}
70
71
72
static void awake(plan *ego_, enum wakefulness wakefulness)
73
50
{
74
50
     P *ego = (P *) ego_;
75
76
50
     X(plan_awake)(ego->cld, wakefulness);
77
50
     X(plan_awake)(ego->cldcpy, wakefulness);
78
50
     X(plan_awake)(ego->cldrest, wakefulness);
79
50
}
80
81
static void destroy(plan *ego_)
82
193
{
83
193
     P *ego = (P *) ego_;
84
193
     X(plan_destroy_internal)(ego->cldrest);
85
193
     X(plan_destroy_internal)(ego->cldcpy);
86
193
     X(plan_destroy_internal)(ego->cld);
87
193
}
88
89
static void print(const plan *ego_, printer *p)
90
0
{
91
0
     const P *ego = (const P *) ego_;
92
0
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93
0
              ego->n, ego->nbuf,
94
0
              ego->vl, ego->bufdist % ego->n,
95
0
              ego->cld, ego->cldcpy, ego->cldrest);
96
0
}
97
98
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
99
2.17k
{
100
2.17k
     const problem_dft *p = (const problem_dft *) p_;
101
2.17k
     const iodim *d = p->sz->dims;
102
103
2.17k
     if (1
104
2.17k
   && p->vecsz->rnk <= 1
105
1.18k
   && p->sz->rnk == 1
106
2.17k
    ) {
107
1.15k
    INT vl, ivs, ovs;
108
1.15k
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109
110
1.15k
    if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111
0
         return 0;
112
113
    /* if this solver is redundant, in the sense that a solver
114
       of lower index generates the same plan, then prune this
115
       solver */
116
1.15k
    if (X(nbuf_redundant)(d[0].n, vl, 
117
1.15k
        ego->maxnbuf_ndx,
118
1.15k
        maxnbufs, NELEM(maxnbufs)))
119
426
         return 0;
120
121
    /*
122
      In principle, the buffered transforms might be useful
123
      when working out of place.  However, in order to
124
      prevent infinite loops in the planner, we require
125
      that the output stride of the buffered transforms be
126
      greater than 2.
127
    */
128
729
    if (p->ri != p->ro)
129
434
         return (d[0].os > 2);
130
131
    /*
132
     * If the problem is in place, the input/output strides must
133
     * be the same or the whole thing must fit in the buffer.
134
     */
135
295
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136
0
         return 1;
137
138
295
    if (/* fits into buffer: */
139
295
         ((p->vecsz->rnk == 0)
140
295
    ||
141
295
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
142
295
       maxnbufs[ego->maxnbuf_ndx]) 
143
295
     == p->vecsz->dims[0].n)))
144
193
         return 1;
145
295
     }
146
147
1.12k
     return 0;
148
2.17k
}
149
150
static int applicable(const S *ego, const problem *p_, const planner *plnr)
151
2.17k
{
152
2.17k
     if (NO_BUFFERINGP(plnr)) return 0;
153
2.17k
     if (!applicable0(ego, p_, plnr)) return 0;
154
155
193
     if (NO_UGLYP(plnr)) {
156
193
    const problem_dft *p = (const problem_dft *) p_;
157
193
    if (p->ri != p->ro) return 0;
158
193
    if (X(toobig)(p->sz->dims[0].n)) return 0;
159
193
     }
160
193
     return 1;
161
193
}
162
163
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
164
2.17k
{
165
2.17k
     P *pln;
166
2.17k
     const S *ego = (const S *)ego_;
167
2.17k
     plan *cld = (plan *) 0;
168
2.17k
     plan *cldcpy = (plan *) 0;
169
2.17k
     plan *cldrest = (plan *) 0;
170
2.17k
     const problem_dft *p = (const problem_dft *) p_;
171
2.17k
     R *bufs = (R *) 0;
172
2.17k
     INT nbuf = 0, bufdist, n, vl;
173
2.17k
     INT ivs, ovs, roffset, ioffset;
174
175
2.17k
     static const plan_adt padt = {
176
2.17k
    X(dft_solve), awake, print, destroy
177
2.17k
     };
178
179
2.17k
     if (!applicable(ego, p_, plnr))
180
1.98k
          goto nada;
181
182
193
     n = X(tensor_sz)(p->sz);
183
184
193
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185
186
193
     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187
193
     bufdist = X(bufdist)(n, vl);
188
193
     A(nbuf > 0);
189
190
     /* attempt to keep real and imaginary part in the same order,
191
  so as to allow optimizations in the the copy plan */
192
193
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193
193
     ioffset = 1 - roffset;
194
195
     /* initial allocation for the purpose of planning */
196
193
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);
197
198
     /* allow destruction of input if problem is in place */
199
193
     cld = X(mkplan_f_d)(plnr,
200
193
       X(mkproblem_dft_d)(
201
193
            X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202
193
            X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203
193
            TAINT(p->ri, ivs * nbuf),
204
193
            TAINT(p->ii, ivs * nbuf),
205
193
            bufs + roffset, 
206
193
            bufs + ioffset),
207
193
       0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208
193
     if (!cld)
209
0
          goto nada;
210
211
     /* copying back from the buffer is a rank-0 transform: */
212
193
     cldcpy = X(mkplan_d)(plnr,
213
193
        X(mkproblem_dft_d)(
214
193
             X(mktensor_0d)(),
215
193
             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216
193
                n, 2, p->sz->dims[0].os),
217
193
             bufs + roffset, 
218
193
             bufs + ioffset, 
219
193
             TAINT(p->ro, ovs * nbuf), 
220
193
             TAINT(p->io, ovs * nbuf)));
221
193
     if (!cldcpy)
222
0
          goto nada;
223
224
     /* deallocate buffers, let apply() allocate them for real */
225
193
     X(ifree)(bufs);
226
193
     bufs = 0;
227
228
     /* plan the leftover transforms (cldrest): */
229
193
     {
230
193
    INT id = ivs * (nbuf * (vl / nbuf));
231
193
    INT od = ovs * (nbuf * (vl / nbuf));
232
193
    cldrest = X(mkplan_d)(plnr, 
233
193
        X(mkproblem_dft_d)(
234
193
             X(tensor_copy)(p->sz),
235
193
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
236
193
             p->ri+id, p->ii+id, p->ro+od, p->io+od));
237
193
     }
238
193
     if (!cldrest)
239
0
          goto nada;
240
241
193
     pln = MKPLAN_DFT(P, &padt, apply);
242
193
     pln->cld = cld;
243
193
     pln->cldcpy = cldcpy;
244
193
     pln->cldrest = cldrest;
245
193
     pln->n = n;
246
193
     pln->vl = vl;
247
193
     pln->ivs_by_nbuf = ivs * nbuf;
248
193
     pln->ovs_by_nbuf = ovs * nbuf;
249
193
     pln->roffset = roffset;
250
193
     pln->ioffset = ioffset;
251
252
193
     pln->nbuf = nbuf;
253
193
     pln->bufdist = bufdist;
254
255
193
     {
256
193
    opcnt t;
257
193
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258
193
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259
193
     }
260
261
193
     return &(pln->super.super);
262
263
1.98k
 nada:
264
1.98k
     X(ifree0)(bufs);
265
1.98k
     X(plan_destroy_internal)(cldrest);
266
1.98k
     X(plan_destroy_internal)(cldcpy);
267
1.98k
     X(plan_destroy_internal)(cld);
268
1.98k
     return (plan *) 0;
269
193
}
270
271
static solver *mksolver(size_t maxnbuf_ndx)
272
4
{
273
4
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274
4
     S *slv = MKSOLVER(S, &sadt);
275
4
     slv->maxnbuf_ndx = maxnbuf_ndx;
276
4
     return &(slv->super);
277
4
}
278
279
void X(dft_buffered_register)(planner *p)
280
1
{
281
1
     size_t i;
282
3
     for (i = 0; i < NELEM(maxnbufs); ++i)
283
2
    REGISTER_SOLVER(p, mksolver(i));
284
1
}