Coverage Report

Created: 2025-11-24 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fftw3/dft/buffered.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20
21
22
#include "dft/dft.h"
23
24
typedef struct {
25
     solver super;
26
     size_t maxnbuf_ndx;
27
} S;
28
29
static const INT maxnbufs[] = { 8, 256 };
30
31
typedef struct {
32
     plan_dft super;
33
34
     plan *cld, *cldcpy, *cldrest;
35
     INT n, vl, nbuf, bufdist;
36
     INT ivs_by_nbuf, ovs_by_nbuf;
37
     INT roffset, ioffset;
38
} P;
39
40
/* transform a vector input with the help of bufs */
41
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
42
70
{
43
70
     const P *ego = (const P *) ego_;
44
70
     INT nbuf = ego->nbuf;
45
70
     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
46
47
70
     plan_dft *cld = (plan_dft *) ego->cld;
48
70
     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
49
70
     plan_dft *cldrest;
50
70
     INT i, vl = ego->vl;
51
70
     INT ivs_by_nbuf = ego->ivs_by_nbuf, ovs_by_nbuf = ego->ovs_by_nbuf;
52
70
     INT roffset = ego->roffset, ioffset = ego->ioffset;
53
54
140
     for (i = nbuf; i <= vl; i += nbuf) {
55
          /* transform to bufs: */
56
70
          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
57
70
    ri += ivs_by_nbuf; ii += ivs_by_nbuf;
58
59
          /* copy back */
60
70
          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
61
70
    ro += ovs_by_nbuf; io += ovs_by_nbuf;
62
70
     }
63
64
70
     X(ifree)(bufs);
65
66
     /* Do the remaining transforms, if any: */
67
70
     cldrest = (plan_dft *) ego->cldrest;
68
70
     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
69
70
}
70
71
72
static void awake(plan *ego_, enum wakefulness wakefulness)
73
56
{
74
56
     P *ego = (P *) ego_;
75
76
56
     X(plan_awake)(ego->cld, wakefulness);
77
56
     X(plan_awake)(ego->cldcpy, wakefulness);
78
56
     X(plan_awake)(ego->cldrest, wakefulness);
79
56
}
80
81
static void destroy(plan *ego_)
82
202
{
83
202
     P *ego = (P *) ego_;
84
202
     X(plan_destroy_internal)(ego->cldrest);
85
202
     X(plan_destroy_internal)(ego->cldcpy);
86
202
     X(plan_destroy_internal)(ego->cld);
87
202
}
88
89
static void print(const plan *ego_, printer *p)
90
0
{
91
0
     const P *ego = (const P *) ego_;
92
0
     p->print(p, "(dft-buffered-%D%v/%D-%D%(%p%)%(%p%)%(%p%))",
93
0
              ego->n, ego->nbuf,
94
0
              ego->vl, ego->bufdist % ego->n,
95
0
              ego->cld, ego->cldcpy, ego->cldrest);
96
0
}
97
98
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
99
2.23k
{
100
2.23k
     const problem_dft *p = (const problem_dft *) p_;
101
2.23k
     const iodim *d = p->sz->dims;
102
103
2.23k
     if (1
104
2.23k
   && p->vecsz->rnk <= 1
105
1.21k
   && p->sz->rnk == 1
106
2.23k
    ) {
107
1.17k
    INT vl, ivs, ovs;
108
1.17k
    X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
109
110
1.17k
    if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
111
0
         return 0;
112
113
    /* if this solver is redundant, in the sense that a solver
114
       of lower index generates the same plan, then prune this
115
       solver */
116
1.17k
    if (X(nbuf_redundant)(d[0].n, vl, 
117
1.17k
        ego->maxnbuf_ndx,
118
1.17k
        maxnbufs, NELEM(maxnbufs)))
119
432
         return 0;
120
121
    /*
122
      In principle, the buffered transforms might be useful
123
      when working out of place.  However, in order to
124
      prevent infinite loops in the planner, we require
125
      that the output stride of the buffered transforms be
126
      greater than 2.
127
    */
128
744
    if (p->ri != p->ro)
129
438
         return (d[0].os > 2);
130
131
    /*
132
     * If the problem is in place, the input/output strides must
133
     * be the same or the whole thing must fit in the buffer.
134
     */
135
306
    if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
136
0
         return 1;
137
138
306
    if (/* fits into buffer: */
139
306
         ((p->vecsz->rnk == 0)
140
306
    ||
141
306
    (X(nbuf)(d[0].n, p->vecsz->dims[0].n, 
142
306
       maxnbufs[ego->maxnbuf_ndx]) 
143
306
     == p->vecsz->dims[0].n)))
144
202
         return 1;
145
306
     }
146
147
1.15k
     return 0;
148
2.23k
}
149
150
static int applicable(const S *ego, const problem *p_, const planner *plnr)
151
2.23k
{
152
2.23k
     if (NO_BUFFERINGP(plnr)) return 0;
153
2.23k
     if (!applicable0(ego, p_, plnr)) return 0;
154
155
202
     if (NO_UGLYP(plnr)) {
156
202
    const problem_dft *p = (const problem_dft *) p_;
157
202
    if (p->ri != p->ro) return 0;
158
202
    if (X(toobig)(p->sz->dims[0].n)) return 0;
159
202
     }
160
202
     return 1;
161
202
}
162
163
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
164
2.23k
{
165
2.23k
     P *pln;
166
2.23k
     const S *ego = (const S *)ego_;
167
2.23k
     plan *cld = (plan *) 0;
168
2.23k
     plan *cldcpy = (plan *) 0;
169
2.23k
     plan *cldrest = (plan *) 0;
170
2.23k
     const problem_dft *p = (const problem_dft *) p_;
171
2.23k
     R *bufs = (R *) 0;
172
2.23k
     INT nbuf = 0, bufdist, n, vl;
173
2.23k
     INT ivs, ovs, roffset, ioffset;
174
175
2.23k
     static const plan_adt padt = {
176
2.23k
    X(dft_solve), awake, print, destroy
177
2.23k
     };
178
179
2.23k
     if (!applicable(ego, p_, plnr))
180
2.02k
          goto nada;
181
182
202
     n = X(tensor_sz)(p->sz);
183
184
202
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
185
186
202
     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
187
202
     bufdist = X(bufdist)(n, vl);
188
202
     A(nbuf > 0);
189
190
     /* attempt to keep real and imaginary part in the same order,
191
  so as to allow optimizations in the the copy plan */
192
202
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
193
202
     ioffset = 1 - roffset;
194
195
     /* initial allocation for the purpose of planning */
196
202
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);
197
198
     /* allow destruction of input if problem is in place */
199
202
     cld = X(mkplan_f_d)(plnr,
200
202
       X(mkproblem_dft_d)(
201
202
            X(mktensor_1d)(n, p->sz->dims[0].is, 2),
202
202
            X(mktensor_1d)(nbuf, ivs, bufdist * 2),
203
202
            TAINT(p->ri, ivs * nbuf),
204
202
            TAINT(p->ii, ivs * nbuf),
205
202
            bufs + roffset, 
206
202
            bufs + ioffset),
207
202
       0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
208
202
     if (!cld)
209
0
          goto nada;
210
211
     /* copying back from the buffer is a rank-0 transform: */
212
202
     cldcpy = X(mkplan_d)(plnr,
213
202
        X(mkproblem_dft_d)(
214
202
             X(mktensor_0d)(),
215
202
             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
216
202
                n, 2, p->sz->dims[0].os),
217
202
             bufs + roffset, 
218
202
             bufs + ioffset, 
219
202
             TAINT(p->ro, ovs * nbuf), 
220
202
             TAINT(p->io, ovs * nbuf)));
221
202
     if (!cldcpy)
222
0
          goto nada;
223
224
     /* deallocate buffers, let apply() allocate them for real */
225
202
     X(ifree)(bufs);
226
202
     bufs = 0;
227
228
     /* plan the leftover transforms (cldrest): */
229
202
     {
230
202
    INT id = ivs * (nbuf * (vl / nbuf));
231
202
    INT od = ovs * (nbuf * (vl / nbuf));
232
202
    cldrest = X(mkplan_d)(plnr, 
233
202
        X(mkproblem_dft_d)(
234
202
             X(tensor_copy)(p->sz),
235
202
             X(mktensor_1d)(vl % nbuf, ivs, ovs),
236
202
             p->ri+id, p->ii+id, p->ro+od, p->io+od));
237
202
     }
238
202
     if (!cldrest)
239
0
          goto nada;
240
241
202
     pln = MKPLAN_DFT(P, &padt, apply);
242
202
     pln->cld = cld;
243
202
     pln->cldcpy = cldcpy;
244
202
     pln->cldrest = cldrest;
245
202
     pln->n = n;
246
202
     pln->vl = vl;
247
202
     pln->ivs_by_nbuf = ivs * nbuf;
248
202
     pln->ovs_by_nbuf = ovs * nbuf;
249
202
     pln->roffset = roffset;
250
202
     pln->ioffset = ioffset;
251
252
202
     pln->nbuf = nbuf;
253
202
     pln->bufdist = bufdist;
254
255
202
     {
256
202
    opcnt t;
257
202
    X(ops_add)(&cld->ops, &cldcpy->ops, &t);
258
202
    X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
259
202
     }
260
261
202
     return &(pln->super.super);
262
263
2.02k
 nada:
264
2.02k
     X(ifree0)(bufs);
265
2.02k
     X(plan_destroy_internal)(cldrest);
266
2.02k
     X(plan_destroy_internal)(cldcpy);
267
2.02k
     X(plan_destroy_internal)(cld);
268
2.02k
     return (plan *) 0;
269
202
}
270
271
static solver *mksolver(size_t maxnbuf_ndx)
272
4
{
273
4
     static const solver_adt sadt = { PROBLEM_DFT, mkplan, 0 };
274
4
     S *slv = MKSOLVER(S, &sadt);
275
4
     slv->maxnbuf_ndx = maxnbuf_ndx;
276
4
     return &(slv->super);
277
4
}
278
279
void X(dft_buffered_register)(planner *p)
280
1
{
281
1
     size_t i;
282
3
     for (i = 0; i < NELEM(maxnbufs); ++i)
283
2
    REGISTER_SOLVER(p, mksolver(i));
284
1
}