Coverage Report

Created: 2026-03-12 07:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libswscale/ops_dispatch.c
Line
Count
Source
1
/**
2
 * Copyright (C) 2025 Niklas Haas
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/avassert.h"
22
#include "libavutil/mem.h"
23
#include "libavutil/mem_internal.h"
24
25
#include "ops.h"
26
#include "ops_internal.h"
27
#include "ops_dispatch.h"
28
29
typedef struct SwsOpPass {
30
    SwsCompiledOp comp;
31
    SwsOpExec exec_base;
32
    int num_blocks;
33
    int tail_off_in;
34
    int tail_off_out;
35
    int tail_size_in;
36
    int tail_size_out;
37
    int planes_in;
38
    int planes_out;
39
    int pixel_bits_in;
40
    int pixel_bits_out;
41
    int idx_in[4];
42
    int idx_out[4];
43
    bool memcpy_in;
44
    bool memcpy_out;
45
} SwsOpPass;
46
47
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
48
                               const SwsOpList *ops, SwsCompiledOp *out)
49
0
{
50
0
    SwsOpList *copy;
51
0
    SwsCompiledOp compiled = {0};
52
0
    int ret = 0;
53
54
0
    copy = ff_sws_op_list_duplicate(ops);
55
0
    if (!copy)
56
0
        return AVERROR(ENOMEM);
57
58
    /* Ensure these are always set during compilation */
59
0
    ff_sws_op_list_update_comps(copy);
60
61
0
    ret = backend->compile(ctx, copy, &compiled);
62
0
    if (ret < 0) {
63
0
        int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
64
0
        av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
65
0
               backend->name, av_err2str(ret));
66
0
    } else {
67
0
        *out = compiled;
68
0
    }
69
70
0
    ff_sws_op_list_free(&copy);
71
0
    return ret;
72
0
}
73
74
/**
 * Compile an operation list with the first backend that accepts it.
 *
 * Backends are tried in the order they appear in `ff_sws_op_backends`; a
 * backend is skipped when its hardware format differs from the source or
 * destination hardware format of `ops`, or when it fails to compile.
 *
 * @return 0 on success, AVERROR(ENOTSUP) if no backend could be used
 */
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
{
    for (int idx = 0; ff_sws_op_backends[idx]; idx++) {
        const SwsOpBackend *candidate = ff_sws_op_backends[idx];
        const bool hw_match = ops->src.hw_format == candidate->hw_format &&
                              ops->dst.hw_format == candidate->hw_format;

        /* Only attempt compilation on backends matching the hw format */
        if (!hw_match || ff_sws_ops_compile_backend(ctx, candidate, ops, out) < 0)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
               "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
               candidate->name, out->block_size, out->over_read, out->over_write,
               out->cpu_flags);

        ff_sws_op_list_print(ctx, AV_LOG_VERBOSE, AV_LOG_TRACE, ops);
        return 0;
    }

    /* Every backend was either skipped or rejected the list */
    av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
    ff_sws_op_list_print(ctx, AV_LOG_WARNING, AV_LOG_TRACE, ops);
    return AVERROR(ENOTSUP);
}
97
98
static void op_pass_free(void *ptr)
99
0
{
100
0
    SwsOpPass *p = ptr;
101
0
    if (!p)
102
0
        return;
103
104
0
    if (p->comp.free)
105
0
        p->comp.free(p->comp.priv);
106
107
0
    av_free(p);
108
0
}
109
110
static inline void get_row_data(const SwsOpPass *p, const int y,
111
                                const uint8_t *in[4], uint8_t *out[4])
112
0
{
113
0
    const SwsOpExec *base = &p->exec_base;
114
0
    for (int i = 0; i < p->planes_in; i++)
115
0
        in[i] = base->in[i] + (y >> base->in_sub_y[i]) * base->in_stride[i];
116
0
    for (int i = 0; i < p->planes_out; i++)
117
0
        out[i] = base->out[i] + (y >> base->out_sub_y[i]) * base->out_stride[i];
118
0
}
119
120
static void op_pass_setup(const SwsFrame *out, const SwsFrame *in,
121
                          const SwsPass *pass)
122
0
{
123
0
    const AVPixFmtDescriptor *indesc  = av_pix_fmt_desc_get(in->format);
124
0
    const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
125
126
0
    SwsOpPass *p = pass->priv;
127
0
    SwsOpExec *exec = &p->exec_base;
128
0
    const SwsCompiledOp *comp = &p->comp;
129
0
    const int block_size = comp->block_size;
130
0
    p->num_blocks = (pass->width + block_size - 1) / block_size;
131
132
    /* Set up main loop parameters */
133
0
    const int aligned_w  = p->num_blocks * block_size;
134
0
    const int safe_width = (p->num_blocks - 1) * block_size;
135
0
    const int tail_size  = pass->width - safe_width;
136
0
    p->tail_off_in   = safe_width * p->pixel_bits_in  >> 3;
137
0
    p->tail_off_out  = safe_width * p->pixel_bits_out >> 3;
138
0
    p->tail_size_in  = tail_size  * p->pixel_bits_in  >> 3;
139
0
    p->tail_size_out = tail_size  * p->pixel_bits_out >> 3;
140
0
    p->memcpy_in     = false;
141
0
    p->memcpy_out    = false;
142
143
0
    for (int i = 0; i < p->planes_in; i++) {
144
0
        const int idx        = p->idx_in[i];
145
0
        const int chroma     = idx == 1 || idx == 2;
146
0
        const int sub_x      = chroma ? indesc->log2_chroma_w : 0;
147
0
        const int sub_y      = chroma ? indesc->log2_chroma_h : 0;
148
0
        const int plane_w    = (aligned_w + sub_x) >> sub_x;
149
0
        const int plane_pad  = (comp->over_read + sub_x) >> sub_x;
150
0
        const int plane_size = plane_w * p->pixel_bits_in >> 3;
151
0
        if (comp->slice_align)
152
0
            p->memcpy_in |= plane_size + plane_pad > in->linesize[idx];
153
0
        exec->in[i]        = in->data[idx];
154
0
        exec->in_stride[i] = in->linesize[idx];
155
0
        exec->in_sub_y[i]  = sub_y;
156
0
        exec->in_sub_x[i]  = sub_x;
157
0
    }
158
159
0
    for (int i = 0; i < p->planes_out; i++) {
160
0
        const int idx        = p->idx_out[i];
161
0
        const int chroma     = idx == 1 || idx == 2;
162
0
        const int sub_x      = chroma ? outdesc->log2_chroma_w : 0;
163
0
        const int sub_y      = chroma ? outdesc->log2_chroma_h : 0;
164
0
        const int plane_w    = (aligned_w + sub_x) >> sub_x;
165
0
        const int plane_pad  = (comp->over_write + sub_x) >> sub_x;
166
0
        const int plane_size = plane_w * p->pixel_bits_out >> 3;
167
0
        if (comp->slice_align)
168
0
            p->memcpy_out |= plane_size + plane_pad > out->linesize[idx];
169
0
        exec->out[i]        = out->data[idx];
170
0
        exec->out_stride[i] = out->linesize[idx];
171
0
        exec->out_sub_y[i]  = sub_y;
172
0
        exec->out_sub_x[i]  = sub_x;
173
0
    }
174
175
    /* Pre-fill pointer bump for the main section only; this value does not
176
     * matter at all for the tail / last row handlers because they only ever
177
     * process a single line */
178
0
    const int blocks_main = p->num_blocks - p->memcpy_out;
179
0
    for (int i = 0; i < 4; i++) {
180
0
        exec->in_bump[i]  = exec->in_stride[i]  - blocks_main * exec->block_size_in;
181
0
        exec->out_bump[i] = exec->out_stride[i] - blocks_main * exec->block_size_out;
182
0
    }
183
0
}
184
185
/* Dispatch kernel over the last column of the image using memcpy */
186
static av_always_inline void
187
handle_tail(const SwsOpPass *p, SwsOpExec *exec,
188
            const bool copy_out, const bool copy_in,
189
            int y, const int h)
190
0
{
191
0
    DECLARE_ALIGNED_64(uint8_t, tmp)[2][4][sizeof(uint32_t[128])];
192
193
0
    const SwsOpExec *base = &p->exec_base;
194
0
    const SwsCompiledOp *comp = &p->comp;
195
0
    const int tail_size_in  = p->tail_size_in;
196
0
    const int tail_size_out = p->tail_size_out;
197
0
    const int bx = p->num_blocks - 1;
198
199
0
    const uint8_t *in_data[4];
200
0
    uint8_t *out_data[4];
201
0
    get_row_data(p, y, in_data, out_data);
202
203
0
    for (int i = 0; i < p->planes_in; i++) {
204
0
        in_data[i] += p->tail_off_in;
205
0
        if (copy_in) {
206
0
            exec->in[i] = (void *) tmp[0][i];
207
0
            exec->in_stride[i] = sizeof(tmp[0][i]);
208
0
        } else {
209
0
            exec->in[i] = in_data[i];
210
0
        }
211
0
    }
212
213
0
    for (int i = 0; i < p->planes_out; i++) {
214
0
        out_data[i] += p->tail_off_out;
215
0
        if (copy_out) {
216
0
            exec->out[i] = (void *) tmp[1][i];
217
0
            exec->out_stride[i] = sizeof(tmp[1][i]);
218
0
        } else {
219
0
            exec->out[i] = out_data[i];
220
0
        }
221
0
    }
222
223
0
    for (int y_end = y + h; y < y_end; y++) {
224
0
        if (copy_in) {
225
0
            for (int i = 0; i < p->planes_in; i++) {
226
0
                av_assert2(tmp[0][i] + tail_size_in < (uint8_t *) tmp[1]);
227
0
                memcpy(tmp[0][i], in_data[i], tail_size_in);
228
0
                in_data[i] += base->in_stride[i]; /* exec->in_stride was clobbered */
229
0
            }
230
0
        }
231
232
0
        comp->func(exec, comp->priv, bx, y, p->num_blocks, y + 1);
233
234
0
        if (copy_out) {
235
0
            for (int i = 0; i < p->planes_out; i++) {
236
0
                av_assert2(tmp[1][i] + tail_size_out < (uint8_t *) tmp[2]);
237
0
                memcpy(out_data[i], tmp[1][i], tail_size_out);
238
0
                out_data[i] += base->out_stride[i];
239
0
            }
240
0
        }
241
242
0
        for (int i = 0; i < 4; i++) {
243
0
            if (!copy_in && exec->in[i])
244
0
                exec->in[i] += exec->in_stride[i];
245
0
            if (!copy_out && exec->out[i])
246
0
                exec->out[i] += exec->out_stride[i];
247
0
        }
248
0
    }
249
0
}
250
251
static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
252
                        const int h, const SwsPass *pass)
253
0
{
254
0
    const SwsOpPass *p = pass->priv;
255
0
    const SwsCompiledOp *comp = &p->comp;
256
257
    /* Fill exec metadata for this slice */
258
0
    DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
259
0
    exec.slice_y = y;
260
0
    exec.slice_h = h;
261
262
    /**
263
     *  To ensure safety, we need to consider the following:
264
     *
265
     * 1. We can overread the input, unless this is the last line of an
266
     *    unpadded buffer. All defined operations can handle arbitrary pixel
267
     *    input, so overread of arbitrary data is fine.
268
     *
269
     * 2. We can overwrite the output, as long as we don't write more than the
270
     *    amount of pixels that fit into one linesize. So we always need to
271
     *    memcpy the last column on the output side if unpadded.
272
     *
273
     * 3. For the last row, we also need to memcpy the remainder of the input,
274
     *    to avoid reading past the end of the buffer. Note that since we know
275
     *    the run() function is called on stripes of the same buffer, we don't
276
     *    need to worry about this for the end of a slice.
277
     */
278
279
0
    const int last_slice  = y + h == pass->height;
280
0
    const bool memcpy_in  = last_slice && p->memcpy_in;
281
0
    const bool memcpy_out = p->memcpy_out;
282
0
    const int num_blocks  = p->num_blocks;
283
0
    const int blocks_main = num_blocks - memcpy_out;
284
0
    const int h_main      = h - memcpy_in;
285
286
    /* Handle main section */
287
0
    get_row_data(p, y, exec.in, exec.out);
288
0
    comp->func(&exec, comp->priv, 0, y, blocks_main, y + h_main);
289
290
0
    if (memcpy_in) {
291
        /* Safe part of last row */
292
0
        get_row_data(p, y + h_main, exec.in, exec.out);
293
0
        comp->func(&exec, comp->priv, 0, y + h_main, num_blocks - 1, y + h);
294
0
    }
295
296
    /* Handle last column via memcpy, takes over `exec` so call these last */
297
0
    if (memcpy_out)
298
0
        handle_tail(p, &exec, true, false, y, h_main);
299
0
    if (memcpy_in)
300
0
        handle_tail(p, &exec, memcpy_out, true, y + h_main, 1);
301
0
}
302
303
static int rw_planes(const SwsOp *op)
304
0
{
305
0
    return op->rw.packed ? 1 : op->rw.elems;
306
0
}
307
308
static int rw_pixel_bits(const SwsOp *op)
309
0
{
310
0
    const int elems = op->rw.packed ? op->rw.elems : 1;
311
0
    const int size  = ff_sws_pixel_type_size(op->type);
312
0
    const int bits  = 8 >> op->rw.frac;
313
0
    av_assert1(bits >= 1);
314
0
    return elems * size * bits;
315
0
}
316
317
static int compile(SwsGraph *graph, const SwsOpList *ops,
318
                   const SwsFormat *dst, SwsPass *input, SwsPass **output)
319
0
{
320
0
    SwsContext *ctx = graph->ctx;
321
0
    SwsOpPass *p = av_mallocz(sizeof(*p));
322
0
    if (!p)
323
0
        return AVERROR(ENOMEM);
324
325
0
    int ret = ff_sws_ops_compile(ctx, ops, &p->comp);
326
0
    if (ret < 0)
327
0
        goto fail;
328
329
0
    if (p->comp.opaque) {
330
0
        SwsCompiledOp c = p->comp;
331
0
        av_free(p);
332
0
        return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
333
0
                                     input, c.slice_align, c.func_opaque,
334
0
                                     NULL, c.priv, c.free, output);
335
0
    }
336
337
0
    const SwsOp *read  = ff_sws_op_list_input(ops);
338
0
    const SwsOp *write = ff_sws_op_list_output(ops);
339
0
    p->planes_in  = rw_planes(read);
340
0
    p->planes_out = rw_planes(write);
341
0
    p->pixel_bits_in  = rw_pixel_bits(read);
342
0
    p->pixel_bits_out = rw_pixel_bits(write);
343
0
    p->exec_base = (SwsOpExec) {
344
0
        .width  = dst->width,
345
0
        .height = dst->height,
346
0
        .block_size_in  = p->comp.block_size * p->pixel_bits_in  >> 3,
347
0
        .block_size_out = p->comp.block_size * p->pixel_bits_out >> 3,
348
0
    };
349
350
0
    for (int i = 0; i < 4; i++) {
351
0
        p->idx_in[i]  = i < p->planes_in  ? ops->order_src.in[i] : -1;
352
0
        p->idx_out[i] = i < p->planes_out ? ops->order_dst.in[i] : -1;
353
0
    }
354
355
0
    return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
356
0
                                 input, p->comp.slice_align, op_pass_run,
357
0
                                 op_pass_setup, p, op_pass_free, output);
358
359
0
fail:
360
0
    op_pass_free(p);
361
0
    return ret;
362
0
}
363
364
int ff_sws_compile_pass(SwsGraph *graph, SwsOpList *ops, int flags,
365
                        const SwsFormat *dst, SwsPass *input, SwsPass **output)
366
0
{
367
0
    SwsContext *ctx = graph->ctx;
368
0
    int ret;
369
370
    /* Check if the whole operation graph is an end-to-end no-op */
371
0
    if (ff_sws_op_list_is_noop(ops)) {
372
0
        *output = input;
373
0
        return 0;
374
0
    }
375
376
0
    const SwsOp *read  = ff_sws_op_list_input(ops);
377
0
    const SwsOp *write = ff_sws_op_list_output(ops);
378
0
    if (!read || !write) {
379
0
        av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
380
0
               "and write, respectively.\n");
381
0
        return AVERROR(EINVAL);
382
0
    }
383
384
0
    if (flags & SWS_OP_FLAG_OPTIMIZE) {
385
0
        ret = ff_sws_op_list_optimize(ops);
386
0
        if (ret < 0)
387
0
            return ret;
388
0
    }
389
390
0
    return compile(graph, ops, dst, input, output);
391
0
}