Coverage Report

Created: 2026-04-29 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libswscale/ops_dispatch.c
Line
Count
Source
1
/**
2
 * Copyright (C) 2025 Niklas Haas
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/avassert.h"
22
#include "libavutil/cpu.h"
23
#include "libavutil/mathematics.h"
24
#include "libavutil/mem.h"
25
#include "libavutil/mem_internal.h"
26
#include "libavutil/refstruct.h"
27
28
#include "ops.h"
29
#include "ops_internal.h"
30
#include "ops_dispatch.h"
31
32
typedef struct SwsOpPass {
33
    SwsCompiledOp comp;
34
    SwsOpExec exec_base;
35
    SwsOpExec exec_tail;
36
    size_t num_blocks;
37
    int tail_off_in;
38
    int tail_off_out;
39
    int tail_size_in;
40
    int tail_size_out;
41
    int planes_in;
42
    int planes_out;
43
    int pixel_bits_in;
44
    int pixel_bits_out;
45
    int idx_in[4];
46
    int idx_out[4];
47
    int *offsets_y;
48
    int filter_size;
49
    bool memcpy_first;
50
    bool memcpy_last;
51
    bool memcpy_out;
52
    size_t tail_blocks;
53
    uint8_t *tail_buf; /* extra memory for fixing unpadded tails */
54
    unsigned int tail_buf_size;
55
} SwsOpPass;
56
57
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
58
                               const SwsOpList *ops, SwsCompiledOp *out)
59
0
{
60
0
    SwsOpList *copy;
61
0
    SwsCompiledOp compiled = {0};
62
0
    int ret = 0;
63
64
0
    copy = ff_sws_op_list_duplicate(ops);
65
0
    if (!copy)
66
0
        return AVERROR(ENOMEM);
67
68
    /* Ensure these are always set during compilation */
69
0
    ff_sws_op_list_update_comps(copy);
70
71
0
    ret = backend->compile(ctx, copy, &compiled);
72
0
    if (ret < 0) {
73
0
        int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
74
0
        av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
75
0
               backend->name, av_err2str(ret));
76
0
    } else {
77
0
        *out = compiled;
78
0
    }
79
80
0
    ff_sws_op_list_free(&copy);
81
0
    return ret;
82
0
}
83
84
/**
 * Compile an operation list with the first backend that accepts it.
 *
 * Backends are tried in the order of the NULL-terminated
 * `ff_sws_op_backends` table; a backend is skipped when its hardware format
 * differs from the source or destination hardware format of `ops`.
 *
 * @return 0 on success, AVERROR(ENOTSUP) if no backend could compile `ops`
 */
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
{
    const SwsOpBackend *backend;
    int idx = 0;

    while ((backend = ff_sws_op_backends[idx++])) {
        const bool hw_match = ops->src.hw_format == backend->hw_format &&
                              ops->dst.hw_format == backend->hw_format;
        if (!hw_match || ff_sws_ops_compile_backend(ctx, backend, ops, out) < 0)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
               "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
               backend->name, out->block_size, out->over_read, out->over_write,
               out->cpu_flags);

        ff_sws_op_list_print(ctx, AV_LOG_VERBOSE, AV_LOG_TRACE, ops);
        return 0;
    }

    return AVERROR(ENOTSUP);
}
105
106
/**
 * Release the private data of a compiled operation (through its `free`
 * callback, when one is set) and reset the whole structure to zero.
 */
void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
{
    const SwsCompiledOp cleared = {0};

    if (comp->free != NULL)
        comp->free(comp->priv);

    *comp = cleared;
}
113
114
static void op_pass_free(void *ptr)
115
0
{
116
0
    SwsOpPass *p = ptr;
117
0
    if (!p)
118
0
        return;
119
120
0
    ff_sws_compiled_op_unref(&p->comp);
121
0
    av_refstruct_unref(&p->offsets_y);
122
0
    av_free(p->exec_base.in_bump_y);
123
0
    av_free(p->exec_base.in_offset_x);
124
0
    av_free(p->tail_buf);
125
0
    av_free(p);
126
0
}
127
128
static inline void get_row_data(const SwsOpPass *p, const int y_dst,
129
                                const uint8_t *in[4], uint8_t *out[4])
130
0
{
131
0
    const SwsOpExec *base = &p->exec_base;
132
0
    const int y_src = p->offsets_y ? p->offsets_y[y_dst] : y_dst;
133
0
    for (int i = 0; i < p->planes_in; i++)
134
0
        in[i] = base->in[i] + (y_src >> base->in_sub_y[i]) * base->in_stride[i];
135
0
    for (int i = 0; i < p->planes_out; i++)
136
0
        out[i] = base->out[i] + (y_dst >> base->out_sub_y[i]) * base->out_stride[i];
137
0
}
138
139
static inline size_t pixel_bytes(size_t pixels, int pixel_bits,
140
                                 enum AVRounding rounding)
141
0
{
142
0
    const uint64_t bits = (uint64_t) pixels * pixel_bits;
143
0
    switch (rounding) {
144
0
    case AV_ROUND_ZERO:
145
0
    case AV_ROUND_DOWN:
146
0
        return bits >> 3;
147
0
    case AV_ROUND_INF:
148
0
    case AV_ROUND_UP:
149
0
        return (bits + 7) >> 3;
150
0
    default:
151
0
        av_unreachable("Invalid rounding mode");
152
0
        return (size_t) -1;
153
0
    }
154
0
}
155
156
/**
 * Number of bytes at the start of a line that can be processed while still
 * leaving `plane_pad` bytes of room before the end of the line.
 *
 * The sign of `linesize` is ignored; the result is clamped to zero when the
 * padding exceeds the line size.
 */
static size_t safe_bytes_pad(int linesize, int plane_pad)
{
    av_assert1(linesize);

    const int64_t magnitude = (int64_t) linesize >= 0 ? (int64_t) linesize
                                                      : -(int64_t) linesize;
    const int64_t remaining = magnitude - plane_pad;
    return remaining > 0 ? remaining : 0;
}
162
163
/**
 * Find the largest block count (at most `num_blocks`) whose last pixel has a
 * byte offset no greater than `safe_offset`.
 *
 * `offset_bytes` holds one byte offset per pixel; only the entry of the last
 * pixel of each candidate block count is inspected, scanning downwards.
 *
 * @return the number of leading blocks that are safe, possibly 0
 */
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size,
                                 ptrdiff_t safe_offset,
                                 const int32_t *offset_bytes)
{
    for (size_t blocks = num_blocks; blocks > 0; blocks--) {
        const size_t last_pixel = blocks * block_size - 1;
        if (offset_bytes[last_pixel] <= safe_offset)
            return blocks;
    }

    return 0;
}
172
173
static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
174
                         const SwsPass *pass)
175
0
{
176
0
    const AVPixFmtDescriptor *indesc  = av_pix_fmt_desc_get(in->format);
177
0
    const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
178
179
0
    SwsOpPass *p = pass->priv;
180
0
    SwsOpExec *exec = &p->exec_base;
181
0
    const SwsCompiledOp *comp = &p->comp;
182
183
    /* Set up main loop parameters */
184
0
    const unsigned block_size = comp->block_size;
185
0
    const size_t num_blocks   = (pass->width + block_size - 1) / block_size;
186
0
    const size_t aligned_w    = num_blocks * block_size;
187
0
    if (aligned_w < pass->width) /* overflow */
188
0
        return AVERROR(EINVAL);
189
0
    p->num_blocks   = num_blocks;
190
0
    p->memcpy_first = false;
191
0
    p->memcpy_last  = false;
192
0
    p->memcpy_out   = false;
193
194
0
    size_t safe_blocks = num_blocks;
195
0
    for (int i = 0; i < p->planes_in; i++) {
196
0
        int idx    = p->idx_in[i];
197
0
        int chroma = idx == 1 || idx == 2;
198
0
        int sub_x  = chroma ? indesc->log2_chroma_w : 0;
199
0
        int sub_y  = chroma ? indesc->log2_chroma_h : 0;
200
0
        size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
201
0
        size_t safe_blocks_in;
202
0
        if (exec->in_offset_x) {
203
0
            size_t filter_size = pixel_bytes(p->filter_size, p->pixel_bits_in,
204
0
                                             AV_ROUND_UP);
205
0
            safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
206
0
                                                safe_bytes - filter_size,
207
0
                                                exec->in_offset_x);
208
0
        } else {
209
0
            safe_blocks_in = safe_bytes / exec->block_size_in;
210
0
        }
211
212
0
        if (safe_blocks_in < num_blocks) {
213
0
            p->memcpy_first |= in->linesize[idx] < 0;
214
0
            p->memcpy_last  |= in->linesize[idx] > 0;
215
0
            safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
216
0
        }
217
218
0
        size_t loop_size   = num_blocks * exec->block_size_in;
219
0
        exec->in[i]        = in->data[idx];
220
0
        exec->in_stride[i] = in->linesize[idx];
221
0
        exec->in_bump[i]   = in->linesize[idx] - loop_size;
222
0
        exec->in_sub_y[i]  = sub_y;
223
0
        exec->in_sub_x[i]  = sub_x;
224
0
    }
225
226
0
    for (int i = 0; i < p->planes_out; i++) {
227
0
        int idx    = p->idx_out[i];
228
0
        int chroma = idx == 1 || idx == 2;
229
0
        int sub_x  = chroma ? outdesc->log2_chroma_w : 0;
230
0
        int sub_y  = chroma ? outdesc->log2_chroma_h : 0;
231
0
        size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
232
0
        size_t safe_blocks_out = safe_bytes / exec->block_size_out;
233
0
        if (safe_blocks_out < num_blocks) {
234
0
            p->memcpy_out = true;
235
0
            safe_blocks   = FFMIN(safe_blocks, safe_blocks_out);
236
0
        }
237
238
0
        size_t loop_size    = num_blocks * exec->block_size_out;
239
0
        exec->out[i]        = out->data[idx];
240
0
        exec->out_stride[i] = out->linesize[idx];
241
0
        exec->out_bump[i]   = out->linesize[idx] - loop_size;
242
0
        exec->out_sub_y[i]  = sub_y;
243
0
        exec->out_sub_x[i]  = sub_x;
244
0
    }
245
246
0
    const bool memcpy_in = p->memcpy_first || p->memcpy_last;
247
0
    if (!memcpy_in && !p->memcpy_out) {
248
0
        av_assert0(safe_blocks == num_blocks);
249
0
        return 0;
250
0
    }
251
252
    /* Set-up tail section parameters and buffers */
253
0
    SwsOpExec *tail = &p->exec_tail;
254
0
    const int align = av_cpu_max_align();
255
0
    size_t alloc_size = 0;
256
0
    *tail = *exec;
257
258
0
    const size_t safe_width = safe_blocks * block_size;
259
0
    const size_t tail_size  = pass->width - safe_width;
260
0
    p->tail_off_out  = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN);
261
0
    p->tail_size_out = pixel_bytes(tail_size,  p->pixel_bits_out, AV_ROUND_UP);
262
0
    p->tail_blocks   = num_blocks - safe_blocks;
263
264
0
    if (exec->in_offset_x) {
265
0
        p->tail_off_in  = exec->in_offset_x[safe_width];
266
0
        p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in;
267
0
        p->tail_size_in += pixel_bytes(p->filter_size, p->pixel_bits_in, AV_ROUND_UP);
268
0
    } else {
269
0
        p->tail_off_in  = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN);
270
0
        p->tail_size_in = pixel_bytes(tail_size,  p->pixel_bits_in, AV_ROUND_UP);
271
0
    }
272
273
0
    const size_t alloc_width = aligned_w - safe_width;
274
0
    for (int i = 0; memcpy_in && i < p->planes_in; i++) {
275
0
        size_t needed_size;
276
0
        if (exec->in_offset_x) {
277
            /* The input offset map is already padded to multiples of the block
278
             * size, and clamps the input offsets to the image boundaries; so
279
             * we just need to compensate for the comp->over_read */
280
0
            needed_size = p->tail_size_in;
281
0
        } else {
282
0
            needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
283
0
        }
284
0
        size_t loop_size   = p->tail_blocks * exec->block_size_in;
285
0
        tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
286
0
        tail->in_bump[i]   = tail->in_stride[i] - loop_size;
287
0
        alloc_size += tail->in_stride[i] * in->height;
288
0
    }
289
290
0
    for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
291
0
        size_t needed_size  = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
292
0
        size_t loop_size    = p->tail_blocks * exec->block_size_out;
293
0
        tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
294
0
        tail->out_bump[i]   = tail->out_stride[i] - loop_size;
295
0
        alloc_size += tail->out_stride[i] * out->height;
296
0
    }
297
298
0
    if (memcpy_in && exec->in_offset_x) {
299
        /* `in_offset_x` is indexed relative to the line start, not the start
300
         * of the section being processed; so we need to over-allocate this
301
         * array to the full width of the image, even though we will only
302
         * partially fill in the offsets relevant to the tail region */
303
0
        alloc_size += aligned_w * sizeof(*exec->in_offset_x);
304
0
    }
305
306
0
    av_fast_mallocz(&p->tail_buf, &p->tail_buf_size, alloc_size);
307
0
    if (!p->tail_buf)
308
0
        return AVERROR(ENOMEM);
309
310
0
    uint8_t *tail_buf = p->tail_buf;
311
0
    for (int i = 0; memcpy_in && i < p->planes_in; i++) {
312
0
        tail->in[i] = tail_buf;
313
0
        tail_buf += tail->in_stride[i] * in->height;
314
0
    }
315
316
0
    for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
317
0
        tail->out[i] = tail_buf;
318
0
        tail_buf += tail->out_stride[i] * out->height;
319
0
    }
320
321
0
    if (memcpy_in && exec->in_offset_x) {
322
0
        tail->in_offset_x = (int32_t *) tail_buf;
323
0
        for (int i = safe_width; i < aligned_w; i++)
324
0
            tail->in_offset_x[i] = exec->in_offset_x[i] - p->tail_off_in;
325
0
    }
326
327
0
    return 0;
328
0
}
329
330
/**
 * Copy `h` lines of `bytes` bytes each from `src` to `dst`.
 *
 * Strides are signed: a negative stride walks the corresponding buffer
 * backwards, as is the case for vertically flipped images. Only the
 * addresses of lines that are actually copied are computed, so no pointer
 * is ever formed beyond the last line.
 *
 * @param dst        first destination line
 * @param dst_stride distance in bytes between destination lines
 * @param src        first source line
 * @param src_stride distance in bytes between source lines
 * @param h          number of lines; nothing is copied when h <= 0
 * @param bytes      number of bytes copied per line
 */
static void copy_lines(uint8_t *dst, const ptrdiff_t dst_stride,
                       const uint8_t *src, const ptrdiff_t src_stride,
                       const int h, const size_t bytes)
{
    for (int y = 0; y < h; y++)
        memcpy(dst + y * dst_stride, src + y * src_stride, bytes);
}
340
341
static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
342
                        const int h, const SwsPass *pass)
343
0
{
344
0
    const SwsOpPass *p = pass->priv;
345
0
    const SwsCompiledOp *comp = &p->comp;
346
347
    /* Fill exec metadata for this slice */
348
0
    DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
349
0
    exec.slice_y = y;
350
0
    exec.slice_h = h;
351
352
    /**
353
     *  To ensure safety, we need to consider the following:
354
     *
355
     * 1. We can overread the input, unless this is the last line of an
356
     *    unpadded buffer. All defined operations can handle arbitrary pixel
357
     *    input, so overread of arbitrary data is fine. For flipped images,
358
     *    this condition is actually *inverted* to where the first line is
359
     *    the one at the end of the buffer.
360
     *
361
     * 2. We can overwrite the output, as long as we don't write more than the
362
     *    amount of pixels that fit into one linesize. So we always need to
363
     *    memcpy the last column on the output side if unpadded.
364
     */
365
366
0
    const bool memcpy_in  = p->memcpy_last && y + h == pass->height ||
367
0
                            p->memcpy_first && y == 0;
368
0
    const bool memcpy_out = p->memcpy_out;
369
0
    const size_t num_blocks  = p->num_blocks;
370
0
    const size_t tail_blocks = p->tail_blocks;
371
372
0
    get_row_data(p, y, exec.in, exec.out);
373
0
    if (!memcpy_in && !memcpy_out) {
374
        /* Fast path (fully aligned/padded inputs and outputs) */
375
0
        comp->func(&exec, comp->priv, 0, y, num_blocks, y + h);
376
0
        return;
377
0
    }
378
379
    /* Non-aligned case (slow path); process main blocks as normal, and
380
     * a separate tail (via memcpy into an appropriately padded buffer) */
381
0
    if (num_blocks > tail_blocks) {
382
0
        for (int i = 0; i < 4; i++) {
383
            /* We process fewer blocks, so the in_bump needs to be increased
384
             * to reflect that the plane pointers are left on the last block,
385
             * not the end of the processed line, after each loop iteration */
386
0
            exec.in_bump[i]  += exec.block_size_in  * tail_blocks;
387
0
            exec.out_bump[i] += exec.block_size_out * tail_blocks;
388
0
        }
389
390
0
        comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h);
391
0
    }
392
393
0
    DECLARE_ALIGNED_32(SwsOpExec, tail) = p->exec_tail;
394
0
    tail.slice_y = y;
395
0
    tail.slice_h = h;
396
397
0
    for (int i = 0; i < p->planes_in; i++) {
398
        /* Input offsets are relative to the base pointer */
399
0
        if (!exec.in_offset_x || memcpy_in)
400
0
            exec.in[i] += p->tail_off_in;
401
0
        tail.in[i] += y * tail.in_stride[i];
402
0
    }
403
0
    for (int i = 0; i < p->planes_out; i++) {
404
0
        exec.out[i] += p->tail_off_out;
405
0
        tail.out[i] += y * tail.out_stride[i];
406
0
    }
407
408
0
    for (int i = 0; i < p->planes_in; i++) {
409
0
        if (memcpy_in) {
410
0
            copy_lines((uint8_t *) tail.in[i], tail.in_stride[i],
411
0
                       exec.in[i], exec.in_stride[i], h, p->tail_size_in);
412
0
        } else {
413
            /* Reuse input pointers directly */
414
0
            const size_t loop_size = tail_blocks * exec.block_size_in;
415
0
            tail.in[i]        = exec.in[i];
416
0
            tail.in_stride[i] = exec.in_stride[i];
417
0
            tail.in_bump[i]   = exec.in_stride[i] - loop_size;
418
0
        }
419
0
    }
420
421
0
    for (int i = 0; !memcpy_out && i < p->planes_out; i++) {
422
        /* Reuse output pointers directly */
423
0
        const size_t loop_size = tail_blocks * exec.block_size_out;
424
0
        tail.out[i]        = exec.out[i];
425
0
        tail.out_stride[i] = exec.out_stride[i];
426
0
        tail.out_bump[i]   = exec.out_stride[i] - loop_size;
427
0
    }
428
429
    /* Dispatch kernel over tail */
430
0
    av_assert1(tail_blocks > 0);
431
0
    comp->func(&tail, comp->priv, num_blocks - tail_blocks, y, num_blocks, y + h);
432
433
0
    for (int i = 0; memcpy_out && i < p->planes_out; i++) {
434
0
        copy_lines(exec.out[i], exec.out_stride[i],
435
0
                   tail.out[i], tail.out_stride[i], h, p->tail_size_out);
436
0
    }
437
0
}
438
439
static int rw_planes(const SwsOp *op)
440
0
{
441
0
    return op->rw.packed ? 1 : op->rw.elems;
442
0
}
443
444
static int rw_pixel_bits(const SwsOp *op)
445
0
{
446
0
    const int elems = op->rw.packed ? op->rw.elems : 1;
447
0
    const int size  = ff_sws_pixel_type_size(op->type);
448
0
    const int bits  = 8 >> op->rw.frac;
449
0
    av_assert1(bits >= 1);
450
0
    return elems * size * bits;
451
0
}
452
453
/**
 * Raise the alignment and padding requirements of a pass' output buffer so
 * that a kernel with the given block size and over-read/over-write amount
 * can operate on it. Does nothing when `pass` is NULL.
 */
static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
{
    if (pass) {
        SwsPassBuffer *buffer = pass->output;

        /* Add at least as many pixels as needed to cover the padding requirement */
        const int over_bits  = over_rw * 8;
        const int pad_pixels = (over_bits + pixel_bits - 1) / pixel_bits;

        /* Requirements only ever grow */
        if (block_size > buffer->width_align)
            buffer->width_align = block_size;
        if (pad_pixels > buffer->width_pad)
            buffer->width_pad = pad_pixels;
    }
}
465
466
/**
 * Compile `ops` and append the resulting pass to `graph`.
 *
 * Opaque compiled operations are added to the graph directly with their own
 * run function and private data. Otherwise a SwsOpPass is built: plane and
 * pixel layout are derived from the list's read and write operations, and a
 * per-line bump table (vertical filter) or per-pixel byte offset map
 * (horizontal filter) is precomputed when the read operation is filtered.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input,
                   SwsPass **output)
{
    SwsContext *ctx = graph->ctx;
    SwsOpPass *p = av_mallocz(sizeof(*p));
    if (!p)
        return AVERROR(ENOMEM);

    int ret = ff_sws_ops_compile(ctx, ops, &p->comp);
    if (ret < 0)
        goto fail;

    const SwsCompiledOp *comp = &p->comp;
    const SwsFormat *dst = &ops->dst;
    if (p->comp.opaque) {
        /* The compiled op brings its own private data and free callback, so
         * the wrapper is not needed; copy it out before releasing `p` */
        SwsCompiledOp opaque = *comp;
        av_free(p);
        return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
                                     input, opaque.slice_align, opaque.func_opaque,
                                     NULL, opaque.priv, opaque.free, output);
    }

    const SwsOp *read  = ff_sws_op_list_input(ops);
    const SwsOp *write = ff_sws_op_list_output(ops);
    p->planes_in      = rw_planes(read);
    p->planes_out     = rw_planes(write);
    p->pixel_bits_in  = rw_pixel_bits(read);
    p->pixel_bits_out = rw_pixel_bits(write);
    p->exec_base = (SwsOpExec) {
        .width  = dst->width,
        .height = dst->height,
    };

    /* A block must cover a whole number of bytes on both sides */
    const int64_t block_bits_in  = (int64_t) comp->block_size * p->pixel_bits_in;
    const int64_t block_bits_out = (int64_t) comp->block_size * p->pixel_bits_out;
    if (block_bits_in & 0x7 || block_bits_out & 0x7) {
        av_log(ctx, AV_LOG_ERROR, "Block size must be a multiple of the pixel size.\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    p->exec_base.block_size_in  = block_bits_in  >> 3;
    p->exec_base.block_size_out = block_bits_out >> 3;

    for (int i = 0; i < 4; i++) {
        p->idx_in[i]  = -1;
        p->idx_out[i] = -1;
        if (i < p->planes_in)
            p->idx_in[i] = ops->plane_src[i];
        if (i < p->planes_out)
            p->idx_out[i] = ops->plane_dst[i];
    }

    const SwsFilterWeights *kernel = read->rw.kernel;
    if (read->rw.filter == SWS_OP_FILTER_V) {
        p->offsets_y = av_refstruct_ref(kernel->offsets);

        /* Compute relative pointer bumps for each output line */
        int32_t *bumps = av_malloc_array(kernel->dst_size, sizeof(*bumps));
        if (!bumps) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }

        /* Each bump is the distance to the next line's first source row,
         * less the one row implied by moving to the next output line */
        const int last_line = kernel->dst_size - 1;
        for (int y = 0; y < last_line; y++)
            bumps[y] = kernel->offsets[y + 1] - kernel->offsets[y] - 1;
        bumps[last_line] = 0;
        p->exec_base.in_bump_y = bumps;
    } else if (read->rw.filter == SWS_OP_FILTER_H) {
        /* Compute pixel offset map for each output line */
        const int padded_size = FFALIGN(kernel->dst_size, p->comp.block_size);
        int32_t *offsets = av_malloc_array(padded_size, sizeof(*offsets));
        if (!offsets) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
        /* Hand over ownership right away so that `fail` releases the map */
        p->exec_base.in_offset_x = offsets;

        for (int x = 0; x < kernel->dst_size; x++) {
            /* Sanity check; if the tap would land on a half-pixel, we cannot
             * reasonably expect the implementation to know about this. Just
             * error out in such (theoretical) cases. */
            const int64_t bits     = (int64_t) kernel->offsets[x] * p->pixel_bits_in;
            const int64_t byte_off = bits >> 3;
            if ((bits & 0x7) || byte_off > INT32_MAX) {
                ret = AVERROR(EINVAL);
                goto fail;
            }
            offsets[x] = byte_off;
        }

        /* Pad up to the block size by repeating the last real offset */
        for (int x = kernel->dst_size; x < padded_size; x++)
            offsets[x] = offsets[kernel->dst_size - 1];

        p->exec_base.block_size_in = 0; /* ptr does not advance */
        p->filter_size = kernel->filter_size;
    }

    ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
                                input, comp->slice_align, op_pass_run,
                                op_pass_setup, p, op_pass_free, output);
    if (ret < 0)
        return ret;

    align_pass(input,   comp->block_size, comp->over_read,  p->pixel_bits_in);
    align_pass(*output, comp->block_size, comp->over_write, p->pixel_bits_out);
    return 0;

fail:
    op_pass_free(p);
    return ret;
}
575
576
/**
 * Compile an operation list into one or more graph passes.
 *
 * A no-op list adds nothing and forwards `input` as the output. Otherwise
 * the list is optionally optimized and compiled as a single pass; if that is
 * not supported, it is split into subpasses which are compiled one after
 * another, each feeding the next.
 *
 * The list in `*pops` is always consumed: it is freed and `*pops` is set to
 * NULL on every path. On failure, passes added by this call are rolled back.
 *
 * @return 0 or a non-negative value on success, a negative AVERROR code on
 *         failure (AVERROR(ENOTSUP) when no backend supports the operations)
 */
int ff_sws_compile_pass(SwsGraph *graph, SwsOpList **pops, int flags,
                        SwsPass *input, SwsPass **output)
{
    const int first_new_pass = graph->num_passes;
    SwsContext *ctx = graph->ctx;
    SwsOpList *ops = *pops;
    int ret = 0;

    /* Check if the whole operation graph is an end-to-end no-op */
    if (ff_sws_op_list_is_noop(ops)) {
        *output = input;
        goto out;
    }

    const SwsOp *first_op = ff_sws_op_list_input(ops);
    const SwsOp *last_op  = ff_sws_op_list_output(ops);
    if (!(first_op && last_op)) {
        av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
               "and write, respectively.\n");
        ret = AVERROR(EINVAL);
        goto out;
    }

    if (flags & SWS_OP_FLAG_OPTIMIZE) {
        ret = ff_sws_op_list_optimize(ops);
        if (ret < 0)
            goto out;
        av_log(ctx, AV_LOG_DEBUG, "Operation list after optimizing:\n");
        ff_sws_op_list_print(ctx, AV_LOG_DEBUG, AV_LOG_TRACE, ops);
    }

    /* First attempt: the whole list as one pass. Anything other than
     * "not supported" (success or a hard error) is final. */
    ret = compile(graph, ops, input, output);
    if (ret != AVERROR(ENOTSUP))
        goto out;

    av_log(ctx, AV_LOG_DEBUG, "Retrying with separated filter passes.\n");
    SwsPass *last_pass = input;
    while (ops) {
        SwsOpList *remainder;
        ret = ff_sws_op_list_subpass(ops, &remainder);
        if (ret < 0)
            goto out;

        if (last_pass == input && !remainder) {
            /* No point in compiling an unsplit pass again */
            ret = AVERROR(ENOTSUP);
            goto out;
        }

        ret = compile(graph, ops, last_pass, &last_pass);
        if (ret < 0) {
            ff_sws_op_list_free(&remainder);
            goto out;
        }

        /* Continue with whatever was split off from the current subpass */
        ff_sws_op_list_free(&ops);
        ops = remainder;
    }

    /* Return last subpass successfully compiled */
    av_log(ctx, AV_LOG_VERBOSE, "Using %d separate passes.\n",
           graph->num_passes - first_new_pass);
    *output = last_pass;

out:
    if (ret == AVERROR(ENOTSUP)) {
        av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
        ff_sws_op_list_print(ctx, AV_LOG_WARNING, AV_LOG_TRACE, ops);
    }
    if (ret < 0)
        ff_sws_graph_rollback(graph, first_new_pass);
    ff_sws_op_list_free(&ops);
    *pops = NULL;
    return ret;
}