Coverage Report

Created: 2026-02-26 07:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libswscale/ops.c
Line
Count
Source
1
/**
2
 * Copyright (C) 2025 Niklas Haas
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/avassert.h"
22
#include "libavutil/avstring.h"
23
#include "libavutil/bswap.h"
24
#include "libavutil/mem.h"
25
#include "libavutil/rational.h"
26
#include "libavutil/refstruct.h"
27
28
#include "ops.h"
29
#include "ops_internal.h"
30
31
/* Backend descriptors; their definitions are not part of this file */
extern const SwsOpBackend backend_c;
extern const SwsOpBackend backend_murder;
extern const SwsOpBackend backend_x86;

/**
 * NULL-terminated table of the available backends.
 *
 * ff_sws_ops_compile() walks this table front to back and stops at the first
 * backend that compiles the operation list, so the order of the entries is
 * the order of preference. The x86 backend is only listed when building for
 * x86-64 with x86 assembly enabled.
 */
const SwsOpBackend * const ff_sws_op_backends[] = {
    &backend_murder,
#if ARCH_X86_64 && HAVE_X86ASM
    &backend_x86,
#endif
    &backend_c,
    NULL
};
43
44
/**
 * Evaluate `x`, store the result in the local variable `ret` and return it
 * from the enclosing function if it is negative. The enclosing function must
 * declare `ret` and have a compatible return type.
 */
#define RET(x)                                                                 \
    do {                                                                       \
        ret = (x);                                                             \
        if (ret < 0)                                                           \
            return ret;                                                        \
    } while (0)
49
50
/**
 * Short, human-readable name of a pixel type ("u8", "u16", "u32", "f32" or
 * "none"). Any other value is treated as unreachable; "ERR" is returned as a
 * last resort.
 */
const char *ff_sws_pixel_type_name(SwsPixelType type)
{
    const char *name = NULL;

    switch (type) {
    case SWS_PIXEL_U8:      name = "u8";   break;
    case SWS_PIXEL_U16:     name = "u16";  break;
    case SWS_PIXEL_U32:     name = "u32";  break;
    case SWS_PIXEL_F32:     name = "f32";  break;
    case SWS_PIXEL_NONE:    name = "none"; break;
    case SWS_PIXEL_TYPE_NB: break;
    }

    if (name)
        return name;

    av_unreachable("Invalid pixel type!");
    return "ERR";
}
64
65
/**
 * Size in bytes of a single value of the given pixel type.
 *
 * SWS_PIXEL_NONE and SWS_PIXEL_TYPE_NB have no size; passing them (or any
 * other unknown value) is treated as unreachable and yields 0 as a last
 * resort.
 */
int ff_sws_pixel_type_size(SwsPixelType type)
{
    int size = 0;

    switch (type) {
    case SWS_PIXEL_U8:      size = sizeof(uint8_t);  break;
    case SWS_PIXEL_U16:     size = sizeof(uint16_t); break;
    case SWS_PIXEL_U32:     size = sizeof(uint32_t); break;
    case SWS_PIXEL_F32:     size = sizeof(float);    break;
    case SWS_PIXEL_NONE:    break;
    case SWS_PIXEL_TYPE_NB: break;
    }

    /* every valid type has a nonzero size */
    if (size)
        return size;

    av_unreachable("Invalid pixel type!");
    return 0;
}
79
80
/**
 * Whether the pixel type is an integer type (true for u8/u16/u32, false for
 * f32). SWS_PIXEL_NONE and SWS_PIXEL_TYPE_NB are treated as unreachable.
 */
bool ff_sws_pixel_type_is_int(SwsPixelType type)
{
    switch (type) {
    case SWS_PIXEL_F32:
        return false;
    case SWS_PIXEL_U8:
    case SWS_PIXEL_U16:
    case SWS_PIXEL_U32:
        return true;
    case SWS_PIXEL_NONE:
    case SWS_PIXEL_TYPE_NB:
        break;
    }

    av_unreachable("Invalid pixel type!");
    return false;
}
96
97
/* biased towards `a` */
98
static AVRational av_min_q(AVRational a, AVRational b)
99
0
{
100
0
    return av_cmp_q(a, b) == 1 ? b : a;
101
0
}
102
103
/**
 * Larger of two rationals, biased towards `a`: `b` is only returned when
 * av_cmp_q(a, b) is exactly -1; every other comparison result yields `a`.
 */
static AVRational av_max_q(AVRational a, AVRational b)
{
    if (av_cmp_q(a, b) == -1)
        return b;
    return a;
}
107
108
/**
 * Apply a single operation to four rational component values, in place.
 *
 * A component whose denominator is zero is treated as "undefined" by most
 * operations and passed through unchanged.
 *
 * @param op operation to apply
 * @param x  the four component values, updated in place
 */
void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
{
    switch (op->op) {
    case SWS_OP_READ:
    case SWS_OP_WRITE:
        /* pure I/O, values are left untouched */
        return;
    case SWS_OP_UNPACK: {
        uint64_t mask[4];
        int shift[4];
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        ff_sws_pack_op_decode(op, mask, shift);
        /* split the packed value in x[0] into its four bit fields */
        const unsigned packed = x[0].num;
        for (int c = 0; c < 4; c++)
            x[c] = Q((packed >> shift[c]) & mask[c]);
        return;
    }
    case SWS_OP_PACK: {
        uint64_t mask[4];
        int shift[4];
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        ff_sws_pack_op_decode(op, mask, shift);
        /* merge the four bit fields into a single value stored in x[0] */
        unsigned packed = 0;
        for (int c = 0; c < 4; c++)
            packed |= (x[c].num & mask[c]) << shift[c];
        x[0] = Q(packed);
        return;
    }
    case SWS_OP_SWAP_BYTES: {
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        const int size = ff_sws_pixel_type_size(op->type);
        if (size == 2) {
            for (int c = 0; c < 4; c++)
                x[c].num = av_bswap16(x[c].num);
        } else if (size == 4) {
            for (int c = 0; c < 4; c++)
                x[c].num = av_bswap32(x[c].num);
        }
        /* other sizes: nothing to swap */
        return;
    }
    case SWS_OP_CLEAR:
        /* only components with a defined clear value are overwritten */
        for (int c = 0; c < 4; c++) {
            if (!op->c.q4[c].den)
                continue;
            x[c] = op->c.q4[c];
        }
        return;
    case SWS_OP_LSHIFT: {
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        const AVRational factor = Q(1 << op->c.u);
        for (int c = 0; c < 4; c++) {
            if (x[c].den)
                x[c] = av_mul_q(x[c], factor);
        }
        return;
    }
    case SWS_OP_RSHIFT:
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        for (int c = 0; c < 4; c++) {
            if (x[c].den)
                x[c] = Q((x[c].num / x[c].den) >> op->c.u);
        }
        return;
    case SWS_OP_SWIZZLE: {
        /* snapshot the inputs first, the permutation may overlap */
        const AVRational in[4] = { x[0], x[1], x[2], x[3] };
        for (int c = 0; c < 4; c++)
            x[c] = in[op->swizzle.in[c]];
        return;
    }
    case SWS_OP_CONVERT: {
        /* only conversions to an integer type change the value */
        if (!ff_sws_pixel_type_is_int(op->convert.to))
            return;
        const AVRational scale = ff_sws_pixel_expand(op->type, op->convert.to);
        for (int c = 0; c < 4; c++) {
            if (x[c].den)
                x[c] = Q(x[c].num / x[c].den); /* integer division */
            if (op->convert.expand)
                x[c] = av_mul_q(x[c], scale);
        }
        return;
    }
    case SWS_OP_DITHER:
        av_assert1(!ff_sws_pixel_type_is_int(op->type));
        /* the dither is modelled as a constant offset of 1/2 */
        for (int c = 0; c < 4; c++) {
            if (x[c].den)
                x[c] = av_add_q(x[c], av_make_q(1, 2));
        }
        return;
    case SWS_OP_MIN:
        for (int c = 0; c < 4; c++)
            x[c] = av_min_q(x[c], op->c.q4[c]);
        return;
    case SWS_OP_MAX:
        for (int c = 0; c < 4; c++)
            x[c] = av_max_q(x[c], op->c.q4[c]);
        return;
    case SWS_OP_LINEAR: {
        av_assert1(!ff_sws_pixel_type_is_int(op->type));
        const AVRational in[4] = { x[0], x[1], x[2], x[3] };
        for (int row = 0; row < 4; row++) {
            /* column 4 holds the constant offset of the row */
            AVRational acc = op->lin.m[row][4];
            for (int col = 0; col < 4; col++)
                acc = av_add_q(acc, av_mul_q(in[col], op->lin.m[row][col]));
            x[row] = acc;
        }
        return;
    }
    case SWS_OP_SCALE:
        for (int c = 0; c < 4; c++) {
            if (x[c].den)
                x[c] = av_mul_q(x[c], op->c.q);
        }
        return;
    }

    av_unreachable("Invalid operation type!");
}
214
215
/* merge_comp_flags() forms a monoid with flags_identity as the null element */
216
static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT;
217
static unsigned merge_comp_flags(unsigned a, unsigned b)
218
0
{
219
0
    const unsigned flags_or  = SWS_COMP_GARBAGE;
220
0
    const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT;
221
0
    return ((a & b) & flags_and) | ((a | b) & flags_or);
222
0
}
223
224
/* Linearly propagate flags per component */
225
static void propagate_flags(SwsOp *op, const SwsComps *prev)
226
0
{
227
0
    for (int i = 0; i < 4; i++)
228
0
        op->comps.flags[i] = prev->flags[i];
229
0
}
230
231
/* Clear undefined values in dst with src */
232
static void clear_undefined_values(AVRational dst[4], const AVRational src[4])
233
0
{
234
0
    for (int i = 0; i < 4; i++) {
235
0
        if (dst[i].den == 0)
236
0
            dst[i] = src[i];
237
0
    }
238
0
}
239
240
/* Infer + propagate known information about components */
241
void ff_sws_op_list_update_comps(SwsOpList *ops)
242
0
{
243
0
    SwsComps next = { .unused = {true, true, true, true} };
244
0
    SwsComps prev = { .flags = {
245
0
        SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE,
246
0
    }};
247
248
    /* Forwards pass, propagates knowledge about the incoming pixel values */
249
0
    for (int n = 0; n < ops->num_ops; n++) {
250
0
        SwsOp *op = &ops->ops[n];
251
252
0
        switch (op->op) {
253
0
        case SWS_OP_READ:
254
0
        case SWS_OP_LINEAR:
255
0
        case SWS_OP_SWAP_BYTES:
256
0
        case SWS_OP_UNPACK:
257
0
            break; /* special cases, handled below */
258
0
        default:
259
0
            memcpy(op->comps.min, prev.min, sizeof(prev.min));
260
0
            memcpy(op->comps.max, prev.max, sizeof(prev.max));
261
0
            ff_sws_apply_op_q(op, op->comps.min);
262
0
            ff_sws_apply_op_q(op, op->comps.max);
263
0
            break;
264
0
        }
265
266
0
        switch (op->op) {
267
0
        case SWS_OP_READ:
268
            /* Active components are taken from the user-provided values,
269
             * other components are explicitly stripped */
270
0
            for (int i = 0; i < op->rw.elems; i++) {
271
0
                const int idx = op->rw.packed ? i : ops->order_src.in[i];
272
0
                op->comps.flags[i] = ops->comps_src.flags[idx];
273
0
                op->comps.min[i]   = ops->comps_src.min[idx];
274
0
                op->comps.max[i]   = ops->comps_src.max[idx];
275
0
            }
276
0
            for (int i = op->rw.elems; i < 4; i++) {
277
0
                op->comps.flags[i] = prev.flags[i];
278
0
                op->comps.min[i]   = prev.min[i];
279
0
                op->comps.max[i]   = prev.max[i];
280
0
            }
281
0
            break;
282
0
        case SWS_OP_SWAP_BYTES:
283
0
            for (int i = 0; i < 4; i++) {
284
0
                op->comps.flags[i] = prev.flags[i] ^ SWS_COMP_SWAPPED;
285
0
                op->comps.min[i]   = prev.min[i];
286
0
                op->comps.max[i]   = prev.max[i];
287
0
            }
288
0
            break;
289
0
        case SWS_OP_WRITE:
290
0
            for (int i = 0; i < op->rw.elems; i++)
291
0
                av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE));
292
            /* fall through */
293
0
        case SWS_OP_LSHIFT:
294
0
        case SWS_OP_RSHIFT:
295
0
            propagate_flags(op, &prev);
296
0
            break;
297
0
        case SWS_OP_MIN:
298
0
            propagate_flags(op, &prev);
299
0
            clear_undefined_values(op->comps.max, op->c.q4);
300
0
            break;
301
0
        case SWS_OP_MAX:
302
0
            propagate_flags(op, &prev);
303
0
            clear_undefined_values(op->comps.min, op->c.q4);
304
0
            break;
305
0
        case SWS_OP_DITHER:
306
            /* Strip zero flag because of the nonzero dithering offset */
307
0
            for (int i = 0; i < 4; i++)
308
0
                op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO;
309
0
            break;
310
0
        case SWS_OP_UNPACK:
311
0
            for (int i = 0; i < 4; i++) {
312
0
                const int pattern = op->pack.pattern[i];
313
0
                if (pattern) {
314
0
                    av_assert1(pattern < 32);
315
0
                    op->comps.flags[i] = prev.flags[0];
316
0
                    op->comps.min[i]   = Q(0);
317
0
                    op->comps.max[i]   = Q((1ULL << pattern) - 1);
318
0
                } else
319
0
                    op->comps.flags[i] = SWS_COMP_GARBAGE;
320
0
            }
321
0
            break;
322
0
        case SWS_OP_PACK: {
323
0
            unsigned flags = flags_identity;
324
0
            for (int i = 0; i < 4; i++) {
325
0
                if (op->pack.pattern[i])
326
0
                    flags = merge_comp_flags(flags, prev.flags[i]);
327
0
                if (i > 0) /* clear remaining comps for sanity */
328
0
                    op->comps.flags[i] = SWS_COMP_GARBAGE;
329
0
            }
330
0
            op->comps.flags[0] = flags;
331
0
            break;
332
0
        }
333
0
        case SWS_OP_CLEAR:
334
0
            for (int i = 0; i < 4; i++) {
335
0
                if (op->c.q4[i].den) {
336
0
                    op->comps.flags[i] = 0;
337
0
                    if (op->c.q4[i].num == 0)
338
0
                        op->comps.flags[i] |= SWS_COMP_ZERO;
339
0
                    if (op->c.q4[i].den == 1)
340
0
                        op->comps.flags[i] |= SWS_COMP_EXACT;
341
0
                } else {
342
0
                    op->comps.flags[i] = prev.flags[i];
343
0
                }
344
0
            }
345
0
            break;
346
0
        case SWS_OP_SWIZZLE:
347
0
            for (int i = 0; i < 4; i++)
348
0
                op->comps.flags[i] = prev.flags[op->swizzle.in[i]];
349
0
            break;
350
0
        case SWS_OP_CONVERT:
351
0
            for (int i = 0; i < 4; i++) {
352
0
                op->comps.flags[i] = prev.flags[i];
353
0
                if (ff_sws_pixel_type_is_int(op->convert.to))
354
0
                    op->comps.flags[i] |= SWS_COMP_EXACT;
355
0
            }
356
0
            break;
357
0
        case SWS_OP_LINEAR:
358
0
            for (int i = 0; i < 4; i++) {
359
0
                unsigned flags = flags_identity;
360
0
                AVRational min = Q(0), max = Q(0);
361
0
                for (int j = 0; j < 4; j++) {
362
0
                    const AVRational k = op->lin.m[i][j];
363
0
                    AVRational mink = av_mul_q(prev.min[j], k);
364
0
                    AVRational maxk = av_mul_q(prev.max[j], k);
365
0
                    if (k.num) {
366
0
                        flags = merge_comp_flags(flags, prev.flags[j]);
367
0
                        if (k.den != 1) /* fractional coefficient */
368
0
                            flags &= ~SWS_COMP_EXACT;
369
0
                        if (k.num < 0)
370
0
                            FFSWAP(AVRational, mink, maxk);
371
0
                        min = av_add_q(min, mink);
372
0
                        max = av_add_q(max, maxk);
373
0
                    }
374
0
                }
375
0
                if (op->lin.m[i][4].num) { /* nonzero offset */
376
0
                    flags &= ~SWS_COMP_ZERO;
377
0
                    if (op->lin.m[i][4].den != 1) /* fractional offset */
378
0
                        flags &= ~SWS_COMP_EXACT;
379
0
                    min = av_add_q(min, op->lin.m[i][4]);
380
0
                    max = av_add_q(max, op->lin.m[i][4]);
381
0
                }
382
0
                op->comps.flags[i] = flags;
383
0
                op->comps.min[i] = min;
384
0
                op->comps.max[i] = max;
385
0
            }
386
0
            break;
387
0
        case SWS_OP_SCALE:
388
0
            for (int i = 0; i < 4; i++) {
389
0
                op->comps.flags[i] = prev.flags[i];
390
0
                if (op->c.q.den != 1) /* fractional scale */
391
0
                    op->comps.flags[i] &= ~SWS_COMP_EXACT;
392
0
                if (op->c.q.num < 0)
393
0
                    FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]);
394
0
            }
395
0
            break;
396
397
0
        case SWS_OP_INVALID:
398
0
        case SWS_OP_TYPE_NB:
399
0
            av_unreachable("Invalid operation type!");
400
0
        }
401
402
0
        prev = op->comps;
403
0
    }
404
405
    /* Backwards pass, solves for component dependencies */
406
0
    for (int n = ops->num_ops - 1; n >= 0; n--) {
407
0
        SwsOp *op = &ops->ops[n];
408
409
0
        switch (op->op) {
410
0
        case SWS_OP_READ:
411
0
        case SWS_OP_WRITE:
412
0
            for (int i = 0; i < op->rw.elems; i++)
413
0
                op->comps.unused[i] = op->op == SWS_OP_READ;
414
0
            for (int i = op->rw.elems; i < 4; i++)
415
0
                op->comps.unused[i] = next.unused[i];
416
0
            break;
417
0
        case SWS_OP_SWAP_BYTES:
418
0
        case SWS_OP_LSHIFT:
419
0
        case SWS_OP_RSHIFT:
420
0
        case SWS_OP_CONVERT:
421
0
        case SWS_OP_DITHER:
422
0
        case SWS_OP_MIN:
423
0
        case SWS_OP_MAX:
424
0
        case SWS_OP_SCALE:
425
0
            for (int i = 0; i < 4; i++)
426
0
                op->comps.unused[i] = next.unused[i];
427
0
            break;
428
0
        case SWS_OP_UNPACK: {
429
0
            bool unused = true;
430
0
            for (int i = 0; i < 4; i++) {
431
0
                if (op->pack.pattern[i])
432
0
                    unused &= next.unused[i];
433
0
                op->comps.unused[i] = i > 0;
434
0
            }
435
0
            op->comps.unused[0] = unused;
436
0
            break;
437
0
        }
438
0
        case SWS_OP_PACK:
439
0
            for (int i = 0; i < 4; i++) {
440
0
                if (op->pack.pattern[i])
441
0
                    op->comps.unused[i] = next.unused[0];
442
0
                else
443
0
                    op->comps.unused[i] = true;
444
0
            }
445
0
            break;
446
0
        case SWS_OP_CLEAR:
447
0
            for (int i = 0; i < 4; i++) {
448
0
                if (op->c.q4[i].den)
449
0
                    op->comps.unused[i] = true;
450
0
                else
451
0
                    op->comps.unused[i] = next.unused[i];
452
0
            }
453
0
            break;
454
0
        case SWS_OP_SWIZZLE: {
455
0
            bool unused[4] = { true, true, true, true };
456
0
            for (int i = 0; i < 4; i++)
457
0
                unused[op->swizzle.in[i]] &= next.unused[i];
458
0
            for (int i = 0; i < 4; i++)
459
0
                op->comps.unused[i] = unused[i];
460
0
            break;
461
0
        }
462
0
        case SWS_OP_LINEAR:
463
0
            for (int j = 0; j < 4; j++) {
464
0
                bool unused = true;
465
0
                for (int i = 0; i < 4; i++) {
466
0
                    if (op->lin.m[i][j].num)
467
0
                        unused &= next.unused[i];
468
0
                }
469
0
                op->comps.unused[j] = unused;
470
0
            }
471
0
            break;
472
0
        }
473
474
0
        next = op->comps;
475
0
    }
476
0
}
477
478
/* Drop the reference held by a dither operation on its matrix (the only
 * operation type handled here), then reset the whole operation to zero */
static void op_uninit(SwsOp *op)
{
    if (op->op == SWS_OP_DITHER)
        av_refstruct_unref(&op->dither.matrix);

    *op = (SwsOp) {0};
}
488
489
SwsOpList *ff_sws_op_list_alloc(void)
490
0
{
491
0
    SwsOpList *ops = av_mallocz(sizeof(SwsOpList));
492
0
    if (!ops)
493
0
        return NULL;
494
495
0
    ops->order_src = ops->order_dst = SWS_SWIZZLE(0, 1, 2, 3);
496
0
    ff_fmt_clear(&ops->src);
497
0
    ff_fmt_clear(&ops->dst);
498
0
    return ops;
499
0
}
500
501
/**
 * Free an operation list together with everything its operations own, and
 * reset the caller's pointer to NULL. Does nothing if *p_ops is NULL.
 */
void ff_sws_op_list_free(SwsOpList **p_ops)
{
    SwsOpList *list = *p_ops;

    if (list) {
        for (int n = 0; n < list->num_ops; n++)
            op_uninit(&list->ops[n]);

        av_freep(&list->ops);
        av_free(list);
        *p_ops = NULL;
    }
}
514
515
SwsOpList *ff_sws_op_list_duplicate(const SwsOpList *ops)
516
0
{
517
0
    SwsOpList *copy = av_malloc(sizeof(*copy));
518
0
    if (!copy)
519
0
        return NULL;
520
521
0
    int num = ops->num_ops;
522
0
    if (num)
523
0
        num = 1 << av_ceil_log2(num);
524
525
0
    *copy = *ops;
526
0
    copy->ops = av_memdup(ops->ops, num * sizeof(ops->ops[0]));
527
0
    if (!copy->ops) {
528
0
        av_free(copy);
529
0
        return NULL;
530
0
    }
531
532
0
    for (int i = 0; i < ops->num_ops; i++) {
533
0
        const SwsOp *op = &ops->ops[i];
534
0
        switch (op->op) {
535
0
        case SWS_OP_DITHER:
536
0
            av_refstruct_ref(copy->ops[i].dither.matrix);
537
0
            break;
538
0
        }
539
0
    }
540
541
0
    return copy;
542
0
}
543
544
/**
 * Remove `count` consecutive operations starting at `index`, releasing
 * whatever they own, and close the gap by moving the following operations
 * down.
 *
 * Every removed operation is uninitialized, not just the first one, so that
 * removing several operations at once does not leak their references. A
 * `count` of zero leaves the list untouched.
 *
 * @param ops   list to modify
 * @param index position of the first operation to remove
 * @param count number of operations to remove; index + count must not exceed
 *              the number of operations in the list
 */
void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count)
{
    const int end = ops->num_ops - count;
    av_assert2(index >= 0 && count >= 0 && index + count <= ops->num_ops);

    for (int i = 0; i < count; i++)
        op_uninit(&ops->ops[index + i]);

    /* Shallow moves: ownership travels with the operation */
    for (int i = index; i < end; i++)
        ops->ops[i] = ops->ops[i + count];
    ops->num_ops = end;
}
553
554
/**
 * Insert a copy of `*op` at position `index`, moving the operations from
 * that position onwards up by one.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the array could not be grown; in
 *         the latter case `*op` is uninitialized before returning
 */
int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op)
{
    /* Grow the array by one slot at the end */
    if (!av_dynarray2_add((void **) &ops->ops, &ops->num_ops, sizeof(*op), NULL)) {
        op_uninit(op);
        return AVERROR(ENOMEM);
    }

    /* Open up the gap, working from the back */
    for (int dst = ops->num_ops - 1; dst > index; dst--)
        ops->ops[dst] = ops->ops[dst - 1];

    ops->ops[index] = *op;
    return 0;
}
567
568
/* Append `*op` to the end of the list; same return values as
 * ff_sws_op_list_insert_at() */
int ff_sws_op_list_append(SwsOpList *ops, SwsOp *op)
{
    const int tail = ops->num_ops;
    return ff_sws_op_list_insert_at(ops, tail, op);
}
572
573
bool ff_sws_op_list_is_noop(const SwsOpList *ops)
574
0
{
575
0
    if (!ops->num_ops)
576
0
        return true;
577
578
0
    const SwsOp *read  = &ops->ops[0];
579
0
    const SwsOp *write = &ops->ops[1];
580
0
    if (ops->num_ops != 2 ||
581
0
        read->op != SWS_OP_READ ||
582
0
        write->op != SWS_OP_WRITE ||
583
0
        read->type != write->type ||
584
0
        read->rw.packed != write->rw.packed ||
585
0
        read->rw.elems != write->rw.elems ||
586
0
        read->rw.frac != write->rw.frac)
587
0
        return false;
588
589
    /**
590
     * Note that this check is unlikely to ever be hit in practice, since it
591
     * would imply the existence of planar formats with different plane orders
592
     * between them, e.g. rgbap <-> gbrap, which doesn't currently exist.
593
     * However, the check is cheap and lets me sleep at night.
594
     */
595
0
    const int num_planes = read->rw.packed ? 1 : read->rw.elems;
596
0
    for (int i = 0; i < num_planes; i++) {
597
0
        if (ops->order_src.in[i] != ops->order_dst.in[i])
598
0
            return false;
599
0
    }
600
601
0
    return true;
602
0
}
603
604
int ff_sws_op_list_max_size(const SwsOpList *ops)
605
0
{
606
0
    int max_size = 0;
607
0
    for (int i = 0; i < ops->num_ops; i++) {
608
0
        const int size = ff_sws_pixel_type_size(ops->ops[i].type);
609
0
        max_size = FFMAX(max_size, size);
610
0
    }
611
612
0
    return max_size;
613
0
}
614
615
uint32_t ff_sws_linear_mask(const SwsLinearOp c)
616
0
{
617
0
    uint32_t mask = 0;
618
0
    for (int i = 0; i < 4; i++) {
619
0
        for (int j = 0; j < 5; j++) {
620
0
            if (av_cmp_q(c.m[i][j], Q(i == j)))
621
0
                mask |= SWS_MASK(i, j);
622
0
        }
623
0
    }
624
0
    return mask;
625
0
}
626
627
static const char *describe_lin_mask(uint32_t mask)
628
0
{
629
    /* Try to be fairly descriptive without assuming too much */
630
0
    static const struct {
631
0
        char name[24];
632
0
        uint32_t mask;
633
0
    } patterns[] = {
634
0
        { "noop",               0 },
635
0
        { "luma",               SWS_MASK_LUMA },
636
0
        { "alpha",              SWS_MASK_ALPHA },
637
0
        { "luma+alpha",         SWS_MASK_LUMA | SWS_MASK_ALPHA },
638
0
        { "dot3",               0x7 },
639
0
        { "dot4",               0xF },
640
0
        { "row0",               SWS_MASK_ROW(0) },
641
0
        { "row0+alpha",         SWS_MASK_ROW(0) | SWS_MASK_ALPHA },
642
0
        { "col0",               SWS_MASK_COL(0) },
643
0
        { "col0+off3",          SWS_MASK_COL(0) | SWS_MASK_OFF3 },
644
0
        { "off3",               SWS_MASK_OFF3 },
645
0
        { "off3+alpha",         SWS_MASK_OFF3 | SWS_MASK_ALPHA },
646
0
        { "diag3",              SWS_MASK_DIAG3 },
647
0
        { "diag4",              SWS_MASK_DIAG4 },
648
0
        { "diag3+alpha",        SWS_MASK_DIAG3 | SWS_MASK_ALPHA },
649
0
        { "diag3+off3",         SWS_MASK_DIAG3 | SWS_MASK_OFF3 },
650
0
        { "diag3+off3+alpha",   SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
651
0
        { "diag4+off4",         SWS_MASK_DIAG4 | SWS_MASK_OFF4 },
652
0
        { "matrix3",            SWS_MASK_MAT3 },
653
0
        { "matrix3+off3",       SWS_MASK_MAT3 | SWS_MASK_OFF3 },
654
0
        { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
655
0
        { "matrix4",            SWS_MASK_MAT4 },
656
0
        { "matrix4+off4",       SWS_MASK_MAT4 | SWS_MASK_OFF4 },
657
0
    };
658
659
0
    for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
660
0
        if (!(mask & ~patterns[i].mask))
661
0
            return patterns[i].name;
662
0
    }
663
664
0
    av_unreachable("Invalid linear mask!");
665
0
    return "ERR";
666
0
}
667
668
static char describe_comp_flags(unsigned flags)
669
0
{
670
0
    if (flags & SWS_COMP_GARBAGE)
671
0
        return 'X';
672
0
    else if (flags & SWS_COMP_ZERO)
673
0
        return '0';
674
0
    else if (flags & SWS_COMP_SWAPPED)
675
0
        return 'z';
676
0
    else if (flags & SWS_COMP_EXACT)
677
0
        return '+';
678
0
    else
679
0
        return '.';
680
0
}
681
682
/**
 * Format a plane order as ", via {a, b, ...}" into `buf`, listing the first
 * `planes` entries. The identity order yields an empty string instead, in
 * which case `buf` is not touched.
 */
static const char *describe_order(SwsSwizzleOp order, int planes, char buf[32])
{
    const SwsSwizzleOp identity = SWS_SWIZZLE(0, 1, 2, 3);
    if (order.mask == identity.mask)
        return "";

    av_strlcpy(buf, ", via {", 32);
    for (int p = 0; p < planes; p++) {
        const char *sep = p ? ", " : "";
        av_strlcatf(buf, 32, "%s%d", sep, order.in[p]);
    }
    av_strlcat(buf, "}", 32);
    return buf;
}
693
694
static const char *print_q(const AVRational q, char buf[], int buf_len)
695
0
{
696
0
    if (!q.den) {
697
0
        return q.num > 0 ? "inf" : q.num < 0 ? "-inf" : "nan";
698
0
    } else if (q.den == 1) {
699
0
        snprintf(buf, buf_len, "%d", q.num);
700
0
        return buf;
701
0
    } else if (abs(q.num) > 1000 || abs(q.den) > 1000) {
702
0
        snprintf(buf, buf_len, "%f", av_q2d(q));
703
0
        return buf;
704
0
    } else {
705
0
        snprintf(buf, buf_len, "%d/%d", q.num, q.den);
706
0
        return buf;
707
0
    }
708
0
}
709
710
0
#define PRINTQ(q) print_q(q, (char[32]){0}, sizeof(char[32]))
711
712
void ff_sws_op_list_print(void *log, int lev, int lev_extra,
713
                          const SwsOpList *ops)
714
0
{
715
0
    if (!ops->num_ops) {
716
0
        av_log(log, lev, "  (empty)\n");
717
0
        return;
718
0
    }
719
720
0
    for (int i = 0; i < ops->num_ops; i++) {
721
0
        const SwsOp *op = &ops->ops[i];
722
0
        const SwsOp *next = i + 1 < ops->num_ops ? &ops->ops[i + 1] : op;
723
0
        char buf[32];
724
725
0
        av_log(log, lev, "  [%3s %c%c%c%c -> %c%c%c%c] ",
726
0
               ff_sws_pixel_type_name(op->type),
727
0
               op->comps.unused[0] ? 'X' : '.',
728
0
               op->comps.unused[1] ? 'X' : '.',
729
0
               op->comps.unused[2] ? 'X' : '.',
730
0
               op->comps.unused[3] ? 'X' : '.',
731
0
               next->comps.unused[0] ? 'X' : describe_comp_flags(op->comps.flags[0]),
732
0
               next->comps.unused[1] ? 'X' : describe_comp_flags(op->comps.flags[1]),
733
0
               next->comps.unused[2] ? 'X' : describe_comp_flags(op->comps.flags[2]),
734
0
               next->comps.unused[3] ? 'X' : describe_comp_flags(op->comps.flags[3]));
735
736
0
        switch (op->op) {
737
0
        case SWS_OP_INVALID:
738
0
            av_log(log, lev, "SWS_OP_INVALID\n");
739
0
            break;
740
0
        case SWS_OP_READ:
741
0
        case SWS_OP_WRITE:
742
0
            av_log(log, lev, "%-20s: %d elem(s) %s >> %d%s\n",
743
0
                   op->op == SWS_OP_READ ? "SWS_OP_READ"
744
0
                                         : "SWS_OP_WRITE",
745
0
                   op->rw.elems,  op->rw.packed ? "packed" : "planar",
746
0
                   op->rw.frac,
747
0
                   describe_order(op->op == SWS_OP_READ ? ops->order_src
748
0
                                                        : ops->order_dst,
749
0
                                  op->rw.packed ? 1 : op->rw.elems, buf));
750
0
            break;
751
0
        case SWS_OP_SWAP_BYTES:
752
0
            av_log(log, lev, "SWS_OP_SWAP_BYTES\n");
753
0
            break;
754
0
        case SWS_OP_LSHIFT:
755
0
            av_log(log, lev, "%-20s: << %u\n", "SWS_OP_LSHIFT", op->c.u);
756
0
            break;
757
0
        case SWS_OP_RSHIFT:
758
0
            av_log(log, lev, "%-20s: >> %u\n", "SWS_OP_RSHIFT", op->c.u);
759
0
            break;
760
0
        case SWS_OP_PACK:
761
0
        case SWS_OP_UNPACK:
762
0
            av_log(log, lev, "%-20s: {%d %d %d %d}\n",
763
0
                   op->op == SWS_OP_PACK ? "SWS_OP_PACK"
764
0
                                         : "SWS_OP_UNPACK",
765
0
                   op->pack.pattern[0], op->pack.pattern[1],
766
0
                   op->pack.pattern[2], op->pack.pattern[3]);
767
0
            break;
768
0
        case SWS_OP_CLEAR:
769
0
            av_log(log, lev, "%-20s: {%s %s %s %s}\n", "SWS_OP_CLEAR",
770
0
                   op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
771
0
                   op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
772
0
                   op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
773
0
                   op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
774
0
            break;
775
0
        case SWS_OP_SWIZZLE:
776
0
            av_log(log, lev, "%-20s: %d%d%d%d\n", "SWS_OP_SWIZZLE",
777
0
                   op->swizzle.x, op->swizzle.y, op->swizzle.z, op->swizzle.w);
778
0
            break;
779
0
        case SWS_OP_CONVERT:
780
0
            av_log(log, lev, "%-20s: %s -> %s%s\n", "SWS_OP_CONVERT",
781
0
                   ff_sws_pixel_type_name(op->type),
782
0
                   ff_sws_pixel_type_name(op->convert.to),
783
0
                   op->convert.expand ? " (expand)" : "");
784
0
            break;
785
0
        case SWS_OP_DITHER:
786
0
            av_log(log, lev, "%-20s: %dx%d matrix + {%d %d %d %d}\n", "SWS_OP_DITHER",
787
0
                    1 << op->dither.size_log2, 1 << op->dither.size_log2,
788
0
                    op->dither.y_offset[0], op->dither.y_offset[1],
789
0
                    op->dither.y_offset[2], op->dither.y_offset[3]);
790
0
            break;
791
0
        case SWS_OP_MIN:
792
0
            av_log(log, lev, "%-20s: x <= {%s %s %s %s}\n", "SWS_OP_MIN",
793
0
                    op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
794
0
                    op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
795
0
                    op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
796
0
                    op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
797
0
            break;
798
0
        case SWS_OP_MAX:
799
0
            av_log(log, lev, "%-20s: {%s %s %s %s} <= x\n", "SWS_OP_MAX",
800
0
                    op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
801
0
                    op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
802
0
                    op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
803
0
                    op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
804
0
            break;
805
0
        case SWS_OP_LINEAR:
806
0
            av_log(log, lev, "%-20s: %s [[%s %s %s %s %s] "
807
0
                                        "[%s %s %s %s %s] "
808
0
                                        "[%s %s %s %s %s] "
809
0
                                        "[%s %s %s %s %s]]\n",
810
0
                   "SWS_OP_LINEAR", describe_lin_mask(op->lin.mask),
811
0
                   PRINTQ(op->lin.m[0][0]), PRINTQ(op->lin.m[0][1]), PRINTQ(op->lin.m[0][2]), PRINTQ(op->lin.m[0][3]), PRINTQ(op->lin.m[0][4]),
812
0
                   PRINTQ(op->lin.m[1][0]), PRINTQ(op->lin.m[1][1]), PRINTQ(op->lin.m[1][2]), PRINTQ(op->lin.m[1][3]), PRINTQ(op->lin.m[1][4]),
813
0
                   PRINTQ(op->lin.m[2][0]), PRINTQ(op->lin.m[2][1]), PRINTQ(op->lin.m[2][2]), PRINTQ(op->lin.m[2][3]), PRINTQ(op->lin.m[2][4]),
814
0
                   PRINTQ(op->lin.m[3][0]), PRINTQ(op->lin.m[3][1]), PRINTQ(op->lin.m[3][2]), PRINTQ(op->lin.m[3][3]), PRINTQ(op->lin.m[3][4]));
815
0
            break;
816
0
        case SWS_OP_SCALE:
817
0
            av_log(log, lev, "%-20s: * %s\n", "SWS_OP_SCALE",
818
0
                   PRINTQ(op->c.q));
819
0
            break;
820
0
        case SWS_OP_TYPE_NB:
821
0
            break;
822
0
        }
823
824
0
        if (op->comps.min[0].den || op->comps.min[1].den ||
825
0
            op->comps.min[2].den || op->comps.min[3].den ||
826
0
            op->comps.max[0].den || op->comps.max[1].den ||
827
0
            op->comps.max[2].den || op->comps.max[3].den)
828
0
        {
829
0
            av_log(log, lev_extra, "    min: {%s, %s, %s, %s}, max: {%s, %s, %s, %s}\n",
830
0
                   next->comps.unused[0] ? "_" : PRINTQ(op->comps.min[0]),
831
0
                   next->comps.unused[1] ? "_" : PRINTQ(op->comps.min[1]),
832
0
                   next->comps.unused[2] ? "_" : PRINTQ(op->comps.min[2]),
833
0
                   next->comps.unused[3] ? "_" : PRINTQ(op->comps.min[3]),
834
0
                   next->comps.unused[0] ? "_" : PRINTQ(op->comps.max[0]),
835
0
                   next->comps.unused[1] ? "_" : PRINTQ(op->comps.max[1]),
836
0
                   next->comps.unused[2] ? "_" : PRINTQ(op->comps.max[2]),
837
0
                   next->comps.unused[3] ? "_" : PRINTQ(op->comps.max[3]));
838
0
        }
839
840
0
    }
841
842
0
    av_log(log, lev, "    (X = unused, z = byteswapped, + = exact, 0 = zero)\n");
843
0
}
844
845
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
846
                               const SwsOpList *ops, SwsCompiledOp *out)
847
0
{
848
0
    SwsOpList *copy, rest;
849
0
    SwsCompiledOp compiled = {0};
850
0
    int ret = 0;
851
852
0
    copy = ff_sws_op_list_duplicate(ops);
853
0
    if (!copy)
854
0
        return AVERROR(ENOMEM);
855
856
    /* Ensure these are always set during compilation */
857
0
    ff_sws_op_list_update_comps(copy);
858
859
    /* Make an on-stack copy of `ops` to ensure we can still properly clean up
860
     * the copy afterwards */
861
0
    rest = *copy;
862
863
0
    ret = backend->compile(ctx, &rest, &compiled);
864
0
    if (ret < 0) {
865
0
        int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
866
0
        av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
867
0
               backend->name, av_err2str(ret));
868
0
        if (rest.num_ops != ops->num_ops) {
869
0
            av_log(ctx, msg_lev, "Uncompiled remainder:\n");
870
0
            ff_sws_op_list_print(ctx, msg_lev, AV_LOG_TRACE, &rest);
871
0
        }
872
0
    } else {
873
0
        *out = compiled;
874
0
    }
875
876
0
    ff_sws_op_list_free(&copy);
877
0
    return ret;
878
0
}
879
880
/**
 * Compile `ops` using the first backend in `ff_sws_op_backends` that accepts
 * it. Backends are tried in table order; any backend failure simply moves on
 * to the next candidate.
 *
 * Returns 0 and fills `*out` on success. If every backend fails, the
 * operation list is printed as a warning and AVERROR(ENOTSUP) is returned.
 */
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
{
    for (const SwsOpBackend *const *it = ff_sws_op_backends; *it; it++) {
        const SwsOpBackend *candidate = *it;

        if (ff_sws_ops_compile_backend(ctx, candidate, ops, out) >= 0) {
            av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
                   "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
                   candidate->name, out->block_size, out->over_read,
                   out->over_write, out->cpu_flags);
            return 0;
        }
    }

    av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
    ff_sws_op_list_print(ctx, AV_LOG_WARNING, AV_LOG_TRACE, ops);
    return AVERROR(ENOTSUP);
}
898
899
typedef struct SwsOpPass {
900
    SwsCompiledOp comp;
901
    SwsOpExec exec_base;
902
    int num_blocks;
903
    int tail_off_in;
904
    int tail_off_out;
905
    int tail_size_in;
906
    int tail_size_out;
907
    int planes_in;
908
    int planes_out;
909
    int pixel_bits_in;
910
    int pixel_bits_out;
911
    int idx_in[4];
912
    int idx_out[4];
913
    bool memcpy_in;
914
    bool memcpy_out;
915
} SwsOpPass;
916
917
static void op_pass_free(void *ptr)
918
0
{
919
0
    SwsOpPass *p = ptr;
920
0
    if (!p)
921
0
        return;
922
923
0
    if (p->comp.free)
924
0
        p->comp.free(p->comp.priv);
925
926
0
    av_free(p);
927
0
}
928
929
static inline SwsImg img_shift_idx(const SwsImg *base, const int y,
930
                                   const int plane_idx[4])
931
0
{
932
0
    SwsImg img = *base;
933
0
    for (int i = 0; i < 4; i++) {
934
0
        const int idx = plane_idx[i];
935
0
        if (idx >= 0) {
936
0
            const int yshift = y >> ff_fmt_vshift(base->fmt, idx);
937
0
            img.data[i] = base->data[idx] + yshift * base->linesize[idx];
938
0
        } else {
939
0
            img.data[i] = NULL;
940
0
        }
941
0
    }
942
0
    return img;
943
0
}
944
945
static void op_pass_setup(const SwsImg *out_base, const SwsImg *in_base,
946
                          const SwsPass *pass)
947
0
{
948
0
    const AVPixFmtDescriptor *indesc  = av_pix_fmt_desc_get(in_base->fmt);
949
0
    const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out_base->fmt);
950
951
0
    SwsOpPass *p = pass->priv;
952
0
    SwsOpExec *exec = &p->exec_base;
953
0
    const SwsCompiledOp *comp = &p->comp;
954
0
    const int block_size = comp->block_size;
955
0
    p->num_blocks = (pass->width + block_size - 1) / block_size;
956
957
    /* Set up main loop parameters */
958
0
    const int aligned_w  = p->num_blocks * block_size;
959
0
    const int safe_width = (p->num_blocks - 1) * block_size;
960
0
    const int tail_size  = pass->width - safe_width;
961
0
    p->tail_off_in   = safe_width * p->pixel_bits_in  >> 3;
962
0
    p->tail_off_out  = safe_width * p->pixel_bits_out >> 3;
963
0
    p->tail_size_in  = tail_size  * p->pixel_bits_in  >> 3;
964
0
    p->tail_size_out = tail_size  * p->pixel_bits_out >> 3;
965
0
    p->memcpy_in     = false;
966
0
    p->memcpy_out    = false;
967
968
0
    const SwsImg in  = img_shift_idx(in_base,  0, p->idx_in);
969
0
    const SwsImg out = img_shift_idx(out_base, 0, p->idx_out);
970
971
0
    for (int i = 0; i < p->planes_in; i++) {
972
0
        const int idx        = p->idx_in[i];
973
0
        const int sub_x      = (idx == 1 || idx == 2) ? indesc->log2_chroma_w : 0;
974
0
        const int plane_w    = (aligned_w + sub_x) >> sub_x;
975
0
        const int plane_pad  = (comp->over_read + sub_x) >> sub_x;
976
0
        const int plane_size = plane_w * p->pixel_bits_in >> 3;
977
0
        p->memcpy_in |= plane_size + plane_pad > in.linesize[i];
978
0
        exec->in_stride[i] = in.linesize[i];
979
0
    }
980
981
0
    for (int i = 0; i < p->planes_out; i++) {
982
0
        const int idx        = p->idx_out[i];
983
0
        const int sub_x      = (idx == 1 || idx == 2) ? outdesc->log2_chroma_w : 0;
984
0
        const int plane_w    = (aligned_w + sub_x) >> sub_x;
985
0
        const int plane_pad  = (comp->over_write + sub_x) >> sub_x;
986
0
        const int plane_size = plane_w * p->pixel_bits_out >> 3;
987
0
        p->memcpy_out |= plane_size + plane_pad > out.linesize[i];
988
0
        exec->out_stride[i] = out.linesize[i];
989
0
    }
990
991
    /* Pre-fill pointer bump for the main section only; this value does not
992
     * matter at all for the tail / last row handlers because they only ever
993
     * process a single line */
994
0
    const int blocks_main = p->num_blocks - p->memcpy_out;
995
0
    for (int i = 0; i < 4; i++) {
996
0
        exec->in_bump[i]  = in.linesize[i]  - blocks_main * exec->block_size_in;
997
0
        exec->out_bump[i] = out.linesize[i] - blocks_main * exec->block_size_out;
998
0
    }
999
0
}
1000
1001
/* Dispatch kernel over the last column of the image using memcpy */
1002
static av_always_inline void
1003
handle_tail(const SwsOpPass *p, SwsOpExec *exec,
1004
            const SwsImg *out_base, const bool copy_out,
1005
            const SwsImg *in_base, const bool copy_in,
1006
            int y, const int h)
1007
0
{
1008
0
    DECLARE_ALIGNED_64(uint8_t, tmp)[2][4][sizeof(uint32_t[128])];
1009
1010
0
    const SwsCompiledOp *comp = &p->comp;
1011
0
    const int tail_size_in  = p->tail_size_in;
1012
0
    const int tail_size_out = p->tail_size_out;
1013
0
    const int bx = p->num_blocks - 1;
1014
1015
0
    SwsImg in  = img_shift_idx(in_base,  y, p->idx_in);
1016
0
    SwsImg out = img_shift_idx(out_base, y, p->idx_out);
1017
0
    for (int i = 0; i < p->planes_in; i++) {
1018
0
        in.data[i]  += p->tail_off_in;
1019
0
        if (copy_in) {
1020
0
            exec->in[i] = (void *) tmp[0][i];
1021
0
            exec->in_stride[i] = sizeof(tmp[0][i]);
1022
0
        } else {
1023
0
            exec->in[i] = in.data[i];
1024
0
        }
1025
0
    }
1026
1027
0
    for (int i = 0; i < p->planes_out; i++) {
1028
0
        out.data[i] += p->tail_off_out;
1029
0
        if (copy_out) {
1030
0
            exec->out[i] = (void *) tmp[1][i];
1031
0
            exec->out_stride[i] = sizeof(tmp[1][i]);
1032
0
        } else {
1033
0
            exec->out[i] = out.data[i];
1034
0
        }
1035
0
    }
1036
1037
0
    for (int y_end = y + h; y < y_end; y++) {
1038
0
        if (copy_in) {
1039
0
            for (int i = 0; i < p->planes_in; i++) {
1040
0
                av_assert2(tmp[0][i] + tail_size_in < (uint8_t *) tmp[1]);
1041
0
                memcpy(tmp[0][i], in.data[i], tail_size_in);
1042
0
                in.data[i] += in.linesize[i];
1043
0
            }
1044
0
        }
1045
1046
0
        comp->func(exec, comp->priv, bx, y, p->num_blocks, y + 1);
1047
1048
0
        if (copy_out) {
1049
0
            for (int i = 0; i < p->planes_out; i++) {
1050
0
                av_assert2(tmp[1][i] + tail_size_out < (uint8_t *) tmp[2]);
1051
0
                memcpy(out.data[i], tmp[1][i], tail_size_out);
1052
0
                out.data[i] += out.linesize[i];
1053
0
            }
1054
0
        }
1055
1056
0
        for (int i = 0; i < 4; i++) {
1057
0
            if (!copy_in)
1058
0
                exec->in[i] += in.linesize[i];
1059
0
            if (!copy_out)
1060
0
                exec->out[i] += out.linesize[i];
1061
0
        }
1062
0
    }
1063
0
}
1064
1065
static void op_pass_run(const SwsImg *out_base, const SwsImg *in_base,
1066
                        const int y, const int h, const SwsPass *pass)
1067
0
{
1068
0
    const SwsOpPass *p = pass->priv;
1069
0
    const SwsCompiledOp *comp = &p->comp;
1070
0
    const SwsImg in  = img_shift_idx(in_base,  y, p->idx_in);
1071
0
    const SwsImg out = img_shift_idx(out_base, y, p->idx_out);
1072
1073
    /* Fill exec metadata for this slice */
1074
0
    DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
1075
0
    exec.slice_y = y;
1076
0
    exec.slice_h = h;
1077
0
    for (int i = 0; i < 4; i++) {
1078
0
        exec.in[i]  = in.data[i];
1079
0
        exec.out[i] = out.data[i];
1080
0
    }
1081
1082
    /**
1083
     *  To ensure safety, we need to consider the following:
1084
     *
1085
     * 1. We can overread the input, unless this is the last line of an
1086
     *    unpadded buffer. All defined operations can handle arbitrary pixel
1087
     *    input, so overread of arbitrary data is fine.
1088
     *
1089
     * 2. We can overwrite the output, as long as we don't write more than the
1090
     *    amount of pixels that fit into one linesize. So we always need to
1091
     *    memcpy the last column on the output side if unpadded.
1092
     *
1093
     * 3. For the last row, we also need to memcpy the remainder of the input,
1094
     *    to avoid reading past the end of the buffer. Note that since we know
1095
     *    the run() function is called on stripes of the same buffer, we don't
1096
     *    need to worry about this for the end of a slice.
1097
     */
1098
1099
0
    const int last_slice  = y + h == pass->height;
1100
0
    const bool memcpy_in  = last_slice && p->memcpy_in;
1101
0
    const bool memcpy_out = p->memcpy_out;
1102
0
    const int num_blocks  = p->num_blocks;
1103
0
    const int blocks_main = num_blocks - memcpy_out;
1104
0
    const int h_main      = h - memcpy_in;
1105
1106
    /* Handle main section */
1107
0
    comp->func(&exec, comp->priv, 0, y, blocks_main, y + h_main);
1108
1109
0
    if (memcpy_in) {
1110
        /* Safe part of last row */
1111
0
        for (int i = 0; i < 4; i++) {
1112
0
            exec.in[i]  += h_main * in.linesize[i];
1113
0
            exec.out[i] += h_main * out.linesize[i];
1114
0
        }
1115
0
        comp->func(&exec, comp->priv, 0, y + h_main, num_blocks - 1, y + h);
1116
0
    }
1117
1118
    /* Handle last column via memcpy, takes over `exec` so call these last */
1119
0
    if (memcpy_out)
1120
0
        handle_tail(p, &exec, out_base, true, in_base, false, y, h_main);
1121
0
    if (memcpy_in)
1122
0
        handle_tail(p, &exec, out_base, memcpy_out, in_base, true, y + h_main, 1);
1123
0
}
1124
1125
static int rw_planes(const SwsOp *op)
1126
0
{
1127
0
    return op->rw.packed ? 1 : op->rw.elems;
1128
0
}
1129
1130
static int rw_pixel_bits(const SwsOp *op)
1131
0
{
1132
0
    const int elems = op->rw.packed ? op->rw.elems : 1;
1133
0
    const int size  = ff_sws_pixel_type_size(op->type);
1134
0
    const int bits  = 8 >> op->rw.frac;
1135
0
    av_assert1(bits >= 1);
1136
0
    return elems * size * bits;
1137
0
}
1138
1139
int ff_sws_compile_pass(SwsGraph *graph, SwsOpList *ops, int flags, SwsFormat dst,
1140
                        SwsPass *input, SwsPass **output)
1141
0
{
1142
0
    SwsContext *ctx = graph->ctx;
1143
0
    SwsOpPass *p = NULL;
1144
0
    const SwsOp *read = &ops->ops[0];
1145
0
    const SwsOp *write = &ops->ops[ops->num_ops - 1];
1146
0
    SwsPass *pass;
1147
0
    int ret;
1148
1149
    /* Check if the whole operation graph is an end-to-end no-op */
1150
0
    if (ff_sws_op_list_is_noop(ops)) {
1151
0
        *output = input;
1152
0
        return 0;
1153
0
    }
1154
1155
0
    if (ops->num_ops < 2) {
1156
0
        av_log(ctx, AV_LOG_ERROR, "Need at least two operations.\n");
1157
0
        return AVERROR(EINVAL);
1158
0
    }
1159
1160
0
    if (read->op != SWS_OP_READ || write->op != SWS_OP_WRITE) {
1161
0
        av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
1162
0
               "and write, respectively.\n");
1163
0
        return AVERROR(EINVAL);
1164
0
    }
1165
1166
0
    if (flags & SWS_OP_FLAG_OPTIMIZE)
1167
0
        RET(ff_sws_op_list_optimize(ops));
1168
0
    else
1169
0
        ff_sws_op_list_update_comps(ops);
1170
1171
0
    p = av_mallocz(sizeof(*p));
1172
0
    if (!p)
1173
0
        return AVERROR(ENOMEM);
1174
1175
0
    ret = ff_sws_ops_compile(ctx, ops, &p->comp);
1176
0
    if (ret < 0)
1177
0
        goto fail;
1178
1179
0
    p->planes_in  = rw_planes(read);
1180
0
    p->planes_out = rw_planes(write);
1181
0
    p->pixel_bits_in  = rw_pixel_bits(read);
1182
0
    p->pixel_bits_out = rw_pixel_bits(write);
1183
0
    p->exec_base = (SwsOpExec) {
1184
0
        .width  = dst.width,
1185
0
        .height = dst.height,
1186
0
        .block_size_in  = p->comp.block_size * p->pixel_bits_in  >> 3,
1187
0
        .block_size_out = p->comp.block_size * p->pixel_bits_out >> 3,
1188
0
    };
1189
1190
0
    for (int i = 0; i < 4; i++) {
1191
0
        p->idx_in[i]  = i < p->planes_in  ? ops->order_src.in[i] : -1;
1192
0
        p->idx_out[i] = i < p->planes_out ? ops->order_dst.in[i] : -1;
1193
0
    }
1194
1195
0
    pass = ff_sws_graph_add_pass(graph, dst.format, dst.width, dst.height, input,
1196
0
                                 1, p, op_pass_run);
1197
0
    if (!pass) {
1198
0
        ret = AVERROR(ENOMEM);
1199
0
        goto fail;
1200
0
    }
1201
0
    pass->setup = op_pass_setup;
1202
0
    pass->free  = op_pass_free;
1203
1204
0
    *output = pass;
1205
0
    return 0;
1206
1207
0
fail:
1208
0
    op_pass_free(p);
1209
0
    return ret;
1210
0
}