Coverage Report

Created: 2026-04-29 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libswscale/ops_optimizer.c
Line
Count
Source
1
/**
2
 * Copyright (C) 2025 Niklas Haas
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/attributes.h"
22
#include "libavutil/avassert.h"
23
#include "libavutil/bswap.h"
24
#include "libavutil/rational.h"
25
26
#include "ops.h"
27
#include "ops_internal.h"
28
29
#define RET(x)                                                                 \
30
0
    do {                                                                       \
31
0
        if ((ret = (x)) < 0)                                                   \
32
0
            return ret;                                                        \
33
0
    } while (0)
34
35
/**
36
 * Try to commute a clear op with the next operation. Makes any adjustments
37
 * to the operations as needed, but does not perform the actual commutation.
38
 *
39
 * Returns whether successful.
40
 */
41
static bool op_commute_clear(SwsOp *op, SwsOp *next)
42
0
{
43
0
    SwsClearOp tmp = {0};
44
45
0
    av_assert1(op->op == SWS_OP_CLEAR);
46
0
    switch (next->op) {
47
0
    case SWS_OP_CONVERT:
48
0
        op->type = next->convert.to;
49
0
        av_fallthrough;
50
0
    case SWS_OP_LSHIFT:
51
0
    case SWS_OP_RSHIFT:
52
0
    case SWS_OP_DITHER:
53
0
    case SWS_OP_MIN:
54
0
    case SWS_OP_MAX:
55
0
    case SWS_OP_SCALE:
56
0
    case SWS_OP_READ:
57
0
    case SWS_OP_FILTER_H:
58
0
    case SWS_OP_FILTER_V:
59
0
        ff_sws_apply_op_q(next, op->clear.value);
60
0
        return true;
61
0
    case SWS_OP_SWIZZLE:
62
0
        op->clear.mask = ff_sws_comp_mask_swizzle(op->clear.mask, next->swizzle);
63
0
        ff_sws_apply_op_q(next, op->clear.value);
64
0
        return true;
65
0
    case SWS_OP_SWAP_BYTES:
66
0
        switch (next->type) {
67
0
        case SWS_PIXEL_U16:
68
0
            ff_sws_apply_op_q(next, op->clear.value); /* always works */
69
0
            return true;
70
0
        case SWS_PIXEL_U32:
71
0
            for (int i = 0; i < 4; i++) {
72
0
                if (!SWS_COMP_TEST(op->clear.mask, i))
73
0
                    continue;
74
0
                uint32_t v = av_bswap32(op->clear.value[i].num);
75
0
                if (v > INT_MAX)
76
0
                    return false; /* can't represent as AVRational anymore */
77
0
                tmp.value[i] = Q(v);
78
0
            }
79
0
            op->clear = tmp;
80
0
            return true;
81
0
        default:
82
0
            return false;
83
0
        }
84
0
    case SWS_OP_INVALID:
85
0
    case SWS_OP_WRITE:
86
0
    case SWS_OP_LINEAR:
87
0
    case SWS_OP_PACK:
88
0
    case SWS_OP_UNPACK:
89
0
    case SWS_OP_CLEAR:
90
0
        return false;
91
0
    case SWS_OP_TYPE_NB:
92
0
        break;
93
0
    }
94
95
0
    av_unreachable("Invalid operation type!");
96
0
    return false;
97
0
}
98
99
 /**
100
  * Try to commute a swizzle op with the next operation. Makes any adjustments
101
  * to the operations as needed, but does not perform the actual commutation.
102
  *
103
  * Returns whether successful.
104
  */
105
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
106
0
{
107
0
    bool seen[4] = {0};
108
109
0
    av_assert1(op->op == SWS_OP_SWIZZLE);
110
0
    switch (next->op) {
111
0
    case SWS_OP_CONVERT:
112
0
        op->type = next->convert.to;
113
0
        av_fallthrough;
114
0
    case SWS_OP_SWAP_BYTES:
115
0
    case SWS_OP_LSHIFT:
116
0
    case SWS_OP_RSHIFT:
117
0
    case SWS_OP_SCALE:
118
0
    case SWS_OP_FILTER_H:
119
0
    case SWS_OP_FILTER_V:
120
0
        return true;
121
122
    /**
123
     * We can commute per-channel ops only if the per-channel constants are the
124
     * same for all duplicated channels; e.g.:
125
     *   SWIZZLE {0, 0, 0, 3}
126
     *   NEXT    {x, x, x, w}
127
     * ->
128
     *   NEXT    {x, _, _, w}
129
     *   SWIZZLE {0, 0, 0, 3}
130
     */
131
0
    case SWS_OP_MIN:
132
0
    case SWS_OP_MAX: {
133
0
        const SwsClampOp c = next->clamp;
134
0
        for (int i = 0; i < 4; i++) {
135
0
            if (!SWS_OP_NEEDED(op, i))
136
0
                continue;
137
0
            const int j = op->swizzle.in[i];
138
0
            if (seen[j] && av_cmp_q(next->clamp.limit[j], c.limit[i]))
139
0
                return false;
140
0
            next->clamp.limit[j] = c.limit[i];
141
0
            seen[j] = true;
142
0
        }
143
0
        return true;
144
0
    }
145
146
0
    case SWS_OP_DITHER: {
147
0
        const SwsDitherOp d = next->dither;
148
0
        for (int i = 0; i < 4; i++) {
149
0
            if (!SWS_OP_NEEDED(op, i))
150
0
                continue;
151
0
            const int j = op->swizzle.in[i];
152
0
            if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
153
0
                return false;
154
0
            next->dither.y_offset[j] = d.y_offset[i];
155
0
            seen[j] = true;
156
0
        }
157
0
        return true;
158
0
    }
159
160
0
    case SWS_OP_INVALID:
161
0
    case SWS_OP_READ:
162
0
    case SWS_OP_WRITE:
163
0
    case SWS_OP_SWIZZLE:
164
0
    case SWS_OP_CLEAR:
165
0
    case SWS_OP_LINEAR:
166
0
    case SWS_OP_PACK:
167
0
    case SWS_OP_UNPACK:
168
0
        return false;
169
0
    case SWS_OP_TYPE_NB:
170
0
        break;
171
0
    }
172
173
0
    av_unreachable("Invalid operation type!");
174
0
    return false;
175
0
}
176
177
/**
178
 * Try to commute a filter op with the previous operation. Makes any
179
 * adjustments to the operations as needed, but does not perform the actual
180
 * commutation.
181
 *
182
 * Returns whether successful.
183
 */
184
static bool op_commute_filter(SwsOp *op, SwsOp *prev)
185
0
{
186
0
    switch (prev->op) {
187
0
    case SWS_OP_SWIZZLE:
188
0
    case SWS_OP_SCALE:
189
0
    case SWS_OP_LINEAR:
190
0
    case SWS_OP_DITHER:
191
0
        prev->type = SWS_PIXEL_F32;
192
0
        return true;
193
0
    case SWS_OP_CONVERT:
194
0
        if (prev->convert.to == SWS_PIXEL_F32) {
195
0
            av_assert0(!prev->convert.expand);
196
0
            FFSWAP(SwsPixelType, op->type, prev->type);
197
0
            return true;
198
0
        }
199
0
        return false;
200
0
    case SWS_OP_INVALID:
201
0
    case SWS_OP_READ:
202
0
    case SWS_OP_WRITE:
203
0
    case SWS_OP_SWAP_BYTES:
204
0
    case SWS_OP_UNPACK:
205
0
    case SWS_OP_PACK:
206
0
    case SWS_OP_LSHIFT:
207
0
    case SWS_OP_RSHIFT:
208
0
    case SWS_OP_CLEAR:
209
0
    case SWS_OP_MIN:
210
0
    case SWS_OP_MAX:
211
0
    case SWS_OP_FILTER_H:
212
0
    case SWS_OP_FILTER_V:
213
0
        return false;
214
0
    case SWS_OP_TYPE_NB:
215
0
        break;
216
0
    }
217
218
0
    av_unreachable("Invalid operation type!");
219
0
    return false;
220
0
}
221
222
/* returns log2(x) only if x is a power of two, or 0 otherwise */
223
static int exact_log2(const int x)
224
0
{
225
0
    int p;
226
0
    if (x <= 0)
227
0
        return 0;
228
0
    p = av_log2(x);
229
0
    return (1 << p) == x ? p : 0;
230
0
}
231
232
static int exact_log2_q(const AVRational x)
233
0
{
234
0
    if (x.den == 1)
235
0
        return exact_log2(x.num);
236
0
    else if (x.num == 1)
237
0
        return -exact_log2(x.den);
238
0
    else
239
0
        return 0;
240
0
}
241
242
/**
243
 * If a linear operation can be reduced to a scalar multiplication, returns
244
 * the corresponding scaling factor, or 0 otherwise.
245
 */
246
static bool extract_scalar(const SwsLinearOp *c, SwsComps comps, SwsComps prev,
247
                           SwsScaleOp *out_scale)
248
0
{
249
0
    SwsScaleOp scale = {0};
250
251
    /* There are components not on the main diagonal */
252
0
    if (c->mask & ~SWS_MASK_DIAG4)
253
0
        return false;
254
255
0
    for (int i = 0; i < 4; i++) {
256
0
        const AVRational s = c->m[i][i];
257
0
        if ((prev.flags[i]  & SWS_COMP_ZERO) ||
258
0
            (comps.flags[i] & SWS_COMP_GARBAGE))
259
0
            continue;
260
0
        if (scale.factor.den && av_cmp_q(s, scale.factor))
261
0
            return false;
262
0
        scale.factor = s;
263
0
    }
264
265
0
    if (scale.factor.den)
266
0
        *out_scale = scale;
267
0
    return scale.factor.den;
268
0
}
269
270
/* Extracts an integer clear operation (subset) from the given linear op. */
271
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
272
                                  SwsClearOp *out_clear)
273
0
{
274
0
    SwsClearOp clear = {0};
275
0
    bool ret = false;
276
277
0
    for (int i = 0; i < 4; i++) {
278
0
        bool const_row = c->m[i][4].den == 1; /* offset is integer */
279
0
        for (int j = 0; j < 4; j++) {
280
0
            const_row &= c->m[i][j].num == 0 || /* scalar is zero */
281
0
                         (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
282
0
        }
283
0
        if (const_row && (c->mask & SWS_MASK_ROW(i))) {
284
0
            clear.mask |= SWS_COMP(i);
285
0
            clear.value[i] = c->m[i][4];
286
0
            for (int j = 0; j < 5; j++)
287
0
                c->m[i][j] = Q(i == j);
288
0
            c->mask &= ~SWS_MASK_ROW(i);
289
0
            ret = true;
290
0
        }
291
0
    }
292
293
0
    if (ret)
294
0
        *out_clear = clear;
295
0
    return ret;
296
0
}
297
298
/* Unswizzle a linear operation by aligning single-input rows with
299
 * their corresponding diagonal */
300
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
301
0
{
302
0
    SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
303
0
    SwsLinearOp c = *op;
304
305
    /* Find non-zero coefficients in the main 4x4 matrix */
306
0
    uint32_t nonzero = 0;
307
0
    for (int i = 0; i < 4; i++) {
308
0
        for (int j = 0; j < 4; j++) {
309
0
            if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
310
0
                continue;
311
0
            nonzero |= SWS_MASK(i, j);
312
0
        }
313
0
    }
314
315
    /* If a value is unique in its row and the target column is
316
     * empty, move it there and update the input swizzle */
317
0
    for (int i = 0; i < 4; i++) {
318
0
        if (nonzero & SWS_MASK_COL(i))
319
0
            continue; /* target column is not empty */
320
0
        for (int j = 0; j < 4; j++) {
321
0
            if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
322
                /* Move coefficient to the diagonal */
323
0
                c.m[i][i] = c.m[i][j];
324
0
                c.m[i][j] = Q(0);
325
0
                swiz.in[i] = j;
326
0
                break;
327
0
            }
328
0
        }
329
0
    }
330
331
0
    if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
332
0
        return false; /* no swizzle was identified */
333
334
0
    c.mask = ff_sws_linear_mask(c);
335
0
    *out_swiz = swiz;
336
0
    *op = c;
337
0
    return true;
338
0
}
339
340
int ff_sws_op_list_optimize(SwsOpList *ops)
341
0
{
342
0
    int ret;
343
344
0
retry:
345
0
    ff_sws_op_list_update_comps(ops);
346
347
    /* Try to push filters towards the input; do this first to unblock
348
     * in-place optimizations like linear op fusion */
349
0
    for (int n = 1; n < ops->num_ops; n++) {
350
0
        SwsOp *op = &ops->ops[n];
351
0
        SwsOp *prev = &ops->ops[n - 1];
352
353
0
        switch (op->op) {
354
0
        case SWS_OP_FILTER_H:
355
0
        case SWS_OP_FILTER_V:
356
0
            if (op_commute_filter(op, prev)) {
357
0
                FFSWAP(SwsOp, *op, *prev);
358
0
                goto retry;
359
0
            }
360
0
            break;
361
0
        }
362
0
    }
363
364
    /* Apply all in-place optimizations (that do not re-order the list) */
365
0
    for (int n = 0; n < ops->num_ops; n++) {
366
0
        SwsOp dummy = {0};
367
0
        SwsOp *op = &ops->ops[n];
368
0
        SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
369
0
        SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
370
371
        /* common helper variable */
372
0
        bool noop = true;
373
374
0
        if (!SWS_OP_NEEDED(op, 0) && !SWS_OP_NEEDED(op, 1) &&
375
0
            !SWS_OP_NEEDED(op, 2) && !SWS_OP_NEEDED(op, 3) &&
376
0
            op->op != SWS_OP_WRITE)
377
0
        {
378
            /* Remove any operation whose output is not needed */
379
0
            ff_sws_op_list_remove_at(ops, n, 1);
380
0
            goto retry;
381
0
        }
382
383
0
        switch (op->op) {
384
0
        case SWS_OP_READ:
385
            /* "Compress" planar reads where not all components are needed */
386
0
            if (!op->rw.packed) {
387
0
                SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
388
0
                int nb_planes = 0;
389
0
                for (int i = 0; i < op->rw.elems; i++) {
390
0
                    if (!SWS_OP_NEEDED(op, i)) {
391
0
                        swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
392
0
                        continue;
393
0
                    }
394
395
0
                    const int idx = nb_planes++;
396
0
                    av_assert1(idx <= i);
397
0
                    ops->plane_src[idx] = ops->plane_src[i];
398
0
                    swiz.in[i] = idx;
399
0
                }
400
401
0
                if (nb_planes < op->rw.elems) {
402
0
                    op->rw.elems = nb_planes;
403
0
                    RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
404
0
                        .op = SWS_OP_SWIZZLE,
405
0
                        .type = op->rw.filter ? SWS_PIXEL_F32 : op->type,
406
0
                        .swizzle = swiz,
407
0
                    }));
408
0
                    goto retry;
409
0
                }
410
0
            }
411
0
            break;
412
413
0
        case SWS_OP_SWAP_BYTES:
414
            /* Redundant (double) swap */
415
0
            if (next->op == SWS_OP_SWAP_BYTES) {
416
0
                ff_sws_op_list_remove_at(ops, n, 2);
417
0
                goto retry;
418
0
            }
419
0
            break;
420
421
0
        case SWS_OP_UNPACK:
422
            /* Redundant unpack+pack */
423
0
            if (next->op == SWS_OP_PACK && next->type == op->type &&
424
0
                next->pack.pattern[0] == op->pack.pattern[0] &&
425
0
                next->pack.pattern[1] == op->pack.pattern[1] &&
426
0
                next->pack.pattern[2] == op->pack.pattern[2] &&
427
0
                next->pack.pattern[3] == op->pack.pattern[3])
428
0
            {
429
0
                ff_sws_op_list_remove_at(ops, n, 2);
430
0
                goto retry;
431
0
            }
432
0
            break;
433
434
0
        case SWS_OP_LSHIFT:
435
0
        case SWS_OP_RSHIFT:
436
            /* Two shifts in the same direction */
437
0
            if (next->op == op->op) {
438
0
                op->shift.amount += next->shift.amount;
439
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
440
0
                goto retry;
441
0
            }
442
443
            /* No-op shift */
444
0
            if (!op->shift.amount) {
445
0
                ff_sws_op_list_remove_at(ops, n, 1);
446
0
                goto retry;
447
0
            }
448
0
            break;
449
450
0
        case SWS_OP_CLEAR:
451
0
            for (int i = 0; i < 4; i++) {
452
0
                if (!SWS_COMP_TEST(op->clear.mask, i))
453
0
                    continue;
454
455
0
                if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
456
0
                    !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
457
0
                    op->clear.value[i].num == 0)
458
0
                {
459
                    /* Redundant clear-to-zero of zero component */
460
0
                    op->clear.mask ^= SWS_COMP(i);
461
0
                } else if (!SWS_OP_NEEDED(op, i)) {
462
                    /* Unnecessary clear of unused component */
463
0
                    op->clear.mask ^= SWS_COMP(i);
464
0
                } else {
465
0
                    noop = false;
466
0
                }
467
0
            }
468
469
0
            if (noop) {
470
0
                ff_sws_op_list_remove_at(ops, n, 1);
471
0
                goto retry;
472
0
            }
473
474
            /* Transitive clear */
475
0
            if (next->op == SWS_OP_CLEAR) {
476
0
                for (int i = 0; i < 4; i++) {
477
0
                    if (SWS_COMP_TEST(next->clear.mask, i))
478
0
                        op->clear.value[i] = next->clear.value[i];
479
0
                }
480
0
                op->clear.mask |= next->clear.mask;
481
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
482
0
                goto retry;
483
0
            }
484
0
            break;
485
486
0
        case SWS_OP_SWIZZLE:
487
0
            for (int i = 0; i < 4; i++) {
488
0
                if (!SWS_OP_NEEDED(op, i))
489
0
                    continue;
490
0
                if (op->swizzle.in[i] != i)
491
0
                    noop = false;
492
0
            }
493
494
            /* Identity swizzle */
495
0
            if (noop) {
496
0
                ff_sws_op_list_remove_at(ops, n, 1);
497
0
                goto retry;
498
0
            }
499
500
            /* Transitive swizzle */
501
0
            if (next->op == SWS_OP_SWIZZLE) {
502
0
                const SwsSwizzleOp orig = op->swizzle;
503
0
                for (int i = 0; i < 4; i++)
504
0
                    op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
505
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
506
0
                goto retry;
507
0
            }
508
509
            /* Swizzle planes instead of components, if possible */
510
0
            if (prev->op == SWS_OP_READ && !prev->rw.packed) {
511
0
                for (int dst = 0; dst < prev->rw.elems; dst++) {
512
0
                    const int src = op->swizzle.in[dst];
513
0
                    if (src > dst && src < prev->rw.elems) {
514
0
                        FFSWAP(int, ops->plane_src[dst], ops->plane_src[src]);
515
0
                        for (int i = dst; i < 4; i++) {
516
0
                            if (op->swizzle.in[i] == dst)
517
0
                                op->swizzle.in[i] = src;
518
0
                            else if (op->swizzle.in[i] == src)
519
0
                                op->swizzle.in[i] = dst;
520
0
                        }
521
0
                        goto retry;
522
0
                    }
523
0
                }
524
0
            }
525
526
0
            if (next->op == SWS_OP_WRITE && !next->rw.packed) {
527
0
                for (int dst = 0; dst < next->rw.elems; dst++) {
528
0
                    const int src = op->swizzle.in[dst];
529
0
                    if (src > dst && src < next->rw.elems) {
530
0
                        FFSWAP(int, ops->plane_dst[dst], ops->plane_dst[src]);
531
0
                        FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
532
0
                        goto retry;
533
0
                    }
534
0
                }
535
0
            }
536
0
            break;
537
538
0
        case SWS_OP_CONVERT:
539
            /* No-op conversion */
540
0
            if (op->type == op->convert.to) {
541
0
                ff_sws_op_list_remove_at(ops, n, 1);
542
0
                goto retry;
543
0
            }
544
545
            /* Transitive conversion */
546
0
            if (next->op == SWS_OP_CONVERT &&
547
0
                op->convert.expand == next->convert.expand)
548
0
            {
549
0
                av_assert1(op->convert.to == next->type);
550
0
                op->convert.to = next->convert.to;
551
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
552
0
                goto retry;
553
0
            }
554
555
            /* Conversion followed by integer expansion */
556
0
            if (next->op == SWS_OP_SCALE && !op->convert.expand &&
557
0
                ff_sws_pixel_type_is_int(op->type) &&
558
0
                ff_sws_pixel_type_is_int(op->convert.to) &&
559
0
                !av_cmp_q(next->scale.factor,
560
0
                          ff_sws_pixel_expand(op->type, op->convert.to)))
561
0
            {
562
0
                op->convert.expand = true;
563
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
564
0
                goto retry;
565
0
            }
566
0
            break;
567
568
0
        case SWS_OP_MIN:
569
0
            for (int i = 0; i < 4; i++) {
570
0
                if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den)
571
0
                    continue;
572
0
                if (av_cmp_q(op->clamp.limit[i], prev->comps.max[i]) < 0)
573
0
                    noop = false;
574
0
            }
575
576
0
            if (noop) {
577
0
                ff_sws_op_list_remove_at(ops, n, 1);
578
0
                goto retry;
579
0
            }
580
0
            break;
581
582
0
        case SWS_OP_MAX:
583
0
            for (int i = 0; i < 4; i++) {
584
0
                if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den)
585
0
                    continue;
586
0
                if (av_cmp_q(prev->comps.min[i], op->clamp.limit[i]) < 0)
587
0
                    noop = false;
588
0
            }
589
590
0
            if (noop) {
591
0
                ff_sws_op_list_remove_at(ops, n, 1);
592
0
                goto retry;
593
0
            }
594
0
            break;
595
596
0
        case SWS_OP_DITHER:
597
0
            for (int i = 0; i < 4; i++) {
598
0
                if (op->dither.y_offset[i] < 0)
599
0
                    continue;
600
0
                if (!SWS_OP_NEEDED(op, i) || (prev->comps.flags[i] & SWS_COMP_EXACT)) {
601
0
                    op->dither.y_offset[i] = -1; /* unnecessary dither */
602
0
                    goto retry;
603
0
                } else {
604
0
                    noop = false;
605
0
                }
606
0
            }
607
608
0
            if (noop) {
609
0
                ff_sws_op_list_remove_at(ops, n, 1);
610
0
                goto retry;
611
0
            }
612
0
            break;
613
614
0
        case SWS_OP_LINEAR: {
615
0
            SwsSwizzleOp swizzle;
616
0
            SwsClearOp clear;
617
0
            SwsScaleOp scale;
618
619
            /* No-op (identity) linear operation */
620
0
            if (!op->lin.mask) {
621
0
                ff_sws_op_list_remove_at(ops, n, 1);
622
0
                goto retry;
623
0
            }
624
625
0
            if (next->op == SWS_OP_LINEAR) {
626
                /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
627
0
                const SwsLinearOp m1 = op->lin;
628
0
                const SwsLinearOp m2 = next->lin;
629
0
                for (int i = 0; i < 4; i++) {
630
0
                    for (int j = 0; j < 5; j++) {
631
0
                        AVRational sum = Q(0);
632
0
                        for (int k = 0; k < 4; k++)
633
0
                            sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
634
0
                        if (j == 4) /* m1.m[4][j] == 1 */
635
0
                            sum = av_add_q(sum, m2.m[i][4]);
636
0
                        op->lin.m[i][j] = sum;
637
0
                    }
638
0
                }
639
0
                op->lin.mask = ff_sws_linear_mask(op->lin);
640
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
641
0
                goto retry;
642
0
            }
643
644
            /* Optimize away zero columns */
645
0
            for (int j = 0; j < 4; j++) {
646
0
                const uint32_t col = SWS_MASK_COL(j);
647
0
                if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
648
0
                    continue;
649
0
                for (int i = 0; i < 4; i++)
650
0
                    op->lin.m[i][j] = Q(i == j);
651
0
                op->lin.mask &= ~col;
652
0
                goto retry;
653
0
            }
654
655
            /* Optimize away unused rows */
656
0
            for (int i = 0; i < 4; i++) {
657
0
                const uint32_t row = SWS_MASK_ROW(i);
658
0
                if (SWS_OP_NEEDED(op, i) || !(op->lin.mask & row))
659
0
                    continue;
660
0
                for (int j = 0; j < 5; j++)
661
0
                    op->lin.m[i][j] = Q(i == j);
662
0
                op->lin.mask &= ~row;
663
0
                goto retry;
664
0
            }
665
666
            /* Convert constant rows to explicit clear instruction */
667
0
            if (extract_constant_rows(&op->lin, prev->comps, &clear)) {
668
0
                RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
669
0
                    .op    = SWS_OP_CLEAR,
670
0
                    .type  = op->type,
671
0
                    .comps = op->comps,
672
0
                    .clear = clear,
673
0
                }));
674
0
                goto retry;
675
0
            }
676
677
            /* Multiplication by scalar constant */
678
0
            if (extract_scalar(&op->lin, op->comps, prev->comps, &scale)) {
679
0
                op->op    = SWS_OP_SCALE;
680
0
                op->scale = scale;
681
0
                goto retry;
682
0
            }
683
684
            /* Swizzle by fixed pattern */
685
0
            if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
686
0
                RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
687
0
                    .op      = SWS_OP_SWIZZLE,
688
0
                    .type    = op->type,
689
0
                    .swizzle = swizzle,
690
0
                }));
691
0
                goto retry;
692
0
            }
693
0
            break;
694
0
        }
695
696
0
        case SWS_OP_SCALE: {
697
0
            const int factor2 = exact_log2_q(op->scale.factor);
698
699
            /* No-op scaling */
700
0
            if (op->scale.factor.num == 1 && op->scale.factor.den == 1) {
701
0
                ff_sws_op_list_remove_at(ops, n, 1);
702
0
                goto retry;
703
0
            }
704
705
            /* Merge consecutive scaling operations (that don't overflow) */
706
0
            if (next->op == SWS_OP_SCALE) {
707
0
                int64_t p = op->scale.factor.num * (int64_t) next->scale.factor.num;
708
0
                int64_t q = op->scale.factor.den * (int64_t) next->scale.factor.den;
709
0
                if (FFABS(p) <= INT_MAX && FFABS(q) <= INT_MAX) {
710
0
                    av_reduce(&op->scale.factor.num, &op->scale.factor.den, p, q, INT_MAX);
711
0
                    ff_sws_op_list_remove_at(ops, n + 1, 1);
712
0
                    goto retry;
713
0
                }
714
0
            }
715
716
            /* Scaling by exact power of two */
717
0
            if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
718
0
                op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
719
0
                op->shift.amount = FFABS(factor2);
720
0
                goto retry;
721
0
            }
722
0
            break;
723
0
        }
724
725
0
        case SWS_OP_FILTER_H:
726
0
        case SWS_OP_FILTER_V:
727
            /* Merge with prior simple planar read */
728
0
            if (prev->op == SWS_OP_READ && !prev->rw.filter &&
729
0
                !prev->rw.packed && !prev->rw.frac) {
730
0
                prev->rw.filter = op->op;
731
0
                prev->rw.kernel = av_refstruct_ref(op->filter.kernel);
732
0
                ff_sws_op_list_remove_at(ops, n, 1);
733
0
                goto retry;
734
0
            }
735
0
            break;
736
0
        }
737
0
    }
738
739
    /* Push clears to the back to void any unused components */
740
0
    for (int n = 0; n < ops->num_ops - 1; n++) {
741
0
        SwsOp *op = &ops->ops[n];
742
0
        SwsOp *next = &ops->ops[n + 1];
743
744
0
        switch (op->op) {
745
0
        case SWS_OP_CLEAR:
746
0
            if (op_commute_clear(op, next)) {
747
0
                FFSWAP(SwsOp, *op, *next);
748
0
                goto retry;
749
0
            }
750
0
            break;
751
0
        }
752
0
    }
753
754
    /* Apply any remaining preferential re-ordering optimizations; do these
755
     * last because they are more likely to block other optimizations if done
756
     * too aggressively */
757
0
    for (int n = 0; n < ops->num_ops - 1; n++) {
758
0
        SwsOp *op = &ops->ops[n];
759
0
        SwsOp *next = &ops->ops[n + 1];
760
761
0
        switch (op->op) {
762
0
        case SWS_OP_SWIZZLE: {
763
            /* Try to push swizzles towards the output */
764
0
            if (op_commute_swizzle(op, next)) {
765
0
                FFSWAP(SwsOp, *op, *next);
766
0
                goto retry;
767
0
            }
768
0
            break;
769
0
        }
770
771
0
        case SWS_OP_SCALE:
772
            /* Scaling by integer before conversion to int */
773
0
            if (op->scale.factor.den == 1 && next->op == SWS_OP_CONVERT &&
774
0
                ff_sws_pixel_type_is_int(next->convert.to))
775
0
            {
776
0
                op->type = next->convert.to;
777
0
                FFSWAP(SwsOp, *op, *next);
778
0
                goto retry;
779
0
            }
780
0
            break;
781
0
        }
782
0
    }
783
784
0
    return 0;
785
0
}
786
787
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
788
                         int size, uint8_t clear_val,
789
                         int *read_bytes, int *write_bytes)
790
0
{
791
0
    if (!ops->num_ops)
792
0
        return AVERROR(EINVAL);
793
794
0
    const SwsOp *read = ff_sws_op_list_input(ops);
795
0
    if (!read || read->rw.frac || read->rw.filter ||
796
0
        (!read->rw.packed && read->rw.elems > 1))
797
0
        return AVERROR(ENOTSUP);
798
799
0
    const int read_size = ff_sws_pixel_type_size(read->type);
800
0
    uint32_t mask[4] = {0};
801
0
    for (int i = 0; i < read->rw.elems; i++)
802
0
        mask[i] = 0x01010101 * i * read_size + 0x03020100;
803
804
0
    for (int opidx = 1; opidx < ops->num_ops; opidx++) {
805
0
        const SwsOp *op = &ops->ops[opidx];
806
0
        switch (op->op) {
807
0
        case SWS_OP_SWIZZLE: {
808
0
            uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
809
0
            for (int i = 0; i < 4; i++)
810
0
                mask[i] = orig[op->swizzle.in[i]];
811
0
            break;
812
0
        }
813
814
0
        case SWS_OP_SWAP_BYTES:
815
0
            for (int i = 0; i < 4; i++) {
816
0
                switch (ff_sws_pixel_type_size(op->type)) {
817
0
                case 2: mask[i] = av_bswap16(mask[i]); break;
818
0
                case 4: mask[i] = av_bswap32(mask[i]); break;
819
0
                }
820
0
            }
821
0
            break;
822
823
0
        case SWS_OP_CLEAR:
824
0
            for (int i = 0; i < 4; i++) {
825
0
                if (!SWS_COMP_TEST(op->clear.mask, i))
826
0
                    continue;
827
0
                if (op->clear.value[i].num != 0 || !clear_val)
828
0
                    return AVERROR(ENOTSUP);
829
0
                mask[i] = 0x1010101ul * clear_val;
830
0
            }
831
0
            break;
832
833
0
        case SWS_OP_CONVERT: {
834
0
            if (!op->convert.expand)
835
0
                return AVERROR(ENOTSUP);
836
0
            for (int i = 0; i < 4; i++) {
837
0
                switch (ff_sws_pixel_type_size(op->type)) {
838
0
                case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF);   break;
839
0
                case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
840
0
                }
841
0
            }
842
0
            break;
843
0
        }
844
845
0
        case SWS_OP_WRITE: {
846
0
            if (op->rw.frac || op->rw.filter ||
847
0
                (!op->rw.packed && op->rw.elems > 1))
848
0
                return AVERROR(ENOTSUP);
849
850
            /* Initialize to no-op */
851
0
            memset(shuffle, clear_val, size);
852
853
0
            const int write_size  = ff_sws_pixel_type_size(op->type);
854
0
            const int read_chunk  = read->rw.elems * read_size;
855
0
            const int write_chunk = op->rw.elems * write_size;
856
0
            const int num_groups  = size / FFMAX(read_chunk, write_chunk);
857
0
            for (int n = 0; n < num_groups; n++) {
858
0
                const int base_in  = n * read_chunk;
859
0
                const int base_out = n * write_chunk;
860
0
                for (int i = 0; i < op->rw.elems; i++) {
861
0
                    const int offset = base_out + i * write_size;
862
0
                    for (int b = 0; b < write_size; b++) {
863
0
                        const uint8_t idx = mask[i] >> (b * 8);
864
0
                        if (idx != clear_val)
865
0
                            shuffle[offset + b] = base_in + idx;
866
0
                    }
867
0
                }
868
0
            }
869
870
0
            *read_bytes  = num_groups * read_chunk;
871
0
            *write_bytes = num_groups * write_chunk;
872
0
            return num_groups;
873
0
        }
874
875
0
        default:
876
0
            return AVERROR(ENOTSUP);
877
0
        }
878
0
    }
879
880
0
    return AVERROR(EINVAL);
881
0
}
882
883
/**
884
 * Determine a suitable intermediate buffer format for a given combination
885
 * of pixel types and number of planes. The exact interpretation of these
886
 * formats does not matter at all; since they will only ever be used as
887
 * temporary intermediate buffers. We still need to pick *some* format as
888
 * a consequence of ff_sws_graph_add_pass() taking an AVPixelFormat for the
889
 * output buffer.
890
 */
891
static enum AVPixelFormat get_planar_fmt(SwsPixelType type, int nb_planes)
892
0
{
893
0
    switch (ff_sws_pixel_type_size(type)) {
894
0
    case 1:
895
0
        switch (nb_planes) {
896
0
        case 1: return AV_PIX_FMT_GRAY8;
897
0
        case 2: return AV_PIX_FMT_YUV444P; // FIXME: no 2-plane planar fmt
898
0
        case 3: return AV_PIX_FMT_YUV444P;
899
0
        case 4: return AV_PIX_FMT_YUVA444P;
900
0
        }
901
0
        break;
902
0
    case 2:
903
0
        switch (nb_planes) {
904
0
        case 1: return AV_PIX_FMT_GRAY16;
905
0
        case 2: return AV_PIX_FMT_YUV444P16; // FIXME: no 2-plane planar fmt
906
0
        case 3: return AV_PIX_FMT_YUV444P16;
907
0
        case 4: return AV_PIX_FMT_YUVA444P16;
908
0
        }
909
0
        break;
910
0
    case 4:
911
0
        switch (nb_planes) {
912
0
        case 1: return AV_PIX_FMT_GRAYF32;
913
0
        case 2: return AV_PIX_FMT_GBRPF32; // FIXME: no 2-plane planar fmt
914
0
        case 3: return AV_PIX_FMT_GBRPF32;
915
0
        case 4: return AV_PIX_FMT_GBRAPF32;
916
0
        }
917
0
        break;
918
0
    }
919
920
0
    av_unreachable("Invalid pixel type or number of planes?");
921
0
    return AV_PIX_FMT_NONE;
922
0
}
923
924
static void get_input_size(const SwsOpList *ops, SwsFormat *fmt)
925
0
{
926
0
    fmt->width  = ops->src.width;
927
0
    fmt->height = ops->src.height;
928
929
0
    const SwsOp *read = ff_sws_op_list_input(ops);
930
0
    if (read && read->rw.filter == SWS_OP_FILTER_V) {
931
0
        fmt->height = read->rw.kernel->dst_size;
932
0
    } else if (read && read->rw.filter == SWS_OP_FILTER_H) {
933
0
        fmt->width = read->rw.kernel->dst_size;
934
0
    }
935
0
}
936
937
int ff_sws_op_list_subpass(SwsOpList *ops1, SwsOpList **out_rest)
938
0
{
939
0
    const SwsOp *op;
940
0
    int ret, idx;
941
942
0
    for (idx = 0; idx < ops1->num_ops; idx++) {
943
0
        op = &ops1->ops[idx];
944
0
        if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V)
945
0
            break;
946
0
    }
947
948
0
    if (idx == ops1->num_ops) {
949
0
        *out_rest = NULL;
950
0
        return 0;
951
0
    }
952
953
0
    av_assert0(idx > 0);
954
0
    const SwsOp *prev = &ops1->ops[idx - 1];
955
956
0
    SwsOpList *ops2 = ff_sws_op_list_duplicate(ops1);
957
0
    if (!ops2)
958
0
        return AVERROR(ENOMEM);
959
960
    /**
961
     * Not all components may be needed; but we need the ones that *are*
962
     * used to be contiguous for the write/read operations. So, first
963
     * compress them into a linearly ascending list of components
964
     */
965
0
    int nb_planes = 0;
966
0
    SwsSwizzleOp swiz_wr = SWS_SWIZZLE(0, 1, 2, 3);
967
0
    SwsSwizzleOp swiz_rd = SWS_SWIZZLE(0, 1, 2, 3);
968
0
    for (int i = 0; i < 4; i++) {
969
0
        if (SWS_OP_NEEDED(prev, i)) {
970
0
            const int o = nb_planes++;
971
0
            swiz_wr.in[o] = i;
972
0
            swiz_rd.in[i] = o;
973
0
        }
974
0
    }
975
976
    /* Determine metadata for the intermediate format */
977
0
    const SwsPixelType type = op->type;
978
0
    ops2->src.format = get_planar_fmt(type, nb_planes);
979
0
    ops2->src.desc = av_pix_fmt_desc_get(ops2->src.format);
980
0
    get_input_size(ops1, &ops2->src);
981
0
    ops1->dst = ops2->src;
982
983
0
    for (int i = 0; i < nb_planes; i++) {
984
0
        ops1->plane_dst[i] = ops2->plane_src[i] = i;
985
0
        ops2->comps_src.flags[i] = prev->comps.flags[swiz_wr.in[i]];
986
0
    }
987
988
0
    ff_sws_op_list_remove_at(ops1, idx, ops1->num_ops - idx);
989
0
    ff_sws_op_list_remove_at(ops2, 0, idx);
990
0
    op = NULL; /* the above command may invalidate op */
991
992
0
    if (swiz_wr.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) {
993
0
        ret = ff_sws_op_list_append(ops1, &(SwsOp) {
994
0
            .op      = SWS_OP_SWIZZLE,
995
0
            .type    = type,
996
0
            .swizzle = swiz_wr,
997
0
        });
998
0
        if (ret < 0)
999
0
            goto fail;
1000
0
    }
1001
1002
0
    ret = ff_sws_op_list_append(ops1, &(SwsOp) {
1003
0
        .op       = SWS_OP_WRITE,
1004
0
        .type     = type,
1005
0
        .rw.elems = nb_planes,
1006
0
    });
1007
0
    if (ret < 0)
1008
0
        goto fail;
1009
1010
0
    ret = ff_sws_op_list_insert_at(ops2, 0, &(SwsOp) {
1011
0
        .op        = SWS_OP_READ,
1012
0
        .type      = type,
1013
0
        .rw.elems  = nb_planes,
1014
0
    });
1015
0
    if (ret < 0)
1016
0
        goto fail;
1017
1018
0
    if (swiz_rd.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) {
1019
0
        ret = ff_sws_op_list_insert_at(ops2, 1, &(SwsOp) {
1020
0
            .op      = SWS_OP_SWIZZLE,
1021
0
            .type    = type,
1022
0
            .swizzle = swiz_rd,
1023
0
        });
1024
0
        if (ret < 0)
1025
0
            goto fail;
1026
0
    }
1027
1028
0
    ret = ff_sws_op_list_optimize(ops1);
1029
0
    if (ret < 0)
1030
0
        goto fail;
1031
1032
0
    ret = ff_sws_op_list_optimize(ops2);
1033
0
    if (ret < 0)
1034
0
        goto fail;
1035
1036
0
    *out_rest = ops2;
1037
0
    return 0;
1038
1039
0
fail:
1040
0
    ff_sws_op_list_free(&ops2);
1041
0
    return ret;
1042
0
}