Coverage Report

Created: 2026-03-12 07:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libswscale/ops_optimizer.c
Line
Count
Source
1
/**
2
 * Copyright (C) 2025 Niklas Haas
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/avassert.h"
22
#include "libavutil/bswap.h"
23
#include "libavutil/rational.h"
24
25
#include "ops.h"
26
#include "ops_internal.h"
27
28
#define RET(x)                                                                 \
29
0
    do {                                                                       \
30
0
        if ((ret = (x)) < 0)                                                   \
31
0
            return ret;                                                        \
32
0
    } while (0)
33
34
/**
35
 * Try to commute a clear op with the next operation. Makes any adjustments
36
 * to the operations as needed, but does not perform the actual commutation.
37
 *
38
 * Returns whether successful.
39
 */
40
static bool op_commute_clear(SwsOp *op, SwsOp *next)
41
0
{
42
0
    SwsOp tmp;
43
44
0
    av_assert1(op->op == SWS_OP_CLEAR);
45
0
    switch (next->op) {
46
0
    case SWS_OP_CONVERT:
47
0
        op->type = next->convert.to;
48
        /* fall through */
49
0
    case SWS_OP_LSHIFT:
50
0
    case SWS_OP_RSHIFT:
51
0
    case SWS_OP_DITHER:
52
0
    case SWS_OP_MIN:
53
0
    case SWS_OP_MAX:
54
0
    case SWS_OP_SCALE:
55
0
    case SWS_OP_READ:
56
0
    case SWS_OP_SWIZZLE:
57
0
        ff_sws_apply_op_q(next, op->c.q4);
58
0
        return true;
59
0
    case SWS_OP_SWAP_BYTES:
60
0
        switch (next->type) {
61
0
        case SWS_PIXEL_U16:
62
0
            ff_sws_apply_op_q(next, op->c.q4); /* always works */
63
0
            return true;
64
0
        case SWS_PIXEL_U32:
65
0
            for (int i = 0; i < 4; i++) {
66
0
                uint32_t v = av_bswap32(op->c.q4[i].num);
67
0
                if (v > INT_MAX)
68
0
                    return false; /* can't represent as AVRational anymore */
69
0
                tmp.c.q4[i] = Q(v);
70
0
            }
71
0
            op->c = tmp.c;
72
0
            return true;
73
0
        default:
74
0
            return false;
75
0
        }
76
0
    case SWS_OP_INVALID:
77
0
    case SWS_OP_WRITE:
78
0
    case SWS_OP_LINEAR:
79
0
    case SWS_OP_PACK:
80
0
    case SWS_OP_UNPACK:
81
0
    case SWS_OP_CLEAR:
82
0
        return false;
83
0
    case SWS_OP_TYPE_NB:
84
0
        break;
85
0
    }
86
87
0
    av_unreachable("Invalid operation type!");
88
0
    return false;
89
0
}
90
91
 /**
92
  * Try to commute a swizzle op with the next operation. Makes any adjustments
93
  * to the operations as needed, but does not perform the actual commutation.
94
  *
95
  * Returns whether successful.
96
  */
97
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
98
0
{
99
0
    bool seen[4] = {0};
100
101
0
    av_assert1(op->op == SWS_OP_SWIZZLE);
102
0
    switch (next->op) {
103
0
    case SWS_OP_CONVERT:
104
0
        op->type = next->convert.to;
105
        /* fall through */
106
0
    case SWS_OP_SWAP_BYTES:
107
0
    case SWS_OP_LSHIFT:
108
0
    case SWS_OP_RSHIFT:
109
0
    case SWS_OP_SCALE:
110
0
        return true;
111
112
    /**
113
     * We can commute per-channel ops only if the per-channel constants are the
114
     * same for all duplicated channels; e.g.:
115
     *   SWIZZLE {0, 0, 0, 3}
116
     *   NEXT    {x, x, x, w}
117
     * ->
118
     *   NEXT    {x, _, _, w}
119
     *   SWIZZLE {0, 0, 0, 3}
120
     */
121
0
    case SWS_OP_MIN:
122
0
    case SWS_OP_MAX: {
123
0
        const SwsConst c = next->c;
124
0
        for (int i = 0; i < 4; i++) {
125
0
            if (next->comps.unused[i])
126
0
                continue;
127
0
            const int j = op->swizzle.in[i];
128
0
            if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
129
0
                return false;
130
0
            next->c.q4[j] = c.q4[i];
131
0
            seen[j] = true;
132
0
        }
133
0
        return true;
134
0
    }
135
136
0
    case SWS_OP_DITHER: {
137
0
        const SwsDitherOp d = next->dither;
138
0
        for (int i = 0; i < 4; i++) {
139
0
            if (next->comps.unused[i])
140
0
                continue;
141
0
            const int j = op->swizzle.in[i];
142
0
            if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
143
0
                return false;
144
0
            next->dither.y_offset[j] = d.y_offset[i];
145
0
            seen[j] = true;
146
0
        }
147
0
        return true;
148
0
    }
149
150
0
    case SWS_OP_INVALID:
151
0
    case SWS_OP_READ:
152
0
    case SWS_OP_WRITE:
153
0
    case SWS_OP_SWIZZLE:
154
0
    case SWS_OP_CLEAR:
155
0
    case SWS_OP_LINEAR:
156
0
    case SWS_OP_PACK:
157
0
    case SWS_OP_UNPACK:
158
0
        return false;
159
0
    case SWS_OP_TYPE_NB:
160
0
        break;
161
0
    }
162
163
0
    av_unreachable("Invalid operation type!");
164
0
    return false;
165
0
}
166
167
/* returns log2(x) only if x is a power of two, or 0 otherwise */
168
static int exact_log2(const int x)
169
0
{
170
0
    int p;
171
0
    if (x <= 0)
172
0
        return 0;
173
0
    p = av_log2(x);
174
0
    return (1 << p) == x ? p : 0;
175
0
}
176
177
static int exact_log2_q(const AVRational x)
178
0
{
179
0
    if (x.den == 1)
180
0
        return exact_log2(x.num);
181
0
    else if (x.num == 1)
182
0
        return -exact_log2(x.den);
183
0
    else
184
0
        return 0;
185
0
}
186
187
/**
188
 * If a linear operation can be reduced to a scalar multiplication, returns
189
 * the corresponding scaling factor, or 0 otherwise.
190
 */
191
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
192
                           SwsConst *out_scale)
193
0
{
194
0
    SwsConst scale = {0};
195
196
    /* There are components not on the main diagonal */
197
0
    if (c->mask & ~SWS_MASK_DIAG4)
198
0
        return false;
199
200
0
    for (int i = 0; i < 4; i++) {
201
0
        const AVRational s = c->m[i][i];
202
0
        if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
203
0
            continue;
204
0
        if (scale.q.den && av_cmp_q(s, scale.q))
205
0
            return false;
206
0
        scale.q = s;
207
0
    }
208
209
0
    if (scale.q.den)
210
0
        *out_scale = scale;
211
0
    return scale.q.den;
212
0
}
213
214
/* Extracts an integer clear operation (subset) from the given linear op. */
215
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
216
                                  SwsConst *out_clear)
217
0
{
218
0
    SwsConst clear = {0};
219
0
    bool ret = false;
220
221
0
    for (int i = 0; i < 4; i++) {
222
0
        bool const_row = c->m[i][4].den == 1; /* offset is integer */
223
0
        for (int j = 0; j < 4; j++) {
224
0
            const_row &= c->m[i][j].num == 0 || /* scalar is zero */
225
0
                         (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
226
0
        }
227
0
        if (const_row && (c->mask & SWS_MASK_ROW(i))) {
228
0
            clear.q4[i] = c->m[i][4];
229
0
            for (int j = 0; j < 5; j++)
230
0
                c->m[i][j] = Q(i == j);
231
0
            c->mask &= ~SWS_MASK_ROW(i);
232
0
            ret = true;
233
0
        }
234
0
    }
235
236
0
    if (ret)
237
0
        *out_clear = clear;
238
0
    return ret;
239
0
}
240
241
/* Unswizzle a linear operation by aligning single-input rows with
242
 * their corresponding diagonal */
243
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
244
0
{
245
0
    SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
246
0
    SwsLinearOp c = *op;
247
248
    /* Find non-zero coefficients in the main 4x4 matrix */
249
0
    uint32_t nonzero = 0;
250
0
    for (int i = 0; i < 4; i++) {
251
0
        for (int j = 0; j < 4; j++) {
252
0
            if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
253
0
                continue;
254
0
            nonzero |= SWS_MASK(i, j);
255
0
        }
256
0
    }
257
258
    /* If a value is unique in its row and the target column is
259
     * empty, move it there and update the input swizzle */
260
0
    for (int i = 0; i < 4; i++) {
261
0
        if (nonzero & SWS_MASK_COL(i))
262
0
            continue; /* target column is not empty */
263
0
        for (int j = 0; j < 4; j++) {
264
0
            if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
265
                /* Move coefficient to the diagonal */
266
0
                c.m[i][i] = c.m[i][j];
267
0
                c.m[i][j] = Q(0);
268
0
                swiz.in[i] = j;
269
0
                break;
270
0
            }
271
0
        }
272
0
    }
273
274
0
    if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
275
0
        return false; /* no swizzle was identified */
276
277
0
    c.mask = ff_sws_linear_mask(c);
278
0
    *out_swiz = swiz;
279
0
    *op = c;
280
0
    return true;
281
0
}
282
283
int ff_sws_op_list_optimize(SwsOpList *ops)
284
0
{
285
0
    int ret;
286
287
0
retry:
288
0
    ff_sws_op_list_update_comps(ops);
289
290
    /* Apply all in-place optimizations (that do not re-order the list) */
291
0
    for (int n = 0; n < ops->num_ops; n++) {
292
0
        SwsOp dummy = {0};
293
0
        SwsOp *op = &ops->ops[n];
294
0
        SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
295
0
        SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
296
297
        /* common helper variable */
298
0
        bool noop = true;
299
300
0
        if (next->comps.unused[0] && next->comps.unused[1] &&
301
0
            next->comps.unused[2] && next->comps.unused[3])
302
0
        {
303
            /* Remove completely unused operations */
304
0
            ff_sws_op_list_remove_at(ops, n, 1);
305
0
            goto retry;
306
0
        }
307
308
0
        switch (op->op) {
309
0
        case SWS_OP_READ:
310
            /* "Compress" planar reads where not all components are needed */
311
0
            if (!op->rw.packed) {
312
0
                SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
313
0
                int nb_planes = 0;
314
0
                for (int i = 0; i < op->rw.elems; i++) {
315
0
                    if (next->comps.unused[i]) {
316
0
                        swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
317
0
                        continue;
318
0
                    }
319
320
0
                    const int idx = nb_planes++;
321
0
                    av_assert1(idx <= i);
322
0
                    ops->order_src.in[idx] = ops->order_src.in[i];
323
0
                    swiz.in[i] = idx;
324
0
                }
325
326
0
                if (nb_planes < op->rw.elems) {
327
0
                    op->rw.elems = nb_planes;
328
0
                    RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
329
0
                        .op = SWS_OP_SWIZZLE,
330
0
                        .type = op->type,
331
0
                        .swizzle = swiz,
332
0
                    }));
333
0
                    goto retry;
334
0
                }
335
0
            }
336
0
            break;
337
338
0
        case SWS_OP_SWAP_BYTES:
339
            /* Redundant (double) swap */
340
0
            if (next->op == SWS_OP_SWAP_BYTES) {
341
0
                ff_sws_op_list_remove_at(ops, n, 2);
342
0
                goto retry;
343
0
            }
344
0
            break;
345
346
0
        case SWS_OP_UNPACK:
347
            /* Redundant unpack+pack */
348
0
            if (next->op == SWS_OP_PACK && next->type == op->type &&
349
0
                next->pack.pattern[0] == op->pack.pattern[0] &&
350
0
                next->pack.pattern[1] == op->pack.pattern[1] &&
351
0
                next->pack.pattern[2] == op->pack.pattern[2] &&
352
0
                next->pack.pattern[3] == op->pack.pattern[3])
353
0
            {
354
0
                ff_sws_op_list_remove_at(ops, n, 2);
355
0
                goto retry;
356
0
            }
357
0
            break;
358
359
0
        case SWS_OP_LSHIFT:
360
0
        case SWS_OP_RSHIFT:
361
            /* Two shifts in the same direction */
362
0
            if (next->op == op->op) {
363
0
                op->c.u += next->c.u;
364
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
365
0
                goto retry;
366
0
            }
367
368
            /* No-op shift */
369
0
            if (!op->c.u) {
370
0
                ff_sws_op_list_remove_at(ops, n, 1);
371
0
                goto retry;
372
0
            }
373
0
            break;
374
375
0
        case SWS_OP_CLEAR:
376
0
            for (int i = 0; i < 4; i++) {
377
0
                if (!op->c.q4[i].den)
378
0
                    continue;
379
380
0
                if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
381
0
                    !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
382
0
                    op->c.q4[i].num == 0)
383
0
                {
384
                    /* Redundant clear-to-zero of zero component */
385
0
                    op->c.q4[i].den = 0;
386
0
                } else if (next->comps.unused[i]) {
387
                    /* Unnecessary clear of unused component */
388
0
                    op->c.q4[i] = (AVRational) {0, 0};
389
0
                } else if (op->c.q4[i].den) {
390
0
                    noop = false;
391
0
                }
392
0
            }
393
394
0
            if (noop) {
395
0
                ff_sws_op_list_remove_at(ops, n, 1);
396
0
                goto retry;
397
0
            }
398
399
            /* Transitive clear */
400
0
            if (next->op == SWS_OP_CLEAR) {
401
0
                for (int i = 0; i < 4; i++) {
402
0
                    if (next->c.q4[i].den)
403
0
                        op->c.q4[i] = next->c.q4[i];
404
0
                }
405
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
406
0
                goto retry;
407
0
            }
408
0
            break;
409
410
0
        case SWS_OP_SWIZZLE:
411
0
            for (int i = 0; i < 4; i++) {
412
0
                if (next->comps.unused[i])
413
0
                    continue;
414
0
                if (op->swizzle.in[i] != i)
415
0
                    noop = false;
416
0
            }
417
418
            /* Identity swizzle */
419
0
            if (noop) {
420
0
                ff_sws_op_list_remove_at(ops, n, 1);
421
0
                goto retry;
422
0
            }
423
424
            /* Transitive swizzle */
425
0
            if (next->op == SWS_OP_SWIZZLE) {
426
0
                const SwsSwizzleOp orig = op->swizzle;
427
0
                for (int i = 0; i < 4; i++)
428
0
                    op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
429
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
430
0
                goto retry;
431
0
            }
432
433
            /* Swizzle planes instead of components, if possible */
434
0
            if (prev->op == SWS_OP_READ && !prev->rw.packed) {
435
0
                for (int dst = 0; dst < prev->rw.elems; dst++) {
436
0
                    const int src = op->swizzle.in[dst];
437
0
                    if (src > dst && src < prev->rw.elems) {
438
0
                        FFSWAP(int, ops->order_src.in[dst], ops->order_src.in[src]);
439
0
                        for (int i = dst; i < 4; i++) {
440
0
                            if (op->swizzle.in[i] == dst)
441
0
                                op->swizzle.in[i] = src;
442
0
                            else if (op->swizzle.in[i] == src)
443
0
                                op->swizzle.in[i] = dst;
444
0
                        }
445
0
                        goto retry;
446
0
                    }
447
0
                }
448
0
            }
449
450
0
            if (next->op == SWS_OP_WRITE && !next->rw.packed) {
451
0
                for (int dst = 0; dst < next->rw.elems; dst++) {
452
0
                    const int src = op->swizzle.in[dst];
453
0
                    if (src > dst && src < next->rw.elems) {
454
0
                        FFSWAP(int, ops->order_dst.in[dst], ops->order_dst.in[src]);
455
0
                        FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
456
0
                        goto retry;
457
0
                    }
458
0
                }
459
0
            }
460
0
            break;
461
462
0
        case SWS_OP_CONVERT:
463
            /* No-op conversion */
464
0
            if (op->type == op->convert.to) {
465
0
                ff_sws_op_list_remove_at(ops, n, 1);
466
0
                goto retry;
467
0
            }
468
469
            /* Transitive conversion */
470
0
            if (next->op == SWS_OP_CONVERT &&
471
0
                op->convert.expand == next->convert.expand)
472
0
            {
473
0
                av_assert1(op->convert.to == next->type);
474
0
                op->convert.to = next->convert.to;
475
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
476
0
                goto retry;
477
0
            }
478
479
            /* Conversion followed by integer expansion */
480
0
            if (next->op == SWS_OP_SCALE && !op->convert.expand &&
481
0
                ff_sws_pixel_type_is_int(op->type) &&
482
0
                ff_sws_pixel_type_is_int(op->convert.to) &&
483
0
                !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
484
0
            {
485
0
                op->convert.expand = true;
486
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
487
0
                goto retry;
488
0
            }
489
0
            break;
490
491
0
        case SWS_OP_MIN:
492
0
            for (int i = 0; i < 4; i++) {
493
0
                if (next->comps.unused[i] || !op->c.q4[i].den)
494
0
                    continue;
495
0
                if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
496
0
                    noop = false;
497
0
            }
498
499
0
            if (noop) {
500
0
                ff_sws_op_list_remove_at(ops, n, 1);
501
0
                goto retry;
502
0
            }
503
0
            break;
504
505
0
        case SWS_OP_MAX:
506
0
            for (int i = 0; i < 4; i++) {
507
0
                if (next->comps.unused[i] || !op->c.q4[i].den)
508
0
                    continue;
509
0
                if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
510
0
                    noop = false;
511
0
            }
512
513
0
            if (noop) {
514
0
                ff_sws_op_list_remove_at(ops, n, 1);
515
0
                goto retry;
516
0
            }
517
0
            break;
518
519
0
        case SWS_OP_DITHER:
520
0
            for (int i = 0; i < 4; i++) {
521
0
                if (next->comps.unused[i] || op->dither.y_offset[i] < 0)
522
0
                    continue;
523
0
                if (prev->comps.flags[i] & SWS_COMP_EXACT) {
524
0
                    op->dither.y_offset[i] = -1; /* unnecessary dither */
525
0
                    goto retry;
526
0
                } else {
527
0
                    noop = false;
528
0
                }
529
0
            }
530
531
0
            if (noop) {
532
0
                ff_sws_op_list_remove_at(ops, n, 1);
533
0
                goto retry;
534
0
            }
535
0
            break;
536
537
0
        case SWS_OP_LINEAR: {
538
0
            SwsSwizzleOp swizzle;
539
0
            SwsConst c;
540
541
            /* No-op (identity) linear operation */
542
0
            if (!op->lin.mask) {
543
0
                ff_sws_op_list_remove_at(ops, n, 1);
544
0
                goto retry;
545
0
            }
546
547
0
            if (next->op == SWS_OP_LINEAR) {
548
                /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
549
0
                const SwsLinearOp m1 = op->lin;
550
0
                const SwsLinearOp m2 = next->lin;
551
0
                for (int i = 0; i < 4; i++) {
552
0
                    for (int j = 0; j < 5; j++) {
553
0
                        AVRational sum = Q(0);
554
0
                        for (int k = 0; k < 4; k++)
555
0
                            sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
556
0
                        if (j == 4) /* m1.m[4][j] == 1 */
557
0
                            sum = av_add_q(sum, m2.m[i][4]);
558
0
                        op->lin.m[i][j] = sum;
559
0
                    }
560
0
                }
561
0
                op->lin.mask = ff_sws_linear_mask(op->lin);
562
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
563
0
                goto retry;
564
0
            }
565
566
            /* Optimize away zero columns */
567
0
            for (int j = 0; j < 4; j++) {
568
0
                const uint32_t col = SWS_MASK_COL(j);
569
0
                if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
570
0
                    continue;
571
0
                for (int i = 0; i < 4; i++)
572
0
                    op->lin.m[i][j] = Q(i == j);
573
0
                op->lin.mask &= ~col;
574
0
                goto retry;
575
0
            }
576
577
            /* Optimize away unused rows */
578
0
            for (int i = 0; i < 4; i++) {
579
0
                const uint32_t row = SWS_MASK_ROW(i);
580
0
                if (!next->comps.unused[i] || !(op->lin.mask & row))
581
0
                    continue;
582
0
                for (int j = 0; j < 5; j++)
583
0
                    op->lin.m[i][j] = Q(i == j);
584
0
                op->lin.mask &= ~row;
585
0
                goto retry;
586
0
            }
587
588
            /* Convert constant rows to explicit clear instruction */
589
0
            if (extract_constant_rows(&op->lin, prev->comps, &c)) {
590
0
                RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
591
0
                    .op    = SWS_OP_CLEAR,
592
0
                    .type  = op->type,
593
0
                    .comps = op->comps,
594
0
                    .c     = c,
595
0
                }));
596
0
                goto retry;
597
0
            }
598
599
            /* Multiplication by scalar constant */
600
0
            if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
601
0
                op->op = SWS_OP_SCALE;
602
0
                op->c  = c;
603
0
                goto retry;
604
0
            }
605
606
            /* Swizzle by fixed pattern */
607
0
            if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
608
0
                RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
609
0
                    .op      = SWS_OP_SWIZZLE,
610
0
                    .type    = op->type,
611
0
                    .swizzle = swizzle,
612
0
                }));
613
0
                goto retry;
614
0
            }
615
0
            break;
616
0
        }
617
618
0
        case SWS_OP_SCALE: {
619
0
            const int factor2 = exact_log2_q(op->c.q);
620
621
            /* No-op scaling */
622
0
            if (op->c.q.num == 1 && op->c.q.den == 1) {
623
0
                ff_sws_op_list_remove_at(ops, n, 1);
624
0
                goto retry;
625
0
            }
626
627
            /* Scaling by exact power of two */
628
0
            if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
629
0
                op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
630
0
                op->c.u = FFABS(factor2);
631
0
                goto retry;
632
0
            }
633
0
            break;
634
0
        }
635
0
        }
636
0
    }
637
638
    /* Push clears to the back to void any unused components */
639
0
    for (int n = 0; n < ops->num_ops - 1; n++) {
640
0
        SwsOp *op = &ops->ops[n];
641
0
        SwsOp *next = &ops->ops[n + 1];
642
643
0
        switch (op->op) {
644
0
        case SWS_OP_CLEAR:
645
0
            if (op_commute_clear(op, next)) {
646
0
                FFSWAP(SwsOp, *op, *next);
647
0
                goto retry;
648
0
            }
649
0
            break;
650
0
        }
651
0
    }
652
653
    /* Apply any remaining preferential re-ordering optimizations; do these
654
     * last because they are more likely to block other optimizations if done
655
     * too aggressively */
656
0
    for (int n = 0; n < ops->num_ops - 1; n++) {
657
0
        SwsOp *op = &ops->ops[n];
658
0
        SwsOp *next = &ops->ops[n + 1];
659
660
0
        switch (op->op) {
661
0
        case SWS_OP_SWIZZLE: {
662
            /* Try to push swizzles towards the output */
663
0
            if (op_commute_swizzle(op, next)) {
664
0
                FFSWAP(SwsOp, *op, *next);
665
0
                goto retry;
666
0
            }
667
0
            break;
668
0
        }
669
670
0
        case SWS_OP_SCALE:
671
            /* Scaling by integer before conversion to int */
672
0
            if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
673
0
                ff_sws_pixel_type_is_int(next->convert.to))
674
0
            {
675
0
                op->type = next->convert.to;
676
0
                FFSWAP(SwsOp, *op, *next);
677
0
                goto retry;
678
0
            }
679
0
            break;
680
0
        }
681
0
    }
682
683
0
    return 0;
684
0
}
685
686
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
687
                         int size, uint8_t clear_val,
688
                         int *read_bytes, int *write_bytes)
689
0
{
690
0
    if (!ops->num_ops)
691
0
        return AVERROR(EINVAL);
692
693
0
    const SwsOp *read = ff_sws_op_list_input(ops);
694
0
    if (!read || read->rw.frac || (!read->rw.packed && read->rw.elems > 1))
695
0
        return AVERROR(ENOTSUP);
696
697
0
    const int read_size = ff_sws_pixel_type_size(read->type);
698
0
    uint32_t mask[4] = {0};
699
0
    for (int i = 0; i < read->rw.elems; i++)
700
0
        mask[i] = 0x01010101 * i * read_size + 0x03020100;
701
702
0
    for (int opidx = 1; opidx < ops->num_ops; opidx++) {
703
0
        const SwsOp *op = &ops->ops[opidx];
704
0
        switch (op->op) {
705
0
        case SWS_OP_SWIZZLE: {
706
0
            uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
707
0
            for (int i = 0; i < 4; i++)
708
0
                mask[i] = orig[op->swizzle.in[i]];
709
0
            break;
710
0
        }
711
712
0
        case SWS_OP_SWAP_BYTES:
713
0
            for (int i = 0; i < 4; i++) {
714
0
                switch (ff_sws_pixel_type_size(op->type)) {
715
0
                case 2: mask[i] = av_bswap16(mask[i]); break;
716
0
                case 4: mask[i] = av_bswap32(mask[i]); break;
717
0
                }
718
0
            }
719
0
            break;
720
721
0
        case SWS_OP_CLEAR:
722
0
            for (int i = 0; i < 4; i++) {
723
0
                if (!op->c.q4[i].den)
724
0
                    continue;
725
0
                if (op->c.q4[i].num != 0 || !clear_val)
726
0
                    return AVERROR(ENOTSUP);
727
0
                mask[i] = 0x1010101ul * clear_val;
728
0
            }
729
0
            break;
730
731
0
        case SWS_OP_CONVERT: {
732
0
            if (!op->convert.expand)
733
0
                return AVERROR(ENOTSUP);
734
0
            for (int i = 0; i < 4; i++) {
735
0
                switch (ff_sws_pixel_type_size(op->type)) {
736
0
                case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF);   break;
737
0
                case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
738
0
                }
739
0
            }
740
0
            break;
741
0
        }
742
743
0
        case SWS_OP_WRITE: {
744
0
            if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
745
0
                return AVERROR(ENOTSUP);
746
747
            /* Initialize to no-op */
748
0
            memset(shuffle, clear_val, size);
749
750
0
            const int write_size  = ff_sws_pixel_type_size(op->type);
751
0
            const int read_chunk  = read->rw.elems * read_size;
752
0
            const int write_chunk = op->rw.elems * write_size;
753
0
            const int num_groups  = size / FFMAX(read_chunk, write_chunk);
754
0
            for (int n = 0; n < num_groups; n++) {
755
0
                const int base_in  = n * read_chunk;
756
0
                const int base_out = n * write_chunk;
757
0
                for (int i = 0; i < op->rw.elems; i++) {
758
0
                    const int offset = base_out + i * write_size;
759
0
                    for (int b = 0; b < write_size; b++) {
760
0
                        const uint8_t idx = mask[i] >> (b * 8);
761
0
                        if (idx != clear_val)
762
0
                            shuffle[offset + b] = base_in + idx;
763
0
                    }
764
0
                }
765
0
            }
766
767
0
            *read_bytes  = num_groups * read_chunk;
768
0
            *write_bytes = num_groups * write_chunk;
769
0
            return num_groups;
770
0
        }
771
772
0
        default:
773
0
            return AVERROR(ENOTSUP);
774
0
        }
775
0
    }
776
777
0
    return AVERROR(EINVAL);
778
0
}