Coverage Report

Created: 2026-01-25 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libswscale/ops_optimizer.c
Line
Count
Source
1
/**
2
 * Copyright (C) 2025 Niklas Haas
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/avassert.h"
22
#include "libavutil/bswap.h"
23
#include "libavutil/rational.h"
24
25
#include "ops.h"
26
#include "ops_internal.h"
27
28
#define RET(x)                                                                 \
29
0
    do {                                                                       \
30
0
        if ((ret = (x)) < 0)                                                   \
31
0
            return ret;                                                        \
32
0
    } while (0)
33
34
/**
35
 * Try to commute a clear op with the next operation. Makes any adjustments
36
 * to the operations as needed, but does not perform the actual commutation.
37
 *
38
 * Returns whether successful.
39
 */
40
static bool op_commute_clear(SwsOp *op, SwsOp *next)
41
0
{
42
0
    av_assert1(op->op == SWS_OP_CLEAR);
43
0
    switch (next->op) {
44
0
    case SWS_OP_CONVERT:
45
0
        op->type = next->convert.to;
46
        /* fall through */
47
0
    case SWS_OP_LSHIFT:
48
0
    case SWS_OP_RSHIFT:
49
0
    case SWS_OP_DITHER:
50
0
    case SWS_OP_MIN:
51
0
    case SWS_OP_MAX:
52
0
    case SWS_OP_SCALE:
53
0
    case SWS_OP_READ:
54
0
    case SWS_OP_SWIZZLE:
55
0
        ff_sws_apply_op_q(next, op->c.q4);
56
0
        return true;
57
0
    case SWS_OP_INVALID:
58
0
    case SWS_OP_SWAP_BYTES:
59
0
    case SWS_OP_WRITE:
60
0
    case SWS_OP_LINEAR:
61
0
    case SWS_OP_PACK:
62
0
    case SWS_OP_UNPACK:
63
0
    case SWS_OP_CLEAR:
64
0
        return false;
65
0
    case SWS_OP_TYPE_NB:
66
0
        break;
67
0
    }
68
69
0
    av_unreachable("Invalid operation type!");
70
0
    return false;
71
0
}
72
73
 /**
74
  * Try to commute a swizzle op with the next operation. Makes any adjustments
75
  * to the operations as needed, but does not perform the actual commutation.
76
  *
77
  * Returns whether successful.
78
  */
79
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
80
0
{
81
0
    bool seen[4] = {0};
82
83
0
    av_assert1(op->op == SWS_OP_SWIZZLE);
84
0
    switch (next->op) {
85
0
    case SWS_OP_CONVERT:
86
0
        op->type = next->convert.to;
87
        /* fall through */
88
0
    case SWS_OP_SWAP_BYTES:
89
0
    case SWS_OP_LSHIFT:
90
0
    case SWS_OP_RSHIFT:
91
0
    case SWS_OP_SCALE:
92
0
        return true;
93
94
    /**
95
     * We can commute per-channel ops only if the per-channel constants are the
96
     * same for all duplicated channels; e.g.:
97
     *   SWIZZLE {0, 0, 0, 3}
98
     *   NEXT    {x, x, x, w}
99
     * ->
100
     *   NEXT    {x, _, _, w}
101
     *   SWIZZLE {0, 0, 0, 3}
102
     */
103
0
    case SWS_OP_MIN:
104
0
    case SWS_OP_MAX: {
105
0
        const SwsConst c = next->c;
106
0
        for (int i = 0; i < 4; i++) {
107
0
            if (next->comps.unused[i])
108
0
                continue;
109
0
            const int j = op->swizzle.in[i];
110
0
            if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
111
0
                return false;
112
0
            next->c.q4[j] = c.q4[i];
113
0
            seen[j] = true;
114
0
        }
115
0
        return true;
116
0
    }
117
118
0
    case SWS_OP_DITHER: {
119
0
        const SwsDitherOp d = next->dither;
120
0
        for (int i = 0; i < 4; i++) {
121
0
            if (next->comps.unused[i])
122
0
                continue;
123
0
            const int j = op->swizzle.in[i];
124
0
            if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
125
0
                return false;
126
0
            next->dither.y_offset[j] = d.y_offset[i];
127
0
            seen[j] = true;
128
0
        }
129
0
        return true;
130
0
    }
131
132
0
    case SWS_OP_INVALID:
133
0
    case SWS_OP_READ:
134
0
    case SWS_OP_WRITE:
135
0
    case SWS_OP_SWIZZLE:
136
0
    case SWS_OP_CLEAR:
137
0
    case SWS_OP_LINEAR:
138
0
    case SWS_OP_PACK:
139
0
    case SWS_OP_UNPACK:
140
0
        return false;
141
0
    case SWS_OP_TYPE_NB:
142
0
        break;
143
0
    }
144
145
0
    av_unreachable("Invalid operation type!");
146
0
    return false;
147
0
}
148
149
/* returns log2(x) only if x is a power of two, or 0 otherwise */
150
static int exact_log2(const int x)
151
0
{
152
0
    int p;
153
0
    if (x <= 0)
154
0
        return 0;
155
0
    p = av_log2(x);
156
0
    return (1 << p) == x ? p : 0;
157
0
}
158
159
static int exact_log2_q(const AVRational x)
160
0
{
161
0
    if (x.den == 1)
162
0
        return exact_log2(x.num);
163
0
    else if (x.num == 1)
164
0
        return -exact_log2(x.den);
165
0
    else
166
0
        return 0;
167
0
}
168
169
/**
170
 * If a linear operation can be reduced to a scalar multiplication, returns
171
 * the corresponding scaling factor, or 0 otherwise.
172
 */
173
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
174
                           SwsConst *out_scale)
175
0
{
176
0
    SwsConst scale = {0};
177
178
    /* There are components not on the main diagonal */
179
0
    if (c->mask & ~SWS_MASK_DIAG4)
180
0
        return false;
181
182
0
    for (int i = 0; i < 4; i++) {
183
0
        const AVRational s = c->m[i][i];
184
0
        if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
185
0
            continue;
186
0
        if (scale.q.den && av_cmp_q(s, scale.q))
187
0
            return false;
188
0
        scale.q = s;
189
0
    }
190
191
0
    if (scale.q.den)
192
0
        *out_scale = scale;
193
0
    return scale.q.den;
194
0
}
195
196
/* Extracts an integer clear operation (subset) from the given linear op. */
197
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
198
                                  SwsConst *out_clear)
199
0
{
200
0
    SwsConst clear = {0};
201
0
    bool ret = false;
202
203
0
    for (int i = 0; i < 4; i++) {
204
0
        bool const_row = c->m[i][4].den == 1; /* offset is integer */
205
0
        for (int j = 0; j < 4; j++) {
206
0
            const_row &= c->m[i][j].num == 0 || /* scalar is zero */
207
0
                         (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
208
0
        }
209
0
        if (const_row && (c->mask & SWS_MASK_ROW(i))) {
210
0
            clear.q4[i] = c->m[i][4];
211
0
            for (int j = 0; j < 5; j++)
212
0
                c->m[i][j] = Q(i == j);
213
0
            c->mask &= ~SWS_MASK_ROW(i);
214
0
            ret = true;
215
0
        }
216
0
    }
217
218
0
    if (ret)
219
0
        *out_clear = clear;
220
0
    return ret;
221
0
}
222
223
/* Unswizzle a linear operation by aligning single-input rows with
224
 * their corresponding diagonal */
225
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
226
0
{
227
0
    SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
228
0
    SwsLinearOp c = *op;
229
230
0
    for (int i = 0; i < 4; i++) {
231
0
        int idx = -1;
232
0
        for (int j = 0; j < 4; j++) {
233
0
            if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
234
0
                continue;
235
0
            if (idx >= 0)
236
0
                return false; /* multiple inputs */
237
0
            idx = j;
238
0
        }
239
240
0
        if (idx >= 0 && idx != i) {
241
            /* Move coefficient to the diagonal */
242
0
            c.m[i][i] = c.m[i][idx];
243
0
            c.m[i][idx] = Q(0);
244
0
            swiz.in[i] = idx;
245
0
        }
246
0
    }
247
248
0
    if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
249
0
        return false; /* no swizzle was identified */
250
251
0
    c.mask = ff_sws_linear_mask(c);
252
0
    *out_swiz = swiz;
253
0
    *op = c;
254
0
    return true;
255
0
}
256
257
int ff_sws_op_list_optimize(SwsOpList *ops)
258
0
{
259
0
    int ret;
260
261
0
retry:
262
0
    ff_sws_op_list_update_comps(ops);
263
264
    /* Apply all in-place optimizations (that do not re-order the list) */
265
0
    for (int n = 0; n < ops->num_ops; n++) {
266
0
        SwsOp dummy = {0};
267
0
        SwsOp *op = &ops->ops[n];
268
0
        SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
269
0
        SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
270
271
        /* common helper variable */
272
0
        bool noop = true;
273
274
0
        switch (op->op) {
275
0
        case SWS_OP_READ:
276
            /* Optimized further into refcopy / memcpy */
277
0
            if (next->op == SWS_OP_WRITE &&
278
0
                next->rw.elems == op->rw.elems &&
279
0
                next->rw.packed == op->rw.packed &&
280
0
                next->rw.frac == op->rw.frac)
281
0
            {
282
0
                ff_sws_op_list_remove_at(ops, n, 2);
283
0
                av_assert1(ops->num_ops == 0);
284
0
                return 0;
285
0
            }
286
287
            /* Skip reading extra unneeded components */
288
0
            if (!op->rw.packed) {
289
0
                int needed = op->rw.elems;
290
0
                while (needed > 0 && next->comps.unused[needed - 1])
291
0
                    needed--;
292
0
                if (op->rw.elems != needed) {
293
0
                    op->rw.elems = needed;
294
0
                    goto retry;
295
0
                }
296
0
            }
297
0
            break;
298
299
0
        case SWS_OP_SWAP_BYTES:
300
            /* Redundant (double) swap */
301
0
            if (next->op == SWS_OP_SWAP_BYTES) {
302
0
                ff_sws_op_list_remove_at(ops, n, 2);
303
0
                goto retry;
304
0
            }
305
0
            break;
306
307
0
        case SWS_OP_UNPACK:
308
            /* Redundant unpack+pack */
309
0
            if (next->op == SWS_OP_PACK && next->type == op->type &&
310
0
                next->pack.pattern[0] == op->pack.pattern[0] &&
311
0
                next->pack.pattern[1] == op->pack.pattern[1] &&
312
0
                next->pack.pattern[2] == op->pack.pattern[2] &&
313
0
                next->pack.pattern[3] == op->pack.pattern[3])
314
0
            {
315
0
                ff_sws_op_list_remove_at(ops, n, 2);
316
0
                goto retry;
317
0
            }
318
0
            break;
319
320
0
        case SWS_OP_LSHIFT:
321
0
        case SWS_OP_RSHIFT:
322
            /* Two shifts in the same direction */
323
0
            if (next->op == op->op) {
324
0
                op->c.u += next->c.u;
325
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
326
0
                goto retry;
327
0
            }
328
329
            /* No-op shift */
330
0
            if (!op->c.u) {
331
0
                ff_sws_op_list_remove_at(ops, n, 1);
332
0
                goto retry;
333
0
            }
334
0
            break;
335
336
0
        case SWS_OP_CLEAR:
337
0
            for (int i = 0; i < 4; i++) {
338
0
                if (!op->c.q4[i].den)
339
0
                    continue;
340
341
0
                if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
342
0
                    !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
343
0
                    op->c.q4[i].num == 0)
344
0
                {
345
                    /* Redundant clear-to-zero of zero component */
346
0
                    op->c.q4[i].den = 0;
347
0
                } else if (next->comps.unused[i]) {
348
                    /* Unnecessary clear of unused component */
349
0
                    op->c.q4[i] = (AVRational) {0, 0};
350
0
                } else if (op->c.q4[i].den) {
351
0
                    noop = false;
352
0
                }
353
0
            }
354
355
0
            if (noop) {
356
0
                ff_sws_op_list_remove_at(ops, n, 1);
357
0
                goto retry;
358
0
            }
359
360
            /* Transitive clear */
361
0
            if (next->op == SWS_OP_CLEAR) {
362
0
                for (int i = 0; i < 4; i++) {
363
0
                    if (next->c.q4[i].den)
364
0
                        op->c.q4[i] = next->c.q4[i];
365
0
                }
366
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
367
0
                goto retry;
368
0
            }
369
0
            break;
370
371
0
        case SWS_OP_SWIZZLE:
372
0
            for (int i = 0; i < 4; i++) {
373
0
                if (next->comps.unused[i])
374
0
                    continue;
375
0
                if (op->swizzle.in[i] != i)
376
0
                    noop = false;
377
0
            }
378
379
            /* Identity swizzle */
380
0
            if (noop) {
381
0
                ff_sws_op_list_remove_at(ops, n, 1);
382
0
                goto retry;
383
0
            }
384
385
            /* Transitive swizzle */
386
0
            if (next->op == SWS_OP_SWIZZLE) {
387
0
                const SwsSwizzleOp orig = op->swizzle;
388
0
                for (int i = 0; i < 4; i++)
389
0
                    op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
390
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
391
0
                goto retry;
392
0
            }
393
0
            break;
394
395
0
        case SWS_OP_CONVERT:
396
            /* No-op conversion */
397
0
            if (op->type == op->convert.to) {
398
0
                ff_sws_op_list_remove_at(ops, n, 1);
399
0
                goto retry;
400
0
            }
401
402
            /* Transitive conversion */
403
0
            if (next->op == SWS_OP_CONVERT &&
404
0
                op->convert.expand == next->convert.expand)
405
0
            {
406
0
                av_assert1(op->convert.to == next->type);
407
0
                op->convert.to = next->convert.to;
408
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
409
0
                goto retry;
410
0
            }
411
412
            /* Conversion followed by integer expansion */
413
0
            if (next->op == SWS_OP_SCALE && !op->convert.expand &&
414
0
                !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
415
0
            {
416
0
                op->convert.expand = true;
417
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
418
0
                goto retry;
419
0
            }
420
0
            break;
421
422
0
        case SWS_OP_MIN:
423
0
            for (int i = 0; i < 4; i++) {
424
0
                if (next->comps.unused[i] || !op->c.q4[i].den)
425
0
                    continue;
426
0
                if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
427
0
                    noop = false;
428
0
            }
429
430
0
            if (noop) {
431
0
                ff_sws_op_list_remove_at(ops, n, 1);
432
0
                goto retry;
433
0
            }
434
0
            break;
435
436
0
        case SWS_OP_MAX:
437
0
            for (int i = 0; i < 4; i++) {
438
0
                if (next->comps.unused[i] || !op->c.q4[i].den)
439
0
                    continue;
440
0
                if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
441
0
                    noop = false;
442
0
            }
443
444
0
            if (noop) {
445
0
                ff_sws_op_list_remove_at(ops, n, 1);
446
0
                goto retry;
447
0
            }
448
0
            break;
449
450
0
        case SWS_OP_DITHER:
451
0
            for (int i = 0; i < 4; i++) {
452
0
                noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) ||
453
0
                        next->comps.unused[i];
454
0
            }
455
456
0
            if (noop) {
457
0
                ff_sws_op_list_remove_at(ops, n, 1);
458
0
                goto retry;
459
0
            }
460
0
            break;
461
462
0
        case SWS_OP_LINEAR: {
463
0
            SwsSwizzleOp swizzle;
464
0
            SwsConst c;
465
466
            /* No-op (identity) linear operation */
467
0
            if (!op->lin.mask) {
468
0
                ff_sws_op_list_remove_at(ops, n, 1);
469
0
                goto retry;
470
0
            }
471
472
0
            if (next->op == SWS_OP_LINEAR) {
473
                /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
474
0
                const SwsLinearOp m1 = op->lin;
475
0
                const SwsLinearOp m2 = next->lin;
476
0
                for (int i = 0; i < 4; i++) {
477
0
                    for (int j = 0; j < 5; j++) {
478
0
                        AVRational sum = Q(0);
479
0
                        for (int k = 0; k < 4; k++)
480
0
                            sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
481
0
                        if (j == 4) /* m1.m[4][j] == 1 */
482
0
                            sum = av_add_q(sum, m2.m[i][4]);
483
0
                        op->lin.m[i][j] = sum;
484
0
                    }
485
0
                }
486
0
                op->lin.mask = ff_sws_linear_mask(op->lin);
487
0
                ff_sws_op_list_remove_at(ops, n + 1, 1);
488
0
                goto retry;
489
0
            }
490
491
            /* Optimize away zero columns */
492
0
            for (int j = 0; j < 4; j++) {
493
0
                const uint32_t col = SWS_MASK_COL(j);
494
0
                if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
495
0
                    continue;
496
0
                for (int i = 0; i < 4; i++)
497
0
                    op->lin.m[i][j] = Q(i == j);
498
0
                op->lin.mask &= ~col;
499
0
                goto retry;
500
0
            }
501
502
            /* Optimize away unused rows */
503
0
            for (int i = 0; i < 4; i++) {
504
0
                const uint32_t row = SWS_MASK_ROW(i);
505
0
                if (!next->comps.unused[i] || !(op->lin.mask & row))
506
0
                    continue;
507
0
                for (int j = 0; j < 5; j++)
508
0
                    op->lin.m[i][j] = Q(i == j);
509
0
                op->lin.mask &= ~row;
510
0
                goto retry;
511
0
            }
512
513
            /* Convert constant rows to explicit clear instruction */
514
0
            if (extract_constant_rows(&op->lin, prev->comps, &c)) {
515
0
                RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
516
0
                    .op    = SWS_OP_CLEAR,
517
0
                    .type  = op->type,
518
0
                    .comps = op->comps,
519
0
                    .c     = c,
520
0
                }));
521
0
                goto retry;
522
0
            }
523
524
            /* Multiplication by scalar constant */
525
0
            if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
526
0
                op->op = SWS_OP_SCALE;
527
0
                op->c  = c;
528
0
                goto retry;
529
0
            }
530
531
            /* Swizzle by fixed pattern */
532
0
            if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
533
0
                RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
534
0
                    .op      = SWS_OP_SWIZZLE,
535
0
                    .type    = op->type,
536
0
                    .swizzle = swizzle,
537
0
                }));
538
0
                goto retry;
539
0
            }
540
0
            break;
541
0
        }
542
543
0
        case SWS_OP_SCALE: {
544
0
            const int factor2 = exact_log2_q(op->c.q);
545
546
            /* No-op scaling */
547
0
            if (op->c.q.num == 1 && op->c.q.den == 1) {
548
0
                ff_sws_op_list_remove_at(ops, n, 1);
549
0
                goto retry;
550
0
            }
551
552
            /* Scaling by exact power of two */
553
0
            if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
554
0
                op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
555
0
                op->c.u = FFABS(factor2);
556
0
                goto retry;
557
0
            }
558
0
            break;
559
0
        }
560
0
        }
561
0
    }
562
563
    /* Push clears to the back to void any unused components */
564
0
    for (int n = 1; n < ops->num_ops - 1; n++) { /* exclude READ/WRITE */
565
0
        SwsOp *op = &ops->ops[n];
566
0
        SwsOp *next = &ops->ops[n + 1];
567
568
0
        switch (op->op) {
569
0
        case SWS_OP_CLEAR:
570
0
            if (op_commute_clear(op, next)) {
571
0
                FFSWAP(SwsOp, *op, *next);
572
0
                goto retry;
573
0
            }
574
0
            break;
575
0
        }
576
0
    }
577
578
    /* Apply any remaining preferential re-ordering optimizations; do these
579
     * last because they are more likely to block other optimizations if done
580
     * too aggressively */
581
0
    for (int n = 1; n < ops->num_ops - 1; n++) { /* exclude READ/WRITE */
582
0
        SwsOp *op = &ops->ops[n];
583
0
        SwsOp *prev = &ops->ops[n - 1];
584
0
        SwsOp *next = &ops->ops[n + 1];
585
586
0
        switch (op->op) {
587
0
        case SWS_OP_SWIZZLE: {
588
0
            bool seen[4] = {0};
589
0
            bool has_duplicates = false;
590
0
            for (int i = 0; i < 4; i++) {
591
0
                if (next->comps.unused[i])
592
0
                    continue;
593
0
                has_duplicates |= seen[op->swizzle.in[i]];
594
0
                seen[op->swizzle.in[i]] = true;
595
0
            }
596
597
            /* Try to push swizzles with duplicates towards the output */
598
0
            if (has_duplicates && op_commute_swizzle(op, next)) {
599
0
                FFSWAP(SwsOp, *op, *next);
600
0
                goto retry;
601
0
            }
602
603
            /* Move swizzle out of the way between two converts so that
604
             * they may be merged */
605
0
            if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) {
606
0
                op->type = next->convert.to;
607
0
                FFSWAP(SwsOp, *op, *next);
608
0
                goto retry;
609
0
            }
610
0
            break;
611
0
        }
612
613
0
        case SWS_OP_SCALE:
614
            /* Scaling by integer before conversion to int */
615
0
            if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
616
0
                ff_sws_pixel_type_is_int(next->convert.to))
617
0
            {
618
0
                op->type = next->convert.to;
619
0
                FFSWAP(SwsOp, *op, *next);
620
0
                goto retry;
621
0
            }
622
0
            break;
623
0
        }
624
0
    }
625
626
0
    return 0;
627
0
}
628
629
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
630
                         int size, uint8_t clear_val,
631
                         int *read_bytes, int *write_bytes)
632
0
{
633
0
    const SwsOp read = ops->ops[0];
634
0
    const int read_size = ff_sws_pixel_type_size(read.type);
635
0
    uint32_t mask[4] = {0};
636
637
0
    if (!ops->num_ops || read.op != SWS_OP_READ)
638
0
        return AVERROR(EINVAL);
639
0
    if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1))
640
0
        return AVERROR(ENOTSUP);
641
642
0
    for (int i = 0; i < read.rw.elems; i++)
643
0
        mask[i] = 0x01010101 * i * read_size + 0x03020100;
644
645
0
    for (int opidx = 1; opidx < ops->num_ops; opidx++) {
646
0
        const SwsOp *op = &ops->ops[opidx];
647
0
        switch (op->op) {
648
0
        case SWS_OP_SWIZZLE: {
649
0
            uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
650
0
            for (int i = 0; i < 4; i++)
651
0
                mask[i] = orig[op->swizzle.in[i]];
652
0
            break;
653
0
        }
654
655
0
        case SWS_OP_SWAP_BYTES:
656
0
            for (int i = 0; i < 4; i++) {
657
0
                switch (ff_sws_pixel_type_size(op->type)) {
658
0
                case 2: mask[i] = av_bswap16(mask[i]); break;
659
0
                case 4: mask[i] = av_bswap32(mask[i]); break;
660
0
                }
661
0
            }
662
0
            break;
663
664
0
        case SWS_OP_CLEAR:
665
0
            for (int i = 0; i < 4; i++) {
666
0
                if (!op->c.q4[i].den)
667
0
                    continue;
668
0
                if (op->c.q4[i].num != 0 || !clear_val)
669
0
                    return AVERROR(ENOTSUP);
670
0
                mask[i] = 0x1010101ul * clear_val;
671
0
            }
672
0
            break;
673
674
0
        case SWS_OP_CONVERT: {
675
0
            if (!op->convert.expand)
676
0
                return AVERROR(ENOTSUP);
677
0
            for (int i = 0; i < 4; i++) {
678
0
                switch (ff_sws_pixel_type_size(op->type)) {
679
0
                case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF);   break;
680
0
                case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
681
0
                }
682
0
            }
683
0
            break;
684
0
        }
685
686
0
        case SWS_OP_WRITE: {
687
0
            if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
688
0
                return AVERROR(ENOTSUP);
689
690
            /* Initialize to no-op */
691
0
            memset(shuffle, clear_val, size);
692
693
0
            const int write_size  = ff_sws_pixel_type_size(op->type);
694
0
            const int read_chunk  = read.rw.elems * read_size;
695
0
            const int write_chunk = op->rw.elems * write_size;
696
0
            const int num_groups  = size / FFMAX(read_chunk, write_chunk);
697
0
            for (int n = 0; n < num_groups; n++) {
698
0
                const int base_in  = n * read_chunk;
699
0
                const int base_out = n * write_chunk;
700
0
                for (int i = 0; i < op->rw.elems; i++) {
701
0
                    const int offset = base_out + i * write_size;
702
0
                    for (int b = 0; b < write_size; b++) {
703
0
                        const uint8_t idx = mask[i] >> (b * 8);
704
0
                        if (idx != clear_val)
705
0
                            shuffle[offset + b] = base_in + idx;
706
0
                    }
707
0
                }
708
0
            }
709
710
0
            *read_bytes  = num_groups * read_chunk;
711
0
            *write_bytes = num_groups * write_chunk;
712
0
            return num_groups;
713
0
        }
714
715
0
        default:
716
0
            return AVERROR(ENOTSUP);
717
0
        }
718
0
    }
719
720
0
    return AVERROR(EINVAL);
721
0
}