/src/ffmpeg/libswscale/ops_optimizer.c

Source
/**
 * Copyright (C) 2025 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"
#include "libavutil/bswap.h"
#include "libavutil/rational.h"

#include "ops.h"
#include "ops_internal.h"

#define RET(x)                                                                 \
    do {                                                                       \
        if ((ret = (x)) < 0)                                                   \
            return ret;                                                        \
    } while (0)

/**
 * Try to commute a clear op with the next operation. Makes any adjustments
 * to the operations as needed, but does not perform the actual commutation.
 *
 * Returns whether successful.
 */
static bool op_commute_clear(SwsOp *op, SwsOp *next)
{
    SwsOp tmp;

    av_assert1(op->op == SWS_OP_CLEAR);
    switch (next->op) {
    case SWS_OP_CONVERT:
        op->type = next->convert.to;
        /* fall through */
    case SWS_OP_LSHIFT:
    case SWS_OP_RSHIFT:
    case SWS_OP_DITHER:
    case SWS_OP_MIN:
    case SWS_OP_MAX:
    case SWS_OP_SCALE:
    case SWS_OP_READ:
    case SWS_OP_SWIZZLE:
        ff_sws_apply_op_q(next, op->c.q4);
        return true;
    case SWS_OP_SWAP_BYTES:
        switch (next->type) {
        case SWS_PIXEL_U16:
            ff_sws_apply_op_q(next, op->c.q4); /* always works */
            return true;
        case SWS_PIXEL_U32:
            for (int i = 0; i < 4; i++) {
                uint32_t v = av_bswap32(op->c.q4[i].num);
                if (v > INT_MAX)
                    return false; /* can't represent as AVRational anymore */
                tmp.c.q4[i] = Q(v);
            }
            op->c = tmp.c;
            return true;
        default:
            return false;
        }
    case SWS_OP_INVALID:
    case SWS_OP_WRITE:
    case SWS_OP_LINEAR:
    case SWS_OP_PACK:
    case SWS_OP_UNPACK:
    case SWS_OP_CLEAR:
        return false;
    case SWS_OP_TYPE_NB:
        break;
    }

    av_unreachable("Invalid operation type!");
    return false;
}

 /**
  * Try to commute a swizzle op with the next operation. Makes any adjustments
  * to the operations as needed, but does not perform the actual commutation.
  *
  * Returns whether successful.
  */
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
{
    bool seen[4] = {0};

    av_assert1(op->op == SWS_OP_SWIZZLE);
    switch (next->op) {
    case SWS_OP_CONVERT:
        op->type = next->convert.to;
        /* fall through */
    case SWS_OP_SWAP_BYTES:
    case SWS_OP_LSHIFT:
    case SWS_OP_RSHIFT:
    case SWS_OP_SCALE:
        return true;

    /**
     * We can commute per-channel ops only if the per-channel constants are the
     * same for all duplicated channels; e.g.:
     *   SWIZZLE {0, 0, 0, 3}
     *   NEXT    {x, x, x, w}
     * ->
     *   NEXT    {x, _, _, w}
     *   SWIZZLE {0, 0, 0, 3}
     */
    case SWS_OP_MIN:
    case SWS_OP_MAX: {
        const SwsConst c = next->c;
        for (int i = 0; i < 4; i++) {
            if (next->comps.unused[i])
                continue;
            const int j = op->swizzle.in[i];
            if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
                return false;
            next->c.q4[j] = c.q4[i];
            seen[j] = true;
        }
        return true;
    }

    case SWS_OP_DITHER: {
        const SwsDitherOp d = next->dither;
        for (int i = 0; i < 4; i++) {
            if (next->comps.unused[i])
                continue;
            const int j = op->swizzle.in[i];
            if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
                return false;
            next->dither.y_offset[j] = d.y_offset[i];
            seen[j] = true;
        }
        return true;
    }

    case SWS_OP_INVALID:
    case SWS_OP_READ:
    case SWS_OP_WRITE:
    case SWS_OP_SWIZZLE:
    case SWS_OP_CLEAR:
    case SWS_OP_LINEAR:
    case SWS_OP_PACK:
    case SWS_OP_UNPACK:
        return false;
    case SWS_OP_TYPE_NB:
        break;
    }

    av_unreachable("Invalid operation type!");
    return false;
}

/* returns log2(x) only if x is a power of two, or 0 otherwise */
static int exact_log2(const int x)
{
    int p;
    if (x <= 0)
        return 0;
    p = av_log2(x);
    return (1 << p) == x ? p : 0;
}

static int exact_log2_q(const AVRational x)
{
    if (x.den == 1)
        return exact_log2(x.num);
    else if (x.num == 1)
        return -exact_log2(x.den);
    else
        return 0;
}

/**
 * If a linear operation can be reduced to a scalar multiplication, returns
 * the corresponding scaling factor, or 0 otherwise.
 */
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
                           SwsConst *out_scale)
{
    SwsConst scale = {0};

    /* There are components not on the main diagonal */
    if (c->mask & ~SWS_MASK_DIAG4)
        return false;

    for (int i = 0; i < 4; i++) {
        const AVRational s = c->m[i][i];
        if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
            continue;
        if (scale.q.den && av_cmp_q(s, scale.q))
            return false;
        scale.q = s;
    }

    if (scale.q.den)
        *out_scale = scale;
    return scale.q.den;
}

/* Extracts an integer clear operation (subset) from the given linear op. */
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
                                  SwsConst *out_clear)
{
    SwsConst clear = {0};
    bool ret = false;

    for (int i = 0; i < 4; i++) {
        bool const_row = c->m[i][4].den == 1; /* offset is integer */
        for (int j = 0; j < 4; j++) {
            const_row &= c->m[i][j].num == 0 || /* scalar is zero */
                         (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
        }
        if (const_row && (c->mask & SWS_MASK_ROW(i))) {
            clear.q4[i] = c->m[i][4];
            for (int j = 0; j < 5; j++)
                c->m[i][j] = Q(i == j);
            c->mask &= ~SWS_MASK_ROW(i);
            ret = true;
        }
    }

    if (ret)
        *out_clear = clear;
    return ret;
}

/* Unswizzle a linear operation by aligning single-input rows with
 * their corresponding diagonal */
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
{
    SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
    SwsLinearOp c = *op;

    /* Find non-zero coefficients in the main 4x4 matrix */
    uint32_t nonzero = 0;
    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++) {
            if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
                continue;
            nonzero |= SWS_MASK(i, j);
        }
    }

    /* If a value is unique in its row and the target column is
     * empty, move it there and update the input swizzle */
    for (int i = 0; i < 4; i++) {
        if (nonzero & SWS_MASK_COL(i))
            continue; /* target column is not empty */
        for (int j = 0; j < 4; j++) {
            if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
                /* Move coefficient to the diagonal */
                c.m[i][i] = c.m[i][j];
                c.m[i][j] = Q(0);
                swiz.in[i] = j;
                break;
            }
        }
    }

    if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
        return false; /* no swizzle was identified */

    c.mask = ff_sws_linear_mask(c);
    *out_swiz = swiz;
    *op = c;
    return true;
}

int ff_sws_op_list_optimize(SwsOpList *ops)
{
    int ret;

retry:
    ff_sws_op_list_update_comps(ops);

    /* Apply all in-place optimizations (that do not re-order the list) */
    for (int n = 0; n < ops->num_ops; n++) {
        SwsOp dummy = {0};
        SwsOp *op = &ops->ops[n];
        SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
        SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;

        /* common helper variable */
        bool noop = true;

        if (next->comps.unused[0] && next->comps.unused[1] &&
            next->comps.unused[2] && next->comps.unused[3])
        {
            /* Remove completely unused operations */
            ff_sws_op_list_remove_at(ops, n, 1);
            goto retry;
        }

        switch (op->op) {
        case SWS_OP_READ:
            /* "Compress" planar reads where not all components are needed */
            if (!op->rw.packed) {
                SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
                int nb_planes = 0;
                for (int i = 0; i < op->rw.elems; i++) {
                    if (next->comps.unused[i]) {
                        swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
                        continue;
                    }

                    const int idx = nb_planes++;
                    av_assert1(idx <= i);
                    ops->order_src.in[idx] = ops->order_src.in[i];
                    swiz.in[i] = idx;
                }

                if (nb_planes < op->rw.elems) {
                    op->rw.elems = nb_planes;
                    RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
                        .op = SWS_OP_SWIZZLE,
                        .type = op->type,
                        .swizzle = swiz,
                    }));
                    goto retry;
                }
            }
            break;

        case SWS_OP_SWAP_BYTES:
            /* Redundant (double) swap */
            if (next->op == SWS_OP_SWAP_BYTES) {
                ff_sws_op_list_remove_at(ops, n, 2);
                goto retry;
            }
            break;

        case SWS_OP_UNPACK:
            /* Redundant unpack+pack */
            if (next->op == SWS_OP_PACK && next->type == op->type &&
                next->pack.pattern[0] == op->pack.pattern[0] &&
                next->pack.pattern[1] == op->pack.pattern[1] &&
                next->pack.pattern[2] == op->pack.pattern[2] &&
                next->pack.pattern[3] == op->pack.pattern[3])
            {
                ff_sws_op_list_remove_at(ops, n, 2);
                goto retry;
            }
            break;

        case SWS_OP_LSHIFT:
        case SWS_OP_RSHIFT:
            /* Two shifts in the same direction */
            if (next->op == op->op) {
                op->c.u += next->c.u;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* No-op shift */
            if (!op->c.u) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (!op->c.q4[i].den)
                    continue;

                if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
                    !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
                    op->c.q4[i].num == 0)
                {
                    /* Redundant clear-to-zero of zero component */
                    op->c.q4[i].den = 0;
                } else if (next->comps.unused[i]) {
                    /* Unnecessary clear of unused component */
                    op->c.q4[i] = (AVRational) {0, 0};
                } else if (op->c.q4[i].den) {
                    noop = false;
                }
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive clear */
            if (next->op == SWS_OP_CLEAR) {
                for (int i = 0; i < 4; i++) {
                    if (next->c.q4[i].den)
                        op->c.q4[i] = next->c.q4[i];
                }
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }
            break;

        case SWS_OP_SWIZZLE:
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i])
                    continue;
                if (op->swizzle.in[i] != i)
                    noop = false;
            }

            /* Identity swizzle */
            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive swizzle */
            if (next->op == SWS_OP_SWIZZLE) {
                const SwsSwizzleOp orig = op->swizzle;
                for (int i = 0; i < 4; i++)
                    op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Swizzle planes instead of components, if possible */
            if (prev->op == SWS_OP_READ && !prev->rw.packed) {
                for (int dst = 0; dst < prev->rw.elems; dst++) {
                    const int src = op->swizzle.in[dst];
                    if (src > dst && src < prev->rw.elems) {
                        FFSWAP(int, ops->order_src.in[dst], ops->order_src.in[src]);
                        for (int i = dst; i < 4; i++) {
                            if (op->swizzle.in[i] == dst)
                                op->swizzle.in[i] = src;
                            else if (op->swizzle.in[i] == src)
                                op->swizzle.in[i] = dst;
                        }
                        goto retry;
                    }
                }
            }

            if (next->op == SWS_OP_WRITE && !next->rw.packed) {
                for (int dst = 0; dst < next->rw.elems; dst++) {
                    const int src = op->swizzle.in[dst];
                    if (src > dst && src < next->rw.elems) {
                        FFSWAP(int, ops->order_dst.in[dst], ops->order_dst.in[src]);
                        FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
                        goto retry;
                    }
                }
            }
            break;

        case SWS_OP_CONVERT:
            /* No-op conversion */
            if (op->type == op->convert.to) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive conversion */
            if (next->op == SWS_OP_CONVERT &&
                op->convert.expand == next->convert.expand)
            {
                av_assert1(op->convert.to == next->type);
                op->convert.to = next->convert.to;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Conversion followed by integer expansion */
            if (next->op == SWS_OP_SCALE && !op->convert.expand &&
                ff_sws_pixel_type_is_int(op->type) &&
                ff_sws_pixel_type_is_int(op->convert.to) &&
                !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
            {
                op->convert.expand = true;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }
            break;

        case SWS_OP_MIN:
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i] || !op->c.q4[i].den)
                    continue;
                if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_MAX:
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i] || !op->c.q4[i].den)
                    continue;
                if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_DITHER:
            for (int i = 0; i < 4; i++) {
                if (next->comps.unused[i] || op->dither.y_offset[i] < 0)
                    continue;
                if (prev->comps.flags[i] & SWS_COMP_EXACT) {
                    op->dither.y_offset[i] = -1; /* unnecessary dither */
                    goto retry;
                } else {
                    noop = false;
                }
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_LINEAR: {
            SwsSwizzleOp swizzle;
            SwsConst c;

            /* No-op (identity) linear operation */
            if (!op->lin.mask) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            if (next->op == SWS_OP_LINEAR) {
                /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
                const SwsLinearOp m1 = op->lin;
                const SwsLinearOp m2 = next->lin;
                for (int i = 0; i < 4; i++) {
                    for (int j = 0; j < 5; j++) {
                        AVRational sum = Q(0);
                        for (int k = 0; k < 4; k++)
                            sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
                        if (j == 4) /* m1.m[4][j] == 1 */
                            sum = av_add_q(sum, m2.m[i][4]);
                        op->lin.m[i][j] = sum;
                    }
                }
                op->lin.mask = ff_sws_linear_mask(op->lin);
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Optimize away zero columns */
            for (int j = 0; j < 4; j++) {
                const uint32_t col = SWS_MASK_COL(j);
                if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
                    continue;
                for (int i = 0; i < 4; i++)
                    op->lin.m[i][j] = Q(i == j);
                op->lin.mask &= ~col;
                goto retry;
            }

            /* Optimize away unused rows */
            for (int i = 0; i < 4; i++) {
                const uint32_t row = SWS_MASK_ROW(i);
                if (!next->comps.unused[i] || !(op->lin.mask & row))
                    continue;
                for (int j = 0; j < 5; j++)
                    op->lin.m[i][j] = Q(i == j);
                op->lin.mask &= ~row;
                goto retry;
            }

            /* Convert constant rows to explicit clear instruction */
            if (extract_constant_rows(&op->lin, prev->comps, &c)) {
                RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
                    .op    = SWS_OP_CLEAR,
                    .type  = op->type,
                    .comps = op->comps,
                    .c     = c,
                }));
                goto retry;
            }

            /* Multiplication by scalar constant */
            if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
                op->op = SWS_OP_SCALE;
                op->c  = c;
                goto retry;
            }

            /* Swizzle by fixed pattern */
            if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
                RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
                    .op      = SWS_OP_SWIZZLE,
                    .type    = op->type,
                    .swizzle = swizzle,
                }));
                goto retry;
            }
            break;
        }

        case SWS_OP_SCALE: {
            const int factor2 = exact_log2_q(op->c.q);

            /* No-op scaling */
            if (op->c.q.num == 1 && op->c.q.den == 1) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Scaling by exact power of two */
            if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
                op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
                op->c.u = FFABS(factor2);
                goto retry;
            }
            break;
        }
        }
    }

    /* Push clears to the back to void any unused components */
    for (int n = 0; n < ops->num_ops - 1; n++) {
        SwsOp *op = &ops->ops[n];
        SwsOp *next = &ops->ops[n + 1];

        switch (op->op) {
        case SWS_OP_CLEAR:
            if (op_commute_clear(op, next)) {
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;
        }
    }

    /* Apply any remaining preferential re-ordering optimizations; do these
     * last because they are more likely to block other optimizations if done
     * too aggressively */
    for (int n = 0; n < ops->num_ops - 1; n++) {
        SwsOp *op = &ops->ops[n];
        SwsOp *next = &ops->ops[n + 1];

        switch (op->op) {
        case SWS_OP_SWIZZLE: {
            /* Try to push swizzles towards the output */
            if (op_commute_swizzle(op, next)) {
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;
        }

        case SWS_OP_SCALE:
            /* Scaling by integer before conversion to int */
            if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
                ff_sws_pixel_type_is_int(next->convert.to))
            {
                op->type = next->convert.to;
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;
        }
    }

    return 0;
}

int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
                         int size, uint8_t clear_val,
                         int *read_bytes, int *write_bytes)
{
    if (!ops->num_ops)
        return AVERROR(EINVAL);

    const SwsOp *read = ff_sws_op_list_input(ops);
    if (!read || read->rw.frac || (!read->rw.packed && read->rw.elems > 1))
        return AVERROR(ENOTSUP);

    const int read_size = ff_sws_pixel_type_size(read->type);
    uint32_t mask[4] = {0};
    for (int i = 0; i < read->rw.elems; i++)
        mask[i] = 0x01010101 * i * read_size + 0x03020100;

    for (int opidx = 1; opidx < ops->num_ops; opidx++) {
        const SwsOp *op = &ops->ops[opidx];
        switch (op->op) {
        case SWS_OP_SWIZZLE: {
            uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
            for (int i = 0; i < 4; i++)
                mask[i] = orig[op->swizzle.in[i]];
            break;
        }

        case SWS_OP_SWAP_BYTES:
            for (int i = 0; i < 4; i++) {
                switch (ff_sws_pixel_type_size(op->type)) {
                case 2: mask[i] = av_bswap16(mask[i]); break;
                case 4: mask[i] = av_bswap32(mask[i]); break;
                }
            }
            break;

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (!op->c.q4[i].den)
                    continue;
                if (op->c.q4[i].num != 0 || !clear_val)
                    return AVERROR(ENOTSUP);
                mask[i] = 0x1010101ul * clear_val;
            }
            break;

        case SWS_OP_CONVERT: {
            if (!op->convert.expand)
                return AVERROR(ENOTSUP);
            for (int i = 0; i < 4; i++) {
                switch (ff_sws_pixel_type_size(op->type)) {
                case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF);   break;
                case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
                }
            }
            break;
        }

        case SWS_OP_WRITE: {
            if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
                return AVERROR(ENOTSUP);

            /* Initialize to no-op */
            memset(shuffle, clear_val, size);

            const int write_size  = ff_sws_pixel_type_size(op->type);
            const int read_chunk  = read->rw.elems * read_size;
            const int write_chunk = op->rw.elems * write_size;
            const int num_groups  = size / FFMAX(read_chunk, write_chunk);
            for (int n = 0; n < num_groups; n++) {
                const int base_in  = n * read_chunk;
                const int base_out = n * write_chunk;
                for (int i = 0; i < op->rw.elems; i++) {
                    const int offset = base_out + i * write_size;
                    for (int b = 0; b < write_size; b++) {
                        const uint8_t idx = mask[i] >> (b * 8);
                        if (idx != clear_val)
                            shuffle[offset + b] = base_in + idx;
                    }
                }
            }

            *read_bytes  = num_groups * read_chunk;
            *write_bytes = num_groups * write_chunk;
            return num_groups;
        }

        default:
            return AVERROR(ENOTSUP);
        }
    }

    return AVERROR(EINVAL);
}

Coverage Report

Created: 2026-03-12 07:14

Line	Count	Source
1		/**
2		* Copyright (C) 2025 Niklas Haas
3		*
4		* This file is part of FFmpeg.
5		*
6		* FFmpeg is free software; you can redistribute it and/or
7		* modify it under the terms of the GNU Lesser General Public
8		* License as published by the Free Software Foundation; either
9		* version 2.1 of the License, or (at your option) any later version.
10		*
11		* FFmpeg is distributed in the hope that it will be useful,
12		* but WITHOUT ANY WARRANTY; without even the implied warranty of
13		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14		* Lesser General Public License for more details.
15		*
16		* You should have received a copy of the GNU Lesser General Public
17		* License along with FFmpeg; if not, write to the Free Software
18		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19		*/
20
21		#include "libavutil/avassert.h"
22		#include "libavutil/bswap.h"
23		#include "libavutil/rational.h"
24
25		#include "ops.h"
26		#include "ops_internal.h"
27
28		#define RET(x) \
29	0	do { \
30	0	if ((ret = (x)) < 0) \
31	0	return ret; \
32	0	} while (0)
33
34		/**
35		* Try to commute a clear op with the next operation. Makes any adjustments
36		* to the operations as needed, but does not perform the actual commutation.
37		*
38		* Returns whether successful.
39		*/
40		static bool op_commute_clear(SwsOp op, SwsOp next)
41	0	{
42	0	SwsOp tmp;
43
44	0	av_assert1(op->op == SWS_OP_CLEAR);
45	0	switch (next->op) {
46	0	case SWS_OP_CONVERT:
47	0	op->type = next->convert.to;
48		/* fall through */
49	0	case SWS_OP_LSHIFT:
50	0	case SWS_OP_RSHIFT:
51	0	case SWS_OP_DITHER:
52	0	case SWS_OP_MIN:
53	0	case SWS_OP_MAX:
54	0	case SWS_OP_SCALE:
55	0	case SWS_OP_READ:
56	0	case SWS_OP_SWIZZLE:
57	0	ff_sws_apply_op_q(next, op->c.q4);
58	0	return true;
59	0	case SWS_OP_SWAP_BYTES:
60	0	switch (next->type) {
61	0	case SWS_PIXEL_U16:
62	0	ff_sws_apply_op_q(next, op->c.q4); /* always works */
63	0	return true;
64	0	case SWS_PIXEL_U32:
65	0	for (int i = 0; i < 4; i++) {
66	0	uint32_t v = av_bswap32(op->c.q4[i].num);
67	0	if (v > INT_MAX)
68	0	return false; /* can't represent as AVRational anymore */
69	0	tmp.c.q4[i] = Q(v);
70	0	}
71	0	op->c = tmp.c;
72	0	return true;
73	0	default:
74	0	return false;
75	0	}
76	0	case SWS_OP_INVALID:
77	0	case SWS_OP_WRITE:
78	0	case SWS_OP_LINEAR:
79	0	case SWS_OP_PACK:
80	0	case SWS_OP_UNPACK:
81	0	case SWS_OP_CLEAR:
82	0	return false;
83	0	case SWS_OP_TYPE_NB:
84	0	break;
85	0	}
86
87	0	av_unreachable("Invalid operation type!");
88	0	return false;
89	0	}
90
91		/**
92		* Try to commute a swizzle op with the next operation. Makes any adjustments
93		* to the operations as needed, but does not perform the actual commutation.
94		*
95		* Returns whether successful.
96		*/
97		static bool op_commute_swizzle(SwsOp op, SwsOp next)
98	0	{
99	0	bool seen[4] = {0};
100
101	0	av_assert1(op->op == SWS_OP_SWIZZLE);
102	0	switch (next->op) {
103	0	case SWS_OP_CONVERT:
104	0	op->type = next->convert.to;
105		/* fall through */
106	0	case SWS_OP_SWAP_BYTES:
107	0	case SWS_OP_LSHIFT:
108	0	case SWS_OP_RSHIFT:
109	0	case SWS_OP_SCALE:
110	0	return true;
111
112		/**
113		* We can commute per-channel ops only if the per-channel constants are the
114		* same for all duplicated channels; e.g.:
115		* SWIZZLE {0, 0, 0, 3}
116		* NEXT {x, x, x, w}
117		* ->
118		* NEXT {x, _, _, w}
119		* SWIZZLE {0, 0, 0, 3}
120		*/
121	0	case SWS_OP_MIN:
122	0	case SWS_OP_MAX: {
123	0	const SwsConst c = next->c;
124	0	for (int i = 0; i < 4; i++) {
125	0	if (next->comps.unused[i])
126	0	continue;
127	0	const int j = op->swizzle.in[i];
128	0	if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
129	0	return false;
130	0	next->c.q4[j] = c.q4[i];
131	0	seen[j] = true;
132	0	}
133	0	return true;
134	0	}
135
136	0	case SWS_OP_DITHER: {
137	0	const SwsDitherOp d = next->dither;
138	0	for (int i = 0; i < 4; i++) {
139	0	if (next->comps.unused[i])
140	0	continue;
141	0	const int j = op->swizzle.in[i];
142	0	if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
143	0	return false;
144	0	next->dither.y_offset[j] = d.y_offset[i];
145	0	seen[j] = true;
146	0	}
147	0	return true;
148	0	}
149
150	0	case SWS_OP_INVALID:
151	0	case SWS_OP_READ:
152	0	case SWS_OP_WRITE:
153	0	case SWS_OP_SWIZZLE:
154	0	case SWS_OP_CLEAR:
155	0	case SWS_OP_LINEAR:
156	0	case SWS_OP_PACK:
157	0	case SWS_OP_UNPACK:
158	0	return false;
159	0	case SWS_OP_TYPE_NB:
160	0	break;
161	0	}
162
163	0	av_unreachable("Invalid operation type!");
164	0	return false;
165	0	}
166
167		/* returns log2(x) only if x is a power of two, or 0 otherwise */
168		static int exact_log2(const int x)
169	0	{
170	0	int p;
171	0	if (x <= 0)
172	0	return 0;
173	0	p = av_log2(x);
174	0	return (1 << p) == x ? p : 0;
175	0	}
176
177		static int exact_log2_q(const AVRational x)
178	0	{
179	0	if (x.den == 1)
180	0	return exact_log2(x.num);
181	0	else if (x.num == 1)
182	0	return -exact_log2(x.den);
183	0	else
184	0	return 0;
185	0	}
186
187		/**
188		* If a linear operation can be reduced to a scalar multiplication, returns
189		* the corresponding scaling factor, or 0 otherwise.
190		*/
191		static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
192		SwsConst *out_scale)
193	0	{
194	0	SwsConst scale = {0};
195
196		/* There are components not on the main diagonal */
197	0	if (c->mask & ~SWS_MASK_DIAG4)
198	0	return false;
199
200	0	for (int i = 0; i < 4; i++) {
201	0	const AVRational s = c->m[i][i];
202	0	if ((prev.flags[i] & SWS_COMP_ZERO) \|\| next.unused[i])
203	0	continue;
204	0	if (scale.q.den && av_cmp_q(s, scale.q))
205	0	return false;
206	0	scale.q = s;
207	0	}
208
209	0	if (scale.q.den)
210	0	*out_scale = scale;
211	0	return scale.q.den;
212	0	}
213
214		/* Extracts an integer clear operation (subset) from the given linear op. */
215		static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
216		SwsConst *out_clear)
217	0	{
218	0	SwsConst clear = {0};
219	0	bool ret = false;
220
221	0	for (int i = 0; i < 4; i++) {
222	0	bool const_row = c->m[i][4].den == 1; /* offset is integer */
223	0	for (int j = 0; j < 4; j++) {
224	0	const_row &= c->m[i][j].num == 0 \|\| /* scalar is zero */
225	0	(prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
226	0	}
227	0	if (const_row && (c->mask & SWS_MASK_ROW(i))) {
228	0	clear.q4[i] = c->m[i][4];
229	0	for (int j = 0; j < 5; j++)
230	0	c->m[i][j] = Q(i == j);
231	0	c->mask &= ~SWS_MASK_ROW(i);
232	0	ret = true;
233	0	}
234	0	}
235
236	0	if (ret)
237	0	*out_clear = clear;
238	0	return ret;
239	0	}
240
241		/* Unswizzle a linear operation by aligning single-input rows with
242		* their corresponding diagonal */
243		static bool extract_swizzle(SwsLinearOp op, SwsComps prev, SwsSwizzleOp out_swiz)
244	0	{
245	0	SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
246	0	SwsLinearOp c = *op;
247
248		/* Find non-zero coefficients in the main 4x4 matrix */
249	0	uint32_t nonzero = 0;
250	0	for (int i = 0; i < 4; i++) {
251	0	for (int j = 0; j < 4; j++) {
252	0	if (!c.m[i][j].num \|\| (prev.flags[j] & SWS_COMP_ZERO))
253	0	continue;
254	0	nonzero \|= SWS_MASK(i, j);
255	0	}
256	0	}
257
258		/* If a value is unique in its row and the target column is
259		* empty, move it there and update the input swizzle */
260	0	for (int i = 0; i < 4; i++) {
261	0	if (nonzero & SWS_MASK_COL(i))
262	0	continue; /* target column is not empty */
263	0	for (int j = 0; j < 4; j++) {
264	0	if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
265		/* Move coefficient to the diagonal */
266	0	c.m[i][i] = c.m[i][j];
267	0	c.m[i][j] = Q(0);
268	0	swiz.in[i] = j;
269	0	break;
270	0	}
271	0	}
272	0	}
273
274	0	if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
275	0	return false; /* no swizzle was identified */
276
277	0	c.mask = ff_sws_linear_mask(c);
278	0	*out_swiz = swiz;
279	0	*op = c;
280	0	return true;
281	0	}
282
283		int ff_sws_op_list_optimize(SwsOpList *ops)
284	0	{
285	0	int ret;
286
287	0	retry:
288	0	ff_sws_op_list_update_comps(ops);
289
290		/* Apply all in-place optimizations (that do not re-order the list) */
291	0	for (int n = 0; n < ops->num_ops; n++) {
292	0	SwsOp dummy = {0};
293	0	SwsOp *op = &ops->ops[n];
294	0	SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
295	0	SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
296
297		/* common helper variable */
298	0	bool noop = true;
299
300	0	if (next->comps.unused[0] && next->comps.unused[1] &&
301	0	next->comps.unused[2] && next->comps.unused[3])
302	0	{
303		/* Remove completely unused operations */
304	0	ff_sws_op_list_remove_at(ops, n, 1);
305	0	goto retry;
306	0	}
307
308	0	switch (op->op) {
309	0	case SWS_OP_READ:
310		/* "Compress" planar reads where not all components are needed */
311	0	if (!op->rw.packed) {
312	0	SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
313	0	int nb_planes = 0;
314	0	for (int i = 0; i < op->rw.elems; i++) {
315	0	if (next->comps.unused[i]) {
316	0	swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
317	0	continue;
318	0	}
319
320	0	const int idx = nb_planes++;
321	0	av_assert1(idx <= i);
322	0	ops->order_src.in[idx] = ops->order_src.in[i];
323	0	swiz.in[i] = idx;
324	0	}
325
326	0	if (nb_planes < op->rw.elems) {
327	0	op->rw.elems = nb_planes;
328	0	RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
329	0	.op = SWS_OP_SWIZZLE,
330	0	.type = op->type,
331	0	.swizzle = swiz,
332	0	}));
333	0	goto retry;
334	0	}
335	0	}
336	0	break;
337
338	0	case SWS_OP_SWAP_BYTES:
339		/* Redundant (double) swap */
340	0	if (next->op == SWS_OP_SWAP_BYTES) {
341	0	ff_sws_op_list_remove_at(ops, n, 2);
342	0	goto retry;
343	0	}
344	0	break;
345
346	0	case SWS_OP_UNPACK:
347		/* Redundant unpack+pack */
348	0	if (next->op == SWS_OP_PACK && next->type == op->type &&
349	0	next->pack.pattern[0] == op->pack.pattern[0] &&
350	0	next->pack.pattern[1] == op->pack.pattern[1] &&
351	0	next->pack.pattern[2] == op->pack.pattern[2] &&
352	0	next->pack.pattern[3] == op->pack.pattern[3])
353	0	{
354	0	ff_sws_op_list_remove_at(ops, n, 2);
355	0	goto retry;
356	0	}
357	0	break;
358
359	0	case SWS_OP_LSHIFT:
360	0	case SWS_OP_RSHIFT:
361		/* Two shifts in the same direction */
362	0	if (next->op == op->op) {
363	0	op->c.u += next->c.u;
364	0	ff_sws_op_list_remove_at(ops, n + 1, 1);
365	0	goto retry;
366	0	}
367
368		/* No-op shift */
369	0	if (!op->c.u) {
370	0	ff_sws_op_list_remove_at(ops, n, 1);
371	0	goto retry;
372	0	}
373	0	break;
374
375	0	case SWS_OP_CLEAR:
376	0	for (int i = 0; i < 4; i++) {
377	0	if (!op->c.q4[i].den)
378	0	continue;
379
380	0	if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
381	0	!(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
382	0	op->c.q4[i].num == 0)
383	0	{
384		/* Redundant clear-to-zero of zero component */
385	0	op->c.q4[i].den = 0;
386	0	} else if (next->comps.unused[i]) {
387		/* Unnecessary clear of unused component */
388	0	op->c.q4[i] = (AVRational) {0, 0};
389	0	} else if (op->c.q4[i].den) {
390	0	noop = false;
391	0	}
392	0	}
393
394	0	if (noop) {
395	0	ff_sws_op_list_remove_at(ops, n, 1);
396	0	goto retry;
397	0	}
398
399		/* Transitive clear */
400	0	if (next->op == SWS_OP_CLEAR) {
401	0	for (int i = 0; i < 4; i++) {
402	0	if (next->c.q4[i].den)
403	0	op->c.q4[i] = next->c.q4[i];
404	0	}
405	0	ff_sws_op_list_remove_at(ops, n + 1, 1);
406	0	goto retry;
407	0	}
408	0	break;
409
410	0	case SWS_OP_SWIZZLE:
411	0	for (int i = 0; i < 4; i++) {
412	0	if (next->comps.unused[i])
413	0	continue;
414	0	if (op->swizzle.in[i] != i)
415	0	noop = false;
416	0	}
417
418		/* Identity swizzle */
419	0	if (noop) {
420	0	ff_sws_op_list_remove_at(ops, n, 1);
421	0	goto retry;
422	0	}
423
424		/* Transitive swizzle */
425	0	if (next->op == SWS_OP_SWIZZLE) {
426	0	const SwsSwizzleOp orig = op->swizzle;
427	0	for (int i = 0; i < 4; i++)
428	0	op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
429	0	ff_sws_op_list_remove_at(ops, n + 1, 1);
430	0	goto retry;
431	0	}
432
433		/* Swizzle planes instead of components, if possible */
434	0	if (prev->op == SWS_OP_READ && !prev->rw.packed) {
435	0	for (int dst = 0; dst < prev->rw.elems; dst++) {
436	0	const int src = op->swizzle.in[dst];
437	0	if (src > dst && src < prev->rw.elems) {
438	0	FFSWAP(int, ops->order_src.in[dst], ops->order_src.in[src]);
439	0	for (int i = dst; i < 4; i++) {
440	0	if (op->swizzle.in[i] == dst)
441	0	op->swizzle.in[i] = src;
442	0	else if (op->swizzle.in[i] == src)
443	0	op->swizzle.in[i] = dst;
444	0	}
445	0	goto retry;
446	0	}
447	0	}
448	0	}
449
450	0	if (next->op == SWS_OP_WRITE && !next->rw.packed) {
451	0	for (int dst = 0; dst < next->rw.elems; dst++) {
452	0	const int src = op->swizzle.in[dst];
453	0	if (src > dst && src < next->rw.elems) {
454	0	FFSWAP(int, ops->order_dst.in[dst], ops->order_dst.in[src]);
455	0	FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
456	0	goto retry;
457	0	}
458	0	}
459	0	}
460	0	break;
461
462	0	case SWS_OP_CONVERT:
463		/* No-op conversion */
464	0	if (op->type == op->convert.to) {
465	0	ff_sws_op_list_remove_at(ops, n, 1);
466	0	goto retry;
467	0	}
468
469		/* Transitive conversion */
470	0	if (next->op == SWS_OP_CONVERT &&
471	0	op->convert.expand == next->convert.expand)
472	0	{
473	0	av_assert1(op->convert.to == next->type);
474	0	op->convert.to = next->convert.to;
475	0	ff_sws_op_list_remove_at(ops, n + 1, 1);
476	0	goto retry;
477	0	}
478
479		/* Conversion followed by integer expansion */
480	0	if (next->op == SWS_OP_SCALE && !op->convert.expand &&
481	0	ff_sws_pixel_type_is_int(op->type) &&
482	0	ff_sws_pixel_type_is_int(op->convert.to) &&
483	0	!av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
484	0	{
485	0	op->convert.expand = true;
486	0	ff_sws_op_list_remove_at(ops, n + 1, 1);
487	0	goto retry;
488	0	}
489	0	break;
490
491	0	case SWS_OP_MIN:
492	0	for (int i = 0; i < 4; i++) {
493	0	if (next->comps.unused[i] \|\| !op->c.q4[i].den)
494	0	continue;
495	0	if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
496	0	noop = false;
497	0	}
498
499	0	if (noop) {
500	0	ff_sws_op_list_remove_at(ops, n, 1);
501	0	goto retry;
502	0	}
503	0	break;
504
505	0	case SWS_OP_MAX:
506	0	for (int i = 0; i < 4; i++) {
507	0	if (next->comps.unused[i] \|\| !op->c.q4[i].den)
508	0	continue;
509	0	if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
510	0	noop = false;
511	0	}
512
513	0	if (noop) {
514	0	ff_sws_op_list_remove_at(ops, n, 1);
515	0	goto retry;
516	0	}
517	0	break;
518
519	0	case SWS_OP_DITHER:
520	0	for (int i = 0; i < 4; i++) {
521	0	if (next->comps.unused[i] \|\| op->dither.y_offset[i] < 0)
522	0	continue;
523	0	if (prev->comps.flags[i] & SWS_COMP_EXACT) {
524	0	op->dither.y_offset[i] = -1; /* unnecessary dither */
525	0	goto retry;
526	0	} else {
527	0	noop = false;
528	0	}
529	0	}
530
531	0	if (noop) {
532	0	ff_sws_op_list_remove_at(ops, n, 1);
533	0	goto retry;
534	0	}
535	0	break;
536
537	0	case SWS_OP_LINEAR: {
538	0	SwsSwizzleOp swizzle;
539	0	SwsConst c;
540
541		/* No-op (identity) linear operation */
542	0	if (!op->lin.mask) {
543	0	ff_sws_op_list_remove_at(ops, n, 1);
544	0	goto retry;
545	0	}
546
547	0	if (next->op == SWS_OP_LINEAR) {
548		/* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
549	0	const SwsLinearOp m1 = op->lin;
550	0	const SwsLinearOp m2 = next->lin;
551	0	for (int i = 0; i < 4; i++) {
552	0	for (int j = 0; j < 5; j++) {
553	0	AVRational sum = Q(0);
554	0	for (int k = 0; k < 4; k++)
555	0	sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
556	0	if (j == 4) /* m1.m[4][j] == 1 */
557	0	sum = av_add_q(sum, m2.m[i][4]);
558	0	op->lin.m[i][j] = sum;
559	0	}
560	0	}
561	0	op->lin.mask = ff_sws_linear_mask(op->lin);
562	0	ff_sws_op_list_remove_at(ops, n + 1, 1);
563	0	goto retry;
564	0	}
565
566		/* Optimize away zero columns */
567	0	for (int j = 0; j < 4; j++) {
568	0	const uint32_t col = SWS_MASK_COL(j);
569	0	if (!(prev->comps.flags[j] & SWS_COMP_ZERO) \|\| !(op->lin.mask & col))
570	0	continue;
571	0	for (int i = 0; i < 4; i++)
572	0	op->lin.m[i][j] = Q(i == j);
573	0	op->lin.mask &= ~col;
574	0	goto retry;
575	0	}
576
577		/* Optimize away unused rows */
578	0	for (int i = 0; i < 4; i++) {
579	0	const uint32_t row = SWS_MASK_ROW(i);
580	0	if (!next->comps.unused[i] \|\| !(op->lin.mask & row))
581	0	continue;
582	0	for (int j = 0; j < 5; j++)
583	0	op->lin.m[i][j] = Q(i == j);
584	0	op->lin.mask &= ~row;
585	0	goto retry;
586	0	}
587
588		/* Convert constant rows to explicit clear instruction */
589	0	if (extract_constant_rows(&op->lin, prev->comps, &c)) {
590	0	RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
591	0	.op = SWS_OP_CLEAR,
592	0	.type = op->type,
593	0	.comps = op->comps,
594	0	.c = c,
595	0	}));
596	0	goto retry;
597	0	}
598
599		/* Multiplication by scalar constant */
600	0	if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
601	0	op->op = SWS_OP_SCALE;
602	0	op->c = c;
603	0	goto retry;
604	0	}
605
606		/* Swizzle by fixed pattern */
607	0	if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
608	0	RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
609	0	.op = SWS_OP_SWIZZLE,
610	0	.type = op->type,
611	0	.swizzle = swizzle,
612	0	}));
613	0	goto retry;
614	0	}
615	0	break;
616	0	}
617
618	0	case SWS_OP_SCALE: {
619	0	const int factor2 = exact_log2_q(op->c.q);
620
621		/* No-op scaling */
622	0	if (op->c.q.num == 1 && op->c.q.den == 1) {
623	0	ff_sws_op_list_remove_at(ops, n, 1);
624	0	goto retry;
625	0	}
626
627		/* Scaling by exact power of two */
628	0	if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
629	0	op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
630	0	op->c.u = FFABS(factor2);
631	0	goto retry;
632	0	}
633	0	break;
634	0	}
635	0	}
636	0	}
637
638		/* Push clears to the back to void any unused components */
639	0	for (int n = 0; n < ops->num_ops - 1; n++) {
640	0	SwsOp *op = &ops->ops[n];
641	0	SwsOp *next = &ops->ops[n + 1];
642
643	0	switch (op->op) {
644	0	case SWS_OP_CLEAR:
645	0	if (op_commute_clear(op, next)) {
646	0	FFSWAP(SwsOp, op, next);
647	0	goto retry;
648	0	}
649	0	break;
650	0	}
651	0	}
652
653		/* Apply any remaining preferential re-ordering optimizations; do these
654		* last because they are more likely to block other optimizations if done
655		* too aggressively */
656	0	for (int n = 0; n < ops->num_ops - 1; n++) {
657	0	SwsOp *op = &ops->ops[n];
658	0	SwsOp *next = &ops->ops[n + 1];
659
660	0	switch (op->op) {
661	0	case SWS_OP_SWIZZLE: {
662		/* Try to push swizzles towards the output */
663	0	if (op_commute_swizzle(op, next)) {
664	0	FFSWAP(SwsOp, op, next);
665	0	goto retry;
666	0	}
667	0	break;
668	0	}
669
670	0	case SWS_OP_SCALE:
671		/* Scaling by integer before conversion to int */
672	0	if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
673	0	ff_sws_pixel_type_is_int(next->convert.to))
674	0	{
675	0	op->type = next->convert.to;
676	0	FFSWAP(SwsOp, op, next);
677	0	goto retry;
678	0	}
679	0	break;
680	0	}
681	0	}
682
683	0	return 0;
684	0	}
685
686		int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
687		int size, uint8_t clear_val,
688		int read_bytes, int write_bytes)
689	0	{
690	0	if (!ops->num_ops)
691	0	return AVERROR(EINVAL);
692
693	0	const SwsOp *read = ff_sws_op_list_input(ops);
694	0	if (!read \|\| read->rw.frac \|\| (!read->rw.packed && read->rw.elems > 1))
695	0	return AVERROR(ENOTSUP);
696
697	0	const int read_size = ff_sws_pixel_type_size(read->type);
698	0	uint32_t mask[4] = {0};
699	0	for (int i = 0; i < read->rw.elems; i++)
700	0	mask[i] = 0x01010101 * i * read_size + 0x03020100;
701
702	0	for (int opidx = 1; opidx < ops->num_ops; opidx++) {
703	0	const SwsOp *op = &ops->ops[opidx];
704	0	switch (op->op) {
705	0	case SWS_OP_SWIZZLE: {
706	0	uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
707	0	for (int i = 0; i < 4; i++)
708	0	mask[i] = orig[op->swizzle.in[i]];
709	0	break;
710	0	}
711
712	0	case SWS_OP_SWAP_BYTES:
713	0	for (int i = 0; i < 4; i++) {
714	0	switch (ff_sws_pixel_type_size(op->type)) {
715	0	case 2: mask[i] = av_bswap16(mask[i]); break;
716	0	case 4: mask[i] = av_bswap32(mask[i]); break;
717	0	}
718	0	}
719	0	break;
720
721	0	case SWS_OP_CLEAR:
722	0	for (int i = 0; i < 4; i++) {
723	0	if (!op->c.q4[i].den)
724	0	continue;
725	0	if (op->c.q4[i].num != 0 \|\| !clear_val)
726	0	return AVERROR(ENOTSUP);
727	0	mask[i] = 0x1010101ul * clear_val;
728	0	}
729	0	break;
730
731	0	case SWS_OP_CONVERT: {
732	0	if (!op->convert.expand)
733	0	return AVERROR(ENOTSUP);
734	0	for (int i = 0; i < 4; i++) {
735	0	switch (ff_sws_pixel_type_size(op->type)) {
736	0	case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
737	0	case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
738	0	}
739	0	}
740	0	break;
741	0	}
742
743	0	case SWS_OP_WRITE: {
744	0	if (op->rw.frac \|\| (!op->rw.packed && op->rw.elems > 1))
745	0	return AVERROR(ENOTSUP);
746
747		/* Initialize to no-op */
748	0	memset(shuffle, clear_val, size);
749
750	0	const int write_size = ff_sws_pixel_type_size(op->type);
751	0	const int read_chunk = read->rw.elems * read_size;
752	0	const int write_chunk = op->rw.elems * write_size;
753	0	const int num_groups = size / FFMAX(read_chunk, write_chunk);
754	0	for (int n = 0; n < num_groups; n++) {
755	0	const int base_in = n * read_chunk;
756	0	const int base_out = n * write_chunk;
757	0	for (int i = 0; i < op->rw.elems; i++) {
758	0	const int offset = base_out + i * write_size;
759	0	for (int b = 0; b < write_size; b++) {
760	0	const uint8_t idx = mask[i] >> (b * 8);
761	0	if (idx != clear_val)
762	0	shuffle[offset + b] = base_in + idx;
763	0	}
764	0	}
765	0	}
766
767	0	read_bytes = num_groups read_chunk;
768	0	write_bytes = num_groups write_chunk;
769	0	return num_groups;
770	0	}
771
772	0	default:
773	0	return AVERROR(ENOTSUP);
774	0	}
775	0	}
776
777	0	return AVERROR(EINVAL);
778	0	}