/src/ffmpeg/libswscale/ops_chain.c

Source
/**
 * Copyright (C) 2025 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "libavutil/rational.h"

#include "ops_chain.h"

/* Build the AVRational N/1, i.e. the integer N expressed as a rational. */
#define Q(N) ((AVRational) { (N), 1 })

/**
 * Allocate a new operation chain with every field zeroed.
 *
 * @return the freshly allocated chain, or NULL if av_mallocz() fails
 */
SwsOpChain *ff_sws_op_chain_alloc(void)
{
    SwsOpChain *chain = av_mallocz(sizeof(*chain));
    return chain;
}

/**
 * Release an operation chain.
 *
 * Every slot from 0 up to and including `num_impl` that has a destructor
 * registered gets that destructor called on the slot's private pointer;
 * afterwards the chain itself is freed. A NULL `ptr` is ignored.
 *
 * @param ptr the SwsOpChain to release, passed as an untyped pointer
 */
void ff_sws_op_chain_free_cb(void *ptr)
{
    SwsOpChain *chain = ptr;
    if (!chain)
        return;

    for (int slot = 0; slot <= chain->num_impl; slot++) {
        if (!chain->free[slot])
            continue;
        chain->free[slot](chain->impl[slot].priv.ptr);
    }

    av_free(chain);
}

/**
 * Append one implementation to the end of a chain.
 *
 * The function pointer is stored as the continuation of the current last
 * slot, while the private data and its destructor are stored in the slot
 * that follows it.
 *
 * @param chain chain to extend
 * @param func  function to append; must not be NULL
 * @param free  destructor for `priv->ptr`, or NULL if none is needed
 * @param priv  private data, copied by value into the chain
 * @return 0 on success, AVERROR(EINVAL) if the chain already holds
 *         SWS_MAX_OPS implementations
 */
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func,
                           void (*free)(void *), const SwsOpPriv *priv)
{
    if (chain->num_impl == SWS_MAX_OPS)
        return AVERROR(EINVAL);

    av_assert1(func);

    const int cur = chain->num_impl++;
    chain->impl[cur].cont     = func;
    chain->impl[cur + 1].priv = *priv;
    chain->free[cur + 1]      = free;
    return 0;
}

/**
 * Match an operation against a table entry. Returns a score for how well
 * the entry matches the operation, or 0 if there is no match.
 *
 * The operation kind (`op->op`) must be identical to `entry->op`.
 *
 * For SWS_OP_READ, SWS_OP_WRITE, SWS_OP_SWAP_BYTES and SWS_OP_SWIZZLE, the
 * exact pixel type is not checked, just its size. For every other operation
 * the pixel type must be identical.
 *
 * If `entry->unused` marks any components as unused, they must be marked as
 * unused in `op->comps` as well; each such component raises the score by one.
 *
 * For SWS_OP_CLEAR, every component not set in `next.unused` must have
 * `entry->unused[i]` set exactly when `op->c.q4[i]` has a nonzero
 * denominator. This is enforced even for flexible entries.
 *
 * For SWS_OP_LINEAR, `entry->linear_mask` must be a superset of (or equal to)
 * `op->lin.mask`, but may not contain any columns whose component is marked
 * in `op->comps.unused`. Entries with fewer mask bits set score higher.
 *
 * Components set in `next.unused` are ignored when comparing the clear and
 * swizzle patterns. If `entry->flexible` is set, the op body is ignored -
 * only the operation, pixel type, and component masks are checked - and the
 * score is lowered by 5.
 *
 * @param op    operation to find an implementation for
 * @param entry candidate implementation
 * @param next  component info of the operation following `op`
 * @return a positive score on match (higher is better), 0 on mismatch
 */
static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps next)
{
    /* Base score; bonuses and penalties below are applied on top of it */
    int score = 10;
    if (op->op != entry->op)
        return 0;

    switch (op->op) {
    case SWS_OP_READ:
    case SWS_OP_WRITE:
    case SWS_OP_SWAP_BYTES:
    case SWS_OP_SWIZZLE:
        /* Only the size matters for these operations */
        if (ff_sws_pixel_type_size(op->type) != ff_sws_pixel_type_size(entry->type))
            return 0;
        break;
    default:
        if (op->type != entry->type)
            return 0;
        break;
    }

    for (int i = 0; i < 4; i++) {
        if (entry->unused[i]) {
            if (op->comps.unused[i])
                score += 1; /* Operating on fewer components is better .. */
            else
                return 0; /* .. but not too few! */
        }
    }

    if (op->op == SWS_OP_CLEAR) {
        /* Clear pattern must match exactly, regardless of `entry->flexible` */
        for (int i = 0; i < 4; i++) {
            if (!next.unused[i] && entry->unused[i] != !!op->c.q4[i].den)
                return 0;
        }
    }

    /* Flexible variants always match, but lower the score to prioritize more
     * specific implementations if they exist */
    if (entry->flexible)
        return score - 5;

    /* Non-flexible entries: compare the operation-specific parameters */
    switch (op->op) {
    case SWS_OP_INVALID:
        return 0;
    case SWS_OP_READ:
    case SWS_OP_WRITE:
        /* The packed flag is only compared when there is more than one
         * element */
        if (op->rw.elems   != entry->rw.elems ||
            op->rw.frac    != entry->rw.frac  ||
            (op->rw.elems > 1 && op->rw.packed != entry->rw.packed))
            return 0;
        return score;
    case SWS_OP_SWAP_BYTES:
        return score;
    case SWS_OP_PACK:
    case SWS_OP_UNPACK:
        /* Compare pattern entries up to the first zero in `op`'s pattern */
        for (int i = 0; i < 4 && op->pack.pattern[i]; i++) {
            if (op->pack.pattern[i] != entry->pack.pattern[i])
                return 0;
        }
        return score;
    case SWS_OP_CLEAR:
        /* Every component with a nonzero denominator that the next operation
         * still uses must equal the entry's single clear value */
        for (int i = 0; i < 4; i++) {
            if (!op->c.q4[i].den)
                continue;
            if (av_cmp_q(op->c.q4[i], Q(entry->clear_value)) && !next.unused[i])
                return 0;
        }
        return score;
    case SWS_OP_LSHIFT:
    case SWS_OP_RSHIFT:
        /* Flexible entries returned above, so reaching this point means the
         * entry is not flexible and the assertion fails when it is enabled */
        av_assert1(entry->flexible);
        return score;
    case SWS_OP_SWIZZLE:
        for (int i = 0; i < 4; i++) {
            if (op->swizzle.in[i] != entry->swizzle.in[i] && !next.unused[i])
                return 0;
        }
        return score;
    case SWS_OP_CONVERT:
        if (op->convert.to     != entry->convert.to ||
            op->convert.expand != entry->convert.expand)
            return 0;
        return score;
    case SWS_OP_DITHER:
        return op->dither.size_log2 == entry->dither_size ? score : 0;
    case SWS_OP_MIN:
    case SWS_OP_MAX:
        /* Same situation as the shift operations above */
        av_assert1(entry->flexible);
        return score;
    case SWS_OP_LINEAR:
        /* All required elements must be present */
        if (op->lin.mask & ~entry->linear_mask)
            return 0;
        /* To avoid operating on possibly undefined memory, filter out
         * implementations that operate on more input components */
        for (int i = 0; i < 4; i++) {
            if ((entry->linear_mask & SWS_MASK_COL(i)) && op->comps.unused[i])
                return 0;
        }
        /* Prioritize smaller implementations */
        score += av_popcount(SWS_MASK_ALL ^ entry->linear_mask);
        return score;
    case SWS_OP_SCALE:
        return score;
    case SWS_OP_TYPE_NB:
        break;
    }

    av_unreachable("Invalid operation type!");
    return 0;
}

/**
 * Compile the first operation of `ops` using the best matching entry from
 * the given tables, and append the result to `chain`.
 *
 * Tables are skipped if they specify a nonzero block size different from
 * `block_size`, or if they require CPU flags that av_get_cpu_flags() does
 * not report. Among the remaining entries the one with the highest
 * op_match() score wins; on a tie the entry encountered first is kept.
 *
 * On success the compiled operation is consumed from the list: `ops->ops`
 * is advanced by one and `ops->num_ops` is decremented. The CPU flags of the
 * winning table are OR-ed into `chain->cpu_flags`.
 *
 * @param tables     array of implementation tables to search
 * @param num_tables number of elements in `tables`
 * @param ops        operation list; its first operation is compiled
 * @param block_size block size the chain is being compiled for
 * @param chain      chain receiving the compiled implementation
 * @return 0 if the list is now empty, AVERROR(EAGAIN) if more operations
 *         remain to be compiled, AVERROR(EINVAL) if `ops` is empty or the
 *         chain is full, AVERROR(ENOTSUP) if no entry matches, or the
 *         negative error code returned by the entry's setup function
 */
int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables,
                             SwsOpList *ops, const int block_size,
                             SwsOpChain *chain)
{
    /* Stand-in for "the operation after the last one": uses no components */
    static const SwsOp dummy = { .comps.unused = { true, true, true, true }};
    const unsigned cpu_flags = av_get_cpu_flags();
    const SwsOpEntry *best = NULL;
    int ret, best_score = 0, best_cpu_flags = 0;
    SwsOpPriv priv = {0};

    /* An empty list has no first operation to read */
    if (ops->num_ops <= 0)
        return AVERROR(EINVAL);

    const SwsOp *op   = &ops->ops[0];
    const SwsOp *next = ops->num_ops > 1 ? &ops->ops[1] : &dummy;

    for (int n = 0; n < num_tables; n++) {
        const SwsOpTable *table = tables[n];
        if ((table->block_size && table->block_size != block_size) ||
            (table->cpu_flags & ~cpu_flags))
            continue;

        /* The entry list is NULL-terminated */
        for (int i = 0; table->entries[i]; i++) {
            const SwsOpEntry *entry = table->entries[i];
            int score = op_match(op, entry, next->comps);
            if (score > best_score) {
                best_score = score;
                best_cpu_flags = table->cpu_flags;
                best = entry;
            }
        }
    }

    if (!best)
        return AVERROR(ENOTSUP);

    if (best->setup) {
        ret = best->setup(op, &priv);
        if (ret < 0)
            return ret;
    }

    chain->cpu_flags |= best_cpu_flags;
    ret = ff_sws_op_chain_append(chain, best->func, best->free, &priv);
    if (ret < 0) {
        /* The chain did not take ownership, so release the private data */
        if (best->free)
            best->free(priv.ptr);
        return ret;
    }

    /* Consume the operation that was just compiled */
    ops->ops++;
    ops->num_ops--;
    return ops->num_ops ? AVERROR(EAGAIN) : 0;
}

/*
 * Convert the rational `q` to a value of `type`; a zero denominator yields 0.
 *
 * NOTE(review): the cast binds to (q).num only, so the numerator is converted
 * to `type` before the division takes place. For narrow integer types this
 * truncates numerators that do not fit in `type`, even when the quotient
 * would - confirm that callers only pass rationals for which this is fine.
 */
#define q2pixel(type, q) ((q).den ? (type) (q).num / (q).den : 0)

/**
 * Setup helper: store the operation's `c.u` constant in the first 8-bit
 * slot of the private data, regardless of the operation's pixel type.
 *
 * @param op  operation providing the constant
 * @param out private data to fill in
 * @return always 0
 */
int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out)
{
    *out->u8 = op->c.u;
    return 0;
}

/**
 * Setup helper: store the operation's `c.u` constant in the first slot of
 * the private data, using the slot that corresponds to `op->type`.
 *
 * @param op  operation providing the constant and the pixel type
 * @param out private data to fill in
 * @return 0 on success, AVERROR(EINVAL) for any other pixel type
 */
int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out)
{
    switch (op->type) {
    case SWS_PIXEL_U8:
        out->u8[0] = op->c.u;
        break;
    case SWS_PIXEL_U16:
        out->u16[0] = op->c.u;
        break;
    case SWS_PIXEL_U32:
        out->u32[0] = op->c.u;
        break;
    case SWS_PIXEL_F32:
        out->f32[0] = op->c.u;
        break;
    default:
        return AVERROR(EINVAL);
    }

    return 0;
}

/**
 * Setup helper: convert the operation's rational constant `c.q` with
 * q2pixel() and store it in the first slot of the private data, using the
 * slot that corresponds to `op->type`.
 *
 * @param op  operation providing the constant and the pixel type
 * @param out private data to fill in
 * @return 0 on success, AVERROR(EINVAL) for any other pixel type
 */
int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out)
{
    switch (op->type) {
    case SWS_PIXEL_U8:
        out->u8[0] = q2pixel(uint8_t, op->c.q);
        break;
    case SWS_PIXEL_U16:
        out->u16[0] = q2pixel(uint16_t, op->c.q);
        break;
    case SWS_PIXEL_U32:
        out->u32[0] = q2pixel(uint32_t, op->c.q);
        break;
    case SWS_PIXEL_F32:
        out->f32[0] = q2pixel(float, op->c.q);
        break;
    default:
        return AVERROR(EINVAL);
    }

    return 0;
}

/**
 * Setup helper: convert each of the four rational constants in `c.q4` with
 * q2pixel() and store them in the first four slots of the private data,
 * using the slots that correspond to `op->type`.
 *
 * @param op  operation providing the constants and the pixel type
 * @param out private data to fill in
 * @return 0 on success, AVERROR(EINVAL) for any other pixel type (in which
 *         case nothing is written)
 */
int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out)
{
    switch (op->type) {
    case SWS_PIXEL_U8:
        for (int k = 0; k < 4; k++)
            out->u8[k] = q2pixel(uint8_t, op->c.q4[k]);
        return 0;
    case SWS_PIXEL_U16:
        for (int k = 0; k < 4; k++)
            out->u16[k] = q2pixel(uint16_t, op->c.q4[k]);
        return 0;
    case SWS_PIXEL_U32:
        for (int k = 0; k < 4; k++)
            out->u32[k] = q2pixel(uint32_t, op->c.q4[k]);
        return 0;
    case SWS_PIXEL_F32:
        for (int k = 0; k < 4; k++)
            out->f32[k] = q2pixel(float, op->c.q4[k]);
        return 0;
    default:
        return AVERROR(EINVAL);
    }
}

Coverage Report

Created: 2026-01-25 07:18

Line	Count	Source
1		/**
2		* Copyright (C) 2025 Niklas Haas
3		*
4		* This file is part of FFmpeg.
5		*
6		* FFmpeg is free software; you can redistribute it and/or
7		* modify it under the terms of the GNU Lesser General Public
8		* License as published by the Free Software Foundation; either
9		* version 2.1 of the License, or (at your option) any later version.
10		*
11		* FFmpeg is distributed in the hope that it will be useful,
12		* but WITHOUT ANY WARRANTY; without even the implied warranty of
13		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14		* Lesser General Public License for more details.
15		*
16		* You should have received a copy of the GNU Lesser General Public
17		* License along with FFmpeg; if not, write to the Free Software
18		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19		*/
20
21		#include "libavutil/avassert.h"
22		#include "libavutil/mem.h"
23		#include "libavutil/rational.h"
24
25		#include "ops_chain.h"
26
27	0	#define Q(N) ((AVRational) { N, 1 })
28
29		SwsOpChain *ff_sws_op_chain_alloc(void)
30	0	{
31	0	return av_mallocz(sizeof(SwsOpChain));
32	0	}
33
34		void ff_sws_op_chain_free_cb(void *ptr)
35	0	{
36	0	if (!ptr)
37	0	return;
38
39	0	SwsOpChain *chain = ptr;
40	0	for (int i = 0; i < chain->num_impl + 1; i++) {
41	0	if (chain->free[i])
42	0	chain->free[i](chain->impl[i].priv.ptr);
43	0	}
44
45	0	av_free(chain);
46	0	}
47
48		int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func,
49		void (*free)(void *), const SwsOpPriv *priv)
50	0	{
51	0	const int idx = chain->num_impl;
52	0	if (idx == SWS_MAX_OPS)
53	0	return AVERROR(EINVAL);
54
55	0	av_assert1(func);
56	0	chain->impl[idx].cont = func;
57	0	chain->impl[idx + 1].priv = *priv;
58	0	chain->free[idx + 1] = free;
59	0	chain->num_impl++;
60	0	return 0;
61	0	}
62
63		/**
64		* Match an operation against a reference operation. Returns a score for how
65		* well the reference matches the operation, or 0 if there is no match.
66		*
67		* If `ref->comps` has any flags set, they must be set in `op` as well.
68		* Likewise, if `ref->comps` has any components marked as unused, they must be
69		* marked as unused in `ops` as well.
70		*
71		* For SWS_OP_LINEAR, `ref->linear.mask` must be a strict superset of
72		* `op->linear.mask`, but may not contain any columns explicitly ignored by
73		* `op->comps.unused`.
74		*
75		* For SWS_OP_READ, SWS_OP_WRITE, SWS_OP_SWAP_BYTES and SWS_OP_SWIZZLE, the
76		* exact type is not checked, just the size.
77		*
78		* Components set in `next.unused` are ignored when matching. If `flexible`
79		* is true, the op body is ignored - only the operation, pixel type, and
80		* component masks are checked.
81		*/
82		static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps next)
83	0	{
84	0	int score = 10;
85	0	if (op->op != entry->op)
86	0	return 0;
87
88	0	switch (op->op) {
89	0	case SWS_OP_READ:
90	0	case SWS_OP_WRITE:
91	0	case SWS_OP_SWAP_BYTES:
92	0	case SWS_OP_SWIZZLE:
93		/* Only the size matters for these operations */
94	0	if (ff_sws_pixel_type_size(op->type) != ff_sws_pixel_type_size(entry->type))
95	0	return 0;
96	0	break;
97	0	default:
98	0	if (op->type != entry->type)
99	0	return 0;
100	0	break;
101	0	}
102
103	0	for (int i = 0; i < 4; i++) {
104	0	if (entry->unused[i]) {
105	0	if (op->comps.unused[i])
106	0	score += 1; /* Operating on fewer components is better .. */
107	0	else
108	0	return 0; /* .. but not too few! */
109	0	}
110	0	}
111
112	0	if (op->op == SWS_OP_CLEAR) {
113		/* Clear pattern must match exactly, regardless of `entry->flexible` */
114	0	for (int i = 0; i < 4; i++) {
115	0	if (!next.unused[i] && entry->unused[i] != !!op->c.q4[i].den)
116	0	return 0;
117	0	}
118	0	}
119
120		/* Flexible variants always match, but lower the score to prioritize more
121		* specific implementations if they exist */
122	0	if (entry->flexible)
123	0	return score - 5;
124
125	0	switch (op->op) {
126	0	case SWS_OP_INVALID:
127	0	return 0;
128	0	case SWS_OP_READ:
129	0	case SWS_OP_WRITE:
130	0	if (op->rw.elems != entry->rw.elems ||
131	0	op->rw.frac != entry->rw.frac ||
132	0	(op->rw.elems > 1 && op->rw.packed != entry->rw.packed))
133	0	return 0;
134	0	return score;
135	0	case SWS_OP_SWAP_BYTES:
136	0	return score;
137	0	case SWS_OP_PACK:
138	0	case SWS_OP_UNPACK:
139	0	for (int i = 0; i < 4 && op->pack.pattern[i]; i++) {
140	0	if (op->pack.pattern[i] != entry->pack.pattern[i])
141	0	return 0;
142	0	}
143	0	return score;
144	0	case SWS_OP_CLEAR:
145	0	for (int i = 0; i < 4; i++) {
146	0	if (!op->c.q4[i].den)
147	0	continue;
148	0	if (av_cmp_q(op->c.q4[i], Q(entry->clear_value)) && !next.unused[i])
149	0	return 0;
150	0	}
151	0	return score;
152	0	case SWS_OP_LSHIFT:
153	0	case SWS_OP_RSHIFT:
154	0	av_assert1(entry->flexible);
155	0	return score;
156	0	case SWS_OP_SWIZZLE:
157	0	for (int i = 0; i < 4; i++) {
158	0	if (op->swizzle.in[i] != entry->swizzle.in[i] && !next.unused[i])
159	0	return 0;
160	0	}
161	0	return score;
162	0	case SWS_OP_CONVERT:
163	0	if (op->convert.to != entry->convert.to ||
164	0	op->convert.expand != entry->convert.expand)
165	0	return 0;
166	0	return score;
167	0	case SWS_OP_DITHER:
168	0	return op->dither.size_log2 == entry->dither_size ? score : 0;
169	0	case SWS_OP_MIN:
170	0	case SWS_OP_MAX:
171	0	av_assert1(entry->flexible);
172	0	return score;
173	0	case SWS_OP_LINEAR:
174		/* All required elements must be present */
175	0	if (op->lin.mask & ~entry->linear_mask)
176	0	return 0;
177		/* To avoid operating on possibly undefined memory, filter out
178		* implementations that operate on more input components */
179	0	for (int i = 0; i < 4; i++) {
180	0	if ((entry->linear_mask & SWS_MASK_COL(i)) && op->comps.unused[i])
181	0	return 0;
182	0	}
183		/* Prioritize smaller implementations */
184	0	score += av_popcount(SWS_MASK_ALL ^ entry->linear_mask);
185	0	return score;
186	0	case SWS_OP_SCALE:
187	0	return score;
188	0	case SWS_OP_TYPE_NB:
189	0	break;
190	0	}
191
192	0	av_unreachable("Invalid operation type!");
193	0	return 0;
194	0	}
195
196		int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables,
197		SwsOpList *ops, const int block_size,
198		SwsOpChain *chain)
199	0	{
200	0	static const SwsOp dummy = { .comps.unused = { true, true, true, true }};
201	0	const SwsOp *next = ops->num_ops > 1 ? &ops->ops[1] : &dummy;
202	0	const unsigned cpu_flags = av_get_cpu_flags();
203	0	const SwsOpEntry *best = NULL;
204	0	const SwsOp *op = &ops->ops[0];
205	0	int ret, best_score = 0, best_cpu_flags;
206	0	SwsOpPriv priv = {0};
207
208	0	for (int n = 0; n < num_tables; n++) {
209	0	const SwsOpTable *table = tables[n];
210	0	if (table->block_size && table->block_size != block_size ||
211	0	table->cpu_flags & ~cpu_flags)
212	0	continue;
213
214	0	for (int i = 0; table->entries[i]; i++) {
215	0	const SwsOpEntry *entry = table->entries[i];
216	0	int score = op_match(op, entry, next->comps);
217	0	if (score > best_score) {
218	0	best_score = score;
219	0	best_cpu_flags = table->cpu_flags;
220	0	best = entry;
221	0	}
222	0	}
223	0	}
224
225	0	if (!best)
226	0	return AVERROR(ENOTSUP);
227
228	0	if (best->setup) {
229	0	ret = best->setup(op, &priv);
230	0	if (ret < 0)
231	0	return ret;
232	0	}
233
234	0	chain->cpu_flags |= best_cpu_flags;
235	0	ret = ff_sws_op_chain_append(chain, best->func, best->free, &priv);
236	0	if (ret < 0) {
237	0	if (best->free)
238	0	best->free(priv.ptr);
239	0	return ret;
240	0	}
241
242	0	ops->ops++;
243	0	ops->num_ops--;
244	0	return ops->num_ops ? AVERROR(EAGAIN) : 0;
245	0	}
246
247	0	#define q2pixel(type, q) ((q).den ? (type) (q).num / (q).den : 0)
248
249		int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out)
250	0	{
251	0	out->u8[0] = op->c.u;
252	0	return 0;
253	0	}
254
255		int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out)
256	0	{
257	0	switch (op->type) {
258	0	case SWS_PIXEL_U8: out->u8[0] = op->c.u; return 0;
259	0	case SWS_PIXEL_U16: out->u16[0] = op->c.u; return 0;
260	0	case SWS_PIXEL_U32: out->u32[0] = op->c.u; return 0;
261	0	case SWS_PIXEL_F32: out->f32[0] = op->c.u; return 0;
262	0	default: return AVERROR(EINVAL);
263	0	}
264	0	}
265
266		int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out)
267	0	{
268	0	switch (op->type) {
269	0	case SWS_PIXEL_U8: out->u8[0] = q2pixel(uint8_t, op->c.q); return 0;
270	0	case SWS_PIXEL_U16: out->u16[0] = q2pixel(uint16_t, op->c.q); return 0;
271	0	case SWS_PIXEL_U32: out->u32[0] = q2pixel(uint32_t, op->c.q); return 0;
272	0	case SWS_PIXEL_F32: out->f32[0] = q2pixel(float, op->c.q); return 0;
273	0	default: return AVERROR(EINVAL);
274	0	}
275
276	0	return 0;
277	0	}
278
279		int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out)
280	0	{
281	0	for (int i = 0; i < 4; i++) {
282	0	switch (op->type) {
283	0	case SWS_PIXEL_U8: out->u8[i] = q2pixel(uint8_t, op->c.q4[i]); break;
284	0	case SWS_PIXEL_U16: out->u16[i] = q2pixel(uint16_t, op->c.q4[i]); break;
285	0	case SWS_PIXEL_U32: out->u32[i] = q2pixel(uint32_t, op->c.q4[i]); break;
286	0	case SWS_PIXEL_F32: out->f32[i] = q2pixel(float, op->c.q4[i]); break;
287	0	default: return AVERROR(EINVAL);
288	0	}
289	0	}
290
291	0	return 0;
292	0	}