/src/ffmpeg/libswscale/ops_memcpy.c

Source
/**
 * Copyright (C) 2025 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"

#include "ops_backend.h"

/**
 * Per-operation state built by compile() and consumed by process().
 * Describes, for each output plane, whether it is copied from an input
 * plane or filled with a constant byte.
 */
typedef struct MemcpyPriv {
    int num_planes;          /* number of output planes handled by process() */
    int index[4]; /* or -1 to clear plane */ /* input plane copied into output plane i */
    uint8_t clear_value[4];  /* fill byte for output plane i; used only when index[i] < 0 */
} MemcpyPriv;

/**
 * Switch to a per-line loop if the padding of each line (output stride minus
 * the bytes actually written per line) exceeds this number of bytes. Chosen
 * to match the typical cache line size of modern CPUs, as this avoids the
 * risk of the implementation loading one extra unnecessary cache line.
 */
#define SWS_MAX_PADDING 64

/* Memcpy backend for trivial cases */

/**
 * Copy or clear every output plane for the lines [y_start, y_end).
 *
 * For each of the p->num_planes output planes, the plane is either filled
 * with the byte p->clear_value[i] (when p->index[i] is negative) or copied
 * from input plane p->index[i]. When the per-line padding is small and (for
 * copies) both strides match, all lines are handled by one bulk call that
 * also touches the padding bytes; otherwise each line is processed
 * separately, touching only the payload bytes.
 *
 * @param exec    execution state providing plane pointers, strides, width
 *                and the output block size
 * @param priv    the MemcpyPriv produced by compile()
 * @param x_start must be 0 (asserted); only full lines are supported
 * @param y_start first line to process
 * @param x_end   must equal exec->width (asserted)
 * @param y_end   one past the last line to process
 */
static void process(const SwsOpExec *exec, const void *priv,
                    int x_start, int y_start, int x_end, int y_end)
{
    const MemcpyPriv *p = priv;
    const int lines = y_end - y_start;
    /* Payload bytes per line, excluding any stride padding */
    const int bytes = x_end * exec->block_size_out;
    av_assert1(x_start == 0 && x_end == exec->width);

    for (int i = 0; i < p->num_planes; i++) {
        uint8_t *out = exec->out[i];
        const int idx = p->index[i];
        /* The bulk paths treat the plane as one contiguous span of
         * stride * lines bytes. That is only meaningful for a non-negative
         * stride: a negative one would turn into an enormous size_t when
         * passed to memset()/memcpy(). Such strides, as well as strides with
         * more than SWS_MAX_PADDING bytes of padding, go line by line.
         * NOTE(review): whether callers can pass negative strides is not
         * visible from this file; the guard is free when they cannot. */
        const int use_loop = exec->out_stride[i] < 0 ||
                             exec->out_stride[i] > bytes + SWS_MAX_PADDING;
        if (idx < 0 && !use_loop) {
            /* Clear the whole plane slice, padding included, in one call */
            memset(out, p->clear_value[i], exec->out_stride[i] * lines);
        } else if (idx < 0) {
            /* Clear only the payload of each line */
            for (int y = y_start; y < y_end; y++) {
                memset(out, p->clear_value[i], bytes);
                out += exec->out_stride[i];
            }
        } else if (exec->out_stride[i] == exec->in_stride[idx] && !use_loop) {
            /* Identical layout on both sides: copy the slice in one call */
            memcpy(out, exec->in[idx], exec->out_stride[i] * lines);
        } else {
            /* Differing strides or large padding: copy line by line */
            const uint8_t *in = exec->in[idx];
            for (int y = y_start; y < y_end; y++) {
                memcpy(out, in, bytes);
                out += exec->out_stride[i];
                in  += exec->in_stride[idx];
            }
        }
    }
}

/**
 * Try to express an operation list as plain per-plane copies and clears.
 *
 * Walks the list and tracks, for each of up to four components, which input
 * plane it currently holds (or -1 if it was cleared to a constant byte).
 * Only reads, writes, duplicate-free swizzles and byte-uniform clears are
 * accepted; anything else makes this backend decline the list.
 *
 * @param ctx unused by this backend
 * @param ops operation list to compile
 * @param out receives the compiled operation; its priv is a heap copy of the
 *            MemcpyPriv, released through av_free
 * @return 0 on success, AVERROR(ENOTSUP) if the list cannot be implemented
 *         as copies/clears, AVERROR(ENOMEM) if the private data could not be
 *         allocated (in which case *out has been written with a NULL priv)
 */
static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
{
    MemcpyPriv p = {0};

    for (int n = 0; n < ops->num_ops; n++) {
        const SwsOp *op = &ops->ops[n];
        switch (op->op) {
        case SWS_OP_READ:
            /* Only planar (or single-element) whole-sample reads map onto
             * a per-plane memcpy */
            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
                return AVERROR(ENOTSUP);
            for (int i = 0; i < op->rw.elems; i++)
                p.index[i] = i;
            break;

        case SWS_OP_SWIZZLE: {
            const MemcpyPriv orig = p;
            for (int i = 0; i < 4; i++) {
                const int src = op->swizzle.in[i];
                /* Explicitly exclude swizzle masks that contain duplicates,
                 * because these are wasteful to implement as a memcpy */
                for (int j = 0; j < i; j++) {
                    if (op->swizzle.in[j] == src)
                        return AVERROR(ENOTSUP);
                }
                /* Move the fill byte together with the plane mapping, so
                 * that a component cleared before this swizzle keeps its
                 * value instead of picking up the one stored at slot i */
                p.index[i]       = orig.index[src];
                p.clear_value[i] = orig.clear_value[src];
            }
            break;
        }

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (!SWS_COMP_TEST(op->clear.mask, i))
                    continue;
                /* Non-integer constants cannot be a repeated byte pattern */
                if (op->clear.value[i].den != 1)
                    return AVERROR(ENOTSUP);

                /* Ensure all bytes to be cleared are the same, because we
                 * can't memset on multi-byte sequences.
                 * NOTE(review): this compares the integer numerator against
                 * the repeated-byte pattern; confirm op->type cannot be a
                 * floating-point type here, where the in-memory bytes would
                 * differ from the integer value. */
                uint8_t val = op->clear.value[i].num & 0xFF;
                uint32_t ref = val;
                switch (ff_sws_pixel_type_size(op->type)) {
                case 2: ref *= 0x101; break;
                case 4: ref *= 0x1010101; break;
                }
                if (ref != op->clear.value[i].num)
                    return AVERROR(ENOTSUP);
                p.clear_value[i] = val;
                p.index[i] = -1;
            }
            break;

        case SWS_OP_WRITE:
            /* Same restrictions as for reads */
            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
                return AVERROR(ENOTSUP);
            p.num_planes = op->rw.elems;
            break;

        default:
            /* Any real computation is beyond what a memcpy can do */
            return AVERROR(ENOTSUP);
        }
    }

    *out = (SwsCompiledOp) {
        .slice_align = 1,
        .block_size  = 1,
        .func = process,
        .priv = av_memdup(&p, sizeof(p)),
        .free = av_free,
    };
    return out->priv ? 0 : AVERROR(ENOMEM);
}

/* Backend descriptor exposing compile() under the name "memcpy".
 * hw_format is AV_PIX_FMT_NONE (presumably marking this as a software-only
 * backend -- confirm against the SwsOpBackend definition). */
const SwsOpBackend backend_murder = {
    .name       = "memcpy",
    .compile    = compile,
    .hw_format  = AV_PIX_FMT_NONE,
};

Line	Count	Source
1		/**
2		* Copyright (C) 2025 Niklas Haas
3		*
4		* This file is part of FFmpeg.
5		*
6		* FFmpeg is free software; you can redistribute it and/or
7		* modify it under the terms of the GNU Lesser General Public
8		* License as published by the Free Software Foundation; either
9		* version 2.1 of the License, or (at your option) any later version.
10		*
11		* FFmpeg is distributed in the hope that it will be useful,
12		* but WITHOUT ANY WARRANTY; without even the implied warranty of
13		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14		* Lesser General Public License for more details.
15		*
16		* You should have received a copy of the GNU Lesser General Public
17		* License along with FFmpeg; if not, write to the Free Software
18		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19		*/
20
21		#include "libavutil/avassert.h"
22
23		#include "ops_backend.h"
24
25		typedef struct MemcpyPriv {
26		int num_planes;
27		int index[4]; /* or -1 to clear plane */
28		uint8_t clear_value[4];
29		} MemcpyPriv;
30
31		/**
32		* Switch to loop if total padding exceeds this number of bytes. Chosen to
33		* align with the typical L1 cache size of modern CPUs, as this avoids the
34		* risk of the implementation loading one extra unnecessary cache line.
35		*/
36	0	#define SWS_MAX_PADDING 64
37
38		/* Memcpy backend for trivial cases */
39
40		static void process(const SwsOpExec *exec, const void *priv,
41		int x_start, int y_start, int x_end, int y_end)
42	0	{
43	0	const MemcpyPriv *p = priv;
44	0	const int lines = y_end - y_start;
45	0	const int bytes = x_end * exec->block_size_out;
46	0	av_assert1(x_start == 0 && x_end == exec->width);
47
48	0	for (int i = 0; i < p->num_planes; i++) {
49	0	uint8_t *out = exec->out[i];
50	0	const int idx = p->index[i];
51	0	const int use_loop = exec->out_stride[i] > bytes + SWS_MAX_PADDING;
52	0	if (idx < 0 && !use_loop) {
53	0	memset(out, p->clear_value[i], exec->out_stride[i] * lines);
54	0	} else if (idx < 0) {
55	0	for (int y = y_start; y < y_end; y++) {
56	0	memset(out, p->clear_value[i], bytes);
57	0	out += exec->out_stride[i];
58	0	}
59	0	} else if (exec->out_stride[i] == exec->in_stride[idx] && !use_loop) {
60	0	memcpy(out, exec->in[idx], exec->out_stride[i] * lines);
61	0	} else {
62	0	const uint8_t *in = exec->in[idx];
63	0	for (int y = y_start; y < y_end; y++) {
64	0	memcpy(out, in, bytes);
65	0	out += exec->out_stride[i];
66	0	in += exec->in_stride[idx];
67	0	}
68	0	}
69	0	}
70	0	}
71
72		static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
73	0	{
74	0	MemcpyPriv p = {0};
75
76	0	for (int n = 0; n < ops->num_ops; n++) {
77	0	const SwsOp *op = &ops->ops[n];
78	0	switch (op->op) {
79	0	case SWS_OP_READ:
80	0	if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
81	0	return AVERROR(ENOTSUP);
82	0	for (int i = 0; i < op->rw.elems; i++)
83	0	p.index[i] = i;
84	0	break;
85
86	0	case SWS_OP_SWIZZLE: {
87	0	const MemcpyPriv orig = p;
88	0	for (int i = 0; i < 4; i++) {
89		/* Explicitly exclude swizzle masks that contain duplicates,
90		* because these are wasteful to implement as a memcpy */
91	0	for (int j = 0; j < i; j++) {
92	0	if (op->swizzle.in[i] == op->swizzle.in[j])
93	0	return AVERROR(ENOTSUP);
94	0	}
95	0	p.index[i] = orig.index[op->swizzle.in[i]];
96	0	}
97	0	break;
98	0	}
99
100	0	case SWS_OP_CLEAR:
101	0	for (int i = 0; i < 4; i++) {
102	0	if (!SWS_COMP_TEST(op->clear.mask, i))
103	0	continue;
104	0	if (op->clear.value[i].den != 1)
105	0	return AVERROR(ENOTSUP);
106
107		/* Ensure all bytes to be cleared are the same, because we
108		* can't memset on multi-byte sequences */
109	0	uint8_t val = op->clear.value[i].num & 0xFF;
110	0	uint32_t ref = val;
111	0	switch (ff_sws_pixel_type_size(op->type)) {
112	0	case 2: ref *= 0x101; break;
113	0	case 4: ref *= 0x1010101; break;
114	0	}
115	0	if (ref != op->clear.value[i].num)
116	0	return AVERROR(ENOTSUP);
117	0	p.clear_value[i] = val;
118	0	p.index[i] = -1;
119	0	}
120	0	break;
121
122	0	case SWS_OP_WRITE:
123	0	if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
124	0	return AVERROR(ENOTSUP);
125	0	p.num_planes = op->rw.elems;
126	0	break;
127
128	0	default:
129	0	return AVERROR(ENOTSUP);
130	0	}
131	0	}
132
133	0	*out = (SwsCompiledOp) {
134	0	.slice_align = 1,
135	0	.block_size = 1,
136	0	.func = process,
137	0	.priv = av_memdup(&p, sizeof(p)),
138	0	.free = av_free,
139	0	};
140	0	return out->priv ? 0 : AVERROR(ENOMEM);
141	0	}
142
143		const SwsOpBackend backend_murder = {
144		.name = "memcpy",
145		.compile = compile,
146		.hw_format = AV_PIX_FMT_NONE,
147		};

Coverage Report

Created: 2026-04-29 07:00