/src/ffmpeg/libswscale/ops_memcpy.c

Source
/**
 * Copyright (C) 2025 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"
#include "libavutil/mem.h"

#include "ops_internal.h"

/**
 * Private state of the memcpy backend: filled in by compile() and read back
 * by process() for every invocation.
 */
typedef struct MemcpyPriv {
    /* Number of output planes that process() iterates over */
    int num_planes;
    /* For output plane i: the input plane to copy from, or -1 to clear plane */
    int index[4]; /* or -1 to clear plane */
    /* Byte value written with memset() into output plane i when index[i] < 0 */
    uint8_t clear_value[4];
} MemcpyPriv;

/**
 * Switch to a per-line loop if the padding of a line (output stride minus the
 * bytes actually processed per line) exceeds this number of bytes. Chosen to
 * align with the typical cache line size of modern CPUs, as this avoids the
 * risk of the implementation loading one extra unnecessary cache line.
 */
#define SWS_MAX_PADDING 64

/* Memcpy backend for trivial cases */

/**
 * Run the plan built by compile(): every output plane is either filled with
 * a constant byte or copied from one input plane.
 *
 * Output plane i is written starting at exec->out[i]. If p->index[i] is
 * negative the plane is filled with p->clear_value[i]; otherwise it is copied
 * from exec->in[p->index[i]].
 *
 * @param exec    plane pointers, strides, width and output block size
 * @param priv    the MemcpyPriv created by compile()
 * @param x_start must be 0 (checked by av_assert1)
 * @param y_start together with y_end determines the number of lines processed
 * @param x_end   must equal exec->width (checked by av_assert1); multiplied
 *                by exec->block_size_out to get the bytes handled per line
 * @param y_end   see y_start
 */
static void process(const SwsOpExec *exec, const void *priv,
                    int x_start, int y_start, int x_end, int y_end)
{
    const MemcpyPriv *p = priv;
    const int lines = y_end - y_start;
    const int bytes = x_end * exec->block_size_out;
    av_assert1(x_start == 0 && x_end == exec->width);

    for (int i = 0; i < p->num_planes; i++) {
        uint8_t *out = exec->out[i];
        const int idx = p->index[i];
        /* The bulk paths below touch out_stride * lines bytes in one call,
         * padding included. That is only meaningful for a non-negative
         * stride (a negative product would be converted to an enormous
         * size_t), and only worthwhile while the padding stays small.
         * Everything else goes through the line-by-line loops. */
        const int use_loop = exec->out_stride[i] < 0 ||
                             exec->out_stride[i] > bytes + SWS_MAX_PADDING;
        if (idx < 0 && !use_loop) {
            /* Clear the whole band, padding included, in one call */
            memset(out, p->clear_value[i], exec->out_stride[i] * lines);
        } else if (idx < 0) {
            /* Clear only the payload bytes of each line */
            for (int y = y_start; y < y_end; y++) {
                memset(out, p->clear_value[i], bytes);
                out += exec->out_stride[i];
            }
        } else if (exec->out_stride[i] == exec->in_stride[idx] && !use_loop) {
            /* Identical layout on both sides: copy the band in one call */
            memcpy(out, exec->in[idx], exec->out_stride[i] * lines);
        } else {
            /* Strides differ, are negative, or padding is large: copy only
             * the payload bytes of each line */
            const uint8_t *in = exec->in[idx];
            for (int y = y_start; y < y_end; y++) {
                memcpy(out, in, bytes);
                out += exec->out_stride[i];
                in  += exec->in_stride[idx];
            }
        }
    }
}

static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
{
    MemcpyPriv p = {0};

    for (int n = 0; n < ops->num_ops; n++) {
        const SwsOp *op = &ops->ops[n];
        switch (op->op) {
        case SWS_OP_READ:
            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
                return AVERROR(ENOTSUP);
            for (int i = 0; i < op->rw.elems; i++)
                p.index[i] = i;
            break;

        case SWS_OP_SWIZZLE: {
            const MemcpyPriv orig = p;
            for (int i = 0; i < 4; i++) {
                /* Explicitly exclude swizzle masks that contain duplicates,
                 * because these are wasteful to implement as a memcpy */
                for (int j = 0; j < i; j++) {
                    if (op->swizzle.in[i] == op->swizzle.in[j])
                        return AVERROR(ENOTSUP);
                }
                p.index[i] = orig.index[op->swizzle.in[i]];
            }
            break;
        }

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (!SWS_COMP_TEST(op->clear.mask, i))
                    continue;
                if (op->clear.value[i].den != 1)
                    return AVERROR(ENOTSUP);

                /* Ensure all bytes to be cleared are the same, because we
                 * can't memset on multi-byte sequences */
                uint8_t val = op->clear.value[i].num & 0xFF;
                uint32_t ref = val;
                switch (ff_sws_pixel_type_size(op->type)) {
                case 2: ref *= 0x101; break;
                case 4: ref *= 0x1010101; break;
                }
                if (ref != op->clear.value[i].num)
                    return AVERROR(ENOTSUP);
                p.clear_value[i] = val;
                p.index[i] = -1;
            }
            break;

        case SWS_OP_WRITE:
            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
                return AVERROR(ENOTSUP);
            p.num_planes = op->rw.elems;
            break;

        default:
            return AVERROR(ENOTSUP);
        }
    }

    *out = (SwsCompiledOp) {
        .slice_align = 1,
        .block_size  = 1,
        .func = process,
        .priv = av_memdup(&p, sizeof(p)),
        .free = av_free,
    };
    return out->priv ? 0 : AVERROR(ENOMEM);
}

/* Backend descriptor for this file: reported under the name "memcpy", uses
 * compile() to build its plan, and sets hw_format to AV_PIX_FMT_NONE.
 * NOTE(review): not static, so presumably referenced from a backend list
 * elsewhere - not visible from this file. */
const SwsOpBackend backend_murder = {
    .name       = "memcpy",
    .compile    = compile,
    .hw_format  = AV_PIX_FMT_NONE,
};

Line	Count	Source
1		/**
2		* Copyright (C) 2025 Niklas Haas
3		*
4		* This file is part of FFmpeg.
5		*
6		* FFmpeg is free software; you can redistribute it and/or
7		* modify it under the terms of the GNU Lesser General Public
8		* License as published by the Free Software Foundation; either
9		* version 2.1 of the License, or (at your option) any later version.
10		*
11		* FFmpeg is distributed in the hope that it will be useful,
12		* but WITHOUT ANY WARRANTY; without even the implied warranty of
13		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14		* Lesser General Public License for more details.
15		*
16		* You should have received a copy of the GNU Lesser General Public
17		* License along with FFmpeg; if not, write to the Free Software
18		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19		*/
20
21		#include "libavutil/avassert.h"
22		#include "libavutil/mem.h"
23
24		#include "ops_internal.h"
25
26		typedef struct MemcpyPriv {
27		int num_planes;
28		int index[4]; /* or -1 to clear plane */
29		uint8_t clear_value[4];
30		} MemcpyPriv;
31
32		/**
33		* Switch to loop if total padding exceeds this number of bytes. Chosen to
34		* align with the typical cache line size of modern CPUs, as this avoids the
35		* risk of the implementation loading one extra unnecessary cache line.
36		*/
37	0	#define SWS_MAX_PADDING 64
38
39		/* Memcpy backend for trivial cases */
40
41		static void process(const SwsOpExec *exec, const void *priv,
42		int x_start, int y_start, int x_end, int y_end)
43	0	{
44	0	const MemcpyPriv *p = priv;
45	0	const int lines = y_end - y_start;
46	0	const int bytes = x_end * exec->block_size_out;
47	0	av_assert1(x_start == 0 && x_end == exec->width);
48
49	0	for (int i = 0; i < p->num_planes; i++) {
50	0	uint8_t *out = exec->out[i];
51	0	const int idx = p->index[i];
52	0	const int use_loop = exec->out_stride[i] > bytes + SWS_MAX_PADDING;
53	0	if (idx < 0 && !use_loop) {
54	0	memset(out, p->clear_value[i], exec->out_stride[i] * lines);
55	0	} else if (idx < 0) {
56	0	for (int y = y_start; y < y_end; y++) {
57	0	memset(out, p->clear_value[i], bytes);
58	0	out += exec->out_stride[i];
59	0	}
60	0	} else if (exec->out_stride[i] == exec->in_stride[idx] && !use_loop) {
61	0	memcpy(out, exec->in[idx], exec->out_stride[i] * lines);
62	0	} else {
63	0	const uint8_t *in = exec->in[idx];
64	0	for (int y = y_start; y < y_end; y++) {
65	0	memcpy(out, in, bytes);
66	0	out += exec->out_stride[i];
67	0	in += exec->in_stride[idx];
68	0	}
69	0	}
70	0	}
71	0	}
72
73		static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
74	0	{
75	0	MemcpyPriv p = {0};
76
77	0	for (int n = 0; n < ops->num_ops; n++) {
78	0	const SwsOp *op = &ops->ops[n];
79	0	switch (op->op) {
80	0	case SWS_OP_READ:
81	0	if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
82	0	return AVERROR(ENOTSUP);
83	0	for (int i = 0; i < op->rw.elems; i++)
84	0	p.index[i] = i;
85	0	break;
86
87	0	case SWS_OP_SWIZZLE: {
88	0	const MemcpyPriv orig = p;
89	0	for (int i = 0; i < 4; i++) {
90		/* Explicitly exclude swizzle masks that contain duplicates,
91		* because these are wasteful to implement as a memcpy */
92	0	for (int j = 0; j < i; j++) {
93	0	if (op->swizzle.in[i] == op->swizzle.in[j])
94	0	return AVERROR(ENOTSUP);
95	0	}
96	0	p.index[i] = orig.index[op->swizzle.in[i]];
97	0	}
98	0	break;
99	0	}
100
101	0	case SWS_OP_CLEAR:
102	0	for (int i = 0; i < 4; i++) {
103	0	if (!SWS_COMP_TEST(op->clear.mask, i))
104	0	continue;
105	0	if (op->clear.value[i].den != 1)
106	0	return AVERROR(ENOTSUP);
107
108		/* Ensure all bytes to be cleared are the same, because we
109		* can't memset on multi-byte sequences */
110	0	uint8_t val = op->clear.value[i].num & 0xFF;
111	0	uint32_t ref = val;
112	0	switch (ff_sws_pixel_type_size(op->type)) {
113	0	case 2: ref *= 0x101; break;
114	0	case 4: ref *= 0x1010101; break;
115	0	}
116	0	if (ref != op->clear.value[i].num)
117	0	return AVERROR(ENOTSUP);
118	0	p.clear_value[i] = val;
119	0	p.index[i] = -1;
120	0	}
121	0	break;
122
123	0	case SWS_OP_WRITE:
124	0	if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac || op->rw.filter)
125	0	return AVERROR(ENOTSUP);
126	0	p.num_planes = op->rw.elems;
127	0	break;
128
129	0	default:
130	0	return AVERROR(ENOTSUP);
131	0	}
132	0	}
133
134	0	*out = (SwsCompiledOp) {
135	0	.slice_align = 1,
136	0	.block_size = 1,
137	0	.func = process,
138	0	.priv = av_memdup(&p, sizeof(p)),
139	0	.free = av_free,
140	0	};
141	0	return out->priv ? 0 : AVERROR(ENOMEM);
142	0	}
143
144		const SwsOpBackend backend_murder = {
145		.name = "memcpy",
146		.compile = compile,
147		.hw_format = AV_PIX_FMT_NONE,
148		};

Coverage Report

Created: 2026-05-31 06:50