/src/ffmpeg/libswscale/ops_tmpl_common.c
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "ops_backend.h" |
22 | | |
23 | | #ifndef BIT_DEPTH |
24 | | # error Should only be included from ops_tmpl_*.c! |
25 | | #endif |
26 | | |
/**
 * Declare a pattern that converts the current template pixel type to an
 * N-bit unsigned integer block, for every enabled X/Y/Z/W component.
 * Conversion is a plain per-element C assignment (implicit conversion /
 * truncation semantics); disabled components are passed through untouched.
 * Also registers the pattern as an SWS_OP_CONVERT entry targeting
 * SWS_PIXEL_U##N.
 */
#define WRAP_CONVERT_UINT(N)                                                   \
DECL_PATTERN(convert_uint##N)                                                  \
{                                                                              \
    u##N##block_t xu, yu, zu, wu;                                              \
                                                                               \
    SWS_LOOP                                                                   \
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {                                 \
        if (X)                                                                 \
            xu[i] = x[i];                                                      \
        if (Y)                                                                 \
            yu[i] = y[i];                                                      \
        if (Z)                                                                 \
            zu[i] = z[i];                                                      \
        if (W)                                                                 \
            wu[i] = w[i];                                                      \
    }                                                                          \
                                                                               \
    /* hand the converted block to the next op in the chain */                 \
    CONTINUE(xu, yu, zu, wu);                                                  \
}                                                                              \
                                                                               \
WRAP_COMMON_PATTERNS(convert_uint##N,                                          \
    .op = SWS_OP_CONVERT,                                                      \
    .convert.to = SWS_PIXEL_U##N,                                              \
);
51 | | |
/* Instantiate conversions to each uint depth other than the template's own.
 * A 32-bit float template (IS_FLOAT defined) still needs the f32 -> u32
 * conversion, hence the extra condition on the 32-bit case. */
#if BIT_DEPTH != 8
WRAP_CONVERT_UINT(8)
#endif

#if BIT_DEPTH != 16
WRAP_CONVERT_UINT(16)
#endif

#if BIT_DEPTH != 32 || defined(IS_FLOAT)
WRAP_CONVERT_UINT(32)
#endif
63 | | |
/**
 * Overwrite the components selected by the compile-time X/Y/Z/W flags with
 * the constant pixel values held in impl->priv.px[0..3]; components whose
 * flag is unset are forwarded to the next op unchanged.
 */
DECL_PATTERN(clear)
{
    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        if (X)
            x[i] = impl->priv.px[0];
        if (Y)
            y[i] = impl->priv.px[1];
        if (Z)
            z[i] = impl->priv.px[2];
        if (W)
            w[i] = impl->priv.px[3];
    }

    CONTINUE(x, y, z, w);
}
80 | | |
/**
 * Bind the clear pattern for one specific X/Y/Z/W component mask and
 * register it as an SWS_OP_CLEAR entry. The constants written by the
 * pattern are filled in at runtime by ff_sws_setup_clear.
 */
#define WRAP_CLEAR(X, Y, Z, W)                                                 \
DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W)                             \
                                                                               \
DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL,                                 \
    .setup = ff_sws_setup_clear,                                               \
    .op = SWS_OP_CLEAR,                                                        \
    .clear.mask = SWS_COMP_MASK(X, Y, Z, W),                                   \
);
89 | | |
/* Only the component masks that occur in practice are instantiated,
 * grouped by use case below. */
WRAP_CLEAR(0, 0, 0, 1) /* rgba alpha */
WRAP_CLEAR(1, 0, 0, 0) /* argb alpha */
WRAP_CLEAR(0, 1, 0, 0) /* ya alpha */

WRAP_CLEAR(1, 1, 0, 0) /* vuya chroma */
WRAP_CLEAR(0, 1, 1, 0) /* yuva chroma */
WRAP_CLEAR(0, 0, 1, 1) /* ayuv chroma */
WRAP_CLEAR(1, 0, 1, 0) /* uyva chroma */
WRAP_CLEAR(0, 1, 0, 1) /* xvyu chroma */

WRAP_CLEAR(0, 1, 1, 1) /* gray -> yuva */
WRAP_CLEAR(1, 0, 1, 1) /* gray -> ayuv */
WRAP_CLEAR(1, 1, 0, 1) /* gray -> vuya */
103 | | |
/**
 * Per-component clamp from above: each enabled component is replaced by the
 * smaller of its value and the constant in impl->priv.px[0..3] (filled in
 * by ff_sws_setup_clamp, see the entry registration below).
 */
DECL_PATTERN(min)
{
    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        if (X)
            x[i] = FFMIN(x[i], impl->priv.px[0]);
        if (Y)
            y[i] = FFMIN(y[i], impl->priv.px[1]);
        if (Z)
            z[i] = FFMIN(z[i], impl->priv.px[2]);
        if (W)
            w[i] = FFMIN(w[i], impl->priv.px[3]);
    }

    CONTINUE(x, y, z, w);
}
120 | | |
/**
 * Per-component clamp from below: each enabled component is replaced by the
 * larger of its value and the constant in impl->priv.px[0..3] (filled in
 * by ff_sws_setup_clamp, see the entry registration below).
 */
DECL_PATTERN(max)
{
    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        if (X)
            x[i] = FFMAX(x[i], impl->priv.px[0]);
        if (Y)
            y[i] = FFMAX(y[i], impl->priv.px[1]);
        if (Z)
            z[i] = FFMAX(z[i], impl->priv.px[2]);
        if (W)
            w[i] = FFMAX(w[i], impl->priv.px[3]);
    }

    CONTINUE(x, y, z, w);
}
137 | | |
/* Register the clamp ops; both share ff_sws_setup_clamp, which derives the
 * per-component constants in priv.px from the op description.
 * NOTE(review): .flexible presumably marks the pattern as usable with
 * arbitrary component subsets — confirm against ops_backend.h. */
WRAP_COMMON_PATTERNS(min,
    .op = SWS_OP_MIN,
    .setup = ff_sws_setup_clamp,
    .flexible = true,
);

WRAP_COMMON_PATTERNS(max,
    .op = SWS_OP_MAX,
    .setup = ff_sws_setup_clamp,
    .flexible = true,
);
149 | | |
/**
 * Multiply every enabled component by a single scalar, taken from
 * impl->priv.px[0] (filled in by ff_sws_setup_scale). The same factor is
 * applied to all components.
 */
DECL_PATTERN(scale)
{
    const pixel_t scale = impl->priv.px[0];

    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        if (X)
            x[i] *= scale;
        if (Y)
            y[i] *= scale;
        if (Z)
            z[i] *= scale;
        if (W)
            w[i] *= scale;
    }

    CONTINUE(x, y, z, w);
}

WRAP_COMMON_PATTERNS(scale,
    .op = SWS_OP_SCALE,
    .setup = ff_sws_setup_scale,
    .flexible = true,
);
174 | | |
/**
 * Setup for the general vertical filter: pre-converts the fixed-point
 * kernel weights (scaled by SWS_FILTER_SCALE) to floats once, so filter_v
 * can accumulate in float directly. The weight array is stored in priv.ptr
 * and the filter size in priv.i32[2] — slots 0-1 are occupied by the
 * pointer itself, which is what the static_assert below guards.
 *
 * Returns 0 on success, AVERROR(ENOMEM) if the weight allocation fails.
 */
DECL_SETUP(setup_filter_v, params, out)
{
    const SwsFilterWeights *filter = params->op->rw.kernel;
    static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
                  ">8 byte pointers not supported");

    /* Pre-convert weights to float */
    float *weights = av_calloc(filter->num_weights, sizeof(float));
    if (!weights)
        return AVERROR(ENOMEM);

    for (int i = 0; i < filter->num_weights; i++)
        weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;

    out->priv.ptr = weights;
    out->priv.i32[2] = filter->filter_size;
    out->free = ff_op_priv_free; /* the weight array is owned by this op */
    return 0;
}
194 | | |
/* Fully general vertical planar filter case.
 *
 * Accumulates `filter_size` weighted source rows into float accumulators
 * for up to `elems` planes, then continues the op chain with the float
 * results. Weights were pre-converted to float by setup_filter_v; each
 * output row `iter->y` has its own run of filter_size weights. */
DECL_READ(filter_v, const int elems)
{
    const SwsOpExec *exec = iter->exec;
    const float *restrict weights = impl->priv.ptr;
    const int filter_size = impl->priv.i32[2];
    /* select the weight run for the current output row */
    weights += filter_size * iter->y;

    f32block_t xs, ys, zs, ws;
    memset(xs, 0, sizeof(xs));
    if (elems > 1)
        memset(ys, 0, sizeof(ys));
    if (elems > 2)
        memset(zs, 0, sizeof(zs));
    if (elems > 3)
        memset(ws, 0, sizeof(ws));

    /* One filter tap per iteration: add the weighted source row, then step
     * each active input plane down by its row stride */
    for (int j = 0; j < filter_size; j++) {
        const float weight = weights[j];

        SWS_LOOP
        for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
            xs[i] += weight * in0[i];
            if (elems > 1)
                ys[i] += weight * in1[i];
            if (elems > 2)
                zs[i] += weight * in2[i];
            if (elems > 3)
                ws[i] += weight * in3[i];
        }

        in0 = bump_ptr(in0, exec->in_stride[0]);
        if (elems > 1)
            in1 = bump_ptr(in1, exec->in_stride[1]);
        if (elems > 2)
            in2 = bump_ptr(in2, exec->in_stride[2]);
        if (elems > 3)
            in3 = bump_ptr(in3, exec->in_stride[3]);
    }

    /* advance the persistent input cursors past the block just consumed */
    for (int i = 0; i < elems; i++)
        iter->in[i] += sizeof(block_t);

    CONTINUE(xs, ys, zs, ws);
}
240 | | |
/**
 * Setup for the general horizontal filter: unlike setup_filter_v, the
 * fixed-point weights are kept as-is (filter_h scales by 1/SWS_FILTER_SCALE
 * at read time); we only take a refstruct reference on the weight array and
 * record the filter size in priv.i32[2]. Always returns 0.
 */
DECL_SETUP(setup_filter_h, params, out)
{
    SwsFilterWeights *filter = params->op->rw.kernel;
    out->priv.ptr = av_refstruct_ref(filter->weights);
    out->priv.i32[2] = filter->filter_size;
    out->free = ff_op_priv_unref; /* drop the weights reference on teardown */
    return 0;
}
249 | | |
/* Fully general horizontal planar filter case.
 *
 * For each of the SWS_BLOCK_SIZE output pixels, gathers `filter_size`
 * consecutive input pixels starting at the per-pixel byte offset from
 * exec->in_offset_x, accumulates them with the fixed-point weights in the
 * wider inter_t type, and converts to float with a final 1/SWS_FILTER_SCALE
 * scale. Each output pixel has its own run of filter_size weights. */
DECL_READ(filter_h, const int elems)
{
    const SwsOpExec *exec = iter->exec;
    const int *restrict weights = impl->priv.ptr;
    const int filter_size = impl->priv.i32[2];
    const float scale = 1.0f / SWS_FILTER_SCALE;
    const int xpos = iter->x;
    /* select the weight run for the first pixel of this block */
    weights += filter_size * iter->x;

    f32block_t xs, ys, zs, ws;
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        /* per-output-pixel source position, precomputed as a byte offset */
        const int offset = exec->in_offset_x[xpos + i];
        pixel_t *start0 = bump_ptr(in0, offset);
        pixel_t *start1 = bump_ptr(in1, offset);
        pixel_t *start2 = bump_ptr(in2, offset);
        pixel_t *start3 = bump_ptr(in3, offset);

        inter_t sx = 0, sy = 0, sz = 0, sw = 0;
        for (int j = 0; j < filter_size; j++) {
            const int weight = weights[j];
            sx += weight * start0[j];
            if (elems > 1)
                sy += weight * start1[j];
            if (elems > 2)
                sz += weight * start2[j];
            if (elems > 3)
                sw += weight * start3[j];
        }

        /* renormalize the fixed-point accumulators to float */
        xs[i] = (float) sx * scale;
        if (elems > 1)
            ys[i] = (float) sy * scale;
        if (elems > 2)
            zs[i] = (float) sz * scale;
        if (elems > 3)
            ws[i] = (float) sw * scale;

        weights += filter_size;
    }

    CONTINUE(xs, ys, zs, ws);
}
293 | | |
/**
 * Instantiate a filtered-read implementation for a fixed component count
 * (ELEMS baked in as a compile-time constant so the elems branches fold
 * away) and register it as an SWS_OP_READ entry with the given filter
 * direction and the matching setup function.
 */
#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)                                  \
static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter,       \
                                               const SwsOpImpl *restrict impl, \
                                               void *restrict x, void *restrict y,\
                                               void *restrict z, void *restrict w)\
{                                                                              \
    CALL_READ(FUNC##SUFFIX, ELEMS);                                            \
}                                                                              \
                                                                               \
DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS),                         \
    .op = SWS_OP_READ,                                                         \
    .setup = fn(setup_filter##SUFFIX),                                         \
    .rw.elems = ELEMS,                                                         \
    .rw.filter = SWS_OP_FILTER_##DIR,                                          \
);

/* Vertical and horizontal filters for 1-4 components */
WRAP_FILTER(filter, V, 1, _v)
WRAP_FILTER(filter, V, 2, _v)
WRAP_FILTER(filter, V, 3, _v)
WRAP_FILTER(filter, V, 4, _v)

WRAP_FILTER(filter, H, 1, _h)
WRAP_FILTER(filter, H, 2, _h)
WRAP_FILTER(filter, H, 3, _h)
WRAP_FILTER(filter, H, 4, _h)
319 | | |
/**
 * Top-level driver for one compiled op chain: walks the requested range of
 * rows [y_start, y_end) and horizontal blocks [bx_start, bx_end), kicking
 * off the chain via CONTINUE() once per block. The read/write ops in the
 * chain advance iter->in / iter->out themselves; this loop only applies the
 * per-row bumps at the end of each row.
 */
static void fn(process)(const SwsOpExec *exec, const void *priv,
                        const int bx_start, const int y_start,
                        int bx_end, int y_end)
{
    const SwsOpChain *chain = priv;
    const SwsOpImpl *impl = chain->impl;
    u32block_t x, y, z, w; /* allocate enough space for any intermediate */

    SwsOpIter iterdata;
    SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
    iter->exec = exec;
    for (int i = 0; i < 4; i++) {
        iter->in[i] = (uintptr_t) exec->in[i];
        iter->out[i] = (uintptr_t) exec->out[i];
    }

    for (iter->y = y_start; iter->y < y_end; iter->y++) {
        for (int block = bx_start; block < bx_end; block++) {
            iter->x = block * SWS_BLOCK_SIZE;
            CONTINUE(x, y, z, w); /* run the whole op chain on this block */
        }

        /* End-of-row adjustment: fixed per-plane bump plus an optional
         * per-row stride multiplier. NOTE(review): in_bump_y presumably
         * handles vertically subsampled input planes — confirm. */
        const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
        for (int i = 0; i < 4; i++) {
            iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
            iter->out[i] += exec->out_bump[i];
        }
    }
}