/src/ffmpeg/libswscale/ops_tmpl_float.c

Source
/**
 * Copyright (C) 2025 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"

#include "ops_backend.h"

/*
 * Template configuration for the floating point pixel type.
 *
 * BIT_DEPTH may be defined before this file is compiled; when it is not, it
 * defaults to 32. Any value other than 32 is rejected with an #error below.
 */
#ifndef BIT_DEPTH
#  define BIT_DEPTH 32
#endif

/*
 * Type and naming macros for the selected depth. pixel_t is the element type
 * used by the kernels in this file.
 * NOTE(review): the remaining macros are presumably consumed by
 * ops_tmpl_common.c and the fn()/DECL_* helpers - confirm against those files.
 */
#if BIT_DEPTH == 32
#  define PIXEL_TYPE SWS_PIXEL_F32
#  define PIXEL_MAX  FLT_MAX
#  define pixel_t    float
#  define inter_t    float
#  define block_t    f32block_t
#  define px         f32
#else
#  error Invalid BIT_DEPTH
#endif

/*
 * Defined immediately before including the common template and undefined
 * again at the end of this file.
 */
#define IS_FLOAT 1
#define FMT_CHAR f
#include "ops_tmpl_common.c"

/**
 * Prepare the private data read by the dither kernels.
 *
 * Private data layout written to `out->priv`:
 *  - priv.ptr:       heap-allocated matrix of pixel_t with `size` rows of
 *                    `width` entries each, or NULL for the 1x1 special case
 *  - priv.i8[8..11]: per-component row offsets copied from
 *                    op->dither.y_offset; the kernel skips a component whose
 *                    offset is negative
 *
 * Each matrix row is padded up to at least SWS_BLOCK_SIZE entries by
 * repeating the row periodically, so that the kernel can always read a full
 * block starting from its base column.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the matrix cannot be allocated
 */
DECL_SETUP(setup_dither, params, out)
{
    const SwsOp *op = params->op;
    const int size = 1 << op->dither.size_log2;
    const int width = FFMAX(size, SWS_BLOCK_SIZE);
    pixel_t *matrix = NULL;

    if (size == 1) {
        /* We special case this value: the kernel adds a constant 0.5 */
        av_assert1(!av_cmp_q(op->dither.matrix[0], av_make_q(1, 2)));
    } else {
        matrix = av_malloc(sizeof(pixel_t) * size * width);
        if (!matrix)
            return AVERROR(ENOMEM);
        out->free = ff_op_priv_free;
    }

    out->priv.ptr = matrix;

    /* The offsets are stored directly behind the pointer */
    static_assert(sizeof(out->priv.ptr) <= sizeof(uint8_t[8]),
                  ">8 byte pointers not supported");

    /*
     * The kernel tests these offsets for every matrix size, including 1x1,
     * so they must be stored on both paths (after the pointer store).
     */
    int8_t *offset = &out->priv.i8[8];
    for (int i = 0; i < 4; i++)
        offset[i] = op->dither.y_offset[i];

    if (!matrix)
        return 0; /* 1x1: no matrix to fill */

    for (int y = 0; y < size; y++) {
        for (int x = 0; x < size; x++)
            matrix[y * width + x] = av_q2pixel(op->dither.matrix[y * size + x]);
        for (int x = size; x < width; x++) /* pad to block size */
            matrix[y * width + x] = matrix[y * width + (x % size)];
    }

    return 0;
}

/**
 * Add the dither pattern to one block of pixels.
 *
 * `size_log2` is the base-2 logarithm of the dither matrix size. The matrix
 * pointer and the four per-component row offsets are read from the private
 * data (priv.ptr and priv.i8[8..11]). A component whose offset is negative is
 * left untouched. For size_log2 == 0 the matrix is never read and a constant
 * 0.5 is added instead.
 */
DECL_FUNC(dither, const int size_log2)
{
    const int size   = 1 << size_log2;
    const int wrap   = size - 1; /* size is a power of two */
    const int stride = FFMAX(size, SWS_BLOCK_SIZE);
    const int line   = iter->y;
    /* Block-aligned x position, wrapped into the matrix period */
    const int col0   = iter->x & ~(SWS_BLOCK_SIZE - 1) & wrap;
    const int8_t *restrict offset  = &impl->priv.i8[8];
    const pixel_t *restrict matrix = impl->priv.ptr;

#define DITHER_COMP(VAR, IDX)                                                   \
    if (offset[IDX] >= 0) {                                                     \
        const int first = ((line + offset[IDX]) & wrap) * stride + col0;        \
        SWS_LOOP                                                                \
        for (int i = 0; i < SWS_BLOCK_SIZE; i++)                                \
            VAR[i] += size_log2 ? matrix[first + i] : (pixel_t) 0.5;            \
    }

    DITHER_COMP(x, 0)
    DITHER_COMP(y, 1)
    DITHER_COMP(z, 2)
    DITHER_COMP(w, 3)

    CONTINUE(x, y, z, w);
}

/*
 * Instantiate the dither kernel for one matrix size. N is passed to `dither`
 * as its `size_log2` argument and is also recorded in the entry's
 * `dither_size` field; every entry uses setup_dither as its setup function.
 * NOTE(review): DECL_IMPL / DECL_ENTRY are defined elsewhere; presumably they
 * emit the specialized function and the fn(op_dither##N) entry referenced by
 * the op table below - confirm against ops_backend.h.
 */
#define WRAP_DITHER(N)                                                          \
DECL_IMPL(dither, dither##N, N)                                                 \
                                                                                \
DECL_ENTRY(dither##N, SWS_COMP_ALL,                                             \
    .op = SWS_OP_DITHER,                                                        \
    .dither_size = N,                                                           \
    .setup = fn(setup_dither),                                                  \
);

/* One specialization per size_log2 from 0 (1x1) through 8 (256x256) */
WRAP_DITHER(0)
WRAP_DITHER(1)
WRAP_DITHER(2)
WRAP_DITHER(3)
WRAP_DITHER(4)
WRAP_DITHER(5)
WRAP_DITHER(6)
WRAP_DITHER(7)
WRAP_DITHER(8)

/**
 * Coefficients of a linear op, converted to pixel_t by setup_linear and read
 * by the linear_mask kernel.
 */
typedef struct {
    /* Stored in split form for convenience */
    pixel_t m[4][4]; /* m[i][j]: weight of input component j in output i */
    pixel_t k[4];    /* k[i]: constant offset added to output component i */
} fn(LinCoeffs);

DECL_SETUP(setup_linear, params, out)
{
    const SwsOp *op = params->op;
    fn(LinCoeffs) c;

    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++)
            c.m[i][j] = av_q2pixel(op->lin.m[i][j]);
        c.k[i] = av_q2pixel(op->lin.m[i][4]);
    }

    return SETUP_MEMDUP(c, out);
}

/**
 * Fully general case for a 5x5 linear affine transformation. Should never be
 * called without constant `mask`. This function will compile down to the
 * appropriately optimized version for the required subset of operations when
 * called with a constant mask.
 *
 * Only four output rows are computed here: each output component is the sum
 * of an optional constant offset (c.k) and up to four weighted inputs (c.m).
 * `mask` selects which coefficients take part:
 *  - SWS_MASK_OFF(i) unset: the offset of row i is treated as 0
 *  - SWS_MASK(i, j) unset, i != j: the term is treated as 0
 *  - SWS_MASK(i, i) unset: the input component is passed through unscaled
 *
 * The coefficients are read from the LinCoeffs copy stored in priv.ptr.
 */
DECL_FUNC(linear_mask, const uint32_t mask)
{
    /* Local copy of the coefficients prepared by the setup function */
    const fn(LinCoeffs) c = *(const fn(LinCoeffs) *) impl->priv.ptr;

    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        /* Snapshot the inputs, since every output below overwrites one */
        const pixel_t xx = x[i];
        const pixel_t yy = y[i];
        const pixel_t zz = z[i];
        const pixel_t ww = w[i];

        /* Row 0: offset first, then the terms in column order */
        x[i]  = (mask & SWS_MASK_OFF(0)) ? c.k[0] : 0;
        x[i] += (mask & SWS_MASK(0, 0))  ? c.m[0][0] * xx : xx;
        x[i] += (mask & SWS_MASK(0, 1))  ? c.m[0][1] * yy : 0;
        x[i] += (mask & SWS_MASK(0, 2))  ? c.m[0][2] * zz : 0;
        x[i] += (mask & SWS_MASK(0, 3))  ? c.m[0][3] * ww : 0;

        /* Row 1 */
        y[i]  = (mask & SWS_MASK_OFF(1)) ? c.k[1] : 0;
        y[i] += (mask & SWS_MASK(1, 0))  ? c.m[1][0] * xx : 0;
        y[i] += (mask & SWS_MASK(1, 1))  ? c.m[1][1] * yy : yy;
        y[i] += (mask & SWS_MASK(1, 2))  ? c.m[1][2] * zz : 0;
        y[i] += (mask & SWS_MASK(1, 3))  ? c.m[1][3] * ww : 0;

        /* Row 2 */
        z[i]  = (mask & SWS_MASK_OFF(2)) ? c.k[2] : 0;
        z[i] += (mask & SWS_MASK(2, 0))  ? c.m[2][0] * xx : 0;
        z[i] += (mask & SWS_MASK(2, 1))  ? c.m[2][1] * yy : 0;
        z[i] += (mask & SWS_MASK(2, 2))  ? c.m[2][2] * zz : zz;
        z[i] += (mask & SWS_MASK(2, 3))  ? c.m[2][3] * ww : 0;

        /* Row 3 */
        w[i]  = (mask & SWS_MASK_OFF(3)) ? c.k[3] : 0;
        w[i] += (mask & SWS_MASK(3, 0))  ? c.m[3][0] * xx : 0;
        w[i] += (mask & SWS_MASK(3, 1))  ? c.m[3][1] * yy : 0;
        w[i] += (mask & SWS_MASK(3, 2))  ? c.m[3][2] * zz : 0;
        w[i] += (mask & SWS_MASK(3, 3))  ? c.m[3][3] * ww : ww;
    }

    CONTINUE(x, y, z, w);
}

/*
 * Instantiate linear_mask for one constant coefficient mask. MASK is passed
 * to the kernel as its `mask` argument and is also recorded in the entry's
 * `linear_mask` field; every entry uses setup_linear as its setup function.
 * NOTE(review): DECL_IMPL / DECL_ENTRY are defined elsewhere; presumably they
 * emit the specialized function and the fn(op_linear_##NAME) entry referenced
 * by the op table below - confirm against ops_backend.h.
 */
#define WRAP_LINEAR(NAME, MASK)                                                 \
DECL_IMPL(linear_mask, linear_##NAME, MASK)                                     \
                                                                                \
DECL_ENTRY(linear_##NAME, SWS_COMP_ALL,                                         \
    .op    = SWS_OP_LINEAR,                                                     \
    .setup = fn(setup_linear),                                                  \
    .linear_mask = (MASK),                                                      \
);

/*
 * Supported mask patterns. In the mixed expressions below `^` binds tighter
 * than `|`, i.e. single coefficients are first removed from the matrix mask
 * and the offset / alpha bits are then added.
 */
WRAP_LINEAR(luma,      SWS_MASK_LUMA)
WRAP_LINEAR(alpha,     SWS_MASK_ALPHA)
WRAP_LINEAR(lumalpha,  SWS_MASK_LUMA | SWS_MASK_ALPHA)
WRAP_LINEAR(yalpha,    SWS_MASK(1, 1)) /* ya alpha */
WRAP_LINEAR(dot3,      0x7)
WRAP_LINEAR(dot3a,     0x7 | SWS_MASK_ALPHA)
WRAP_LINEAR(row0,      SWS_MASK_ROW(0) ^ SWS_MASK(0, 3)) /* row0 sans alpha */
WRAP_LINEAR(diag3,     SWS_MASK_DIAG3)
WRAP_LINEAR(diag4,     SWS_MASK_DIAG4)
WRAP_LINEAR(diagoff3,  SWS_MASK_DIAG3 | SWS_MASK_OFF3)
WRAP_LINEAR(affine3,   SWS_MASK_MAT3 | SWS_MASK_OFF3)
WRAP_LINEAR(affine3uv, SWS_MASK_MAT3 | SWS_MASK_OFF(1) | SWS_MASK_OFF(2))
WRAP_LINEAR(affine3x,  SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3)
WRAP_LINEAR(affine3xa, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3 | SWS_MASK_ALPHA)
WRAP_LINEAR(affine3xy, SWS_MASK_MAT3 ^ SWS_MASK(0, 0) ^ SWS_MASK(0, 1) | SWS_MASK_OFF3)
WRAP_LINEAR(affine3a,  SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA)

/**
 * Table of all operation entries provided for the floating point pixel type.
 *
 * The entry list is terminated by NULL and every entry is listed exactly
 * once.
 * NOTE(review): REF_COMMON_PATTERNS() is defined elsewhere and presumably
 * expands to several entries for the named operation - confirm.
 */
static const SwsOpTable fn(op_table_float) = {
    .block_size = SWS_BLOCK_SIZE,
    .entries = {
        REF_COMMON_PATTERNS(convert_uint8),
        REF_COMMON_PATTERNS(convert_uint16),
        REF_COMMON_PATTERNS(convert_uint32),

        &fn(op_clear_0001),
        REF_COMMON_PATTERNS(min),
        REF_COMMON_PATTERNS(max),
        REF_COMMON_PATTERNS(scale),

        &fn(op_dither0),
        &fn(op_dither1),
        &fn(op_dither2),
        &fn(op_dither3),
        &fn(op_dither4),
        &fn(op_dither5),
        &fn(op_dither6),
        &fn(op_dither7),
        &fn(op_dither8),

        /* op_clear_0001 is already registered above; do not list it twice */
        &fn(op_clear_1000),
        &fn(op_clear_1100),

        &fn(op_linear_luma),
        &fn(op_linear_alpha),
        &fn(op_linear_lumalpha),
        &fn(op_linear_yalpha),
        &fn(op_linear_dot3),
        &fn(op_linear_dot3a),
        &fn(op_linear_row0),
        &fn(op_linear_diag3),
        &fn(op_linear_diag4),
        &fn(op_linear_diagoff3),
        &fn(op_linear_affine3),
        &fn(op_linear_affine3uv),
        &fn(op_linear_affine3x),
        &fn(op_linear_affine3xa),
        &fn(op_linear_affine3xy),
        &fn(op_linear_affine3a),

        &fn(op_filter1_v),
        &fn(op_filter2_v),
        &fn(op_filter3_v),
        &fn(op_filter4_v),

        &fn(op_filter1_h),
        &fn(op_filter2_h),
        &fn(op_filter3_h),
        &fn(op_filter4_h),

        NULL
    },
};

/* Drop the per-type template macros defined at the top of this file */
#undef PIXEL_TYPE
#undef PIXEL_MAX
#undef pixel_t
#undef inter_t
#undef block_t
#undef px

/* Drop the macros that were defined just before including the common template */
#undef FMT_CHAR
#undef IS_FLOAT

Coverage Report

Created: 2026-04-29 07:00

Line	Count	Source
1		/**
2		* Copyright (C) 2025 Niklas Haas
3		*
4		* This file is part of FFmpeg.
5		*
6		* FFmpeg is free software; you can redistribute it and/or
7		* modify it under the terms of the GNU Lesser General Public
8		* License as published by the Free Software Foundation; either
9		* version 2.1 of the License, or (at your option) any later version.
10		*
11		* FFmpeg is distributed in the hope that it will be useful,
12		* but WITHOUT ANY WARRANTY; without even the implied warranty of
13		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14		* Lesser General Public License for more details.
15		*
16		* You should have received a copy of the GNU Lesser General Public
17		* License along with FFmpeg; if not, write to the Free Software
18		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19		*/
20
21		#include "libavutil/avassert.h"
22
23		#include "ops_backend.h"
24
25		#ifndef BIT_DEPTH
26		# define BIT_DEPTH 32
27		#endif
28
29		#if BIT_DEPTH == 32
30		# define PIXEL_TYPE SWS_PIXEL_F32
31		# define PIXEL_MAX FLT_MAX
32	0	# define pixel_t float
33	0	# define inter_t float
34		# define block_t f32block_t
35	0	# define px f32
36		#else
37		# error Invalid BIT_DEPTH
38		#endif
39
40		#define IS_FLOAT 1
41		#define FMT_CHAR f
42		#include "ops_tmpl_common.c"
43
44		DECL_SETUP(setup_dither, params, out)
45	0	{
46	0	const SwsOp *op = params->op;
47	0	const int size = 1 << op->dither.size_log2;
48	0	if (size == 1) {
49		/* We special case this value */
50	0	av_assert1(!av_cmp_q(op->dither.matrix[0], av_make_q(1, 2)));
51	0	out->priv.ptr = NULL;
52	0	return 0;
53	0	}
54
55	0	const int width = FFMAX(size, SWS_BLOCK_SIZE);
56	0	pixel_t *matrix = out->priv.ptr = av_malloc(sizeof(pixel_t) * size * width);
57	0	if (!matrix)
58	0	return AVERROR(ENOMEM);
59	0	out->free = ff_op_priv_free;
60
61	0	static_assert(sizeof(out->priv.ptr) <= sizeof(uint8_t[8]),
62	0	">8 byte pointers not supported");
63
64	0	int8_t *offset = &out->priv.i8[8];
65	0	for (int i = 0; i < 4; i++)
66	0	offset[i] = op->dither.y_offset[i];
67
68	0	for (int y = 0; y < size; y++) {
69	0	for (int x = 0; x < size; x++)
70	0	matrix[y * width + x] = av_q2pixel(op->dither.matrix[y * size + x]);
71	0	for (int x = size; x < width; x++) /* pad to block size */
72	0	matrix[y * width + x] = matrix[y * width + (x % size)];
73	0	}
74
75	0	return 0;
76	0	}
77
78		DECL_FUNC(dither, const int size_log2)
79	0	{
80	0	const pixel_t *restrict matrix = impl->priv.ptr;
81	0	const int8_t *restrict offset = &impl->priv.i8[8];
82	0	const int mask = (1 << size_log2) - 1;
83	0	const int y_line = iter->y;
84	0	const int size = 1 << size_log2;
85	0	const int width = FFMAX(size, SWS_BLOCK_SIZE);
86	0	const int base = iter->x & ~(SWS_BLOCK_SIZE - 1) & (size - 1);
87
88	0	#define DITHER_COMP(VAR, IDX) \
89	0	if (offset[IDX] >= 0) { \
90	0	const int row = (y_line + offset[IDX]) & mask; \
91	0	SWS_LOOP \
92	0	for (int i = 0; i < SWS_BLOCK_SIZE; i++) \
93	0	VAR[i] += size_log2 ? matrix[row * width + base + i] : (pixel_t) 0.5; \
94	0	}
95
96	0	DITHER_COMP(x, 0)
97	0	DITHER_COMP(y, 1)
98	0	DITHER_COMP(z, 2)
99	0	DITHER_COMP(w, 3)
100
101	0	CONTINUE(x, y, z, w);
102	0	}
103
104		#define WRAP_DITHER(N) \
105		DECL_IMPL(dither, dither##N, N) \
106		\
107		DECL_ENTRY(dither##N, SWS_COMP_ALL, \
108		.op = SWS_OP_DITHER, \
109		.dither_size = N, \
110		.setup = fn(setup_dither), \
111		);
112
113		WRAP_DITHER(0)
114		WRAP_DITHER(1)
115		WRAP_DITHER(2)
116		WRAP_DITHER(3)
117		WRAP_DITHER(4)
118		WRAP_DITHER(5)
119		WRAP_DITHER(6)
120		WRAP_DITHER(7)
121		WRAP_DITHER(8)
122
123		typedef struct {
124		/* Stored in split form for convenience */
125		pixel_t m[4][4];
126		pixel_t k[4];
127		} fn(LinCoeffs);
128
129		DECL_SETUP(setup_linear, params, out)
130	0	{
131	0	const SwsOp *op = params->op;
132	0	fn(LinCoeffs) c;
133
134	0	for (int i = 0; i < 4; i++) {
135	0	for (int j = 0; j < 4; j++)
136	0	c.m[i][j] = av_q2pixel(op->lin.m[i][j]);
137	0	c.k[i] = av_q2pixel(op->lin.m[i][4]);
138	0	}
139
140	0	return SETUP_MEMDUP(c, out);
141	0	}
142
143		/**
144		* Fully general case for a 5x5 linear affine transformation. Should never be
145		* called without constant `mask`. This function will compile down to the
146		* appropriately optimized version for the required subset of operations when
147		* called with a constant mask.
148		*/
149		DECL_FUNC(linear_mask, const uint32_t mask)
150	0	{
151	0	const fn(LinCoeffs) c = *(const fn(LinCoeffs) *) impl->priv.ptr;
152
153	0	SWS_LOOP
154	0	for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
155	0	const pixel_t xx = x[i];
156	0	const pixel_t yy = y[i];
157	0	const pixel_t zz = z[i];
158	0	const pixel_t ww = w[i];
159
160	0	x[i] = (mask & SWS_MASK_OFF(0)) ? c.k[0] : 0;
161	0	x[i] += (mask & SWS_MASK(0, 0)) ? c.m[0][0] * xx : xx;
162	0	x[i] += (mask & SWS_MASK(0, 1)) ? c.m[0][1] * yy : 0;
163	0	x[i] += (mask & SWS_MASK(0, 2)) ? c.m[0][2] * zz : 0;
164	0	x[i] += (mask & SWS_MASK(0, 3)) ? c.m[0][3] * ww : 0;
165
166	0	y[i] = (mask & SWS_MASK_OFF(1)) ? c.k[1] : 0;
167	0	y[i] += (mask & SWS_MASK(1, 0)) ? c.m[1][0] * xx : 0;
168	0	y[i] += (mask & SWS_MASK(1, 1)) ? c.m[1][1] * yy : yy;
169	0	y[i] += (mask & SWS_MASK(1, 2)) ? c.m[1][2] * zz : 0;
170	0	y[i] += (mask & SWS_MASK(1, 3)) ? c.m[1][3] * ww : 0;
171
172	0	z[i] = (mask & SWS_MASK_OFF(2)) ? c.k[2] : 0;
173	0	z[i] += (mask & SWS_MASK(2, 0)) ? c.m[2][0] * xx : 0;
174	0	z[i] += (mask & SWS_MASK(2, 1)) ? c.m[2][1] * yy : 0;
175	0	z[i] += (mask & SWS_MASK(2, 2)) ? c.m[2][2] * zz : zz;
176	0	z[i] += (mask & SWS_MASK(2, 3)) ? c.m[2][3] * ww : 0;
177
178	0	w[i] = (mask & SWS_MASK_OFF(3)) ? c.k[3] : 0;
179	0	w[i] += (mask & SWS_MASK(3, 0)) ? c.m[3][0] * xx : 0;
180	0	w[i] += (mask & SWS_MASK(3, 1)) ? c.m[3][1] * yy : 0;
181	0	w[i] += (mask & SWS_MASK(3, 2)) ? c.m[3][2] * zz : 0;
182	0	w[i] += (mask & SWS_MASK(3, 3)) ? c.m[3][3] * ww : ww;
183	0	}
184
185	0	CONTINUE(x, y, z, w);
186	0	}
187
188		#define WRAP_LINEAR(NAME, MASK) \
189		DECL_IMPL(linear_mask, linear_##NAME, MASK) \
190		\
191		DECL_ENTRY(linear_##NAME, SWS_COMP_ALL, \
192		.op = SWS_OP_LINEAR, \
193		.setup = fn(setup_linear), \
194		.linear_mask = (MASK), \
195		);
196
197		WRAP_LINEAR(luma, SWS_MASK_LUMA)
198		WRAP_LINEAR(alpha, SWS_MASK_ALPHA)
199		WRAP_LINEAR(lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA)
200		WRAP_LINEAR(yalpha, SWS_MASK(1, 1)) /* ya alpha */
201		WRAP_LINEAR(dot3, 0x7)
202		WRAP_LINEAR(dot3a, 0x7 | SWS_MASK_ALPHA)
203		WRAP_LINEAR(row0, SWS_MASK_ROW(0) ^ SWS_MASK(0, 3)) /* row0 sans alpha */
204		WRAP_LINEAR(diag3, SWS_MASK_DIAG3)
205		WRAP_LINEAR(diag4, SWS_MASK_DIAG4)
206		WRAP_LINEAR(diagoff3, SWS_MASK_DIAG3 | SWS_MASK_OFF3)
207		WRAP_LINEAR(affine3, SWS_MASK_MAT3 | SWS_MASK_OFF3)
208		WRAP_LINEAR(affine3uv, SWS_MASK_MAT3 | SWS_MASK_OFF(1) | SWS_MASK_OFF(2))
209		WRAP_LINEAR(affine3x, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3)
210		WRAP_LINEAR(affine3xa, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3 | SWS_MASK_ALPHA)
211		WRAP_LINEAR(affine3xy, SWS_MASK_MAT3 ^ SWS_MASK(0, 0) ^ SWS_MASK(0, 1) | SWS_MASK_OFF3)
212		WRAP_LINEAR(affine3a, SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA)
213
214		static const SwsOpTable fn(op_table_float) = {
215		.block_size = SWS_BLOCK_SIZE,
216		.entries = {
217		REF_COMMON_PATTERNS(convert_uint8),
218		REF_COMMON_PATTERNS(convert_uint16),
219		REF_COMMON_PATTERNS(convert_uint32),
220
221		&fn(op_clear_0001),
222		REF_COMMON_PATTERNS(min),
223		REF_COMMON_PATTERNS(max),
224		REF_COMMON_PATTERNS(scale),
225
226		&fn(op_dither0),
227		&fn(op_dither1),
228		&fn(op_dither2),
229		&fn(op_dither3),
230		&fn(op_dither4),
231		&fn(op_dither5),
232		&fn(op_dither6),
233		&fn(op_dither7),
234		&fn(op_dither8),
235
236		&fn(op_clear_0001),
237		&fn(op_clear_1000),
238		&fn(op_clear_1100),
239
240		&fn(op_linear_luma),
241		&fn(op_linear_alpha),
242		&fn(op_linear_lumalpha),
243		&fn(op_linear_yalpha),
244		&fn(op_linear_dot3),
245		&fn(op_linear_dot3a),
246		&fn(op_linear_row0),
247		&fn(op_linear_diag3),
248		&fn(op_linear_diag4),
249		&fn(op_linear_diagoff3),
250		&fn(op_linear_affine3),
251		&fn(op_linear_affine3uv),
252		&fn(op_linear_affine3x),
253		&fn(op_linear_affine3xa),
254		&fn(op_linear_affine3xy),
255		&fn(op_linear_affine3a),
256
257		&fn(op_filter1_v),
258		&fn(op_filter2_v),
259		&fn(op_filter3_v),
260		&fn(op_filter4_v),
261
262		&fn(op_filter1_h),
263		&fn(op_filter2_h),
264		&fn(op_filter3_h),
265		&fn(op_filter4_h),
266
267		NULL
268		},
269		};
270
271		#undef PIXEL_TYPE
272		#undef PIXEL_MAX
273		#undef pixel_t
274		#undef inter_t
275		#undef block_t
276		#undef px
277
278		#undef FMT_CHAR
279		#undef IS_FLOAT