/src/ffmpeg/libswscale/ops_tmpl_float.c
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/avassert.h" |
22 | | |
23 | | #include "ops_backend.h" |
24 | | |
25 | | #ifndef BIT_DEPTH |
26 | | # define BIT_DEPTH 32 |
27 | | #endif |
28 | | |
29 | | #if BIT_DEPTH == 32 |
30 | | # define PIXEL_TYPE SWS_PIXEL_F32 |
31 | | # define PIXEL_MAX FLT_MAX |
32 | | # define PIXEL_MIN FLT_MIN |
33 | 0 | # define pixel_t float |
34 | | # define block_t f32block_t |
35 | 0 | # define px f32 |
36 | | #else |
37 | | # error Invalid BIT_DEPTH |
38 | | #endif |
39 | | |
40 | | #define IS_FLOAT 1 |
41 | | #define FMT_CHAR f |
42 | | #include "ops_tmpl_common.c" |
43 | | |
44 | | DECL_SETUP(setup_dither) |
45 | 0 | { |
46 | 0 | const int size = 1 << op->dither.size_log2; |
47 | 0 | if (size == 1) { |
48 | | /* We special case this value */ |
49 | 0 | av_assert1(!av_cmp_q(op->dither.matrix[0], av_make_q(1, 2))); |
50 | 0 | out->ptr = NULL; |
51 | 0 | return 0; |
52 | 0 | } |
53 | | |
54 | 0 | const int width = FFMAX(size, SWS_BLOCK_SIZE); |
55 | 0 | pixel_t *matrix = out->ptr = av_malloc(sizeof(pixel_t) * size * width); |
56 | 0 | if (!matrix) |
57 | 0 | return AVERROR(ENOMEM); |
58 | | |
59 | 0 | static_assert(sizeof(out->ptr) <= sizeof(uint8_t[8]), ">8 byte pointers not supported"); |
60 | 0 | uint8_t *offset = &out->u8[8]; |
61 | 0 | for (int i = 0; i < 4; i++) |
62 | 0 | offset[i] = op->dither.y_offset[i]; |
63 | |
|
64 | 0 | for (int y = 0; y < size; y++) { |
65 | 0 | for (int x = 0; x < size; x++) |
66 | 0 | matrix[y * width + x] = av_q2pixel(op->dither.matrix[y * size + x]); |
67 | 0 | for (int x = size; x < width; x++) /* pad to block size */ |
68 | 0 | matrix[y * width + x] = matrix[y * width + (x % size)]; |
69 | 0 | } |
70 | |
|
71 | 0 | return 0; |
72 | 0 | } |
73 | | |
74 | | DECL_FUNC(dither, const int size_log2) |
75 | 0 | { |
76 | 0 | const pixel_t *restrict matrix = impl->priv.ptr; |
77 | 0 | const uint8_t *offset = &impl->priv.u8[8]; |
78 | 0 | const int mask = (1 << size_log2) - 1; |
79 | 0 | const int y_line = iter->y; |
80 | 0 | const int row0 = (y_line + offset[0]) & mask; |
81 | 0 | const int row1 = (y_line + offset[1]) & mask; |
82 | 0 | const int row2 = (y_line + offset[2]) & mask; |
83 | 0 | const int row3 = (y_line + offset[3]) & mask; |
84 | 0 | const int size = 1 << size_log2; |
85 | 0 | const int width = FFMAX(size, SWS_BLOCK_SIZE); |
86 | 0 | const int base = iter->x & ~(SWS_BLOCK_SIZE - 1) & (size - 1); |
87 | |
|
88 | 0 | SWS_LOOP |
89 | 0 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
90 | 0 | x[i] += size_log2 ? matrix[row0 * width + base + i] : (pixel_t) 0.5; |
91 | 0 | y[i] += size_log2 ? matrix[row1 * width + base + i] : (pixel_t) 0.5; |
92 | 0 | z[i] += size_log2 ? matrix[row2 * width + base + i] : (pixel_t) 0.5; |
93 | 0 | w[i] += size_log2 ? matrix[row3 * width + base + i] : (pixel_t) 0.5; |
94 | 0 | } |
95 | |
|
96 | 0 | CONTINUE(block_t, x, y, z, w); |
97 | 0 | } |
98 | | |
99 | | #define WRAP_DITHER(N) \ |
100 | 0 | DECL_IMPL(dither##N) \ |
101 | 0 | { \ |
102 | 0 | CALL(dither, N); \ |
103 | 0 | } \ Unexecuted instantiation: ops_backend.c:dither0_f32 Unexecuted instantiation: ops_backend.c:dither1_f32 Unexecuted instantiation: ops_backend.c:dither2_f32 Unexecuted instantiation: ops_backend.c:dither3_f32 Unexecuted instantiation: ops_backend.c:dither4_f32 Unexecuted instantiation: ops_backend.c:dither5_f32 Unexecuted instantiation: ops_backend.c:dither6_f32 Unexecuted instantiation: ops_backend.c:dither7_f32 Unexecuted instantiation: ops_backend.c:dither8_f32 |
104 | | \ |
105 | | DECL_ENTRY(dither##N, \ |
106 | | .op = SWS_OP_DITHER, \ |
107 | | .dither_size = N, \ |
108 | | .setup = fn(setup_dither), \ |
109 | | .free = av_free, \ |
110 | | ); |
111 | | |
112 | | WRAP_DITHER(0) |
113 | | WRAP_DITHER(1) |
114 | | WRAP_DITHER(2) |
115 | | WRAP_DITHER(3) |
116 | | WRAP_DITHER(4) |
117 | | WRAP_DITHER(5) |
118 | | WRAP_DITHER(6) |
119 | | WRAP_DITHER(7) |
120 | | WRAP_DITHER(8) |
121 | | |
122 | | typedef struct { |
123 | | /* Stored in split form for convenience */ |
124 | | pixel_t m[4][4]; |
125 | | pixel_t k[4]; |
126 | | } fn(LinCoeffs); |
127 | | |
128 | | DECL_SETUP(setup_linear) |
129 | 0 | { |
130 | 0 | fn(LinCoeffs) c; |
131 | |
|
132 | 0 | for (int i = 0; i < 4; i++) { |
133 | 0 | for (int j = 0; j < 4; j++) |
134 | 0 | c.m[i][j] = av_q2pixel(op->lin.m[i][j]); |
135 | 0 | c.k[i] = av_q2pixel(op->lin.m[i][4]); |
136 | 0 | } |
137 | |
|
138 | 0 | return SETUP_MEMDUP(c); |
139 | 0 | } |
140 | | |
141 | | /** |
142 | | * Fully general case for a 5x5 linear affine transformation. Should never be |
143 | | * called without constant `mask`. This function will compile down to the |
144 | | * appropriately optimized version for the required subset of operations when |
145 | | * called with a constant mask. |
146 | | */ |
147 | | DECL_FUNC(linear_mask, const uint32_t mask) |
148 | 0 | { |
149 | 0 | const fn(LinCoeffs) c = *(const fn(LinCoeffs) *) impl->priv.ptr; |
150 | |
|
151 | 0 | SWS_LOOP |
152 | 0 | for (int i = 0; i < SWS_BLOCK_SIZE; i++) { |
153 | 0 | const pixel_t xx = x[i]; |
154 | 0 | const pixel_t yy = y[i]; |
155 | 0 | const pixel_t zz = z[i]; |
156 | 0 | const pixel_t ww = w[i]; |
157 | |
|
158 | 0 | x[i] = (mask & SWS_MASK_OFF(0)) ? c.k[0] : 0; |
159 | 0 | x[i] += (mask & SWS_MASK(0, 0)) ? c.m[0][0] * xx : xx; |
160 | 0 | x[i] += (mask & SWS_MASK(0, 1)) ? c.m[0][1] * yy : 0; |
161 | 0 | x[i] += (mask & SWS_MASK(0, 2)) ? c.m[0][2] * zz : 0; |
162 | 0 | x[i] += (mask & SWS_MASK(0, 3)) ? c.m[0][3] * ww : 0; |
163 | |
|
164 | 0 | y[i] = (mask & SWS_MASK_OFF(1)) ? c.k[1] : 0; |
165 | 0 | y[i] += (mask & SWS_MASK(1, 0)) ? c.m[1][0] * xx : 0; |
166 | 0 | y[i] += (mask & SWS_MASK(1, 1)) ? c.m[1][1] * yy : yy; |
167 | 0 | y[i] += (mask & SWS_MASK(1, 2)) ? c.m[1][2] * zz : 0; |
168 | 0 | y[i] += (mask & SWS_MASK(1, 3)) ? c.m[1][3] * ww : 0; |
169 | |
|
170 | 0 | z[i] = (mask & SWS_MASK_OFF(2)) ? c.k[2] : 0; |
171 | 0 | z[i] += (mask & SWS_MASK(2, 0)) ? c.m[2][0] * xx : 0; |
172 | 0 | z[i] += (mask & SWS_MASK(2, 1)) ? c.m[2][1] * yy : 0; |
173 | 0 | z[i] += (mask & SWS_MASK(2, 2)) ? c.m[2][2] * zz : zz; |
174 | 0 | z[i] += (mask & SWS_MASK(2, 3)) ? c.m[2][3] * ww : 0; |
175 | |
|
176 | 0 | w[i] = (mask & SWS_MASK_OFF(3)) ? c.k[3] : 0; |
177 | 0 | w[i] += (mask & SWS_MASK(3, 0)) ? c.m[3][0] * xx : 0; |
178 | 0 | w[i] += (mask & SWS_MASK(3, 1)) ? c.m[3][1] * yy : 0; |
179 | 0 | w[i] += (mask & SWS_MASK(3, 2)) ? c.m[3][2] * zz : 0; |
180 | 0 | w[i] += (mask & SWS_MASK(3, 3)) ? c.m[3][3] * ww : ww; |
181 | 0 | } |
182 | |
|
183 | 0 | CONTINUE(block_t, x, y, z, w); |
184 | 0 | } |
185 | | |
186 | | #define WRAP_LINEAR(NAME, MASK) \ |
187 | 0 | DECL_IMPL(linear_##NAME) \ |
188 | 0 | { \ |
189 | 0 | CALL(linear_mask, MASK); \ |
190 | 0 | } \ Unexecuted instantiation: ops_backend.c:linear_luma_f32 Unexecuted instantiation: ops_backend.c:linear_alpha_f32 Unexecuted instantiation: ops_backend.c:linear_lumalpha_f32 Unexecuted instantiation: ops_backend.c:linear_dot3_f32 Unexecuted instantiation: ops_backend.c:linear_row0_f32 Unexecuted instantiation: ops_backend.c:linear_row0a_f32 Unexecuted instantiation: ops_backend.c:linear_diag3_f32 Unexecuted instantiation: ops_backend.c:linear_diag4_f32 Unexecuted instantiation: ops_backend.c:linear_diagoff3_f32 Unexecuted instantiation: ops_backend.c:linear_matrix3_f32 Unexecuted instantiation: ops_backend.c:linear_affine3_f32 Unexecuted instantiation: ops_backend.c:linear_affine3a_f32 Unexecuted instantiation: ops_backend.c:linear_matrix4_f32 Unexecuted instantiation: ops_backend.c:linear_affine4_f32 |
191 | | \ |
192 | | DECL_ENTRY(linear_##NAME, \ |
193 | | .op = SWS_OP_LINEAR, \ |
194 | | .setup = fn(setup_linear), \ |
195 | | .free = av_free, \ |
196 | | .linear_mask = (MASK), \ |
197 | | ); |
198 | | |
199 | | WRAP_LINEAR(luma, SWS_MASK_LUMA) |
200 | | WRAP_LINEAR(alpha, SWS_MASK_ALPHA) |
201 | | WRAP_LINEAR(lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA) |
202 | | WRAP_LINEAR(dot3, 0x7) |
203 | | WRAP_LINEAR(row0, SWS_MASK_ROW(0)) |
204 | | WRAP_LINEAR(row0a, SWS_MASK_ROW(0) | SWS_MASK_ALPHA) |
205 | | WRAP_LINEAR(diag3, SWS_MASK_DIAG3) |
206 | | WRAP_LINEAR(diag4, SWS_MASK_DIAG4) |
207 | | WRAP_LINEAR(diagoff3, SWS_MASK_DIAG3 | SWS_MASK_OFF3) |
208 | | WRAP_LINEAR(matrix3, SWS_MASK_MAT3) |
209 | | WRAP_LINEAR(affine3, SWS_MASK_MAT3 | SWS_MASK_OFF3) |
210 | | WRAP_LINEAR(affine3a, SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA) |
211 | | WRAP_LINEAR(matrix4, SWS_MASK_MAT4) |
212 | | WRAP_LINEAR(affine4, SWS_MASK_MAT4 | SWS_MASK_OFF4) |
213 | | |
214 | | static const SwsOpTable fn(op_table_float) = { |
215 | | .block_size = SWS_BLOCK_SIZE, |
216 | | .entries = { |
217 | | REF_COMMON_PATTERNS(convert_uint8), |
218 | | REF_COMMON_PATTERNS(convert_uint16), |
219 | | REF_COMMON_PATTERNS(convert_uint32), |
220 | | |
221 | | &fn(op_clear_1110), |
222 | | REF_COMMON_PATTERNS(min), |
223 | | REF_COMMON_PATTERNS(max), |
224 | | REF_COMMON_PATTERNS(scale), |
225 | | |
226 | | &fn(op_dither0), |
227 | | &fn(op_dither1), |
228 | | &fn(op_dither2), |
229 | | &fn(op_dither3), |
230 | | &fn(op_dither4), |
231 | | &fn(op_dither5), |
232 | | &fn(op_dither6), |
233 | | &fn(op_dither7), |
234 | | &fn(op_dither8), |
235 | | |
236 | | &fn(op_linear_luma), |
237 | | &fn(op_linear_alpha), |
238 | | &fn(op_linear_lumalpha), |
239 | | &fn(op_linear_dot3), |
240 | | &fn(op_linear_row0), |
241 | | &fn(op_linear_row0a), |
242 | | &fn(op_linear_diag3), |
243 | | &fn(op_linear_diag4), |
244 | | &fn(op_linear_diagoff3), |
245 | | &fn(op_linear_matrix3), |
246 | | &fn(op_linear_affine3), |
247 | | &fn(op_linear_affine3a), |
248 | | &fn(op_linear_matrix4), |
249 | | &fn(op_linear_affine4), |
250 | | |
251 | | NULL |
252 | | }, |
253 | | }; |
254 | | |
/* Drop the template configuration macros defined at the top of this file */
#undef IS_FLOAT
#undef FMT_CHAR

#undef px
#undef block_t
#undef pixel_t
#undef PIXEL_MIN
#undef PIXEL_MAX
#undef PIXEL_TYPE