/src/ffmpeg/libswscale/ops_tmpl_common.c
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "ops_backend.h" |
22 | | |
23 | | #ifndef BIT_DEPTH |
24 | | # error Should only be included from ops_tmpl_*.c! |
25 | | #endif |
26 | | |
/**
 * Declare a pattern that converts the current template pixel type to an
 * N-bit unsigned integer block, for every enabled X/Y/Z/W component.
 * Conversion is a plain per-element C assignment (implicit conversion /
 * truncation semantics); disabled components are passed through untouched.
 * Also registers the pattern as an SWS_OP_CONVERT entry targeting
 * SWS_PIXEL_U##N.
 */
#define WRAP_CONVERT_UINT(N)                                                   \
DECL_PATTERN(convert_uint##N)                                                  \
{                                                                              \
    u##N##block_t xu, yu, zu, wu;                                              \
                                                                               \
    SWS_LOOP                                                                   \
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {                                 \
        if (X)                                                                 \
            xu[i] = x[i];                                                      \
        if (Y)                                                                 \
            yu[i] = y[i];                                                      \
        if (Z)                                                                 \
            zu[i] = z[i];                                                      \
        if (W)                                                                 \
            wu[i] = w[i];                                                      \
    }                                                                          \
                                                                               \
    /* hand the converted block to the next op in the chain */                 \
    CONTINUE(xu, yu, zu, wu);                                                  \
}                                                                              \
                                                                               \
WRAP_COMMON_PATTERNS(convert_uint##N,                                          \
    .op = SWS_OP_CONVERT,                                                      \
    .convert.to = SWS_PIXEL_U##N,                                              \
);
51 | | |
/* Instantiate conversions to each uint depth other than the template's own.
 * A 32-bit float template (IS_FLOAT defined) still needs the f32 -> u32
 * conversion, hence the extra condition on the 32-bit case. */
#if BIT_DEPTH != 8
WRAP_CONVERT_UINT(8)
#endif

#if BIT_DEPTH != 16
WRAP_CONVERT_UINT(16)
#endif

#if BIT_DEPTH != 32 || defined(IS_FLOAT)
WRAP_CONVERT_UINT(32)
#endif
63 | | |
/**
 * Overwrite the components selected by the compile-time X/Y/Z/W flags with
 * the constant pixel values held in impl->priv.px[0..3]; components whose
 * flag is unset are forwarded to the next op unchanged.
 */
DECL_PATTERN(clear)
{
    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        if (X)
            x[i] = impl->priv.px[0];
        if (Y)
            y[i] = impl->priv.px[1];
        if (Z)
            z[i] = impl->priv.px[2];
        if (W)
            w[i] = impl->priv.px[3];
    }

    CONTINUE(x, y, z, w);
}
80 | | |
/**
 * Bind the clear pattern for one specific X/Y/Z/W component mask and
 * register it as an SWS_OP_CLEAR entry. The constants written by the
 * pattern are filled in at runtime by ff_sws_setup_clear.
 */
#define WRAP_CLEAR(X, Y, Z, W)                                                 \
DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W)                             \
                                                                               \
DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL,                                 \
    .setup = ff_sws_setup_clear,                                               \
    .op = SWS_OP_CLEAR,                                                        \
    .clear.mask = SWS_COMP_MASK(X, Y, Z, W),                                   \
);
89 | | |
/* Only the component masks that occur in practice are instantiated,
 * grouped by use case below. */
WRAP_CLEAR(0, 0, 0, 1) /* rgba alpha */
WRAP_CLEAR(1, 0, 0, 0) /* argb alpha */
WRAP_CLEAR(0, 1, 0, 0) /* ya alpha */

WRAP_CLEAR(1, 1, 0, 0) /* vuya chroma */
WRAP_CLEAR(0, 1, 1, 0) /* yuva chroma */
WRAP_CLEAR(0, 0, 1, 1) /* ayuv chroma */
WRAP_CLEAR(1, 0, 1, 0) /* uyva chroma */
WRAP_CLEAR(0, 1, 0, 1) /* xvyu chroma */

WRAP_CLEAR(0, 1, 1, 1) /* gray -> yuva */
WRAP_CLEAR(1, 0, 1, 1) /* gray -> ayuv */
WRAP_CLEAR(1, 1, 0, 1) /* gray -> vuya */
103 | | |
/**
 * Per-component clamp from above: each enabled component is replaced by the
 * smaller of its value and the constant in impl->priv.px[0..3] (filled in
 * by ff_sws_setup_clamp, see the entry registration below).
 */
DECL_PATTERN(min)
{
    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        if (X)
            x[i] = FFMIN(x[i], impl->priv.px[0]);
        if (Y)
            y[i] = FFMIN(y[i], impl->priv.px[1]);
        if (Z)
            z[i] = FFMIN(z[i], impl->priv.px[2]);
        if (W)
            w[i] = FFMIN(w[i], impl->priv.px[3]);
    }

    CONTINUE(x, y, z, w);
}
120 | | |
/**
 * Per-component clamp from below: each enabled component is replaced by the
 * larger of its value and the constant in impl->priv.px[0..3] (filled in
 * by ff_sws_setup_clamp, see the entry registration below).
 */
DECL_PATTERN(max)
{
    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        if (X)
            x[i] = FFMAX(x[i], impl->priv.px[0]);
        if (Y)
            y[i] = FFMAX(y[i], impl->priv.px[1]);
        if (Z)
            z[i] = FFMAX(z[i], impl->priv.px[2]);
        if (W)
            w[i] = FFMAX(w[i], impl->priv.px[3]);
    }

    CONTINUE(x, y, z, w);
}
137 | | |
/* Register the clamp ops; both share ff_sws_setup_clamp, which derives the
 * per-component constants in priv.px from the op description.
 * NOTE(review): .flexible presumably marks the pattern as usable with
 * arbitrary component subsets — confirm against ops_backend.h. */
WRAP_COMMON_PATTERNS(min,
    .op = SWS_OP_MIN,
    .setup = ff_sws_setup_clamp,
    .flexible = true,
);

WRAP_COMMON_PATTERNS(max,
    .op = SWS_OP_MAX,
    .setup = ff_sws_setup_clamp,
    .flexible = true,
);
149 | | |
/**
 * Multiply every enabled component by a single scalar, taken from
 * impl->priv.px[0] (filled in by ff_sws_setup_scale). The same factor is
 * applied to all components.
 */
DECL_PATTERN(scale)
{
    const pixel_t scale = impl->priv.px[0];

    SWS_LOOP
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        if (X)
            x[i] *= scale;
        if (Y)
            y[i] *= scale;
        if (Z)
            z[i] *= scale;
        if (W)
            w[i] *= scale;
    }

    CONTINUE(x, y, z, w);
}

WRAP_COMMON_PATTERNS(scale,
    .op = SWS_OP_SCALE,
    .setup = ff_sws_setup_scale,
    .flexible = true,
);
174 | | |
/**
 * Setup for the general vertical filter: pre-converts the fixed-point
 * kernel weights (scaled by SWS_FILTER_SCALE) to floats once, so filter_v
 * can accumulate in float directly. The weight array is stored in priv.ptr
 * and the filter size in priv.i32[2] — slots 0-1 are occupied by the
 * pointer itself, which is what the static_assert below guards.
 *
 * Returns 0 on success, AVERROR(ENOMEM) if the weight allocation fails.
 */
DECL_SETUP(setup_filter_v, params, out)
{
    const SwsFilterWeights *filter = params->op->rw.kernel;
    static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
                  ">8 byte pointers not supported");

    /* Pre-convert weights to float */
    float *weights = av_calloc(filter->num_weights, sizeof(float));
    if (!weights)
        return AVERROR(ENOMEM);

    for (int i = 0; i < filter->num_weights; i++)
        weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;

    out->priv.ptr = weights;
    out->priv.i32[2] = filter->filter_size;
    out->free = ff_op_priv_free; /* the weight array is owned by this op */
    return 0;
}
194 | | |
/* Fully general vertical planar filter case.
 *
 * Accumulates `filter_size` weighted source rows into float accumulators
 * for up to `elems` planes, then continues the op chain with the float
 * results. Weights were pre-converted to float by setup_filter_v; each
 * output row `iter->y` has its own run of filter_size weights. */
DECL_READ(filter_v, const int elems)
{
    const SwsOpExec *exec = iter->exec;
    const float *restrict weights = impl->priv.ptr;
    const int filter_size = impl->priv.i32[2];
    /* select the weight run for the current output row */
    weights += filter_size * iter->y;

    f32block_t xs, ys, zs, ws;
    memset(xs, 0, sizeof(xs));
    if (elems > 1)
        memset(ys, 0, sizeof(ys));
    if (elems > 2)
        memset(zs, 0, sizeof(zs));
    if (elems > 3)
        memset(ws, 0, sizeof(ws));

    /* One filter tap per iteration: add the weighted source row, then step
     * each active input plane down by its row stride */
    for (int j = 0; j < filter_size; j++) {
        const float weight = weights[j];

        SWS_LOOP
        for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
            xs[i] += weight * in0[i];
            if (elems > 1)
                ys[i] += weight * in1[i];
            if (elems > 2)
                zs[i] += weight * in2[i];
            if (elems > 3)
                ws[i] += weight * in3[i];
        }

        in0 = bump_ptr(in0, exec->in_stride[0]);
        if (elems > 1)
            in1 = bump_ptr(in1, exec->in_stride[1]);
        if (elems > 2)
            in2 = bump_ptr(in2, exec->in_stride[2]);
        if (elems > 3)
            in3 = bump_ptr(in3, exec->in_stride[3]);
    }

    /* advance the persistent input cursors past the block just consumed */
    for (int i = 0; i < elems; i++)
        iter->in[i] += sizeof(block_t);

    CONTINUE(xs, ys, zs, ws);
}
240 | | |
/**
 * Setup for the general horizontal filter: unlike setup_filter_v, the
 * fixed-point weights are kept as-is (filter_h scales by 1/SWS_FILTER_SCALE
 * at read time); we only take a refstruct reference on the weight array and
 * record the filter size in priv.i32[2]. Always returns 0.
 */
DECL_SETUP(setup_filter_h, params, out)
{
    SwsFilterWeights *filter = params->op->rw.kernel;
    out->priv.ptr = av_refstruct_ref(filter->weights);
    out->priv.i32[2] = filter->filter_size;
    out->free = ff_op_priv_unref; /* drop the weights reference on teardown */
    return 0;
}
249 | | |
/* Fully general horizontal planar filter case.
 *
 * For each of the SWS_BLOCK_SIZE output pixels, gathers `filter_size`
 * consecutive input pixels starting at the per-pixel byte offset from
 * exec->in_offset_x, accumulates them with the fixed-point weights in the
 * wider inter_t type, and converts to float with a final 1/SWS_FILTER_SCALE
 * scale. Each output pixel has its own run of filter_size weights. */
DECL_READ(filter_h, const int elems)
{
    const SwsOpExec *exec = iter->exec;
    const int *restrict weights = impl->priv.ptr;
    const int filter_size = impl->priv.i32[2];
    const float scale = 1.0f / SWS_FILTER_SCALE;
    const int xpos = iter->x;
    /* select the weight run for the first pixel of this block */
    weights += filter_size * iter->x;

    f32block_t xs, ys, zs, ws;
    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
        /* per-output-pixel source position, precomputed as a byte offset */
        const int offset = exec->in_offset_x[xpos + i];
        pixel_t *start0 = bump_ptr(in0, offset);
        pixel_t *start1 = bump_ptr(in1, offset);
        pixel_t *start2 = bump_ptr(in2, offset);
        pixel_t *start3 = bump_ptr(in3, offset);

        inter_t sx = 0, sy = 0, sz = 0, sw = 0;
        for (int j = 0; j < filter_size; j++) {
            const int weight = weights[j];
            sx += weight * start0[j];
            if (elems > 1)
                sy += weight * start1[j];
            if (elems > 2)
                sz += weight * start2[j];
            if (elems > 3)
                sw += weight * start3[j];
        }

        /* renormalize the fixed-point accumulators to float */
        xs[i] = (float) sx * scale;
        if (elems > 1)
            ys[i] = (float) sy * scale;
        if (elems > 2)
            zs[i] = (float) sz * scale;
        if (elems > 3)
            ws[i] = (float) sw * scale;

        weights += filter_size;
    }

    CONTINUE(xs, ys, zs, ws);
}
293 | | |
/**
 * Instantiate a filtered-read implementation for a fixed component count
 * (ELEMS baked in as a compile-time constant so the elems branches fold
 * away) and register it as an SWS_OP_READ entry with the given filter
 * direction and the matching setup function.
 */
#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)                                  \
static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter,       \
                                               const SwsOpImpl *restrict impl, \
                                               void *restrict x, void *restrict y,\
                                               void *restrict z, void *restrict w)\
{                                                                              \
    CALL_READ(FUNC##SUFFIX, ELEMS);                                            \
}                                                                              \
                                                                               \
DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS),                         \
    .op = SWS_OP_READ,                                                         \
    .setup = fn(setup_filter##SUFFIX),                                         \
    .rw.elems = ELEMS,                                                         \
    .rw.filter = SWS_OP_FILTER_##DIR,                                          \
);

/* Vertical and horizontal filters for 1-4 components */
WRAP_FILTER(filter, V, 1, _v)
WRAP_FILTER(filter, V, 2, _v)
WRAP_FILTER(filter, V, 3, _v)
WRAP_FILTER(filter, V, 4, _v)

WRAP_FILTER(filter, H, 1, _h)
WRAP_FILTER(filter, H, 2, _h)
WRAP_FILTER(filter, H, 3, _h)
WRAP_FILTER(filter, H, 4, _h)
319 | | |
/**
 * Top-level driver for one compiled op chain: walks the requested range of
 * rows [y_start, y_end) and horizontal blocks [bx_start, bx_end), kicking
 * off the chain via CONTINUE() once per block. The read/write ops in the
 * chain advance iter->in / iter->out themselves; this loop only applies the
 * per-row bumps at the end of each row.
 */
static void fn(process)(const SwsOpExec *exec, const void *priv,
                        const int bx_start, const int y_start,
                        int bx_end, int y_end)
{
    const SwsOpChain *chain = priv;
    const SwsOpImpl *impl = chain->impl;
    u32block_t x, y, z, w; /* allocate enough space for any intermediate */

    SwsOpIter iterdata;
    SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
    iter->exec = exec;
    for (int i = 0; i < 4; i++) {
        iter->in[i] = (uintptr_t) exec->in[i];
        iter->out[i] = (uintptr_t) exec->out[i];
    }

    for (iter->y = y_start; iter->y < y_end; iter->y++) {
        for (int block = bx_start; block < bx_end; block++) {
            iter->x = block * SWS_BLOCK_SIZE;
            CONTINUE(x, y, z, w); /* run the whole op chain on this block */
        }

        /* End-of-row adjustment: fixed per-plane bump plus an optional
         * per-row stride multiplier. NOTE(review): in_bump_y presumably
         * handles vertically subsampled input planes — confirm. */
        const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
        for (int i = 0; i < 4; i++) {
            iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
            iter->out[i] += exec->out_bump[i];
        }
    }
}