/src/ffmpeg/libswscale/ops.c
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/avassert.h" |
22 | | #include "libavutil/avstring.h" |
23 | | #include "libavutil/bswap.h" |
24 | | #include "libavutil/mem.h" |
25 | | #include "libavutil/rational.h" |
26 | | #include "libavutil/refstruct.h" |
27 | | |
28 | | #include "ops.h" |
29 | | #include "ops_internal.h" |
30 | | |
extern const SwsOpBackend backend_c;
extern const SwsOpBackend backend_murder;
extern const SwsOpBackend backend_x86;

/* NULL-terminated list of available compilation backends, in priority order;
 * ff_sws_ops_compile() tries each in turn until one succeeds. The generic C
 * backend comes last as the universal fallback. */
const SwsOpBackend * const ff_sws_op_backends[] = {
    &backend_murder,
#if ARCH_X86_64 && HAVE_X86ASM
    &backend_x86,
#endif
    &backend_c,
    NULL
};
43 | | |
/* Evaluate `x` and propagate a negative error code out of the calling
 * function. Requires a local `int ret` to be in scope at the call site. */
#define RET(x) \
    do { \
        if ((ret = (x)) < 0) \
            return ret; \
    } while (0)
49 | | |
50 | | const char *ff_sws_pixel_type_name(SwsPixelType type) |
51 | 0 | { |
52 | 0 | switch (type) { |
53 | 0 | case SWS_PIXEL_U8: return "u8"; |
54 | 0 | case SWS_PIXEL_U16: return "u16"; |
55 | 0 | case SWS_PIXEL_U32: return "u32"; |
56 | 0 | case SWS_PIXEL_F32: return "f32"; |
57 | 0 | case SWS_PIXEL_NONE: return "none"; |
58 | 0 | case SWS_PIXEL_TYPE_NB: break; |
59 | 0 | } |
60 | | |
61 | 0 | av_unreachable("Invalid pixel type!"); |
62 | 0 | return "ERR"; |
63 | 0 | } |
64 | | |
65 | | int ff_sws_pixel_type_size(SwsPixelType type) |
66 | 0 | { |
67 | 0 | switch (type) { |
68 | 0 | case SWS_PIXEL_U8: return sizeof(uint8_t); |
69 | 0 | case SWS_PIXEL_U16: return sizeof(uint16_t); |
70 | 0 | case SWS_PIXEL_U32: return sizeof(uint32_t); |
71 | 0 | case SWS_PIXEL_F32: return sizeof(float); |
72 | 0 | case SWS_PIXEL_NONE: break; |
73 | 0 | case SWS_PIXEL_TYPE_NB: break; |
74 | 0 | } |
75 | | |
76 | 0 | av_unreachable("Invalid pixel type!"); |
77 | 0 | return 0; |
78 | 0 | } |
79 | | |
80 | | bool ff_sws_pixel_type_is_int(SwsPixelType type) |
81 | 0 | { |
82 | 0 | switch (type) { |
83 | 0 | case SWS_PIXEL_U8: |
84 | 0 | case SWS_PIXEL_U16: |
85 | 0 | case SWS_PIXEL_U32: |
86 | 0 | return true; |
87 | 0 | case SWS_PIXEL_F32: |
88 | 0 | return false; |
89 | 0 | case SWS_PIXEL_NONE: |
90 | 0 | case SWS_PIXEL_TYPE_NB: break; |
91 | 0 | } |
92 | | |
93 | 0 | av_unreachable("Invalid pixel type!"); |
94 | 0 | return false; |
95 | 0 | } |
96 | | |
97 | | /* biased towards `a` */ |
98 | | static AVRational av_min_q(AVRational a, AVRational b) |
99 | 0 | { |
100 | 0 | return av_cmp_q(a, b) == 1 ? b : a; |
101 | 0 | } |
102 | | |
103 | | static AVRational av_max_q(AVRational a, AVRational b) |
104 | 0 | { |
105 | 0 | return av_cmp_q(a, b) == -1 ? b : a; |
106 | 0 | } |
107 | | |
/**
 * Symbolically apply the effect of `op` to a vector of four exact rational
 * component values `x`, in place. Used by the analysis passes to track
 * per-component value bounds through an operation list.
 *
 * A component with .den == 0 is treated as undefined and passed through
 * unchanged by the arithmetic cases below.
 */
void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
{
    uint64_t mask[4];
    int shift[4];

    switch (op->op) {
    case SWS_OP_READ:
    case SWS_OP_WRITE:
        /* pure data movement; component values are unaffected */
        return;
    case SWS_OP_UNPACK: {
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        ff_sws_pack_op_decode(op, mask, shift);
        /* split the packed integer in x[0] into its per-component bit fields */
        unsigned val = x[0].num;
        for (int i = 0; i < 4; i++)
            x[i] = Q((val >> shift[i]) & mask[i]);
        return;
    }
    case SWS_OP_PACK: {
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        ff_sws_pack_op_decode(op, mask, shift);
        /* merge the four components back into a single packed integer in x[0] */
        unsigned val = 0;
        for (int i = 0; i < 4; i++)
            val |= (x[i].num & mask[i]) << shift[i];
        x[0] = Q(val);
        return;
    }
    case SWS_OP_SWAP_BYTES:
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        switch (ff_sws_pixel_type_size(op->type)) {
        case 2:
            for (int i = 0; i < 4; i++)
                x[i].num = av_bswap16(x[i].num);
            break;
        case 4:
            for (int i = 0; i < 4; i++)
                x[i].num = av_bswap32(x[i].num);
            break;
        }
        return;
    case SWS_OP_CLEAR:
        /* only components with a defined clear value (nonzero den) change */
        for (int i = 0; i < 4; i++) {
            if (op->c.q4[i].den)
                x[i] = op->c.q4[i];
        }
        return;
    case SWS_OP_LSHIFT: {
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        AVRational mult = Q(1 << op->c.u);
        for (int i = 0; i < 4; i++)
            x[i] = x[i].den ? av_mul_q(x[i], mult) : x[i];
        return;
    }
    case SWS_OP_RSHIFT: {
        av_assert1(ff_sws_pixel_type_is_int(op->type));
        /* truncate to an integer before shifting, matching integer semantics */
        for (int i = 0; i < 4; i++)
            x[i] = x[i].den ? Q((x[i].num / x[i].den) >> op->c.u) : x[i];
        return;
    }
    case SWS_OP_SWIZZLE: {
        const AVRational orig[4] = { x[0], x[1], x[2], x[3] };
        for (int i = 0; i < 4; i++)
            x[i] = orig[op->swizzle.in[i]];
        return;
    }
    case SWS_OP_CONVERT:
        if (ff_sws_pixel_type_is_int(op->convert.to)) {
            const AVRational scale = ff_sws_pixel_expand(op->type, op->convert.to);
            for (int i = 0; i < 4; i++) {
                /* conversion to int truncates any fractional part */
                x[i] = x[i].den ? Q(x[i].num / x[i].den) : x[i];
                if (op->convert.expand)
                    x[i] = av_mul_q(x[i], scale);
            }
        }
        return;
    case SWS_OP_DITHER:
        av_assert1(!ff_sws_pixel_type_is_int(op->type));
        /* dithering adds a nonzero offset; modeled here as a constant +1/2 */
        for (int i = 0; i < 4; i++)
            x[i] = x[i].den ? av_add_q(x[i], av_make_q(1, 2)) : x[i];
        return;
    case SWS_OP_MIN:
        for (int i = 0; i < 4; i++)
            x[i] = av_min_q(x[i], op->c.q4[i]);
        return;
    case SWS_OP_MAX:
        for (int i = 0; i < 4; i++)
            x[i] = av_max_q(x[i], op->c.q4[i]);
        return;
    case SWS_OP_LINEAR: {
        av_assert1(!ff_sws_pixel_type_is_int(op->type));
        const AVRational orig[4] = { x[0], x[1], x[2], x[3] };
        for (int i = 0; i < 4; i++) {
            /* row i: dot product against the inputs, plus offset m[i][4] */
            AVRational sum = op->lin.m[i][4];
            for (int j = 0; j < 4; j++)
                sum = av_add_q(sum, av_mul_q(orig[j], op->lin.m[i][j]));
            x[i] = sum;
        }
        return;
    }
    case SWS_OP_SCALE:
        for (int i = 0; i < 4; i++)
            x[i] = x[i].den ? av_mul_q(x[i], op->c.q) : x[i];
        return;
    }

    av_unreachable("Invalid operation type!");
}
214 | | |
215 | | /* merge_comp_flags() forms a monoid with flags_identity as the null element */ |
216 | | static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT; |
217 | | static unsigned merge_comp_flags(unsigned a, unsigned b) |
218 | 0 | { |
219 | 0 | const unsigned flags_or = SWS_COMP_GARBAGE; |
220 | 0 | const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT; |
221 | 0 | return ((a & b) & flags_and) | ((a | b) & flags_or); |
222 | 0 | } |
223 | | |
224 | | /* Linearly propagate flags per component */ |
225 | | static void propagate_flags(SwsOp *op, const SwsComps *prev) |
226 | 0 | { |
227 | 0 | for (int i = 0; i < 4; i++) |
228 | 0 | op->comps.flags[i] = prev->flags[i]; |
229 | 0 | } |
230 | | |
231 | | /* Clear undefined values in dst with src */ |
232 | | static void clear_undefined_values(AVRational dst[4], const AVRational src[4]) |
233 | 0 | { |
234 | 0 | for (int i = 0; i < 4; i++) { |
235 | 0 | if (dst[i].den == 0) |
236 | 0 | dst[i] = src[i]; |
237 | 0 | } |
238 | 0 | } |
239 | | |
/* Infer + propagate known information about components.
 *
 * Two dataflow passes over the op list:
 *  - a forwards pass that, for each op, fills in op->comps.flags/min/max
 *    describing the component values *after* the op executes;
 *  - a backwards pass that fills in op->comps.unused, marking components
 *    whose value *entering* the op is never consumed downstream.
 *
 * The order of cases and the prev/next accumulators are load-bearing;
 * do not reorder statements here without re-deriving the dataflow. */
void ff_sws_op_list_update_comps(SwsOpList *ops)
{
    /* `next` seeds the backwards pass: past the final op, nothing is used */
    SwsComps next = { .unused = {true, true, true, true} };
    /* `prev` seeds the forwards pass: before the first READ, all garbage */
    SwsComps prev = { .flags = {
        SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE,
    }};

    /* Forwards pass, propagates knowledge about the incoming pixel values */
    for (int n = 0; n < ops->num_ops; n++) {
        SwsOp *op = &ops->ops[n];

        /* First, derive the min/max bounds. Most ops can reuse the generic
         * symbolic evaluator; the listed cases set bounds themselves below. */
        switch (op->op) {
        case SWS_OP_READ:
        case SWS_OP_LINEAR:
        case SWS_OP_SWAP_BYTES:
        case SWS_OP_UNPACK:
            break; /* special cases, handled below */
        default:
            memcpy(op->comps.min, prev.min, sizeof(prev.min));
            memcpy(op->comps.max, prev.max, sizeof(prev.max));
            ff_sws_apply_op_q(op, op->comps.min);
            ff_sws_apply_op_q(op, op->comps.max);
            break;
        }

        /* Second, derive the per-component flags (and any special bounds) */
        switch (op->op) {
        case SWS_OP_READ:
            /* Active components are taken from the user-provided values,
             * other components are explicitly stripped */
            for (int i = 0; i < op->rw.elems; i++) {
                const int idx = op->rw.packed ? i : ops->order_src.in[i];
                op->comps.flags[i] = ops->comps_src.flags[idx];
                op->comps.min[i] = ops->comps_src.min[idx];
                op->comps.max[i] = ops->comps_src.max[idx];
            }
            for (int i = op->rw.elems; i < 4; i++) {
                op->comps.flags[i] = prev.flags[i];
                op->comps.min[i] = prev.min[i];
                op->comps.max[i] = prev.max[i];
            }
            break;
        case SWS_OP_SWAP_BYTES:
            /* XOR so that two swaps cancel out */
            for (int i = 0; i < 4; i++) {
                op->comps.flags[i] = prev.flags[i] ^ SWS_COMP_SWAPPED;
                op->comps.min[i] = prev.min[i];
                op->comps.max[i] = prev.max[i];
            }
            break;
        case SWS_OP_WRITE:
            /* Writing out garbage indicates a bug earlier in the pipeline */
            for (int i = 0; i < op->rw.elems; i++)
                av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE));
            /* fall through */
        case SWS_OP_LSHIFT:
        case SWS_OP_RSHIFT:
            propagate_flags(op, &prev);
            break;
        case SWS_OP_MIN:
            propagate_flags(op, &prev);
            /* an undefined upper bound is now clamped to the min constant */
            clear_undefined_values(op->comps.max, op->c.q4);
            break;
        case SWS_OP_MAX:
            propagate_flags(op, &prev);
            /* an undefined lower bound is now clamped to the max constant */
            clear_undefined_values(op->comps.min, op->c.q4);
            break;
        case SWS_OP_DITHER:
            /* Strip zero flag because of the nonzero dithering offset */
            for (int i = 0; i < 4; i++)
                op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO;
            break;
        case SWS_OP_UNPACK:
            /* Each unpacked field inherits the flags of the packed input;
             * its value range is exactly the field's bit width */
            for (int i = 0; i < 4; i++) {
                const int pattern = op->pack.pattern[i];
                if (pattern) {
                    av_assert1(pattern < 32);
                    op->comps.flags[i] = prev.flags[0];
                    op->comps.min[i] = Q(0);
                    op->comps.max[i] = Q((1ULL << pattern) - 1);
                } else
                    op->comps.flags[i] = SWS_COMP_GARBAGE;
            }
            break;
        case SWS_OP_PACK: {
            /* The packed output merges the flags of all participating fields */
            unsigned flags = flags_identity;
            for (int i = 0; i < 4; i++) {
                if (op->pack.pattern[i])
                    flags = merge_comp_flags(flags, prev.flags[i]);
                if (i > 0) /* clear remaining comps for sanity */
                    op->comps.flags[i] = SWS_COMP_GARBAGE;
            }
            op->comps.flags[0] = flags;
            break;
        }
        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (op->c.q4[i].den) {
                    /* component now holds a known constant */
                    op->comps.flags[i] = 0;
                    if (op->c.q4[i].num == 0)
                        op->comps.flags[i] |= SWS_COMP_ZERO;
                    if (op->c.q4[i].den == 1)
                        op->comps.flags[i] |= SWS_COMP_EXACT;
                } else {
                    op->comps.flags[i] = prev.flags[i];
                }
            }
            break;
        case SWS_OP_SWIZZLE:
            for (int i = 0; i < 4; i++)
                op->comps.flags[i] = prev.flags[op->swizzle.in[i]];
            break;
        case SWS_OP_CONVERT:
            for (int i = 0; i < 4; i++) {
                op->comps.flags[i] = prev.flags[i];
                /* integer results are exact by construction */
                if (ff_sws_pixel_type_is_int(op->convert.to))
                    op->comps.flags[i] |= SWS_COMP_EXACT;
            }
            break;
        case SWS_OP_LINEAR:
            /* Interval arithmetic over each output row of the matrix */
            for (int i = 0; i < 4; i++) {
                unsigned flags = flags_identity;
                AVRational min = Q(0), max = Q(0);
                for (int j = 0; j < 4; j++) {
                    const AVRational k = op->lin.m[i][j];
                    AVRational mink = av_mul_q(prev.min[j], k);
                    AVRational maxk = av_mul_q(prev.max[j], k);
                    if (k.num) {
                        flags = merge_comp_flags(flags, prev.flags[j]);
                        if (k.den != 1) /* fractional coefficient */
                            flags &= ~SWS_COMP_EXACT;
                        if (k.num < 0) /* negative coeff flips the interval */
                            FFSWAP(AVRational, mink, maxk);
                        min = av_add_q(min, mink);
                        max = av_add_q(max, maxk);
                    }
                }
                if (op->lin.m[i][4].num) { /* nonzero offset */
                    flags &= ~SWS_COMP_ZERO;
                    if (op->lin.m[i][4].den != 1) /* fractional offset */
                        flags &= ~SWS_COMP_EXACT;
                    min = av_add_q(min, op->lin.m[i][4]);
                    max = av_add_q(max, op->lin.m[i][4]);
                }
                op->comps.flags[i] = flags;
                op->comps.min[i] = min;
                op->comps.max[i] = max;
            }
            break;
        case SWS_OP_SCALE:
            for (int i = 0; i < 4; i++) {
                op->comps.flags[i] = prev.flags[i];
                if (op->c.q.den != 1) /* fractional scale */
                    op->comps.flags[i] &= ~SWS_COMP_EXACT;
                if (op->c.q.num < 0) /* negative scale flips the interval */
                    FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]);
            }
            break;

        case SWS_OP_INVALID:
        case SWS_OP_TYPE_NB:
            av_unreachable("Invalid operation type!");
        }

        prev = op->comps;
    }

    /* Backwards pass, solves for component dependencies */
    for (int n = ops->num_ops - 1; n >= 0; n--) {
        SwsOp *op = &ops->ops[n];

        switch (op->op) {
        case SWS_OP_READ:
        case SWS_OP_WRITE:
            /* READ components are unused *before* the read (they hold
             * garbage); WRITE components are all consumed */
            for (int i = 0; i < op->rw.elems; i++)
                op->comps.unused[i] = op->op == SWS_OP_READ;
            for (int i = op->rw.elems; i < 4; i++)
                op->comps.unused[i] = next.unused[i];
            break;
        case SWS_OP_SWAP_BYTES:
        case SWS_OP_LSHIFT:
        case SWS_OP_RSHIFT:
        case SWS_OP_CONVERT:
        case SWS_OP_DITHER:
        case SWS_OP_MIN:
        case SWS_OP_MAX:
        case SWS_OP_SCALE:
            /* elementwise ops: usage passes straight through */
            for (int i = 0; i < 4; i++)
                op->comps.unused[i] = next.unused[i];
            break;
        case SWS_OP_UNPACK: {
            /* the single packed input is needed iff any unpacked field is */
            bool unused = true;
            for (int i = 0; i < 4; i++) {
                if (op->pack.pattern[i])
                    unused &= next.unused[i];
                op->comps.unused[i] = i > 0;
            }
            op->comps.unused[0] = unused;
            break;
        }
        case SWS_OP_PACK:
            /* each field is needed iff the packed output (comp 0) is */
            for (int i = 0; i < 4; i++) {
                if (op->pack.pattern[i])
                    op->comps.unused[i] = next.unused[0];
                else
                    op->comps.unused[i] = true;
            }
            break;
        case SWS_OP_CLEAR:
            /* a cleared component's incoming value is never read */
            for (int i = 0; i < 4; i++) {
                if (op->c.q4[i].den)
                    op->comps.unused[i] = true;
                else
                    op->comps.unused[i] = next.unused[i];
            }
            break;
        case SWS_OP_SWIZZLE: {
            /* input comp is used if ANY output position reading it is used */
            bool unused[4] = { true, true, true, true };
            for (int i = 0; i < 4; i++)
                unused[op->swizzle.in[i]] &= next.unused[i];
            for (int i = 0; i < 4; i++)
                op->comps.unused[i] = unused[i];
            break;
        }
        case SWS_OP_LINEAR:
            /* input j is used iff some used output row has a nonzero coeff */
            for (int j = 0; j < 4; j++) {
                bool unused = true;
                for (int i = 0; i < 4; i++) {
                    if (op->lin.m[i][j].num)
                        unused &= next.unused[i];
                }
                op->comps.unused[j] = unused;
            }
            break;
        }

        next = op->comps;
    }
}
477 | | |
/* Release any resources owned by *op and reset it to an empty state. */
static void op_uninit(SwsOp *op)
{
    switch (op->op) {
    case SWS_OP_DITHER:
        /* the dither matrix is the only refcounted per-op resource */
        av_refstruct_unref(&op->dither.matrix);
        break;
    }

    *op = (SwsOp) {0};
}
488 | | |
489 | | SwsOpList *ff_sws_op_list_alloc(void) |
490 | 0 | { |
491 | 0 | SwsOpList *ops = av_mallocz(sizeof(SwsOpList)); |
492 | 0 | if (!ops) |
493 | 0 | return NULL; |
494 | | |
495 | 0 | ops->order_src = ops->order_dst = SWS_SWIZZLE(0, 1, 2, 3); |
496 | 0 | ff_fmt_clear(&ops->src); |
497 | 0 | ff_fmt_clear(&ops->dst); |
498 | 0 | return ops; |
499 | 0 | } |
500 | | |
501 | | void ff_sws_op_list_free(SwsOpList **p_ops) |
502 | 0 | { |
503 | 0 | SwsOpList *ops = *p_ops; |
504 | 0 | if (!ops) |
505 | 0 | return; |
506 | | |
507 | 0 | for (int i = 0; i < ops->num_ops; i++) |
508 | 0 | op_uninit(&ops->ops[i]); |
509 | |
|
510 | 0 | av_freep(&ops->ops); |
511 | 0 | av_free(ops); |
512 | 0 | *p_ops = NULL; |
513 | 0 | } |
514 | | |
/**
 * Create a deep copy of `ops`, acquiring new references to any refcounted
 * per-op resources (e.g. dither matrices). Returns NULL on failure.
 *
 * NOTE(review): with num_ops == 0, av_memdup() is called with size 0 and
 * its NULL return is reported as failure here — confirm empty lists are
 * never duplicated, or that this is the intended behavior.
 */
SwsOpList *ff_sws_op_list_duplicate(const SwsOpList *ops)
{
    SwsOpList *copy = av_malloc(sizeof(*copy));
    if (!copy)
        return NULL;

    /* Round the copied size up to a power of two — presumably to mirror the
     * growth policy of av_dynarray2_add() so the copy can keep appending
     * without reallocating; TODO confirm against the dynarray implementation */
    int num = ops->num_ops;
    if (num)
        num = 1 << av_ceil_log2(num);

    *copy = *ops;
    copy->ops = av_memdup(ops->ops, num * sizeof(ops->ops[0]));
    if (!copy->ops) {
        av_free(copy);
        return NULL;
    }

    /* Bump references on resources shared with the source list */
    for (int i = 0; i < ops->num_ops; i++) {
        const SwsOp *op = &ops->ops[i];
        switch (op->op) {
        case SWS_OP_DITHER:
            av_refstruct_ref(copy->ops[i].dither.matrix);
            break;
        }
    }

    return copy;
}
543 | | |
544 | | void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count) |
545 | 0 | { |
546 | 0 | const int end = ops->num_ops - count; |
547 | 0 | av_assert2(index >= 0 && count >= 0 && index + count <= ops->num_ops); |
548 | 0 | op_uninit(&ops->ops[index]); |
549 | 0 | for (int i = index; i < end; i++) |
550 | 0 | ops->ops[i] = ops->ops[i + count]; |
551 | 0 | ops->num_ops = end; |
552 | 0 | } |
553 | | |
/**
 * Insert *op at position `index`, shifting later ops up by one. Ownership
 * of the op's resources transfers to the list. On allocation failure the
 * op is uninitialized and AVERROR(ENOMEM) is returned.
 */
int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op)
{
    /* av_dynarray2_add grows the array and increments num_ops as a side
     * effect; the new (last) slot is then vacated by the shift below */
    void *ret = av_dynarray2_add((void **) &ops->ops, &ops->num_ops, sizeof(*op), NULL);
    if (!ret) {
        op_uninit(op);
        return AVERROR(ENOMEM);
    }

    for (int i = ops->num_ops - 1; i > index; i--)
        ops->ops[i] = ops->ops[i - 1];
    ops->ops[index] = *op;
    return 0;
}
567 | | |
/* Append *op at the end of the list; same ownership/error semantics as
 * ff_sws_op_list_insert_at(). */
int ff_sws_op_list_append(SwsOpList *ops, SwsOp *op)
{
    return ff_sws_op_list_insert_at(ops, ops->num_ops, op);
}
572 | | |
/**
 * Returns true if the op list amounts to a plain memory copy: it is either
 * empty, or exactly a READ followed by a WRITE with identical layout.
 */
bool ff_sws_op_list_is_noop(const SwsOpList *ops)
{
    if (!ops->num_ops)
        return true;

    /* ops[1] is only dereferenced after the num_ops == 2 check below */
    const SwsOp *read = &ops->ops[0];
    const SwsOp *write = &ops->ops[1];
    if (ops->num_ops != 2 ||
        read->op != SWS_OP_READ ||
        write->op != SWS_OP_WRITE ||
        read->type != write->type ||
        read->rw.packed != write->rw.packed ||
        read->rw.elems != write->rw.elems ||
        read->rw.frac != write->rw.frac)
        return false;

    /**
     * Note that this check is unlikely to ever be hit in practice, since it
     * would imply the existence of planar formats with different plane orders
     * between them, e.g. rgbap <-> gbrap, which doesn't currently exist.
     * However, the check is cheap and lets me sleep at night.
     */
    const int num_planes = read->rw.packed ? 1 : read->rw.elems;
    for (int i = 0; i < num_planes; i++) {
        if (ops->order_src.in[i] != ops->order_dst.in[i])
            return false;
    }

    return true;
}
603 | | |
604 | | int ff_sws_op_list_max_size(const SwsOpList *ops) |
605 | 0 | { |
606 | 0 | int max_size = 0; |
607 | 0 | for (int i = 0; i < ops->num_ops; i++) { |
608 | 0 | const int size = ff_sws_pixel_type_size(ops->ops[i].type); |
609 | 0 | max_size = FFMAX(max_size, size); |
610 | 0 | } |
611 | |
|
612 | 0 | return max_size; |
613 | 0 | } |
614 | | |
615 | | uint32_t ff_sws_linear_mask(const SwsLinearOp c) |
616 | 0 | { |
617 | 0 | uint32_t mask = 0; |
618 | 0 | for (int i = 0; i < 4; i++) { |
619 | 0 | for (int j = 0; j < 5; j++) { |
620 | 0 | if (av_cmp_q(c.m[i][j], Q(i == j))) |
621 | 0 | mask |= SWS_MASK(i, j); |
622 | 0 | } |
623 | 0 | } |
624 | 0 | return mask; |
625 | 0 | } |
626 | | |
/* Short human-readable description of a linear-op coefficient mask. */
static const char *describe_lin_mask(uint32_t mask)
{
    /* Try to be fairly descriptive without assuming too much */
    static const struct {
        char name[24];
        uint32_t mask;
    } patterns[] = {
        /* Ordered roughly from most specific to most general: the lookup
         * below returns the FIRST pattern that covers all set bits, so a
         * broader pattern listed earlier would shadow narrower ones */
        { "noop",               0 },
        { "luma",               SWS_MASK_LUMA },
        { "alpha",              SWS_MASK_ALPHA },
        { "luma+alpha",         SWS_MASK_LUMA | SWS_MASK_ALPHA },
        { "dot3",               0x7 },
        { "dot4",               0xF },
        { "row0",               SWS_MASK_ROW(0) },
        { "row0+alpha",         SWS_MASK_ROW(0) | SWS_MASK_ALPHA },
        { "col0",               SWS_MASK_COL(0) },
        { "col0+off3",          SWS_MASK_COL(0) | SWS_MASK_OFF3 },
        { "off3",               SWS_MASK_OFF3 },
        { "off3+alpha",         SWS_MASK_OFF3 | SWS_MASK_ALPHA },
        { "diag3",              SWS_MASK_DIAG3 },
        { "diag4",              SWS_MASK_DIAG4 },
        { "diag3+alpha",        SWS_MASK_DIAG3 | SWS_MASK_ALPHA },
        { "diag3+off3",         SWS_MASK_DIAG3 | SWS_MASK_OFF3 },
        { "diag3+off3+alpha",   SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
        { "diag4+off4",         SWS_MASK_DIAG4 | SWS_MASK_OFF4 },
        { "matrix3",            SWS_MASK_MAT3 },
        { "matrix3+off3",       SWS_MASK_MAT3 | SWS_MASK_OFF3 },
        { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
        { "matrix4",            SWS_MASK_MAT4 },
        { "matrix4+off4",       SWS_MASK_MAT4 | SWS_MASK_OFF4 },
    };

    /* first pattern whose mask is a superset of `mask` wins */
    for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
        if (!(mask & ~patterns[i].mask))
            return patterns[i].name;
    }

    av_unreachable("Invalid linear mask!");
    return "ERR";
}
667 | | |
668 | | static char describe_comp_flags(unsigned flags) |
669 | 0 | { |
670 | 0 | if (flags & SWS_COMP_GARBAGE) |
671 | 0 | return 'X'; |
672 | 0 | else if (flags & SWS_COMP_ZERO) |
673 | 0 | return '0'; |
674 | 0 | else if (flags & SWS_COMP_SWAPPED) |
675 | 0 | return 'z'; |
676 | 0 | else if (flags & SWS_COMP_EXACT) |
677 | 0 | return '+'; |
678 | 0 | else |
679 | 0 | return '.'; |
680 | 0 | } |
681 | | |
/* Format a plane-order swizzle as a ", via {0, 1, 2}" suffix for logging,
 * or return "" for the identity order. `buf` must hold at least 32 bytes. */
static const char *describe_order(SwsSwizzleOp order, int planes, char buf[32])
{
    if (order.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
        return "";

    av_strlcpy(buf, ", via {", 32);
    for (int i = 0; i < planes; i++)
        av_strlcatf(buf, 32, "%s%d", i ? ", " : "", order.in[i]);
    av_strlcat(buf, "}", 32);
    return buf;
}
693 | | |
694 | | static const char *print_q(const AVRational q, char buf[], int buf_len) |
695 | 0 | { |
696 | 0 | if (!q.den) { |
697 | 0 | return q.num > 0 ? "inf" : q.num < 0 ? "-inf" : "nan"; |
698 | 0 | } else if (q.den == 1) { |
699 | 0 | snprintf(buf, buf_len, "%d", q.num); |
700 | 0 | return buf; |
701 | 0 | } else if (abs(q.num) > 1000 || abs(q.den) > 1000) { |
702 | 0 | snprintf(buf, buf_len, "%f", av_q2d(q)); |
703 | 0 | return buf; |
704 | 0 | } else { |
705 | 0 | snprintf(buf, buf_len, "%d/%d", q.num, q.den); |
706 | 0 | return buf; |
707 | 0 | } |
708 | 0 | } |
709 | | |
710 | 0 | #define PRINTQ(q) print_q(q, (char[32]){0}, sizeof(char[32])) |
711 | | |
/**
 * Pretty-print an operation list for debugging. `lev` is the log level for
 * the main listing; `lev_extra` is used for the per-op min/max annotations.
 */
void ff_sws_op_list_print(void *log, int lev, int lev_extra,
                          const SwsOpList *ops)
{
    if (!ops->num_ops) {
        av_log(log, lev, "  (empty)\n");
        return;
    }

    for (int i = 0; i < ops->num_ops; i++) {
        const SwsOp *op = &ops->ops[i];
        /* the last op acts as its own successor for the `unused` lookups */
        const SwsOp *next = i + 1 < ops->num_ops ? &ops->ops[i + 1] : op;
        char buf[32];

        /* Prefix: pixel type, input usage (X = unused), output flags */
        av_log(log, lev, "  [%3s %c%c%c%c -> %c%c%c%c] ",
               ff_sws_pixel_type_name(op->type),
               op->comps.unused[0] ? 'X' : '.',
               op->comps.unused[1] ? 'X' : '.',
               op->comps.unused[2] ? 'X' : '.',
               op->comps.unused[3] ? 'X' : '.',
               next->comps.unused[0] ? 'X' : describe_comp_flags(op->comps.flags[0]),
               next->comps.unused[1] ? 'X' : describe_comp_flags(op->comps.flags[1]),
               next->comps.unused[2] ? 'X' : describe_comp_flags(op->comps.flags[2]),
               next->comps.unused[3] ? 'X' : describe_comp_flags(op->comps.flags[3]));

        switch (op->op) {
        case SWS_OP_INVALID:
            av_log(log, lev, "SWS_OP_INVALID\n");
            break;
        case SWS_OP_READ:
        case SWS_OP_WRITE:
            av_log(log, lev, "%-20s: %d elem(s) %s >> %d%s\n",
                   op->op == SWS_OP_READ ? "SWS_OP_READ"
                                         : "SWS_OP_WRITE",
                   op->rw.elems, op->rw.packed ? "packed" : "planar",
                   op->rw.frac,
                   describe_order(op->op == SWS_OP_READ ? ops->order_src
                                                        : ops->order_dst,
                                  op->rw.packed ? 1 : op->rw.elems, buf));
            break;
        case SWS_OP_SWAP_BYTES:
            av_log(log, lev, "SWS_OP_SWAP_BYTES\n");
            break;
        case SWS_OP_LSHIFT:
            av_log(log, lev, "%-20s: << %u\n", "SWS_OP_LSHIFT", op->c.u);
            break;
        case SWS_OP_RSHIFT:
            av_log(log, lev, "%-20s: >> %u\n", "SWS_OP_RSHIFT", op->c.u);
            break;
        case SWS_OP_PACK:
        case SWS_OP_UNPACK:
            av_log(log, lev, "%-20s: {%d %d %d %d}\n",
                   op->op == SWS_OP_PACK ? "SWS_OP_PACK"
                                         : "SWS_OP_UNPACK",
                   op->pack.pattern[0], op->pack.pattern[1],
                   op->pack.pattern[2], op->pack.pattern[3]);
            break;
        case SWS_OP_CLEAR:
            /* "_" marks components left untouched (undefined clear value) */
            av_log(log, lev, "%-20s: {%s %s %s %s}\n", "SWS_OP_CLEAR",
                   op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
                   op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
                   op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
                   op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
            break;
        case SWS_OP_SWIZZLE:
            av_log(log, lev, "%-20s: %d%d%d%d\n", "SWS_OP_SWIZZLE",
                   op->swizzle.x, op->swizzle.y, op->swizzle.z, op->swizzle.w);
            break;
        case SWS_OP_CONVERT:
            av_log(log, lev, "%-20s: %s -> %s%s\n", "SWS_OP_CONVERT",
                   ff_sws_pixel_type_name(op->type),
                   ff_sws_pixel_type_name(op->convert.to),
                   op->convert.expand ? " (expand)" : "");
            break;
        case SWS_OP_DITHER:
            av_log(log, lev, "%-20s: %dx%d matrix + {%d %d %d %d}\n", "SWS_OP_DITHER",
                   1 << op->dither.size_log2, 1 << op->dither.size_log2,
                   op->dither.y_offset[0], op->dither.y_offset[1],
                   op->dither.y_offset[2], op->dither.y_offset[3]);
            break;
        case SWS_OP_MIN:
            av_log(log, lev, "%-20s: x <= {%s %s %s %s}\n", "SWS_OP_MIN",
                   op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
                   op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
                   op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
                   op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
            break;
        case SWS_OP_MAX:
            av_log(log, lev, "%-20s: {%s %s %s %s} <= x\n", "SWS_OP_MAX",
                   op->c.q4[0].den ? PRINTQ(op->c.q4[0]) : "_",
                   op->c.q4[1].den ? PRINTQ(op->c.q4[1]) : "_",
                   op->c.q4[2].den ? PRINTQ(op->c.q4[2]) : "_",
                   op->c.q4[3].den ? PRINTQ(op->c.q4[3]) : "_");
            break;
        case SWS_OP_LINEAR:
            av_log(log, lev, "%-20s: %s [[%s %s %s %s %s] "
                   "[%s %s %s %s %s] "
                   "[%s %s %s %s %s] "
                   "[%s %s %s %s %s]]\n",
                   "SWS_OP_LINEAR", describe_lin_mask(op->lin.mask),
                   PRINTQ(op->lin.m[0][0]), PRINTQ(op->lin.m[0][1]), PRINTQ(op->lin.m[0][2]), PRINTQ(op->lin.m[0][3]), PRINTQ(op->lin.m[0][4]),
                   PRINTQ(op->lin.m[1][0]), PRINTQ(op->lin.m[1][1]), PRINTQ(op->lin.m[1][2]), PRINTQ(op->lin.m[1][3]), PRINTQ(op->lin.m[1][4]),
                   PRINTQ(op->lin.m[2][0]), PRINTQ(op->lin.m[2][1]), PRINTQ(op->lin.m[2][2]), PRINTQ(op->lin.m[2][3]), PRINTQ(op->lin.m[2][4]),
                   PRINTQ(op->lin.m[3][0]), PRINTQ(op->lin.m[3][1]), PRINTQ(op->lin.m[3][2]), PRINTQ(op->lin.m[3][3]), PRINTQ(op->lin.m[3][4]));
            break;
        case SWS_OP_SCALE:
            av_log(log, lev, "%-20s: * %s\n", "SWS_OP_SCALE",
                   PRINTQ(op->c.q));
            break;
        case SWS_OP_TYPE_NB:
            break;
        }

        /* Only print the min/max annotation when at least one bound is known */
        if (op->comps.min[0].den || op->comps.min[1].den ||
            op->comps.min[2].den || op->comps.min[3].den ||
            op->comps.max[0].den || op->comps.max[1].den ||
            op->comps.max[2].den || op->comps.max[3].den)
        {
            av_log(log, lev_extra, "    min: {%s, %s, %s, %s}, max: {%s, %s, %s, %s}\n",
                   next->comps.unused[0] ? "_" : PRINTQ(op->comps.min[0]),
                   next->comps.unused[1] ? "_" : PRINTQ(op->comps.min[1]),
                   next->comps.unused[2] ? "_" : PRINTQ(op->comps.min[2]),
                   next->comps.unused[3] ? "_" : PRINTQ(op->comps.min[3]),
                   next->comps.unused[0] ? "_" : PRINTQ(op->comps.max[0]),
                   next->comps.unused[1] ? "_" : PRINTQ(op->comps.max[1]),
                   next->comps.unused[2] ? "_" : PRINTQ(op->comps.max[2]),
                   next->comps.unused[3] ? "_" : PRINTQ(op->comps.max[3]));
        }

    }

    av_log(log, lev, "    (X = unused, z = byteswapped, + = exact, 0 = zero)\n");
}
844 | | |
/**
 * Compile `ops` using one specific backend. On success the compiled result
 * is stored in *out; on failure a negative error code is returned and *out
 * is left untouched. `ops` itself is never modified (a copy is compiled).
 */
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
                               const SwsOpList *ops, SwsCompiledOp *out)
{
    SwsOpList *copy, rest;
    SwsCompiledOp compiled = {0};
    int ret = 0;

    copy = ff_sws_op_list_duplicate(ops);
    if (!copy)
        return AVERROR(ENOMEM);

    /* Ensure these are always set during compilation */
    ff_sws_op_list_update_comps(copy);

    /* Make an on-stack copy of `ops` to ensure we can still properly clean up
     * the copy afterwards */
    rest = *copy;

    ret = backend->compile(ctx, &rest, &compiled);
    if (ret < 0) {
        /* ENOTSUP is an expected outcome while probing backends, so only
         * log it at trace level to avoid spamming the user */
        int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
        av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
               backend->name, av_err2str(ret));
        if (rest.num_ops != ops->num_ops) {
            av_log(ctx, msg_lev, "Uncompiled remainder:\n");
            ff_sws_op_list_print(ctx, msg_lev, AV_LOG_TRACE, &rest);
        }
    } else {
        *out = compiled;
    }

    ff_sws_op_list_free(&copy);
    return ret;
}
879 | | |
880 | | int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out) |
881 | 0 | { |
882 | 0 | for (int n = 0; ff_sws_op_backends[n]; n++) { |
883 | 0 | const SwsOpBackend *backend = ff_sws_op_backends[n]; |
884 | 0 | if (ff_sws_ops_compile_backend(ctx, backend, ops, out) < 0) |
885 | 0 | continue; |
886 | | |
887 | 0 | av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': " |
888 | 0 | "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n", |
889 | 0 | backend->name, out->block_size, out->over_read, out->over_write, |
890 | 0 | out->cpu_flags); |
891 | 0 | return 0; |
892 | 0 | } |
893 | | |
894 | 0 | av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n"); |
895 | 0 | ff_sws_op_list_print(ctx, AV_LOG_WARNING, AV_LOG_TRACE, ops); |
896 | 0 | return AVERROR(ENOTSUP); |
897 | 0 | } |
898 | | |
/**
 * Private state of a compiled-operations graph pass.
 *
 * Filled in by ff_sws_compile_pass() and op_pass_setup(), then consulted by
 * op_pass_run() on every slice to decide how the row is split into blocks and
 * whether the last (partial) column must be staged through a bounce buffer.
 */
typedef struct SwsOpPass {
    SwsCompiledOp comp;  /* compiled kernel + metadata from the chosen backend */
    SwsOpExec exec_base; /* template execution context, copied per slice */
    int num_blocks;      /* blocks per row, i.e. ceil(width / block_size) */
    int tail_off_in;     /* byte offset of the last block within an input row */
    int tail_off_out;    /* byte offset of the last block within an output row */
    int tail_size_in;    /* valid input bytes inside the last block */
    int tail_size_out;   /* valid output bytes inside the last block */
    int planes_in;       /* number of input planes actually read */
    int planes_out;      /* number of output planes actually written */
    int pixel_bits_in;   /* bits per pixel consumed by the read op (see rw_pixel_bits) */
    int pixel_bits_out;  /* bits per pixel produced by the write op */
    int idx_in[4];       /* input plane remap (source plane per exec slot), -1 = unused */
    int idx_out[4];      /* output plane remap, -1 = unused */
    bool memcpy_in;      /* last input column must be bounced through memcpy */
    bool memcpy_out;     /* last output column must be bounced through memcpy */
} SwsOpPass;
916 | | |
917 | | static void op_pass_free(void *ptr) |
918 | 0 | { |
919 | 0 | SwsOpPass *p = ptr; |
920 | 0 | if (!p) |
921 | 0 | return; |
922 | | |
923 | 0 | if (p->comp.free) |
924 | 0 | p->comp.free(p->comp.priv); |
925 | |
|
926 | 0 | av_free(p); |
927 | 0 | } |
928 | | |
929 | | static inline SwsImg img_shift_idx(const SwsImg *base, const int y, |
930 | | const int plane_idx[4]) |
931 | 0 | { |
932 | 0 | SwsImg img = *base; |
933 | 0 | for (int i = 0; i < 4; i++) { |
934 | 0 | const int idx = plane_idx[i]; |
935 | 0 | if (idx >= 0) { |
936 | 0 | const int yshift = y >> ff_fmt_vshift(base->fmt, idx); |
937 | 0 | img.data[i] = base->data[idx] + yshift * base->linesize[idx]; |
938 | 0 | } else { |
939 | 0 | img.data[i] = NULL; |
940 | 0 | } |
941 | 0 | } |
942 | 0 | return img; |
943 | 0 | } |
944 | | |
/**
 * Per-frame setup callback: derives block counts, tail offsets/sizes and
 * strides from the actual images, and decides whether the last column of the
 * input/output must be staged through memcpy because processing a full row of
 * aligned blocks would read or write past the plane's linesize.
 */
static void op_pass_setup(const SwsImg *out_base, const SwsImg *in_base,
                          const SwsPass *pass)
{
    const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in_base->fmt);
    const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out_base->fmt);

    SwsOpPass *p = pass->priv;
    SwsOpExec *exec = &p->exec_base;
    const SwsCompiledOp *comp = &p->comp;
    const int block_size = comp->block_size;
    /* Ceiling division: the last block may be partial */
    p->num_blocks = (pass->width + block_size - 1) / block_size;

    /* Set up main loop parameters */
    const int aligned_w = p->num_blocks * block_size;        /* width rounded up to whole blocks */
    const int safe_width = (p->num_blocks - 1) * block_size; /* width covered by all-but-last blocks */
    const int tail_size = pass->width - safe_width;          /* pixels in the final (partial) block */
    p->tail_off_in = safe_width * p->pixel_bits_in >> 3;
    p->tail_off_out = safe_width * p->pixel_bits_out >> 3;
    p->tail_size_in = tail_size * p->pixel_bits_in >> 3;
    p->tail_size_out = tail_size * p->pixel_bits_out >> 3;
    p->memcpy_in = false;
    p->memcpy_out = false;

    const SwsImg in = img_shift_idx(in_base, 0, p->idx_in);
    const SwsImg out = img_shift_idx(out_base, 0, p->idx_out);

    for (int i = 0; i < p->planes_in; i++) {
        const int idx = p->idx_in[i];
        /* Planes 1 and 2 are the (possibly subsampled) chroma planes */
        const int sub_x = (idx == 1 || idx == 2) ? indesc->log2_chroma_w : 0;
        /* NOTE(review): (w + sub_x) >> sub_x is an exact ceiling only for
         * sub_x <= 1; for log2_chroma_w == 2 it can round down by one pixel —
         * confirm whether such formats can reach this path */
        const int plane_w = (aligned_w + sub_x) >> sub_x;
        const int plane_pad = (comp->over_read + sub_x) >> sub_x;
        const int plane_size = plane_w * p->pixel_bits_in >> 3;
        /* If a full row of blocks (plus the kernel's over-read) exceeds the
         * linesize, the tail must be bounced through a temporary buffer */
        p->memcpy_in |= plane_size + plane_pad > in.linesize[i];
        exec->in_stride[i] = in.linesize[i];
    }

    for (int i = 0; i < p->planes_out; i++) {
        const int idx = p->idx_out[i];
        const int sub_x = (idx == 1 || idx == 2) ? outdesc->log2_chroma_w : 0;
        const int plane_w = (aligned_w + sub_x) >> sub_x;
        const int plane_pad = (comp->over_write + sub_x) >> sub_x;
        const int plane_size = plane_w * p->pixel_bits_out >> 3;
        p->memcpy_out |= plane_size + plane_pad > out.linesize[i];
        exec->out_stride[i] = out.linesize[i];
    }

    /* Pre-fill pointer bump for the main section only; this value does not
     * matter at all for the tail / last row handlers because they only ever
     * process a single line */
    const int blocks_main = p->num_blocks - p->memcpy_out;
    for (int i = 0; i < 4; i++) {
        exec->in_bump[i] = in.linesize[i] - blocks_main * exec->block_size_in;
        exec->out_bump[i] = out.linesize[i] - blocks_main * exec->block_size_out;
    }
}
1000 | | |
/* Dispatch kernel over the last column of the image using memcpy.
 *
 * Input and/or output for the final (partial) block of each row is staged
 * through the on-stack `tmp` buffer, so the kernel can freely over-read /
 * over-write within the bounce buffer instead of past the real plane.
 * `copy_in` / `copy_out` select which side is bounced; the non-bounced side
 * is advanced in-place by its linesize each row. */
static av_always_inline void
handle_tail(const SwsOpPass *p, SwsOpExec *exec,
            const SwsImg *out_base, const bool copy_out,
            const SwsImg *in_base, const bool copy_in,
            int y, const int h)
{
    /* tmp[0][*] holds bounced input rows, tmp[1][*] bounced output rows */
    DECLARE_ALIGNED_64(uint8_t, tmp)[2][4][sizeof(uint32_t[128])];

    const SwsCompiledOp *comp = &p->comp;
    const int tail_size_in = p->tail_size_in;
    const int tail_size_out = p->tail_size_out;
    const int bx = p->num_blocks - 1; /* block index of the tail column */

    SwsImg in = img_shift_idx(in_base, y, p->idx_in);
    SwsImg out = img_shift_idx(out_base, y, p->idx_out);
    for (int i = 0; i < p->planes_in; i++) {
        in.data[i] += p->tail_off_in; /* seek to the tail column */
        if (copy_in) {
            exec->in[i] = (void *) tmp[0][i];
            exec->in_stride[i] = sizeof(tmp[0][i]);
        } else {
            exec->in[i] = in.data[i];
        }
    }

    for (int i = 0; i < p->planes_out; i++) {
        out.data[i] += p->tail_off_out;
        if (copy_out) {
            exec->out[i] = (void *) tmp[1][i];
            exec->out_stride[i] = sizeof(tmp[1][i]);
        } else {
            exec->out[i] = out.data[i];
        }
    }

    for (int y_end = y + h; y < y_end; y++) {
        if (copy_in) {
            for (int i = 0; i < p->planes_in; i++) {
                /* Tail plus kernel over-read must stay inside tmp[0] */
                av_assert2(tmp[0][i] + tail_size_in < (uint8_t *) tmp[1]);
                memcpy(tmp[0][i], in.data[i], tail_size_in);
                in.data[i] += in.linesize[i];
            }
        }

        /* Run the kernel on the single tail block of this row */
        comp->func(exec, comp->priv, bx, y, p->num_blocks, y + 1);

        if (copy_out) {
            for (int i = 0; i < p->planes_out; i++) {
                /* Tail plus kernel over-write must stay inside tmp[1] */
                av_assert2(tmp[1][i] + tail_size_out < (uint8_t *) tmp[2]);
                memcpy(out.data[i], tmp[1][i], tail_size_out);
                out.data[i] += out.linesize[i];
            }
        }

        /* Bounced sides always reuse the same tmp rows; only advance the
         * pointers that target the real image */
        for (int i = 0; i < 4; i++) {
            if (!copy_in)
                exec->in[i] += in.linesize[i];
            if (!copy_out)
                exec->out[i] += out.linesize[i];
        }
    }
}
1064 | | |
/**
 * Run callback for a compiled-operations pass: executes the kernel over one
 * horizontal slice [y, y+h), dispatching the bulk through the fast path and
 * the unsafe edges (last column / last row) through handle_tail().
 */
static void op_pass_run(const SwsImg *out_base, const SwsImg *in_base,
                        const int y, const int h, const SwsPass *pass)
{
    const SwsOpPass *p = pass->priv;
    const SwsCompiledOp *comp = &p->comp;
    const SwsImg in = img_shift_idx(in_base, y, p->idx_in);
    const SwsImg out = img_shift_idx(out_base, y, p->idx_out);

    /* Fill exec metadata for this slice */
    DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
    exec.slice_y = y;
    exec.slice_h = h;
    for (int i = 0; i < 4; i++) {
        exec.in[i] = in.data[i];
        exec.out[i] = out.data[i];
    }

    /**
     * To ensure safety, we need to consider the following:
     *
     * 1. We can overread the input, unless this is the last line of an
     *    unpadded buffer. All defined operations can handle arbitrary pixel
     *    input, so overread of arbitrary data is fine.
     *
     * 2. We can overwrite the output, as long as we don't write more than the
     *    amount of pixels that fit into one linesize. So we always need to
     *    memcpy the last column on the output side if unpadded.
     *
     * 3. For the last row, we also need to memcpy the remainder of the input,
     *    to avoid reading past the end of the buffer. Note that since we know
     *    the run() function is called on stripes of the same buffer, we don't
     *    need to worry about this for the end of a slice.
     */

    const int last_slice = y + h == pass->height;
    const bool memcpy_in = last_slice && p->memcpy_in;
    const bool memcpy_out = p->memcpy_out;
    const int num_blocks = p->num_blocks;
    const int blocks_main = num_blocks - memcpy_out; /* exclude tail column if bounced */
    const int h_main = h - memcpy_in;                /* exclude last row if bounced */

    /* Handle main section */
    comp->func(&exec, comp->priv, 0, y, blocks_main, y + h_main);

    if (memcpy_in) {
        /* Safe part of last row */
        for (int i = 0; i < 4; i++) {
            exec.in[i] += h_main * in.linesize[i];
            exec.out[i] += h_main * out.linesize[i];
        }
        comp->func(&exec, comp->priv, 0, y + h_main, num_blocks - 1, y + h);
    }

    /* Handle last column via memcpy, takes over `exec` so call these last */
    if (memcpy_out)
        handle_tail(p, &exec, out_base, true, in_base, false, y, h_main);
    if (memcpy_in)
        handle_tail(p, &exec, out_base, memcpy_out, in_base, true, y + h_main, 1);
}
1124 | | |
1125 | | static int rw_planes(const SwsOp *op) |
1126 | 0 | { |
1127 | 0 | return op->rw.packed ? 1 : op->rw.elems; |
1128 | 0 | } |
1129 | | |
1130 | | static int rw_pixel_bits(const SwsOp *op) |
1131 | 0 | { |
1132 | 0 | const int elems = op->rw.packed ? op->rw.elems : 1; |
1133 | 0 | const int size = ff_sws_pixel_type_size(op->type); |
1134 | 0 | const int bits = 8 >> op->rw.frac; |
1135 | 0 | av_assert1(bits >= 1); |
1136 | 0 | return elems * size * bits; |
1137 | 0 | } |
1138 | | |
1139 | | int ff_sws_compile_pass(SwsGraph *graph, SwsOpList *ops, int flags, SwsFormat dst, |
1140 | | SwsPass *input, SwsPass **output) |
1141 | 0 | { |
1142 | 0 | SwsContext *ctx = graph->ctx; |
1143 | 0 | SwsOpPass *p = NULL; |
1144 | 0 | const SwsOp *read = &ops->ops[0]; |
1145 | 0 | const SwsOp *write = &ops->ops[ops->num_ops - 1]; |
1146 | 0 | SwsPass *pass; |
1147 | 0 | int ret; |
1148 | | |
1149 | | /* Check if the whole operation graph is an end-to-end no-op */ |
1150 | 0 | if (ff_sws_op_list_is_noop(ops)) { |
1151 | 0 | *output = input; |
1152 | 0 | return 0; |
1153 | 0 | } |
1154 | | |
1155 | 0 | if (ops->num_ops < 2) { |
1156 | 0 | av_log(ctx, AV_LOG_ERROR, "Need at least two operations.\n"); |
1157 | 0 | return AVERROR(EINVAL); |
1158 | 0 | } |
1159 | | |
1160 | 0 | if (read->op != SWS_OP_READ || write->op != SWS_OP_WRITE) { |
1161 | 0 | av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read " |
1162 | 0 | "and write, respectively.\n"); |
1163 | 0 | return AVERROR(EINVAL); |
1164 | 0 | } |
1165 | | |
1166 | 0 | if (flags & SWS_OP_FLAG_OPTIMIZE) |
1167 | 0 | RET(ff_sws_op_list_optimize(ops)); |
1168 | 0 | else |
1169 | 0 | ff_sws_op_list_update_comps(ops); |
1170 | | |
1171 | 0 | p = av_mallocz(sizeof(*p)); |
1172 | 0 | if (!p) |
1173 | 0 | return AVERROR(ENOMEM); |
1174 | | |
1175 | 0 | ret = ff_sws_ops_compile(ctx, ops, &p->comp); |
1176 | 0 | if (ret < 0) |
1177 | 0 | goto fail; |
1178 | | |
1179 | 0 | p->planes_in = rw_planes(read); |
1180 | 0 | p->planes_out = rw_planes(write); |
1181 | 0 | p->pixel_bits_in = rw_pixel_bits(read); |
1182 | 0 | p->pixel_bits_out = rw_pixel_bits(write); |
1183 | 0 | p->exec_base = (SwsOpExec) { |
1184 | 0 | .width = dst.width, |
1185 | 0 | .height = dst.height, |
1186 | 0 | .block_size_in = p->comp.block_size * p->pixel_bits_in >> 3, |
1187 | 0 | .block_size_out = p->comp.block_size * p->pixel_bits_out >> 3, |
1188 | 0 | }; |
1189 | |
|
1190 | 0 | for (int i = 0; i < 4; i++) { |
1191 | 0 | p->idx_in[i] = i < p->planes_in ? ops->order_src.in[i] : -1; |
1192 | 0 | p->idx_out[i] = i < p->planes_out ? ops->order_dst.in[i] : -1; |
1193 | 0 | } |
1194 | |
|
1195 | 0 | pass = ff_sws_graph_add_pass(graph, dst.format, dst.width, dst.height, input, |
1196 | 0 | 1, p, op_pass_run); |
1197 | 0 | if (!pass) { |
1198 | 0 | ret = AVERROR(ENOMEM); |
1199 | 0 | goto fail; |
1200 | 0 | } |
1201 | 0 | pass->setup = op_pass_setup; |
1202 | 0 | pass->free = op_pass_free; |
1203 | |
|
1204 | 0 | *output = pass; |
1205 | 0 | return 0; |
1206 | | |
1207 | 0 | fail: |
1208 | 0 | op_pass_free(p); |
1209 | 0 | return ret; |
1210 | 0 | } |