/src/ffmpeg/libswscale/ops_optimizer.c
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/attributes.h" |
22 | | #include "libavutil/avassert.h" |
23 | | #include "libavutil/bswap.h" |
24 | | #include "libavutil/rational.h" |
25 | | |
26 | | #include "ops.h" |
27 | | #include "ops_internal.h" |
28 | | |
/* Evaluate x, store the result in the enclosing function's `ret` variable,
 * and return it from that function immediately if it is negative. */
#define RET(x)                                                                 \
    do {                                                                       \
        if ((ret = (x)) < 0)                                                   \
            return ret;                                                        \
    } while (0)
34 | | |
35 | | /** |
36 | | * Try to commute a clear op with the next operation. Makes any adjustments |
37 | | * to the operations as needed, but does not perform the actual commutation. |
38 | | * |
39 | | * Returns whether successful. |
40 | | */ |
41 | | static bool op_commute_clear(SwsOp *op, SwsOp *next) |
42 | 0 | { |
43 | 0 | SwsClearOp tmp = {0}; |
44 | |
|
45 | 0 | av_assert1(op->op == SWS_OP_CLEAR); |
46 | 0 | switch (next->op) { |
47 | 0 | case SWS_OP_CONVERT: |
48 | 0 | op->type = next->convert.to; |
49 | 0 | av_fallthrough; |
50 | 0 | case SWS_OP_LSHIFT: |
51 | 0 | case SWS_OP_RSHIFT: |
52 | 0 | case SWS_OP_DITHER: |
53 | 0 | case SWS_OP_MIN: |
54 | 0 | case SWS_OP_MAX: |
55 | 0 | case SWS_OP_SCALE: |
56 | 0 | case SWS_OP_READ: |
57 | 0 | case SWS_OP_FILTER_H: |
58 | 0 | case SWS_OP_FILTER_V: |
59 | 0 | ff_sws_apply_op_q(next, op->clear.value); |
60 | 0 | return true; |
61 | 0 | case SWS_OP_SWIZZLE: |
62 | 0 | op->clear.mask = ff_sws_comp_mask_swizzle(op->clear.mask, next->swizzle); |
63 | 0 | ff_sws_apply_op_q(next, op->clear.value); |
64 | 0 | return true; |
65 | 0 | case SWS_OP_SWAP_BYTES: |
66 | 0 | switch (next->type) { |
67 | 0 | case SWS_PIXEL_U16: |
68 | 0 | ff_sws_apply_op_q(next, op->clear.value); /* always works */ |
69 | 0 | return true; |
70 | 0 | case SWS_PIXEL_U32: |
71 | 0 | for (int i = 0; i < 4; i++) { |
72 | 0 | if (!SWS_COMP_TEST(op->clear.mask, i)) |
73 | 0 | continue; |
74 | 0 | uint32_t v = av_bswap32(op->clear.value[i].num); |
75 | 0 | if (v > INT_MAX) |
76 | 0 | return false; /* can't represent as AVRational anymore */ |
77 | 0 | tmp.value[i] = Q(v); |
78 | 0 | } |
79 | 0 | op->clear = tmp; |
80 | 0 | return true; |
81 | 0 | default: |
82 | 0 | return false; |
83 | 0 | } |
84 | 0 | case SWS_OP_INVALID: |
85 | 0 | case SWS_OP_WRITE: |
86 | 0 | case SWS_OP_LINEAR: |
87 | 0 | case SWS_OP_PACK: |
88 | 0 | case SWS_OP_UNPACK: |
89 | 0 | case SWS_OP_CLEAR: |
90 | 0 | return false; |
91 | 0 | case SWS_OP_TYPE_NB: |
92 | 0 | break; |
93 | 0 | } |
94 | | |
95 | 0 | av_unreachable("Invalid operation type!"); |
96 | 0 | return false; |
97 | 0 | } |
98 | | |
99 | | /** |
100 | | * Try to commute a swizzle op with the next operation. Makes any adjustments |
101 | | * to the operations as needed, but does not perform the actual commutation. |
102 | | * |
103 | | * Returns whether successful. |
104 | | */ |
105 | | static bool op_commute_swizzle(SwsOp *op, SwsOp *next) |
106 | 0 | { |
107 | 0 | bool seen[4] = {0}; |
108 | |
|
109 | 0 | av_assert1(op->op == SWS_OP_SWIZZLE); |
110 | 0 | switch (next->op) { |
111 | 0 | case SWS_OP_CONVERT: |
112 | 0 | op->type = next->convert.to; |
113 | 0 | av_fallthrough; |
114 | 0 | case SWS_OP_SWAP_BYTES: |
115 | 0 | case SWS_OP_LSHIFT: |
116 | 0 | case SWS_OP_RSHIFT: |
117 | 0 | case SWS_OP_SCALE: |
118 | 0 | case SWS_OP_FILTER_H: |
119 | 0 | case SWS_OP_FILTER_V: |
120 | 0 | return true; |
121 | | |
122 | | /** |
123 | | * We can commute per-channel ops only if the per-channel constants are the |
124 | | * same for all duplicated channels; e.g.: |
125 | | * SWIZZLE {0, 0, 0, 3} |
126 | | * NEXT {x, x, x, w} |
127 | | * -> |
128 | | * NEXT {x, _, _, w} |
129 | | * SWIZZLE {0, 0, 0, 3} |
130 | | */ |
131 | 0 | case SWS_OP_MIN: |
132 | 0 | case SWS_OP_MAX: { |
133 | 0 | const SwsClampOp c = next->clamp; |
134 | 0 | for (int i = 0; i < 4; i++) { |
135 | 0 | if (!SWS_OP_NEEDED(op, i)) |
136 | 0 | continue; |
137 | 0 | const int j = op->swizzle.in[i]; |
138 | 0 | if (seen[j] && av_cmp_q(next->clamp.limit[j], c.limit[i])) |
139 | 0 | return false; |
140 | 0 | next->clamp.limit[j] = c.limit[i]; |
141 | 0 | seen[j] = true; |
142 | 0 | } |
143 | 0 | return true; |
144 | 0 | } |
145 | | |
146 | 0 | case SWS_OP_DITHER: { |
147 | 0 | const SwsDitherOp d = next->dither; |
148 | 0 | for (int i = 0; i < 4; i++) { |
149 | 0 | if (!SWS_OP_NEEDED(op, i)) |
150 | 0 | continue; |
151 | 0 | const int j = op->swizzle.in[i]; |
152 | 0 | if (seen[j] && next->dither.y_offset[j] != d.y_offset[i]) |
153 | 0 | return false; |
154 | 0 | next->dither.y_offset[j] = d.y_offset[i]; |
155 | 0 | seen[j] = true; |
156 | 0 | } |
157 | 0 | return true; |
158 | 0 | } |
159 | | |
160 | 0 | case SWS_OP_INVALID: |
161 | 0 | case SWS_OP_READ: |
162 | 0 | case SWS_OP_WRITE: |
163 | 0 | case SWS_OP_SWIZZLE: |
164 | 0 | case SWS_OP_CLEAR: |
165 | 0 | case SWS_OP_LINEAR: |
166 | 0 | case SWS_OP_PACK: |
167 | 0 | case SWS_OP_UNPACK: |
168 | 0 | return false; |
169 | 0 | case SWS_OP_TYPE_NB: |
170 | 0 | break; |
171 | 0 | } |
172 | | |
173 | 0 | av_unreachable("Invalid operation type!"); |
174 | 0 | return false; |
175 | 0 | } |
176 | | |
177 | | /** |
178 | | * Try to commute a filter op with the previous operation. Makes any |
179 | | * adjustments to the operations as needed, but does not perform the actual |
180 | | * commutation. |
181 | | * |
182 | | * Returns whether successful. |
183 | | */ |
184 | | static bool op_commute_filter(SwsOp *op, SwsOp *prev) |
185 | 0 | { |
186 | 0 | switch (prev->op) { |
187 | 0 | case SWS_OP_SWIZZLE: |
188 | 0 | case SWS_OP_SCALE: |
189 | 0 | case SWS_OP_LINEAR: |
190 | 0 | case SWS_OP_DITHER: |
191 | 0 | prev->type = SWS_PIXEL_F32; |
192 | 0 | return true; |
193 | 0 | case SWS_OP_CONVERT: |
194 | 0 | if (prev->convert.to == SWS_PIXEL_F32) { |
195 | 0 | av_assert0(!prev->convert.expand); |
196 | 0 | FFSWAP(SwsPixelType, op->type, prev->type); |
197 | 0 | return true; |
198 | 0 | } |
199 | 0 | return false; |
200 | 0 | case SWS_OP_INVALID: |
201 | 0 | case SWS_OP_READ: |
202 | 0 | case SWS_OP_WRITE: |
203 | 0 | case SWS_OP_SWAP_BYTES: |
204 | 0 | case SWS_OP_UNPACK: |
205 | 0 | case SWS_OP_PACK: |
206 | 0 | case SWS_OP_LSHIFT: |
207 | 0 | case SWS_OP_RSHIFT: |
208 | 0 | case SWS_OP_CLEAR: |
209 | 0 | case SWS_OP_MIN: |
210 | 0 | case SWS_OP_MAX: |
211 | 0 | case SWS_OP_FILTER_H: |
212 | 0 | case SWS_OP_FILTER_V: |
213 | 0 | return false; |
214 | 0 | case SWS_OP_TYPE_NB: |
215 | 0 | break; |
216 | 0 | } |
217 | | |
218 | 0 | av_unreachable("Invalid operation type!"); |
219 | 0 | return false; |
220 | 0 | } |
221 | | |
/* Base-2 logarithm of x if x is an exact power of two; 0 for everything else
 * (which makes x == 1 indistinguishable from "not a power of two") */
static int exact_log2(const int x)
{
    /* Reject non-positive values and values with more than one bit set */
    if (x <= 0 || (x & (x - 1)))
        return 0;
    return av_log2(x);
}
231 | | |
232 | | static int exact_log2_q(const AVRational x) |
233 | 0 | { |
234 | 0 | if (x.den == 1) |
235 | 0 | return exact_log2(x.num); |
236 | 0 | else if (x.num == 1) |
237 | 0 | return -exact_log2(x.den); |
238 | 0 | else |
239 | 0 | return 0; |
240 | 0 | } |
241 | | |
242 | | /** |
243 | | * If a linear operation can be reduced to a scalar multiplication, returns |
244 | | * the corresponding scaling factor, or 0 otherwise. |
245 | | */ |
246 | | static bool extract_scalar(const SwsLinearOp *c, SwsComps comps, SwsComps prev, |
247 | | SwsScaleOp *out_scale) |
248 | 0 | { |
249 | 0 | SwsScaleOp scale = {0}; |
250 | | |
251 | | /* There are components not on the main diagonal */ |
252 | 0 | if (c->mask & ~SWS_MASK_DIAG4) |
253 | 0 | return false; |
254 | | |
255 | 0 | for (int i = 0; i < 4; i++) { |
256 | 0 | const AVRational s = c->m[i][i]; |
257 | 0 | if ((prev.flags[i] & SWS_COMP_ZERO) || |
258 | 0 | (comps.flags[i] & SWS_COMP_GARBAGE)) |
259 | 0 | continue; |
260 | 0 | if (scale.factor.den && av_cmp_q(s, scale.factor)) |
261 | 0 | return false; |
262 | 0 | scale.factor = s; |
263 | 0 | } |
264 | | |
265 | 0 | if (scale.factor.den) |
266 | 0 | *out_scale = scale; |
267 | 0 | return scale.factor.den; |
268 | 0 | } |
269 | | |
270 | | /* Extracts an integer clear operation (subset) from the given linear op. */ |
271 | | static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, |
272 | | SwsClearOp *out_clear) |
273 | 0 | { |
274 | 0 | SwsClearOp clear = {0}; |
275 | 0 | bool ret = false; |
276 | |
|
277 | 0 | for (int i = 0; i < 4; i++) { |
278 | 0 | bool const_row = c->m[i][4].den == 1; /* offset is integer */ |
279 | 0 | for (int j = 0; j < 4; j++) { |
280 | 0 | const_row &= c->m[i][j].num == 0 || /* scalar is zero */ |
281 | 0 | (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */ |
282 | 0 | } |
283 | 0 | if (const_row && (c->mask & SWS_MASK_ROW(i))) { |
284 | 0 | clear.mask |= SWS_COMP(i); |
285 | 0 | clear.value[i] = c->m[i][4]; |
286 | 0 | for (int j = 0; j < 5; j++) |
287 | 0 | c->m[i][j] = Q(i == j); |
288 | 0 | c->mask &= ~SWS_MASK_ROW(i); |
289 | 0 | ret = true; |
290 | 0 | } |
291 | 0 | } |
292 | |
|
293 | 0 | if (ret) |
294 | 0 | *out_clear = clear; |
295 | 0 | return ret; |
296 | 0 | } |
297 | | |
298 | | /* Unswizzle a linear operation by aligning single-input rows with |
299 | | * their corresponding diagonal */ |
300 | | static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz) |
301 | 0 | { |
302 | 0 | SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3); |
303 | 0 | SwsLinearOp c = *op; |
304 | | |
305 | | /* Find non-zero coefficients in the main 4x4 matrix */ |
306 | 0 | uint32_t nonzero = 0; |
307 | 0 | for (int i = 0; i < 4; i++) { |
308 | 0 | for (int j = 0; j < 4; j++) { |
309 | 0 | if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO)) |
310 | 0 | continue; |
311 | 0 | nonzero |= SWS_MASK(i, j); |
312 | 0 | } |
313 | 0 | } |
314 | | |
315 | | /* If a value is unique in its row and the target column is |
316 | | * empty, move it there and update the input swizzle */ |
317 | 0 | for (int i = 0; i < 4; i++) { |
318 | 0 | if (nonzero & SWS_MASK_COL(i)) |
319 | 0 | continue; /* target column is not empty */ |
320 | 0 | for (int j = 0; j < 4; j++) { |
321 | 0 | if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) { |
322 | | /* Move coefficient to the diagonal */ |
323 | 0 | c.m[i][i] = c.m[i][j]; |
324 | 0 | c.m[i][j] = Q(0); |
325 | 0 | swiz.in[i] = j; |
326 | 0 | break; |
327 | 0 | } |
328 | 0 | } |
329 | 0 | } |
330 | |
|
331 | 0 | if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask) |
332 | 0 | return false; /* no swizzle was identified */ |
333 | | |
334 | 0 | c.mask = ff_sws_linear_mask(c); |
335 | 0 | *out_swiz = swiz; |
336 | 0 | *op = c; |
337 | 0 | return true; |
338 | 0 | } |
339 | | |
340 | | static int op_result_is_exact(const SwsOp *op) |
341 | 0 | { |
342 | 0 | for (int i = 0; i < 4; i++) { |
343 | 0 | if (SWS_OP_NEEDED(op, i) && !(op->comps.flags[i] & SWS_COMP_EXACT)) |
344 | 0 | return false; |
345 | 0 | } |
346 | | |
347 | 0 | return true; |
348 | 0 | } |
349 | | |
/**
 * Optimize an operation list in place.
 *
 * The function runs four passes over the list: pushing filters towards the
 * input, in-place simplifications, pushing clears towards the output, and
 * finally the remaining re-ordering rewrites. As soon as any pass changes the
 * list, control jumps back to `retry`, which recomputes the component
 * metadata and starts over from the first pass. The function returns once a
 * full sweep of all passes finds nothing left to change.
 *
 * @param ops the operation list to optimize
 * @return 0 on success, or the negative value returned by a failed
 *         ff_sws_op_list_insert_at() call
 */
int ff_sws_op_list_optimize(SwsOpList *ops)
{
    int ret;

retry:
    /* Refresh the per-op component metadata that the checks below rely on */
    ff_sws_op_list_update_comps(ops);

    /* Try to push filters towards the input; do this first to unblock
     * in-place optimizations like linear op fusion */
    for (int n = 1; n < ops->num_ops; n++) {
        SwsOp *op = &ops->ops[n];
        SwsOp *prev = &ops->ops[n - 1];

        switch (op->op) {
        case SWS_OP_FILTER_H:
        case SWS_OP_FILTER_V:
            if (op_commute_filter(op, prev)) {
                FFSWAP(SwsOp, *op, *prev);
                goto retry;
            }
            break;
        }
    }

    /* Apply all in-place optimizations (that do not re-order the list) */
    for (int n = 0; n < ops->num_ops; n++) {
        /* Zeroed stand-in used as neighbour at either end of the list */
        SwsOp dummy = {0};
        SwsOp *op = &ops->ops[n];
        SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
        SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;

        /* common helper variable */
        bool noop = true;

        if (!SWS_OP_NEEDED(op, 0) && !SWS_OP_NEEDED(op, 1) &&
            !SWS_OP_NEEDED(op, 2) && !SWS_OP_NEEDED(op, 3) &&
            op->op != SWS_OP_WRITE)
        {
            /* Remove any operation whose output is not needed */
            ff_sws_op_list_remove_at(ops, n, 1);
            goto retry;
        }

        switch (op->op) {
        case SWS_OP_READ:
            /* "Compress" planar reads where not all components are needed */
            if (!op->rw.packed) {
                SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
                int nb_planes = 0;
                for (int i = 0; i < op->rw.elems; i++) {
                    if (!SWS_OP_NEEDED(op, i)) {
                        swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
                        continue;
                    }

                    const int idx = nb_planes++;
                    av_assert1(idx <= i);
                    ops->plane_src[idx] = ops->plane_src[i];
                    swiz.in[i] = idx;
                }

                if (nb_planes < op->rw.elems) {
                    /* Follow the shrunken read with a swizzle that restores
                     * the original component positions */
                    op->rw.elems = nb_planes;
                    RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
                        .op = SWS_OP_SWIZZLE,
                        .type = op->rw.filter ? SWS_PIXEL_F32 : op->type,
                        .swizzle = swiz,
                    }));
                    goto retry;
                }
            }
            break;

        case SWS_OP_SWAP_BYTES:
            /* Redundant (double) swap */
            if (next->op == SWS_OP_SWAP_BYTES) {
                ff_sws_op_list_remove_at(ops, n, 2);
                goto retry;
            }
            break;

        case SWS_OP_UNPACK:
            /* Redundant unpack+pack */
            if (next->op == SWS_OP_PACK && next->type == op->type &&
                next->pack.pattern[0] == op->pack.pattern[0] &&
                next->pack.pattern[1] == op->pack.pattern[1] &&
                next->pack.pattern[2] == op->pack.pattern[2] &&
                next->pack.pattern[3] == op->pack.pattern[3])
            {
                ff_sws_op_list_remove_at(ops, n, 2);
                goto retry;
            }
            break;

        case SWS_OP_LSHIFT:
        case SWS_OP_RSHIFT:
            /* Two shifts in the same direction */
            if (next->op == op->op) {
                op->shift.amount += next->shift.amount;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* No-op shift */
            if (!op->shift.amount) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (!SWS_COMP_TEST(op->clear.mask, i))
                    continue;

                if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
                    !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
                    op->clear.value[i].num == 0)
                {
                    /* Redundant clear-to-zero of zero component */
                    op->clear.mask ^= SWS_COMP(i);
                } else if (!SWS_OP_NEEDED(op, i)) {
                    /* Unnecessary clear of unused component */
                    op->clear.mask ^= SWS_COMP(i);
                } else {
                    noop = false;
                }
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive clear */
            if (next->op == SWS_OP_CLEAR) {
                /* Values of the later clear win for components it covers */
                for (int i = 0; i < 4; i++) {
                    if (SWS_COMP_TEST(next->clear.mask, i))
                        op->clear.value[i] = next->clear.value[i];
                }
                op->clear.mask |= next->clear.mask;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }
            break;

        case SWS_OP_SWIZZLE:
            for (int i = 0; i < 4; i++) {
                if (!SWS_OP_NEEDED(op, i))
                    continue;
                if (op->swizzle.in[i] != i)
                    noop = false;
            }

            /* Identity swizzle */
            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive swizzle */
            if (next->op == SWS_OP_SWIZZLE) {
                const SwsSwizzleOp orig = op->swizzle;
                for (int i = 0; i < 4; i++)
                    op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Swizzle planes instead of components, if possible */
            if (prev->op == SWS_OP_READ && !prev->rw.packed) {
                for (int dst = 0; dst < prev->rw.elems; dst++) {
                    const int src = op->swizzle.in[dst];
                    if (src > dst && src < prev->rw.elems) {
                        FFSWAP(int, ops->plane_src[dst], ops->plane_src[src]);
                        /* Exchange references to the two swapped planes */
                        for (int i = dst; i < 4; i++) {
                            if (op->swizzle.in[i] == dst)
                                op->swizzle.in[i] = src;
                            else if (op->swizzle.in[i] == src)
                                op->swizzle.in[i] = dst;
                        }
                        goto retry;
                    }
                }
            }

            /* Same idea on the output side: swap destination planes */
            if (next->op == SWS_OP_WRITE && !next->rw.packed) {
                for (int dst = 0; dst < next->rw.elems; dst++) {
                    const int src = op->swizzle.in[dst];
                    if (src > dst && src < next->rw.elems) {
                        FFSWAP(int, ops->plane_dst[dst], ops->plane_dst[src]);
                        FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
                        goto retry;
                    }
                }
            }
            break;

        case SWS_OP_CONVERT:
            /* No-op conversion */
            if (op->type == op->convert.to) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Transitive conversion */
            if (next->op == SWS_OP_CONVERT &&
                op->convert.expand == next->convert.expand)
            {
                av_assert1(op->convert.to == next->type);
                op->convert.to = next->convert.to;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Conversion followed by integer expansion */
            if (next->op == SWS_OP_SCALE && !op->convert.expand &&
                ff_sws_pixel_type_is_int(op->type) &&
                ff_sws_pixel_type_is_int(op->convert.to) &&
                !av_cmp_q(next->scale.factor,
                          ff_sws_pixel_expand(op->type, op->convert.to)))
            {
                op->convert.expand = true;
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }
            break;

        case SWS_OP_MIN:
            /* The clamp matters only if some limit lies below the known
             * maximum of its input */
            for (int i = 0; i < 4; i++) {
                if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den)
                    continue;
                if (av_cmp_q(op->clamp.limit[i], prev->comps.max[i]) < 0)
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_MAX:
            /* The clamp matters only if some known input minimum lies below
             * its limit */
            for (int i = 0; i < 4; i++) {
                if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den)
                    continue;
                if (av_cmp_q(prev->comps.min[i], op->clamp.limit[i]) < 0)
                    noop = false;
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_DITHER:
            for (int i = 0; i < 4; i++) {
                /* A negative y_offset marks a component as not dithered */
                if (op->dither.y_offset[i] < 0)
                    continue;
                if (!SWS_OP_NEEDED(op, i) || (prev->comps.flags[i] & SWS_COMP_EXACT)) {
                    op->dither.y_offset[i] = -1; /* unnecessary dither */
                    goto retry;
                } else {
                    noop = false;
                }
            }

            if (noop) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;

        case SWS_OP_LINEAR: {
            SwsSwizzleOp swizzle;
            SwsClearOp clear;
            SwsScaleOp scale;

            /* No-op (identity) linear operation */
            if (!op->lin.mask) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            if (next->op == SWS_OP_LINEAR) {
                /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
                const SwsLinearOp m1 = op->lin;
                const SwsLinearOp m2 = next->lin;
                for (int i = 0; i < 4; i++) {
                    for (int j = 0; j < 5; j++) {
                        AVRational sum = Q(0);
                        for (int k = 0; k < 4; k++)
                            sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
                        if (j == 4) /* m1.m[4][j] == 1 */
                            sum = av_add_q(sum, m2.m[i][4]);
                        op->lin.m[i][j] = sum;
                    }
                }
                op->lin.mask = ff_sws_linear_mask(op->lin);
                ff_sws_op_list_remove_at(ops, n + 1, 1);
                goto retry;
            }

            /* Optimize away zero columns */
            for (int j = 0; j < 4; j++) {
                const uint32_t col = SWS_MASK_COL(j);
                if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
                    continue;
                for (int i = 0; i < 4; i++)
                    op->lin.m[i][j] = Q(i == j);
                op->lin.mask &= ~col;
                goto retry;
            }

            /* Optimize away unused rows */
            for (int i = 0; i < 4; i++) {
                const uint32_t row = SWS_MASK_ROW(i);
                if (SWS_OP_NEEDED(op, i) || !(op->lin.mask & row))
                    continue;
                for (int j = 0; j < 5; j++)
                    op->lin.m[i][j] = Q(i == j);
                op->lin.mask &= ~row;
                goto retry;
            }

            /* Convert constant rows to explicit clear instruction */
            if (extract_constant_rows(&op->lin, prev->comps, &clear)) {
                RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
                    .op = SWS_OP_CLEAR,
                    .type = op->type,
                    .comps = op->comps,
                    .clear = clear,
                }));
                goto retry;
            }

            /* Multiplication by scalar constant */
            if (extract_scalar(&op->lin, op->comps, prev->comps, &scale)) {
                op->op = SWS_OP_SCALE;
                op->scale = scale;
                goto retry;
            }

            /* Swizzle by fixed pattern */
            if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
                /* The swizzle goes in front of the (now unswizzled) linear op */
                RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
                    .op = SWS_OP_SWIZZLE,
                    .type = op->type,
                    .swizzle = swizzle,
                }));
                goto retry;
            }
            break;
        }

        case SWS_OP_SCALE: {
            const int factor2 = exact_log2_q(op->scale.factor);

            /* No-op scaling */
            if (op->scale.factor.num == 1 && op->scale.factor.den == 1) {
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }

            /* Merge consecutive scaling operations (that don't overflow) */
            if (next->op == SWS_OP_SCALE) {
                int64_t p = op->scale.factor.num * (int64_t) next->scale.factor.num;
                int64_t q = op->scale.factor.den * (int64_t) next->scale.factor.den;
                if (FFABS(p) <= INT_MAX && FFABS(q) <= INT_MAX) {
                    av_reduce(&op->scale.factor.num, &op->scale.factor.den, p, q, INT_MAX);
                    ff_sws_op_list_remove_at(ops, n + 1, 1);
                    goto retry;
                }
            }

            /* Scaling by exact power of two */
            if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
                op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
                op->shift.amount = FFABS(factor2);
                goto retry;
            }
            break;
        }

        case SWS_OP_FILTER_H:
        case SWS_OP_FILTER_V:
            /* Merge with prior simple planar read */
            if (prev->op == SWS_OP_READ && !prev->rw.filter &&
                !prev->rw.packed && !prev->rw.frac) {
                prev->rw.filter = op->op;
                /* Take a new reference for the read before the filter op
                 * is removed from the list */
                prev->rw.kernel = av_refstruct_ref(op->filter.kernel);
                ff_sws_op_list_remove_at(ops, n, 1);
                goto retry;
            }
            break;
        }
    }

    /* Push clears to the back to void any unused components */
    for (int n = 0; n < ops->num_ops - 1; n++) {
        SwsOp *op = &ops->ops[n];
        SwsOp *next = &ops->ops[n + 1];

        switch (op->op) {
        case SWS_OP_CLEAR:
            if (op_commute_clear(op, next)) {
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;
        }
    }

    /* Apply any remaining preferential re-ordering optimizations; do these
     * last because they are more likely to block other optimizations if done
     * too aggressively */
    for (int n = 0; n < ops->num_ops - 1; n++) {
        SwsOp *op = &ops->ops[n];
        SwsOp *next = &ops->ops[n + 1];

        switch (op->op) {
        case SWS_OP_SWIZZLE: {
            /* Try to push swizzles towards the output */
            if (op_commute_swizzle(op, next)) {
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;
        }

        case SWS_OP_SCALE:
            /* Exact integer multiplication */
            if (op->scale.factor.den == 1 && next->op == SWS_OP_CONVERT &&
                ff_sws_pixel_type_is_int(next->convert.to) &&
                op_result_is_exact(op))
            {
                /* The scale moves behind the conversion and adopts its
                 * output type */
                op->type = next->convert.to;
                FFSWAP(SwsOp, *op, *next);
                goto retry;
            }
            break;
        }
    }

    return 0;
}
797 | | |
798 | | int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], |
799 | | int size, uint8_t clear_val, |
800 | | int *read_bytes, int *write_bytes) |
801 | 0 | { |
802 | 0 | if (!ops->num_ops) |
803 | 0 | return AVERROR(EINVAL); |
804 | | |
805 | 0 | const SwsOp *read = ff_sws_op_list_input(ops); |
806 | 0 | if (!read || read->rw.frac || read->rw.filter || |
807 | 0 | (!read->rw.packed && read->rw.elems > 1)) |
808 | 0 | return AVERROR(ENOTSUP); |
809 | | |
810 | 0 | const int read_size = ff_sws_pixel_type_size(read->type); |
811 | 0 | uint32_t mask[4] = {0}; |
812 | 0 | for (int i = 0; i < read->rw.elems; i++) |
813 | 0 | mask[i] = 0x01010101 * i * read_size + 0x03020100; |
814 | |
|
815 | 0 | for (int opidx = 1; opidx < ops->num_ops; opidx++) { |
816 | 0 | const SwsOp *op = &ops->ops[opidx]; |
817 | 0 | switch (op->op) { |
818 | 0 | case SWS_OP_SWIZZLE: { |
819 | 0 | uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] }; |
820 | 0 | for (int i = 0; i < 4; i++) |
821 | 0 | mask[i] = orig[op->swizzle.in[i]]; |
822 | 0 | break; |
823 | 0 | } |
824 | | |
825 | 0 | case SWS_OP_SWAP_BYTES: |
826 | 0 | for (int i = 0; i < 4; i++) { |
827 | 0 | switch (ff_sws_pixel_type_size(op->type)) { |
828 | 0 | case 2: mask[i] = av_bswap16(mask[i]); break; |
829 | 0 | case 4: mask[i] = av_bswap32(mask[i]); break; |
830 | 0 | } |
831 | 0 | } |
832 | 0 | break; |
833 | | |
834 | 0 | case SWS_OP_CLEAR: |
835 | 0 | for (int i = 0; i < 4; i++) { |
836 | 0 | if (!SWS_COMP_TEST(op->clear.mask, i)) |
837 | 0 | continue; |
838 | 0 | if (op->clear.value[i].num != 0 || !clear_val) |
839 | 0 | return AVERROR(ENOTSUP); |
840 | 0 | mask[i] = 0x1010101ul * clear_val; |
841 | 0 | } |
842 | 0 | break; |
843 | | |
844 | 0 | case SWS_OP_CONVERT: { |
845 | 0 | if (!op->convert.expand) |
846 | 0 | return AVERROR(ENOTSUP); |
847 | 0 | for (int i = 0; i < 4; i++) { |
848 | 0 | switch (ff_sws_pixel_type_size(op->type)) { |
849 | 0 | case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break; |
850 | 0 | case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break; |
851 | 0 | } |
852 | 0 | } |
853 | 0 | break; |
854 | 0 | } |
855 | | |
856 | 0 | case SWS_OP_WRITE: { |
857 | 0 | if (op->rw.frac || op->rw.filter || |
858 | 0 | (!op->rw.packed && op->rw.elems > 1)) |
859 | 0 | return AVERROR(ENOTSUP); |
860 | | |
861 | | /* Initialize to no-op */ |
862 | 0 | memset(shuffle, clear_val, size); |
863 | |
|
864 | 0 | const int write_size = ff_sws_pixel_type_size(op->type); |
865 | 0 | const int read_chunk = read->rw.elems * read_size; |
866 | 0 | const int write_chunk = op->rw.elems * write_size; |
867 | 0 | const int num_groups = size / FFMAX(read_chunk, write_chunk); |
868 | 0 | for (int n = 0; n < num_groups; n++) { |
869 | 0 | const int base_in = n * read_chunk; |
870 | 0 | const int base_out = n * write_chunk; |
871 | 0 | for (int i = 0; i < op->rw.elems; i++) { |
872 | 0 | const int offset = base_out + i * write_size; |
873 | 0 | for (int b = 0; b < write_size; b++) { |
874 | 0 | const uint8_t idx = mask[i] >> (b * 8); |
875 | 0 | if (idx != clear_val) |
876 | 0 | shuffle[offset + b] = base_in + idx; |
877 | 0 | } |
878 | 0 | } |
879 | 0 | } |
880 | |
|
881 | 0 | *read_bytes = num_groups * read_chunk; |
882 | 0 | *write_bytes = num_groups * write_chunk; |
883 | 0 | return num_groups; |
884 | 0 | } |
885 | | |
886 | 0 | default: |
887 | 0 | return AVERROR(ENOTSUP); |
888 | 0 | } |
889 | 0 | } |
890 | | |
891 | 0 | return AVERROR(EINVAL); |
892 | 0 | } |
893 | | |
894 | | /** |
895 | | * Determine a suitable intermediate buffer format for a given combination |
896 | | * of pixel types and number of planes. The exact interpretation of these |
897 | | * formats does not matter at all; since they will only ever be used as |
898 | | * temporary intermediate buffers. We still need to pick *some* format as |
899 | | * a consequence of ff_sws_graph_add_pass() taking an AVPixelFormat for the |
900 | | * output buffer. |
901 | | */ |
902 | | static enum AVPixelFormat get_planar_fmt(SwsPixelType type, int nb_planes) |
903 | 0 | { |
904 | 0 | switch (ff_sws_pixel_type_size(type)) { |
905 | 0 | case 1: |
906 | 0 | switch (nb_planes) { |
907 | 0 | case 1: return AV_PIX_FMT_GRAY8; |
908 | 0 | case 2: return AV_PIX_FMT_YUV444P; // FIXME: no 2-plane planar fmt |
909 | 0 | case 3: return AV_PIX_FMT_YUV444P; |
910 | 0 | case 4: return AV_PIX_FMT_YUVA444P; |
911 | 0 | } |
912 | 0 | break; |
913 | 0 | case 2: |
914 | 0 | switch (nb_planes) { |
915 | 0 | case 1: return AV_PIX_FMT_GRAY16; |
916 | 0 | case 2: return AV_PIX_FMT_YUV444P16; // FIXME: no 2-plane planar fmt |
917 | 0 | case 3: return AV_PIX_FMT_YUV444P16; |
918 | 0 | case 4: return AV_PIX_FMT_YUVA444P16; |
919 | 0 | } |
920 | 0 | break; |
921 | 0 | case 4: |
922 | 0 | switch (nb_planes) { |
923 | 0 | case 1: return AV_PIX_FMT_GRAYF32; |
924 | 0 | case 2: return AV_PIX_FMT_GBRPF32; // FIXME: no 2-plane planar fmt |
925 | 0 | case 3: return AV_PIX_FMT_GBRPF32; |
926 | 0 | case 4: return AV_PIX_FMT_GBRAPF32; |
927 | 0 | } |
928 | 0 | break; |
929 | 0 | } |
930 | | |
931 | 0 | av_unreachable("Invalid pixel type or number of planes?"); |
932 | 0 | return AV_PIX_FMT_NONE; |
933 | 0 | } |
934 | | |
935 | | static void get_input_size(const SwsOpList *ops, SwsFormat *fmt) |
936 | 0 | { |
937 | 0 | fmt->width = ops->src.width; |
938 | 0 | fmt->height = ops->src.height; |
939 | |
|
940 | 0 | const SwsOp *read = ff_sws_op_list_input(ops); |
941 | 0 | if (read && read->rw.filter == SWS_OP_FILTER_V) { |
942 | 0 | fmt->height = read->rw.kernel->dst_size; |
943 | 0 | } else if (read && read->rw.filter == SWS_OP_FILTER_H) { |
944 | 0 | fmt->width = read->rw.kernel->dst_size; |
945 | 0 | } |
946 | 0 | } |
947 | | |
948 | | int ff_sws_op_list_subpass(SwsOpList *ops1, SwsOpList **out_rest) |
949 | 0 | { |
950 | 0 | const SwsOp *op; |
951 | 0 | int ret, idx; |
952 | |
|
953 | 0 | for (idx = 0; idx < ops1->num_ops; idx++) { |
954 | 0 | op = &ops1->ops[idx]; |
955 | 0 | if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V) |
956 | 0 | break; |
957 | 0 | } |
958 | |
|
959 | 0 | if (idx == ops1->num_ops) { |
960 | 0 | *out_rest = NULL; |
961 | 0 | return 0; |
962 | 0 | } |
963 | | |
964 | 0 | av_assert0(idx > 0); |
965 | 0 | const SwsOp *prev = &ops1->ops[idx - 1]; |
966 | |
|
967 | 0 | SwsOpList *ops2 = ff_sws_op_list_duplicate(ops1); |
968 | 0 | if (!ops2) |
969 | 0 | return AVERROR(ENOMEM); |
970 | | |
971 | | /** |
972 | | * Not all components may be needed; but we need the ones that *are* |
973 | | * used to be contiguous for the write/read operations. So, first |
974 | | * compress them into a linearly ascending list of components |
975 | | */ |
976 | 0 | int nb_planes = 0; |
977 | 0 | SwsSwizzleOp swiz_wr = SWS_SWIZZLE(0, 1, 2, 3); |
978 | 0 | SwsSwizzleOp swiz_rd = SWS_SWIZZLE(0, 1, 2, 3); |
979 | 0 | for (int i = 0; i < 4; i++) { |
980 | 0 | if (SWS_OP_NEEDED(prev, i)) { |
981 | 0 | const int o = nb_planes++; |
982 | 0 | swiz_wr.in[o] = i; |
983 | 0 | swiz_rd.in[i] = o; |
984 | 0 | } |
985 | 0 | } |
986 | | |
987 | | /* Determine metadata for the intermediate format */ |
988 | 0 | const SwsPixelType type = op->type; |
989 | 0 | ops2->src.format = get_planar_fmt(type, nb_planes); |
990 | 0 | ops2->src.desc = av_pix_fmt_desc_get(ops2->src.format); |
991 | 0 | get_input_size(ops1, &ops2->src); |
992 | 0 | ops1->dst = ops2->src; |
993 | |
|
994 | 0 | for (int i = 0; i < nb_planes; i++) { |
995 | 0 | ops1->plane_dst[i] = ops2->plane_src[i] = i; |
996 | 0 | ops2->comps_src.flags[i] = prev->comps.flags[swiz_wr.in[i]]; |
997 | 0 | } |
998 | |
|
999 | 0 | ff_sws_op_list_remove_at(ops1, idx, ops1->num_ops - idx); |
1000 | 0 | ff_sws_op_list_remove_at(ops2, 0, idx); |
1001 | 0 | op = NULL; /* the above command may invalidate op */ |
1002 | |
|
1003 | 0 | if (swiz_wr.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) { |
1004 | 0 | ret = ff_sws_op_list_append(ops1, &(SwsOp) { |
1005 | 0 | .op = SWS_OP_SWIZZLE, |
1006 | 0 | .type = type, |
1007 | 0 | .swizzle = swiz_wr, |
1008 | 0 | }); |
1009 | 0 | if (ret < 0) |
1010 | 0 | goto fail; |
1011 | 0 | } |
1012 | | |
1013 | 0 | ret = ff_sws_op_list_append(ops1, &(SwsOp) { |
1014 | 0 | .op = SWS_OP_WRITE, |
1015 | 0 | .type = type, |
1016 | 0 | .rw.elems = nb_planes, |
1017 | 0 | }); |
1018 | 0 | if (ret < 0) |
1019 | 0 | goto fail; |
1020 | | |
1021 | 0 | ret = ff_sws_op_list_insert_at(ops2, 0, &(SwsOp) { |
1022 | 0 | .op = SWS_OP_READ, |
1023 | 0 | .type = type, |
1024 | 0 | .rw.elems = nb_planes, |
1025 | 0 | }); |
1026 | 0 | if (ret < 0) |
1027 | 0 | goto fail; |
1028 | | |
1029 | 0 | if (swiz_rd.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) { |
1030 | 0 | ret = ff_sws_op_list_insert_at(ops2, 1, &(SwsOp) { |
1031 | 0 | .op = SWS_OP_SWIZZLE, |
1032 | 0 | .type = type, |
1033 | 0 | .swizzle = swiz_rd, |
1034 | 0 | }); |
1035 | 0 | if (ret < 0) |
1036 | 0 | goto fail; |
1037 | 0 | } |
1038 | | |
1039 | 0 | ret = ff_sws_op_list_optimize(ops1); |
1040 | 0 | if (ret < 0) |
1041 | 0 | goto fail; |
1042 | | |
1043 | 0 | ret = ff_sws_op_list_optimize(ops2); |
1044 | 0 | if (ret < 0) |
1045 | 0 | goto fail; |
1046 | | |
1047 | 0 | *out_rest = ops2; |
1048 | 0 | return 0; |
1049 | | |
1050 | 0 | fail: |
1051 | 0 | ff_sws_op_list_free(&ops2); |
1052 | 0 | return ret; |
1053 | 0 | } |