/src/ffmpeg/libswscale/ops_optimizer.c
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/attributes.h" |
22 | | #include "libavutil/avassert.h" |
23 | | #include "libavutil/bswap.h" |
24 | | #include "libavutil/rational.h" |
25 | | |
26 | | #include "ops.h" |
27 | | #include "ops_internal.h" |
28 | | |
29 | | #define RET(x) \ |
30 | 0 | do { \ |
31 | 0 | if ((ret = (x)) < 0) \ |
32 | 0 | return ret; \ |
33 | 0 | } while (0) |
34 | | |
35 | | /** |
36 | | * Try to commute a clear op with the next operation. Makes any adjustments |
37 | | * to the operations as needed, but does not perform the actual commutation. |
38 | | * |
39 | | * Returns whether successful. |
40 | | */ |
41 | | static bool op_commute_clear(SwsOp *op, SwsOp *next) |
42 | 0 | { |
43 | 0 | SwsClearOp tmp = {0}; |
44 | |
|
45 | 0 | av_assert1(op->op == SWS_OP_CLEAR); |
46 | 0 | switch (next->op) { |
47 | 0 | case SWS_OP_CONVERT: |
48 | 0 | op->type = next->convert.to; |
49 | 0 | av_fallthrough; |
50 | 0 | case SWS_OP_LSHIFT: |
51 | 0 | case SWS_OP_RSHIFT: |
52 | 0 | case SWS_OP_DITHER: |
53 | 0 | case SWS_OP_MIN: |
54 | 0 | case SWS_OP_MAX: |
55 | 0 | case SWS_OP_SCALE: |
56 | 0 | case SWS_OP_READ: |
57 | 0 | case SWS_OP_FILTER_H: |
58 | 0 | case SWS_OP_FILTER_V: |
59 | 0 | ff_sws_apply_op_q(next, op->clear.value); |
60 | 0 | return true; |
61 | 0 | case SWS_OP_SWIZZLE: |
62 | 0 | op->clear.mask = ff_sws_comp_mask_swizzle(op->clear.mask, next->swizzle); |
63 | 0 | ff_sws_apply_op_q(next, op->clear.value); |
64 | 0 | return true; |
65 | 0 | case SWS_OP_SWAP_BYTES: |
66 | 0 | switch (next->type) { |
67 | 0 | case SWS_PIXEL_U16: |
68 | 0 | ff_sws_apply_op_q(next, op->clear.value); /* always works */ |
69 | 0 | return true; |
70 | 0 | case SWS_PIXEL_U32: |
71 | 0 | for (int i = 0; i < 4; i++) { |
72 | 0 | if (!SWS_COMP_TEST(op->clear.mask, i)) |
73 | 0 | continue; |
74 | 0 | uint32_t v = av_bswap32(op->clear.value[i].num); |
75 | 0 | if (v > INT_MAX) |
76 | 0 | return false; /* can't represent as AVRational anymore */ |
77 | 0 | tmp.value[i] = Q(v); |
78 | 0 | } |
79 | 0 | op->clear = tmp; |
80 | 0 | return true; |
81 | 0 | default: |
82 | 0 | return false; |
83 | 0 | } |
84 | 0 | case SWS_OP_INVALID: |
85 | 0 | case SWS_OP_WRITE: |
86 | 0 | case SWS_OP_LINEAR: |
87 | 0 | case SWS_OP_PACK: |
88 | 0 | case SWS_OP_UNPACK: |
89 | 0 | case SWS_OP_CLEAR: |
90 | 0 | return false; |
91 | 0 | case SWS_OP_TYPE_NB: |
92 | 0 | break; |
93 | 0 | } |
94 | | |
95 | 0 | av_unreachable("Invalid operation type!"); |
96 | 0 | return false; |
97 | 0 | } |
98 | | |
99 | | /** |
100 | | * Try to commute a swizzle op with the next operation. Makes any adjustments |
101 | | * to the operations as needed, but does not perform the actual commutation. |
102 | | * |
103 | | * Returns whether successful. |
104 | | */ |
105 | | static bool op_commute_swizzle(SwsOp *op, SwsOp *next) |
106 | 0 | { |
107 | 0 | bool seen[4] = {0}; |
108 | |
|
109 | 0 | av_assert1(op->op == SWS_OP_SWIZZLE); |
110 | 0 | switch (next->op) { |
111 | 0 | case SWS_OP_CONVERT: |
112 | 0 | op->type = next->convert.to; |
113 | 0 | av_fallthrough; |
114 | 0 | case SWS_OP_SWAP_BYTES: |
115 | 0 | case SWS_OP_LSHIFT: |
116 | 0 | case SWS_OP_RSHIFT: |
117 | 0 | case SWS_OP_SCALE: |
118 | 0 | case SWS_OP_FILTER_H: |
119 | 0 | case SWS_OP_FILTER_V: |
120 | 0 | return true; |
121 | | |
122 | | /** |
123 | | * We can commute per-channel ops only if the per-channel constants are the |
124 | | * same for all duplicated channels; e.g.: |
125 | | * SWIZZLE {0, 0, 0, 3} |
126 | | * NEXT {x, x, x, w} |
127 | | * -> |
128 | | * NEXT {x, _, _, w} |
129 | | * SWIZZLE {0, 0, 0, 3} |
130 | | */ |
131 | 0 | case SWS_OP_MIN: |
132 | 0 | case SWS_OP_MAX: { |
133 | 0 | const SwsClampOp c = next->clamp; |
134 | 0 | for (int i = 0; i < 4; i++) { |
135 | 0 | if (!SWS_OP_NEEDED(op, i)) |
136 | 0 | continue; |
137 | 0 | const int j = op->swizzle.in[i]; |
138 | 0 | if (seen[j] && av_cmp_q(next->clamp.limit[j], c.limit[i])) |
139 | 0 | return false; |
140 | 0 | next->clamp.limit[j] = c.limit[i]; |
141 | 0 | seen[j] = true; |
142 | 0 | } |
143 | 0 | return true; |
144 | 0 | } |
145 | | |
146 | 0 | case SWS_OP_DITHER: { |
147 | 0 | const SwsDitherOp d = next->dither; |
148 | 0 | for (int i = 0; i < 4; i++) { |
149 | 0 | if (!SWS_OP_NEEDED(op, i)) |
150 | 0 | continue; |
151 | 0 | const int j = op->swizzle.in[i]; |
152 | 0 | if (seen[j] && next->dither.y_offset[j] != d.y_offset[i]) |
153 | 0 | return false; |
154 | 0 | next->dither.y_offset[j] = d.y_offset[i]; |
155 | 0 | seen[j] = true; |
156 | 0 | } |
157 | 0 | return true; |
158 | 0 | } |
159 | | |
160 | 0 | case SWS_OP_INVALID: |
161 | 0 | case SWS_OP_READ: |
162 | 0 | case SWS_OP_WRITE: |
163 | 0 | case SWS_OP_SWIZZLE: |
164 | 0 | case SWS_OP_CLEAR: |
165 | 0 | case SWS_OP_LINEAR: |
166 | 0 | case SWS_OP_PACK: |
167 | 0 | case SWS_OP_UNPACK: |
168 | 0 | return false; |
169 | 0 | case SWS_OP_TYPE_NB: |
170 | 0 | break; |
171 | 0 | } |
172 | | |
173 | 0 | av_unreachable("Invalid operation type!"); |
174 | 0 | return false; |
175 | 0 | } |
176 | | |
177 | | /** |
178 | | * Try to commute a filter op with the previous operation. Makes any |
179 | | * adjustments to the operations as needed, but does not perform the actual |
180 | | * commutation. |
181 | | * |
182 | | * Returns whether successful. |
183 | | */ |
184 | | static bool op_commute_filter(SwsOp *op, SwsOp *prev) |
185 | 0 | { |
186 | 0 | switch (prev->op) { |
187 | 0 | case SWS_OP_SWIZZLE: |
188 | 0 | case SWS_OP_SCALE: |
189 | 0 | case SWS_OP_LINEAR: |
190 | 0 | case SWS_OP_DITHER: |
191 | 0 | prev->type = SWS_PIXEL_F32; |
192 | 0 | return true; |
193 | 0 | case SWS_OP_CONVERT: |
194 | 0 | if (prev->convert.to == SWS_PIXEL_F32) { |
195 | 0 | av_assert0(!prev->convert.expand); |
196 | 0 | FFSWAP(SwsPixelType, op->type, prev->type); |
197 | 0 | return true; |
198 | 0 | } |
199 | 0 | return false; |
200 | 0 | case SWS_OP_INVALID: |
201 | 0 | case SWS_OP_READ: |
202 | 0 | case SWS_OP_WRITE: |
203 | 0 | case SWS_OP_SWAP_BYTES: |
204 | 0 | case SWS_OP_UNPACK: |
205 | 0 | case SWS_OP_PACK: |
206 | 0 | case SWS_OP_LSHIFT: |
207 | 0 | case SWS_OP_RSHIFT: |
208 | 0 | case SWS_OP_CLEAR: |
209 | 0 | case SWS_OP_MIN: |
210 | 0 | case SWS_OP_MAX: |
211 | 0 | case SWS_OP_FILTER_H: |
212 | 0 | case SWS_OP_FILTER_V: |
213 | 0 | return false; |
214 | 0 | case SWS_OP_TYPE_NB: |
215 | 0 | break; |
216 | 0 | } |
217 | | |
218 | 0 | av_unreachable("Invalid operation type!"); |
219 | 0 | return false; |
220 | 0 | } |
221 | | |
222 | | /* returns log2(x) only if x is a power of two, or 0 otherwise */ |
223 | | static int exact_log2(const int x) |
224 | 0 | { |
225 | 0 | int p; |
226 | 0 | if (x <= 0) |
227 | 0 | return 0; |
228 | 0 | p = av_log2(x); |
229 | 0 | return (1 << p) == x ? p : 0; |
230 | 0 | } |
231 | | |
232 | | static int exact_log2_q(const AVRational x) |
233 | 0 | { |
234 | 0 | if (x.den == 1) |
235 | 0 | return exact_log2(x.num); |
236 | 0 | else if (x.num == 1) |
237 | 0 | return -exact_log2(x.den); |
238 | 0 | else |
239 | 0 | return 0; |
240 | 0 | } |
241 | | |
242 | | /** |
243 | | * If a linear operation can be reduced to a scalar multiplication, returns |
244 | | * the corresponding scaling factor, or 0 otherwise. |
245 | | */ |
246 | | static bool extract_scalar(const SwsLinearOp *c, SwsComps comps, SwsComps prev, |
247 | | SwsScaleOp *out_scale) |
248 | 0 | { |
249 | 0 | SwsScaleOp scale = {0}; |
250 | | |
251 | | /* There are components not on the main diagonal */ |
252 | 0 | if (c->mask & ~SWS_MASK_DIAG4) |
253 | 0 | return false; |
254 | | |
255 | 0 | for (int i = 0; i < 4; i++) { |
256 | 0 | const AVRational s = c->m[i][i]; |
257 | 0 | if ((prev.flags[i] & SWS_COMP_ZERO) || |
258 | 0 | (comps.flags[i] & SWS_COMP_GARBAGE)) |
259 | 0 | continue; |
260 | 0 | if (scale.factor.den && av_cmp_q(s, scale.factor)) |
261 | 0 | return false; |
262 | 0 | scale.factor = s; |
263 | 0 | } |
264 | | |
265 | 0 | if (scale.factor.den) |
266 | 0 | *out_scale = scale; |
267 | 0 | return scale.factor.den; |
268 | 0 | } |
269 | | |
270 | | /* Extracts an integer clear operation (subset) from the given linear op. */ |
271 | | static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, |
272 | | SwsClearOp *out_clear) |
273 | 0 | { |
274 | 0 | SwsClearOp clear = {0}; |
275 | 0 | bool ret = false; |
276 | |
|
277 | 0 | for (int i = 0; i < 4; i++) { |
278 | 0 | bool const_row = c->m[i][4].den == 1; /* offset is integer */ |
279 | 0 | for (int j = 0; j < 4; j++) { |
280 | 0 | const_row &= c->m[i][j].num == 0 || /* scalar is zero */ |
281 | 0 | (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */ |
282 | 0 | } |
283 | 0 | if (const_row && (c->mask & SWS_MASK_ROW(i))) { |
284 | 0 | clear.mask |= SWS_COMP(i); |
285 | 0 | clear.value[i] = c->m[i][4]; |
286 | 0 | for (int j = 0; j < 5; j++) |
287 | 0 | c->m[i][j] = Q(i == j); |
288 | 0 | c->mask &= ~SWS_MASK_ROW(i); |
289 | 0 | ret = true; |
290 | 0 | } |
291 | 0 | } |
292 | |
|
293 | 0 | if (ret) |
294 | 0 | *out_clear = clear; |
295 | 0 | return ret; |
296 | 0 | } |
297 | | |
298 | | /* Unswizzle a linear operation by aligning single-input rows with |
299 | | * their corresponding diagonal */ |
300 | | static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz) |
301 | 0 | { |
302 | 0 | SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3); |
303 | 0 | SwsLinearOp c = *op; |
304 | | |
305 | | /* Find non-zero coefficients in the main 4x4 matrix */ |
306 | 0 | uint32_t nonzero = 0; |
307 | 0 | for (int i = 0; i < 4; i++) { |
308 | 0 | for (int j = 0; j < 4; j++) { |
309 | 0 | if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO)) |
310 | 0 | continue; |
311 | 0 | nonzero |= SWS_MASK(i, j); |
312 | 0 | } |
313 | 0 | } |
314 | | |
315 | | /* If a value is unique in its row and the target column is |
316 | | * empty, move it there and update the input swizzle */ |
317 | 0 | for (int i = 0; i < 4; i++) { |
318 | 0 | if (nonzero & SWS_MASK_COL(i)) |
319 | 0 | continue; /* target column is not empty */ |
320 | 0 | for (int j = 0; j < 4; j++) { |
321 | 0 | if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) { |
322 | | /* Move coefficient to the diagonal */ |
323 | 0 | c.m[i][i] = c.m[i][j]; |
324 | 0 | c.m[i][j] = Q(0); |
325 | 0 | swiz.in[i] = j; |
326 | 0 | break; |
327 | 0 | } |
328 | 0 | } |
329 | 0 | } |
330 | |
|
331 | 0 | if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask) |
332 | 0 | return false; /* no swizzle was identified */ |
333 | | |
334 | 0 | c.mask = ff_sws_linear_mask(c); |
335 | 0 | *out_swiz = swiz; |
336 | 0 | *op = c; |
337 | 0 | return true; |
338 | 0 | } |
339 | | |
340 | | int ff_sws_op_list_optimize(SwsOpList *ops) |
341 | 0 | { |
342 | 0 | int ret; |
343 | |
|
344 | 0 | retry: |
345 | 0 | ff_sws_op_list_update_comps(ops); |
346 | | |
347 | | /* Try to push filters towards the input; do this first to unblock |
348 | | * in-place optimizations like linear op fusion */ |
349 | 0 | for (int n = 1; n < ops->num_ops; n++) { |
350 | 0 | SwsOp *op = &ops->ops[n]; |
351 | 0 | SwsOp *prev = &ops->ops[n - 1]; |
352 | |
|
353 | 0 | switch (op->op) { |
354 | 0 | case SWS_OP_FILTER_H: |
355 | 0 | case SWS_OP_FILTER_V: |
356 | 0 | if (op_commute_filter(op, prev)) { |
357 | 0 | FFSWAP(SwsOp, *op, *prev); |
358 | 0 | goto retry; |
359 | 0 | } |
360 | 0 | break; |
361 | 0 | } |
362 | 0 | } |
363 | | |
364 | | /* Apply all in-place optimizations (that do not re-order the list) */ |
365 | 0 | for (int n = 0; n < ops->num_ops; n++) { |
366 | 0 | SwsOp dummy = {0}; |
367 | 0 | SwsOp *op = &ops->ops[n]; |
368 | 0 | SwsOp *prev = n ? &ops->ops[n - 1] : &dummy; |
369 | 0 | SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy; |
370 | | |
371 | | /* common helper variable */ |
372 | 0 | bool noop = true; |
373 | |
|
374 | 0 | if (!SWS_OP_NEEDED(op, 0) && !SWS_OP_NEEDED(op, 1) && |
375 | 0 | !SWS_OP_NEEDED(op, 2) && !SWS_OP_NEEDED(op, 3) && |
376 | 0 | op->op != SWS_OP_WRITE) |
377 | 0 | { |
378 | | /* Remove any operation whose output is not needed */ |
379 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
380 | 0 | goto retry; |
381 | 0 | } |
382 | | |
383 | 0 | switch (op->op) { |
384 | 0 | case SWS_OP_READ: |
385 | | /* "Compress" planar reads where not all components are needed */ |
386 | 0 | if (!op->rw.packed) { |
387 | 0 | SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3); |
388 | 0 | int nb_planes = 0; |
389 | 0 | for (int i = 0; i < op->rw.elems; i++) { |
390 | 0 | if (!SWS_OP_NEEDED(op, i)) { |
391 | 0 | swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */ |
392 | 0 | continue; |
393 | 0 | } |
394 | | |
395 | 0 | const int idx = nb_planes++; |
396 | 0 | av_assert1(idx <= i); |
397 | 0 | ops->plane_src[idx] = ops->plane_src[i]; |
398 | 0 | swiz.in[i] = idx; |
399 | 0 | } |
400 | |
|
401 | 0 | if (nb_planes < op->rw.elems) { |
402 | 0 | op->rw.elems = nb_planes; |
403 | 0 | RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) { |
404 | 0 | .op = SWS_OP_SWIZZLE, |
405 | 0 | .type = op->rw.filter ? SWS_PIXEL_F32 : op->type, |
406 | 0 | .swizzle = swiz, |
407 | 0 | })); |
408 | 0 | goto retry; |
409 | 0 | } |
410 | 0 | } |
411 | 0 | break; |
412 | | |
413 | 0 | case SWS_OP_SWAP_BYTES: |
414 | | /* Redundant (double) swap */ |
415 | 0 | if (next->op == SWS_OP_SWAP_BYTES) { |
416 | 0 | ff_sws_op_list_remove_at(ops, n, 2); |
417 | 0 | goto retry; |
418 | 0 | } |
419 | 0 | break; |
420 | | |
421 | 0 | case SWS_OP_UNPACK: |
422 | | /* Redundant unpack+pack */ |
423 | 0 | if (next->op == SWS_OP_PACK && next->type == op->type && |
424 | 0 | next->pack.pattern[0] == op->pack.pattern[0] && |
425 | 0 | next->pack.pattern[1] == op->pack.pattern[1] && |
426 | 0 | next->pack.pattern[2] == op->pack.pattern[2] && |
427 | 0 | next->pack.pattern[3] == op->pack.pattern[3]) |
428 | 0 | { |
429 | 0 | ff_sws_op_list_remove_at(ops, n, 2); |
430 | 0 | goto retry; |
431 | 0 | } |
432 | 0 | break; |
433 | | |
434 | 0 | case SWS_OP_LSHIFT: |
435 | 0 | case SWS_OP_RSHIFT: |
436 | | /* Two shifts in the same direction */ |
437 | 0 | if (next->op == op->op) { |
438 | 0 | op->shift.amount += next->shift.amount; |
439 | 0 | ff_sws_op_list_remove_at(ops, n + 1, 1); |
440 | 0 | goto retry; |
441 | 0 | } |
442 | | |
443 | | /* No-op shift */ |
444 | 0 | if (!op->shift.amount) { |
445 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
446 | 0 | goto retry; |
447 | 0 | } |
448 | 0 | break; |
449 | | |
450 | 0 | case SWS_OP_CLEAR: |
451 | 0 | for (int i = 0; i < 4; i++) { |
452 | 0 | if (!SWS_COMP_TEST(op->clear.mask, i)) |
453 | 0 | continue; |
454 | | |
455 | 0 | if ((prev->comps.flags[i] & SWS_COMP_ZERO) && |
456 | 0 | !(prev->comps.flags[i] & SWS_COMP_GARBAGE) && |
457 | 0 | op->clear.value[i].num == 0) |
458 | 0 | { |
459 | | /* Redundant clear-to-zero of zero component */ |
460 | 0 | op->clear.mask ^= SWS_COMP(i); |
461 | 0 | } else if (!SWS_OP_NEEDED(op, i)) { |
462 | | /* Unnecessary clear of unused component */ |
463 | 0 | op->clear.mask ^= SWS_COMP(i); |
464 | 0 | } else { |
465 | 0 | noop = false; |
466 | 0 | } |
467 | 0 | } |
468 | |
|
469 | 0 | if (noop) { |
470 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
471 | 0 | goto retry; |
472 | 0 | } |
473 | | |
474 | | /* Transitive clear */ |
475 | 0 | if (next->op == SWS_OP_CLEAR) { |
476 | 0 | for (int i = 0; i < 4; i++) { |
477 | 0 | if (SWS_COMP_TEST(next->clear.mask, i)) |
478 | 0 | op->clear.value[i] = next->clear.value[i]; |
479 | 0 | } |
480 | 0 | op->clear.mask |= next->clear.mask; |
481 | 0 | ff_sws_op_list_remove_at(ops, n + 1, 1); |
482 | 0 | goto retry; |
483 | 0 | } |
484 | 0 | break; |
485 | | |
486 | 0 | case SWS_OP_SWIZZLE: |
487 | 0 | for (int i = 0; i < 4; i++) { |
488 | 0 | if (!SWS_OP_NEEDED(op, i)) |
489 | 0 | continue; |
490 | 0 | if (op->swizzle.in[i] != i) |
491 | 0 | noop = false; |
492 | 0 | } |
493 | | |
494 | | /* Identity swizzle */ |
495 | 0 | if (noop) { |
496 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
497 | 0 | goto retry; |
498 | 0 | } |
499 | | |
500 | | /* Transitive swizzle */ |
501 | 0 | if (next->op == SWS_OP_SWIZZLE) { |
502 | 0 | const SwsSwizzleOp orig = op->swizzle; |
503 | 0 | for (int i = 0; i < 4; i++) |
504 | 0 | op->swizzle.in[i] = orig.in[next->swizzle.in[i]]; |
505 | 0 | ff_sws_op_list_remove_at(ops, n + 1, 1); |
506 | 0 | goto retry; |
507 | 0 | } |
508 | | |
509 | | /* Swizzle planes instead of components, if possible */ |
510 | 0 | if (prev->op == SWS_OP_READ && !prev->rw.packed) { |
511 | 0 | for (int dst = 0; dst < prev->rw.elems; dst++) { |
512 | 0 | const int src = op->swizzle.in[dst]; |
513 | 0 | if (src > dst && src < prev->rw.elems) { |
514 | 0 | FFSWAP(int, ops->plane_src[dst], ops->plane_src[src]); |
515 | 0 | for (int i = dst; i < 4; i++) { |
516 | 0 | if (op->swizzle.in[i] == dst) |
517 | 0 | op->swizzle.in[i] = src; |
518 | 0 | else if (op->swizzle.in[i] == src) |
519 | 0 | op->swizzle.in[i] = dst; |
520 | 0 | } |
521 | 0 | goto retry; |
522 | 0 | } |
523 | 0 | } |
524 | 0 | } |
525 | | |
526 | 0 | if (next->op == SWS_OP_WRITE && !next->rw.packed) { |
527 | 0 | for (int dst = 0; dst < next->rw.elems; dst++) { |
528 | 0 | const int src = op->swizzle.in[dst]; |
529 | 0 | if (src > dst && src < next->rw.elems) { |
530 | 0 | FFSWAP(int, ops->plane_dst[dst], ops->plane_dst[src]); |
531 | 0 | FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]); |
532 | 0 | goto retry; |
533 | 0 | } |
534 | 0 | } |
535 | 0 | } |
536 | 0 | break; |
537 | | |
538 | 0 | case SWS_OP_CONVERT: |
539 | | /* No-op conversion */ |
540 | 0 | if (op->type == op->convert.to) { |
541 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
542 | 0 | goto retry; |
543 | 0 | } |
544 | | |
545 | | /* Transitive conversion */ |
546 | 0 | if (next->op == SWS_OP_CONVERT && |
547 | 0 | op->convert.expand == next->convert.expand) |
548 | 0 | { |
549 | 0 | av_assert1(op->convert.to == next->type); |
550 | 0 | op->convert.to = next->convert.to; |
551 | 0 | ff_sws_op_list_remove_at(ops, n + 1, 1); |
552 | 0 | goto retry; |
553 | 0 | } |
554 | | |
555 | | /* Conversion followed by integer expansion */ |
556 | 0 | if (next->op == SWS_OP_SCALE && !op->convert.expand && |
557 | 0 | ff_sws_pixel_type_is_int(op->type) && |
558 | 0 | ff_sws_pixel_type_is_int(op->convert.to) && |
559 | 0 | !av_cmp_q(next->scale.factor, |
560 | 0 | ff_sws_pixel_expand(op->type, op->convert.to))) |
561 | 0 | { |
562 | 0 | op->convert.expand = true; |
563 | 0 | ff_sws_op_list_remove_at(ops, n + 1, 1); |
564 | 0 | goto retry; |
565 | 0 | } |
566 | 0 | break; |
567 | | |
568 | 0 | case SWS_OP_MIN: |
569 | 0 | for (int i = 0; i < 4; i++) { |
570 | 0 | if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den) |
571 | 0 | continue; |
572 | 0 | if (av_cmp_q(op->clamp.limit[i], prev->comps.max[i]) < 0) |
573 | 0 | noop = false; |
574 | 0 | } |
575 | |
|
576 | 0 | if (noop) { |
577 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
578 | 0 | goto retry; |
579 | 0 | } |
580 | 0 | break; |
581 | | |
582 | 0 | case SWS_OP_MAX: |
583 | 0 | for (int i = 0; i < 4; i++) { |
584 | 0 | if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den) |
585 | 0 | continue; |
586 | 0 | if (av_cmp_q(prev->comps.min[i], op->clamp.limit[i]) < 0) |
587 | 0 | noop = false; |
588 | 0 | } |
589 | |
|
590 | 0 | if (noop) { |
591 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
592 | 0 | goto retry; |
593 | 0 | } |
594 | 0 | break; |
595 | | |
596 | 0 | case SWS_OP_DITHER: |
597 | 0 | for (int i = 0; i < 4; i++) { |
598 | 0 | if (op->dither.y_offset[i] < 0) |
599 | 0 | continue; |
600 | 0 | if (!SWS_OP_NEEDED(op, i) || (prev->comps.flags[i] & SWS_COMP_EXACT)) { |
601 | 0 | op->dither.y_offset[i] = -1; /* unnecessary dither */ |
602 | 0 | goto retry; |
603 | 0 | } else { |
604 | 0 | noop = false; |
605 | 0 | } |
606 | 0 | } |
607 | | |
608 | 0 | if (noop) { |
609 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
610 | 0 | goto retry; |
611 | 0 | } |
612 | 0 | break; |
613 | | |
614 | 0 | case SWS_OP_LINEAR: { |
615 | 0 | SwsSwizzleOp swizzle; |
616 | 0 | SwsClearOp clear; |
617 | 0 | SwsScaleOp scale; |
618 | | |
619 | | /* No-op (identity) linear operation */ |
620 | 0 | if (!op->lin.mask) { |
621 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
622 | 0 | goto retry; |
623 | 0 | } |
624 | | |
625 | 0 | if (next->op == SWS_OP_LINEAR) { |
626 | | /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */ |
627 | 0 | const SwsLinearOp m1 = op->lin; |
628 | 0 | const SwsLinearOp m2 = next->lin; |
629 | 0 | for (int i = 0; i < 4; i++) { |
630 | 0 | for (int j = 0; j < 5; j++) { |
631 | 0 | AVRational sum = Q(0); |
632 | 0 | for (int k = 0; k < 4; k++) |
633 | 0 | sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j])); |
634 | 0 | if (j == 4) /* m1.m[4][j] == 1 */ |
635 | 0 | sum = av_add_q(sum, m2.m[i][4]); |
636 | 0 | op->lin.m[i][j] = sum; |
637 | 0 | } |
638 | 0 | } |
639 | 0 | op->lin.mask = ff_sws_linear_mask(op->lin); |
640 | 0 | ff_sws_op_list_remove_at(ops, n + 1, 1); |
641 | 0 | goto retry; |
642 | 0 | } |
643 | | |
644 | | /* Optimize away zero columns */ |
645 | 0 | for (int j = 0; j < 4; j++) { |
646 | 0 | const uint32_t col = SWS_MASK_COL(j); |
647 | 0 | if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col)) |
648 | 0 | continue; |
649 | 0 | for (int i = 0; i < 4; i++) |
650 | 0 | op->lin.m[i][j] = Q(i == j); |
651 | 0 | op->lin.mask &= ~col; |
652 | 0 | goto retry; |
653 | 0 | } |
654 | | |
655 | | /* Optimize away unused rows */ |
656 | 0 | for (int i = 0; i < 4; i++) { |
657 | 0 | const uint32_t row = SWS_MASK_ROW(i); |
658 | 0 | if (SWS_OP_NEEDED(op, i) || !(op->lin.mask & row)) |
659 | 0 | continue; |
660 | 0 | for (int j = 0; j < 5; j++) |
661 | 0 | op->lin.m[i][j] = Q(i == j); |
662 | 0 | op->lin.mask &= ~row; |
663 | 0 | goto retry; |
664 | 0 | } |
665 | | |
666 | | /* Convert constant rows to explicit clear instruction */ |
667 | 0 | if (extract_constant_rows(&op->lin, prev->comps, &clear)) { |
668 | 0 | RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) { |
669 | 0 | .op = SWS_OP_CLEAR, |
670 | 0 | .type = op->type, |
671 | 0 | .comps = op->comps, |
672 | 0 | .clear = clear, |
673 | 0 | })); |
674 | 0 | goto retry; |
675 | 0 | } |
676 | | |
677 | | /* Multiplication by scalar constant */ |
678 | 0 | if (extract_scalar(&op->lin, op->comps, prev->comps, &scale)) { |
679 | 0 | op->op = SWS_OP_SCALE; |
680 | 0 | op->scale = scale; |
681 | 0 | goto retry; |
682 | 0 | } |
683 | | |
684 | | /* Swizzle by fixed pattern */ |
685 | 0 | if (extract_swizzle(&op->lin, prev->comps, &swizzle)) { |
686 | 0 | RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) { |
687 | 0 | .op = SWS_OP_SWIZZLE, |
688 | 0 | .type = op->type, |
689 | 0 | .swizzle = swizzle, |
690 | 0 | })); |
691 | 0 | goto retry; |
692 | 0 | } |
693 | 0 | break; |
694 | 0 | } |
695 | | |
696 | 0 | case SWS_OP_SCALE: { |
697 | 0 | const int factor2 = exact_log2_q(op->scale.factor); |
698 | | |
699 | | /* No-op scaling */ |
700 | 0 | if (op->scale.factor.num == 1 && op->scale.factor.den == 1) { |
701 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
702 | 0 | goto retry; |
703 | 0 | } |
704 | | |
705 | | /* Merge consecutive scaling operations (that don't overflow) */ |
706 | 0 | if (next->op == SWS_OP_SCALE) { |
707 | 0 | int64_t p = op->scale.factor.num * (int64_t) next->scale.factor.num; |
708 | 0 | int64_t q = op->scale.factor.den * (int64_t) next->scale.factor.den; |
709 | 0 | if (FFABS(p) <= INT_MAX && FFABS(q) <= INT_MAX) { |
710 | 0 | av_reduce(&op->scale.factor.num, &op->scale.factor.den, p, q, INT_MAX); |
711 | 0 | ff_sws_op_list_remove_at(ops, n + 1, 1); |
712 | 0 | goto retry; |
713 | 0 | } |
714 | 0 | } |
715 | | |
716 | | /* Scaling by exact power of two */ |
717 | 0 | if (factor2 && ff_sws_pixel_type_is_int(op->type)) { |
718 | 0 | op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT; |
719 | 0 | op->shift.amount = FFABS(factor2); |
720 | 0 | goto retry; |
721 | 0 | } |
722 | 0 | break; |
723 | 0 | } |
724 | | |
725 | 0 | case SWS_OP_FILTER_H: |
726 | 0 | case SWS_OP_FILTER_V: |
727 | | /* Merge with prior simple planar read */ |
728 | 0 | if (prev->op == SWS_OP_READ && !prev->rw.filter && |
729 | 0 | !prev->rw.packed && !prev->rw.frac) { |
730 | 0 | prev->rw.filter = op->op; |
731 | 0 | prev->rw.kernel = av_refstruct_ref(op->filter.kernel); |
732 | 0 | ff_sws_op_list_remove_at(ops, n, 1); |
733 | 0 | goto retry; |
734 | 0 | } |
735 | 0 | break; |
736 | 0 | } |
737 | 0 | } |
738 | | |
739 | | /* Push clears to the back to void any unused components */ |
740 | 0 | for (int n = 0; n < ops->num_ops - 1; n++) { |
741 | 0 | SwsOp *op = &ops->ops[n]; |
742 | 0 | SwsOp *next = &ops->ops[n + 1]; |
743 | |
|
744 | 0 | switch (op->op) { |
745 | 0 | case SWS_OP_CLEAR: |
746 | 0 | if (op_commute_clear(op, next)) { |
747 | 0 | FFSWAP(SwsOp, *op, *next); |
748 | 0 | goto retry; |
749 | 0 | } |
750 | 0 | break; |
751 | 0 | } |
752 | 0 | } |
753 | | |
754 | | /* Apply any remaining preferential re-ordering optimizations; do these |
755 | | * last because they are more likely to block other optimizations if done |
756 | | * too aggressively */ |
757 | 0 | for (int n = 0; n < ops->num_ops - 1; n++) { |
758 | 0 | SwsOp *op = &ops->ops[n]; |
759 | 0 | SwsOp *next = &ops->ops[n + 1]; |
760 | |
|
761 | 0 | switch (op->op) { |
762 | 0 | case SWS_OP_SWIZZLE: { |
763 | | /* Try to push swizzles towards the output */ |
764 | 0 | if (op_commute_swizzle(op, next)) { |
765 | 0 | FFSWAP(SwsOp, *op, *next); |
766 | 0 | goto retry; |
767 | 0 | } |
768 | 0 | break; |
769 | 0 | } |
770 | | |
771 | 0 | case SWS_OP_SCALE: |
772 | | /* Scaling by integer before conversion to int */ |
773 | 0 | if (op->scale.factor.den == 1 && next->op == SWS_OP_CONVERT && |
774 | 0 | ff_sws_pixel_type_is_int(next->convert.to)) |
775 | 0 | { |
776 | 0 | op->type = next->convert.to; |
777 | 0 | FFSWAP(SwsOp, *op, *next); |
778 | 0 | goto retry; |
779 | 0 | } |
780 | 0 | break; |
781 | 0 | } |
782 | 0 | } |
783 | | |
784 | 0 | return 0; |
785 | 0 | } |
786 | | |
787 | | int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], |
788 | | int size, uint8_t clear_val, |
789 | | int *read_bytes, int *write_bytes) |
790 | 0 | { |
791 | 0 | if (!ops->num_ops) |
792 | 0 | return AVERROR(EINVAL); |
793 | | |
794 | 0 | const SwsOp *read = ff_sws_op_list_input(ops); |
795 | 0 | if (!read || read->rw.frac || read->rw.filter || |
796 | 0 | (!read->rw.packed && read->rw.elems > 1)) |
797 | 0 | return AVERROR(ENOTSUP); |
798 | | |
799 | 0 | const int read_size = ff_sws_pixel_type_size(read->type); |
800 | 0 | uint32_t mask[4] = {0}; |
801 | 0 | for (int i = 0; i < read->rw.elems; i++) |
802 | 0 | mask[i] = 0x01010101 * i * read_size + 0x03020100; |
803 | |
|
804 | 0 | for (int opidx = 1; opidx < ops->num_ops; opidx++) { |
805 | 0 | const SwsOp *op = &ops->ops[opidx]; |
806 | 0 | switch (op->op) { |
807 | 0 | case SWS_OP_SWIZZLE: { |
808 | 0 | uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] }; |
809 | 0 | for (int i = 0; i < 4; i++) |
810 | 0 | mask[i] = orig[op->swizzle.in[i]]; |
811 | 0 | break; |
812 | 0 | } |
813 | | |
814 | 0 | case SWS_OP_SWAP_BYTES: |
815 | 0 | for (int i = 0; i < 4; i++) { |
816 | 0 | switch (ff_sws_pixel_type_size(op->type)) { |
817 | 0 | case 2: mask[i] = av_bswap16(mask[i]); break; |
818 | 0 | case 4: mask[i] = av_bswap32(mask[i]); break; |
819 | 0 | } |
820 | 0 | } |
821 | 0 | break; |
822 | | |
823 | 0 | case SWS_OP_CLEAR: |
824 | 0 | for (int i = 0; i < 4; i++) { |
825 | 0 | if (!SWS_COMP_TEST(op->clear.mask, i)) |
826 | 0 | continue; |
827 | 0 | if (op->clear.value[i].num != 0 || !clear_val) |
828 | 0 | return AVERROR(ENOTSUP); |
829 | 0 | mask[i] = 0x1010101ul * clear_val; |
830 | 0 | } |
831 | 0 | break; |
832 | | |
833 | 0 | case SWS_OP_CONVERT: { |
834 | 0 | if (!op->convert.expand) |
835 | 0 | return AVERROR(ENOTSUP); |
836 | 0 | for (int i = 0; i < 4; i++) { |
837 | 0 | switch (ff_sws_pixel_type_size(op->type)) { |
838 | 0 | case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break; |
839 | 0 | case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break; |
840 | 0 | } |
841 | 0 | } |
842 | 0 | break; |
843 | 0 | } |
844 | | |
845 | 0 | case SWS_OP_WRITE: { |
846 | 0 | if (op->rw.frac || op->rw.filter || |
847 | 0 | (!op->rw.packed && op->rw.elems > 1)) |
848 | 0 | return AVERROR(ENOTSUP); |
849 | | |
850 | | /* Initialize to no-op */ |
851 | 0 | memset(shuffle, clear_val, size); |
852 | |
|
853 | 0 | const int write_size = ff_sws_pixel_type_size(op->type); |
854 | 0 | const int read_chunk = read->rw.elems * read_size; |
855 | 0 | const int write_chunk = op->rw.elems * write_size; |
856 | 0 | const int num_groups = size / FFMAX(read_chunk, write_chunk); |
857 | 0 | for (int n = 0; n < num_groups; n++) { |
858 | 0 | const int base_in = n * read_chunk; |
859 | 0 | const int base_out = n * write_chunk; |
860 | 0 | for (int i = 0; i < op->rw.elems; i++) { |
861 | 0 | const int offset = base_out + i * write_size; |
862 | 0 | for (int b = 0; b < write_size; b++) { |
863 | 0 | const uint8_t idx = mask[i] >> (b * 8); |
864 | 0 | if (idx != clear_val) |
865 | 0 | shuffle[offset + b] = base_in + idx; |
866 | 0 | } |
867 | 0 | } |
868 | 0 | } |
869 | |
|
870 | 0 | *read_bytes = num_groups * read_chunk; |
871 | 0 | *write_bytes = num_groups * write_chunk; |
872 | 0 | return num_groups; |
873 | 0 | } |
874 | | |
875 | 0 | default: |
876 | 0 | return AVERROR(ENOTSUP); |
877 | 0 | } |
878 | 0 | } |
879 | | |
880 | 0 | return AVERROR(EINVAL); |
881 | 0 | } |
882 | | |
883 | | /** |
884 | | * Determine a suitable intermediate buffer format for a given combination |
885 | | * of pixel types and number of planes. The exact interpretation of these |
886 | | * formats does not matter at all; since they will only ever be used as |
887 | | * temporary intermediate buffers. We still need to pick *some* format as |
888 | | * a consequence of ff_sws_graph_add_pass() taking an AVPixelFormat for the |
889 | | * output buffer. |
890 | | */ |
891 | | static enum AVPixelFormat get_planar_fmt(SwsPixelType type, int nb_planes) |
892 | 0 | { |
893 | 0 | switch (ff_sws_pixel_type_size(type)) { |
894 | 0 | case 1: |
895 | 0 | switch (nb_planes) { |
896 | 0 | case 1: return AV_PIX_FMT_GRAY8; |
897 | 0 | case 2: return AV_PIX_FMT_YUV444P; // FIXME: no 2-plane planar fmt |
898 | 0 | case 3: return AV_PIX_FMT_YUV444P; |
899 | 0 | case 4: return AV_PIX_FMT_YUVA444P; |
900 | 0 | } |
901 | 0 | break; |
902 | 0 | case 2: |
903 | 0 | switch (nb_planes) { |
904 | 0 | case 1: return AV_PIX_FMT_GRAY16; |
905 | 0 | case 2: return AV_PIX_FMT_YUV444P16; // FIXME: no 2-plane planar fmt |
906 | 0 | case 3: return AV_PIX_FMT_YUV444P16; |
907 | 0 | case 4: return AV_PIX_FMT_YUVA444P16; |
908 | 0 | } |
909 | 0 | break; |
910 | 0 | case 4: |
911 | 0 | switch (nb_planes) { |
912 | 0 | case 1: return AV_PIX_FMT_GRAYF32; |
913 | 0 | case 2: return AV_PIX_FMT_GBRPF32; // FIXME: no 2-plane planar fmt |
914 | 0 | case 3: return AV_PIX_FMT_GBRPF32; |
915 | 0 | case 4: return AV_PIX_FMT_GBRAPF32; |
916 | 0 | } |
917 | 0 | break; |
918 | 0 | } |
919 | | |
920 | 0 | av_unreachable("Invalid pixel type or number of planes?"); |
921 | 0 | return AV_PIX_FMT_NONE; |
922 | 0 | } |
923 | | |
924 | | static void get_input_size(const SwsOpList *ops, SwsFormat *fmt) |
925 | 0 | { |
926 | 0 | fmt->width = ops->src.width; |
927 | 0 | fmt->height = ops->src.height; |
928 | |
|
929 | 0 | const SwsOp *read = ff_sws_op_list_input(ops); |
930 | 0 | if (read && read->rw.filter == SWS_OP_FILTER_V) { |
931 | 0 | fmt->height = read->rw.kernel->dst_size; |
932 | 0 | } else if (read && read->rw.filter == SWS_OP_FILTER_H) { |
933 | 0 | fmt->width = read->rw.kernel->dst_size; |
934 | 0 | } |
935 | 0 | } |
936 | | |
937 | | int ff_sws_op_list_subpass(SwsOpList *ops1, SwsOpList **out_rest) |
938 | 0 | { |
939 | 0 | const SwsOp *op; |
940 | 0 | int ret, idx; |
941 | |
|
942 | 0 | for (idx = 0; idx < ops1->num_ops; idx++) { |
943 | 0 | op = &ops1->ops[idx]; |
944 | 0 | if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V) |
945 | 0 | break; |
946 | 0 | } |
947 | |
|
948 | 0 | if (idx == ops1->num_ops) { |
949 | 0 | *out_rest = NULL; |
950 | 0 | return 0; |
951 | 0 | } |
952 | | |
953 | 0 | av_assert0(idx > 0); |
954 | 0 | const SwsOp *prev = &ops1->ops[idx - 1]; |
955 | |
|
956 | 0 | SwsOpList *ops2 = ff_sws_op_list_duplicate(ops1); |
957 | 0 | if (!ops2) |
958 | 0 | return AVERROR(ENOMEM); |
959 | | |
960 | | /** |
961 | | * Not all components may be needed; but we need the ones that *are* |
962 | | * used to be contiguous for the write/read operations. So, first |
963 | | * compress them into a linearly ascending list of components |
964 | | */ |
965 | 0 | int nb_planes = 0; |
966 | 0 | SwsSwizzleOp swiz_wr = SWS_SWIZZLE(0, 1, 2, 3); |
967 | 0 | SwsSwizzleOp swiz_rd = SWS_SWIZZLE(0, 1, 2, 3); |
968 | 0 | for (int i = 0; i < 4; i++) { |
969 | 0 | if (SWS_OP_NEEDED(prev, i)) { |
970 | 0 | const int o = nb_planes++; |
971 | 0 | swiz_wr.in[o] = i; |
972 | 0 | swiz_rd.in[i] = o; |
973 | 0 | } |
974 | 0 | } |
975 | | |
976 | | /* Determine metadata for the intermediate format */ |
977 | 0 | const SwsPixelType type = op->type; |
978 | 0 | ops2->src.format = get_planar_fmt(type, nb_planes); |
979 | 0 | ops2->src.desc = av_pix_fmt_desc_get(ops2->src.format); |
980 | 0 | get_input_size(ops1, &ops2->src); |
981 | 0 | ops1->dst = ops2->src; |
982 | |
|
983 | 0 | for (int i = 0; i < nb_planes; i++) { |
984 | 0 | ops1->plane_dst[i] = ops2->plane_src[i] = i; |
985 | 0 | ops2->comps_src.flags[i] = prev->comps.flags[swiz_wr.in[i]]; |
986 | 0 | } |
987 | |
|
988 | 0 | ff_sws_op_list_remove_at(ops1, idx, ops1->num_ops - idx); |
989 | 0 | ff_sws_op_list_remove_at(ops2, 0, idx); |
990 | 0 | op = NULL; /* the above command may invalidate op */ |
991 | |
|
992 | 0 | if (swiz_wr.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) { |
993 | 0 | ret = ff_sws_op_list_append(ops1, &(SwsOp) { |
994 | 0 | .op = SWS_OP_SWIZZLE, |
995 | 0 | .type = type, |
996 | 0 | .swizzle = swiz_wr, |
997 | 0 | }); |
998 | 0 | if (ret < 0) |
999 | 0 | goto fail; |
1000 | 0 | } |
1001 | | |
1002 | 0 | ret = ff_sws_op_list_append(ops1, &(SwsOp) { |
1003 | 0 | .op = SWS_OP_WRITE, |
1004 | 0 | .type = type, |
1005 | 0 | .rw.elems = nb_planes, |
1006 | 0 | }); |
1007 | 0 | if (ret < 0) |
1008 | 0 | goto fail; |
1009 | | |
1010 | 0 | ret = ff_sws_op_list_insert_at(ops2, 0, &(SwsOp) { |
1011 | 0 | .op = SWS_OP_READ, |
1012 | 0 | .type = type, |
1013 | 0 | .rw.elems = nb_planes, |
1014 | 0 | }); |
1015 | 0 | if (ret < 0) |
1016 | 0 | goto fail; |
1017 | | |
1018 | 0 | if (swiz_rd.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) { |
1019 | 0 | ret = ff_sws_op_list_insert_at(ops2, 1, &(SwsOp) { |
1020 | 0 | .op = SWS_OP_SWIZZLE, |
1021 | 0 | .type = type, |
1022 | 0 | .swizzle = swiz_rd, |
1023 | 0 | }); |
1024 | 0 | if (ret < 0) |
1025 | 0 | goto fail; |
1026 | 0 | } |
1027 | | |
1028 | 0 | ret = ff_sws_op_list_optimize(ops1); |
1029 | 0 | if (ret < 0) |
1030 | 0 | goto fail; |
1031 | | |
1032 | 0 | ret = ff_sws_op_list_optimize(ops2); |
1033 | 0 | if (ret < 0) |
1034 | 0 | goto fail; |
1035 | | |
1036 | 0 | *out_rest = ops2; |
1037 | 0 | return 0; |
1038 | | |
1039 | 0 | fail: |
1040 | 0 | ff_sws_op_list_free(&ops2); |
1041 | 0 | return ret; |
1042 | 0 | } |