/src/ffmpeg/libswscale/ops_chain.c
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/avassert.h" |
22 | | #include "libavutil/mem.h" |
23 | | #include "libavutil/rational.h" |
24 | | |
25 | | #include "ops_chain.h" |
26 | | |
/* Build the AVRational N/1, i.e. the integer N expressed as a rational. */
#define Q(N) ((AVRational) { .num = (N), .den = 1 })
28 | | |
29 | | SwsOpChain *ff_sws_op_chain_alloc(void) |
30 | 0 | { |
31 | 0 | return av_mallocz(sizeof(SwsOpChain)); |
32 | 0 | } |
33 | | |
34 | | void ff_sws_op_chain_free_cb(void *ptr) |
35 | 0 | { |
36 | 0 | if (!ptr) |
37 | 0 | return; |
38 | | |
39 | 0 | SwsOpChain *chain = ptr; |
40 | 0 | for (int i = 0; i < chain->num_impl + 1; i++) { |
41 | 0 | if (chain->free[i]) |
42 | 0 | chain->free[i](chain->impl[i].priv.ptr); |
43 | 0 | } |
44 | |
|
45 | 0 | av_free(chain); |
46 | 0 | } |
47 | | |
48 | | int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, |
49 | | void (*free)(void *), const SwsOpPriv *priv) |
50 | 0 | { |
51 | 0 | const int idx = chain->num_impl; |
52 | 0 | if (idx == SWS_MAX_OPS) |
53 | 0 | return AVERROR(EINVAL); |
54 | | |
55 | 0 | av_assert1(func); |
56 | 0 | chain->impl[idx].cont = func; |
57 | 0 | chain->impl[idx + 1].priv = *priv; |
58 | 0 | chain->free[idx + 1] = free; |
59 | 0 | chain->num_impl++; |
60 | 0 | return 0; |
61 | 0 | } |
62 | | |
63 | | /** |
64 | | * Match an operation against a reference operation. Returns a score for how |
65 | | * well the reference matches the operation, or 0 if there is no match. |
66 | | * |
67 | | * If `ref->comps` has any flags set, they must be set in `op` as well. |
68 | | * Likewise, if `ref->comps` has any components marked as unused, they must be |
69 | | * marked as unused in `ops` as well. |
70 | | * |
71 | | * For SWS_OP_LINEAR, `ref->linear.mask` must be a strict superset of |
72 | | * `op->linear.mask`, but may not contain any columns explicitly ignored by |
73 | | * `op->comps.unused`. |
74 | | * |
75 | | * For SWS_OP_READ, SWS_OP_WRITE, SWS_OP_SWAP_BYTES and SWS_OP_SWIZZLE, the |
76 | | * exact type is not checked, just the size. |
77 | | * |
78 | | * Components set in `next.unused` are ignored when matching. If `flexible` |
79 | | * is true, the op body is ignored - only the operation, pixel type, and |
80 | | * component masks are checked. |
81 | | */ |
82 | | static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps next) |
83 | 0 | { |
84 | 0 | int score = 10; |
85 | 0 | if (op->op != entry->op) |
86 | 0 | return 0; |
87 | | |
88 | 0 | switch (op->op) { |
89 | 0 | case SWS_OP_READ: |
90 | 0 | case SWS_OP_WRITE: |
91 | 0 | case SWS_OP_SWAP_BYTES: |
92 | 0 | case SWS_OP_SWIZZLE: |
93 | | /* Only the size matters for these operations */ |
94 | 0 | if (ff_sws_pixel_type_size(op->type) != ff_sws_pixel_type_size(entry->type)) |
95 | 0 | return 0; |
96 | 0 | break; |
97 | 0 | default: |
98 | 0 | if (op->type != entry->type) |
99 | 0 | return 0; |
100 | 0 | break; |
101 | 0 | } |
102 | | |
103 | 0 | for (int i = 0; i < 4; i++) { |
104 | 0 | if (entry->unused[i]) { |
105 | 0 | if (op->comps.unused[i]) |
106 | 0 | score += 1; /* Operating on fewer components is better .. */ |
107 | 0 | else |
108 | 0 | return 0; /* .. but not too few! */ |
109 | 0 | } |
110 | 0 | } |
111 | | |
112 | 0 | if (op->op == SWS_OP_CLEAR) { |
113 | | /* Clear pattern must match exactly, regardless of `entry->flexible` */ |
114 | 0 | for (int i = 0; i < 4; i++) { |
115 | 0 | if (!next.unused[i] && entry->unused[i] != !!op->c.q4[i].den) |
116 | 0 | return 0; |
117 | 0 | } |
118 | 0 | } |
119 | | |
120 | | /* Flexible variants always match, but lower the score to prioritize more |
121 | | * specific implementations if they exist */ |
122 | 0 | if (entry->flexible) |
123 | 0 | return score - 5; |
124 | | |
125 | 0 | switch (op->op) { |
126 | 0 | case SWS_OP_INVALID: |
127 | 0 | return 0; |
128 | 0 | case SWS_OP_READ: |
129 | 0 | case SWS_OP_WRITE: |
130 | 0 | if (op->rw.elems != entry->rw.elems || |
131 | 0 | op->rw.frac != entry->rw.frac || |
132 | 0 | (op->rw.elems > 1 && op->rw.packed != entry->rw.packed)) |
133 | 0 | return 0; |
134 | 0 | return score; |
135 | 0 | case SWS_OP_SWAP_BYTES: |
136 | 0 | return score; |
137 | 0 | case SWS_OP_PACK: |
138 | 0 | case SWS_OP_UNPACK: |
139 | 0 | for (int i = 0; i < 4 && op->pack.pattern[i]; i++) { |
140 | 0 | if (op->pack.pattern[i] != entry->pack.pattern[i]) |
141 | 0 | return 0; |
142 | 0 | } |
143 | 0 | return score; |
144 | 0 | case SWS_OP_CLEAR: |
145 | 0 | for (int i = 0; i < 4; i++) { |
146 | 0 | if (!op->c.q4[i].den) |
147 | 0 | continue; |
148 | 0 | if (av_cmp_q(op->c.q4[i], Q(entry->clear_value)) && !next.unused[i]) |
149 | 0 | return 0; |
150 | 0 | } |
151 | 0 | return score; |
152 | 0 | case SWS_OP_LSHIFT: |
153 | 0 | case SWS_OP_RSHIFT: |
154 | 0 | av_assert1(entry->flexible); |
155 | 0 | return score; |
156 | 0 | case SWS_OP_SWIZZLE: |
157 | 0 | for (int i = 0; i < 4; i++) { |
158 | 0 | if (op->swizzle.in[i] != entry->swizzle.in[i] && !next.unused[i]) |
159 | 0 | return 0; |
160 | 0 | } |
161 | 0 | return score; |
162 | 0 | case SWS_OP_CONVERT: |
163 | 0 | if (op->convert.to != entry->convert.to || |
164 | 0 | op->convert.expand != entry->convert.expand) |
165 | 0 | return 0; |
166 | 0 | return score; |
167 | 0 | case SWS_OP_DITHER: |
168 | 0 | return op->dither.size_log2 == entry->dither_size ? score : 0; |
169 | 0 | case SWS_OP_MIN: |
170 | 0 | case SWS_OP_MAX: |
171 | 0 | av_assert1(entry->flexible); |
172 | 0 | return score; |
173 | 0 | case SWS_OP_LINEAR: |
174 | | /* All required elements must be present */ |
175 | 0 | if (op->lin.mask & ~entry->linear_mask) |
176 | 0 | return 0; |
177 | | /* To avoid operating on possibly undefined memory, filter out |
178 | | * implementations that operate on more input components */ |
179 | 0 | for (int i = 0; i < 4; i++) { |
180 | 0 | if ((entry->linear_mask & SWS_MASK_COL(i)) && op->comps.unused[i]) |
181 | 0 | return 0; |
182 | 0 | } |
183 | | /* Prioritize smaller implementations */ |
184 | 0 | score += av_popcount(SWS_MASK_ALL ^ entry->linear_mask); |
185 | 0 | return score; |
186 | 0 | case SWS_OP_SCALE: |
187 | 0 | return score; |
188 | 0 | case SWS_OP_TYPE_NB: |
189 | 0 | break; |
190 | 0 | } |
191 | | |
192 | 0 | av_unreachable("Invalid operation type!"); |
193 | 0 | return 0; |
194 | 0 | } |
195 | | |
196 | | int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, |
197 | | SwsOpList *ops, const int block_size, |
198 | | SwsOpChain *chain) |
199 | 0 | { |
200 | 0 | static const SwsOp dummy = { .comps.unused = { true, true, true, true }}; |
201 | 0 | const SwsOp *next = ops->num_ops > 1 ? &ops->ops[1] : &dummy; |
202 | 0 | const unsigned cpu_flags = av_get_cpu_flags(); |
203 | 0 | const SwsOpEntry *best = NULL; |
204 | 0 | const SwsOp *op = &ops->ops[0]; |
205 | 0 | int ret, best_score = 0, best_cpu_flags; |
206 | 0 | SwsOpPriv priv = {0}; |
207 | |
|
208 | 0 | for (int n = 0; n < num_tables; n++) { |
209 | 0 | const SwsOpTable *table = tables[n]; |
210 | 0 | if (table->block_size && table->block_size != block_size || |
211 | 0 | table->cpu_flags & ~cpu_flags) |
212 | 0 | continue; |
213 | | |
214 | 0 | for (int i = 0; table->entries[i]; i++) { |
215 | 0 | const SwsOpEntry *entry = table->entries[i]; |
216 | 0 | int score = op_match(op, entry, next->comps); |
217 | 0 | if (score > best_score) { |
218 | 0 | best_score = score; |
219 | 0 | best_cpu_flags = table->cpu_flags; |
220 | 0 | best = entry; |
221 | 0 | } |
222 | 0 | } |
223 | 0 | } |
224 | |
|
225 | 0 | if (!best) |
226 | 0 | return AVERROR(ENOTSUP); |
227 | | |
228 | 0 | if (best->setup) { |
229 | 0 | ret = best->setup(op, &priv); |
230 | 0 | if (ret < 0) |
231 | 0 | return ret; |
232 | 0 | } |
233 | | |
234 | 0 | chain->cpu_flags |= best_cpu_flags; |
235 | 0 | ret = ff_sws_op_chain_append(chain, best->func, best->free, &priv); |
236 | 0 | if (ret < 0) { |
237 | 0 | if (best->free) |
238 | 0 | best->free(priv.ptr); |
239 | 0 | return ret; |
240 | 0 | } |
241 | | |
242 | 0 | ops->ops++; |
243 | 0 | ops->num_ops--; |
244 | 0 | return ops->num_ops ? AVERROR(EAGAIN) : 0; |
245 | 0 | } |
246 | | |
/*
 * Convert the rational `q` to a pixel value of the given type, yielding 0 if
 * the denominator is zero.
 *
 * The division is carried out before narrowing to `type`: adding `(type) 0`
 * to the numerator selects float arithmetic for float and (via the usual
 * arithmetic conversions) int/unsigned arithmetic for the integer types.
 * Casting the numerator alone would truncate it first, e.g. 510/2 as uint8_t
 * would give 127 instead of 255.
 */
#define q2pixel(type, q) \
    ((q).den ? (type) (((type) 0 + (q).num) / (q).den) : 0)
248 | | |
249 | | int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out) |
250 | 0 | { |
251 | 0 | out->u8[0] = op->c.u; |
252 | 0 | return 0; |
253 | 0 | } |
254 | | |
255 | | int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out) |
256 | 0 | { |
257 | 0 | switch (op->type) { |
258 | 0 | case SWS_PIXEL_U8: out->u8[0] = op->c.u; return 0; |
259 | 0 | case SWS_PIXEL_U16: out->u16[0] = op->c.u; return 0; |
260 | 0 | case SWS_PIXEL_U32: out->u32[0] = op->c.u; return 0; |
261 | 0 | case SWS_PIXEL_F32: out->f32[0] = op->c.u; return 0; |
262 | 0 | default: return AVERROR(EINVAL); |
263 | 0 | } |
264 | 0 | } |
265 | | |
266 | | int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out) |
267 | 0 | { |
268 | 0 | switch (op->type) { |
269 | 0 | case SWS_PIXEL_U8: out->u8[0] = q2pixel(uint8_t, op->c.q); return 0; |
270 | 0 | case SWS_PIXEL_U16: out->u16[0] = q2pixel(uint16_t, op->c.q); return 0; |
271 | 0 | case SWS_PIXEL_U32: out->u32[0] = q2pixel(uint32_t, op->c.q); return 0; |
272 | 0 | case SWS_PIXEL_F32: out->f32[0] = q2pixel(float, op->c.q); return 0; |
273 | 0 | default: return AVERROR(EINVAL); |
274 | 0 | } |
275 | | |
276 | 0 | return 0; |
277 | 0 | } |
278 | | |
279 | | int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out) |
280 | 0 | { |
281 | 0 | for (int i = 0; i < 4; i++) { |
282 | 0 | switch (op->type) { |
283 | 0 | case SWS_PIXEL_U8: out->u8[i] = q2pixel(uint8_t, op->c.q4[i]); break; |
284 | 0 | case SWS_PIXEL_U16: out->u16[i] = q2pixel(uint16_t, op->c.q4[i]); break; |
285 | 0 | case SWS_PIXEL_U32: out->u32[i] = q2pixel(uint32_t, op->c.q4[i]); break; |
286 | 0 | case SWS_PIXEL_F32: out->f32[i] = q2pixel(float, op->c.q4[i]); break; |
287 | 0 | default: return AVERROR(EINVAL); |
288 | 0 | } |
289 | 0 | } |
290 | | |
291 | 0 | return 0; |
292 | 0 | } |