/src/ffmpeg/libswscale/ops_chain.c
Line | Count | Source |
1 | | /** |
2 | | * Copyright (C) 2025 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/avassert.h" |
22 | | #include "libavutil/mem.h" |
23 | | #include "libavutil/rational.h" |
24 | | |
25 | | #include "ops_chain.h" |
26 | | |
/* Build the AVRational N/1 from an integer expression. The argument is
 * parenthesized so that it always expands as a single initializer. */
#define Q(N) ((AVRational) { (N), 1 })
28 | | |
29 | | SwsOpChain *ff_sws_op_chain_alloc(void) |
30 | 0 | { |
31 | 0 | return av_mallocz(sizeof(SwsOpChain)); |
32 | 0 | } |
33 | | |
34 | | void ff_sws_op_chain_free_cb(void *ptr) |
35 | 0 | { |
36 | 0 | if (!ptr) |
37 | 0 | return; |
38 | | |
39 | 0 | SwsOpChain *chain = ptr; |
40 | 0 | for (int i = 0; i < chain->num_impl + 1; i++) { |
41 | 0 | if (chain->free[i]) |
42 | 0 | chain->free[i](&chain->impl[i].priv); |
43 | 0 | } |
44 | |
|
45 | 0 | av_free(chain); |
46 | 0 | } |
47 | | |
48 | | int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, |
49 | | void (*free)(SwsOpPriv *), const SwsOpPriv *priv) |
50 | 0 | { |
51 | 0 | const int idx = chain->num_impl; |
52 | 0 | if (idx == SWS_MAX_OPS) |
53 | 0 | return AVERROR(EINVAL); |
54 | | |
55 | 0 | av_assert1(func); |
56 | 0 | chain->impl[idx].cont = func; |
57 | 0 | chain->impl[idx + 1].priv = *priv; |
58 | 0 | chain->free[idx + 1] = free; |
59 | 0 | chain->num_impl++; |
60 | 0 | return 0; |
61 | 0 | } |
62 | | |
63 | | /** |
64 | | * Match an operation against a reference operation. Returns a score for how |
65 | | * well the reference matches the operation, or 0 if there is no match. |
66 | | * |
67 | | * For unfiltered SWS_OP_READ/SWS_OP_WRITE, SWS_OP_SWAP_BYTES and |
68 | | * SWS_OP_SWIZZLE, the exact type is not checked, just the size. |
69 | | * |
70 | | * Components marked SWS_COMP_GARBAGE are ignored when matching. If `flexible` |
71 | | * is true, the op body is ignored - only the operation, pixel type, and |
72 | | * component masks are checked. |
73 | | */ |
74 | | static int op_match(const SwsOp *op, const SwsOpEntry *entry) |
75 | 0 | { |
76 | 0 | int score = 10; |
77 | 0 | if (op->op != entry->op) |
78 | 0 | return 0; |
79 | | |
80 | 0 | switch (op->op) { |
81 | 0 | case SWS_OP_READ: |
82 | 0 | case SWS_OP_WRITE: |
83 | 0 | if (op->rw.filter && op->type != entry->type) |
84 | 0 | return 0; |
85 | 0 | /* fall through */; |
86 | 0 | case SWS_OP_SWAP_BYTES: |
87 | 0 | case SWS_OP_SWIZZLE: |
88 | | /* Only the size matters for these operations */ |
89 | 0 | if (ff_sws_pixel_type_size(op->type) != ff_sws_pixel_type_size(entry->type)) |
90 | 0 | return 0; |
91 | 0 | break; |
92 | 0 | default: |
93 | 0 | if (op->type != entry->type) |
94 | 0 | return 0; |
95 | 0 | break; |
96 | 0 | } |
97 | | |
98 | 0 | const SwsCompMask needed = ff_sws_comp_mask_needed(op); |
99 | 0 | if (needed & ~entry->mask) |
100 | 0 | return 0; /* Entry doesn't compute all needed components */ |
101 | | |
102 | | /* Otherwise, operating on fewer components is better */ |
103 | 0 | score += av_popcount(SWS_COMP_INV(entry->mask)); |
104 | | |
105 | | /* Flexible variants always match, but lower the score to prioritize more |
106 | | * specific implementations if they exist */ |
107 | 0 | if (entry->flexible) |
108 | 0 | return score - 5; |
109 | | |
110 | 0 | switch (op->op) { |
111 | 0 | case SWS_OP_INVALID: |
112 | 0 | return 0; |
113 | 0 | case SWS_OP_READ: |
114 | 0 | case SWS_OP_WRITE: |
115 | 0 | if (op->rw.elems != entry->rw.elems || |
116 | 0 | op->rw.frac != entry->rw.frac || |
117 | 0 | op->rw.filter != entry->rw.filter || |
118 | 0 | (op->rw.elems > 1 && op->rw.packed != entry->rw.packed)) |
119 | 0 | return 0; |
120 | 0 | return score; |
121 | 0 | case SWS_OP_SWAP_BYTES: |
122 | 0 | return score; |
123 | 0 | case SWS_OP_PACK: |
124 | 0 | case SWS_OP_UNPACK: |
125 | 0 | for (int i = 0; i < 4 && op->pack.pattern[i]; i++) { |
126 | 0 | if (op->pack.pattern[i] != entry->pack.pattern[i]) |
127 | 0 | return 0; |
128 | 0 | } |
129 | 0 | return score; |
130 | 0 | case SWS_OP_CLEAR: |
131 | | /* Clear mask must match exactly */ |
132 | 0 | if (op->clear.mask != entry->clear.mask) |
133 | 0 | return 0; |
134 | 0 | for (int i = 0; i < 4; i++) { |
135 | 0 | if (!SWS_COMP_TEST(op->clear.mask, i) || !SWS_OP_NEEDED(op, i)) |
136 | 0 | continue; |
137 | 0 | else if (!entry->clear.value[i].den) |
138 | 0 | continue; /* Any clear value supported */ |
139 | 0 | else if (av_cmp_q(op->clear.value[i], entry->clear.value[i])) |
140 | 0 | return 0; |
141 | 0 | } |
142 | 0 | return score; |
143 | 0 | case SWS_OP_LSHIFT: |
144 | 0 | case SWS_OP_RSHIFT: |
145 | 0 | av_assert1(entry->flexible); |
146 | 0 | break; |
147 | 0 | case SWS_OP_SWIZZLE: |
148 | 0 | for (int i = 0; i < 4; i++) { |
149 | 0 | if (SWS_OP_NEEDED(op, i) && op->swizzle.in[i] != entry->swizzle.in[i]) |
150 | 0 | return 0; |
151 | 0 | } |
152 | 0 | return score; |
153 | 0 | case SWS_OP_CONVERT: |
154 | 0 | if (op->convert.to != entry->convert.to || |
155 | 0 | op->convert.expand != entry->convert.expand) |
156 | 0 | return 0; |
157 | 0 | return score; |
158 | 0 | case SWS_OP_DITHER: |
159 | 0 | return op->dither.size_log2 == entry->dither_size ? score : 0; |
160 | 0 | case SWS_OP_MIN: |
161 | 0 | case SWS_OP_MAX: |
162 | 0 | av_assert1(entry->flexible); |
163 | 0 | break; |
164 | 0 | case SWS_OP_LINEAR: |
165 | 0 | if (op->lin.mask != entry->linear_mask) |
166 | 0 | return 0; |
167 | 0 | return score; |
168 | 0 | case SWS_OP_SCALE: |
169 | 0 | return av_cmp_q(op->scale.factor, entry->scale) ? 0 : score; |
170 | 0 | case SWS_OP_FILTER_H: |
171 | 0 | case SWS_OP_FILTER_V: |
172 | 0 | return score; |
173 | 0 | case SWS_OP_TYPE_NB: |
174 | 0 | break; |
175 | 0 | } |
176 | | |
177 | 0 | av_unreachable("Invalid operation type!"); |
178 | 0 | return 0; |
179 | 0 | } |
180 | | |
181 | | int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], |
182 | | int num_tables, SwsOpList *ops, int ops_index, |
183 | | const int block_size, SwsOpChain *chain) |
184 | 0 | { |
185 | 0 | const SwsOp *op = &ops->ops[ops_index]; |
186 | 0 | const unsigned cpu_flags = av_get_cpu_flags(); |
187 | 0 | const SwsOpEntry *best = NULL; |
188 | 0 | const SwsOpTable *best_table = NULL; |
189 | 0 | int ret, best_score = 0; |
190 | |
|
191 | 0 | SwsImplParams params = { |
192 | 0 | .ctx = ctx, |
193 | 0 | .op = op |
194 | 0 | }; |
195 | |
|
196 | 0 | for (int n = 0; n < num_tables; n++) { |
197 | 0 | const SwsOpTable *table = tables[n]; |
198 | 0 | if (table->block_size && table->block_size != block_size || |
199 | 0 | table->cpu_flags & ~cpu_flags) |
200 | 0 | continue; |
201 | | |
202 | 0 | params.table = table; |
203 | 0 | for (int i = 0; table->entries[i]; i++) { |
204 | 0 | const SwsOpEntry *entry = table->entries[i]; |
205 | 0 | int score = op_match(op, entry); |
206 | 0 | if (score <= best_score) |
207 | 0 | continue; |
208 | 0 | if (entry->check && !entry->check(¶ms)) |
209 | 0 | continue; |
210 | 0 | best_score = score; |
211 | 0 | best_table = table; |
212 | 0 | best = entry; |
213 | 0 | } |
214 | 0 | } |
215 | |
|
216 | 0 | if (!best) |
217 | 0 | return AVERROR(ENOTSUP); |
218 | | |
219 | 0 | params.table = best_table; |
220 | |
|
221 | 0 | SwsImplResult res = {0}; |
222 | 0 | if (best->setup) { |
223 | 0 | ret = best->setup(¶ms, &res); |
224 | 0 | if (ret < 0) |
225 | 0 | return ret; |
226 | 0 | } |
227 | | |
228 | 0 | ret = ff_sws_op_chain_append(chain, res.func ? res.func : best->func, |
229 | 0 | res.free, &res.priv); |
230 | 0 | if (ret < 0) { |
231 | 0 | if (res.free) |
232 | 0 | res.free(&res.priv); |
233 | 0 | return ret; |
234 | 0 | } |
235 | | |
236 | 0 | chain->cpu_flags |= best_table->cpu_flags; |
237 | 0 | chain->over_read = FFMAX(chain->over_read, res.over_read); |
238 | 0 | chain->over_write = FFMAX(chain->over_write, res.over_write); |
239 | 0 | return 0; |
240 | 0 | } |
241 | | |
/* Convert the rational `q` to the pixel type `type`. A zero denominator
 * yields 0. Note that the cast binds to the numerator, i.e. the numerator is
 * converted to `type` before the division takes place, and that `q` is
 * evaluated more than once. */
#define q2pixel(type, q) (!(q).den ? 0 : (type) (q).num / (q).den)
243 | | |
244 | | int ff_sws_setup_shift(const SwsImplParams *params, SwsImplResult *out) |
245 | 0 | { |
246 | 0 | out->priv.u8[0] = params->op->shift.amount; |
247 | 0 | return 0; |
248 | 0 | } |
249 | | |
250 | | int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out) |
251 | 0 | { |
252 | 0 | const SwsOp *op = params->op; |
253 | 0 | const AVRational factor = op->scale.factor; |
254 | 0 | switch (op->type) { |
255 | 0 | case SWS_PIXEL_U8: out->priv.u8[0] = q2pixel(uint8_t, factor); break; |
256 | 0 | case SWS_PIXEL_U16: out->priv.u16[0] = q2pixel(uint16_t, factor); break; |
257 | 0 | case SWS_PIXEL_U32: out->priv.u32[0] = q2pixel(uint32_t, factor); break; |
258 | 0 | case SWS_PIXEL_F32: out->priv.f32[0] = q2pixel(float, factor); break; |
259 | 0 | default: return AVERROR(EINVAL); |
260 | 0 | } |
261 | | |
262 | 0 | return 0; |
263 | 0 | } |
264 | | |
265 | | int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out) |
266 | 0 | { |
267 | 0 | const SwsOp *op = params->op; |
268 | 0 | for (int i = 0; i < 4; i++) { |
269 | 0 | const AVRational limit = op->clamp.limit[i]; |
270 | 0 | switch (op->type) { |
271 | 0 | case SWS_PIXEL_U8: out->priv.u8[i] = q2pixel(uint8_t, limit); break; |
272 | 0 | case SWS_PIXEL_U16: out->priv.u16[i] = q2pixel(uint16_t, limit); break; |
273 | 0 | case SWS_PIXEL_U32: out->priv.u32[i] = q2pixel(uint32_t, limit); break; |
274 | 0 | case SWS_PIXEL_F32: out->priv.f32[i] = q2pixel(float, limit); break; |
275 | 0 | default: return AVERROR(EINVAL); |
276 | 0 | } |
277 | 0 | } |
278 | | |
279 | 0 | return 0; |
280 | 0 | } |
281 | | |
282 | | int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out) |
283 | 0 | { |
284 | 0 | const SwsOp *op = params->op; |
285 | 0 | for (int i = 0; i < 4; i++) { |
286 | 0 | const AVRational value = op->clear.value[i]; |
287 | 0 | if (!value.den) |
288 | 0 | continue; |
289 | 0 | switch (op->type) { |
290 | 0 | case SWS_PIXEL_U8: out->priv.u8[i] = q2pixel(uint8_t, value); break; |
291 | 0 | case SWS_PIXEL_U16: out->priv.u16[i] = q2pixel(uint16_t, value); break; |
292 | 0 | case SWS_PIXEL_U32: out->priv.u32[i] = q2pixel(uint32_t, value); break; |
293 | 0 | case SWS_PIXEL_F32: out->priv.f32[i] = q2pixel(float, value); break; |
294 | 0 | default: return AVERROR(EINVAL); |
295 | 0 | } |
296 | 0 | } |
297 | | |
298 | 0 | return 0; |
299 | 0 | } |