/src/ffmpeg/libswscale/graph.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2024 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/avassert.h" |
22 | | #include "libavutil/cpu.h" |
23 | | #include "libavutil/error.h" |
24 | | #include "libavutil/hwcontext.h" |
25 | | #include "libavutil/imgutils.h" |
26 | | #include "libavutil/macros.h" |
27 | | #include "libavutil/mem.h" |
28 | | #include "libavutil/opt.h" |
29 | | #include "libavutil/pixdesc.h" |
30 | | #include "libavutil/refstruct.h" |
31 | | #include "libavutil/slicethread.h" |
32 | | |
33 | | #include "libswscale/swscale.h" |
34 | | #include "libswscale/format.h" |
35 | | |
36 | | #include "cms.h" |
37 | | #include "lut3d.h" |
38 | | #include "swscale_internal.h" |
39 | | #include "graph.h" |
40 | | #include "ops.h" |
41 | | #include "ops_dispatch.h" |
42 | | #if CONFIG_VULKAN |
43 | | #include "vulkan/ops.h" |
44 | | #endif |
45 | | |
46 | | int ff_sws_pass_aligned_width(const SwsPass *pass, int width) |
47 | 0 | { |
48 | 0 | if (!pass) |
49 | 0 | return width; |
50 | | |
51 | 0 | size_t aligned_w = width; |
52 | 0 | aligned_w = FFALIGN(aligned_w, pass->output->width_align); |
53 | 0 | aligned_w += pass->output->width_pad; |
54 | 0 | return aligned_w <= INT_MAX ? aligned_w : width; |
55 | 0 | } |
56 | | |
57 | | /* Allocates one buffer per plane */ |
58 | | static int frame_alloc_planes(AVFrame *dst) |
59 | 0 | { |
60 | 0 | int ret = av_image_check_size2(dst->width, dst->height, INT64_MAX, |
61 | 0 | dst->format, 0, NULL); |
62 | 0 | if (ret < 0) |
63 | 0 | return ret; |
64 | | |
65 | 0 | const int align = av_cpu_max_align(); |
66 | 0 | const int aligned_w = FFALIGN(dst->width, align); |
67 | 0 | ret = av_image_fill_linesizes(dst->linesize, dst->format, aligned_w); |
68 | 0 | if (ret < 0) |
69 | 0 | return ret; |
70 | | |
71 | 0 | ptrdiff_t linesize1[4]; |
72 | 0 | for (int i = 0; i < 4; i++) |
73 | 0 | linesize1[i] = dst->linesize[i] = FFALIGN(dst->linesize[i], align); |
74 | |
|
75 | 0 | size_t sizes[4]; |
76 | 0 | ret = av_image_fill_plane_sizes(sizes, dst->format, dst->height, linesize1); |
77 | 0 | if (ret < 0) |
78 | 0 | return ret; |
79 | | |
80 | 0 | for (int i = 0; i < 4; i++) { |
81 | 0 | if (!sizes[i]) |
82 | 0 | break; |
83 | 0 | AVBufferRef *buf = av_buffer_alloc(sizes[i]); |
84 | 0 | if (!buf) |
85 | 0 | return AVERROR(ENOMEM); |
86 | 0 | dst->data[i] = buf->data; |
87 | 0 | dst->buf[i] = buf; |
88 | 0 | } |
89 | | |
90 | 0 | return 0; |
91 | 0 | } |
92 | | |
93 | | #if CONFIG_VULKAN |
94 | | static int pass_alloc_output_hw(SwsPass *pass, AVFrame *avframe, |
95 | | AVBufferRef *dev_ref) |
96 | | { |
97 | | SwsPassBuffer *buffer = pass->output; |
98 | | AVBufferRef *frames_ref = av_hwframe_ctx_alloc(dev_ref); |
99 | | if (!frames_ref) |
100 | | return AVERROR(ENOMEM); |
101 | | |
102 | | AVHWFramesContext *hwfc = (AVHWFramesContext *)frames_ref->data; |
103 | | hwfc->format = AV_PIX_FMT_VULKAN; |
104 | | hwfc->sw_format = pass->format; |
105 | | hwfc->width = buffer->width; |
106 | | hwfc->height = buffer->height; |
107 | | |
108 | | int ret = av_hwframe_ctx_init(frames_ref); |
109 | | if (ret >= 0) { |
110 | | avframe->format = AV_PIX_FMT_VULKAN; |
111 | | ret = av_hwframe_get_buffer(frames_ref, avframe, 0); |
112 | | } |
113 | | av_buffer_unref(&frames_ref); |
114 | | return ret; |
115 | | } |
116 | | #endif |
117 | | |
118 | | static int pass_alloc_output(SwsPass *pass) |
119 | 0 | { |
120 | 0 | if (!pass || pass->output->avframe) |
121 | 0 | return 0; |
122 | | |
123 | 0 | SwsPassBuffer *buffer = pass->output; |
124 | 0 | AVFrame *avframe = av_frame_alloc(); |
125 | 0 | if (!avframe) |
126 | 0 | return AVERROR(ENOMEM); |
127 | 0 | avframe->width = buffer->width; |
128 | 0 | avframe->height = buffer->height; |
129 | |
|
130 | 0 | int ret; |
131 | |
|
132 | | #if CONFIG_VULKAN |
133 | | const SwsGraph *graph = pass->graph; |
134 | | if (graph->src.hw_format == AV_PIX_FMT_VULKAN && |
135 | | graph->dst.hw_format == AV_PIX_FMT_VULKAN) { |
136 | | AVBufferRef *dev_ref = ff_sws_vk_device_ref(graph->ctx); |
137 | | if (dev_ref) { |
138 | | ret = pass_alloc_output_hw(pass, avframe, dev_ref); |
139 | | if (ret >= 0) |
140 | | goto done; |
141 | | av_frame_unref(avframe); |
142 | | } |
143 | | } |
144 | | #endif |
145 | |
|
146 | 0 | avframe->format = pass->format; |
147 | 0 | ret = frame_alloc_planes(avframe); |
148 | 0 | if (ret < 0) { |
149 | 0 | av_frame_free(&avframe); |
150 | 0 | return ret; |
151 | 0 | } |
152 | | |
153 | | #if CONFIG_VULKAN |
154 | | done: |
155 | | #endif |
156 | 0 | buffer->avframe = avframe; |
157 | 0 | ff_sws_frame_from_avframe(&buffer->frame, avframe); |
158 | 0 | return 0; |
159 | 0 | } |
160 | | |
161 | | static void free_buffer(AVRefStructOpaque opaque, void *obj) |
162 | 0 | { |
163 | 0 | SwsPassBuffer *buffer = obj; |
164 | 0 | av_frame_free(&buffer->avframe); |
165 | 0 | } |
166 | | |
167 | | static void pass_free(SwsPass *pass) |
168 | 0 | { |
169 | 0 | if (pass->free) |
170 | 0 | pass->free(pass->priv); |
171 | 0 | av_refstruct_unref(&pass->output); |
172 | 0 | av_free(pass); |
173 | 0 | } |
174 | | |
175 | | int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, |
176 | | int width, int height, SwsPass *input, |
177 | | int align, SwsPassFunc run, SwsPassSetup setup, |
178 | | void *priv, void (*free_cb)(void *priv), |
179 | | SwsPass **out_pass) |
180 | 0 | { |
181 | 0 | int ret; |
182 | 0 | SwsPass *pass = av_mallocz(sizeof(*pass)); |
183 | 0 | if (!pass) { |
184 | 0 | if (free_cb) |
185 | 0 | free_cb(priv); |
186 | 0 | return AVERROR(ENOMEM); |
187 | 0 | } |
188 | | |
189 | 0 | pass->graph = graph; |
190 | 0 | pass->run = run; |
191 | 0 | pass->setup = setup; |
192 | 0 | pass->priv = priv; |
193 | 0 | pass->free = free_cb; |
194 | 0 | pass->format = fmt; |
195 | 0 | pass->width = width; |
196 | 0 | pass->height = height; |
197 | 0 | pass->input = input; |
198 | 0 | pass->output = av_refstruct_alloc_ext(sizeof(*pass->output), 0, NULL, free_buffer); |
199 | 0 | if (!pass->output) { |
200 | 0 | ret = AVERROR(ENOMEM); |
201 | 0 | goto fail; |
202 | 0 | } |
203 | | |
204 | 0 | if (!align) { |
205 | 0 | pass->slice_h = pass->height; |
206 | 0 | pass->num_slices = 1; |
207 | 0 | } else { |
208 | 0 | pass->slice_h = (pass->height + graph->num_threads - 1) / graph->num_threads; |
209 | 0 | pass->slice_h = FFALIGN(pass->slice_h, align); |
210 | 0 | pass->num_slices = (pass->height + pass->slice_h - 1) / pass->slice_h; |
211 | 0 | } |
212 | | |
213 | | /* Align output buffer to include extra slice padding */ |
214 | 0 | pass->output->height = pass->slice_h * pass->num_slices; |
215 | 0 | pass->output->width = pass->width; |
216 | 0 | pass->output->width_align = 1; |
217 | |
|
218 | 0 | ret = av_dynarray_add_nofree(&graph->passes, &graph->num_passes, pass); |
219 | 0 | if (ret < 0) |
220 | 0 | goto fail; |
221 | | |
222 | 0 | *out_pass = pass; |
223 | 0 | return 0; |
224 | | |
225 | 0 | fail: |
226 | 0 | pass_free(pass); |
227 | 0 | return ret; |
228 | 0 | } |
229 | | |
230 | | static void frame_shift(const SwsFrame *f, const int y, uint8_t *data[4]) |
231 | 0 | { |
232 | 0 | for (int i = 0; i < 4; i++) { |
233 | 0 | if (f->data[i]) |
234 | 0 | data[i] = f->data[i] + (y >> ff_fmt_vshift(f->format, i)) * f->linesize[i]; |
235 | 0 | else |
236 | 0 | data[i] = NULL; |
237 | 0 | } |
238 | 0 | } |
239 | | |
240 | | static void run_copy(const SwsFrame *out, const SwsFrame *in, int y, int h, |
241 | | const SwsPass *pass) |
242 | 0 | { |
243 | 0 | uint8_t *in_data[4], *out_data[4]; |
244 | 0 | frame_shift(in, y, in_data); |
245 | 0 | frame_shift(out, y, out_data); |
246 | |
|
247 | 0 | for (int i = 0; i < 4 && out_data[i]; i++) { |
248 | 0 | const int lines = h >> ff_fmt_vshift(in->format, i); |
249 | 0 | av_assert1(in_data[i]); |
250 | |
|
251 | 0 | if (in_data[i] == out_data[i]) { |
252 | 0 | av_assert0(in->linesize[i] == out->linesize[i]); |
253 | 0 | } else if (in->linesize[i] == out->linesize[i]) { |
254 | 0 | memcpy(out_data[i], in_data[i], lines * out->linesize[i]); |
255 | 0 | } else { |
256 | 0 | const int linesize = FFMIN(out->linesize[i], in->linesize[i]); |
257 | 0 | for (int j = 0; j < lines; j++) { |
258 | 0 | memcpy(out_data[i], in_data[i], linesize); |
259 | 0 | in_data[i] += in->linesize[i]; |
260 | 0 | out_data[i] += out->linesize[i]; |
261 | 0 | } |
262 | 0 | } |
263 | 0 | } |
264 | 0 | } |
265 | | |
266 | | static void run_rgb0(const SwsFrame *out, const SwsFrame *in, int y, int h, |
267 | | const SwsPass *pass) |
268 | 0 | { |
269 | 0 | SwsInternal *c = pass->priv; |
270 | 0 | const int x0 = c->src0Alpha - 1; |
271 | 0 | const int w4 = 4 * pass->width; |
272 | 0 | const int src_stride = in->linesize[0]; |
273 | 0 | const int dst_stride = out->linesize[0]; |
274 | 0 | const uint8_t *src = in->data[0] + y * src_stride; |
275 | 0 | uint8_t *dst = out->data[0] + y * dst_stride; |
276 | |
|
277 | 0 | for (int y = 0; y < h; y++) { |
278 | 0 | memcpy(dst, src, w4 * sizeof(*dst)); |
279 | 0 | for (int x = x0; x < w4; x += 4) |
280 | 0 | dst[x] = 0xFF; |
281 | |
|
282 | 0 | src += src_stride; |
283 | 0 | dst += dst_stride; |
284 | 0 | } |
285 | 0 | } |
286 | | |
287 | | static void run_xyz2rgb(const SwsFrame *out, const SwsFrame *in, int y, int h, |
288 | | const SwsPass *pass) |
289 | 0 | { |
290 | 0 | const SwsInternal *c = pass->priv; |
291 | 0 | c->xyz12Torgb48(c, out->data[0] + y * out->linesize[0], out->linesize[0], |
292 | 0 | in->data[0] + y * in->linesize[0], in->linesize[0], |
293 | 0 | pass->width, h); |
294 | 0 | } |
295 | | |
296 | | static void run_rgb2xyz(const SwsFrame *out, const SwsFrame *in, int y, int h, |
297 | | const SwsPass *pass) |
298 | 0 | { |
299 | 0 | const SwsInternal *c = pass->priv; |
300 | 0 | c->rgb48Toxyz12(c, out->data[0] + y * out->linesize[0], out->linesize[0], |
301 | 0 | in->data[0] + y * in->linesize[0], in->linesize[0], |
302 | 0 | pass->width, h); |
303 | 0 | } |
304 | | |
305 | | /*********************************************************************** |
306 | | * Internal ff_swscale() wrapper. This reuses the legacy scaling API. * |
307 | | * This is considered fully deprecated, and will be replaced by a full * |
308 | | * reimplementation ASAP. * |
309 | | ***********************************************************************/ |
310 | | |
311 | | static void free_legacy_swscale(void *priv) |
312 | 0 | { |
313 | 0 | SwsContext *sws = priv; |
314 | 0 | sws_free_context(&sws); |
315 | 0 | } |
316 | | |
317 | | static int setup_legacy_swscale(const SwsFrame *out, const SwsFrame *in, |
318 | | const SwsPass *pass) |
319 | 0 | { |
320 | 0 | SwsContext *sws = pass->priv; |
321 | 0 | SwsInternal *c = sws_internal(sws); |
322 | 0 | if (sws->flags & SWS_BITEXACT && sws->dither == SWS_DITHER_ED && c->dither_error[0]) { |
323 | 0 | for (int i = 0; i < 4; i++) |
324 | 0 | memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (sws->dst_w + 2)); |
325 | 0 | } |
326 | |
|
327 | 0 | if (usePal(sws->src_format)) |
328 | 0 | ff_update_palette(c, (const uint32_t *) in->data[1]); |
329 | |
|
330 | 0 | return 0; |
331 | 0 | } |
332 | | |
333 | | static inline SwsContext *slice_ctx(const SwsPass *pass, int y) |
334 | 0 | { |
335 | 0 | SwsContext *sws = pass->priv; |
336 | 0 | SwsInternal *parent = sws_internal(sws); |
337 | 0 | if (pass->num_slices == 1) |
338 | 0 | return sws; |
339 | | |
340 | 0 | av_assert1(parent->nb_slice_ctx == pass->num_slices); |
341 | 0 | sws = parent->slice_ctx[y / pass->slice_h]; |
342 | |
|
343 | 0 | if (usePal(sws->src_format)) { |
344 | 0 | SwsInternal *sub = sws_internal(sws); |
345 | 0 | memcpy(sub->pal_yuv, parent->pal_yuv, sizeof(sub->pal_yuv)); |
346 | 0 | memcpy(sub->pal_rgb, parent->pal_rgb, sizeof(sub->pal_rgb)); |
347 | 0 | } |
348 | |
|
349 | 0 | return sws; |
350 | 0 | } |
351 | | |
352 | | static void run_legacy_unscaled(const SwsFrame *out, const SwsFrame *in, |
353 | | int y, int h, const SwsPass *pass) |
354 | 0 | { |
355 | 0 | SwsContext *sws = slice_ctx(pass, y); |
356 | 0 | SwsInternal *c = sws_internal(sws); |
357 | 0 | uint8_t *in_data[4]; |
358 | 0 | frame_shift(in, y, in_data); |
359 | |
|
360 | 0 | c->convert_unscaled(c, (const uint8_t *const *) in_data, in->linesize, y, h, |
361 | 0 | out->data, out->linesize); |
362 | 0 | } |
363 | | |
364 | | static void run_legacy_swscale(const SwsFrame *out, const SwsFrame *in, |
365 | | int y, int h, const SwsPass *pass) |
366 | 0 | { |
367 | 0 | SwsContext *sws = slice_ctx(pass, y); |
368 | 0 | SwsInternal *c = sws_internal(sws); |
369 | 0 | uint8_t *out_data[4]; |
370 | 0 | frame_shift(out, y, out_data); |
371 | |
|
372 | 0 | ff_swscale(c, (const uint8_t *const *) in->data, in->linesize, 0, |
373 | 0 | sws->src_h, out_data, out->linesize, y, h); |
374 | 0 | } |
375 | | |
376 | | static void get_chroma_pos(SwsGraph *graph, int *h_chr_pos, int *v_chr_pos, |
377 | | const SwsFormat *fmt) |
378 | 0 | { |
379 | 0 | enum AVChromaLocation chroma_loc = fmt->loc; |
380 | 0 | const int sub_x = fmt->desc->log2_chroma_w; |
381 | 0 | const int sub_y = fmt->desc->log2_chroma_h; |
382 | 0 | int x_pos, y_pos; |
383 | | |
384 | | /* Explicitly default to center siting for compatibility with swscale */ |
385 | 0 | if (chroma_loc == AVCHROMA_LOC_UNSPECIFIED) { |
386 | 0 | chroma_loc = AVCHROMA_LOC_CENTER; |
387 | 0 | graph->incomplete |= sub_x || sub_y; |
388 | 0 | } |
389 | | |
390 | | /* av_chroma_location_enum_to_pos() always gives us values in the range from |
391 | | * 0 to 256, but we need to adjust this to the true value range of the |
392 | | * subsampling grid, which may be larger for h/v_sub > 1 */ |
393 | 0 | av_chroma_location_enum_to_pos(&x_pos, &y_pos, chroma_loc); |
394 | 0 | x_pos *= (1 << sub_x) - 1; |
395 | 0 | y_pos *= (1 << sub_y) - 1; |
396 | | |
397 | | /* Fix vertical chroma position for interlaced frames */ |
398 | 0 | if (sub_y && fmt->interlaced) { |
399 | | /* When vertically subsampling, chroma samples are effectively only |
400 | | * placed next to even rows. To access them from the odd field, we need |
401 | | * to account for this shift by offsetting the distance of one luma row. |
402 | | * |
403 | | * For 4x vertical subsampling (v_sub == 2), they are only placed |
404 | | * next to every *other* even row, so we need to shift by three luma |
405 | | * rows to get to the chroma sample. */ |
406 | 0 | if (graph->field == FIELD_BOTTOM) |
407 | 0 | y_pos += (256 << sub_y) - 256; |
408 | | |
409 | | /* Luma row distance is doubled for fields, so halve offsets */ |
410 | 0 | y_pos >>= 1; |
411 | 0 | } |
412 | | |
413 | | /* Explicitly strip chroma offsets when not subsampling, because it |
414 | | * interferes with the operation of flags like SWS_FULL_CHR_H_INP */ |
415 | 0 | *h_chr_pos = sub_x ? x_pos : -513; |
416 | 0 | *v_chr_pos = sub_y ? y_pos : -513; |
417 | 0 | } |
418 | | |
419 | | static void legacy_chr_pos(SwsGraph *graph, int *chr_pos, int override, int *warned) |
420 | 0 | { |
421 | 0 | if (override == -513 || override == *chr_pos) |
422 | 0 | return; |
423 | | |
424 | 0 | if (!*warned) { |
425 | 0 | av_log(NULL, AV_LOG_WARNING, |
426 | 0 | "Setting chroma position directly is deprecated, make sure " |
427 | 0 | "the frame is tagged with the correct chroma location.\n"); |
428 | 0 | *warned = 1; |
429 | 0 | } |
430 | |
|
431 | 0 | *chr_pos = override; |
432 | 0 | } |
433 | | |
434 | | /* Takes over ownership of `sws` */ |
435 | | static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, |
436 | | SwsPass *input, SwsPass **output) |
437 | 0 | { |
438 | 0 | SwsInternal *c = sws_internal(sws); |
439 | 0 | const int src_w = sws->src_w, src_h = sws->src_h; |
440 | 0 | const int dst_w = sws->dst_w, dst_h = sws->dst_h; |
441 | 0 | const int unscaled = src_w == dst_w && src_h == dst_h; |
442 | 0 | int align = c->dst_slice_align; |
443 | 0 | SwsPass *pass = NULL; |
444 | 0 | int ret; |
445 | |
|
446 | 0 | if (c->cascaded_context[0]) { |
447 | 0 | const int num_cascaded = c->cascaded_context[2] ? 3 : 2; |
448 | 0 | for (int i = 0; i < num_cascaded; i++) { |
449 | 0 | const int is_last = i + 1 == num_cascaded; |
450 | | |
451 | | /* Steal cascaded context, so we can manage its lifetime independently */ |
452 | 0 | SwsContext *sub = c->cascaded_context[i]; |
453 | 0 | c->cascaded_context[i] = NULL; |
454 | |
|
455 | 0 | ret = init_legacy_subpass(graph, sub, input, is_last ? output : &input); |
456 | 0 | if (ret < 0) |
457 | 0 | break; |
458 | 0 | } |
459 | |
|
460 | 0 | sws_free_context(&sws); |
461 | 0 | return ret; |
462 | 0 | } |
463 | | |
464 | 0 | if (sws->dither == SWS_DITHER_ED && !c->convert_unscaled) |
465 | 0 | align = 0; /* disable slice threading */ |
466 | |
|
467 | 0 | if (c->src0Alpha && !c->dst0Alpha && isALPHA(sws->dst_format)) { |
468 | 0 | ret = ff_sws_graph_add_pass(graph, AV_PIX_FMT_RGBA, src_w, src_h, input, |
469 | 0 | 1, run_rgb0, NULL, c, NULL, &input); |
470 | 0 | if (ret < 0) { |
471 | 0 | sws_free_context(&sws); |
472 | 0 | return ret; |
473 | 0 | } |
474 | 0 | } |
475 | | |
476 | 0 | if (c->srcXYZ && !(c->dstXYZ && unscaled)) { |
477 | 0 | ret = ff_sws_graph_add_pass(graph, AV_PIX_FMT_RGB48, src_w, src_h, input, |
478 | 0 | 1, run_xyz2rgb, NULL, c, NULL, &input); |
479 | 0 | if (ret < 0) { |
480 | 0 | sws_free_context(&sws); |
481 | 0 | return ret; |
482 | 0 | } |
483 | 0 | } |
484 | | |
485 | 0 | ret = ff_sws_graph_add_pass(graph, sws->dst_format, dst_w, dst_h, input, align, |
486 | 0 | c->convert_unscaled ? run_legacy_unscaled : run_legacy_swscale, |
487 | 0 | setup_legacy_swscale, sws, free_legacy_swscale, &pass); |
488 | 0 | if (ret < 0) |
489 | 0 | return ret; |
490 | | |
491 | | /** |
492 | | * For slice threading, we need to create sub contexts, similar to how |
493 | | * swscale normally handles it internally. The most important difference |
494 | | * is that we handle cascaded contexts before threaded contexts; whereas |
495 | | * context_init_threaded() does it the other way around. |
496 | | */ |
497 | | |
498 | 0 | if (pass->num_slices > 1) { |
499 | 0 | c->slice_ctx = av_calloc(pass->num_slices, sizeof(*c->slice_ctx)); |
500 | 0 | if (!c->slice_ctx) |
501 | 0 | return AVERROR(ENOMEM); |
502 | | |
503 | 0 | for (int i = 0; i < pass->num_slices; i++) { |
504 | 0 | SwsContext *slice; |
505 | 0 | SwsInternal *c2; |
506 | 0 | slice = c->slice_ctx[i] = sws_alloc_context(); |
507 | 0 | if (!slice) |
508 | 0 | return AVERROR(ENOMEM); |
509 | 0 | c->nb_slice_ctx++; |
510 | |
|
511 | 0 | c2 = sws_internal(slice); |
512 | 0 | c2->parent = sws; |
513 | |
|
514 | 0 | ret = av_opt_copy(slice, sws); |
515 | 0 | if (ret < 0) |
516 | 0 | return ret; |
517 | | |
518 | 0 | ret = ff_sws_init_single_context(slice, NULL, NULL); |
519 | 0 | if (ret < 0) |
520 | 0 | return ret; |
521 | | |
522 | 0 | sws_setColorspaceDetails(slice, c->srcColorspaceTable, |
523 | 0 | slice->src_range, c->dstColorspaceTable, |
524 | 0 | slice->dst_range, c->brightness, c->contrast, |
525 | 0 | c->saturation); |
526 | |
|
527 | 0 | for (int i = 0; i < FF_ARRAY_ELEMS(c->srcColorspaceTable); i++) { |
528 | 0 | c2->srcColorspaceTable[i] = c->srcColorspaceTable[i]; |
529 | 0 | c2->dstColorspaceTable[i] = c->dstColorspaceTable[i]; |
530 | 0 | } |
531 | 0 | } |
532 | 0 | } |
533 | | |
534 | 0 | if (c->dstXYZ && !(c->srcXYZ && unscaled)) { |
535 | 0 | ret = ff_sws_graph_add_pass(graph, AV_PIX_FMT_RGB48, dst_w, dst_h, pass, |
536 | 0 | 1, run_rgb2xyz, NULL, c, NULL, &pass); |
537 | 0 | if (ret < 0) |
538 | 0 | return ret; |
539 | 0 | } |
540 | | |
541 | 0 | *output = pass; |
542 | 0 | return 0; |
543 | 0 | } |
544 | | |
545 | | static int add_legacy_sws_pass(SwsGraph *graph, const SwsFormat *src, |
546 | | const SwsFormat *dst, SwsPass *input, |
547 | | SwsPass **output) |
548 | 0 | { |
549 | 0 | int ret, warned = 0; |
550 | 0 | SwsContext *const ctx = graph->ctx; |
551 | 0 | if (src->hw_format != AV_PIX_FMT_NONE || dst->hw_format != AV_PIX_FMT_NONE) |
552 | 0 | return AVERROR(ENOTSUP); |
553 | | |
554 | 0 | SwsContext *sws = sws_alloc_context(); |
555 | 0 | if (!sws) |
556 | 0 | return AVERROR(ENOMEM); |
557 | | |
558 | 0 | sws->flags = ctx->flags; |
559 | 0 | sws->dither = ctx->dither; |
560 | 0 | sws->alpha_blend = ctx->alpha_blend; |
561 | 0 | sws->gamma_flag = ctx->gamma_flag; |
562 | 0 | sws->scaler = ctx->scaler; |
563 | 0 | sws->scaler_sub = ctx->scaler_sub; |
564 | |
|
565 | 0 | sws->src_w = src->width; |
566 | 0 | sws->src_h = src->height; |
567 | 0 | sws->src_format = src->format; |
568 | 0 | sws->src_range = src->range == AVCOL_RANGE_JPEG; |
569 | |
|
570 | 0 | sws->dst_w = dst->width; |
571 | 0 | sws->dst_h = dst->height; |
572 | 0 | sws->dst_format = dst->format; |
573 | 0 | sws->dst_range = dst->range == AVCOL_RANGE_JPEG; |
574 | 0 | get_chroma_pos(graph, &sws->src_h_chr_pos, &sws->src_v_chr_pos, src); |
575 | 0 | get_chroma_pos(graph, &sws->dst_h_chr_pos, &sws->dst_v_chr_pos, dst); |
576 | |
|
577 | 0 | graph->incomplete |= src->range == AVCOL_RANGE_UNSPECIFIED; |
578 | 0 | graph->incomplete |= dst->range == AVCOL_RANGE_UNSPECIFIED; |
579 | | |
580 | | /* Allow overriding chroma position with the legacy API */ |
581 | 0 | legacy_chr_pos(graph, &sws->src_h_chr_pos, ctx->src_h_chr_pos, &warned); |
582 | 0 | legacy_chr_pos(graph, &sws->src_v_chr_pos, ctx->src_v_chr_pos, &warned); |
583 | 0 | legacy_chr_pos(graph, &sws->dst_h_chr_pos, ctx->dst_h_chr_pos, &warned); |
584 | 0 | legacy_chr_pos(graph, &sws->dst_v_chr_pos, ctx->dst_v_chr_pos, &warned); |
585 | |
|
586 | 0 | for (int i = 0; i < SWS_NUM_SCALER_PARAMS; i++) |
587 | 0 | sws->scaler_params[i] = ctx->scaler_params[i]; |
588 | |
|
589 | 0 | ret = sws_init_context(sws, NULL, NULL); |
590 | 0 | if (ret < 0) { |
591 | 0 | sws_free_context(&sws); |
592 | 0 | return ret; |
593 | 0 | } |
594 | | |
595 | | /* Set correct color matrices */ |
596 | 0 | { |
597 | 0 | int in_full, out_full, brightness, contrast, saturation; |
598 | 0 | const int *inv_table, *table; |
599 | 0 | sws_getColorspaceDetails(sws, (int **)&inv_table, &in_full, |
600 | 0 | (int **)&table, &out_full, |
601 | 0 | &brightness, &contrast, &saturation); |
602 | |
|
603 | 0 | inv_table = sws_getCoefficients(src->csp); |
604 | 0 | table = sws_getCoefficients(dst->csp); |
605 | |
|
606 | 0 | graph->incomplete |= src->csp != dst->csp && |
607 | 0 | (src->csp == AVCOL_SPC_UNSPECIFIED || |
608 | 0 | dst->csp == AVCOL_SPC_UNSPECIFIED); |
609 | |
|
610 | 0 | sws_setColorspaceDetails(sws, inv_table, in_full, table, out_full, |
611 | 0 | brightness, contrast, saturation); |
612 | 0 | } |
613 | |
|
614 | 0 | return init_legacy_subpass(graph, sws, input, output); |
615 | 0 | } |
616 | | |
617 | | /********************************* |
618 | | * Format conversion and scaling * |
619 | | *********************************/ |
620 | | |
621 | | #if CONFIG_UNSTABLE |
622 | | static int add_convert_pass(SwsGraph *graph, const SwsFormat *src, |
623 | | const SwsFormat *dst, SwsPass *input, |
624 | | SwsPass **output) |
625 | 0 | { |
626 | 0 | SwsContext *ctx = graph->ctx; |
627 | 0 | int ret = AVERROR(ENOTSUP); |
628 | | |
629 | | /* Mark the entire new ops infrastructure as experimental for now */ |
630 | 0 | if (!(ctx->flags & SWS_UNSTABLE)) |
631 | 0 | goto fail; |
632 | | |
633 | 0 | SwsOpList *ops; |
634 | 0 | ret = ff_sws_op_list_generate(ctx, src, dst, &ops, &graph->incomplete); |
635 | 0 | if (ret < 0) |
636 | 0 | goto fail; |
637 | | |
638 | 0 | av_log(ctx, AV_LOG_VERBOSE, "Conversion pass for %s -> %s:\n", |
639 | 0 | av_get_pix_fmt_name(src->format), av_get_pix_fmt_name(dst->format)); |
640 | |
|
641 | 0 | av_log(ctx, AV_LOG_DEBUG, "Unoptimized operation list:\n"); |
642 | 0 | ff_sws_op_list_print(ctx, AV_LOG_DEBUG, AV_LOG_TRACE, ops); |
643 | |
|
644 | 0 | ret = ff_sws_compile_pass(graph, NULL, &ops, SWS_OP_FLAG_OPTIMIZE, input, output); |
645 | 0 | if (ret < 0) |
646 | 0 | goto fail; |
647 | | |
648 | 0 | ret = 0; |
649 | | /* fall through */ |
650 | |
|
651 | 0 | fail: |
652 | 0 | if (ret == AVERROR(ENOTSUP)) |
653 | 0 | return add_legacy_sws_pass(graph, src, dst, input, output); |
654 | 0 | return ret; |
655 | 0 | } |
656 | | #else |
657 | | #define add_convert_pass add_legacy_sws_pass |
658 | | #endif |
659 | | |
660 | | |
661 | | /************************** |
662 | | * Gamut and tone mapping * |
663 | | **************************/ |
664 | | |
665 | | static void free_lut3d(void *priv) |
666 | 0 | { |
667 | 0 | SwsLut3D *lut = priv; |
668 | 0 | ff_sws_lut3d_free(&lut); |
669 | 0 | } |
670 | | |
671 | | static int setup_lut3d(const SwsFrame *out, const SwsFrame *in, const SwsPass *pass) |
672 | 0 | { |
673 | 0 | SwsLut3D *lut = pass->priv; |
674 | | |
675 | | /* Update dynamic frame metadata from the original source frame */ |
676 | 0 | ff_sws_lut3d_update(lut, &pass->graph->src.color); |
677 | 0 | return 0; |
678 | 0 | } |
679 | | |
680 | | static void run_lut3d(const SwsFrame *out, const SwsFrame *in, int y, int h, |
681 | | const SwsPass *pass) |
682 | 0 | { |
683 | 0 | SwsLut3D *lut = pass->priv; |
684 | 0 | uint8_t *in_data[4], *out_data[4]; |
685 | 0 | frame_shift(in, y, in_data); |
686 | 0 | frame_shift(out, y, out_data); |
687 | |
|
688 | 0 | ff_sws_lut3d_apply(lut, in_data[0], in->linesize[0], out_data[0], |
689 | 0 | out->linesize[0], pass->width, h); |
690 | 0 | } |
691 | | |
692 | | static int adapt_colors(SwsGraph *graph, SwsFormat src, SwsFormat dst, |
693 | | SwsPass *input, SwsPass **output) |
694 | 0 | { |
695 | 0 | enum AVPixelFormat fmt_in, fmt_out; |
696 | 0 | SwsColorMap map = {0}; |
697 | 0 | SwsLut3D *lut; |
698 | 0 | int ret; |
699 | | |
700 | | /** |
701 | | * Grayspace does not really have primaries, so just force the use of |
702 | | * the equivalent other primary set to avoid a conversion. Technically, |
703 | | * this does affect the weights used for the Grayscale conversion, but |
704 | | * in practise, that should give the expected results more often than not. |
705 | | */ |
706 | 0 | if (isGray(dst.format)) { |
707 | 0 | dst.color = src.color; |
708 | 0 | } else if (isGray(src.format)) { |
709 | 0 | src.color = dst.color; |
710 | 0 | } |
711 | | |
712 | | /* Fully infer color spaces before color mapping logic */ |
713 | 0 | graph->incomplete |= ff_infer_colors(&src.color, &dst.color); |
714 | |
|
715 | 0 | map.intent = graph->ctx->intent; |
716 | 0 | map.src = src.color; |
717 | 0 | map.dst = dst.color; |
718 | |
|
719 | 0 | if (ff_sws_color_map_noop(&map)) |
720 | 0 | return 0; |
721 | | |
722 | 0 | if (src.hw_format != AV_PIX_FMT_NONE || dst.hw_format != AV_PIX_FMT_NONE) |
723 | 0 | return AVERROR(ENOTSUP); |
724 | | |
725 | 0 | lut = ff_sws_lut3d_alloc(); |
726 | 0 | if (!lut) |
727 | 0 | return AVERROR(ENOMEM); |
728 | | |
729 | 0 | fmt_in = ff_sws_lut3d_pick_pixfmt(src, 0); |
730 | 0 | fmt_out = ff_sws_lut3d_pick_pixfmt(dst, 1); |
731 | 0 | if (fmt_in != src.format) { |
732 | 0 | SwsFormat tmp = src; |
733 | 0 | tmp.format = fmt_in; |
734 | 0 | ret = add_convert_pass(graph, &src, &tmp, input, &input); |
735 | 0 | if (ret < 0) { |
736 | 0 | ff_sws_lut3d_free(&lut); |
737 | 0 | return ret; |
738 | 0 | } |
739 | 0 | } |
740 | | |
741 | 0 | ret = ff_sws_lut3d_generate(lut, fmt_in, fmt_out, &map); |
742 | 0 | if (ret < 0) { |
743 | 0 | ff_sws_lut3d_free(&lut); |
744 | 0 | return ret; |
745 | 0 | } |
746 | | |
747 | 0 | return ff_sws_graph_add_pass(graph, fmt_out, src.width, src.height, |
748 | 0 | input, 1, run_lut3d, setup_lut3d, lut, |
749 | 0 | free_lut3d, output); |
750 | 0 | } |
751 | | |
752 | | /*************************************** |
753 | | * Main filter graph construction code * |
754 | | ***************************************/ |
755 | | |
756 | | static int init_passes(SwsGraph *graph) |
757 | 0 | { |
758 | 0 | SwsFormat src = graph->src; |
759 | 0 | SwsFormat dst = graph->dst; |
760 | 0 | SwsPass *pass = NULL; /* read from main input image */ |
761 | 0 | int ret; |
762 | |
|
763 | 0 | ret = adapt_colors(graph, src, dst, pass, &pass); |
764 | 0 | if (ret < 0) |
765 | 0 | return ret; |
766 | 0 | src.format = pass ? pass->format : src.format; |
767 | 0 | src.color = dst.color; |
768 | |
|
769 | 0 | if (!ff_fmt_equal(&src, &dst)) { |
770 | 0 | ret = add_convert_pass(graph, &src, &dst, pass, &pass); |
771 | 0 | if (ret < 0) |
772 | 0 | return ret; |
773 | 0 | } |
774 | | |
775 | 0 | if (pass) |
776 | 0 | return 0; |
777 | | |
778 | | /* No passes were added, so no operations were necessary */ |
779 | 0 | graph->noop = 1; |
780 | | |
781 | | /* Add threaded memcpy pass */ |
782 | 0 | return ff_sws_graph_add_pass(graph, dst.format, dst.width, dst.height, |
783 | 0 | pass, 1, run_copy, NULL, NULL, NULL, &pass); |
784 | 0 | } |
785 | | |
786 | | static void sws_graph_worker(void *priv, int jobnr, int threadnr, int nb_jobs, |
787 | | int nb_threads) |
788 | 0 | { |
789 | 0 | SwsGraph *graph = priv; |
790 | 0 | const SwsPass *pass = graph->exec.pass; |
791 | 0 | const int slice_y = jobnr * pass->slice_h; |
792 | 0 | const int slice_h = FFMIN(pass->slice_h, pass->height - slice_y); |
793 | |
|
794 | 0 | pass->run(graph->exec.output, graph->exec.input, slice_y, slice_h, pass); |
795 | 0 | } |
796 | | |
797 | | SwsGraph *ff_sws_graph_alloc(void) |
798 | 0 | { |
799 | 0 | return av_mallocz(sizeof(SwsGraph)); |
800 | 0 | } |
801 | | |
802 | | static void graph_uninit(SwsGraph *graph) |
803 | 0 | { |
804 | 0 | avpriv_slicethread_free(&graph->slicethread); |
805 | |
|
806 | 0 | for (int i = 0; i < graph->num_passes; i++) |
807 | 0 | pass_free(graph->passes[i]); |
808 | 0 | av_free(graph->passes); |
809 | |
|
810 | 0 | memset(graph, 0, sizeof(*graph)); |
811 | 0 | } |
812 | | |
813 | | int ff_sws_graph_init(SwsGraph *graph, SwsContext *ctx, const SwsFormat *dst, |
814 | | const SwsFormat *src, int field) |
815 | 0 | { |
816 | 0 | int ret; |
817 | 0 | if (graph->ctx) { |
818 | 0 | av_log(ctx, AV_LOG_ERROR, "Graph is already initialized\n"); |
819 | 0 | return AVERROR(EINVAL); |
820 | 0 | } |
821 | | |
822 | 0 | graph->ctx = ctx; |
823 | 0 | graph->src = *src; |
824 | 0 | graph->dst = *dst; |
825 | 0 | graph->field = field; |
826 | 0 | graph->opts_copy = *ctx; |
827 | |
|
828 | 0 | if (ctx->threads == 1) { |
829 | 0 | graph->num_threads = 1; |
830 | 0 | } else { |
831 | 0 | ret = avpriv_slicethread_create(&graph->slicethread, (void *) graph, |
832 | 0 | sws_graph_worker, NULL, ctx->threads); |
833 | 0 | if (ret == AVERROR(ENOSYS)) { |
834 | | /* Fall back to single threaded operation */ |
835 | 0 | graph->num_threads = 1; |
836 | 0 | } else if (ret < 0) { |
837 | 0 | goto error; |
838 | 0 | } else { |
839 | 0 | graph->num_threads = ret; |
840 | 0 | } |
841 | 0 | } |
842 | | |
843 | 0 | ret = init_passes(graph); |
844 | 0 | if (ret < 0) |
845 | 0 | goto error; |
846 | | |
847 | | /* Resolve output buffers for all intermediate passes */ |
848 | 0 | for (int i = 0; i < graph->num_passes; i++) { |
849 | 0 | ret = pass_alloc_output(graph->passes[i]->input); |
850 | 0 | if (ret < 0) |
851 | 0 | goto error; |
852 | 0 | } |
853 | | |
854 | 0 | return 0; |
855 | | |
856 | 0 | error: |
857 | 0 | graph_uninit(graph); |
858 | 0 | return ret; |
859 | 0 | } |
860 | | |
861 | | int ff_sws_graph_create(SwsContext *ctx, const SwsFormat *dst, const SwsFormat *src, |
862 | | int field, SwsGraph **out_graph) |
863 | 0 | { |
864 | 0 | SwsGraph *graph = ff_sws_graph_alloc(); |
865 | 0 | if (!graph) |
866 | 0 | return AVERROR(ENOMEM); |
867 | | |
868 | 0 | int ret = ff_sws_graph_init(graph, ctx, dst, src, field); |
869 | 0 | if (ret < 0) { |
870 | 0 | ff_sws_graph_free(&graph); |
871 | 0 | return ret; |
872 | 0 | } |
873 | | |
874 | 0 | *out_graph = graph; |
875 | 0 | return 0; |
876 | 0 | } |
877 | | |
878 | | void ff_sws_graph_rollback(SwsGraph *graph, int since_idx) |
879 | 0 | { |
880 | 0 | for (int i = since_idx; i < graph->num_passes; i++) |
881 | 0 | pass_free(graph->passes[i]); |
882 | 0 | graph->num_passes = since_idx; |
883 | 0 | } |
884 | | |
885 | | void ff_sws_graph_free(SwsGraph **pgraph) |
886 | 0 | { |
887 | 0 | SwsGraph *graph = *pgraph; |
888 | 0 | if (!graph) |
889 | 0 | return; |
890 | | |
891 | 0 | graph_uninit(graph); |
892 | 0 | av_free(graph); |
893 | 0 | *pgraph = NULL; |
894 | 0 | } |
895 | | |
896 | | /* Tests only options relevant to SwsGraph */ |
897 | | static int opts_equal(const SwsContext *c1, const SwsContext *c2) |
898 | 0 | { |
899 | 0 | return c1->flags == c2->flags && |
900 | 0 | c1->threads == c2->threads && |
901 | 0 | c1->dither == c2->dither && |
902 | 0 | c1->alpha_blend == c2->alpha_blend && |
903 | 0 | c1->gamma_flag == c2->gamma_flag && |
904 | 0 | c1->src_h_chr_pos == c2->src_h_chr_pos && |
905 | 0 | c1->src_v_chr_pos == c2->src_v_chr_pos && |
906 | 0 | c1->dst_h_chr_pos == c2->dst_h_chr_pos && |
907 | 0 | c1->dst_v_chr_pos == c2->dst_v_chr_pos && |
908 | 0 | c1->intent == c2->intent && |
909 | 0 | c1->scaler == c2->scaler && |
910 | 0 | c1->scaler_sub == c2->scaler_sub && |
911 | 0 | !memcmp(c1->scaler_params, c2->scaler_params, sizeof(c1->scaler_params)); |
912 | |
|
913 | 0 | } |
914 | | |
915 | | int ff_sws_graph_reinit(SwsGraph *graph, SwsContext *ctx, const SwsFormat *dst, |
916 | | const SwsFormat *src, int field) |
917 | 0 | { |
918 | 0 | if (ff_fmt_equal(&graph->src, src) && ff_fmt_equal(&graph->dst, dst) && |
919 | 0 | opts_equal(ctx, &graph->opts_copy)) |
920 | 0 | { |
921 | 0 | ff_sws_graph_update_metadata(graph, &src->color); |
922 | 0 | return 0; |
923 | 0 | } |
924 | | |
925 | 0 | graph_uninit(graph); |
926 | 0 | return ff_sws_graph_init(graph, ctx, dst, src, field); |
927 | 0 | } |
928 | | |
929 | | void ff_sws_graph_update_metadata(SwsGraph *graph, const SwsColor *color) |
930 | 0 | { |
931 | 0 | if (!color) |
932 | 0 | return; |
933 | | |
934 | 0 | ff_color_update_dynamic(&graph->src.color, color); |
935 | 0 | } |
936 | | |
937 | | static void get_field(SwsGraph *graph, const AVFrame *avframe, SwsFrame *frame) |
938 | 0 | { |
939 | 0 | ff_sws_frame_from_avframe(frame, avframe); |
940 | |
|
941 | 0 | if (!(avframe->flags & AV_FRAME_FLAG_INTERLACED)) { |
942 | 0 | av_assert1(!graph->field); |
943 | 0 | return; |
944 | 0 | } |
945 | | |
946 | 0 | if (graph->field == FIELD_BOTTOM) { |
947 | | /* Odd rows, offset by one line */ |
948 | 0 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); |
949 | 0 | for (int i = 0; i < 4; i++) { |
950 | 0 | if (frame->data[i]) |
951 | 0 | frame->data[i] += frame->linesize[i]; |
952 | 0 | if (desc->flags & AV_PIX_FMT_FLAG_PAL) |
953 | 0 | break; |
954 | 0 | } |
955 | 0 | } |
956 | | |
957 | | /* Take only every second line */ |
958 | 0 | for (int i = 0; i < 4; i++) |
959 | 0 | frame->linesize[i] <<= 1; |
960 | |
|
961 | 0 | frame->height = (frame->height + (graph->field == FIELD_TOP)) >> 1; |
962 | 0 | } |
963 | | |
964 | | int ff_sws_graph_run(SwsGraph *graph, const AVFrame *dst, const AVFrame *src) |
965 | 0 | { |
966 | 0 | av_assert0(dst->format == graph->dst.hw_format || dst->format == graph->dst.format); |
967 | 0 | av_assert0(src->format == graph->src.hw_format || src->format == graph->src.format); |
968 | | |
969 | 0 | SwsFrame src_field, dst_field; |
970 | 0 | get_field(graph, dst, &dst_field); |
971 | 0 | get_field(graph, src, &src_field); |
972 | |
|
973 | 0 | for (int i = 0; i < graph->num_passes; i++) { |
974 | 0 | const SwsPass *pass = graph->passes[i]; |
975 | 0 | graph->exec.pass = pass; |
976 | 0 | graph->exec.input = pass->input ? &pass->input->output->frame : &src_field; |
977 | 0 | graph->exec.output = pass->output->avframe ? &pass->output->frame : &dst_field; |
978 | 0 | if (pass->setup) { |
979 | 0 | int ret = pass->setup(graph->exec.output, graph->exec.input, pass); |
980 | 0 | if (ret < 0) |
981 | 0 | return ret; |
982 | 0 | } |
983 | | |
984 | 0 | if (pass->num_slices == 1) { |
985 | 0 | pass->run(graph->exec.output, graph->exec.input, 0, pass->height, pass); |
986 | 0 | } else { |
987 | 0 | avpriv_slicethread_execute(graph->slicethread, pass->num_slices, 0); |
988 | 0 | } |
989 | 0 | } |
990 | | |
991 | 0 | return 0; |
992 | 0 | } |