/src/mpv/video/out/gpu/video.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * This file is part of mpv. |
3 | | * |
4 | | * mpv is free software; you can redistribute it and/or |
5 | | * modify it under the terms of the GNU Lesser General Public |
6 | | * License as published by the Free Software Foundation; either |
7 | | * version 2.1 of the License, or (at your option) any later version. |
8 | | * |
9 | | * mpv is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | * GNU Lesser General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU Lesser General Public |
15 | | * License along with mpv. If not, see <http://www.gnu.org/licenses/>. |
16 | | */ |
17 | | |
18 | | #include <assert.h> |
19 | | #include <float.h> |
20 | | #include <math.h> |
21 | | #include <stdarg.h> |
22 | | #include <stdbool.h> |
23 | | #include <string.h> |
24 | | |
25 | | #include <libavutil/common.h> |
26 | | #include <libavutil/lfg.h> |
27 | | |
28 | | #include "video.h" |
29 | | |
30 | | #include "misc/bstr.h" |
31 | | #include "options/m_config.h" |
32 | | #include "options/path.h" |
33 | | #include "options/options.h" |
34 | | #include "utils.h" |
35 | | #include "hwdec.h" |
36 | | #include "osd.h" |
37 | | #include "ra.h" |
38 | | #include "stream/stream.h" |
39 | | #include "video_shaders.h" |
40 | | #include "user_shaders.h" |
41 | | #include "error_diffusion.h" |
42 | | #include "video/out/filter_kernels.h" |
43 | | #include "video/out/aspect.h" |
44 | | #include "video/out/dither.h" |
45 | | #include "video/out/vo.h" |
46 | | |
47 | | // must be sorted, and terminated with 0 |
48 | | int filter_sizes[] = |
49 | | {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0}; |
50 | | int tscale_sizes[] = {2, 4, 6, 8, 0}; |
51 | | |
52 | | struct vertex_pt { |
53 | | float x, y; |
54 | | }; |
55 | | |
56 | | struct texplane { |
57 | | struct ra_tex *tex; |
58 | | int w, h; |
59 | | bool flipped; |
60 | | }; |
61 | | |
62 | | struct video_image { |
63 | | struct texplane planes[4]; |
64 | | struct mp_image *mpi; // original input image |
65 | | uint64_t id; // unique ID identifying mpi contents |
66 | | bool hwdec_mapped; |
67 | | }; |
68 | | |
69 | | enum plane_type { |
70 | | PLANE_NONE = 0, |
71 | | PLANE_RGB, |
72 | | PLANE_LUMA, |
73 | | PLANE_CHROMA, |
74 | | PLANE_ALPHA, |
75 | | PLANE_XYZ, |
76 | | }; |
77 | | |
78 | | static const char *plane_names[] = { |
79 | | [PLANE_NONE] = "unknown", |
80 | | [PLANE_RGB] = "rgb", |
81 | | [PLANE_LUMA] = "luma", |
82 | | [PLANE_CHROMA] = "chroma", |
83 | | [PLANE_ALPHA] = "alpha", |
84 | | [PLANE_XYZ] = "xyz", |
85 | | }; |
86 | | |
87 | | // A self-contained description of a source image which can be bound to a |
88 | | // texture unit and sampled from. Contains metadata about how it's to be used |
89 | | struct image { |
90 | | enum plane_type type; // must be set to something non-zero |
91 | | int components; // number of relevant coordinates |
92 | | float multiplier; // multiplier to be used when sampling |
93 | | struct ra_tex *tex; |
94 | | int w, h; // logical size (after transformation) |
95 | | struct gl_transform transform; // rendering transformation |
96 | | int padding; // number of leading padding components (e.g. 2 = rg is padding) |
97 | | }; |
98 | | |
99 | | // A named image, for user scripting purposes |
100 | | struct saved_img { |
101 | | const char *name; |
102 | | struct image img; |
103 | | }; |
104 | | |
105 | | // A texture hook. This is some operation that transforms a named texture as |
106 | | // soon as it's generated |
107 | | struct tex_hook { |
108 | | const char *save_tex; |
109 | | const char *hook_tex[SHADER_MAX_HOOKS]; |
110 | | const char *bind_tex[SHADER_MAX_BINDS]; |
111 | | int components; // how many components are relevant (0 = same as input) |
112 | | bool align_offset; // whether to align hooked tex with reference. |
113 | | void *priv; // this gets talloc_freed when the tex_hook is removed |
114 | | void (*hook)(struct gl_video *p, struct image img, // generates GLSL |
115 | | struct gl_transform *trans, void *priv); |
116 | | bool (*cond)(struct gl_video *p, struct image img, void *priv); |
117 | | }; |
118 | | |
119 | | struct surface { |
120 | | struct ra_tex *tex; |
121 | | uint64_t id; |
122 | | double pts; |
123 | | }; |
124 | | |
125 | 0 | #define SURFACES_MAX 10 |
126 | | |
127 | | struct cached_file { |
128 | | char *path; |
129 | | struct bstr body; |
130 | | }; |
131 | | |
132 | | struct pass_info { |
133 | | struct bstr desc; |
134 | | struct mp_pass_perf perf; |
135 | | }; |
136 | | |
137 | | struct dr_buffer { |
138 | | struct ra_buf *buf; |
139 | | // The mpi reference will keep the data from being recycled (or from other |
140 | | // references gaining write access) while the GPU is accessing the buffer. |
141 | | struct mp_image *mpi; |
142 | | }; |
143 | | |
144 | | struct gl_video { |
145 | | struct ra *ra; |
146 | | |
147 | | struct mpv_global *global; |
148 | | struct mp_log *log; |
149 | | struct gl_video_opts opts; |
150 | | struct m_config_cache *opts_cache; |
151 | | struct gl_lcms *cms; |
152 | | |
153 | | int fb_depth; // actual bits available in GL main framebuffer |
154 | | struct m_color clear_color; |
155 | | bool force_clear_color; |
156 | | |
157 | | struct gl_shader_cache *sc; |
158 | | |
159 | | struct osd_state *osd_state; |
160 | | struct mpgl_osd *osd; |
161 | | double osd_pts; |
162 | | |
163 | | struct ra_tex *lut_3d_texture; |
164 | | bool use_lut_3d; |
165 | | int lut_3d_size[3]; |
166 | | |
167 | | struct ra_tex *dither_texture; |
168 | | |
169 | | struct mp_image_params real_image_params; // configured format |
170 | | struct mp_image_params image_params; // texture format (mind hwdec case) |
171 | | struct mp_image_params target_params; // target format |
172 | | struct ra_imgfmt_desc ra_format; // texture format |
173 | | int plane_count; |
174 | | |
175 | | bool is_gray; |
176 | | bool has_alpha; |
177 | | char color_swizzle[5]; |
178 | | bool use_integer_conversion; |
179 | | |
180 | | struct video_image image; |
181 | | |
182 | | struct dr_buffer *dr_buffers; |
183 | | int num_dr_buffers; |
184 | | |
185 | | bool using_dr_path; |
186 | | |
187 | | bool dumb_mode; |
188 | | bool forced_dumb_mode; |
189 | | |
190 | | // Cached vertex array, to avoid re-allocation per frame. For simplicity, |
191 | | // our vertex format is simply a list of `vertex_pt`s, since this greatly |
192 | | // simplifies offset calculation at the cost of (unneeded) flexibility. |
193 | | struct vertex_pt *tmp_vertex; |
194 | | struct ra_renderpass_input *vao; |
195 | | int vao_len; |
196 | | |
197 | | const struct ra_format *fbo_format; |
198 | | struct ra_tex *merge_tex[4]; |
199 | | struct ra_tex *scale_tex[4]; |
200 | | struct ra_tex *integer_tex[4]; |
201 | | struct ra_tex *chroma_tex[4]; |
202 | | struct ra_tex *indirect_tex; |
203 | | struct ra_tex *blend_subs_tex; |
204 | | struct ra_tex *error_diffusion_tex[2]; |
205 | | struct ra_tex *screen_tex; |
206 | | struct ra_tex *output_tex; |
207 | | struct ra_tex **hook_textures; |
208 | | int num_hook_textures; |
209 | | int idx_hook_textures; |
210 | | |
211 | | struct ra_buf *hdr_peak_ssbo; |
212 | | struct surface surfaces[SURFACES_MAX]; |
213 | | |
214 | | // user pass descriptions and textures |
215 | | struct tex_hook *tex_hooks; |
216 | | int num_tex_hooks; |
217 | | struct gl_user_shader_tex *user_textures; |
218 | | int num_user_textures; |
219 | | |
220 | | int surface_idx; |
221 | | int surface_now; |
222 | | int frames_drawn; |
223 | | bool is_interpolated; |
224 | | bool output_tex_valid; |
225 | | |
226 | | // state for configured scalers |
227 | | struct scaler scaler[SCALER_COUNT]; |
228 | | |
229 | | struct mp_csp_equalizer_state *video_eq; |
230 | | |
231 | | struct mp_rect src_rect; // displayed part of the source video |
232 | | struct mp_rect dst_rect; // video rectangle on output window |
233 | | struct mp_osd_res osd_rect; // OSD size/margins |
234 | | |
235 | | // temporary during rendering |
236 | | struct compute_info pass_compute; // compute shader metadata for this pass |
237 | | struct image *pass_imgs; // bound images for this pass |
238 | | int num_pass_imgs; |
239 | | struct saved_img *saved_imgs; // saved (named) images for this frame |
240 | | int num_saved_imgs; |
241 | | |
242 | | // effective current texture metadata - this will essentially affect the |
243 | | // next render pass target, as well as implicitly tracking what needs to |
244 | | // be done with the image |
245 | | int texture_w, texture_h; |
246 | | struct gl_transform texture_offset; // texture transform without rotation |
247 | | int components; |
248 | | bool use_linear; |
249 | | float user_gamma; |
250 | | |
251 | | // pass info / metrics |
252 | | struct pass_info pass_fresh[VO_PASS_PERF_MAX]; |
253 | | struct pass_info pass_redraw[VO_PASS_PERF_MAX]; |
254 | | struct pass_info *pass; |
255 | | int pass_idx; |
256 | | struct timer_pool *upload_timer; |
257 | | struct timer_pool *blit_timer; |
258 | | struct timer_pool *osd_timer; |
259 | | |
260 | | int frames_uploaded; |
261 | | int frames_rendered; |
262 | | AVLFG lfg; |
263 | | |
264 | | // Cached because computing it can take relatively long |
265 | | int last_dither_matrix_size; |
266 | | float *last_dither_matrix; |
267 | | |
268 | | struct cached_file *files; |
269 | | int num_files; |
270 | | |
271 | | struct ra_hwdec_ctx hwdec_ctx; |
272 | | struct ra_hwdec_mapper *hwdec_mapper; |
273 | | struct ra_hwdec *hwdec_overlay; |
274 | | bool hwdec_active; |
275 | | |
276 | | bool dsi_warned; |
277 | | bool broken_frame; // temporary error state |
278 | | |
279 | | bool colorspace_override_warned; |
280 | | bool correct_downscaling_warned; |
281 | | }; |
282 | | |
283 | | #define FIXED_SCALE_KERNELS \ |
284 | | {"bilinear", SCALER_BILINEAR}, \ |
285 | | {"bicubic_fast", SCALER_BICUBIC_FAST}, \ |
286 | | {"oversample", SCALER_OVERSAMPLE}, \ |
287 | | |
288 | | #define NON_POLAR_FILTER_KERNELS \ |
289 | | {"spline16", SCALER_SPLINE16}, \ |
290 | | {"spline36", SCALER_SPLINE36}, \ |
291 | | {"spline64", SCALER_SPLINE64}, \ |
292 | | {"sinc", SCALER_SINC}, \ |
293 | | {"lanczos", SCALER_LANCZOS}, \ |
294 | | {"ginseng", SCALER_GINSENG}, \ |
295 | | {"bicubic", SCALER_BICUBIC}, \ |
296 | | {"hermite", SCALER_HERMITE}, \ |
297 | | {"catmull_rom", SCALER_CATMULL_ROM}, \ |
298 | | {"mitchell", SCALER_MITCHELL}, \ |
299 | | {"robidoux", SCALER_ROBIDOUX}, \ |
300 | | {"robidouxsharp", SCALER_ROBIDOUXSHARP}, \ |
301 | | {"box", SCALER_BOX}, \ |
302 | | {"nearest", SCALER_NEAREST}, \ |
303 | | {"triangle", SCALER_TRIANGLE}, \ |
304 | | {"gaussian", SCALER_GAUSSIAN}, \ |
305 | | |
306 | | #define POLAR_FILTER_KERNELS \ |
307 | | {"jinc", SCALER_JINC}, \ |
308 | | {"ewa_lanczos", SCALER_EWA_LANCZOS}, \ |
309 | | {"ewa_hanning", SCALER_EWA_HANNING}, \ |
310 | | {"ewa_ginseng", SCALER_EWA_GINSENG}, \ |
311 | | {"ewa_lanczossharp", SCALER_EWA_LANCZOSSHARP}, \ |
312 | | {"ewa_lanczos4sharpest", SCALER_EWA_LANCZOS4SHARPEST}, \ |
313 | | {"ewa_lanczossoft", SCALER_EWA_LANCZOSSOFT}, \ |
314 | | {"haasnsoft", SCALER_HAASNSOFT}, \ |
315 | | {"ewa_robidoux", SCALER_EWA_ROBIDOUX}, \ |
316 | | {"ewa_robidouxsharp", SCALER_EWA_ROBIDOUXSHARP}, \ |
317 | | |
318 | | #define FILTER_WINDOWS \ |
319 | | {"bartlett", WINDOW_BARTLETT}, \ |
320 | | {"cosine", WINDOW_COSINE}, \ |
321 | | {"hanning", WINDOW_HANNING}, \ |
322 | | {"tukey", WINDOW_TUKEY}, \ |
323 | | {"hamming", WINDOW_HAMMING}, \ |
324 | | {"quadric", WINDOW_QUADRIC}, \ |
325 | | {"welch", WINDOW_WELCH}, \ |
326 | | {"kaiser", WINDOW_KAISER}, \ |
327 | | {"blackman", WINDOW_BLACKMAN}, \ |
328 | | {"sphinx", WINDOW_SPHINX}, \ |
329 | | |
330 | | static const struct m_opt_choice_alternatives scale_filters[] = { |
331 | | FIXED_SCALE_KERNELS |
332 | | NON_POLAR_FILTER_KERNELS |
333 | | POLAR_FILTER_KERNELS |
334 | | FILTER_WINDOWS |
335 | | {0}, |
336 | | }; |
337 | | |
338 | | static const struct m_opt_choice_alternatives cdscale_filters[] = { |
339 | | {"", SCALER_INHERIT}, |
340 | | FIXED_SCALE_KERNELS |
341 | | NON_POLAR_FILTER_KERNELS |
342 | | POLAR_FILTER_KERNELS |
343 | | FILTER_WINDOWS |
344 | | {0}, |
345 | | }; |
346 | | |
347 | | static const struct m_opt_choice_alternatives tscale_filters[] = { |
348 | | {"oversample", SCALER_OVERSAMPLE}, |
349 | | {"linear", SCALER_LINEAR}, |
350 | | NON_POLAR_FILTER_KERNELS |
351 | | FILTER_WINDOWS |
352 | | {"jinc", WINDOW_JINC}, |
353 | | {0}, |
354 | | }; |
355 | | |
356 | | static const struct m_opt_choice_alternatives filter_windows[] = { |
357 | | {"", WINDOW_PREFERRED}, |
358 | | FILTER_WINDOWS |
359 | | {"jinc", WINDOW_JINC}, |
360 | | {0}, |
361 | | }; |
362 | | |
363 | | static const struct gl_video_opts gl_video_opts_def = { |
364 | | .dither_algo = DITHER_FRUIT, |
365 | | .dither_size = 6, |
366 | | .temporal_dither_period = 1, |
367 | | .error_diffusion = "sierra-lite", |
368 | | .fbo_format = "auto", |
369 | | .sigmoid_center = 0.75, |
370 | | .sigmoid_slope = 6.5, |
371 | | .scaler = { |
372 | | [SCALER_SCALE] = { |
373 | | {SCALER_LANCZOS, .params = {NAN, NAN}, .functions = scale_filters}, |
374 | | {WINDOW_PREFERRED, .params = {NAN, NAN}, .functions = filter_windows}, |
375 | | }, |
376 | | [SCALER_DSCALE] = { |
377 | | {SCALER_HERMITE, .params = {NAN, NAN}, .functions = cdscale_filters}, |
378 | | {WINDOW_PREFERRED, .params = {NAN, NAN}, .functions = filter_windows}, |
379 | | }, |
380 | | [SCALER_CSCALE] = { |
381 | | {SCALER_INHERIT, .params = {NAN, NAN}, .functions = cdscale_filters}, |
382 | | {WINDOW_PREFERRED, .params = {NAN, NAN}, .functions = filter_windows}, |
383 | | }, |
384 | | [SCALER_TSCALE] = { |
385 | | {SCALER_OVERSAMPLE, .params = {NAN, NAN}, .functions = tscale_filters}, |
386 | | {WINDOW_PREFERRED, .params = {NAN, NAN}, .functions = filter_windows}, |
387 | | }, |
388 | | }, |
389 | | .scaler_resizes_only = true, |
390 | | .correct_downscaling = true, |
391 | | .linear_downscaling = true, |
392 | | .sigmoid_upscaling = true, |
393 | | .interpolation_threshold = 0.01, |
394 | | .background = BACKGROUND_TILES, |
395 | | .background_color = {0, 0, 0, 255}, |
396 | | .gamma = 1.0f, |
397 | | .tone_map = { |
398 | | .curve = TONE_MAPPING_AUTO, |
399 | | .curve_param = NAN, |
400 | | .max_boost = 1.0, |
401 | | .decay_rate = 20.0, |
402 | | .scene_threshold_low = 1.0, |
403 | | .scene_threshold_high = 3.0, |
404 | | .contrast_smoothness = 3.5, |
405 | | }, |
406 | | .early_flush = -1, |
407 | | .shader_cache = true, |
408 | | .hwdec_interop = "auto", |
409 | | }; |
410 | | |
411 | | static OPT_STRING_VALIDATE_FUNC(validate_error_diffusion_opt); |
412 | | |
413 | | #define OPT_BASE_STRUCT struct gl_video_opts |
414 | | |
415 | | // Use for options which use NAN for defaults. |
416 | | #define OPT_FLOATDEF(field) \ |
417 | | OPT_FLOAT(field), \ |
418 | | .flags = M_OPT_DEFAULT_NAN |
419 | | |
420 | | #define SCALER_OPTS(n, i) \ |
421 | | {n"-param1", OPT_FLOATDEF(scaler[i].kernel.params[0])}, \ |
422 | | {n"-param2", OPT_FLOATDEF(scaler[i].kernel.params[1])}, \ |
423 | | {n"-blur", OPT_FLOAT(scaler[i].kernel.blur)}, \ |
424 | | {n"-taper", OPT_FLOAT(scaler[i].kernel.taper), M_RANGE(0.0, 1.0)}, \ |
425 | | {n"-wparam", OPT_FLOATDEF(scaler[i].window.params[0])}, \ |
426 | | {n"-wtaper", OPT_FLOAT(scaler[i].window.taper), M_RANGE(0.0, 1.0)}, \ |
427 | | {n"-clamp", OPT_FLOAT(scaler[i].clamp), M_RANGE(0.0, 1.0)}, \ |
428 | | {n"-radius", OPT_FLOAT(scaler[i].radius), M_RANGE(0.5, 16.0)}, \ |
429 | | {n"-antiring", OPT_FLOAT(scaler[i].antiring), M_RANGE(0.0, 1.0)}, \ |
430 | | {n"-window", OPT_CHOICE_C(scaler[i].window.function, filter_windows)} |
431 | | |
432 | | const struct m_sub_options gl_video_conf = { |
433 | | .opts = (const m_option_t[]) { |
434 | | {"gpu-dumb-mode", OPT_CHOICE(dumb_mode, |
435 | | {"auto", 0}, {"yes", 1}, {"no", -1})}, |
436 | | {"gamma-factor", OPT_FLOAT(gamma), M_RANGE(0.1, 2.0)}, |
437 | | {"gamma-auto", OPT_BOOL(gamma_auto), |
438 | | .deprecation_message = "replacement: gamma-auto.lua"}, |
439 | | {"target-prim", OPT_CHOICE_C(target_prim, pl_csp_prim_names)}, |
440 | | {"target-trc", OPT_CHOICE_C(target_trc, pl_csp_trc_names)}, |
441 | | {"target-peak", OPT_CHOICE(target_peak, {"auto", 0}), |
442 | | M_RANGE(10, 10000)}, |
443 | | {"target-contrast", OPT_CHOICE(target_contrast, {"auto", 0}, {"inf", -1}), |
444 | | M_RANGE(10, 1000000)}, |
445 | | {"target-gamut", OPT_CHOICE_C(target_gamut, pl_csp_prim_names)}, |
446 | | {"tone-mapping", OPT_CHOICE(tone_map.curve, |
447 | | {"auto", TONE_MAPPING_AUTO}, |
448 | | {"clip", TONE_MAPPING_CLIP}, |
449 | | {"mobius", TONE_MAPPING_MOBIUS}, |
450 | | {"reinhard", TONE_MAPPING_REINHARD}, |
451 | | {"hable", TONE_MAPPING_HABLE}, |
452 | | {"gamma", TONE_MAPPING_GAMMA}, |
453 | | {"linear", TONE_MAPPING_LINEAR}, |
454 | | {"spline", TONE_MAPPING_SPLINE}, |
455 | | {"bt.2390", TONE_MAPPING_BT_2390}, |
456 | | {"bt.2446a", TONE_MAPPING_BT_2446A}, |
457 | | {"st2094-40", TONE_MAPPING_ST2094_40}, |
458 | | {"st2094-10", TONE_MAPPING_ST2094_10})}, |
459 | | {"tone-mapping-param", OPT_FLOATDEF(tone_map.curve_param)}, |
460 | | {"inverse-tone-mapping", OPT_BOOL(tone_map.inverse)}, |
461 | | {"tone-mapping-max-boost", OPT_FLOAT(tone_map.max_boost), |
462 | | M_RANGE(1.0, 10.0)}, |
463 | | {"tone-mapping-visualize", OPT_BOOL(tone_map.visualize)}, |
464 | | {"gamut-mapping-mode", OPT_CHOICE(tone_map.gamut_mode, |
465 | | {"auto", GAMUT_AUTO}, |
466 | | {"clip", GAMUT_CLIP}, |
467 | | {"perceptual", GAMUT_PERCEPTUAL}, |
468 | | {"relative", GAMUT_RELATIVE}, |
469 | | {"saturation", GAMUT_SATURATION}, |
470 | | {"absolute", GAMUT_ABSOLUTE}, |
471 | | {"desaturate", GAMUT_DESATURATE}, |
472 | | {"darken", GAMUT_DARKEN}, |
473 | | {"warn", GAMUT_WARN}, |
474 | | {"linear", GAMUT_LINEAR})}, |
475 | | {"hdr-compute-peak", OPT_CHOICE(tone_map.compute_peak, |
476 | | {"auto", 0}, |
477 | | {"yes", 1}, |
478 | | {"no", -1})}, |
479 | | {"hdr-peak-percentile", OPT_FLOAT(tone_map.peak_percentile), |
480 | | M_RANGE(0.0, 100.0)}, |
481 | | {"hdr-peak-decay-rate", OPT_FLOAT(tone_map.decay_rate), |
482 | | M_RANGE(0.0, 1000.0)}, |
483 | | {"hdr-scene-threshold-low", OPT_FLOAT(tone_map.scene_threshold_low), |
484 | | M_RANGE(0, 20.0)}, |
485 | | {"hdr-scene-threshold-high", OPT_FLOAT(tone_map.scene_threshold_high), |
486 | | M_RANGE(0, 20.0)}, |
487 | | {"hdr-contrast-recovery", OPT_FLOAT(tone_map.contrast_recovery), |
488 | | M_RANGE(0, 2.0)}, |
489 | | {"hdr-contrast-smoothness", OPT_FLOAT(tone_map.contrast_smoothness), |
490 | | M_RANGE(1.0, 100.0)}, |
491 | | {"opengl-pbo", OPT_BOOL(pbo)}, |
492 | | {"scale", OPT_CHOICE_C(scaler[SCALER_SCALE].kernel.function, scale_filters)}, |
493 | | SCALER_OPTS("scale", SCALER_SCALE), |
494 | | {"dscale", OPT_CHOICE_C(scaler[SCALER_DSCALE].kernel.function, cdscale_filters)}, |
495 | | SCALER_OPTS("dscale", SCALER_DSCALE), |
496 | | {"cscale", OPT_CHOICE_C(scaler[SCALER_CSCALE].kernel.function, cdscale_filters)}, |
497 | | SCALER_OPTS("cscale", SCALER_CSCALE), |
498 | | {"tscale", OPT_CHOICE_C(scaler[SCALER_TSCALE].kernel.function, tscale_filters)}, |
499 | | SCALER_OPTS("tscale", SCALER_TSCALE), |
500 | | {"scaler-resizes-only", OPT_BOOL(scaler_resizes_only)}, |
501 | | {"correct-downscaling", OPT_BOOL(correct_downscaling)}, |
502 | | {"linear-downscaling", OPT_BOOL(linear_downscaling)}, |
503 | | {"linear-upscaling", OPT_BOOL(linear_upscaling)}, |
504 | | {"sigmoid-upscaling", OPT_BOOL(sigmoid_upscaling)}, |
505 | | {"sigmoid-center", OPT_FLOAT(sigmoid_center), M_RANGE(0.0, 1.0)}, |
506 | | {"sigmoid-slope", OPT_FLOAT(sigmoid_slope), M_RANGE(1.0, 20.0)}, |
507 | | {"fbo-format", OPT_STRING(fbo_format)}, |
508 | | {"dither-depth", OPT_CHOICE(dither_depth, {"no", -1}, {"auto", 0}), |
509 | | M_RANGE(-1, 16)}, |
510 | | {"dither", OPT_CHOICE(dither_algo, |
511 | | {"fruit", DITHER_FRUIT}, |
512 | | {"ordered", DITHER_ORDERED}, |
513 | | {"error-diffusion", DITHER_ERROR_DIFFUSION}, |
514 | | {"no", DITHER_NONE})}, |
515 | | {"dither-size-fruit", OPT_INT(dither_size), M_RANGE(2, 8)}, |
516 | | {"temporal-dither", OPT_BOOL(temporal_dither)}, |
517 | | {"temporal-dither-period", OPT_INT(temporal_dither_period), |
518 | | M_RANGE(1, 128)}, |
519 | | {"error-diffusion", |
520 | | OPT_STRING_VALIDATE(error_diffusion, validate_error_diffusion_opt)}, |
521 | | {"background", OPT_CHOICE(background, |
522 | | {"none", BACKGROUND_NONE}, |
523 | | {"color", BACKGROUND_COLOR}, |
524 | | {"tiles", BACKGROUND_TILES})}, |
525 | | {"opengl-rectangle-textures", OPT_BOOL(use_rectangle)}, |
526 | | {"background-color", OPT_COLOR(background_color)}, |
527 | | {"interpolation", OPT_BOOL(interpolation)}, |
528 | | {"interpolation-threshold", OPT_FLOAT(interpolation_threshold)}, |
529 | | {"blend-subtitles", OPT_CHOICE(blend_subs, |
530 | | {"no", BLEND_SUBS_NO}, |
531 | | {"yes", BLEND_SUBS_YES}, |
532 | | {"video", BLEND_SUBS_VIDEO})}, |
533 | | {"glsl-shaders", OPT_PATHLIST(user_shaders), .flags = M_OPT_FILE}, |
534 | | {"glsl-shader", OPT_CLI_ALIAS("glsl-shaders-append")}, |
535 | | {"glsl-shader-opts", OPT_KEYVALUELIST(user_shader_opts)}, |
536 | | {"deband", OPT_BOOL(deband)}, |
537 | | {"deband", OPT_SUBSTRUCT(deband_opts, deband_conf)}, |
538 | | {"sharpen", OPT_FLOAT(unsharp)}, |
539 | | {"gpu-tex-pad-x", OPT_INT(tex_pad_x), M_RANGE(0, 4096)}, |
540 | | {"gpu-tex-pad-y", OPT_INT(tex_pad_y), M_RANGE(0, 4096)}, |
541 | | {"", OPT_SUBSTRUCT(icc_opts, mp_icc_conf)}, |
542 | | {"gpu-shader-cache", OPT_BOOL(shader_cache)}, |
543 | | {"gpu-shader-cache-dir", OPT_STRING(shader_cache_dir), .flags = M_OPT_FILE}, |
544 | | {"gpu-hwdec-interop", |
545 | | OPT_STRING_VALIDATE(hwdec_interop, ra_hwdec_validate_opt)}, |
546 | | {"gamut-warning", OPT_REMOVED("Replaced by --gamut-mapping-mode=warn")}, |
547 | | {"gamut-clipping", OPT_REMOVED("Replaced by --gamut-mapping-mode=desaturate")}, |
548 | | {"tone-mapping-desaturate", OPT_REMOVED("Replaced by --tone-mapping-mode")}, |
549 | | {"tone-mapping-desaturate-exponent", OPT_REMOVED("Replaced by --tone-mapping-mode")}, |
550 | | {0} |
551 | | }, |
552 | | .size = sizeof(struct gl_video_opts), |
553 | | .defaults = &gl_video_opts_def, |
554 | | .change_flags = UPDATE_VIDEO, |
555 | | }; |
556 | | |
557 | | static void uninit_rendering(struct gl_video *p); |
558 | | static void uninit_scaler(struct gl_video *p, struct scaler *scaler); |
559 | | static void check_gl_features(struct gl_video *p); |
560 | | static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id); |
561 | | static void reinit_from_options(struct gl_video *p); |
562 | | static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]); |
563 | | static void gl_video_setup_hooks(struct gl_video *p); |
564 | | static void gl_video_update_options(struct gl_video *p); |
565 | | |
566 | 0 | #define GLSL(x) gl_sc_add(p->sc, #x "\n"); |
567 | 0 | #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) |
568 | 0 | #define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__) |
569 | 0 | #define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__) |
570 | | |
| | // Return the contents of the file at `path` (after user-path expansion), |
| | // loading it on first use and caching path+body (talloc'd under p) in |
| | // p->files. An empty path, read failure, or empty file yields {0}. |
571 | | static struct bstr load_cached_file(struct gl_video *p, const char *path) |
572 | 0 | { |
573 | 0 | if (!path || !path[0]) |
574 | 0 | return (struct bstr){0}; |
| | // Cache hit: return the previously loaded body. |
575 | 0 | for (int n = 0; n < p->num_files; n++) { |
576 | 0 | if (strcmp(p->files[n].path, path) == 0) |
577 | 0 | return p->files[n].body; |
578 | 0 | } |
579 | | // not found -> load it |
580 | 0 | char *fname = mp_get_user_path(NULL, p->global, path); |
581 | 0 | struct bstr s = stream_read_file(fname, p, p->global, 1000000000); // 1GB |
582 | 0 | talloc_free(fname); |
583 | 0 | if (s.len) { |
584 | 0 | struct cached_file new = { |
585 | 0 | .path = talloc_strdup(p, path), |
586 | 0 | .body = s, |
587 | 0 | }; |
588 | 0 | MP_TARRAY_APPEND(p, p->files, p->num_files, new); |
589 | 0 | return new.body; |
590 | 0 | } |
| | // NOTE(review): failed/empty reads are not cached, so a missing file is |
| | // re-probed on every call. |
591 | 0 | return (struct bstr){0}; |
592 | 0 | } |
593 | | |
| | // Despite the name, this only emits a backend debug marker labeled `msg` |
| | // (when the RA backend implements debug_marker); it does not check errors. |
594 | | static void debug_check_gl(struct gl_video *p, const char *msg) |
595 | 0 | { |
596 | 0 | if (p->ra->fns->debug_marker) |
597 | 0 | p->ra->fns->debug_marker(p->ra, msg); |
598 | 0 | } |
599 | | |
| | // Invalidate all interpolation surfaces and reset the frame-interpolation |
| | // bookkeeping, so the next draw starts from a clean surface queue. |
600 | | static void gl_video_reset_surfaces(struct gl_video *p) |
601 | 0 | { |
602 | 0 | for (int i = 0; i < SURFACES_MAX; i++) { |
603 | 0 | p->surfaces[i].id = 0; |
604 | 0 | p->surfaces[i].pts = MP_NOPTS_VALUE; |
605 | 0 | } |
606 | 0 | p->surface_idx = 0; |
607 | 0 | p->surface_now = 0; |
608 | 0 | p->frames_drawn = 0; |
609 | 0 | p->output_tex_valid = false; |
610 | 0 | } |
611 | | |
| | // Free all user-shader hook state: per-hook priv data and user-provided |
| | // textures. The arrays themselves are kept; only their counts are reset. |
612 | | static void gl_video_reset_hooks(struct gl_video *p) |
613 | 0 | { |
614 | 0 | for (int i = 0; i < p->num_tex_hooks; i++) |
615 | 0 | talloc_free(p->tex_hooks[i].priv); |
616 |
|
617 | 0 | for (int i = 0; i < p->num_user_textures; i++) |
618 | 0 | ra_tex_free(p->ra, &p->user_textures[i].tex); |
619 |
|
620 | 0 | p->num_tex_hooks = 0; |
621 | 0 | p->num_user_textures = 0; |
622 | 0 | } |
623 | | |
| | // Wrap a (possibly negative) surface index into [0, SURFACES_MAX). |
| | // C's % keeps the dividend's sign, hence the negative-result fixup. |
624 | | static inline int surface_wrap(int id) |
625 | 0 | { |
626 | 0 | id = id % SURFACES_MAX; |
627 | 0 | return id < 0 ? id + SURFACES_MAX : id; |
628 | 0 | } |
629 | | |
| | // Destroy the OSD renderer and, if an OSD state is attached, recreate it. |
630 | | static void reinit_osd(struct gl_video *p) |
631 | 0 | { |
632 | 0 | mpgl_osd_destroy(p->osd); |
633 | 0 | p->osd = NULL; |
634 | 0 | if (p->osd_state) |
635 | 0 | p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state); |
636 | 0 | } |
637 | | |
| | // Release all GPU objects created for rendering (scalers, dither texture, |
| | // intermediate FBO textures, interpolation surfaces, hook textures), then |
| | // reset dependent surface/hook state and any sticky shader-cache error. |
| | // Safe to call repeatedly: ra_tex_free NULLs the freed pointers. |
638 | | static void uninit_rendering(struct gl_video *p) |
639 | 0 | { |
640 | 0 | for (int n = 0; n < SCALER_COUNT; n++) |
641 | 0 | uninit_scaler(p, &p->scaler[n]); |
642 |
|
643 | 0 | ra_tex_free(p->ra, &p->dither_texture); |
644 |
|
645 | 0 | for (int n = 0; n < 4; n++) { |
646 | 0 | ra_tex_free(p->ra, &p->merge_tex[n]); |
647 | 0 | ra_tex_free(p->ra, &p->scale_tex[n]); |
648 | 0 | ra_tex_free(p->ra, &p->integer_tex[n]); |
649 | 0 | ra_tex_free(p->ra, &p->chroma_tex[n]); |
650 | 0 | } |
651 |
|
652 | 0 | ra_tex_free(p->ra, &p->indirect_tex); |
653 | 0 | ra_tex_free(p->ra, &p->blend_subs_tex); |
654 | 0 | ra_tex_free(p->ra, &p->screen_tex); |
655 | 0 | ra_tex_free(p->ra, &p->output_tex); |
656 |
|
657 | 0 | for (int n = 0; n < 2; n++) |
658 | 0 | ra_tex_free(p->ra, &p->error_diffusion_tex[n]); |
659 |
|
660 | 0 | for (int n = 0; n < SURFACES_MAX; n++) |
661 | 0 | ra_tex_free(p->ra, &p->surfaces[n].tex); |
662 |
|
663 | 0 | for (int n = 0; n < p->num_hook_textures; n++) |
664 | 0 | ra_tex_free(p->ra, &p->hook_textures[n]); |
665 |
|
666 | 0 | gl_video_reset_surfaces(p); |
667 | 0 | gl_video_reset_hooks(p); |
668 |
|
669 | 0 | gl_sc_reset_error(p->sc); |
670 | 0 | } |
671 | | |
| | // Whether the (deprecated) --gamma-auto option is currently enabled. |
672 | | bool gl_video_gamma_auto_enabled(struct gl_video *p) |
673 | 0 | { |
674 | 0 | return p->opts.gamma_auto; |
675 | 0 | } |
676 | | |
677 | | // Warning: profile.start must point to a ta allocation, and the function |
678 | | // takes over ownership. |
| | // Install `icc_data` as the active ICC profile; when the profile actually |
| | // changed, reinitialize rendering so a new 3D LUT takes effect. |
| | // NOTE(review): the comment above still says `profile.start` but the |
| | // parameter is `icc_data` — presumably the same ownership rule applies; |
| | // confirm against gl_lcms_set_memory_profile. |
679 | | void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data) |
680 | 0 | { |
681 | 0 | if (gl_lcms_set_memory_profile(p->cms, icc_data)) |
682 | 0 | reinit_from_options(p); |
683 | 0 | } |
684 | | |
| | // Whether ICC auto-profile loading is enabled (false if ICC opts absent). |
685 | | bool gl_video_icc_auto_enabled(struct gl_video *p) |
686 | 0 | { |
687 | 0 | return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false; |
688 | 0 | } |
689 | | |
| | // (Re)create the 3D LUT texture used for color management, for the given |
| | // target primaries/transfer plus the current frame's embedded ICC profile |
| | // (if any). Returns true iff p->lut_3d_texture is valid; on any failure, |
| | // color management is disabled for good by clearing p->use_lut_3d. |
690 | | static bool gl_video_get_lut3d(struct gl_video *p, enum pl_color_primaries prim, |
691 | | enum pl_color_transfer trc) |
692 | 0 | { |
693 | 0 | if (!p->use_lut_3d) |
694 | 0 | return false; |
695 | |
696 | 0 | struct AVBufferRef *icc = NULL; |
697 | 0 | if (p->image.mpi) |
698 | 0 | icc = p->image.mpi->icc_profile; |
699 |
|
| | // Reuse the existing texture if none of the LCMS inputs changed. |
700 | 0 | if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc)) |
701 | 0 | return true; |
702 | |
703 | | // GLES3 doesn't provide filtered 16 bit integer textures |
704 | | // GLES2 doesn't even provide 3D textures |
705 | 0 | const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4); |
706 | 0 | if (!fmt || !(p->ra->caps & RA_CAP_TEX_3D)) { |
707 | 0 | p->use_lut_3d = false; |
708 | 0 | MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n"); |
709 | 0 | return false; |
710 | 0 | } |
711 | |
712 | 0 | struct lut3d *lut3d = NULL; |
| | // NOTE(review): the `!fmt` re-check below is redundant — it already |
| | // caused an early return above; kept to avoid disturbing the listing. |
713 | 0 | if (!fmt || !gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc) || !lut3d) { |
714 | 0 | p->use_lut_3d = false; |
715 | 0 | return false; |
716 | 0 | } |
717 | |
718 | 0 | ra_tex_free(p->ra, &p->lut_3d_texture); |
719 |
|
720 | 0 | struct ra_tex_params params = { |
721 | 0 | .dimensions = 3, |
722 | 0 | .w = lut3d->size[0], |
723 | 0 | .h = lut3d->size[1], |
724 | 0 | .d = lut3d->size[2], |
725 | 0 | .format = fmt, |
726 | 0 | .render_src = true, |
727 | 0 | .src_linear = true, |
728 | 0 | .initial_data = lut3d->data, |
729 | 0 | }; |
730 | 0 | p->lut_3d_texture = ra_tex_create(p->ra, &params); |
731 |
|
732 | 0 | debug_check_gl(p, "after 3d lut creation"); |
733 |
|
734 | 0 | for (int i = 0; i < 3; i++) |
735 | 0 | p->lut_3d_size[i] = lut3d->size[i]; |
736 |
|
737 | 0 | talloc_free(lut3d); |
738 |
|
739 | 0 | if (!p->lut_3d_texture) { |
740 | 0 | p->use_lut_3d = false; |
741 | 0 | return false; |
742 | 0 | } |
743 | |
744 | 0 | return true; |
745 | 0 | } |
746 | | |
747 | | // Fill an image struct from a ra_tex + some metadata |
| | // A NULL tex is allowed and yields a 1x1 logical size; the multiplier |
| | // defaults to 1.0 and the transform to identity. |
748 | | static struct image image_wrap(struct ra_tex *tex, enum plane_type type, |
749 | | int components) |
750 | 0 | { |
751 | 0 | mp_assert(type != PLANE_NONE); |
752 | 0 | return (struct image){ |
753 | 0 | .type = type, |
754 | 0 | .tex = tex, |
755 | 0 | .multiplier = 1.0, |
756 | 0 | .w = tex ? tex->params.w : 1, |
757 | 0 | .h = tex ? tex->params.h : 1, |
758 | 0 | .transform = identity_trans, |
759 | 0 | .components = components, |
760 | 0 | }; |
761 | 0 | } |
762 | | |
763 | | // Bind an image to a free texture unit and return its ID. |
| | // The ID is simply the image's index in p->pass_imgs for this pass; the |
| | // array is cleared between passes elsewhere. |
764 | | static int pass_bind(struct gl_video *p, struct image img) |
765 | 0 | { |
766 | 0 | int idx = p->num_pass_imgs; |
767 | 0 | MP_TARRAY_APPEND(p, p->pass_imgs, p->num_pass_imgs, img); |
768 | 0 | return idx; |
769 | 0 | } |
770 | | |
771 | | // Rotation by 90° and flipping. |
772 | | // w/h is used for recentering. |
773 | | static void get_transform(float w, float h, int rotate, bool flip, |
774 | | struct gl_transform *out_tr) |
775 | 0 | { |
| | // Rotations that are not multiples of 90° are treated as 0° (a == 0). |
776 | 0 | int a = rotate % 90 ? 0 : (rotate / 90) % 4; |
777 | 0 | int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc. |
778 | 0 | int cos90[4] = {1, 0, -1, 0}; |
779 | 0 | struct gl_transform tr = {{{ cos90[a], sin90[a]}, |
780 | 0 | {-sin90[a], cos90[a]}}}; |
781 | |
782 | | // basically, recenter to keep the whole image in view |
783 | 0 | float b[2] = {1, 1}; |
784 | 0 | gl_transform_vec(tr, &b[0], &b[1]); |
785 | 0 | tr.t[0] += b[0] < 0 ? w : 0; |
786 | 0 | tr.t[1] += b[1] < 0 ? h : 0; |
787 |
|
| | // Vertical flip is applied after the rotation. |
788 | 0 | if (flip) { |
789 | 0 | struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}}; |
790 | 0 | gl_transform_trans(fliptr, &tr); |
791 | 0 | } |
792 |
|
793 | 0 | *out_tr = tr; |
794 | 0 | } |
795 | | |
796 | | // Return the chroma plane upscaled to luma size, but with additional padding |
797 | | // for image sizes not aligned to subsampling. |
| | // I.e. round `size` up to the next multiple of `pixel`. |
798 | | static int chroma_upsize(int size, int pixel) |
799 | 0 | { |
800 | 0 | return (size + pixel - 1) / pixel * pixel; |
801 | 0 | } |
802 | | |
803 | | // If a and b are on the same plane, return what plane type should be used. |
804 | | // If a or b are none, the other type always wins. |
805 | | // Usually: LUMA/RGB/XYZ > CHROMA > ALPHA |
| | // On equal precedence (e.g. both CHROMA), `a` wins. |
806 | | static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b) |
807 | 0 | { |
808 | 0 | if (a == PLANE_NONE) |
809 | 0 | return b; |
810 | 0 | if (b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ) |
811 | 0 | return b; |
812 | 0 | if (b != PLANE_NONE && a == PLANE_ALPHA) |
813 | 0 | return b; |
814 | 0 | return a; |
815 | 0 | } |
816 | | |
817 | | // Places a video_image's image textures + associated metadata into img[]. The |
818 | | // number of textures is equal to p->plane_count. Any necessary plane offsets |
819 | | // are stored in off. (e.g. chroma position) |
static void pass_get_images(struct gl_video *p, struct video_image *vimg,
                            struct image img[4], struct gl_transform off[4])
{
    mp_assert(vimg->mpi);

    int w = p->image_params.w;
    int h = p->image_params.h;

    // Determine the chroma offset
    // ls_w/ls_h: luma sample size measured in chroma coordinate space.
    float ls_w = 1.0 / p->ra_format.chroma_w;
    float ls_h = 1.0 / p->ra_format.chroma_h;

    struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}};

    if (p->image_params.chroma_location != PL_CHROMA_CENTER) {
        float cx, cy;
        pl_chroma_location_offset(p->image_params.chroma_location, &cx, &cy);
        // By default texture coordinates are such that chroma is centered with
        // any chroma subsampling. If a specific direction is given, make it
        // so that the luma and chroma sample line up exactly.
        // For 4:4:4, setting chroma location should have no effect at all.
        // luma sample size (in chroma coord. space)
        chroma.t[0] = ls_w < 1 ? ls_w * -cx : 0;
        chroma.t[1] = ls_h < 1 ? ls_h * -cy : 0;
    }

    memset(img, 0, 4 * sizeof(img[0]));
    for (int n = 0; n < p->plane_count; n++) {
        struct texplane *t = &vimg->planes[n];

        // Classify the plane from its components; component value 4 means
        // alpha, 1 means luma (or R), others chroma — modulo colorspace.
        // `padding` counts leading unused components to skip on sampling.
        enum plane_type type = PLANE_NONE;
        int padding = 0;
        for (int i = 0; i < 4; i++) {
            int c = p->ra_format.components[n][i];
            enum plane_type ctype;
            if (c == 0) {
                ctype = PLANE_NONE;
            } else if (c == 4) {
                ctype = PLANE_ALPHA;
            } else if (p->image_params.repr.sys == PL_COLOR_SYSTEM_RGB) {
                ctype = PLANE_RGB;
            } else if (p->image_params.repr.sys == PL_COLOR_SYSTEM_XYZ) {
                ctype = PLANE_XYZ;
            } else {
                ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA;
            }
            type = merge_plane_types(type, ctype);
            if (!c && padding == i)
                padding = i + 1;
        }

        // Compute the sampling multiplier that renormalizes the texture
        // range to the nominal component range. Float textures are
        // assumed to already be in the right range.
        int msb_valid_bits =
            p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0);
        int csp = type == PLANE_ALPHA ? PL_COLOR_SYSTEM_RGB : p->image_params.repr.sys;
        float tex_mul =
            1.0 / mp_get_csp_mul(csp, msb_valid_bits, p->ra_format.component_bits);
        if (p->ra_format.component_type == RA_CTYPE_FLOAT)
            tex_mul = 1.0;

        img[n] = (struct image){
            .type = type,
            .tex = t->tex,
            .multiplier = tex_mul,
            .w = t->w,
            .h = t->h,
            .padding = padding,
        };

        for (int i = 0; i < 4; i++)
            img[n].components += !!p->ra_format.components[n][i];

        get_transform(t->w, t->h, p->image_params.rotate, t->flipped,
                      &img[n].transform);
        // 90°/270° rotation swaps logical width and height.
        if (p->image_params.rotate % 180 == 90)
            MPSWAP(int, img[n].w, img[n].h);

        off[n] = identity_trans;

        if (type == PLANE_CHROMA) {
            struct gl_transform rot;
            // Reverse the rotation direction here because the different
            // coordinate system of chroma offset results in rotation
            // in the opposite direction.
            get_transform(0, 0, 360 - p->image_params.rotate, t->flipped, &rot);

            struct gl_transform tr = chroma;
            gl_transform_vec(rot, &tr.t[0], &tr.t[1]);

            // dx/dy: fractional chroma overshoot (in chroma coords) caused
            // by rounding the image size up to the subsampling grid.
            float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w;
            float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h;

            // Adjust the chroma offset if the real chroma size is fractional
            // due image sizes not aligned to chroma subsampling.
            if (rot.m[0][0] < 0)
                tr.t[0] += dx;
            if (rot.m[1][0] < 0)
                tr.t[0] += dy;
            if (rot.m[0][1] < 0)
                tr.t[1] += dx;
            if (rot.m[1][1] < 0)
                tr.t[1] += dy;

            off[n] = tr;
        }
    }
}
926 | | |
927 | | // Return the index of the given component (assuming all non-padding components |
928 | | // of all planes are concatenated into a linear list). |
929 | | static int find_comp(struct ra_imgfmt_desc *desc, int component) |
930 | 0 | { |
931 | 0 | int cur = 0; |
932 | 0 | for (int n = 0; n < desc->num_planes; n++) { |
933 | 0 | for (int i = 0; i < 4; i++) { |
934 | 0 | if (desc->components[n][i]) { |
935 | 0 | if (desc->components[n][i] == component) |
936 | 0 | return cur; |
937 | 0 | cur++; |
938 | 0 | } |
939 | 0 | } |
940 | 0 | } |
941 | 0 | return -1; |
942 | 0 | } |
943 | | |
// Initialize all per-video state: hwdec interop (overlay or mapper),
// the plane format description, per-plane video textures (software
// decoding only), and format-dependent feature checks.
static void init_video(struct gl_video *p)
{
    p->use_integer_conversion = false;

    // Prefer a hwdec interop if one is registered for this image format.
    struct ra_hwdec *hwdec = ra_hwdec_get(&p->hwdec_ctx, p->image_params.imgfmt);
    if (hwdec) {
        if (hwdec->driver->overlay_frame) {
            MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed "
                       "on the video!\n");
            p->hwdec_overlay = hwdec;
        } else {
            p->hwdec_mapper = ra_hwdec_mapper_create(hwdec, &p->image_params);
            if (!p->hwdec_mapper)
                MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
        }
        // The mapper may output a different format than the decoder's.
        if (p->hwdec_mapper)
            p->image_params = p->hwdec_mapper->dst_params;
        const char **exts = hwdec->glsl_extensions;
        for (int n = 0; exts && exts[n]; n++)
            gl_sc_enable_extension(p->sc, (char *)exts[n]);
        p->hwdec_active = true;
    }

    p->ra_format = (struct ra_imgfmt_desc){0};
    ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format);

    p->plane_count = p->ra_format.num_planes;

    p->has_alpha = false;
    p->is_gray = true;

    // Component value 4 denotes alpha; the format counts as gray if it
    // carries only luma (1) and/or alpha components.
    for (int n = 0; n < p->ra_format.num_planes; n++) {
        for (int i = 0; i < 4; i++) {
            if (p->ra_format.components[n][i]) {
                p->has_alpha |= p->ra_format.components[n][i] == 4;
                p->is_gray &= p->ra_format.components[n][i] == 1 ||
                              p->ra_format.components[n][i] == 4;
            }
        }
    }

    // Build the swizzle string that maps storage component order to rgba.
    // Missing components fall back to 'r'.
    for (int c = 0; c < 4; c++) {
        int loc = find_comp(&p->ra_format, c + 1);
        p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0];
    }
    p->color_swizzle[4] = '\0';

    mp_image_params_restore_dovi_mapping(&p->image_params);
    mp_image_params_guess_csp(&p->image_params);

    av_lfg_init(&p->lfg, 1);

    debug_check_gl(p, "before video texture creation");

    // With hwdec active, the mapper provides the textures instead.
    if (!p->hwdec_active) {
        struct video_image *vimg = &p->image;

        // Use a dummy image only to compute the plane layout/dimensions.
        struct mp_image layout = {0};
        mp_image_set_params(&layout, &p->image_params);

        for (int n = 0; n < p->plane_count; n++) {
            struct texplane *plane = &vimg->planes[n];
            const struct ra_format *format = p->ra_format.planes[n];

            plane->w = mp_image_plane_w(&layout, n);
            plane->h = mp_image_plane_h(&layout, n);

            struct ra_tex_params params = {
                .dimensions = 2,
                .w = plane->w + p->opts.tex_pad_x,
                .h = plane->h + p->opts.tex_pad_y,
                .d = 1,
                .format = format,
                .render_src = true,
                .src_linear = format->linear_filter,
                .non_normalized = p->opts.use_rectangle,
                .host_mutable = true,
            };

            MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n,
                       params.w, params.h);

            plane->tex = ra_tex_create(p->ra, &params);
            p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT;
        }
    }

    debug_check_gl(p, "after video texture creation");

    // Format-dependent checks.
    check_gl_features(p);

    gl_video_setup_hooks(p);
}
1038 | | |
1039 | | static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) |
1040 | 0 | { |
1041 | 0 | for (int i = 0; i < p->num_dr_buffers; i++) { |
1042 | 0 | struct dr_buffer *buffer = &p->dr_buffers[i]; |
1043 | 0 | uint8_t *bufptr = buffer->buf->data; |
1044 | 0 | size_t size = buffer->buf->params.size; |
1045 | 0 | if (ptr >= bufptr && ptr < bufptr + size) |
1046 | 0 | return buffer; |
1047 | 0 | } |
1048 | | |
1049 | 0 | return NULL; |
1050 | 0 | } |
1051 | | |
// Release the mp_image references of all DR buffers whose GPU usage has
// completed (or of all buffers, if force is set).
static void gc_pending_dr_fences(struct gl_video *p, bool force)
{
again:;
    for (int n = 0; n < p->num_dr_buffers; n++) {
        struct dr_buffer *buffer = &p->dr_buffers[n];
        if (!buffer->mpi)
            continue;

        // buf_poll returns true once the GPU is done with the buffer.
        bool res = p->ra->fns->buf_poll(p->ra, buffer->buf);
        if (res || force) {
            // Unreferencing the image could cause gl_video_dr_free_buffer()
            // to be called by the talloc destructor (if it was the last
            // reference). This will implicitly invalidate the buffer pointer
            // and change the p->dr_buffers array. To make it worse, it could
            // free multiple dr_buffers due to weird theoretical corner cases.
            // This is also why we use the goto to iterate again from the
            // start, because everything gets fucked up. Hail satan!
            struct mp_image *ref = buffer->mpi;
            buffer->mpi = NULL;
            talloc_free(ref);
            goto again;
        }
    }
}
1076 | | |
1077 | | static void unref_current_image(struct gl_video *p) |
1078 | 0 | { |
1079 | 0 | struct video_image *vimg = &p->image; |
1080 | |
|
1081 | 0 | if (vimg->hwdec_mapped) { |
1082 | 0 | mp_assert(p->hwdec_active && p->hwdec_mapper); |
1083 | 0 | ra_hwdec_mapper_unmap(p->hwdec_mapper); |
1084 | 0 | memset(vimg->planes, 0, sizeof(vimg->planes)); |
1085 | 0 | vimg->hwdec_mapped = false; |
1086 | 0 | } |
1087 | | |
1088 | 0 | vimg->id = 0; |
1089 | |
|
1090 | 0 | mp_image_unrefp(&vimg->mpi); |
1091 | | |
1092 | | // While we're at it, also garbage collect pending fences in here to |
1093 | | // get it out of the way. |
1094 | 0 | gc_pending_dr_fences(p, false); |
1095 | 0 | } |
1096 | | |
1097 | | // If overlay mode is used, make sure to remove the overlay. |
1098 | | // Be careful with this. Removing the overlay and adding another one will |
1099 | | // lead to flickering artifacts. |
1100 | | static void unmap_overlay(struct gl_video *p) |
1101 | 0 | { |
1102 | 0 | if (p->hwdec_overlay) |
1103 | 0 | p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, NULL, NULL, NULL, true); |
1104 | 0 | } |
1105 | | |
1106 | | static void uninit_video(struct gl_video *p) |
1107 | 0 | { |
1108 | 0 | uninit_rendering(p); |
1109 | |
|
1110 | 0 | struct video_image *vimg = &p->image; |
1111 | |
|
1112 | 0 | unmap_overlay(p); |
1113 | 0 | unref_current_image(p); |
1114 | |
|
1115 | 0 | for (int n = 0; n < p->plane_count; n++) { |
1116 | 0 | struct texplane *plane = &vimg->planes[n]; |
1117 | 0 | ra_tex_free(p->ra, &plane->tex); |
1118 | 0 | } |
1119 | 0 | *vimg = (struct video_image){0}; |
1120 | | |
1121 | | // Invalidate image_params to ensure that gl_video_config() will call |
1122 | | // init_video() on uninitialized gl_video. |
1123 | 0 | p->real_image_params = (struct mp_image_params){0}; |
1124 | 0 | p->image_params = p->real_image_params; |
1125 | 0 | p->hwdec_active = false; |
1126 | 0 | p->hwdec_overlay = NULL; |
1127 | 0 | ra_hwdec_mapper_free(&p->hwdec_mapper); |
1128 | 0 | } |
1129 | | |
1130 | | static void pass_record(struct gl_video *p, const struct mp_pass_perf *perf) |
1131 | 0 | { |
1132 | 0 | if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX) |
1133 | 0 | return; |
1134 | | |
1135 | 0 | struct pass_info *pass = &p->pass[p->pass_idx]; |
1136 | 0 | pass->perf = *perf; |
1137 | |
|
1138 | 0 | if (pass->desc.len == 0) |
1139 | 0 | bstr_xappend(p, &pass->desc, bstr0("(unknown)")); |
1140 | |
|
1141 | 0 | p->pass_idx++; |
1142 | 0 | } |
1143 | | |
1144 | | PRINTF_ATTRIBUTE(2, 3) |
1145 | | static void pass_describe(struct gl_video *p, const char *textf, ...) |
1146 | 0 | { |
1147 | 0 | if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX) |
1148 | 0 | return; |
1149 | | |
1150 | 0 | struct pass_info *pass = &p->pass[p->pass_idx]; |
1151 | |
|
1152 | 0 | if (pass->desc.len > 0) |
1153 | 0 | bstr_xappend(p, &pass->desc, bstr0(" + ")); |
1154 | |
|
1155 | 0 | va_list ap; |
1156 | 0 | va_start(ap, textf); |
1157 | 0 | bstr_xappend_vasprintf(p, &pass->desc, textf, ap); |
1158 | 0 | va_end(ap); |
1159 | 0 | } |
1160 | | |
1161 | | static void pass_info_reset(struct gl_video *p, bool is_redraw) |
1162 | 0 | { |
1163 | 0 | p->pass = is_redraw ? p->pass_redraw : p->pass_fresh; |
1164 | 0 | p->pass_idx = 0; |
1165 | |
|
1166 | 0 | for (int i = 0; i < VO_PASS_PERF_MAX; i++) { |
1167 | 0 | p->pass[i].desc.len = 0; |
1168 | 0 | } |
1169 | 0 | } |
1170 | | |
1171 | | static void pass_report_performance(struct gl_video *p) |
1172 | 0 | { |
1173 | 0 | if (!p->pass) |
1174 | 0 | return; |
1175 | | |
1176 | 0 | for (int i = 0; i < VO_PASS_PERF_MAX; i++) { |
1177 | 0 | struct pass_info *pass = &p->pass[i]; |
1178 | 0 | if (!pass->desc.len) |
1179 | 0 | break; |
1180 | 0 | MP_TRACE(p, "pass '%.*s': last %dus avg %dus peak %dus\n", |
1181 | 0 | BSTR_P(pass->desc), |
1182 | 0 | (int)pass->perf.last/1000, |
1183 | 0 | (int)pass->perf.avg/1000, |
1184 | 0 | (int)pass->perf.peak/1000); |
1185 | 0 | } |
1186 | 0 | } |
1187 | | |
1188 | | static void pass_prepare_src_tex(struct gl_video *p) |
1189 | 0 | { |
1190 | 0 | struct gl_shader_cache *sc = p->sc; |
1191 | |
|
1192 | 0 | for (int n = 0; n < p->num_pass_imgs; n++) { |
1193 | 0 | struct image *s = &p->pass_imgs[n]; |
1194 | 0 | if (!s->tex) |
1195 | 0 | continue; |
1196 | | |
1197 | 0 | char *texture_name = mp_tprintf(32, "texture%d", n); |
1198 | 0 | char *texture_size = mp_tprintf(32, "texture_size%d", n); |
1199 | 0 | char *texture_rot = mp_tprintf(32, "texture_rot%d", n); |
1200 | 0 | char *texture_off = mp_tprintf(32, "texture_off%d", n); |
1201 | 0 | char *pixel_size = mp_tprintf(32, "pixel_size%d", n); |
1202 | |
|
1203 | 0 | gl_sc_uniform_texture(sc, texture_name, s->tex); |
1204 | 0 | float f[2] = {1, 1}; |
1205 | 0 | if (!s->tex->params.non_normalized) { |
1206 | 0 | f[0] = s->tex->params.w; |
1207 | 0 | f[1] = s->tex->params.h; |
1208 | 0 | } |
1209 | 0 | gl_sc_uniform_vec2(sc, texture_size, f); |
1210 | 0 | gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m); |
1211 | 0 | gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t); |
1212 | 0 | gl_sc_uniform_vec2(sc, pixel_size, (float[]){1.0f / f[0], |
1213 | 0 | 1.0f / f[1]}); |
1214 | 0 | } |
1215 | 0 | } |
1216 | | |
// Forget all images bound for the current pass; called after each
// dispatch so the next pass starts with an empty bind list.
static void cleanup_binds(struct gl_video *p)
{
    p->num_pass_imgs = 0;
}
1221 | | |
1222 | | // Sets the appropriate compute shader metadata for an implicit compute pass |
1223 | | // bw/bh: block size |
1224 | | static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible) |
1225 | 0 | { |
1226 | 0 | if (p->pass_compute.active && flexible) { |
1227 | | // Avoid overwriting existing block sizes when using a flexible pass |
1228 | 0 | bw = p->pass_compute.block_w; |
1229 | 0 | bh = p->pass_compute.block_h; |
1230 | 0 | } |
1231 | |
|
1232 | 0 | p->pass_compute = (struct compute_info){ |
1233 | 0 | .active = true, |
1234 | 0 | .block_w = bw, |
1235 | 0 | .block_h = bh, |
1236 | 0 | }; |
1237 | 0 | } |
1238 | | |
// w/h: the width/height of the compute shader's operating domain (e.g. the
// target texture that needs to be written, or the source texture that needs
// to be reduced)
static void dispatch_compute(struct gl_video *p, int w, int h,
                             struct compute_info info)
{
    // Thread count defaults to the block size unless overridden.
    PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n",
            info.threads_w > 0 ? info.threads_w : info.block_w,
            info.threads_h > 0 ? info.threads_h : info.block_h);

    pass_prepare_src_tex(p);

    // Since we don't actually have vertices, we pretend for convenience
    // reasons that we do and calculate the right texture coordinates based on
    // the output sample ID
    gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h });
    PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n");

    // Emit texmapN/texcoordN macros mirroring what the vertex shader
    // would have produced for each bound source texture.
    for (int n = 0; n < p->num_pass_imgs; n++) {
        struct image *s = &p->pass_imgs[n];
        if (!s->tex)
            continue;

        PRELUDE("#define texmap%d(id) (texture_rot%d * outcoord(id) + "
                "pixel_size%d * texture_off%d)\n", n, n, n, n);
        PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n);
    }

    // always round up when dividing to make sure we don't leave off a part of
    // the image
    int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1,
        num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1;

    // Emulate gl_NumWorkGroups as a constant if the RA can't provide it.
    if (!(p->ra->caps & RA_CAP_NUM_GROUPS))
        PRELUDE("#define gl_NumWorkGroups uvec3(%d, %d, 1)\n", num_x, num_y);

    struct mp_pass_perf perf = gl_sc_dispatch_compute(p->sc, num_x, num_y, 1);
    pass_record(p, &perf);
    cleanup_binds(p);
}
1279 | | |
// Draw the destination rectangle as a textured quad (two triangles) into
// fbo, with one interleaved texcoord attribute per bound pass image.
static struct mp_pass_perf render_pass_quad(struct gl_video *p,
                                            const struct ra_fbo *fbo, bool discard,
                                            const struct mp_rect *dst)
{
    // The first element is reserved for `vec2 position`
    int num_vertex_attribs = 1 + p->num_pass_imgs;
    size_t vertex_stride = num_vertex_attribs * sizeof(struct vertex_pt);

    // Expand the VAO if necessary
    while (p->vao_len < num_vertex_attribs) {
        MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
            .name = talloc_asprintf(p, "texcoord%d", p->vao_len - 1),
            .type = RA_VARTYPE_FLOAT,
            .dim_v = 2,
            .dim_m = 1,
            .offset = p->vao_len * sizeof(struct vertex_pt),
        });
    }

    int num_vertices = 6; // quad as triangle list
    int num_attribs_total = num_vertices * num_vertex_attribs;
    MP_TARRAY_GROW(p, p->tmp_vertex, num_attribs_total);

    struct gl_transform t;
    gl_transform_ortho_fbo(&t, fbo);

    // Transform the destination rectangle's corner coordinates into NDC.
    float x[2] = {dst->x0, dst->x1};
    float y[2] = {dst->y0, dst->y1};
    gl_transform_vec(t, &x[0], &y[0]);
    gl_transform_vec(t, &x[1], &y[1]);

    // Fill the 4 unique corners; vertex n is corner (x[n/2], y[n%2]).
    for (int n = 0; n < 4; n++) {
        struct vertex_pt *vs = &p->tmp_vertex[num_vertex_attribs * n];
        // vec2 position in idx 0
        vs[0].x = x[n / 2];
        vs[0].y = y[n % 2];
        for (int i = 0; i < p->num_pass_imgs; i++) {
            struct image *s = &p->pass_imgs[i];
            if (!s->tex)
                continue;
            struct gl_transform tr = s->transform;
            float tx = (n / 2) * s->w;
            float ty = (n % 2) * s->h;
            gl_transform_vec(tr, &tx, &ty);
            bool rect = s->tex->params.non_normalized;
            // vec2 texcoordN in idx N+1; rectangle textures keep pixel
            // coordinates, normalized textures divide by the texture size.
            vs[i + 1].x = tx / (rect ? 1 : s->tex->params.w);
            vs[i + 1].y = ty / (rect ? 1 : s->tex->params.h);
        }
    }

    // Duplicate corners 2 and 1 into slots 4 and 5, turning the 4 unique
    // corners into the 6 vertices of a triangle list (0,1,2) + (3,2,1).
    memmove(&p->tmp_vertex[num_vertex_attribs * 4],
            &p->tmp_vertex[num_vertex_attribs * 2],
            vertex_stride);

    memmove(&p->tmp_vertex[num_vertex_attribs * 5],
            &p->tmp_vertex[num_vertex_attribs * 1],
            vertex_stride);

    return gl_sc_dispatch_draw(p->sc, fbo->tex, discard, p->vao, num_vertex_attribs,
                               vertex_stride, p->tmp_vertex, num_vertices);
}
1342 | | |
// Dispatch the accumulated fragment pass into fbo, rasterizing only the
// dst rectangle. If discard is set, the previous FBO contents may be
// dropped.
static void finish_pass_fbo(struct gl_video *p, const struct ra_fbo *fbo,
                            bool discard, const struct mp_rect *dst)
{
    pass_prepare_src_tex(p);
    struct mp_pass_perf perf = render_pass_quad(p, fbo, discard, dst);
    pass_record(p, &perf);
    debug_check_gl(p, "after rendering");
    cleanup_binds(p);
}
1352 | | |
// dst_fbo: this will be used for rendering; possibly reallocating the whole
// FBO, if the required parameters have changed
// w, h: required FBO target dimension, and also defines the target rectangle
// used for rasterization
static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
                            int w, int h)
{
    if (!ra_tex_resize(p->ra, p->log, dst_tex, w, h, p->fbo_format)) {
        // Allocation failed; discard everything queued for this pass.
        cleanup_binds(p);
        gl_sc_reset(p->sc);
        return;
    }

    // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
    // over fragment shaders wherever possible.
    if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE) &&
        (*dst_tex)->params.storage_dst)
    {
        pass_is_compute(p, 16, 16, true);
    }

    if (p->pass_compute.active) {
        gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
        // Unless the pass writes out_image itself, append an imageStore
        // of the final color value.
        if (!p->pass_compute.directly_writes)
            GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)

        dispatch_compute(p, w, h, p->pass_compute);
        p->pass_compute = (struct compute_info){0};

        debug_check_gl(p, "after dispatching compute shader");
    } else {
        struct ra_fbo fbo = { .tex = *dst_tex, };
        finish_pass_fbo(p, &fbo, true, &(struct mp_rect){0, 0, w, h});
    }
}
1388 | | |
1389 | | static const char *get_tex_swizzle(struct image *img) |
1390 | 0 | { |
1391 | 0 | if (!img->tex) |
1392 | 0 | return "rgba"; |
1393 | 0 | if (img->tex->params.format->luminance_alpha) |
1394 | 0 | return "raaa"; |
1395 | 0 | return img->tex->params.format->ordered ? "rgba" : "bgra"; |
1396 | 0 | } |
1397 | | |
// Copy a texture to the vec4 color, while increasing offset. Also applies
// the texture multiplier to the sampled color
static void copy_image(struct gl_video *p, unsigned int *offset, struct image img)
{
    const unsigned int count = img.components;
    char src[5] = {0};
    char dst[5] = {0};

    // Both swizzle masks need room for `count` characters plus the NUL.
    mp_assert(*offset + count < sizeof(dst));
    mp_assert(img.padding + count < sizeof(src));

    int id = pass_bind(p, img);

    // Build the source/destination swizzles, skipping padding components
    // on the source side.
    const char *tex_fmt = get_tex_swizzle(&img);
    const char *dst_fmt = "rgba";
    for (unsigned int i = 0; i < count; i++) {
        src[i] = tex_fmt[img.padding + i];
        dst[i] = dst_fmt[*offset + i];
    }

    // Integer textures are not normalized on sampling, so fold the
    // normalization factor into the multiplier.
    if (img.tex && img.tex->params.format->ctype == RA_CTYPE_UINT) {
        uint64_t tex_max = 1ull << p->ra_format.component_bits;
        img.multiplier *= 1.0 / (tex_max - 1);
    }

    GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n",
          dst, img.multiplier, id, id, src);

    *offset += count;
}
1428 | | |
// Fill the remaining components of vec4 color with neutral values:
// 0.0 for unused color channels, 1.0 for alpha.
static void skip_unused(struct gl_video *p, int num_components)
{
    for (int c = num_components; c < 4; c++)
        GLSLF("color.%c = %f;\n", "rgba"[c], c < 3 ? 0.0 : 1.0);
}
1434 | | |
// Release all GPU resources held by a scaler (separable-pass FBO and
// filter LUT) and mark it as uninitialized.
static void uninit_scaler(struct gl_video *p, struct scaler *scaler)
{
    ra_tex_free(p->ra, &scaler->sep_fbo);
    ra_tex_free(p->ra, &scaler->lut);
    scaler->kernel = NULL;
    scaler->initialized = false;
}
1442 | | |
// Emit the GLSL #define boilerplate that exposes the bound texture `id`
// to a hook shader under `name` (NAME_tex, NAME_pos, NAME_size, ...).
static void hook_prelude(struct gl_video *p, const char *name, int id,
                         struct image img)
{
    GLSLHF("#define %s_raw texture%d\n", name, id);
    GLSLHF("#define %s_pos texcoord%d\n", name, id);
    GLSLHF("#define %s_size texture_size%d\n", name, id);
    GLSLHF("#define %s_rot texture_rot%d\n", name, id);
    GLSLHF("#define %s_off texture_off%d\n", name, id);
    GLSLHF("#define %s_pt pixel_size%d\n", name, id);
    GLSLHF("#define %s_map texmap%d\n", name, id);
    GLSLHF("#define %s_mul %f\n", name, img.multiplier);

    // Local copy of the swizzle (at most 4 chars + NUL).
    char crap[5] = "";
    snprintf(crap, sizeof(crap), "%s", get_tex_swizzle(&img));

    // Remove leading padding by rotating the swizzle mask.
    int len = strlen(crap);
    for (int n = 0; n < img.padding; n++) {
        if (len) {
            char f = crap[0];
            memmove(crap, crap + 1, len - 1);
            crap[len - 1] = f;
        }
    }

    // Set up the sampling functions
    GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n",
           name, name, name, crap);

    if (p->ra->caps & RA_CAP_GATHER) {
        GLSLHF("#define %s_gather(pos, c) (%s_mul * vec4("
               "textureGather(%s_raw, pos, c)))\n", name, name, name);
    }

    // Since the extra matrix multiplication impacts performance,
    // skip it unless the texture was actually rotated
    if (gl_transform_eq(img.transform, identity_trans)) {
        GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n",
               name, name, name, name);
    } else {
        GLSLHF("#define %s_texOff(off) "
               "%s_tex(%s_pos + %s_rot * vec2(off)/%s_size)\n",
               name, name, name, name, name);
    }
}
1488 | | |
1489 | | static bool saved_img_find(struct gl_video *p, const char *name, |
1490 | | struct image *out) |
1491 | 0 | { |
1492 | 0 | if (!name || !out) |
1493 | 0 | return false; |
1494 | | |
1495 | 0 | for (int i = 0; i < p->num_saved_imgs; i++) { |
1496 | 0 | if (strcmp(p->saved_imgs[i].name, name) == 0) { |
1497 | 0 | *out = p->saved_imgs[i].img; |
1498 | 0 | return true; |
1499 | 0 | } |
1500 | 0 | } |
1501 | | |
1502 | 0 | return false; |
1503 | 0 | } |
1504 | | |
1505 | | static void saved_img_store(struct gl_video *p, const char *name, |
1506 | | struct image img) |
1507 | 0 | { |
1508 | 0 | mp_assert(name); |
1509 | | |
1510 | 0 | for (int i = 0; i < p->num_saved_imgs; i++) { |
1511 | 0 | if (strcmp(p->saved_imgs[i].name, name) == 0) { |
1512 | 0 | p->saved_imgs[i].img = img; |
1513 | 0 | return; |
1514 | 0 | } |
1515 | 0 | } |
1516 | | |
1517 | 0 | MP_TARRAY_APPEND(p, p->saved_imgs, p->num_saved_imgs, (struct saved_img) { |
1518 | 0 | .name = name, |
1519 | 0 | .img = img |
1520 | 0 | }); |
1521 | 0 | } |
1522 | | |
// Bind every texture a hook requested via BIND. Returns false (and rolls
// back the binds made so far) if a required saved texture is missing.
static bool pass_hook_setup_binds(struct gl_video *p, const char *name,
                                  struct image img, struct tex_hook *hook)
{
    for (int t = 0; t < SHADER_MAX_BINDS; t++) {
        char *bind_name = (char *)hook->bind_tex[t];

        if (!bind_name)
            continue;

        // This is a special name that means "currently hooked texture"
        if (strcmp(bind_name, "HOOKED") == 0) {
            int id = pass_bind(p, img);
            hook_prelude(p, "HOOKED", id, img);
            hook_prelude(p, name, id, img);
            continue;
        }

        // BIND can also be used to load user-defined textures, in which
        // case we will directly load them as a uniform instead of
        // generating the hook_prelude boilerplate
        for (int u = 0; u < p->num_user_textures; u++) {
            struct gl_user_shader_tex *utex = &p->user_textures[u];
            if (bstr_equals0(utex->name, bind_name)) {
                gl_sc_uniform_texture(p->sc, bind_name, utex->tex);
                goto next_bind;
            }
        }

        struct image bind_img;
        if (!saved_img_find(p, bind_name, &bind_img)) {
            // Clean up texture bindings and move on to the next hook
            // NOTE(review): this subtracts the bind index t, which equals
            // the number of pass images added only if every earlier bind
            // added exactly one image (user-texture binds add none) —
            // verify whether that mismatch is intended.
            MP_TRACE(p, "Skipping hook on %s due to no texture named %s.\n",
                     name, bind_name);
            p->num_pass_imgs -= t;
            return false;
        }

        hook_prelude(p, bind_name, pass_bind(p, bind_img), bind_img);

    next_bind: ;
    }

    return true;
}
1567 | | |
1568 | | static struct ra_tex **next_hook_tex(struct gl_video *p) |
1569 | 0 | { |
1570 | 0 | if (p->idx_hook_textures == p->num_hook_textures) |
1571 | 0 | MP_TARRAY_APPEND(p, p->hook_textures, p->num_hook_textures, NULL); |
1572 | |
|
1573 | 0 | return &p->hook_textures[p->idx_hook_textures++]; |
1574 | 0 | } |
1575 | | |
// Process hooks for a plane, saving the result and returning a new image
// If 'trans' is NULL, the shader is forbidden from transforming img
static struct image pass_hook(struct gl_video *p, const char *name,
                              struct image img, struct gl_transform *trans)
{
    if (!name)
        return img;

    // Make the incoming image available to hook binds under this name
    saved_img_store(p, name, img);

    MP_TRACE(p, "Running hooks for %s\n", name);
    for (int i = 0; i < p->num_tex_hooks; i++) {
        struct tex_hook *hook = &p->tex_hooks[i];

        // Figure out if this pass hooks this texture
        for (int h = 0; h < SHADER_MAX_HOOKS; h++) {
            if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0)
                goto found;
        }

        continue;

found:
        // Check the hook's condition
        if (hook->cond && !hook->cond(p, img, hook->priv)) {
            MP_TRACE(p, "Skipping hook on %s due to condition.\n", name);
            continue;
        }

        // Hooks that don't name a save texture overwrite the hooked one
        const char *store_name = hook->save_tex ? hook->save_tex : name;
        bool is_overwrite = strcmp(store_name, name) == 0;

        // If user shader is set to align HOOKED with reference and fix its
        // offset, it requires HOOKED to be resizable and overwritten.
        if (is_overwrite && hook->align_offset) {
            if (!trans) {
                MP_ERR(p, "Hook tried to align unresizable texture %s!\n",
                       name);
                return img;
            }

            // Pre-apply only the translation part of the accumulated
            // transform, so the hook samples from aligned coordinates
            struct gl_transform align_off = identity_trans;
            align_off.t[0] = trans->t[0];
            align_off.t[1] = trans->t[1];

            gl_transform_trans(align_off, &img.transform);
        }

        if (!pass_hook_setup_binds(p, name, img, hook))
            continue;

        // Run the actual hook. This generates a series of GLSL shader
        // instructions sufficient for drawing the hook's output
        struct gl_transform hook_off = identity_trans;
        hook->hook(p, img, &hook_off, hook->priv);

        int comps = hook->components ? hook->components : img.components;
        skip_unused(p, comps);

        // Compute the updated FBO dimensions and store the result
        struct mp_rect_f sz = {0, 0, img.w, img.h};
        gl_transform_rect(hook_off, &sz);
        int w = lroundf(fabs(sz.x1 - sz.x0));
        int h = lroundf(fabs(sz.y1 - sz.y0));

        struct ra_tex **tex = next_hook_tex(p);
        finish_pass_tex(p, tex, w, h);
        struct image saved_img = image_wrap(*tex, img.type, comps);

        // If the texture we're saving overwrites the "current" texture, also
        // update the tex parameter so that the future loop cycles will use the
        // updated values, and export the offset
        if (is_overwrite) {
            if (!trans && !gl_transform_eq(hook_off, identity_trans)) {
                MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n",
                       name);
                return img;
            }

            img = saved_img;
            if (trans) {
                gl_transform_trans(hook_off, trans);

                // If user shader is set to align HOOKED, the offset it produces
                // is dynamic (with static resizing factor though).
                // Align it with reference manually to get offset fixed.
                if (hook->align_offset) {
                    trans->t[0] = 0.0;
                    trans->t[1] = 0.0;
                }
            }
        }

        saved_img_store(p, store_name, saved_img);
    }

    return img;
}
1674 | | |
// This can be used at any time in the middle of rendering to specify an
// optional hook point, which if triggered will render out to a new FBO and
// load the result back into vec4 color. Offsets applied by the hooks are
// accumulated in tex_trans, and the FBO is dimensioned according
// to p->texture_w/h
static void pass_opt_hook_point(struct gl_video *p, const char *name,
                                struct gl_transform *tex_trans)
{
    if (!name)
        return;

    // Only materialize an FBO if at least one registered hook either hooks
    // this point directly or binds it as an input texture
    for (int i = 0; i < p->num_tex_hooks; i++) {
        struct tex_hook *hook = &p->tex_hooks[i];

        for (int h = 0; h < SHADER_MAX_HOOKS; h++) {
            if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0)
                goto found;
        }

        for (int b = 0; b < SHADER_MAX_BINDS; b++) {
            if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0)
                goto found;
        }
    }

    // Nothing uses this texture, don't bother storing it
    return;

found: ;
    // Render the current pass out, run the hooks, and read the (possibly
    // resized) result back into "color", updating the pass dimensions
    struct ra_tex **tex = next_hook_tex(p);
    finish_pass_tex(p, tex, p->texture_w, p->texture_h);
    struct image img = image_wrap(*tex, PLANE_RGB, p->components);
    img = pass_hook(p, name, img, tex_trans);
    copy_image(p, &(int){0}, img);
    p->texture_w = img.w;
    p->texture_h = img.h;
    p->components = img.components;
    pass_describe(p, "(remainder pass)");
}
1714 | | |
// Load a user shader body into the shader cache and set up the standard
// uniforms user shaders may reference: random, frame, input_size,
// target_size and tex_offset.
static void load_shader(struct gl_video *p, struct bstr body)
{
    gl_sc_hadd_bstr(p->sc, body);
    // NOTE(review): gl_sc_uniform_dynamic appears to mark the following
    // uniform as per-frame dynamic — hence it is called once before each of
    // "random" and "frame"; confirm against the shader cache implementation.
    gl_sc_uniform_dynamic(p->sc);
    // New pseudo-random value in [0, 1] each time a shader is loaded
    gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX);
    gl_sc_uniform_dynamic(p->sc);
    gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded);
    // Source rect size, scaled by the accumulated texture offset transform
    gl_sc_uniform_vec2(p->sc, "input_size",
                       (float[]){(p->src_rect.x1 - p->src_rect.x0) *
                                  p->texture_offset.m[0][0],
                                  (p->src_rect.y1 - p->src_rect.y0) *
                                  p->texture_offset.m[1][1]});
    gl_sc_uniform_vec2(p->sc, "target_size",
                       (float[]){p->dst_rect.x1 - p->dst_rect.x0,
                                  p->dst_rect.y1 - p->dst_rect.y0});
    // Source rect origin mapped through the same transform
    gl_sc_uniform_vec2(p->sc, "tex_offset",
                       (float[]){p->src_rect.x0 * p->texture_offset.m[0][0] +
                                  p->texture_offset.t[0],
                                  p->src_rect.y0 * p->texture_offset.m[1][1] +
                                  p->texture_offset.t[1]});
}
1736 | | |
// Semantic equality: like ==, except two NaNs are considered equal
// (NaN != NaN under IEEE comparison, but for config comparison purposes
// an unset/NaN parameter should match another unset/NaN parameter).
static bool double_seq(double a, double b)
{
    if (isnan(a) && isnan(b))
        return true;
    return a == b;
}
1742 | | |
1743 | | static bool scaler_fun_eq(struct scaler_fun a, struct scaler_fun b) |
1744 | 0 | { |
1745 | 0 | return a.function == b.function && |
1746 | 0 | double_seq(a.params[0], b.params[0]) && |
1747 | 0 | double_seq(a.params[1], b.params[1]) && |
1748 | 0 | a.blur == b.blur && |
1749 | 0 | a.taper == b.taper; |
1750 | 0 | } |
1751 | | |
1752 | | static bool scaler_conf_eq(struct scaler_config a, struct scaler_config b) |
1753 | 0 | { |
1754 | | // Note: antiring isn't compared because it doesn't affect LUT |
1755 | | // generation |
1756 | 0 | return scaler_fun_eq(a.kernel, b.kernel) && |
1757 | 0 | scaler_fun_eq(a.window, b.window) && |
1758 | 0 | a.radius == b.radius && |
1759 | 0 | a.clamp == b.clamp; |
1760 | 0 | } |
1761 | | |
// (Re)initialize a scaler unit for the given config and scale factor,
// computing the filter kernel and uploading its weights as a LUT texture.
// No-op if the scaler is already initialized with an equivalent config.
static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
                          const struct scaler_config *conf,
                          double scale_factor,
                          int sizes[])
{
    mp_assert(conf);
    // Cheap out early if nothing relevant changed
    if (scaler_conf_eq(scaler->conf, *conf) &&
        scaler->scale_factor == scale_factor &&
        scaler->initialized)
        return;

    uninit_scaler(p, scaler);

    if (conf->kernel.function == SCALER_INHERIT)
        conf = &p->opts.scaler[SCALER_SCALE];

    struct filter_kernel bare_window;
    const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.function);
    const struct filter_window *t_window = mp_find_filter_window(conf->window.function);
    if (!t_kernel) {
        // Not a kernel name; if it names a window function, promote that
        // window to a bare kernel
        const struct filter_window *window = mp_find_filter_window(conf->kernel.function);
        if (window) {
            bare_window = (struct filter_kernel) { .f = *window };
            t_kernel = &bare_window;
        }
    }

    scaler->conf = *conf;
    scaler->scale_factor = scale_factor;
    scaler->insufficient = false;
    scaler->initialized = true;
    // No kernel at all: leave scaler->kernel NULL (callers treat this as
    // e.g. bilinear/builtin sampling)
    if (!t_kernel)
        return;

    scaler->kernel_storage = *t_kernel;
    scaler->kernel = &scaler->kernel_storage;

    if (!t_window) {
        // fall back to the scaler's default window if available
        t_window = mp_find_filter_window(t_kernel->window);
    }
    if (t_window)
        scaler->kernel->w = *t_window;

    // Apply user overrides; NaN means "leave the kernel's default"
    for (int n = 0; n < 2; n++) {
        if (!isnan(conf->kernel.params[n]))
            scaler->kernel->f.params[n] = conf->kernel.params[n];
        if (!isnan(conf->window.params[n]))
            scaler->kernel->w.params[n] = conf->window.params[n];
    }

    if (conf->kernel.blur > 0.0)
        scaler->kernel->f.blur = conf->kernel.blur;
    if (conf->window.blur > 0.0)
        scaler->kernel->w.blur = conf->window.blur;

    if (conf->kernel.taper > 0.0)
        scaler->kernel->f.taper = conf->kernel.taper;
    if (conf->window.taper > 0.0)
        scaler->kernel->w.taper = conf->window.taper;

    if (scaler->kernel->f.resizable && conf->radius > 0.0)
        scaler->kernel->f.radius = conf->radius;

    scaler->kernel->clamp = conf->clamp;
    // mp_init_filter failing means the filter didn't fit any allowed size;
    // remember that so it can be reported
    scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);

    // Pack the filter weights into a float16 texture, up to 4 weights per
    // texel
    int size = scaler->kernel->size;
    int num_components = size > 2 ? 4 : size;
    const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components);
    mp_assert(fmt);

    int width = (size + num_components - 1) / num_components; // round up
    int stride = width * num_components;
    mp_assert(size <= stride);

    static const int lut_size = 256;
    float *weights = talloc_array(NULL, float, lut_size * stride);
    mp_compute_lut(scaler->kernel, lut_size, stride, weights);

    // Polar kernels need only a 1D phase LUT; use a true 1D texture if the
    // RA backend supports it
    bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D);

    struct ra_tex_params lut_params = {
        .dimensions = use_1d ? 1 : 2,
        .w = use_1d ? lut_size : width,
        .h = use_1d ? 1 : lut_size,
        .d = 1,
        .format = fmt,
        .render_src = true,
        .src_linear = true,
        .initial_data = weights,
    };
    scaler->lut = ra_tex_create(p->ra, &lut_params);

    // LUT data was copied at texture creation; the CPU copy can go
    talloc_free(weights);

    debug_check_gl(p, "after initializing scaler");
}
1860 | | |
// Special helper for sampling from two separated stages
static void pass_sample_separated(struct gl_video *p, struct image src,
                                  struct scaler *scaler, int w, int h)
{
    // Separate the transformation into x and y components, per pass.
    // t_x keeps the x scale/shear/translation, t_y the y part; applying
    // t_y then t_x reproduces the original transform.
    struct gl_transform t_x = {
        .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}},
        .t = {src.transform.t[0], 0.0},
    };
    struct gl_transform t_y = {
        .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}},
        .t = {0.0, src.transform.t[1]},
    };

    // First pass (scale only in the y dir)
    src.transform = t_y;
    sampler_prelude(p->sc, pass_bind(p, src));
    GLSLF("// first pass\n");
    pass_sample_separated_gen(p->sc, scaler, 0, 1);
    // The multiplier is applied in the first stage only (see pass_sample)
    GLSLF("color *= %f;\n", src.multiplier);
    // Intermediate FBO: source width, target height
    finish_pass_tex(p, &scaler->sep_fbo, src.w, h);

    // Second pass (scale only in the x dir)
    src = image_wrap(scaler->sep_fbo, src.type, src.components);
    src.transform = t_x;
    pass_describe(p, "%s second pass",
                  m_opt_choice_str(scaler->conf.kernel.functions,
                                   scaler->conf.kernel.function));
    sampler_prelude(p->sc, pass_bind(p, src));
    pass_sample_separated_gen(p->sc, scaler, 1, 0);
}
1892 | | |
// Picks either the compute shader version or the regular sampler version
// depending on hardware support
static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler,
                                       struct image img, int w, int h)
{
    uint64_t reqs = RA_CAP_COMPUTE;
    if ((p->ra->caps & reqs) != reqs)
        goto fallback;

    // Kernel reach in source pixels around the sample center
    int bound = ceil(scaler->kernel->radius_cutoff);
    int offset = bound - 1; // padding top/left
    int padding = offset + bound; // total padding

    float ratiox = (float)w / img.w,
          ratioy = (float)h / img.h;

    // For performance we want to load at least as many pixels
    // horizontally as there are threads in a warp (32 for nvidia), as
    // well as enough to take advantage of shmem parallelism
    const int warp_size = 32, threads = 256;
    int bw = warp_size;
    int bh = threads / bw;

    // We need to sample everything from base_min to base_max, so make sure
    // we have enough room in shmem
    int iw = (int)ceil(bw / ratiox) + padding + 1,
        ih = (int)ceil(bh / ratioy) + padding + 1;

    // Each shared-memory tile entry holds one float per component
    int shmem_req = iw * ih * img.components * sizeof(float);
    if (shmem_req > p->ra->max_shmem)
        goto fallback;

    pass_is_compute(p, bw, bh, false);
    pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih);
    return;

fallback:
    // Fall back to regular polar shader when compute shaders are unsupported
    // or the kernel is too big for shmem
    pass_sample_polar(p->sc, scaler, img.components,
                      p->ra->caps & RA_CAP_GATHER);
}
1935 | | |
// Sample from image, with the src rectangle given by it.
// The dst rectangle is implicit by what the caller will do next, but w and h
// must still be what is going to be used (to dimension FBOs correctly).
// This will write the scaled contents to the vec4 "color".
// The scaler unit is initialized by this function; in order to avoid cache
// thrashing, the scaler unit should usually use the same parameters.
static void pass_sample(struct gl_video *p, struct image img,
                        struct scaler *scaler, const struct scaler_config *conf,
                        double scale_factor, int w, int h)
{
    reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);

    // Describe scaler
    const char *scaler_opt[] = {
        [SCALER_SCALE] = "scale",
        [SCALER_DSCALE] = "dscale",
        [SCALER_CSCALE] = "cscale",
        [SCALER_TSCALE] = "tscale",
    };

    pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index],
                  m_opt_choice_str(scaler->conf.kernel.functions,
                                   scaler->conf.kernel.function),
                  plane_names[img.type]);

    // Separated (two-pass) scaling binds textures itself, per pass
    bool is_separated = scaler->kernel && !scaler->kernel->polar;

    // Set up the transformation+prelude and bind the texture, for everything
    // other than separated scaling (which does this in the subfunction)
    if (!is_separated)
        sampler_prelude(p->sc, pass_bind(p, img));

    // Dispatch the scaler. They're all wildly different.
    if (scaler->conf.kernel.function == SCALER_BILINEAR) {
        // Plain GPU texture filtering
        GLSL(color = texture(tex, pos);)
    } else if (scaler->conf.kernel.function == SCALER_BICUBIC_FAST) {
        pass_sample_bicubic_fast(p->sc);
    } else if (scaler->conf.kernel.function == SCALER_OVERSAMPLE) {
        pass_sample_oversample(p->sc, scaler, w, h);
    } else if (scaler->kernel && scaler->kernel->polar) {
        pass_dispatch_sample_polar(p, scaler, img, w, h);
    } else if (scaler->kernel) {
        pass_sample_separated(p, img, scaler, w, h);
    } else {
        MP_ASSERT_UNREACHABLE(); // should never happen
    }

    // Apply any required multipliers. Separated scaling already does this in
    // its first stage
    if (!is_separated)
        GLSLF("color *= %f;\n", img.multiplier);

    // Micro-optimization: Avoid scaling unneeded channels
    skip_unused(p, img.components);
}
1991 | | |
1992 | | // Returns true if two images are semantically equivalent (same metadata) |
1993 | | static bool image_equiv(struct image a, struct image b) |
1994 | 0 | { |
1995 | 0 | return a.type == b.type && |
1996 | 0 | a.components == b.components && |
1997 | 0 | a.multiplier == b.multiplier && |
1998 | 0 | a.tex->params.format == b.tex->params.format && |
1999 | 0 | a.tex->params.w == b.tex->params.w && |
2000 | 0 | a.tex->params.h == b.tex->params.h && |
2001 | 0 | a.w == b.w && |
2002 | 0 | a.h == b.h && |
2003 | 0 | gl_transform_eq(a.transform, b.transform); |
2004 | 0 | } |
2005 | | |
// Internal hook function: emits a debanding pass for the hooked plane.
// Registered by gl_video_setup_hooks when the deband option is enabled.
static void deband_hook(struct gl_video *p, struct image img,
                        struct gl_transform *trans, void *priv)
{
    pass_describe(p, "debanding (%s)", plane_names[img.type]);
    pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg,
                       p->image_params.color.transfer);
}
2013 | | |
// Internal hook function: emits an unsharp masking pass on MAIN.
// Registered by gl_video_setup_hooks when the unsharp option is nonzero.
static void unsharp_hook(struct gl_video *p, struct image img,
                         struct gl_transform *trans, void *priv)
{
    pass_describe(p, "unsharp masking");
    pass_sample_unsharp(p->sc, p->opts.unsharp);
}
2020 | | |
// Context passed to szexp_lookup when evaluating user shader size
// expressions: renderer state plus the image currently hooked.
struct szexp_ctx {
    struct gl_video *p;
    struct image img; // resolves the special variable HOOKED
};
2025 | | |
// Variable lookup callback for eval_szexpr. Resolves a size-expression
// variable name to a width/height pair in size[0]/size[1].
// Handles the special names NATIVE_CROPPED, OUTPUT and HOOKED, then any
// texture previously saved via saved_img_store. Returns false if unknown.
static bool szexp_lookup(void *priv, struct bstr var, float size[2])
{
    struct szexp_ctx *ctx = priv;
    struct gl_video *p = ctx->p;

    // Cropped source size, scaled by the accumulated texture offset
    if (bstr_equals0(var, "NATIVE_CROPPED")) {
        size[0] = (p->src_rect.x1 - p->src_rect.x0) * p->texture_offset.m[0][0];
        size[1] = (p->src_rect.y1 - p->src_rect.y0) * p->texture_offset.m[1][1];
        return true;
    }

    // The size of OUTPUT is determined. It could be useful for certain
    // user shaders to skip passes.
    if (bstr_equals0(var, "OUTPUT")) {
        size[0] = p->dst_rect.x1 - p->dst_rect.x0;
        size[1] = p->dst_rect.y1 - p->dst_rect.y0;
        return true;
    }

    // HOOKED is a special case
    if (bstr_equals0(var, "HOOKED")) {
        size[0] = ctx->img.w;
        size[1] = ctx->img.h;
        return true;
    }

    for (int o = 0; o < p->num_saved_imgs; o++) {
        if (bstr_equals0(var, p->saved_imgs[o].name)) {
            size[0] = p->saved_imgs[o].img.w;
            size[1] = p->saved_imgs[o].img.h;
            return true;
        }
    }

    return false;
}
2062 | | |
2063 | | static bool user_hook_cond(struct gl_video *p, struct image img, void *priv) |
2064 | 0 | { |
2065 | 0 | struct gl_user_shader_hook *shader = priv; |
2066 | 0 | mp_assert(shader); |
2067 | | |
2068 | 0 | float res = false; |
2069 | 0 | struct szexp_ctx ctx = {p, img}; |
2070 | 0 | eval_szexpr(p->log, &ctx, szexp_lookup, shader->cond, &res); |
2071 | 0 | return res; |
2072 | 0 | } |
2073 | | |
// Hook function for user shaders: loads the shader pass body, dispatches
// it (compute or fragment style), and computes the output size/offset
// transform from the shader's WIDTH/HEIGHT expressions.
static void user_hook(struct gl_video *p, struct image img,
                      struct gl_transform *trans, void *priv)
{
    struct gl_user_shader_hook *shader = priv;
    mp_assert(shader);
    load_shader(p, shader->pass_body);

    pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc),
                  plane_names[img.type]);

    if (shader->compute.active) {
        // Compute shaders write their output themselves
        p->pass_compute = shader->compute;
        GLSLF("hook();\n");
    } else {
        GLSLF("color = hook();\n");
    }

    // Make sure we at least create a legal FBO on failure, since it's better
    // to do this and display an error message than just crash OpenGL
    float w = 1.0, h = 1.0;

    eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->width, &w);
    eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->height, &h);

    // Output transform: scale from the hooked image size to the computed
    // size, then apply the shader's static offset
    *trans = (struct gl_transform){{{w / img.w, 0}, {0, h / img.h}}};
    gl_transform_trans(shader->offset, trans);
}
2101 | | |
// parse_user_shader callback: registers a parsed user shader hook.
// The hook struct is duplicated onto p, and every texture-name string is
// duplicated onto that copy, so all memory is freed together with it.
static bool add_user_hook(void *priv, const struct gl_user_shader_hook *hook)
{
    struct gl_video *p = priv;
    struct gl_user_shader_hook *copy = talloc_dup(p, (struct gl_user_shader_hook *)hook);
    struct tex_hook texhook = {
        .save_tex = bstrdup0(copy, copy->save_tex),
        .components = copy->components,
        .align_offset = copy->align_offset,
        .hook = user_hook,
        .cond = user_hook_cond,
        .priv = copy, // user_hook/user_hook_cond retrieve the shader from here
    };

    for (int h = 0; h < SHADER_MAX_HOOKS; h++)
        texhook.hook_tex[h] = bstrdup0(copy, copy->hook_tex[h]);
    for (int h = 0; h < SHADER_MAX_BINDS; h++)
        texhook.bind_tex[h] = bstrdup0(copy, copy->bind_tex[h]);

    MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook);
    return true;
}
2123 | | |
// parse_user_shader callback: creates the RA texture for a TEXTURE block
// embedded in a user shader. The parsed initial data is uploaded at
// creation and freed immediately afterwards. Returns false on failure.
static bool add_user_tex(void *priv, struct gl_user_shader_tex tex)
{
    struct gl_video *p = priv;

    tex.tex = ra_tex_create(p->ra, &tex.params);
    // Data was copied into the texture (or creation failed); drop the CPU copy
    TA_FREEP(&tex.params.initial_data);

    if (!tex.tex)
        return false;

    MP_TARRAY_APPEND(p, p->user_textures, p->num_user_textures, tex);
    return true;
}
2137 | | |
2138 | | static void load_user_shaders(struct gl_video *p, char **shaders) |
2139 | 0 | { |
2140 | 0 | if (!shaders) |
2141 | 0 | return; |
2142 | | |
2143 | 0 | for (int n = 0; shaders[n] != NULL; n++) { |
2144 | 0 | struct bstr file = load_cached_file(p, shaders[n]); |
2145 | 0 | parse_user_shader(p->log, p->ra, file, p, add_user_hook, add_user_tex); |
2146 | 0 | } |
2147 | 0 | } |
2148 | | |
// Rebuild the hook chain from the current options: clears existing hooks,
// registers the internal deband and unsharp hooks if enabled, then loads
// any user shaders (whose hooks run after the internal ones).
static void gl_video_setup_hooks(struct gl_video *p)
{
    gl_video_reset_hooks(p);

    if (p->opts.deband) {
        // Deband every source plane type, sampling from the hooked texture
        MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) {
            .hook_tex = {"LUMA", "CHROMA", "RGB", "XYZ"},
            .bind_tex = {"HOOKED"},
            .hook = deband_hook,
        });
    }

    if (p->opts.unsharp != 0.0) {
        MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) {
            .hook_tex = {"MAIN"},
            .bind_tex = {"HOOKED"},
            .hook = unsharp_hook,
        });
    }

    load_user_shaders(p, p->opts.user_shaders);
}
2171 | | |
// sample from video textures, set "color" variable to yuv value
static void pass_read_video(struct gl_video *p)
{
    struct image img[4];
    struct gl_transform offsets[4];
    pass_get_images(p, &p->image, img, offsets);

    // To keep the code as simple as possibly, we currently run all shader
    // stages even if they would be unnecessary (e.g. no hooks for a texture).
    // In the future, deferred image should optimize this away.

    // Merge semantically identical textures. This loop is done from back
    // to front so that merged textures end up in the right order while
    // simultaneously allowing us to skip unnecessary merges
    for (int n = 3; n >= 0; n--) {
        if (img[n].type == PLANE_NONE)
            continue;

        int first = n;
        int num = 0;

        for (int i = 0; i < n; i++) {
            if (image_equiv(img[n], img[i]) &&
                gl_transform_eq(offsets[n], offsets[i]))
            {
                GLSLF("// merging plane %d ...\n", i);
                copy_image(p, &num, img[i]);
                first = MPMIN(first, i);
                img[i] = (struct image){0};
            }
        }

        if (num > 0) {
            GLSLF("// merging plane %d ... into %d\n", n, first);
            copy_image(p, &num, img[n]);
            pass_describe(p, "merging planes");
            finish_pass_tex(p, &p->merge_tex[n], img[n].w, img[n].h);
            img[first] = image_wrap(p->merge_tex[n], img[n].type, num);
            img[n] = (struct image){0};
        }
    }

    // If any textures are still in integer format by this point, we need
    // to introduce an explicit conversion pass to avoid breaking hooks/scaling
    for (int n = 0; n < 4; n++) {
        if (img[n].tex && img[n].tex->params.format->ctype == RA_CTYPE_UINT) {
            GLSLF("// use_integer fix for plane %d\n", n);
            copy_image(p, &(int){0}, img[n]);
            pass_describe(p, "use_integer fix");
            finish_pass_tex(p, &p->integer_tex[n], img[n].w, img[n].h);
            img[n] = image_wrap(p->integer_tex[n], img[n].type,
                                img[n].components);
        }
    }

    // The basic idea is we assume the rgb/luma texture is the "reference" and
    // scale everything else to match, after all planes are finalized.
    // We find the reference texture first, in order to maintain texture offset
    // between hooks on different type of planes.
    int reference_tex_num = 0;
    for (int n = 0; n < 4; n++) {
        switch (img[n].type) {
        case PLANE_RGB:
        case PLANE_XYZ:
        case PLANE_LUMA: break;
        default: continue;
        }

        reference_tex_num = n;
        break;
    }

    // Dispatch the hooks for all of these textures, saving and perhaps
    // modifying them in the process
    for (int n = 0; n < 4; n++) {
        const char *name;
        switch (img[n].type) {
        case PLANE_RGB:    name = "RGB";    break;
        case PLANE_LUMA:   name = "LUMA";   break;
        case PLANE_CHROMA: name = "CHROMA"; break;
        case PLANE_ALPHA:  name = "ALPHA";  break;
        case PLANE_XYZ:    name = "XYZ";    break;
        default: continue;
        }

        img[n] = pass_hook(p, name, img[n], &offsets[n]);

        if (reference_tex_num == n) {
            // The reference texture is finalized now.
            p->texture_w = img[n].w;
            p->texture_h = img[n].h;
            p->texture_offset = offsets[n];
        }
    }

    // If chroma textures are in a subsampled semi-planar format and rotated,
    // introduce an explicit conversion pass to avoid breaking chroma scalers.
    for (int n = 0; n < 4; n++) {
        if (img[n].tex && img[n].type == PLANE_CHROMA &&
            img[n].tex->params.format->num_components == 2 &&
            p->image_params.rotate % 180 == 90 &&
            p->ra_format.chroma_w != 1)
        {
            GLSLF("// chroma fix for rotated plane %d\n", n);
            copy_image(p, &(int){0}, img[n]);
            pass_describe(p, "chroma fix for rotated plane");
            finish_pass_tex(p, &p->chroma_tex[n], img[n].w, img[n].h);
            img[n] = image_wrap(p->chroma_tex[n], img[n].type,
                                img[n].components);
        }
    }

    // At this point all planes are finalized but they may not be at the
    // required size yet. Furthermore, they may have texture offsets that
    // require realignment.

    // Compute the reference rect
    struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h};
    struct mp_rect_f ref = src;
    gl_transform_rect(p->texture_offset, &ref);

    // Explicitly scale all of the textures that don't match
    for (int n = 0; n < 4; n++) {
        if (img[n].type == PLANE_NONE)
            continue;

        // If the planes are aligned identically, we will end up with the
        // exact same source rectangle.
        struct mp_rect_f rect = src;
        gl_transform_rect(offsets[n], &rect);
        if (mp_rect_f_seq(ref, rect))
            continue;

        // If the rectangles differ, then our planes have a different
        // alignment and/or size. First of all, we have to compute the
        // corrections required to meet the target rectangle
        struct gl_transform fix = {
            .m = {{(ref.x1 - ref.x0) / (rect.x1 - rect.x0), 0.0},
                  {0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}},
            .t = {ref.x0, ref.y0},
        };

        // Since the scale in texture space is different from the scale in
        // absolute terms, we have to scale the coefficients down to be
        // relative to the texture's physical dimensions and local offset
        struct gl_transform scale = {
            .m = {{(float)img[n].w / p->texture_w, 0.0},
                  {0.0, (float)img[n].h / p->texture_h}},
            .t = {-rect.x0, -rect.y0},
        };
        if (p->image_params.rotate % 180 == 90)
            MPSWAP(double, scale.m[0][0], scale.m[1][1]);

        gl_transform_trans(scale, &fix);

        // Since the texture transform is a function of the texture coordinates
        // to texture space, rather than the other way around, we have to
        // actually apply the *inverse* of this. Fortunately, calculating
        // the inverse is relatively easy here.
        fix.m[0][0] = 1.0 / fix.m[0][0];
        fix.m[1][1] = 1.0 / fix.m[1][1];
        fix.t[0] = fix.m[0][0] * -fix.t[0];
        fix.t[1] = fix.m[1][1] * -fix.t[1];
        gl_transform_trans(fix, &img[n].transform);

        // Pick the scaler (and post-scale hook name) by plane type
        int scaler_id = -1;
        const char *name = NULL;
        switch (img[n].type) {
        case PLANE_RGB:
        case PLANE_LUMA:
        case PLANE_XYZ:
            scaler_id = SCALER_SCALE;
            // these aren't worth hooking, fringe hypothetical cases only
            break;
        case PLANE_CHROMA:
            scaler_id = SCALER_CSCALE;
            name = "CHROMA_SCALED";
            break;
        case PLANE_ALPHA:
            // alpha always uses bilinear
            name = "ALPHA_SCALED";
        }

        if (scaler_id < 0)
            continue;

        const struct scaler_config *conf = &p->opts.scaler[scaler_id];

        if (conf->kernel.function == SCALER_INHERIT)
            conf = &p->opts.scaler[SCALER_SCALE];

        struct scaler *scaler = &p->scaler[scaler_id];

        // bilinear scaling is a free no-op thanks to GPU sampling
        if (conf->kernel.function != SCALER_BILINEAR) {
            GLSLF("// upscaling plane %d\n", n);
            pass_sample(p, img[n], scaler, conf, 1.0, p->texture_w, p->texture_h);
            finish_pass_tex(p, &p->scale_tex[n], p->texture_w, p->texture_h);
            img[n] = image_wrap(p->scale_tex[n], img[n].type, img[n].components);
        }

        // Run any post-scaling hooks
        img[n] = pass_hook(p, name, img[n], NULL);
    }

    // All planes are of the same size and properly aligned at this point
    pass_describe(p, "combining planes");
    int coord = 0;
    for (int i = 0; i < 4; i++) {
        if (img[i].type != PLANE_NONE)
            copy_image(p, &coord, img[i]);
    }
    p->components = coord;
}
2386 | | |
// Utility function that simply binds a texture and reads from it, without any
// transformations. Uses the current p->components channel count; applies no
// multiplier or transform.
static void pass_read_tex(struct gl_video *p, struct ra_tex *tex)
{
    struct image img = image_wrap(tex, PLANE_RGB, p->components);
    copy_image(p, &(int){0}, img);
}
2394 | | |
// yuv conversion, and any other conversions before main up/down-scaling.
// Emits the shader code that turns the raw (merged) plane data into
// premultiplied RGBA, and updates p->components / p->user_gamma as a side
// effect. Must run after the planes have been read and combined.
static void pass_convert_yuv(struct gl_video *p)
{
    struct gl_shader_cache *sc = p->sc;

    // Gather conversion parameters: video equalizer state (brightness,
    // contrast, ...) plus properties derived from the image format.
    struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS;
    cparams.gray = p->is_gray;
    cparams.is_float = p->ra_format.component_type == RA_CTYPE_FLOAT;
    mp_csp_set_image_params(&cparams, &p->image_params);
    mp_csp_equalizer_state_get(p->video_eq, &cparams);
    p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma);

    pass_describe(p, "color conversion");

    // Undo any packed-format channel reordering first
    if (p->color_swizzle[0])
        GLSLF("color = color.%s;\n", p->color_swizzle);

    // Pre-colormatrix input gamma correction
    if (cparams.repr.sys == PL_COLOR_SYSTEM_XYZ)
        pass_linearize(p->sc, p->image_params.color.transfer);

    // We always explicitly normalize the range in pass_read_video
    cparams.input_bits = cparams.texture_bits = 0;

    // Conversion to RGB. For RGB itself, this still applies e.g. brightness
    // and contrast controls, or expansion of e.g. LSB-packed 10 bit data.
    struct pl_transform3x3 m = {0};
    mp_get_csp_matrix(&cparams, &m);
    gl_sc_uniform_mat3(sc, "colormatrix", true, &m.mat.m[0][0]);
    gl_sc_uniform_vec3(sc, "colormatrix_c", m.c);

    GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;)

    if (cparams.repr.sys == PL_COLOR_SYSTEM_XYZ) {
        pass_delinearize(p->sc, p->image_params.color.transfer);
        // mp_get_csp_matrix implicitly converts XYZ to DCI-P3
        p->image_params.repr.sys = PL_COLOR_SYSTEM_RGB;
        p->image_params.color.primaries = PL_COLOR_PRIM_DCI_P3;
    }

    if (p->image_params.repr.sys == PL_COLOR_SYSTEM_BT_2020_C) {
        // Conversion for C'rcY'cC'bc via the BT.2020 CL system:
        // C'bc = (B'-Y'c) / 1.9404  | C'bc <= 0
        //      = (B'-Y'c) / 1.5816  | C'bc >  0
        //
        // C'rc = (R'-Y'c) / 1.7184  | C'rc <= 0
        //      = (R'-Y'c) / 0.9936  | C'rc >  0
        //
        // as per the BT.2020 specification, table 4. This is a non-linear
        // transformation because (constant) luminance receives non-equal
        // contributions from the three different channels.
        GLSLF("// constant luminance conversion                                  \n"
              "color.br = color.br * mix(vec2(1.5816, 0.9936),                  \n"
              "                          vec2(1.9404, 1.7184),                  \n"
              "                          %s(lessThanEqual(color.br, vec2(0))))  \n"
              "           + color.gg;                                           \n",
              gl_sc_bvec(p->sc, 2));
        // Expand channels to camera-linear light. This shader currently just
        // assumes everything uses the BT.2020 12-bit gamma function, since the
        // difference between 10 and 12-bit is negligible for anything other
        // than 12-bit content.
        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5),                       \n"
              "                pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n"
              "                    vec3(1.0/0.45)),                             \n"
              "                %s(lessThanEqual(vec3(0.08145), color.rgb)));    \n",
              gl_sc_bvec(p->sc, 3));
        // Calculate the green channel from the expanded RYcB
        // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B
        GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;)
        // Recompress to receive the R'G'B' result, same as other systems
        GLSLF("color.rgb = mix(color.rgb * vec3(4.5),                           \n"
              "                vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n"
              "                %s(lessThanEqual(vec3(0.0181), color.rgb)));     \n",
              gl_sc_bvec(p->sc, 3));
    }

    // Alpha handling: fill with opaque alpha if absent, otherwise ensure the
    // rest of the pipeline always sees premultiplied alpha.
    p->components = 3;
    if (!p->has_alpha) {
        GLSL(color.a = 1.0;)
    } else if (p->image_params.repr.alpha == PL_ALPHA_PREMULTIPLIED) {
        p->components = 4;
    } else {
        p->components = 4;
        GLSL(color = vec4(color.rgb * color.a, color.a);) // straight -> premul
    }
}
2481 | | |
2482 | | static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]) |
2483 | 0 | { |
2484 | 0 | double target_w = p->src_rect.x1 - p->src_rect.x0; |
2485 | 0 | double target_h = p->src_rect.y1 - p->src_rect.y0; |
2486 | 0 | if (transpose_rot && p->image_params.rotate % 180 == 90) |
2487 | 0 | MPSWAP(double, target_w, target_h); |
2488 | 0 | xy[0] = (p->dst_rect.x1 - p->dst_rect.x0) / target_w; |
2489 | 0 | xy[1] = (p->dst_rect.y1 - p->dst_rect.y0) / target_h; |
2490 | 0 | } |
2491 | | |
2492 | | // Cropping. |
2493 | | static void compute_src_transform(struct gl_video *p, struct gl_transform *tr) |
2494 | 0 | { |
2495 | 0 | float sx = (p->src_rect.x1 - p->src_rect.x0) / (float)p->texture_w, |
2496 | 0 | sy = (p->src_rect.y1 - p->src_rect.y0) / (float)p->texture_h, |
2497 | 0 | ox = p->src_rect.x0, |
2498 | 0 | oy = p->src_rect.y0; |
2499 | 0 | struct gl_transform transform = {{{sx, 0}, {0, sy}}, {ox, oy}}; |
2500 | |
|
2501 | 0 | gl_transform_trans(p->texture_offset, &transform); |
2502 | |
|
2503 | 0 | *tr = transform; |
2504 | 0 | } |
2505 | | |
// Takes care of the main scaling and pre/post-conversions (linearization and
// sigmoidization around the scaler kernel). Renders the intermediate result
// into p->indirect_tex, and updates p->texture_w/h to the display size.
static void pass_scale_main(struct gl_video *p)
{
    // Figure out the main scaler.
    double xy[2];
    get_scale_factors(p, true, xy);

    // actual scale factor should be divided by the scale factor of prescaling.
    xy[0] /= p->texture_offset.m[0][0];
    xy[1] /= p->texture_offset.m[1][1];

    // The calculation of scale factor involves 32-bit float(from gl_transform),
    // use non-strict equality test to tolerate precision loss.
    bool downscaling = xy[0] < 1.0 - FLT_EPSILON || xy[1] < 1.0 - FLT_EPSILON;
    bool upscaling = !downscaling && (xy[0] > 1.0 + FLT_EPSILON ||
                                      xy[1] > 1.0 + FLT_EPSILON);
    double scale_factor = 1.0;

    struct scaler *scaler = &p->scaler[SCALER_SCALE];
    struct scaler_config scaler_conf = p->opts.scaler[SCALER_SCALE];
    if (p->opts.scaler_resizes_only && !downscaling && !upscaling) {
        scaler_conf.kernel.function = SCALER_BILINEAR;
        // For scaler-resizes-only, we round the texture offset to
        // the nearest round value in order to prevent ugly blurriness
        // (in exchange for slightly shifting the image by up to half a
        // subpixel)
        p->texture_offset.t[0] = roundf(p->texture_offset.t[0]);
        p->texture_offset.t[1] = roundf(p->texture_offset.t[1]);
    }
    // Use the dedicated downscaler (dscale) if one is configured
    if (downscaling &&
        p->opts.scaler[SCALER_DSCALE].kernel.function != SCALER_INHERIT) {
        scaler_conf = p->opts.scaler[SCALER_DSCALE];
        scaler = &p->scaler[SCALER_DSCALE];
    }

    // When requesting correct-downscaling and the clip is anamorphic, and
    // because only a single scale factor is used for both axes, enable it only
    // when both axes are downscaled, and use the milder of the factors to not
    // end up with too much blur on one axis (even if we end up with sub-optimal
    // scale factor on the other axis). This is better than not respecting
    // correct scaling at all for anamorphic clips.
    double f = MPMAX(xy[0], xy[1]);
    if (p->opts.correct_downscaling && f < 1.0)
        scale_factor = 1.0 / f;

    // Pre-conversion, like linear light/sigmoidization
    GLSLF("// scaler pre-conversion\n");
    bool use_linear = false;
    if (downscaling) {
        use_linear = p->opts.linear_downscaling;

        // Linear light downscaling results in nasty artifacts for HDR curves
        // due to the potentially extreme brightness differences severely
        // compounding any ringing. So just scale in gamma light instead.
        if (pl_color_space_is_hdr(&p->image_params.color))
            use_linear = false;
    } else if (upscaling) {
        use_linear = p->opts.linear_upscaling || p->opts.sigmoid_upscaling;
    }

    if (use_linear) {
        p->use_linear = true;
        pass_linearize(p->sc, p->image_params.color.transfer);
        pass_opt_hook_point(p, "LINEAR", NULL);
    }

    // Sigmoidization only makes sense on top of linear light when upscaling
    bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling;
    float sig_center, sig_slope, sig_offset, sig_scale;
    if (use_sigmoid) {
        // Coefficients for the sigmoidal transform are taken from the
        // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal
        sig_center = p->opts.sigmoid_center;
        sig_slope  = p->opts.sigmoid_slope;
        // This function needs to go through (0,0) and (1,1) so we compute the
        // values at 1 and 0, and then scale/shift them, respectively.
        sig_offset = 1.0/(1+expf(sig_slope * sig_center));
        sig_scale  = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset;
        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
        GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n",
                sig_center, sig_scale, sig_offset, sig_slope);
        pass_opt_hook_point(p, "SIGMOID", NULL);
    }

    pass_opt_hook_point(p, "PREKERNEL", NULL);

    int vp_w = p->dst_rect.x1 - p->dst_rect.x0;
    int vp_h = p->dst_rect.y1 - p->dst_rect.y0;
    struct gl_transform transform;
    compute_src_transform(p, &transform);

    GLSLF("// main scaling\n");
    finish_pass_tex(p, &p->indirect_tex, p->texture_w, p->texture_h);
    struct image src = image_wrap(p->indirect_tex, PLANE_RGB, p->components);
    gl_transform_trans(transform, &src.transform);
    pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h);

    // Changes the texture size to display size after main scaler.
    p->texture_w = vp_w;
    p->texture_h = vp_h;

    pass_opt_hook_point(p, "POSTKERNEL", NULL);

    GLSLF("// scaler post-conversion\n");
    if (use_sigmoid) {
        // Inverse of the transformation above
        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
        GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n",
                sig_slope, sig_center, sig_offset, sig_scale);
    }
}
2616 | | |
// Adapts the colors to the right output color space. (Final pass during
// rendering)
// If OSD is true, ignore any changes that may have been made to the video
// by previous passes (i.e. linear scaling)
//
// src / src_light describe the incoming colorspace; fbo_csp carries the
// colorspace the FBO reports (used for defaults unless the user overrides
// them); flags are RENDER_* bits. As a side effect, fills in
// p->target_params (unless osd is set) and may lazily create the HDR peak
// detection SSBO.
static void pass_colormanage(struct gl_video *p, struct pl_color_space src,
                             enum mp_csp_light src_light,
                             const struct pl_color_space *fbo_csp, int flags, bool osd)
{
    struct ra *ra = p->ra;

    // Configure the destination according to the FBO color space,
    // unless specific transfer function, primaries or target peak
    // is set. If values are set to _AUTO, the most likely intended
    // values are guesstimated later in this function.
    struct pl_color_space dst = {
        .transfer = p->opts.target_trc == PL_COLOR_TRC_UNKNOWN ?
                    fbo_csp->transfer : p->opts.target_trc,
        .primaries = p->opts.target_prim == PL_COLOR_PRIM_UNKNOWN ?
                     fbo_csp->primaries : p->opts.target_prim,
        .hdr.max_luma = !p->opts.target_peak ?
                        fbo_csp->hdr.max_luma : p->opts.target_peak,
    };

    // Warn (once) if user overrides conflict with what the FBO reports
    if (!p->colorspace_override_warned &&
        ((fbo_csp->transfer && dst.transfer != fbo_csp->transfer) ||
         (fbo_csp->primaries && dst.primaries != fbo_csp->primaries)))
    {
        MP_WARN(p, "One or more colorspace value is being overridden "
                   "by user while the FBO provides colorspace information: "
                   "transfer function: (dst: %s, fbo: %s), "
                   "primaries: (dst: %s, fbo: %s). "
                   "Rendering can lead to incorrect results!\n",
                m_opt_choice_str(pl_csp_trc_names,  dst.transfer),
                m_opt_choice_str(pl_csp_trc_names,  fbo_csp->transfer),
                m_opt_choice_str(pl_csp_prim_names, dst.primaries),
                m_opt_choice_str(pl_csp_prim_names, fbo_csp->primaries));
        p->colorspace_override_warned = true;
    }

    enum mp_csp_light dst_light = dst.transfer == PL_COLOR_TRC_HLG ?
                                  MP_CSP_LIGHT_SCENE_HLG : MP_CSP_LIGHT_DISPLAY;

    if (p->use_lut_3d && (flags & RENDER_SCREEN_COLOR)) {
        // The 3DLUT is always generated against the video's original source
        // space, *not* the reference space. (To avoid having to regenerate
        // the 3DLUT for the OSD on every frame)
        enum pl_color_primaries prim_orig = p->image_params.color.primaries;
        enum pl_color_transfer trc_orig = p->image_params.color.transfer;

        // One exception: HDR is not implemented by LittleCMS for technical
        // limitation reasons, so we use a gamma 2.2 input curve here instead.
        // We could pick any value we want here, the difference is just coding
        // efficiency.
        if (pl_color_space_is_hdr(&p->image_params.color))
            trc_orig = PL_COLOR_TRC_GAMMA22;

        if (gl_video_get_lut3d(p, prim_orig, trc_orig)) {
            dst.primaries = prim_orig;
            dst.transfer = trc_orig;
            mp_assert(dst.primaries && dst.transfer);
        }
    }

    if (dst.primaries == PL_COLOR_PRIM_UNKNOWN) {
        // The vast majority of people are on sRGB or BT.709 displays, so pick
        // this as the default output color space.
        dst.primaries = PL_COLOR_PRIM_BT_709;

        if (src.primaries == PL_COLOR_PRIM_BT_601_525 ||
            src.primaries == PL_COLOR_PRIM_BT_601_625)
        {
            // Since we auto-pick BT.601 and BT.709 based on the dimensions,
            // combined with the fact that they're very similar to begin with,
            // and to avoid confusing the average user, just don't adapt BT.601
            // content automatically at all.
            dst.primaries = src.primaries;
        }
    }

    if (dst.transfer == PL_COLOR_TRC_UNKNOWN) {
        // Most people seem to complain when the image is darker or brighter
        // than what they're "used to", so just avoid changing the gamma
        // altogether by default. The only exceptions to this rule apply to
        // very unusual TRCs, which even hardcode technoluddites would probably
        // not enjoy viewing unaltered.
        dst.transfer = src.transfer;

        // Avoid outputting linear light or HDR content "by default". For these
        // just pick gamma 2.2 as a default, since it's a good estimate for
        // the response of typical displays
        if (dst.transfer == PL_COLOR_TRC_LINEAR || pl_color_space_is_hdr(&dst))
            dst.transfer = PL_COLOR_TRC_GAMMA22;
    }

    // If there's no specific signal peak known for the output display, infer
    // it from the chosen transfer function. Also normalize the src peak, in
    // case it was unknown
    if (!dst.hdr.max_luma)
        dst.hdr.max_luma = pl_color_transfer_nominal_peak(dst.transfer) * MP_REF_WHITE;
    if (!src.hdr.max_luma)
        src.hdr.max_luma = pl_color_transfer_nominal_peak(src.transfer) * MP_REF_WHITE;

    // Whitelist supported modes
    switch (p->opts.tone_map.curve) {
    case TONE_MAPPING_AUTO:
    case TONE_MAPPING_CLIP:
    case TONE_MAPPING_MOBIUS:
    case TONE_MAPPING_REINHARD:
    case TONE_MAPPING_HABLE:
    case TONE_MAPPING_GAMMA:
    case TONE_MAPPING_LINEAR:
    case TONE_MAPPING_BT_2390:
        break;
    default:
        MP_WARN(p, "Tone mapping curve unsupported by vo_gpu, falling back.\n");
        p->opts.tone_map.curve = TONE_MAPPING_AUTO;
        break;
    }

    switch (p->opts.tone_map.gamut_mode) {
    case GAMUT_AUTO:
    case GAMUT_WARN:
    case GAMUT_CLIP:
    case GAMUT_DESATURATE:
        break;
    default:
        MP_WARN(p, "Gamut mapping mode unsupported by vo_gpu, falling back.\n");
        p->opts.tone_map.gamut_mode = GAMUT_AUTO;
        break;
    }

    // HDR peak detection is only worthwhile when the source is HDR and
    // actually brighter than the display can show
    struct gl_tone_map_opts tone_map = p->opts.tone_map;
    bool detect_peak = tone_map.compute_peak >= 0 && pl_color_space_is_hdr(&src)
                       && src.hdr.max_luma > dst.hdr.max_luma;

    // Lazily create the shader storage buffer backing peak detection
    if (detect_peak && !p->hdr_peak_ssbo) {
        struct {
            float average[2];
            int32_t frame_sum;
            uint32_t frame_max;
            uint32_t counter;
        } peak_ssbo = {0};

        struct ra_buf_params params = {
            .type = RA_BUF_TYPE_SHADER_STORAGE,
            .size = sizeof(peak_ssbo),
            .initial_data = &peak_ssbo,
        };

        p->hdr_peak_ssbo = ra_buf_create(ra, &params);
        if (!p->hdr_peak_ssbo) {
            MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n");
            tone_map.compute_peak = p->opts.tone_map.compute_peak = -1;
            detect_peak = false;
        }
    }

    if (detect_peak) {
        pass_describe(p, "detect HDR peak");
        pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
        gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
            "vec2 average;"
            "int frame_sum;"
            "uint frame_max;"
            "uint counter;"
        );
    } else {
        tone_map.compute_peak = -1;
    }

    // Adapt from src to dst as necessary
    pass_color_map(p->sc, p->use_linear && !osd, &src, &dst, src_light, dst_light, &tone_map);

    // Record the final output parameters for introspection (video only)
    if (!osd) {
        struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS;
        mp_csp_equalizer_state_get(p->video_eq, &cparams);
        if (cparams.levels_out == PL_COLOR_LEVELS_UNKNOWN)
            cparams.levels_out = PL_COLOR_LEVELS_FULL;
        p->target_params = (struct mp_image_params){
            .imgfmt_name = p->fbo_format ? p->fbo_format->name : "unknown",
            .w = mp_rect_w(p->dst_rect),
            .h = mp_rect_h(p->dst_rect),
            .color = dst,
            .repr = {.sys = PL_COLOR_SYSTEM_RGB, .levels = cparams.levels_out},
            .rotate = p->image_params.rotate,
        };
    }

    // Apply the 3DLUT lookup last, after all other color mapping
    if (p->use_lut_3d && (flags & RENDER_SCREEN_COLOR)) {
        gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture);
        GLSL(vec3 cpos;)
        for (int i = 0; i < 3; i++)
            GLSLF("cpos[%d] = LUT_POS(color[%d], %d.0);\n", i, i, p->lut_3d_size[i]);
        GLSL(color.rgb = tex3D(lut_3d, cpos).rgb;)
    }
}
2813 | | |
// Record the effective bit depth of the framebuffer; later used by
// pass_dither() to pick the quantization level (0/negative means unknown).
void gl_video_set_fb_depth(struct gl_video *p, int fb_depth)
{
    p->fb_depth = fb_depth;
}
2818 | | |
2819 | | static void pass_dither(struct gl_video *p, const struct ra_fbo *fbo) |
2820 | 0 | { |
2821 | | // Assume 8 bits per component if unknown. |
2822 | 0 | int dst_depth = p->fb_depth > 0 ? p->fb_depth : 8; |
2823 | 0 | if (p->opts.dither_depth > 0) |
2824 | 0 | dst_depth = p->opts.dither_depth; |
2825 | |
|
2826 | 0 | if (p->opts.dither_depth < 0 || p->opts.dither_algo == DITHER_NONE) |
2827 | 0 | return; |
2828 | | |
2829 | 0 | if (p->opts.dither_algo == DITHER_ERROR_DIFFUSION) { |
2830 | 0 | const struct error_diffusion_kernel *kernel = |
2831 | 0 | mp_find_error_diffusion_kernel(p->opts.error_diffusion); |
2832 | 0 | int o_w = p->dst_rect.x1 - p->dst_rect.x0, |
2833 | 0 | o_h = p->dst_rect.y1 - p->dst_rect.y0; |
2834 | |
|
2835 | 0 | int shmem_req = mp_ef_compute_shared_memory_size(kernel, o_h); |
2836 | 0 | if (shmem_req > p->ra->max_shmem) { |
2837 | 0 | MP_WARN(p, "Fallback to dither=fruit because there is no enough " |
2838 | 0 | "shared memory (%d/%d).\n", |
2839 | 0 | shmem_req, (int)p->ra->max_shmem); |
2840 | 0 | p->opts.dither_algo = DITHER_FRUIT; |
2841 | 0 | } else { |
2842 | 0 | finish_pass_tex(p, &p->error_diffusion_tex[0], o_w, o_h); |
2843 | |
|
2844 | 0 | struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components); |
2845 | | |
2846 | | // Ensure the block size doesn't exceed the maximum of the |
2847 | | // implementation. |
2848 | 0 | int block_size = MPMIN(p->ra->max_compute_group_threads, o_h); |
2849 | |
|
2850 | 0 | pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)", |
2851 | 0 | kernel->name, dst_depth); |
2852 | |
|
2853 | 0 | p->pass_compute = (struct compute_info) { |
2854 | 0 | .active = true, |
2855 | 0 | .threads_w = block_size, |
2856 | 0 | .threads_h = 1, |
2857 | 0 | .directly_writes = true |
2858 | 0 | }; |
2859 | |
|
2860 | 0 | int tex_id = pass_bind(p, img); |
2861 | |
|
2862 | 0 | pass_error_diffusion(p->sc, kernel, tex_id, o_w, o_h, |
2863 | 0 | dst_depth, block_size); |
2864 | |
|
2865 | 0 | finish_pass_tex(p, &p->error_diffusion_tex[1], o_w, o_h); |
2866 | |
|
2867 | 0 | img = image_wrap(p->error_diffusion_tex[1], PLANE_RGB, p->components); |
2868 | 0 | copy_image(p, &(int){0}, img); |
2869 | |
|
2870 | 0 | return; |
2871 | 0 | } |
2872 | 0 | } |
2873 | | |
2874 | 0 | if (!p->dither_texture) { |
2875 | 0 | MP_VERBOSE(p, "Dither to %d.\n", dst_depth); |
2876 | |
|
2877 | 0 | int tex_size = 0; |
2878 | 0 | void *tex_data = NULL; |
2879 | 0 | const struct ra_format *fmt = NULL; |
2880 | 0 | void *temp = NULL; |
2881 | |
|
2882 | 0 | if (p->opts.dither_algo == DITHER_FRUIT) { |
2883 | 0 | int sizeb = p->opts.dither_size; |
2884 | 0 | int size = 1 << sizeb; |
2885 | |
|
2886 | 0 | if (p->last_dither_matrix_size != size) { |
2887 | 0 | p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix, |
2888 | 0 | float, size * size); |
2889 | 0 | mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb); |
2890 | 0 | p->last_dither_matrix_size = size; |
2891 | 0 | } |
2892 | | |
2893 | | // Prefer R16 texture since they provide higher precision. |
2894 | 0 | fmt = ra_find_unorm_format(p->ra, 2, 1); |
2895 | 0 | if (!fmt) |
2896 | 0 | fmt = ra_find_float16_format(p->ra, 1); |
2897 | 0 | if (fmt) { |
2898 | 0 | tex_size = size; |
2899 | 0 | tex_data = p->last_dither_matrix; |
2900 | 0 | if (fmt->ctype == RA_CTYPE_UNORM) { |
2901 | 0 | uint16_t *t = temp = talloc_array(NULL, uint16_t, size * size); |
2902 | 0 | for (int n = 0; n < size * size; n++) |
2903 | 0 | t[n] = p->last_dither_matrix[n] * UINT16_MAX; |
2904 | 0 | tex_data = t; |
2905 | 0 | } |
2906 | 0 | } else { |
2907 | 0 | MP_VERBOSE(p, "GL too old. Falling back to ordered dither.\n"); |
2908 | 0 | p->opts.dither_algo = DITHER_ORDERED; |
2909 | 0 | } |
2910 | 0 | } |
2911 | |
|
2912 | 0 | if (p->opts.dither_algo == DITHER_ORDERED) { |
2913 | 0 | temp = talloc_array(NULL, char, 8 * 8); |
2914 | 0 | mp_make_ordered_dither_matrix(temp, 8); |
2915 | |
|
2916 | 0 | fmt = ra_find_unorm_format(p->ra, 1, 1); |
2917 | 0 | tex_size = 8; |
2918 | 0 | tex_data = temp; |
2919 | 0 | } |
2920 | |
|
2921 | 0 | struct ra_tex_params params = { |
2922 | 0 | .dimensions = 2, |
2923 | 0 | .w = tex_size, |
2924 | 0 | .h = tex_size, |
2925 | 0 | .d = 1, |
2926 | 0 | .format = fmt, |
2927 | 0 | .render_src = true, |
2928 | 0 | .src_repeat = true, |
2929 | 0 | .initial_data = tex_data, |
2930 | 0 | }; |
2931 | 0 | p->dither_texture = ra_tex_create(p->ra, ¶ms); |
2932 | |
|
2933 | 0 | debug_check_gl(p, "dither setup"); |
2934 | |
|
2935 | 0 | talloc_free(temp); |
2936 | |
|
2937 | 0 | if (!p->dither_texture) |
2938 | 0 | return; |
2939 | 0 | } |
2940 | | |
2941 | 0 | GLSLF("// dithering\n"); |
2942 | | |
2943 | | // This defines how many bits are considered significant for output on |
2944 | | // screen. The superfluous bits will be used for rounding according to the |
2945 | | // dither matrix. The precision of the source implicitly decides how many |
2946 | | // dither patterns can be visible. |
2947 | 0 | int dither_quantization = (1 << dst_depth) - 1; |
2948 | 0 | int dither_size = p->dither_texture->params.w; |
2949 | |
|
2950 | 0 | gl_sc_uniform_texture(p->sc, "dither", p->dither_texture); |
2951 | |
|
2952 | 0 | GLSLF("vec2 dither_coord = vec2(gl_FragCoord.x, %d.0 + %f * gl_FragCoord.y);", |
2953 | 0 | fbo->flip ? fbo->tex->params.h : 0, fbo->flip ? -1.0 : 1.0); |
2954 | 0 | GLSLF("vec2 dither_pos = dither_coord * 1.0/%d.0;\n", dither_size); |
2955 | |
|
2956 | 0 | if (p->opts.temporal_dither) { |
2957 | 0 | int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u; |
2958 | 0 | float r = phase * (M_PI / 2); // rotate |
2959 | 0 | float m = phase < 4 ? 1 : -1; // mirror |
2960 | |
|
2961 | 0 | float matrix[2][2] = {{cos(r), -sin(r) }, |
2962 | 0 | {sin(r) * m, cos(r) * m}}; |
2963 | 0 | gl_sc_uniform_dynamic(p->sc); |
2964 | 0 | gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]); |
2965 | |
|
2966 | 0 | GLSL(dither_pos = dither_trafo * dither_pos;) |
2967 | 0 | } |
2968 | |
|
2969 | 0 | GLSL(float dither_value = texture(dither, dither_pos).r;) |
2970 | 0 | GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n", |
2971 | 0 | dither_quantization, dither_size * dither_size, dither_quantization); |
2972 | 0 | } |
2973 | | |
// Draws the OSD, in scene-referred colors. If cms is true, subtitles are
// instead adapted to the display's gamut (assumed sRGB, for lack of anything
// saner). osd_flags select which OSD parts to draw; frame_flags are
// RENDER_FRAME_* bits; pts/rect position the OSD; fbo is the render target.
static void pass_draw_osd(struct gl_video *p, int osd_flags, int frame_flags,
                          double pts, struct mp_osd_res rect, const struct ra_fbo *fbo,
                          bool cms)
{
    if (frame_flags & RENDER_FRAME_VF_SUBS)
        osd_flags |= OSD_DRAW_SUB_FILTER;

    // SUB_ONLY and OSD_ONLY together exclude everything - nothing to draw
    if ((osd_flags & OSD_DRAW_SUB_ONLY) && (osd_flags & OSD_DRAW_OSD_ONLY))
        return;

    mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo3d, osd_flags);

    timer_pool_start(p->osd_timer);
    for (int n = 0; n < MAX_OSD_PARTS; n++) {
        // (This returns false if this part is empty with nothing to draw.)
        if (!mpgl_osd_draw_prepare(p->osd, n, p->sc))
            continue;
        // When subtitles need to be color managed, assume they're in sRGB
        // (for lack of anything saner to do)
        if (cms) {
            static const struct pl_color_space csp_srgb = {
                .primaries = PL_COLOR_PRIM_BT_709,
                .transfer = PL_COLOR_TRC_SRGB,
            };

            pass_colormanage(p, csp_srgb, MP_CSP_LIGHT_DISPLAY, &fbo->color_space,
                             frame_flags, true);
        }
        mpgl_osd_draw_finish(p->osd, n, p->sc, fbo);
    }

    timer_pool_stop(p->osd_timer);
    pass_describe(p, "drawing osd");
    struct mp_pass_perf perf = timer_pool_measure(p->osd_timer);
    pass_record(p, &perf);
}
3012 | | |
// Ratio between the nominal size and the chroma-upsized size of an axis;
// used to realign chroma planes in the dumb-mode render path.
static float chroma_realign(int size, int pixel)
{
    int upsized = chroma_upsize(size, pixel);
    return (float)size / upsized;
}
3017 | | |
// Minimal rendering code path, for GLES or OpenGL 2.1 without proper FBOs.
// Samples all planes directly with the combined crop/chroma transform and
// performs the YUV conversion inline - no intermediate passes, no scalers.
static void pass_render_frame_dumb(struct gl_video *p)
{
    struct image img[4];
    struct gl_transform off[4];
    pass_get_images(p, &p->image, img, off);

    struct gl_transform transform;
    compute_src_transform(p, &transform);

    int index = 0;
    for (int i = 0; i < p->plane_count; i++) {
        // Chroma planes are subsampled; scale the transform accordingly
        int cw = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1;
        int ch = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_h : 1;
        if (p->image_params.rotate % 180 == 90)
            MPSWAP(int, cw, ch);

        struct gl_transform t = transform;
        t.m[0][0] *= chroma_realign(p->texture_w, cw);
        t.m[1][1] *= chroma_realign(p->texture_h, ch);

        t.t[0] /= cw;
        t.t[1] /= ch;

        // Apply the per-plane chroma offset (e.g. MPEG-2 siting)
        t.t[0] += off[i].t[0];
        t.t[1] += off[i].t[1];

        gl_transform_trans(img[i].transform, &t);
        img[i].transform = t;

        copy_image(p, &index, img[i]);
    }

    pass_convert_yuv(p);
}
3053 | | |
// The main rendering function, takes care of everything up to and including
// upscaling. p->image is rendered.
// flags: bit set of RENDER_FRAME_* flags
// mpi/id identify the frame to upload. Returns false if the upload failed
// (in which case nothing was rendered).
static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi,
                              uint64_t id, int flags)
{
    // initialize the texture parameters and temporary variables
    p->texture_w = p->image_params.w;
    p->texture_h = p->image_params.h;
    p->texture_offset = identity_trans;
    p->components = 0;
    p->num_saved_imgs = 0;
    p->idx_hook_textures = 0;
    p->use_linear = false;

    // try uploading the frame
    if (!pass_upload_image(p, mpi, id))
        return false;

    if (p->image_params.rotate % 180 == 90)
        MPSWAP(int, p->texture_w, p->texture_h);

    // Dumb mode defers all work to pass_render_frame_dumb (called later)
    if (p->dumb_mode)
        return true;

    pass_read_video(p);
    pass_opt_hook_point(p, "NATIVE", &p->texture_offset);
    pass_convert_yuv(p);
    pass_opt_hook_point(p, "MAINPRESUB", &p->texture_offset);

    // For subtitles
    double vpts = p->image.mpi->pts;
    if (vpts == MP_NOPTS_VALUE)
        vpts = p->osd_pts;

    // blend-subs=video: blend subtitles at video resolution, before scaling
    if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO &&
        (flags & RENDER_FRAME_SUBS))
    {
        double scale[2];
        get_scale_factors(p, false, scale);
        struct mp_osd_res rect = {
            .w = p->texture_w, .h = p->texture_h,
            .display_par = scale[1] / scale[0], // counter compensate scaling
        };
        finish_pass_tex(p, &p->blend_subs_tex, rect.w, rect.h);
        struct ra_fbo fbo = { p->blend_subs_tex };
        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, flags, vpts, rect, &fbo, false);
        pass_read_tex(p, p->blend_subs_tex);
        pass_describe(p, "blend subs video");
    }
    pass_opt_hook_point(p, "MAIN", &p->texture_offset);

    pass_scale_main(p);

    int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
        vp_h = p->dst_rect.y1 - p->dst_rect.y0;
    // blend-subs=yes: blend subtitles at display resolution, after scaling
    if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES &&
        (flags & RENDER_FRAME_SUBS))
    {
        // Recreate the real video size from the src/dst rects
        struct mp_osd_res rect = {
            .w = vp_w, .h = vp_h,
            .ml = -p->src_rect.x0, .mr = p->src_rect.x1 - p->image_params.w,
            .mt = -p->src_rect.y0, .mb = p->src_rect.y1 - p->image_params.h,
            .display_par = 1.0,
        };
        // Adjust margins for scale
        double scale[2];
        get_scale_factors(p, true, scale);
        rect.ml *= scale[0]; rect.mr *= scale[0];
        rect.mt *= scale[1]; rect.mb *= scale[1];
        // We should always blend subtitles in non-linear light
        if (p->use_linear) {
            pass_delinearize(p->sc, p->image_params.color.transfer);
            p->use_linear = false;
        }
        finish_pass_tex(p, &p->blend_subs_tex, p->texture_w, p->texture_h);
        struct ra_fbo fbo = { p->blend_subs_tex };
        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, flags, vpts, rect, &fbo, false);
        pass_read_tex(p, p->blend_subs_tex);
        pass_describe(p, "blend subs");
    }

    pass_opt_hook_point(p, "SCALED", NULL);

    return true;
}
3141 | | |
// Draw the current pass contents to the given FBO: apply user gamma, color
// management, background/alpha handling and dithering, then finish the pass.
// flags: bit set of RENDER_SCREEN_* flags
static void pass_draw_to_screen(struct gl_video *p, const struct ra_fbo *fbo, int flags)
{
    // In dumb mode, the whole frame pipeline was skipped; generate it here
    if (p->dumb_mode)
        pass_render_frame_dumb(p);

    // Adjust the overall gamma before drawing to screen
    if (p->user_gamma != 1) {
        gl_sc_uniform_f(p->sc, "user_gamma", p->user_gamma);
        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
        GLSL(color.rgb = pow(color.rgb, vec3(user_gamma));)
    }

    pass_colormanage(p, p->image_params.color, p->image_params.light,
                     &fbo->color_space, flags, false);

    // Since finish_pass_fbo doesn't work with compute shaders, and neither
    // does the checkerboard/dither code, we may need an indirection via
    // p->screen_tex here.
    if (p->pass_compute.active) {
        int o_w = p->dst_rect.x1 - p->dst_rect.x0,
            o_h = p->dst_rect.y1 - p->dst_rect.y0;
        finish_pass_tex(p, &p->screen_tex, o_w, o_h);
        struct image tmp = image_wrap(p->screen_tex, PLANE_RGB, p->components);
        copy_image(p, &(int){0}, tmp);
    }

    if (p->has_alpha) {
        if (p->opts.background == BACKGROUND_TILES) {
            // Draw checkerboard pattern to indicate transparency.
            // The y coordinate is adjusted so the pattern is stable when the
            // FBO is vertically flipped.
            GLSLF("// transparency checkerboard\n");
            GLSLF("vec2 tile_coord = vec2(gl_FragCoord.x, %d.0 + %f * gl_FragCoord.y);",
                  fbo->flip ? fbo->tex->params.h : 0, fbo->flip ? -1.0 : 1.0);
            GLSL(bvec2 tile = lessThan(fract(tile_coord * 1.0 / 32.0), vec2(0.5));)
            GLSL(vec3 background = vec3(tile.x == tile.y ? 0.93 : 0.87);)
            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
            GLSL(color.a = 1.0;)
        } else if (p->opts.background == BACKGROUND_COLOR) {
            // Blend into background color (usually black)
            struct m_color c = p->opts.background_color;
            GLSLF("vec4 background = vec4(%f, %f, %f, %f);\n",
                  c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0);
            GLSL(color += background * (1.0 - color.a);)
            GLSL(color.rgb *= vec3(color.a););
        }
    }

    pass_opt_hook_point(p, "OUTPUT", NULL);

    if (flags & RENDER_SCREEN_COLOR)
        pass_dither(p, fbo);
    pass_describe(p, "output to screen");
    finish_pass_fbo(p, fbo, false, &p->dst_rect);
}
3195 | | |
3196 | | // flags: bit set of RENDER_FRAME_* flags |
3197 | | static bool update_surface(struct gl_video *p, struct mp_image *mpi, |
3198 | | uint64_t id, struct surface *surf, int flags) |
3199 | 0 | { |
3200 | 0 | int vp_w = p->dst_rect.x1 - p->dst_rect.x0, |
3201 | 0 | vp_h = p->dst_rect.y1 - p->dst_rect.y0; |
3202 | |
|
3203 | 0 | pass_info_reset(p, false); |
3204 | 0 | if (!pass_render_frame(p, mpi, id, flags)) |
3205 | 0 | return false; |
3206 | | |
3207 | | // Frame blending should always be done in linear light to preserve the |
3208 | | // overall brightness, otherwise this will result in flashing dark frames |
3209 | | // because mixing in compressed light artificially darkens the results |
3210 | 0 | if (!p->use_linear) { |
3211 | 0 | p->use_linear = true; |
3212 | 0 | pass_linearize(p->sc, p->image_params.color.transfer); |
3213 | 0 | } |
3214 | |
|
3215 | 0 | finish_pass_tex(p, &surf->tex, vp_w, vp_h); |
3216 | 0 | surf->id = id; |
3217 | 0 | surf->pts = mpi->pts; |
3218 | 0 | return true; |
3219 | 0 | } |
3220 | | |
3221 | | // Draws an interpolate frame to fbo, based on the frame timing in t |
3222 | | // flags: bit set of RENDER_FRAME_* flags |
3223 | | static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, |
3224 | | const struct ra_fbo *fbo, int flags) |
3225 | 0 | { |
3226 | 0 | bool is_new = false; |
3227 | | |
3228 | | // Reset the queue completely if this is a still image, to avoid any |
3229 | | // interpolation artifacts from surrounding frames when unpausing or |
3230 | | // framestepping |
3231 | 0 | if (t->still) |
3232 | 0 | gl_video_reset_surfaces(p); |
3233 | | |
3234 | | // First of all, figure out if we have a frame available at all, and draw |
3235 | | // it manually + reset the queue if not |
3236 | 0 | if (p->surfaces[p->surface_now].id == 0) { |
3237 | 0 | struct surface *now = &p->surfaces[p->surface_now]; |
3238 | 0 | if (!update_surface(p, t->current, t->frame_id, now, flags)) |
3239 | 0 | return; |
3240 | 0 | p->surface_idx = p->surface_now; |
3241 | 0 | is_new = true; |
3242 | 0 | } |
3243 | | |
3244 | | // Find the right frame for this instant |
3245 | 0 | if (t->current) { |
3246 | 0 | int next = surface_wrap(p->surface_now + 1); |
3247 | 0 | while (p->surfaces[next].id && |
3248 | 0 | p->surfaces[next].id > p->surfaces[p->surface_now].id && |
3249 | 0 | p->surfaces[p->surface_now].id < t->frame_id) |
3250 | 0 | { |
3251 | 0 | p->surface_now = next; |
3252 | 0 | next = surface_wrap(next + 1); |
3253 | 0 | } |
3254 | 0 | } |
3255 | | |
3256 | | // Figure out the queue size. For illustration, a filter radius of 2 would |
3257 | | // look like this: _ A [B] C D _ |
3258 | | // A is surface_bse, B is surface_now, C is surface_now+1 and D is |
3259 | | // surface_end. |
3260 | 0 | struct scaler *tscale = &p->scaler[SCALER_TSCALE]; |
3261 | 0 | reinit_scaler(p, tscale, &p->opts.scaler[SCALER_TSCALE], 1, tscale_sizes); |
3262 | 0 | bool oversample = tscale->conf.kernel.function == SCALER_OVERSAMPLE; |
3263 | 0 | bool linear = tscale->conf.kernel.function == SCALER_LINEAR; |
3264 | 0 | int size; |
3265 | |
|
3266 | 0 | if (oversample || linear) { |
3267 | 0 | size = 2; |
3268 | 0 | } else { |
3269 | 0 | mp_assert(tscale->kernel && !tscale->kernel->polar); |
3270 | 0 | size = ceil(tscale->kernel->size); |
3271 | 0 | } |
3272 | | |
3273 | 0 | int radius = size/2; |
3274 | 0 | int surface_now = p->surface_now; |
3275 | 0 | int surface_bse = surface_wrap(surface_now - (radius-1)); |
3276 | 0 | int surface_end = surface_wrap(surface_now + radius); |
3277 | 0 | mp_assert(surface_wrap(surface_bse + size-1) == surface_end); |
3278 | | |
3279 | | // Render new frames while there's room in the queue. Note that technically, |
3280 | | // this should be done before the step where we find the right frame, but |
3281 | | // it only barely matters at the very beginning of playback, and this way |
3282 | | // makes the code much more linear. |
3283 | 0 | int surface_dst = surface_wrap(p->surface_idx + 1); |
3284 | 0 | for (int i = 0; i < t->num_frames; i++) { |
3285 | | // Avoid overwriting data we might still need |
3286 | 0 | if (surface_dst == surface_bse - 1) |
3287 | 0 | break; |
3288 | | |
3289 | 0 | struct mp_image *f = t->frames[i]; |
3290 | 0 | uint64_t f_id = t->frame_id + i; |
3291 | 0 | if (!mp_image_params_static_equal(&f->params, &p->real_image_params)) |
3292 | 0 | continue; |
3293 | | |
3294 | 0 | if (f_id > p->surfaces[p->surface_idx].id) { |
3295 | 0 | struct surface *dst = &p->surfaces[surface_dst]; |
3296 | 0 | if (!update_surface(p, f, f_id, dst, flags)) |
3297 | 0 | return; |
3298 | 0 | p->surface_idx = surface_dst; |
3299 | 0 | surface_dst = surface_wrap(surface_dst + 1); |
3300 | 0 | is_new = true; |
3301 | 0 | } |
3302 | 0 | } |
3303 | | |
3304 | | // Figure out whether the queue is "valid". A queue is invalid if the |
3305 | | // frames' PTS is not monotonically increasing. Anything else is invalid, |
3306 | | // so avoid blending incorrect data and just draw the latest frame as-is. |
3307 | | // Possible causes for failure of this condition include seeks, pausing, |
3308 | | // end of playback or start of playback. |
3309 | 0 | bool valid = true; |
3310 | 0 | for (int i = surface_bse, ii; valid && i != surface_end; i = ii) { |
3311 | 0 | ii = surface_wrap(i + 1); |
3312 | 0 | if (p->surfaces[i].id == 0 || p->surfaces[ii].id == 0) { |
3313 | 0 | valid = false; |
3314 | 0 | } else if (p->surfaces[ii].id < p->surfaces[i].id) { |
3315 | 0 | valid = false; |
3316 | 0 | MP_DBG(p, "interpolation queue underrun\n"); |
3317 | 0 | } |
3318 | 0 | } |
3319 | | |
3320 | | // Update OSD PTS to synchronize subtitles with the displayed frame |
3321 | 0 | p->osd_pts = p->surfaces[surface_now].pts; |
3322 | | |
3323 | | // Finally, draw the right mix of frames to the screen. |
3324 | 0 | if (!is_new) |
3325 | 0 | pass_info_reset(p, true); |
3326 | 0 | pass_describe(p, "interpolation"); |
3327 | 0 | if (!valid || t->still) { |
3328 | | // surface_now is guaranteed to be valid, so we can safely use it. |
3329 | 0 | pass_read_tex(p, p->surfaces[surface_now].tex); |
3330 | 0 | p->is_interpolated = false; |
3331 | 0 | } else { |
3332 | 0 | double mix = t->vsync_offset / t->ideal_frame_duration; |
3333 | | // The scaler code always wants the fcoord to be between 0 and 1, |
3334 | | // so we try to adjust by using the previous set of N frames instead |
3335 | | // (which requires some extra checking to make sure it's valid) |
3336 | 0 | if (mix < 0.0) { |
3337 | 0 | int prev = surface_wrap(surface_bse - 1); |
3338 | 0 | if (p->surfaces[prev].id != 0 && |
3339 | 0 | p->surfaces[prev].id < p->surfaces[surface_bse].id) |
3340 | 0 | { |
3341 | 0 | mix += 1.0; |
3342 | 0 | surface_bse = prev; |
3343 | 0 | } else { |
3344 | 0 | mix = 0.0; // at least don't blow up, this should only |
3345 | | // ever happen at the start of playback |
3346 | 0 | } |
3347 | 0 | } |
3348 | |
|
3349 | 0 | if (oversample) { |
3350 | | // Oversample uses the frame area as mix ratio, not the vsync |
3351 | | // position itself |
3352 | 0 | double vsync_dist = t->vsync_interval / t->ideal_frame_duration, |
3353 | 0 | threshold = tscale->conf.kernel.params[0]; |
3354 | 0 | threshold = isnan(threshold) ? 0.0 : threshold; |
3355 | 0 | mix = (1 - mix) / vsync_dist; |
3356 | 0 | mix = mix <= 0 + threshold ? 0 : mix; |
3357 | 0 | mix = mix >= 1 - threshold ? 1 : mix; |
3358 | 0 | mix = 1 - mix; |
3359 | 0 | } |
3360 | | |
3361 | | // Blend the frames together |
3362 | 0 | if (oversample || linear) { |
3363 | 0 | gl_sc_uniform_dynamic(p->sc); |
3364 | 0 | gl_sc_uniform_f(p->sc, "inter_coeff", mix); |
3365 | 0 | GLSL(color = mix(texture(texture0, texcoord0), |
3366 | 0 | texture(texture1, texcoord1), |
3367 | 0 | inter_coeff);) |
3368 | 0 | } else { |
3369 | 0 | gl_sc_uniform_dynamic(p->sc); |
3370 | 0 | gl_sc_uniform_f(p->sc, "fcoord", mix); |
3371 | 0 | pass_sample_separated_gen(p->sc, tscale, 0, 0); |
3372 | 0 | } |
3373 | | |
3374 | | // Load all the required frames |
3375 | 0 | for (int i = 0; i < size; i++) { |
3376 | 0 | struct image img = |
3377 | 0 | image_wrap(p->surfaces[surface_wrap(surface_bse+i)].tex, |
3378 | 0 | PLANE_RGB, p->components); |
3379 | | // Since the code in pass_sample_separated currently assumes |
3380 | | // the textures are bound in-order and starting at 0, we just |
3381 | | // assert to make sure this is the case (which it should always be) |
3382 | 0 | int id = pass_bind(p, img); |
3383 | 0 | mp_assert(id == i); |
3384 | 0 | } |
3385 | | |
3386 | 0 | MP_TRACE(p, "inter frame dur: %f vsync: %f, mix: %f\n", |
3387 | 0 | t->ideal_frame_duration, t->vsync_interval, mix); |
3388 | 0 | p->is_interpolated = true; |
3389 | 0 | } |
3390 | 0 | pass_draw_to_screen(p, fbo, flags); |
3391 | |
|
3392 | 0 | p->frames_drawn += 1; |
3393 | 0 | } |
3394 | | |
// Top-level entry point: render one vo_frame to the given FBO, including
// clearing, hwdec overlay handling, optional interpolation, the cached-redraw
// blit path, OSD/subtitles, and the broken-frame error indicator.
// flags: bit set of RENDER_FRAME_* / RENDER_SCREEN_* flags
void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame,
                           const struct ra_fbo *fbo, int flags)
{
    gl_video_update_options(p);

    struct mp_rect target_rc = {0, 0, fbo->tex->params.w, fbo->tex->params.h};

    p->broken_frame = false;

    bool has_frame = !!frame->current;

    // Clear the whole target with the (premultiplied) clear color first
    struct m_color c = p->clear_color;
    float clear_color[4] = {c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0};
    clear_color[0] *= clear_color[3];
    clear_color[1] *= clear_color[3];
    clear_color[2] *= clear_color[3];
    p->ra->fns->clear(p->ra, fbo->tex, clear_color, &target_rc);

    // Overlay-style hwdec: the driver displays the frame itself; we only
    // clear the colorkey area and skip normal GL rendering.
    if (p->hwdec_overlay) {
        if (has_frame) {
            float *color = p->hwdec_overlay->overlay_colorkey;
            p->ra->fns->clear(p->ra, fbo->tex, color, &p->dst_rect);
        }

        p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, frame->current,
                                                &p->src_rect, &p->dst_rect,
                                                frame->frame_id != p->image.id);

        if (frame->current)
            p->osd_pts = frame->current->pts;

        // Disable GL rendering
        has_frame = false;
    }

    if (has_frame) {
        bool interpolate = p->opts.interpolation && frame->display_synced &&
                           (p->frames_drawn || !frame->still);
        if (interpolate) {
            // Don't interpolate when frame and vsync rate already match
            // within the configured threshold
            double ratio = frame->ideal_frame_duration / frame->vsync_interval;
            if (fabs(ratio - 1.0) < p->opts.interpolation_threshold)
                interpolate = false;
        }

        if (interpolate) {
            gl_video_interpolate_frame(p, frame, fbo, flags);
        } else {
            bool is_new = frame->frame_id != p->image.id;

            // Redrawing a frame might update subtitles.
            if (frame->still && p->opts.blend_subs)
                is_new = true;

            if (is_new || !p->output_tex_valid) {
                p->output_tex_valid = false;

                pass_info_reset(p, !is_new);
                if (!pass_render_frame(p, frame->current, frame->frame_id, flags))
                    goto done;

                // For the non-interpolation case, we draw to a single "cache"
                // texture to speed up subsequent re-draws (if any exist)
                bool repeats = frame->num_vsyncs > 1 && frame->display_synced;
                bool r = false;
                if ((repeats || frame->still) && !p->dumb_mode &&
                    (p->ra->caps & RA_CAP_BLIT) && fbo->tex->params.blit_dst)
                {
                    // Attempt to use the same format as the destination FBO
                    // if possible. Some RAs use a wrapped dummy format here,
                    // so fall back to the fbo_format in that case.
                    const struct ra_format *fmt = fbo->tex->params.format;
                    if (fmt->dummy_format)
                        fmt = p->fbo_format;
                    r = ra_tex_resize(p->ra, p->log, &p->output_tex,
                                      fbo->tex->params.w, fbo->tex->params.h,
                                      fmt);
                }
                // If the cache texture couldn't be created, draw directly
                const struct ra_fbo *dest_fbo = r ? &(struct ra_fbo) { p->output_tex } : fbo;
                p->output_tex_valid = r;
                pass_draw_to_screen(p, dest_fbo, flags);
            }

            // "output tex valid" and "output tex needed" are equivalent
            if (p->output_tex_valid && fbo->tex->params.blit_dst) {
                pass_info_reset(p, true);
                pass_describe(p, "redraw cached frame");
                struct mp_rect src = p->dst_rect;
                struct mp_rect dst = src;
                // Flipped blit: intentionally yields dst.y0 > dst.y1
                if (fbo->flip) {
                    dst.y0 = fbo->tex->params.h - src.y0;
                    dst.y1 = fbo->tex->params.h - src.y1;
                }
                timer_pool_start(p->blit_timer);
                p->ra->fns->blit(p->ra, fbo->tex, p->output_tex, &dst, &src);
                timer_pool_stop(p->blit_timer);
                struct mp_pass_perf perf = timer_pool_measure(p->blit_timer);
                pass_record(p, &perf);
            }
        }
    }

done:

    debug_check_gl(p, "after video rendering");

    if (p->osd && (flags & (RENDER_FRAME_SUBS | RENDER_FRAME_OSD))) {
        // If we haven't actually drawn anything so far, then we technically
        // need to consider this the start of a new pass. Let's call it a
        // redraw just because, since it's basically a blank frame anyway
        if (!has_frame)
            pass_info_reset(p, true);

        int osd_flags = p->opts.blend_subs ? OSD_DRAW_OSD_ONLY : 0;
        if (!(flags & RENDER_FRAME_SUBS))
            osd_flags |= OSD_DRAW_OSD_ONLY;
        if (!(flags & RENDER_FRAME_OSD))
            osd_flags |= OSD_DRAW_SUB_ONLY;

        pass_draw_osd(p, osd_flags, flags, p->osd_pts, p->osd_rect, fbo, true);
        debug_check_gl(p, "after OSD rendering");
    }

    p->broken_frame |= gl_sc_error_state(p->sc);
    if (p->broken_frame) {
        // Make the screen solid blue to make it visually clear that an
        // error has occurred
        float color[4] = {0.0, 0.05, 0.5, 1.0};
        p->ra->fns->clear(p->ra, fbo->tex, color, &target_rc);
    }

    p->frames_rendered++;
    pass_report_performance(p);
}
3528 | | |
3529 | | void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame, |
3530 | | struct voctrl_screenshot *args) |
3531 | 0 | { |
3532 | 0 | if (!p->ra->fns->tex_download) |
3533 | 0 | return; |
3534 | | |
3535 | 0 | bool ok = false; |
3536 | 0 | struct mp_image *res = NULL; |
3537 | 0 | struct ra_tex *target = NULL; |
3538 | 0 | struct mp_rect old_src = p->src_rect; |
3539 | 0 | struct mp_rect old_dst = p->dst_rect; |
3540 | 0 | struct mp_osd_res old_osd = p->osd_rect; |
3541 | 0 | struct vo_frame *nframe = vo_frame_ref(frame); |
3542 | | |
3543 | | // Disable interpolation and such. |
3544 | 0 | nframe->redraw = true; |
3545 | 0 | nframe->repeat = false; |
3546 | 0 | nframe->still = true; |
3547 | 0 | nframe->pts = 0; |
3548 | 0 | nframe->duration = -1; |
3549 | |
|
3550 | 0 | if (!args->scaled) { |
3551 | 0 | int w, h; |
3552 | 0 | mp_image_params_get_dsize(&p->image_params, &w, &h); |
3553 | 0 | if (w < 1 || h < 1) |
3554 | 0 | return; |
3555 | | |
3556 | 0 | int src_w = p->image_params.w; |
3557 | 0 | int src_h = p->image_params.h; |
3558 | 0 | struct mp_rect src = {0, 0, src_w, src_h}; |
3559 | 0 | struct mp_rect dst = {0, 0, w, h}; |
3560 | |
|
3561 | 0 | if (mp_image_crop_valid(&p->image_params)) |
3562 | 0 | src = p->image_params.crop; |
3563 | |
|
3564 | 0 | if (p->image_params.rotate % 180 == 90) { |
3565 | 0 | MPSWAP(int, w, h); |
3566 | 0 | MPSWAP(int, src_w, src_h); |
3567 | 0 | } |
3568 | 0 | mp_rect_rotate(&src, src_w, src_h, p->image_params.rotate); |
3569 | 0 | mp_rect_rotate(&dst, w, h, p->image_params.rotate); |
3570 | |
|
3571 | 0 | struct mp_osd_res osd = { |
3572 | 0 | .display_par = 1.0, |
3573 | 0 | .w = mp_rect_w(dst), |
3574 | 0 | .h = mp_rect_h(dst), |
3575 | 0 | }; |
3576 | 0 | gl_video_resize(p, &src, &dst, &osd); |
3577 | 0 | } |
3578 | | |
3579 | 0 | gl_video_reset_surfaces(p); |
3580 | |
|
3581 | 0 | struct ra_tex_params params = { |
3582 | 0 | .dimensions = 2, |
3583 | 0 | .downloadable = true, |
3584 | 0 | .w = p->osd_rect.w, |
3585 | 0 | .h = p->osd_rect.h, |
3586 | 0 | .d = 1, |
3587 | 0 | .render_dst = true, |
3588 | 0 | }; |
3589 | |
|
3590 | 0 | params.format = ra_find_unorm_format(p->ra, 1, 4); |
3591 | 0 | int mpfmt = p->has_alpha ? IMGFMT_RGBA : IMGFMT_RGB0; |
3592 | 0 | if (args->high_bit_depth && p->ra_format.component_bits > 8) { |
3593 | 0 | const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4); |
3594 | 0 | if (fmt && fmt->renderable) { |
3595 | 0 | params.format = fmt; |
3596 | 0 | mpfmt = IMGFMT_RGBA64; |
3597 | 0 | } |
3598 | 0 | } |
3599 | |
|
3600 | 0 | if (!params.format || !params.format->renderable) |
3601 | 0 | goto done; |
3602 | 0 | target = ra_tex_create(p->ra, ¶ms); |
3603 | 0 | if (!target) |
3604 | 0 | goto done; |
3605 | | |
3606 | 0 | int flags = 0; |
3607 | 0 | if (args->subs) |
3608 | 0 | flags |= RENDER_FRAME_SUBS; |
3609 | 0 | if (args->osd) |
3610 | 0 | flags |= RENDER_FRAME_OSD; |
3611 | 0 | if (args->scaled) |
3612 | 0 | flags |= RENDER_SCREEN_COLOR; |
3613 | 0 | gl_video_render_frame(p, nframe, &(struct ra_fbo){target}, flags); |
3614 | |
|
3615 | 0 | res = mp_image_alloc(mpfmt, params.w, params.h); |
3616 | 0 | if (!res) |
3617 | 0 | goto done; |
3618 | | |
3619 | 0 | struct ra_tex_download_params download_params = { |
3620 | 0 | .tex = target, |
3621 | 0 | .dst = res->planes[0], |
3622 | 0 | .stride = res->stride[0], |
3623 | 0 | }; |
3624 | 0 | if (!p->ra->fns->tex_download(p->ra, &download_params)) |
3625 | 0 | goto done; |
3626 | | |
3627 | 0 | if (p->broken_frame) |
3628 | 0 | goto done; |
3629 | | |
3630 | 0 | ok = true; |
3631 | 0 | done: |
3632 | 0 | talloc_free(nframe); |
3633 | 0 | ra_tex_free(p->ra, &target); |
3634 | 0 | gl_video_resize(p, &old_src, &old_dst, &old_osd); |
3635 | 0 | gl_video_reset_surfaces(p); |
3636 | 0 | if (!ok) |
3637 | 0 | TA_FREEP(&res); |
3638 | 0 | args->res = res; |
3639 | 0 | } |
3640 | | |
3641 | | // Use this color instead of the global option. |
3642 | | void gl_video_set_clear_color(struct gl_video *p, struct m_color c) |
3643 | 0 | { |
3644 | 0 | p->force_clear_color = true; |
3645 | 0 | p->clear_color = c; |
3646 | 0 | } |
3647 | | |
// Set the PTS used when rendering the OSD/subtitles.
void gl_video_set_osd_pts(struct gl_video *p, double pts)
{
    p->osd_pts = pts;
}
3652 | | |
3653 | | bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *res, |
3654 | | double pts) |
3655 | 0 | { |
3656 | 0 | return p->osd ? mpgl_osd_check_change(p->osd, res, pts) : false; |
3657 | 0 | } |
3658 | | |
3659 | | void gl_video_resize(struct gl_video *p, |
3660 | | struct mp_rect *src, struct mp_rect *dst, |
3661 | | struct mp_osd_res *osd) |
3662 | 0 | { |
3663 | 0 | if (mp_rect_equals(&p->src_rect, src) && |
3664 | 0 | mp_rect_equals(&p->dst_rect, dst) && |
3665 | 0 | osd_res_equals(p->osd_rect, *osd)) |
3666 | 0 | return; |
3667 | | |
3668 | 0 | p->src_rect = *src; |
3669 | 0 | p->dst_rect = *dst; |
3670 | 0 | p->osd_rect = *osd; |
3671 | |
|
3672 | 0 | gl_video_reset_surfaces(p); |
3673 | |
|
3674 | 0 | if (p->osd) |
3675 | 0 | mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo3d); |
3676 | 0 | } |
3677 | | |
3678 | | static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out) |
3679 | 0 | { |
3680 | 0 | for (int i = 0; i < VO_PASS_PERF_MAX; i++) { |
3681 | 0 | if (!pass[i].desc.len) |
3682 | 0 | break; |
3683 | 0 | out->perf[out->count] = pass[i].perf; |
3684 | 0 | strncpy(out->desc[out->count], pass[i].desc.start, |
3685 | 0 | sizeof(out->desc[out->count]) - 1); |
3686 | 0 | out->desc[out->count][sizeof(out->desc[out->count]) - 1] = '\0'; |
3687 | 0 | out->count++; |
3688 | 0 | } |
3689 | 0 | } |
3690 | | |
3691 | | void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out) |
3692 | 0 | { |
3693 | 0 | *out = (struct voctrl_performance_data){0}; |
3694 | 0 | frame_perf_data(p->pass_fresh, &out->fresh); |
3695 | 0 | frame_perf_data(p->pass_redraw, &out->redraw); |
3696 | 0 | } |
3697 | | |
// Upload (or map, for hwdec) the given frame into p->image's plane textures.
// Skipped entirely if the same frame id is already current.
// Returns false on failure (and marks the frame as broken).
static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id)
{
    struct video_image *vimg = &p->image;

    // Frame already current — nothing to do
    if (vimg->id == id)
        return true;

    unref_current_image(p);

    mpi = mp_image_new_ref(mpi);
    if (!mpi)
        goto error;

    vimg->mpi = mpi;
    vimg->id = id;
    p->osd_pts = mpi->pts;
    p->frames_uploaded++;

    if (p->hwdec_active) {
        // Hardware decoding

        if (!p->hwdec_mapper)
            goto error;

        pass_describe(p, "map frame (hwdec)");
        timer_pool_start(p->upload_timer);
        bool ok = ra_hwdec_mapper_map(p->hwdec_mapper, vimg->mpi) >= 0;
        timer_pool_stop(p->upload_timer);
        struct mp_pass_perf perf = timer_pool_measure(p->upload_timer);
        pass_record(p, &perf);

        // NOTE(review): hwdec_mapped is set even when mapping failed, so the
        // error path's unref_current_image() will attempt an unmap — confirm
        // the mapper tolerates unmap after a failed map.
        vimg->hwdec_mapped = true;
        if (ok) {
            struct mp_image layout = {0};
            mp_image_set_params(&layout, &p->image_params);
            struct ra_tex **tex = p->hwdec_mapper->tex;
            for (int n = 0; n < p->plane_count; n++) {
                vimg->planes[n] = (struct texplane){
                    .w = mp_image_plane_w(&layout, n),
                    .h = mp_image_plane_h(&layout, n),
                    .tex = tex[n],
                    .flipped = layout.params.vflip,
                };
            }
        } else {
            MP_FATAL(p, "Mapping hardware decoded surface failed.\n");
            goto error;
        }
        return true;
    }

    // Software decoding
    mp_assert(mpi->num_planes == p->plane_count);

    timer_pool_start(p->upload_timer);

    if (mpi->params.vflip)
        mp_image_vflip(mpi);

    for (int n = 0; n < p->plane_count; n++) {
        struct texplane *plane = &vimg->planes[n];
        if (!plane->tex) {
            timer_pool_stop(p->upload_timer);
            goto error;
        }

        struct ra_tex_upload_params params = {
            .tex = plane->tex,
            .src = mpi->planes[n],
            .invalidate = true,
            .stride = mpi->stride[n],
        };

        // Negative stride means bottom-up data; normalize to a positive
        // stride starting from the last row and mark the plane as flipped
        plane->flipped = params.stride < 0;
        if (plane->flipped) {
            int h = mp_image_plane_h(mpi, n);
            params.src = (char *)params.src + (h - 1) * params.stride;
            params.stride = -params.stride;
        }

        // Direct rendering: if the frame data lives in a GPU buffer we
        // allocated, upload from the buffer instead of host memory
        struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]);
        if (mapped) {
            params.buf = mapped->buf;
            params.buf_offset = (uintptr_t)params.src -
                                (uintptr_t)mapped->buf->data;
            params.src = NULL;
        }

        // Log only when the DR state actually changes
        if (p->using_dr_path != !!mapped) {
            p->using_dr_path = !!mapped;
            MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no");
        }

        if (!p->ra->fns->tex_upload(p->ra, &params)) {
            timer_pool_stop(p->upload_timer);
            goto error;
        }

        // Keep the image referenced as long as the DR buffer is in use
        if (mapped && !mapped->mpi)
            mapped->mpi = mp_image_new_ref(mpi);
    }
    timer_pool_stop(p->upload_timer);

    bool using_pbo = p->ra->use_pbo || !(p->ra->caps & RA_CAP_DIRECT_UPLOAD);
    const char *mode = p->using_dr_path ? "DR" : using_pbo ? "PBO" : "naive";
    pass_describe(p, "upload frame (%s)", mode);
    struct mp_pass_perf perf = timer_pool_measure(p->upload_timer);
    pass_record(p, &perf);

    return true;

error:
    unref_current_image(p);
    p->broken_frame = true;
    return false;
}
3815 | | |
3816 | | static bool test_fbo(struct gl_video *p, const struct ra_format *fmt) |
3817 | 0 | { |
3818 | 0 | MP_VERBOSE(p, "Testing FBO format %s\n", fmt->name); |
3819 | 0 | struct ra_tex *tex = NULL; |
3820 | 0 | bool success = ra_tex_resize(p->ra, p->log, &tex, 16, 16, fmt); |
3821 | 0 | ra_tex_free(p->ra, &tex); |
3822 | 0 | return success; |
3823 | 0 | } |
3824 | | |
3825 | | // Return whether dumb-mode can be used without disabling any features. |
3826 | | // Essentially, vo_gpu with --profile=fast will return true. |
3827 | | static bool check_dumb_mode(struct gl_video *p) |
3828 | 0 | { |
3829 | 0 | struct gl_video_opts *o = &p->opts; |
3830 | 0 | if (p->use_integer_conversion) |
3831 | 0 | return false; |
3832 | 0 | if (o->dumb_mode > 0) // requested by user |
3833 | 0 | return true; |
3834 | 0 | if (o->dumb_mode < 0) // disabled by user |
3835 | 0 | return false; |
3836 | | |
3837 | | // otherwise, use auto-detection |
3838 | 0 | if (o->correct_downscaling || o->linear_downscaling || |
3839 | 0 | o->linear_upscaling || o->sigmoid_upscaling || o->interpolation || |
3840 | 0 | o->blend_subs || o->deband || o->unsharp) |
3841 | 0 | return false; |
3842 | | // check remaining scalers (tscale is already implicitly excluded above) |
3843 | 0 | for (int i = 0; i < SCALER_COUNT; i++) { |
3844 | 0 | if (i != SCALER_TSCALE) { |
3845 | 0 | if (o->scaler[i].kernel.function != SCALER_BILINEAR && |
3846 | 0 | o->scaler[i].kernel.function != SCALER_INHERIT) |
3847 | 0 | return false; |
3848 | 0 | } |
3849 | 0 | } |
3850 | 0 | if (o->user_shaders && o->user_shaders[0]) |
3851 | 0 | return false; |
3852 | 0 | return true; |
3853 | 0 | } |
3854 | | |
// Disable features that are not supported with the current OpenGL version.
//
// Inspects the RA (render abstraction) capability flags and available texture
// formats, then mutates p->opts in place to turn off anything the GPU cannot
// do, warning the user about each downgrade. As a last resort, if no usable
// FBO format or RG texture support exists (or dumb mode was requested), it
// resets p->opts to a minimal "dumb mode" whitelist of options.
static void check_gl_features(struct gl_video *p)
{
    struct ra *ra = p->ra;
    // 1-component 16-bit float texture availability (needed for LUT scalers).
    bool have_float_tex = !!ra_find_float16_format(ra, 1);
    bool have_mglsl = ra->glsl_version >= 130; // modern GLSL
    // 2-component 8-bit format; luminance_alpha variants don't count as real RG.
    const struct ra_format *rg_tex = ra_find_unorm_format(p->ra, 1, 2);
    bool have_texrg = rg_tex && !rg_tex->luminance_alpha;
    bool have_compute = ra->caps & RA_CAP_COMPUTE;
    bool have_ssbo = ra->caps & RA_CAP_BUF_RW;
    bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD;

    // Candidate FBO formats, best first. The user can override via
    // --fbo-format; "auto" (or unset) falls back to this list.
    const char *auto_fbo_fmts[] = {"rgba16f", "rgba16hf", "rgba16",
                                   "rgb10_a2", "rgba8", 0};
    const char *user_fbo_fmts[] = {p->opts.fbo_format, 0};
    const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto")
                          ? user_fbo_fmts : auto_fbo_fmts;
    bool user_specified_fbo_fmt = fbo_fmts == user_fbo_fmts;
    bool fbo_test_result = false;
    bool have_fbo = false;
    p->fbo_format = NULL;
    // Pick the first candidate that exists, is renderable, supports linear
    // filtering, and passes an actual render test.
    for (int n = 0; fbo_fmts[n]; n++) {
        const char *fmt = fbo_fmts[n];
        const struct ra_format *f = ra_find_named_format(p->ra, fmt);
        if (!f && user_specified_fbo_fmt)
            MP_WARN(p, "FBO format '%s' not found!\n", fmt);
        if (f && f->renderable && f->linear_filter &&
            (fbo_test_result = test_fbo(p, f))) {
            MP_VERBOSE(p, "Using FBO format %s.\n", f->name);
            have_fbo = true;
            p->fbo_format = f;
            break;
        }

        // Only complain loudly when the user explicitly asked for this format.
        if (user_specified_fbo_fmt) {
            MP_WARN(p, "User-specified FBO format '%s' failed to initialize! "
                       "(exists=%d, renderable=%d, linear_filter=%d, "
                       "fbo_test_result=%d)\n",
                    fmt, !!f, f ? f->renderable : 0, f ? f->linear_filter : 0,
                    fbo_test_result);
        }
    }

    // Dithering (except "none") needs gl_FragCoord in the shader.
    if (!have_fragcoord && p->opts.dither_depth >= 0 &&
        p->opts.dither_algo != DITHER_NONE)
    {
        p->opts.dither_algo = DITHER_NONE;
        MP_WARN(p, "Disabling dithering (no gl_FragCoord).\n");
    }
    // The alpha checkerboard background also depends on gl_FragCoord.
    if (!have_fragcoord && p->opts.background == BACKGROUND_TILES) {
        p->opts.background = BACKGROUND_COLOR;
        MP_VERBOSE(p, "Disabling alpha checkerboard (no gl_FragCoord).\n");
    }
    // Compute shaders are useless without an intermediate FBO to write to.
    if (!have_fbo && have_compute) {
        have_compute = false;
        MP_WARN(p, "Force-disabling compute shaders as an FBO format was not "
                   "available! See your FBO format configuration!\n");
    }

    // Compute shaders additionally need image store support on the FBO format.
    if (have_compute && have_fbo && !p->fbo_format->storable) {
        have_compute = false;
        MP_WARN(p, "Force-disabling compute shaders as the chosen FBO format "
                   "is not storable! See your FBO format configuration!\n");
    }

    // Error diffusion dithering is implemented as a compute shader.
    if (!have_compute && p->opts.dither_algo == DITHER_ERROR_DIFFUSION) {
        MP_WARN(p, "Disabling error diffusion dithering because compute shader "
                   "was not supported. Fallback to dither=fruit instead.\n");
        p->opts.dither_algo = DITHER_FRUIT;
    }

    // HDR peak detection needs both compute shaders and SSBOs.
    bool have_compute_peak = have_compute && have_ssbo;
    if (!have_compute_peak && p->opts.tone_map.compute_peak >= 0) {
        // Warn loudly only if the user explicitly enabled it (== 1);
        // for "auto" just log verbosely.
        int msgl = p->opts.tone_map.compute_peak == 1 ? MSGL_WARN : MSGL_V;
        MP_MSG(p, msgl, "Disabling HDR peak computation (one or more of the "
                        "following is not supported: compute shaders=%d, "
                        "SSBO=%d).\n", have_compute, have_ssbo);
        p->opts.tone_map.compute_peak = -1;
    }

    // Dumb mode is forced by option (> 0), or by missing FBO/RG support;
    // it can also be entered voluntarily when no advanced features are used.
    p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg;
    bool voluntarily_dumb = check_dumb_mode(p);
    if (p->forced_dumb_mode || voluntarily_dumb) {
        if (voluntarily_dumb) {
            MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n");
        } else if (p->opts.dumb_mode <= 0) {
            MP_WARN(p, "High bit depth FBOs unsupported. Enabling dumb mode.\n"
                       "Most extended features will be disabled.\n");
        }
        p->dumb_mode = true;
        // Most things don't work, so whitelist all options that still work.
        // Everything not listed here is reset to its zero value.
        p->opts = (struct gl_video_opts){
            .scaler = {
                [SCALER_SCALE] = {
                    {SCALER_BILINEAR, .params = {NAN, NAN}, .functions = scale_filters},
                    {WINDOW_PREFERRED, .params = {NAN, NAN}, .functions = filter_windows},
                },
                [SCALER_DSCALE] = {
                    {SCALER_BILINEAR, .params = {NAN, NAN}, .functions = cdscale_filters},
                    {WINDOW_PREFERRED, .params = {NAN, NAN}, .functions = filter_windows},
                },
                [SCALER_CSCALE] = {
                    {SCALER_BILINEAR, .params = {NAN, NAN}, .functions = cdscale_filters},
                    {WINDOW_PREFERRED, .params = {NAN, NAN}, .functions = filter_windows},
                },
                [SCALER_TSCALE] = {
                    {SCALER_BILINEAR, .params = {NAN, NAN}, .functions = tscale_filters},
                    {WINDOW_PREFERRED, .params = {NAN, NAN}, .functions = filter_windows},
                },
            },
            .gamma = p->opts.gamma,
            .gamma_auto = p->opts.gamma_auto,
            .pbo = p->opts.pbo,
            .fbo_format = p->opts.fbo_format,
            .background = p->opts.background,
            .use_rectangle = p->opts.use_rectangle,
            .background_color = p->opts.background_color,
            .dither_algo = p->opts.dither_algo,
            .dither_depth = p->opts.dither_depth,
            .dither_size = p->opts.dither_size,
            .error_diffusion = p->opts.error_diffusion,
            .temporal_dither = p->opts.temporal_dither,
            .temporal_dither_period = p->opts.temporal_dither_period,
            .tex_pad_x = p->opts.tex_pad_x,
            .tex_pad_y = p->opts.tex_pad_y,
            .tone_map = p->opts.tone_map,
            .early_flush = p->opts.early_flush,
            .icc_opts = p->opts.icc_opts,
            .hwdec_interop = p->opts.hwdec_interop,
            .target_trc = p->opts.target_trc,
            .target_prim = p->opts.target_prim,
            .target_peak = p->opts.target_peak,
        };
        // 3D LUT application requires intermediate FBOs.
        if (!have_fbo)
            p->use_lut_3d = false;
        return;
    }
    p->dumb_mode = false;

    // Normally, we want to disable them by default if FBOs are unavailable,
    // because they will be slow (not critically slow, but still slower).
    // Without FP textures, we must always disable them.
    // I don't know if luminance alpha float textures exist, so disregard them.
    for (int n = 0; n < SCALER_COUNT; n++) {
        const struct filter_kernel *kernel =
            mp_find_filter_kernel(p->opts.scaler[n].kernel.function);
        if (kernel) {
            char *reason = NULL;
            if (!have_float_tex)
                reason = "(float tex. missing)";
            if (!have_mglsl)
                reason = "(GLSL version too old)";
            if (reason) {
                MP_WARN(p, "Disabling scaler #%d %s %s.\n", n,
                        m_opt_choice_str(p->opts.scaler[n].kernel.functions,
                                         p->opts.scaler[n].kernel.function),
                        reason);

                // p->opts is a copy => we can just mess with it.
                p->opts.scaler[n].kernel.function = SCALER_BILINEAR;
                // Without a usable tscale kernel, interpolation can't work.
                if (n == SCALER_TSCALE)
                    p->opts.interpolation = false;
            }
        }
    }

    // Color management is active if a target prim/trc is set or an ICC
    // profile (3D LUT) is loaded.
    int use_cms = p->opts.target_prim != PL_COLOR_PRIM_UNKNOWN ||
                  p->opts.target_trc != PL_COLOR_TRC_UNKNOWN || p->use_lut_3d;

    // mix() is needed for some gamma functions
    if (!have_mglsl && (p->opts.linear_downscaling ||
                        p->opts.linear_upscaling || p->opts.sigmoid_upscaling))
    {
        p->opts.linear_downscaling = false;
        p->opts.linear_upscaling = false;
        p->opts.sigmoid_upscaling = false;
        MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n");
    }
    if (!have_mglsl && use_cms) {
        p->opts.target_prim = PL_COLOR_PRIM_UNKNOWN;
        p->opts.target_trc = PL_COLOR_TRC_UNKNOWN;
        p->use_lut_3d = false;
        MP_WARN(p, "Disabling color management (GLSL version too old).\n");
    }
    if (!have_mglsl && p->opts.deband) {
        p->opts.deband = false;
        MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
    }
}
4044 | | |
4045 | | static void init_gl(struct gl_video *p) |
4046 | 0 | { |
4047 | 0 | debug_check_gl(p, "before init_gl"); |
4048 | |
|
4049 | 0 | p->upload_timer = timer_pool_create(p->ra); |
4050 | 0 | p->blit_timer = timer_pool_create(p->ra); |
4051 | 0 | p->osd_timer = timer_pool_create(p->ra); |
4052 | |
|
4053 | 0 | debug_check_gl(p, "after init_gl"); |
4054 | |
|
4055 | 0 | ra_dump_tex_formats(p->ra, MSGL_DEBUG); |
4056 | 0 | ra_dump_img_formats(p->ra, MSGL_DEBUG); |
4057 | 0 | } |
4058 | | |
// Destroy the renderer and free all associated resources.
// Safe to call with p == NULL. The teardown order matters: video state and
// hwdec first, then GPU objects, then image references, and the talloc
// context (which owns most allocations) last.
void gl_video_uninit(struct gl_video *p)
{
    if (!p)
        return;

    uninit_video(p);
    ra_hwdec_ctx_uninit(&p->hwdec_ctx);
    gl_sc_destroy(p->sc);

    ra_tex_free(p->ra, &p->lut_3d_texture);
    ra_buf_free(p->ra, &p->hdr_peak_ssbo);

    timer_pool_destroy(p->upload_timer);
    timer_pool_destroy(p->blit_timer);
    timer_pool_destroy(p->osd_timer);

    // Free the recorded pass descriptions for both fresh and redraw passes.
    for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
        talloc_free(p->pass_fresh[i].desc.start);
        talloc_free(p->pass_redraw[i].desc.start);
    }

    mpgl_osd_destroy(p->osd);

    // Forcibly destroy possibly remaining image references. This should also
    // cause gl_video_dr_free_buffer() to be called for the remaining buffers.
    gc_pending_dr_fences(p, true);

    // Should all have been unreffed already.
    mp_assert(!p->num_dr_buffers);

    talloc_free(p);
}
4091 | | |
// Reset rendering state after a seek/discontinuity (drops interpolation
// surfaces and related cached frame state).
void gl_video_reset(struct gl_video *p)
{
    gl_video_reset_surfaces(p);
}
4096 | | |
// Return whether the most recently rendered frame was a temporally
// interpolated (synthesized) frame rather than a source frame.
bool gl_video_showing_interpolated_frame(struct gl_video *p)
{
    return p->is_interpolated;
}
4101 | | |
4102 | | static bool is_imgfmt_desc_supported(struct gl_video *p, |
4103 | | const struct ra_imgfmt_desc *desc) |
4104 | 0 | { |
4105 | 0 | if (!desc->num_planes) |
4106 | 0 | return false; |
4107 | | |
4108 | 0 | if (desc->planes[0]->ctype == RA_CTYPE_UINT && p->forced_dumb_mode) |
4109 | 0 | return false; |
4110 | | |
4111 | 0 | return true; |
4112 | 0 | } |
4113 | | |
4114 | | bool gl_video_check_format(struct gl_video *p, int mp_format) |
4115 | 0 | { |
4116 | 0 | struct ra_imgfmt_desc desc; |
4117 | 0 | if (ra_get_imgfmt_desc(p->ra, mp_format, &desc) && |
4118 | 0 | is_imgfmt_desc_supported(p, &desc)) |
4119 | 0 | return true; |
4120 | 0 | if (ra_hwdec_get(&p->hwdec_ctx, mp_format)) |
4121 | 0 | return true; |
4122 | 0 | return false; |
4123 | 0 | } |
4124 | | |
// Configure the renderer for a (possibly new) video format.
// Releases the current frame first; if the static image parameters changed,
// tears down and reinitializes the per-video state. Always resets the
// interpolation surfaces.
void gl_video_config(struct gl_video *p, struct mp_image_params *params)
{
    unmap_overlay(p);
    unref_current_image(p);

    if (!mp_image_params_static_equal(&p->real_image_params, params)) {
        uninit_video(p);
        p->real_image_params = *params;
        p->image_params = *params;
        // imgfmt == 0 means "no video"; leave the renderer uninitialized.
        if (params->imgfmt)
            init_video(p);
    }

    gl_video_reset_surfaces(p);
}
4140 | | |
// Attach a (new) OSD state; destroys any existing OSD renderer and
// recreates it from the given state. osd may be NULL to detach.
void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd)
{
    mpgl_osd_destroy(p->osd);
    p->osd = NULL;
    p->osd_state = osd;
    reinit_osd(p);
}
4148 | | |
4149 | | struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, |
4150 | | struct mpv_global *g) |
4151 | 0 | { |
4152 | 0 | struct gl_video *p = talloc_ptrtype(NULL, p); |
4153 | 0 | *p = (struct gl_video) { |
4154 | 0 | .ra = ra, |
4155 | 0 | .global = g, |
4156 | 0 | .log = log, |
4157 | 0 | .sc = gl_sc_create(ra, g, log), |
4158 | 0 | .video_eq = mp_csp_equalizer_create(p, g), |
4159 | 0 | .opts_cache = m_config_cache_alloc(p, g, &gl_video_conf), |
4160 | 0 | }; |
4161 | | // make sure this variable is initialized to *something* |
4162 | 0 | p->pass = p->pass_fresh; |
4163 | 0 | struct gl_video_opts *opts = p->opts_cache->opts; |
4164 | 0 | p->cms = gl_lcms_init(p, log, g, opts->icc_opts), |
4165 | 0 | p->opts = *opts; |
4166 | 0 | for (int n = 0; n < SCALER_COUNT; n++) |
4167 | 0 | p->scaler[n] = (struct scaler){.index = n}; |
4168 | | // our VAO always has the vec2 position as the first element |
4169 | 0 | MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) { |
4170 | 0 | .name = "position", |
4171 | 0 | .type = RA_VARTYPE_FLOAT, |
4172 | 0 | .dim_v = 2, |
4173 | 0 | .dim_m = 1, |
4174 | 0 | .offset = 0, |
4175 | 0 | }); |
4176 | 0 | init_gl(p); |
4177 | 0 | reinit_from_options(p); |
4178 | 0 | return p; |
4179 | 0 | } |
4180 | | |
// Pick up runtime option changes. A changed option set triggers a full
// reinit; a changed video equalizer only invalidates the cached output
// texture so the next draw re-renders.
static void gl_video_update_options(struct gl_video *p)
{
    if (m_config_cache_update(p->opts_cache)) {
        gl_lcms_update_options(p->cms);
        reinit_from_options(p);
    }

    if (mp_csp_equalizer_state_changed(p->video_eq))
        p->output_tex_valid = false;
}
4191 | | |
// Re-apply the current option set: validate/downgrade options against GPU
// capabilities, rebuild rendering state, and emit one-time warnings for
// option combinations that won't do what the user expects.
static void reinit_from_options(struct gl_video *p)
{
    p->use_lut_3d = gl_lcms_has_profile(p->cms);

    // Copy the option fields, so that check_gl_features() can mutate them.
    // This works only for the fields themselves of course, not for any memory
    // referenced by them.
    p->opts = *(struct gl_video_opts *)p->opts_cache->opts;

    // force_clear_color means an explicit clear color was set externally
    // (e.g. by the VO); don't overwrite it from the options.
    if (!p->force_clear_color)
        p->clear_color = p->opts.background_color;

    check_gl_features(p);
    uninit_rendering(p);
    if (p->opts.shader_cache)
        gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir);
    p->ra->use_pbo = p->opts.pbo;
    gl_video_setup_hooks(p);
    reinit_osd(p);

    // Warn once if interpolation is enabled without display-sync video sync.
    struct mp_vo_opts *vo_opts = mp_get_config_group(p, p->global, &vo_sub_opts);
    if (p->opts.interpolation && !vo_opts->video_sync && !p->dsi_warned) {
        MP_WARN(p, "Interpolation now requires enabling display-sync mode.\n"
                   "E.g.: --video-sync=display-resample\n");
        p->dsi_warned = true;
    }
    talloc_free(vo_opts);

    // Warn once if correct-downscaling is combined with bilinear (directly,
    // or via dscale=inherit from a bilinear scale), which defeats it.
    if (p->opts.correct_downscaling && !p->correct_downscaling_warned) {
        if (p->opts.scaler[SCALER_DSCALE].kernel.function == SCALER_BILINEAR ||
            (p->opts.scaler[SCALER_DSCALE].kernel.function == SCALER_INHERIT &&
             p->opts.scaler[SCALER_SCALE].kernel.function == SCALER_BILINEAR)) {
            MP_WARN(p, "correct-downscaling requires non-bilinear scaler.\n");
            p->correct_downscaling_warned = true;
        }
    }
}
4229 | | |
// Tell the VO how many future frames to queue. Without interpolation one
// frame suffices; with interpolation, the queue must cover the temporal
// filter's radius.
void gl_video_configure_queue(struct gl_video *p, struct vo *vo)
{
    gl_video_update_options(p);

    int queue_size = 1;

    // Figure out an adequate size for the interpolation queue. The larger
    // the radius, the earlier we need to queue frames.
    if (p->opts.interpolation) {
        const struct filter_kernel *kernel =
            mp_find_filter_kernel(p->opts.scaler[SCALER_TSCALE].kernel.function);
        if (kernel) {
            // filter_scale wouldn't be correctly initialized were we to use it here.
            // This is fine since we're always upsampling, but beware if downsampling
            // is added!
            double radius = kernel->f.radius;
            // A non-positive kernel radius means "configurable"; use the
            // user-set tscale radius instead.
            radius = radius > 0 ? radius : p->opts.scaler[SCALER_TSCALE].radius;
            queue_size += 1 + ceil(radius);
        } else {
            // Oversample/linear case
            queue_size += 2;
        }
    }

    vo_set_queue_params(vo, 0, queue_size);
}
4256 | | |
4257 | | static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt, |
4258 | | struct bstr name, const char **value) |
4259 | 2.93k | { |
4260 | 2.93k | struct bstr param = bstr0(*value); |
4261 | 2.93k | char s[32] = {0}; |
4262 | 2.93k | int r = 1; |
4263 | 2.93k | if (bstr_equals0(param, "help")) { |
4264 | 38 | r = M_OPT_EXIT; |
4265 | 2.89k | } else { |
4266 | 2.89k | snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); |
4267 | 2.89k | const struct error_diffusion_kernel *k = mp_find_error_diffusion_kernel(s); |
4268 | 2.89k | if (!k) |
4269 | 579 | r = M_OPT_INVALID; |
4270 | 2.89k | } |
4271 | 2.93k | if (r < 1) { |
4272 | 617 | mp_info(log, "Available error diffusion kernels:\n"); |
4273 | 6.78k | for (int n = 0; mp_error_diffusion_kernels[n].name; n++) |
4274 | 6.17k | mp_info(log, " %s\n", mp_error_diffusion_kernels[n].name); |
4275 | 617 | if (s[0]) |
4276 | 516 | mp_fatal(log, "No error diffusion kernel named '%s' found!\n", s); |
4277 | 617 | } |
4278 | 2.93k | return r; |
4279 | 2.93k | } |
4280 | | |
4281 | | void gl_video_set_ambient_lux(struct gl_video *p, double lux) |
4282 | 0 | { |
4283 | 0 | if (p->opts.gamma_auto) { |
4284 | 0 | p->opts.gamma = gl_video_scale_ambient_lux(16.0, 256.0, 1.0, 1.2, lux); |
4285 | 0 | MP_TRACE(p, "ambient light changed: %f lux (gamma: %f)\n", lux, |
4286 | 0 | p->opts.gamma); |
4287 | 0 | } |
4288 | 0 | } |
4289 | | |
4290 | | static void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size) |
4291 | 0 | { |
4292 | 0 | struct ra_buf_params params = { |
4293 | 0 | .type = RA_BUF_TYPE_TEX_UPLOAD, |
4294 | 0 | .host_mapped = true, |
4295 | 0 | .size = size, |
4296 | 0 | }; |
4297 | |
|
4298 | 0 | struct ra_buf *buf = ra_buf_create(p->ra, ¶ms); |
4299 | 0 | if (!buf) |
4300 | 0 | return NULL; |
4301 | | |
4302 | 0 | MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers); |
4303 | 0 | p->dr_buffers[p->num_dr_buffers++] = (struct dr_buffer){ .buf = buf }; |
4304 | |
|
4305 | 0 | return buf->data; |
4306 | 0 | } |
4307 | | |
4308 | | static void gl_video_dr_free_buffer(void *opaque, uint8_t *data) |
4309 | 0 | { |
4310 | 0 | struct gl_video *p = opaque; |
4311 | |
|
4312 | 0 | for (int n = 0; n < p->num_dr_buffers; n++) { |
4313 | 0 | struct dr_buffer *buffer = &p->dr_buffers[n]; |
4314 | 0 | if (buffer->buf->data == data) { |
4315 | 0 | mp_assert(!buffer->mpi); // can't be freed while it has a ref |
4316 | 0 | ra_buf_free(p->ra, &buffer->buf); |
4317 | 0 | MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n); |
4318 | 0 | return; |
4319 | 0 | } |
4320 | 0 | } |
4321 | | // not found - must not happen |
4322 | 0 | MP_ASSERT_UNREACHABLE(); |
4323 | 0 | } |
4324 | | |
4325 | | struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h, |
4326 | | int stride_align, int flags) |
4327 | 0 | { |
4328 | 0 | if (flags & VO_DR_FLAG_HOST_CACHED) { |
4329 | 0 | if (p->ra->caps & RA_CAP_SLOW_DR) { |
4330 | 0 | MP_VERBOSE(p, "DR path suspected slow/uncached, disabling.\n"); |
4331 | 0 | return NULL; |
4332 | 0 | } |
4333 | 0 | } |
4334 | | |
4335 | 0 | if (!gl_video_check_format(p, imgfmt)) |
4336 | 0 | return NULL; |
4337 | | |
4338 | 0 | int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align); |
4339 | 0 | if (size < 0) |
4340 | 0 | return NULL; |
4341 | | |
4342 | 0 | int alloc_size = size + stride_align; |
4343 | 0 | void *ptr = gl_video_dr_alloc_buffer(p, alloc_size); |
4344 | 0 | if (!ptr) |
4345 | 0 | return NULL; |
4346 | | |
4347 | | // (we expect vo.c to proxy the free callback, so it happens in the same |
4348 | | // thread it was allocated in, removing the need for synchronization) |
4349 | 0 | struct mp_image *res = mp_image_from_buffer(imgfmt, w, h, stride_align, |
4350 | 0 | ptr, alloc_size, p, |
4351 | 0 | gl_video_dr_free_buffer); |
4352 | 0 | if (!res) |
4353 | 0 | gl_video_dr_free_buffer(p, ptr); |
4354 | 0 | return res; |
4355 | 0 | } |
4356 | | |
// Initialize the hwdec interop context and load interop drivers, either all
// of them or only those selected by the --gpu-hwdec-interop option.
// Must be called at most once per renderer instance.
void gl_video_init_hwdecs(struct gl_video *p, struct ra_ctx *ra_ctx,
                          struct mp_hwdec_devices *devs,
                          bool load_all_by_default)
{
    mp_assert(!p->hwdec_ctx.ra_ctx);
    p->hwdec_ctx = (struct ra_hwdec_ctx) {
        .log = p->log,
        .global = p->global,
        .ra_ctx = ra_ctx,
    };

    ra_hwdec_ctx_init(&p->hwdec_ctx, devs, p->opts.hwdec_interop, load_all_by_default);
}
4370 | | |
// Lazily load the hwdec interop needed for a specific image format.
// Requires gl_video_init_hwdecs() to have been called first.
void gl_video_load_hwdecs_for_img_fmt(struct gl_video *p, struct mp_hwdec_devices *devs,
                                      struct hwdec_imgfmt_request *params)
{
    mp_assert(p->hwdec_ctx.ra_ctx);
    ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, devs, params);
}
4377 | | |
// Expose a pointer to the current output/target image parameters.
// The pointed-to data is owned by the renderer and updated on render.
struct mp_image_params *gl_video_get_target_params_ptr(struct gl_video *p)
{
    return &p->target_params;
}