/src/mpv/video/out/gpu/video_shaders.c
Line | Count | Source |
1 | | /* |
2 | | * This file is part of mpv. |
3 | | * |
4 | | * mpv is free software; you can redistribute it and/or |
5 | | * modify it under the terms of the GNU Lesser General Public |
6 | | * License as published by the Free Software Foundation; either |
7 | | * version 2.1 of the License, or (at your option) any later version. |
8 | | * |
9 | | * mpv is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | * GNU Lesser General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU Lesser General Public |
15 | | * License along with mpv. If not, see <http://www.gnu.org/licenses/>. |
16 | | */ |
17 | | |
18 | | #include <math.h> |
19 | | |
20 | | #include <libplacebo/colorspace.h> |
21 | | |
22 | | #include "video_shaders.h" |
23 | | #include "video.h" |
24 | | |
25 | | #if PL_API_VER < 362 |
26 | | #define PL_COLOR_TRC_SCRGB PL_COLOR_TRC_LINEAR |
27 | | #endif |
28 | | |
// Shorthands for appending GLSL text to a shader cache. Every macro expects a
// `struct gl_shader_cache *sc` to be in scope at the point of use.
//
//   GLSL(x)     - stringify `x`, append a newline and pass it to gl_sc_add().
//                 The expansion already ends in ';', so call sites omit it.
//   GLSLF(...)  - printf-style variant, forwarded to gl_sc_addf().
//   GLSLH(x)    - like GLSL(), but goes through gl_sc_hadd().
//   GLSLHF(...) - like GLSLF(), but goes through gl_sc_haddf().
//
// NOTE(review): the "H" variants are used in this file for `shared` variable
// declarations and helper function definitions, so they presumably target a
// global-scope/header section of the shader - confirm against the shader
// cache API.
#define GLSL(x)     gl_sc_add(sc, #x "\n");
#define GLSLF(...)  gl_sc_addf(sc, __VA_ARGS__)
#define GLSLH(x)    gl_sc_hadd(sc, #x "\n");
#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__)
33 | | |
// Emit the per-texture preamble used by the sampling passes: rebind the `tex`
// and `texmap` macros to texture number `tex_num`, and declare the `pos`,
// `size` and `pt` shorthands from the correspondingly numbered texcoord,
// texture_size and pixel_size variables.
void sampler_prelude(struct gl_shader_cache *sc, int tex_num)
{
    // Drop whatever binding a previous prelude may have left behind
    GLSLF("#undef tex\n"
          "#undef texmap\n");

    // Point the generic names at the requested texture
    GLSLF("#define tex texture%d\n"
          "#define texmap texmap%d\n",
          tex_num, tex_num);

    // Commonly used per-texture variables
    GLSLF("vec2 pos = texcoord%d;\n"
          "vec2 size = texture_size%d;\n"
          "vec2 pt = pixel_size%d;\n",
          tex_num, tex_num, tex_num);
}
45 | | |
46 | | static void pass_sample_separated_get_weights(struct gl_shader_cache *sc, |
47 | | struct scaler *scaler) |
48 | 0 | { |
49 | 0 | gl_sc_uniform_texture(sc, "lut", scaler->lut); |
50 | 0 | GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut->params.h); |
51 | |
|
52 | 0 | int N = scaler->kernel->size; |
53 | 0 | int width = (N + 3) / 4; // round up |
54 | |
|
55 | 0 | GLSLF("float weights[%d];\n", N); |
56 | 0 | for (int i = 0; i < N; i++) { |
57 | 0 | if (i % 4 == 0) |
58 | 0 | GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width); |
59 | 0 | GLSLF("weights[%d] = c[%d];\n", i, i % 4); |
60 | 0 | } |
61 | 0 | } |
62 | | |
63 | | // Handle a single pass (either vertical or horizontal). The direction is given |
64 | | // by the vector (d_x, d_y). If the vector is 0, then planar interpolation is |
65 | | // used instead (samples from texture0 through textureN) |
66 | | void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, |
67 | | int d_x, int d_y) |
68 | 0 | { |
69 | 0 | int N = scaler->kernel->size; |
70 | 0 | bool use_ar = scaler->conf.antiring > 0; |
71 | 0 | bool planar = d_x == 0 && d_y == 0; |
72 | 0 | GLSL(color = vec4(0.0);) |
73 | 0 | GLSLF("{\n"); |
74 | 0 | if (!planar) { |
75 | 0 | GLSLF("vec2 dir = vec2(%d.0, %d.0);\n", d_x, d_y); |
76 | 0 | GLSL(pt *= dir;) |
77 | 0 | GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);) |
78 | 0 | GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d.0);\n", N / 2 - 1); |
79 | 0 | } |
80 | 0 | GLSL(vec4 c;) |
81 | 0 | if (use_ar) { |
82 | 0 | GLSL(vec4 hi = vec4(0.0);) |
83 | 0 | GLSL(vec4 lo = vec4(1.0);) |
84 | 0 | } |
85 | 0 | pass_sample_separated_get_weights(sc, scaler); |
86 | 0 | GLSLF("// scaler samples\n"); |
87 | 0 | for (int n = 0; n < N; n++) { |
88 | 0 | if (planar) { |
89 | 0 | GLSLF("c = texture(texture%d, texcoord%d);\n", n, n); |
90 | 0 | } else { |
91 | 0 | GLSLF("c = texture(tex, base + pt * vec2(%d.0));\n", n); |
92 | 0 | } |
93 | 0 | GLSLF("color += vec4(weights[%d]) * c;\n", n); |
94 | 0 | if (use_ar && (n == N/2-1 || n == N/2)) { |
95 | 0 | GLSL(lo = min(lo, c);) |
96 | 0 | GLSL(hi = max(hi, c);) |
97 | 0 | } |
98 | 0 | } |
99 | 0 | if (use_ar) |
100 | 0 | GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", |
101 | 0 | scaler->conf.antiring); |
102 | 0 | GLSLF("}\n"); |
103 | 0 | } |
104 | | |
105 | | // Subroutine for computing and adding an individual texel contribution |
106 | | // If planar is false, samples directly |
107 | | // If planar is true, takes the pixel from inX[idx] where X is the component and |
108 | | // `idx` must be defined by the caller |
109 | | static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, |
110 | | int x, int y, int components, bool planar) |
111 | 0 | { |
112 | 0 | double radius = scaler->kernel->radius * scaler->kernel->filter_scale; |
113 | 0 | double radius_cutoff = scaler->kernel->radius_cutoff; |
114 | | |
115 | | // Since we can't know the subpixel position in advance, assume a |
116 | | // worst case scenario |
117 | 0 | int yy = y > 0 ? y-1 : y; |
118 | 0 | int xx = x > 0 ? x-1 : x; |
119 | 0 | double dmax = sqrt(xx*xx + yy*yy); |
120 | | // Skip samples definitely outside the radius |
121 | 0 | if (dmax >= radius_cutoff) |
122 | 0 | return; |
123 | 0 | GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y); |
124 | | // Check for samples that might be skippable |
125 | 0 | bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2; |
126 | 0 | if (maybe_skippable) |
127 | 0 | GLSLF("if (d < %f) {\n", radius_cutoff); |
128 | | |
129 | | // get the weight for this pixel |
130 | 0 | if (scaler->lut->params.dimensions == 1) { |
131 | 0 | GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n", |
132 | 0 | radius, scaler->lut->params.w); |
133 | 0 | } else { |
134 | 0 | GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n", |
135 | 0 | radius, scaler->lut->params.h); |
136 | 0 | } |
137 | 0 | GLSL(wsum += w;) |
138 | |
|
139 | 0 | if (planar) { |
140 | 0 | for (int n = 0; n < components; n++) |
141 | 0 | GLSLF("color[%d] += w * in%d[idx];\n", n, n); |
142 | 0 | } else { |
143 | 0 | GLSLF("in0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y); |
144 | 0 | GLSL(color += vec4(w) * in0;) |
145 | 0 | } |
146 | |
|
147 | 0 | if (maybe_skippable) |
148 | 0 | GLSLF("}\n"); |
149 | 0 | } |
150 | | |
151 | | void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, |
152 | | int components, bool sup_gather) |
153 | 0 | { |
154 | 0 | GLSL(color = vec4(0.0);) |
155 | 0 | GLSLF("{\n"); |
156 | 0 | GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) |
157 | 0 | GLSL(vec2 base = pos - fcoord * pt;) |
158 | 0 | GLSLF("float w, d, wsum = 0.0;\n"); |
159 | 0 | for (int n = 0; n < components; n++) |
160 | 0 | GLSLF("vec4 in%d;\n", n); |
161 | 0 | GLSL(int idx;) |
162 | |
|
163 | 0 | gl_sc_uniform_texture(sc, "lut", scaler->lut); |
164 | |
|
165 | 0 | GLSLF("// scaler samples\n"); |
166 | 0 | int bound = ceil(scaler->kernel->radius_cutoff); |
167 | 0 | for (int y = 1-bound; y <= bound; y += 2) { |
168 | 0 | for (int x = 1-bound; x <= bound; x += 2) { |
169 | | // First we figure out whether it's more efficient to use direct |
170 | | // sampling or gathering. The problem is that gathering 4 texels |
171 | | // only to discard some of them is very wasteful, so only do it if |
172 | | // we suspect it will be a win rather than a loss. This is the case |
173 | | // exactly when all four texels are within bounds |
174 | 0 | bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff; |
175 | |
|
176 | 0 | if (!sup_gather) |
177 | 0 | use_gather = false; |
178 | |
|
179 | 0 | if (use_gather) { |
180 | | // Gather the four surrounding texels simultaneously |
181 | 0 | for (int n = 0; n < components; n++) { |
182 | 0 | GLSLF("in%d = textureGatherOffset(tex, base, " |
183 | 0 | "ivec2(%d, %d), %d);\n", n, x, y, n); |
184 | 0 | } |
185 | | |
186 | | // Mix in all of the points with their weights |
187 | 0 | for (int p = 0; p < 4; p++) { |
188 | | // The four texels are gathered counterclockwise starting |
189 | | // from the bottom left |
190 | 0 | static const int xo[4] = {0, 1, 1, 0}; |
191 | 0 | static const int yo[4] = {1, 1, 0, 0}; |
192 | 0 | if (x+xo[p] > bound || y+yo[p] > bound) |
193 | 0 | continue; |
194 | 0 | GLSLF("idx = %d;\n", p); |
195 | 0 | polar_sample(sc, scaler, x+xo[p], y+yo[p], components, true); |
196 | 0 | } |
197 | 0 | } else { |
198 | | // switch to direct sampling instead, for efficiency/compatibility |
199 | 0 | for (int yy = y; yy <= bound && yy <= y+1; yy++) { |
200 | 0 | for (int xx = x; xx <= bound && xx <= x+1; xx++) |
201 | 0 | polar_sample(sc, scaler, xx, yy, components, false); |
202 | 0 | } |
203 | 0 | } |
204 | 0 | } |
205 | 0 | } |
206 | |
|
207 | 0 | GLSL(color = color / vec4(wsum);) |
208 | 0 | GLSLF("}\n"); |
209 | 0 | } |
210 | | |
211 | | // bw/bh: block size |
212 | | // iw/ih: input size (pre-calculated to fit all required texels) |
213 | | void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, |
214 | | int components, int bw, int bh, int iw, int ih) |
215 | 0 | { |
216 | 0 | int bound = ceil(scaler->kernel->radius_cutoff); |
217 | 0 | int offset = bound - 1; // padding top/left |
218 | |
|
219 | 0 | GLSL(color = vec4(0.0);) |
220 | 0 | GLSLF("{\n"); |
221 | 0 | GLSL(vec2 wpos = texmap(gl_WorkGroupID * gl_WorkGroupSize);) |
222 | 0 | GLSL(vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5));) |
223 | 0 | GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) |
224 | 0 | GLSL(vec2 base = pos - pt * fcoord;) |
225 | 0 | GLSL(ivec2 rel = ivec2(round((base - wbase) * size));) |
226 | 0 | GLSL(int idx;) |
227 | 0 | GLSLF("float w, d, wsum = 0.0;\n"); |
228 | 0 | gl_sc_uniform_texture(sc, "lut", scaler->lut); |
229 | | |
230 | | // Load all relevant texels into shmem |
231 | 0 | for (int c = 0; c < components; c++) |
232 | 0 | GLSLHF("shared float in%d[%d];\n", c, ih * iw); |
233 | |
|
234 | 0 | GLSL(vec4 c;) |
235 | 0 | GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) {\n", ih, bh); |
236 | 0 | GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw); |
237 | 0 | GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset); |
238 | 0 | for (int c = 0; c < components; c++) |
239 | 0 | GLSLF("in%d[%d * y + x] = c[%d];\n", c, iw, c); |
240 | 0 | GLSLF("}}\n"); |
241 | 0 | GLSL(groupMemoryBarrier();) |
242 | 0 | GLSL(barrier();) |
243 | | |
244 | | // Dispatch the actual samples |
245 | 0 | GLSLF("// scaler samples\n"); |
246 | 0 | for (int y = 1-bound; y <= bound; y++) { |
247 | 0 | for (int x = 1-bound; x <= bound; x++) { |
248 | 0 | GLSLF("idx = %d * rel.y + rel.x + %d;\n", iw, |
249 | 0 | iw * (y + offset) + x + offset); |
250 | 0 | polar_sample(sc, scaler, x, y, components, true); |
251 | 0 | } |
252 | 0 | } |
253 | |
|
254 | 0 | GLSL(color = color / vec4(wsum);) |
255 | 0 | GLSLF("}\n"); |
256 | 0 | } |
257 | | |
// Emit GLSL declaring a vec4 named `t`, computed from the scalar GLSL
// expression `s` (a fractional coordinate). pass_sample_bicubic_fast() uses
// the first two components as sampling offsets and the third as a mix factor.
//
// Background on bicubic scaling with only 4 texel fetches:
// <https://web.archive.org/web/20180720154854/http://www.mate.tue.nl/mate/pdfs/10318.pdf>
// 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
// Why this algorithm normally always blurs, even with unit scaling:
// http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
// 'GPU Prefilter for Accurate Cubic B-spline Interpolation'
static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s)
{
    // Evaluate four cubic polynomials in `s` at once, one multiply-add per step
    GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s + vec4(1, 0, -0.5, 0.5);\n",
          t, s);
    GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n"
          "%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n",
          t, t, s,
          t, t, s);

    // Turn the first two components into offsets
    GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n"
          "%s.xy += vec2(1.0 + %s, 1.0 - %s);\n",
          t, t, t,
          t, s, s);
}
274 | | |
275 | | void pass_sample_bicubic_fast(struct gl_shader_cache *sc) |
276 | 0 | { |
277 | 0 | GLSLF("{\n"); |
278 | 0 | GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));) |
279 | 0 | bicubic_calcweights(sc, "parmx", "fcoord.x"); |
280 | 0 | bicubic_calcweights(sc, "parmy", "fcoord.y"); |
281 | 0 | GLSL(vec4 cdelta;) |
282 | 0 | GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);) |
283 | 0 | GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);) |
284 | | // first y-interpolation |
285 | 0 | GLSL(vec4 ar = texture(tex, pos + cdelta.xy);) |
286 | 0 | GLSL(vec4 ag = texture(tex, pos + cdelta.xw);) |
287 | 0 | GLSL(vec4 ab = mix(ag, ar, parmy.b);) |
288 | | // second y-interpolation |
289 | 0 | GLSL(vec4 br = texture(tex, pos + cdelta.zy);) |
290 | 0 | GLSL(vec4 bg = texture(tex, pos + cdelta.zw);) |
291 | 0 | GLSL(vec4 aa = mix(bg, br, parmy.b);) |
292 | | // x-interpolation |
293 | 0 | GLSL(color = mix(aa, ab, parmx.b);) |
294 | 0 | GLSLF("}\n"); |
295 | 0 | } |
296 | | |
297 | | void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, |
298 | | int w, int h) |
299 | 0 | { |
300 | 0 | GLSLF("{\n"); |
301 | 0 | GLSL(vec2 pos = pos - vec2(0.5) * pt;) // round to nearest |
302 | 0 | GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) |
303 | | // Determine the mixing coefficient vector |
304 | 0 | gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h}); |
305 | 0 | GLSL(vec2 coeff = fcoord * output_size/size;) |
306 | 0 | float threshold = scaler->conf.kernel.params[0]; |
307 | 0 | threshold = isnan(threshold) ? 0.0 : threshold; |
308 | 0 | GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold); |
309 | 0 | GLSL(coeff = clamp(coeff, 0.0, 1.0);) |
310 | | // Compute the right blend of colors |
311 | 0 | GLSL(color = texture(tex, pos + pt * (coeff - fcoord));) |
312 | 0 | GLSLF("}\n"); |
313 | 0 | } |
314 | | |
// Common constants for SMPTE ST.2084 (HDR)
static const float PQ_M1 = 2610./4096 * 1./4;
static const float PQ_M2 = 2523./4096 * 128;
static const float PQ_C1 = 3424./4096;
static const float PQ_C2 = 2413./4096 * 32;
static const float PQ_C3 = 2392./4096 * 32;

// Common constants for ARIB STD-B67 (HLG)
static const float HLG_A = 0.17883277;
static const float HLG_B = 0.28466892;
static const float HLG_C = 0.55991073;

// Common constants for Panasonic V-Log
static const float VLOG_B = 0.00873;
static const float VLOG_C = 0.241514;
static const float VLOG_D = 0.598206;

// Common constants for Sony S-Log
static const float SLOG_A = 0.432699;
static const float SLOG_B = 0.037584;
static const float SLOG_C = 0.616596 + 0.03;
static const float SLOG_P = 3.538813;
static const float SLOG_Q = 0.030001;
static const float SLOG_K2 = 155.0 / 219.0;
339 | | |
340 | | // Linearize (expand), given a TRC as input. In essence, this is the ITU-R |
341 | | // EOTF, calculated on an idealized (reference) monitor with a white point of |
342 | | // MP_REF_WHITE and infinite contrast. |
343 | | // |
344 | | // These functions always output to a normalized scale of [0,1], for |
345 | | // convenience of the video.c code that calls it. To get the values in an |
346 | | // absolute scale, multiply the result by `pl_color_transfer_nominal_peak(trc)` |
347 | | void pass_linearize(struct gl_shader_cache *sc, enum pl_color_transfer trc) |
348 | 0 | { |
349 | 0 | if (trc == PL_COLOR_TRC_LINEAR || trc == PL_COLOR_TRC_SCRGB) |
350 | 0 | return; |
351 | | |
352 | 0 | GLSLF("// linearize\n"); |
353 | | |
354 | | // Note that this clamp may technically violate the definition of |
355 | | // ITU-R BT.2100, which allows for sub-blacks and super-whites to be |
356 | | // displayed on the display where such would be possible. That said, the |
357 | | // problem is that not all gamma curves are well-defined on the values |
358 | | // outside this range, so we ignore it and just clip anyway for sanity. |
359 | 0 | GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) |
360 | |
|
361 | 0 | switch (trc) { |
362 | 0 | case PL_COLOR_TRC_SRGB: |
363 | 0 | GLSLF("color.rgb = mix(color.rgb * vec3(1.0/12.92), \n" |
364 | 0 | " pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), \n" |
365 | 0 | " %s(lessThan(vec3(0.04045), color.rgb))); \n", |
366 | 0 | gl_sc_bvec(sc, 3)); |
367 | 0 | break; |
368 | 0 | case PL_COLOR_TRC_BT_1886: |
369 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(2.4));) |
370 | 0 | break; |
371 | 0 | case PL_COLOR_TRC_GAMMA18: |
372 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.8));) |
373 | 0 | break; |
374 | 0 | case PL_COLOR_TRC_GAMMA20: |
375 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(2.0));) |
376 | 0 | break; |
377 | 0 | case PL_COLOR_TRC_GAMMA22: |
378 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(2.2));) |
379 | 0 | break; |
380 | 0 | case PL_COLOR_TRC_GAMMA24: |
381 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(2.4));) |
382 | 0 | break; |
383 | 0 | case PL_COLOR_TRC_GAMMA26: |
384 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(2.6));) |
385 | 0 | break; |
386 | 0 | case PL_COLOR_TRC_GAMMA28: |
387 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(2.8));) |
388 | 0 | break; |
389 | 0 | case PL_COLOR_TRC_PRO_PHOTO: |
390 | 0 | GLSLF("color.rgb = mix(color.rgb * vec3(1.0/16.0), \n" |
391 | 0 | " pow(color.rgb, vec3(1.8)), \n" |
392 | 0 | " %s(lessThan(vec3(0.03125), color.rgb))); \n", |
393 | 0 | gl_sc_bvec(sc, 3)); |
394 | 0 | break; |
395 | 0 | case PL_COLOR_TRC_PQ: |
396 | 0 | GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2); |
397 | 0 | GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n" |
398 | 0 | " / (vec3(%f) - vec3(%f) * color.rgb);\n", |
399 | 0 | PQ_C1, PQ_C2, PQ_C3); |
400 | 0 | GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", 1.0 / PQ_M1); |
401 | | // PQ's output range is 0-10000, but we need it to be relative to |
402 | | // MP_REF_WHITE instead, so rescale |
403 | 0 | GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE); |
404 | 0 | break; |
405 | 0 | case PL_COLOR_TRC_HLG: |
406 | 0 | GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n" |
407 | 0 | " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n" |
408 | 0 | " %s(lessThan(vec3(0.5), color.rgb)));\n", |
409 | 0 | HLG_C, HLG_A, HLG_B, gl_sc_bvec(sc, 3)); |
410 | 0 | GLSLF("color.rgb *= vec3(1.0/%f);\n", MP_REF_WHITE_HLG); |
411 | 0 | break; |
412 | 0 | case PL_COLOR_TRC_V_LOG: |
413 | 0 | GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" |
414 | 0 | " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" |
415 | 0 | " - vec3(%f), \n" |
416 | 0 | " %s(lessThanEqual(vec3(0.181), color.rgb))); \n", |
417 | 0 | VLOG_D, VLOG_C, VLOG_B, gl_sc_bvec(sc, 3)); |
418 | 0 | break; |
419 | 0 | case PL_COLOR_TRC_S_LOG1: |
420 | 0 | GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n" |
421 | 0 | " - vec3(%f);\n", |
422 | 0 | SLOG_C, SLOG_A, SLOG_B); |
423 | 0 | break; |
424 | 0 | case PL_COLOR_TRC_S_LOG2: |
425 | 0 | GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" |
426 | 0 | " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" |
427 | 0 | " - vec3(%f)) * vec3(1.0/%f), \n" |
428 | 0 | " %s(lessThanEqual(vec3(%f), color.rgb))); \n", |
429 | 0 | SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, gl_sc_bvec(sc, 3), SLOG_Q); |
430 | 0 | break; |
431 | 0 | case PL_COLOR_TRC_ST428: |
432 | 0 | GLSL(color.rgb = vec3(52.37/48.0) * pow(color.rgb, vec3(2.6));); |
433 | 0 | break; |
434 | 0 | default: |
435 | 0 | abort(); |
436 | 0 | } |
437 | | |
438 | | // Rescale to prevent clipping on non-float textures |
439 | 0 | GLSLF("color.rgb *= vec3(1.0/%f);\n", pl_color_transfer_nominal_peak(trc)); |
440 | 0 | } |
441 | | |
442 | | // Delinearize (compress), given a TRC as output. This corresponds to the |
443 | | // inverse EOTF (not the OETF) in ITU-R terminology, again assuming a |
444 | | // reference monitor. |
445 | | // |
446 | | // Like pass_linearize, this functions ingests values on an normalized scale |
447 | | void pass_delinearize(struct gl_shader_cache *sc, enum pl_color_transfer trc) |
448 | 0 | { |
449 | 0 | if (trc == PL_COLOR_TRC_LINEAR || trc == PL_COLOR_TRC_SCRGB) |
450 | 0 | return; |
451 | | |
452 | 0 | GLSLF("// delinearize\n"); |
453 | 0 | GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) |
454 | 0 | GLSLF("color.rgb *= vec3(%f);\n", pl_color_transfer_nominal_peak(trc)); |
455 | |
|
456 | 0 | switch (trc) { |
457 | 0 | case PL_COLOR_TRC_SRGB: |
458 | 0 | GLSLF("color.rgb = mix(color.rgb * vec3(12.92), \n" |
459 | 0 | " vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) \n" |
460 | 0 | " - vec3(0.055), \n" |
461 | 0 | " %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n", |
462 | 0 | gl_sc_bvec(sc, 3)); |
463 | 0 | break; |
464 | 0 | case PL_COLOR_TRC_BT_1886: |
465 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) |
466 | 0 | break; |
467 | 0 | case PL_COLOR_TRC_GAMMA18: |
468 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));) |
469 | 0 | break; |
470 | 0 | case PL_COLOR_TRC_GAMMA20: |
471 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.0));) |
472 | 0 | break; |
473 | 0 | case PL_COLOR_TRC_GAMMA22: |
474 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));) |
475 | 0 | break; |
476 | 0 | case PL_COLOR_TRC_GAMMA24: |
477 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) |
478 | 0 | break; |
479 | 0 | case PL_COLOR_TRC_GAMMA26: |
480 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.6));) |
481 | 0 | break; |
482 | 0 | case PL_COLOR_TRC_GAMMA28: |
483 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));) |
484 | 0 | break; |
485 | 0 | case PL_COLOR_TRC_PRO_PHOTO: |
486 | 0 | GLSLF("color.rgb = mix(color.rgb * vec3(16.0), \n" |
487 | 0 | " pow(color.rgb, vec3(1.0/1.8)), \n" |
488 | 0 | " %s(lessThanEqual(vec3(0.001953), color.rgb))); \n", |
489 | 0 | gl_sc_bvec(sc, 3)); |
490 | 0 | break; |
491 | 0 | case PL_COLOR_TRC_PQ: |
492 | 0 | GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE); |
493 | 0 | GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1); |
494 | 0 | GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" |
495 | 0 | " / (vec3(1.0) + vec3(%f) * color.rgb);\n", |
496 | 0 | PQ_C1, PQ_C2, PQ_C3); |
497 | 0 | GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2); |
498 | 0 | break; |
499 | 0 | case PL_COLOR_TRC_HLG: |
500 | 0 | GLSLF("color.rgb *= vec3(%f);\n", MP_REF_WHITE_HLG); |
501 | 0 | GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n" |
502 | 0 | " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n" |
503 | 0 | " %s(lessThan(vec3(1.0), color.rgb)));\n", |
504 | 0 | HLG_A, HLG_B, HLG_C, gl_sc_bvec(sc, 3)); |
505 | 0 | break; |
506 | 0 | case PL_COLOR_TRC_V_LOG: |
507 | 0 | GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n" |
508 | 0 | " vec3(%f) * log(color.rgb + vec3(%f)) \n" |
509 | 0 | " + vec3(%f), \n" |
510 | 0 | " %s(lessThanEqual(vec3(0.01), color.rgb))); \n", |
511 | 0 | VLOG_C / M_LN10, VLOG_B, VLOG_D, gl_sc_bvec(sc, 3)); |
512 | 0 | break; |
513 | 0 | case PL_COLOR_TRC_S_LOG1: |
514 | 0 | GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n", |
515 | 0 | SLOG_A / M_LN10, SLOG_B, SLOG_C); |
516 | 0 | break; |
517 | 0 | case PL_COLOR_TRC_S_LOG2: |
518 | 0 | GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n" |
519 | 0 | " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n" |
520 | 0 | " + vec3(%f), \n" |
521 | 0 | " %s(lessThanEqual(vec3(0.0), color.rgb))); \n", |
522 | 0 | SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C, gl_sc_bvec(sc, 3)); |
523 | 0 | break; |
524 | 0 | case PL_COLOR_TRC_ST428: |
525 | 0 | GLSL(color.rgb = pow(color.rgb * vec3(48.0/52.37), vec3(1.0/2.6));); |
526 | 0 | break; |
527 | 0 | default: |
528 | 0 | abort(); |
529 | 0 | } |
530 | 0 | } |
531 | | |
532 | | // Apply the OOTF mapping from a given light type to display-referred light. |
533 | | // Assumes absolute scale values. `peak` is used to tune the OOTF where |
534 | | // applicable (currently only HLG). |
535 | | static void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, |
536 | | float peak) |
537 | 0 | { |
538 | 0 | if (light == MP_CSP_LIGHT_DISPLAY) |
539 | 0 | return; |
540 | | |
541 | 0 | GLSLF("// apply ootf\n"); |
542 | |
|
543 | 0 | switch (light) |
544 | 0 | { |
545 | 0 | case MP_CSP_LIGHT_SCENE_HLG: { |
546 | | // HLG OOTF from BT.2100, scaled to the chosen display peak |
547 | 0 | float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); |
548 | 0 | GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), %f));\n", |
549 | 0 | peak / pow(12.0 / MP_REF_WHITE_HLG, gamma), gamma - 1.0); |
550 | 0 | break; |
551 | 0 | } |
552 | 0 | case MP_CSP_LIGHT_SCENE_709_1886: |
553 | | // This OOTF is defined by encoding the result as 709 and then decoding |
554 | | // it as 1886; although this is called 709_1886 we actually use the |
555 | | // more precise (by one decimal) values from BT.2020 instead |
556 | 0 | GLSLF("color.rgb = mix(color.rgb * vec3(4.5), \n" |
557 | 0 | " vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n" |
558 | 0 | " %s(lessThan(vec3(0.0181), color.rgb))); \n", |
559 | 0 | gl_sc_bvec(sc, 3)); |
560 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(2.4));) |
561 | 0 | break; |
562 | 0 | case MP_CSP_LIGHT_SCENE_1_2: |
563 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.2));) |
564 | 0 | break; |
565 | 0 | default: |
566 | 0 | abort(); |
567 | 0 | } |
568 | 0 | } |
569 | | |
570 | | // Inverse of the function pass_ootf, for completeness' sake. |
571 | | static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, |
572 | | float peak) |
573 | 0 | { |
574 | 0 | if (light == MP_CSP_LIGHT_DISPLAY) |
575 | 0 | return; |
576 | | |
577 | 0 | GLSLF("// apply inverse ootf\n"); |
578 | |
|
579 | 0 | switch (light) |
580 | 0 | { |
581 | 0 | case MP_CSP_LIGHT_SCENE_HLG: { |
582 | 0 | float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); |
583 | 0 | GLSLF("color.rgb *= vec3(1.0/%f);\n", peak / pow(12.0 / MP_REF_WHITE_HLG, gamma)); |
584 | 0 | GLSLF("color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), %f)));\n", |
585 | 0 | (gamma - 1.0) / gamma); |
586 | 0 | break; |
587 | 0 | } |
588 | 0 | case MP_CSP_LIGHT_SCENE_709_1886: |
589 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) |
590 | 0 | GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5), \n" |
591 | 0 | " pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n" |
592 | 0 | " vec3(1/0.45)), \n" |
593 | 0 | " %s(lessThan(vec3(0.08145), color.rgb))); \n", |
594 | 0 | gl_sc_bvec(sc, 3)); |
595 | 0 | break; |
596 | 0 | case MP_CSP_LIGHT_SCENE_1_2: |
597 | 0 | GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));) |
598 | 0 | break; |
599 | 0 | default: |
600 | 0 | abort(); |
601 | 0 | } |
602 | 0 | } |
603 | | |
// Average light level assumed for SDR signals. This corresponds to a signal
// level of 0.5 under a typical presentation gamma of about 2.0
// (0.5^2.0 = 0.25).
static const float sdr_avg = 0.25f;
607 | | |
608 | | static void hdr_update_peak(struct gl_shader_cache *sc, |
609 | | const struct gl_tone_map_opts *opts) |
610 | 0 | { |
611 | | // Update the sig_peak/sig_avg from the old SSBO state |
612 | 0 | GLSL(if (average.y > 0.0) {) |
613 | 0 | GLSL( sig_avg = max(1e-3, average.x);) |
614 | 0 | GLSL( sig_peak = max(1.00, average.y);) |
615 | 0 | GLSL(}) |
616 | | |
617 | | // Chosen to avoid overflowing on an 8K buffer |
618 | 0 | const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0; |
619 | | |
620 | | // For performance, and to avoid overflows, we tally up the sub-results per |
621 | | // pixel using shared memory first |
622 | 0 | GLSLH(shared int wg_sum;) |
623 | 0 | GLSLH(shared uint wg_max;) |
624 | 0 | GLSL(wg_sum = 0; wg_max = 0u;) |
625 | 0 | GLSL(barrier();) |
626 | 0 | GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min); |
627 | 0 | GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale); |
628 | 0 | GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale); |
629 | | |
630 | | // Have one thread per work group update the global atomics |
631 | 0 | GLSL(memoryBarrierShared();) |
632 | 0 | GLSL(barrier();) |
633 | 0 | GLSL(if (gl_LocalInvocationIndex == 0u) {) |
634 | 0 | GLSL( int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);) |
635 | 0 | GLSL( atomicAdd(frame_sum, wg_avg);) |
636 | 0 | GLSL( atomicMax(frame_max, wg_max);) |
637 | 0 | GLSL( memoryBarrierBuffer();) |
638 | 0 | GLSL(}) |
639 | 0 | GLSL(barrier();) |
640 | | |
641 | | // Finally, to update the global state, we increment a counter per dispatch |
642 | 0 | GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) |
643 | 0 | GLSL(if (gl_LocalInvocationIndex == 0u && atomicAdd(counter, 1u) == num_wg - 1u) {) |
644 | 0 | GLSL( counter = 0u;) |
645 | 0 | GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);) |
646 | 0 | GLSLF(" cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale); |
647 | 0 | GLSL( cur.x = exp(cur.x);) |
648 | 0 | GLSL( if (average.y == 0.0)) |
649 | 0 | GLSL( average = cur;) |
650 | | |
651 | | // Use an IIR low-pass filter to smooth out the detected values, with a |
652 | | // configurable decay rate based on the desired time constant (tau) |
653 | 0 | if (opts->decay_rate) { |
654 | 0 | float decay = 1.0f - expf(-1.0f / opts->decay_rate); |
655 | 0 | GLSLF(" average += %f * (cur - average);\n", decay); |
656 | 0 | } else { |
657 | 0 | GLSLF(" average = cur;\n"); |
658 | 0 | } |
659 | | |
660 | | // Scene change hysteresis |
661 | 0 | float log_db = 10.0 / log(10.0); |
662 | 0 | GLSLF(" float weight = smoothstep(%f, %f, abs(log(cur.x / average.x)));\n", |
663 | 0 | opts->scene_threshold_low / log_db, |
664 | 0 | opts->scene_threshold_high / log_db); |
665 | 0 | GLSL( average = mix(average, cur, weight);) |
666 | | |
667 | | // Reset SSBO state for the next frame |
668 | 0 | GLSL( frame_sum = 0; frame_max = 0u;) |
669 | 0 | GLSL( memoryBarrierBuffer();) |
670 | 0 | GLSL(}) |
671 | 0 | } |
672 | | |
673 | | static inline float pq_delinearize(float x) |
674 | 0 | { |
675 | 0 | x *= MP_REF_WHITE / 10000.0; |
676 | 0 | x = powf(x, PQ_M1); |
677 | 0 | x = (PQ_C1 + PQ_C2 * x) / (1.0 + PQ_C3 * x); |
678 | 0 | x = pow(x, PQ_M2); |
679 | 0 | return x; |
680 | 0 | } |
681 | | |
682 | | // Tone map from a known peak brightness to the range [0,1]. If ref_peak |
683 | | // is 0, we will use peak detection instead |
684 | | static void pass_tone_map(struct gl_shader_cache *sc, |
685 | | float src_peak, float dst_peak, |
686 | | const struct gl_tone_map_opts *opts) |
687 | 0 | { |
688 | 0 | GLSLF("// HDR tone mapping\n"); |
689 | | |
690 | | // To prevent discoloration due to out-of-bounds clipping, we need to make |
691 | | // sure to reduce the value range as far as necessary to keep the entire |
692 | | // signal in range, so tone map based on the brightest component. |
693 | 0 | GLSL(int sig_idx = 0;) |
694 | 0 | GLSL(if (color[1] > color[sig_idx]) sig_idx = 1;) |
695 | 0 | GLSL(if (color[2] > color[sig_idx]) sig_idx = 2;) |
696 | 0 | GLSL(float sig_max = color[sig_idx];) |
697 | 0 | GLSLF("float sig_peak = %f;\n", src_peak); |
698 | 0 | GLSLF("float sig_avg = %f;\n", sdr_avg); |
699 | |
|
700 | 0 | if (opts->compute_peak >= 0) |
701 | 0 | hdr_update_peak(sc, opts); |
702 | | |
703 | | // Always hard-clip the upper bound of the signal range to avoid functions |
704 | | // exploding on inputs greater than 1.0 |
705 | 0 | GLSLF("vec3 sig = min(color.rgb, sig_peak);\n"); |
706 | | |
707 | | // This function always operates on an absolute scale, so ignore the |
708 | | // dst_peak normalization for it |
709 | 0 | float dst_scale = dst_peak; |
710 | 0 | enum tone_mapping curve = opts->curve ? opts->curve : TONE_MAPPING_BT_2390; |
711 | 0 | if (curve == TONE_MAPPING_BT_2390) |
712 | 0 | dst_scale = 1.0; |
713 | | |
714 | | // Rescale the variables in order to bring it into a representation where |
715 | | // 1.0 represents the dst_peak. This is because all of the tone mapping |
716 | | // algorithms are defined in such a way that they map to the range [0.0, 1.0]. |
717 | 0 | if (dst_scale > 1.0) { |
718 | 0 | GLSLF("sig *= 1.0/%f;\n", dst_scale); |
719 | 0 | GLSLF("sig_peak *= 1.0/%f;\n", dst_scale); |
720 | 0 | } |
721 | |
|
722 | 0 | GLSL(float sig_orig = sig[sig_idx];) |
723 | 0 | GLSLF("float slope = min(%f, %f / sig_avg);\n", opts->max_boost, sdr_avg); |
724 | 0 | GLSL(sig *= slope;) |
725 | 0 | GLSL(sig_peak *= slope;) |
726 | |
|
727 | 0 | float param = opts->curve_param; |
728 | 0 | switch (curve) { |
729 | 0 | case TONE_MAPPING_CLIP: |
730 | 0 | GLSLF("sig = min(%f * sig, 1.0);\n", isnan(param) ? 1.0 : param); |
731 | 0 | break; |
732 | | |
733 | 0 | case TONE_MAPPING_MOBIUS: |
734 | 0 | GLSLF("if (sig_peak > (1.0 + 1e-6)) {\n"); |
735 | 0 | GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param); |
736 | | // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 |
737 | | // where M(x) = scale * (x+a)/(x+b) |
738 | 0 | GLSLF("float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak);\n"); |
739 | 0 | GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / " |
740 | 0 | "max(1e-6, sig_peak - 1.0);\n"); |
741 | 0 | GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); |
742 | 0 | GLSLF("sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b))," |
743 | 0 | " %s(greaterThan(sig, vec3(j))));\n", |
744 | 0 | gl_sc_bvec(sc, 3)); |
745 | 0 | GLSLF("}\n"); |
746 | 0 | break; |
747 | | |
748 | 0 | case TONE_MAPPING_REINHARD: { |
749 | 0 | float contrast = isnan(param) ? 0.5 : param, |
750 | 0 | offset = (1.0 - contrast) / contrast; |
751 | 0 | GLSLF("sig = sig / (sig + vec3(%f));\n", offset); |
752 | 0 | GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset); |
753 | 0 | GLSL(sig *= scale;) |
754 | 0 | break; |
755 | 0 | } |
756 | | |
757 | 0 | case TONE_MAPPING_HABLE: { |
758 | 0 | float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; |
759 | 0 | GLSLHF("vec3 hable(vec3 x) {\n"); |
760 | 0 | GLSLHF("return (x * (%f*x + vec3(%f)) + vec3(%f)) / " |
761 | 0 | " (x * (%f*x + vec3(%f)) + vec3(%f)) " |
762 | 0 | " - vec3(%f);\n", |
763 | 0 | A, C*B, D*E, |
764 | 0 | A, B, D*F, |
765 | 0 | E/F); |
766 | 0 | GLSLHF("}\n"); |
767 | 0 | GLSLF("sig = hable(max(vec3(0.0), sig)) / hable(vec3(sig_peak)).x;\n"); |
768 | 0 | break; |
769 | 0 | } |
770 | | |
771 | 0 | case TONE_MAPPING_GAMMA: { |
772 | 0 | float gamma = isnan(param) ? 1.8 : param; |
773 | 0 | GLSLF("const float cutoff = 0.05, gamma = 1.0/%f;\n", gamma); |
774 | 0 | GLSL(float scale = pow(cutoff / sig_peak, gamma.x) / cutoff;) |
775 | 0 | GLSLF("sig = mix(scale * sig," |
776 | 0 | " pow(sig / sig_peak, vec3(gamma))," |
777 | 0 | " %s(greaterThan(sig, vec3(cutoff))));\n", |
778 | 0 | gl_sc_bvec(sc, 3)); |
779 | 0 | break; |
780 | 0 | } |
781 | | |
782 | 0 | case TONE_MAPPING_LINEAR: { |
783 | 0 | float coeff = isnan(param) ? 1.0 : param; |
784 | 0 | GLSLF("sig = min(%f / sig_peak, 1.0) * sig;\n", coeff); |
785 | 0 | break; |
786 | 0 | } |
787 | | |
788 | 0 | case TONE_MAPPING_BT_2390: |
789 | | // We first need to encode both sig and sig_peak into PQ space |
790 | 0 | GLSLF("vec4 sig_pq = vec4(sig.rgb, sig_peak); \n" |
791 | 0 | "sig_pq *= vec4(1.0/%f); \n" |
792 | 0 | "sig_pq = pow(sig_pq, vec4(%f)); \n" |
793 | 0 | "sig_pq = (vec4(%f) + vec4(%f) * sig_pq) \n" |
794 | 0 | " / (vec4(1.0) + vec4(%f) * sig_pq); \n" |
795 | 0 | "sig_pq = pow(sig_pq, vec4(%f)); \n", |
796 | 0 | 10000.0 / MP_REF_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); |
797 | | // Encode both the signal and the target brightness to be relative to |
798 | | // the source peak brightness, and figure out the target peak in this space |
799 | 0 | GLSLF("float scale = 1.0 / sig_pq.a; \n" |
800 | 0 | "sig_pq.rgb *= vec3(scale); \n" |
801 | 0 | "float maxLum = %f * scale; \n", |
802 | 0 | pq_delinearize(dst_peak)); |
803 | | // Apply piece-wise hermite spline |
804 | 0 | GLSLF("float ks = 1.5 * maxLum - 0.5; \n" |
805 | 0 | "vec3 tb = (sig_pq.rgb - vec3(ks)) / vec3(1.0 - ks); \n" |
806 | 0 | "vec3 tb2 = tb * tb; \n" |
807 | 0 | "vec3 tb3 = tb2 * tb; \n" |
808 | 0 | "vec3 pb = (2.0 * tb3 - 3.0 * tb2 + vec3(1.0)) * vec3(ks) + \n" |
809 | 0 | " (tb3 - 2.0 * tb2 + tb) * vec3(1.0 - ks) + \n" |
810 | 0 | " (-2.0 * tb3 + 3.0 * tb2) * vec3(maxLum); \n" |
811 | 0 | "sig = mix(pb, sig_pq.rgb, %s(lessThan(sig_pq.rgb, vec3(ks)))); \n", |
812 | 0 | gl_sc_bvec(sc, 3)); |
813 | | // Convert back from PQ space to linear light |
814 | 0 | GLSLF("sig *= vec3(sig_pq.a); \n" |
815 | 0 | "sig = pow(sig, vec3(1.0/%f)); \n" |
816 | 0 | "sig = max(sig - vec3(%f), 0.0) / \n" |
817 | 0 | " (vec3(%f) - vec3(%f) * sig); \n" |
818 | 0 | "sig = pow(sig, vec3(1.0/%f)); \n" |
819 | 0 | "sig *= vec3(%f); \n", |
820 | 0 | PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / MP_REF_WHITE); |
821 | 0 | break; |
822 | | |
823 | 0 | default: |
824 | 0 | abort(); |
825 | 0 | } |
826 | | |
827 | 0 | GLSLF("float coeff = max(sig[sig_idx] - %f, 1e-6) / \n" |
828 | 0 | " max(sig[sig_idx], 1.0); \n" |
829 | 0 | "coeff = %f * pow(coeff / %f, %f); \n" |
830 | 0 | "color.rgb *= sig[sig_idx] / sig_orig; \n" |
831 | 0 | "color.rgb = mix(color.rgb, %f * sig, coeff); \n", |
832 | 0 | 0.18 / dst_scale, 0.90, dst_scale, 0.20, dst_scale); |
833 | 0 | } |
834 | | |
835 | | // Map colors from one source space to another. These source spaces must be |
836 | | // known (i.e. not MP_CSP_*_AUTO), as this function won't perform any |
837 | | // auto-guessing. If is_linear is true, we assume the input has already been |
838 | | // linearized (e.g. for linear-scaling). If `opts->compute_peak` is true, we |
839 | | // will detect the peak instead of relying on metadata. Note that this requires |
840 | | // the caller to have already bound the appropriate SSBO and set up the compute |
841 | | // shader metadata |
842 | | void pass_color_map(struct gl_shader_cache *sc, bool is_linear, |
843 | | const struct pl_color_space *src, const struct pl_color_space *dst, |
844 | | enum mp_csp_light src_light, enum mp_csp_light dst_light, |
845 | | const struct gl_tone_map_opts *opts) |
846 | 0 | { |
847 | 0 | GLSLF("// color mapping\n"); |
848 | | |
849 | | // Some operations need access to the video's luma coefficients, so make |
850 | | // them available |
851 | 0 | pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(src->primaries)); |
852 | 0 | gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz.m[1]); |
853 | 0 | rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(dst->primaries)); |
854 | 0 | gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz.m[1]); |
855 | |
|
856 | 0 | bool need_ootf = src_light != dst_light; |
857 | 0 | if (src_light == MP_CSP_LIGHT_SCENE_HLG && src->hdr.max_luma != dst->hdr.max_luma) |
858 | 0 | need_ootf = true; |
859 | | |
860 | | // All operations from here on require linear light as a starting point, |
861 | | // so we linearize even if src->gamma == dst->transfer when one of the other |
862 | | // operations needs it |
863 | 0 | bool need_linear = src->transfer != dst->transfer || |
864 | 0 | src->primaries != dst->primaries || |
865 | 0 | src->hdr.max_luma != dst->hdr.max_luma || |
866 | 0 | need_ootf; |
867 | |
|
868 | 0 | if (need_linear && !is_linear) { |
869 | | // We also pull it up so that 1.0 is the reference white |
870 | 0 | pass_linearize(sc, src->transfer); |
871 | 0 | is_linear = true; |
872 | 0 | } |
873 | | |
874 | | // Pre-scale the incoming values into an absolute scale |
875 | 0 | GLSLF("color.rgb *= vec3(%f);\n", pl_color_transfer_nominal_peak(src->transfer)); |
876 | |
|
877 | 0 | if (need_ootf) |
878 | 0 | pass_ootf(sc, src_light, src->hdr.max_luma / MP_REF_WHITE); |
879 | | |
880 | | // Tone map to prevent clipping due to excessive brightness |
881 | 0 | if (src->hdr.max_luma > dst->hdr.max_luma) { |
882 | 0 | pass_tone_map(sc, src->hdr.max_luma / MP_REF_WHITE, |
883 | 0 | dst->hdr.max_luma / MP_REF_WHITE, opts); |
884 | 0 | } |
885 | | |
886 | | // Adapt to the right colorspace if necessary |
887 | 0 | if (src->primaries != dst->primaries) { |
888 | 0 | const struct pl_raw_primaries *csp_src = pl_raw_primaries_get(src->primaries), |
889 | 0 | *csp_dst = pl_raw_primaries_get(dst->primaries); |
890 | 0 | pl_matrix3x3 m = pl_get_color_mapping_matrix(csp_src, csp_dst, |
891 | 0 | PL_INTENT_RELATIVE_COLORIMETRIC); |
892 | 0 | gl_sc_uniform_mat3(sc, "cms_matrix", true, &m.m[0][0]); |
893 | 0 | GLSL(color.rgb = cms_matrix * color.rgb;) |
894 | |
|
895 | 0 | if (!opts->gamut_mode || opts->gamut_mode == GAMUT_DESATURATE) { |
896 | 0 | GLSL(float cmin = min(min(color.r, color.g), color.b);) |
897 | 0 | GLSL(if (cmin < 0.0) { |
898 | 0 | float luma = dot(dst_luma, color.rgb); |
899 | 0 | float coeff = cmin / (cmin - luma); |
900 | 0 | color.rgb = mix(color.rgb, vec3(luma), coeff); |
901 | 0 | }) |
902 | 0 | GLSLF("float cmax = 1.0/%f * max(max(color.r, color.g), color.b);\n", |
903 | 0 | dst->hdr.max_luma / MP_REF_WHITE); |
904 | 0 | GLSL(if (cmax > 1.0) color.rgb /= cmax;) |
905 | 0 | } |
906 | 0 | } |
907 | |
|
908 | 0 | if (need_ootf) |
909 | 0 | pass_inverse_ootf(sc, dst_light, dst->hdr.max_luma / MP_REF_WHITE); |
910 | | |
911 | | // Post-scale the outgoing values from absolute scale to normalized. |
912 | | // For SDR, we normalize to the chosen signal peak. For HDR, we normalize |
913 | | // to the encoding range of the transfer function. |
914 | 0 | float dst_range = dst->hdr.max_luma / MP_REF_WHITE; |
915 | 0 | if (pl_color_space_is_hdr(dst)) |
916 | 0 | dst_range = pl_color_transfer_nominal_peak(dst->transfer); |
917 | |
|
918 | 0 | GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range); |
919 | | |
920 | | // Warn for remaining out-of-gamut colors if enabled |
921 | 0 | if (opts->gamut_mode == GAMUT_WARN) { |
922 | 0 | GLSL(if (any(greaterThan(color.rgb, vec3(1.005))) || |
923 | 0 | any(lessThan(color.rgb, vec3(-0.005))))) |
924 | 0 | GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert |
925 | 0 | } |
926 | |
|
927 | 0 | if (is_linear) |
928 | 0 | pass_delinearize(sc, dst->transfer); |
929 | 0 | } |
930 | | |
931 | | // Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post. |
932 | | // Obtain random numbers by calling rand(h), followed by h = permute(h) to |
933 | | // update the state. Assumes the texture was hooked. |
934 | | // permute() was modified from the original to avoid "large" numbers in |
935 | | // calculations, since low-end mobile GPUs choke on them (overflow). |
936 | | static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) |
937 | 0 | { |
938 | 0 | GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; }) |
939 | 0 | GLSLHF("float permute(float x) {\n"); |
940 | 0 | GLSLH(return mod289( mod289(34.0*x + 1.0) * (fract(x) + 1.0) );) |
941 | 0 | GLSLHF("}\n"); |
942 | 0 | GLSLH(float rand(float x) { return fract(x * 1.0/41.0); }) |
943 | | |
944 | | // Initialize the PRNG by hashing the position + a random uniform |
945 | 0 | GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) |
946 | 0 | GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);) |
947 | 0 | gl_sc_uniform_dynamic(sc); |
948 | 0 | gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); |
949 | 0 | } |
950 | | |
// Default debanding parameters; deband_conf points at this as its `.defaults`.
const struct deband_opts deband_opts_def = {
    // Number of averaging passes emitted by pass_sample_deband
    .iterations = 1,
    // Difference cutoff; pass_sample_deband divides it by (i * 16384.0)
    .threshold = 48.0,
    // Sampling radius; pass_sample_deband multiplies it by the pass index
    .range = 16.0,
    // Noise strength; pass_sample_deband divides it by 8192.0 and by the
    // nominal peak of the transfer function
    .grain = 32.0,
};
957 | | |
// Option table for the debanding parameters stored in struct deband_opts.
// NOTE(review): OPT_BASE_STRUCT is presumably what the OPT_* macros use to
// resolve the field names below - confirm against the option macros.
#define OPT_BASE_STRUCT struct deband_opts
const struct m_sub_options deband_conf = {
    .opts = (const m_option_t[]) {
        // Each entry: option name, backing field, accepted value range
        {"iterations", OPT_INT(iterations), M_RANGE(0, 16)},
        {"threshold", OPT_FLOAT(threshold), M_RANGE(0.0, 4096.0)},
        {"range", OPT_FLOAT(range), M_RANGE(1.0, 64.0)},
        {"grain", OPT_FLOAT(grain), M_RANGE(0.0, 4096.0)},
        {0} // terminating entry
    },
    .size = sizeof(struct deband_opts),
    .defaults = &deband_opts_def,
};
970 | | |
971 | | // Stochastically sample a debanded result from a hooked texture. |
972 | | void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, |
973 | | AVLFG *lfg, enum pl_color_transfer trc) |
974 | 0 | { |
975 | | // Initialize the PRNG |
976 | 0 | GLSLF("{\n"); |
977 | 0 | prng_init(sc, lfg); |
978 | | |
979 | | // Helper: Compute a stochastic approximation of the avg color around a |
980 | | // pixel |
981 | 0 | GLSLHF("vec4 average(float range, inout float h) {\n"); |
982 | | // Compute a random rangle and distance |
983 | 0 | GLSLH(float dist = rand(h) * range; h = permute(h);) |
984 | 0 | GLSLH(float dir = rand(h) * 6.2831853; h = permute(h);) |
985 | 0 | GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));) |
986 | | |
987 | | // Sample at quarter-turn intervals around the source pixel |
988 | 0 | GLSLH(vec4 ref[4];) |
989 | 0 | GLSLH(ref[0] = HOOKED_texOff(vec2( o.x, o.y));) |
990 | 0 | GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y, o.x));) |
991 | 0 | GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));) |
992 | 0 | GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));) |
993 | | |
994 | | // Return the (normalized) average |
995 | 0 | GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;) |
996 | 0 | GLSLHF("}\n"); |
997 | | |
998 | | // Sample the source pixel |
999 | 0 | GLSL(color = HOOKED_tex(HOOKED_pos);) |
1000 | 0 | GLSLF("vec4 avg, diff;\n"); |
1001 | 0 | for (int i = 1; i <= opts->iterations; i++) { |
1002 | | // Sample the average pixel and use it instead of the original if |
1003 | | // the difference is below the given threshold |
1004 | 0 | GLSLF("avg = average(%f, h);\n", i * opts->range); |
1005 | 0 | GLSL(diff = abs(color - avg);) |
1006 | 0 | GLSLF("color = mix(avg, color, %s(greaterThan(diff, vec4(%f))));\n", |
1007 | 0 | gl_sc_bvec(sc, 4), opts->threshold / (i * 16384.0)); |
1008 | 0 | } |
1009 | | |
1010 | | // Add some random noise to smooth out residual differences |
1011 | 0 | GLSL(vec3 noise;) |
1012 | 0 | GLSL(noise.x = rand(h); h = permute(h);) |
1013 | 0 | GLSL(noise.y = rand(h); h = permute(h);) |
1014 | 0 | GLSL(noise.z = rand(h); h = permute(h);) |
1015 | | |
1016 | | // Noise is scaled to the signal level to prevent extreme noise for HDR |
1017 | 0 | float gain = opts->grain/8192.0 / pl_color_transfer_nominal_peak(trc); |
1018 | 0 | GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain); |
1019 | 0 | GLSLF("}\n"); |
1020 | 0 | } |
1021 | | |
1022 | | // Assumes the texture was hooked |
1023 | 0 | void pass_sample_unsharp(struct gl_shader_cache *sc, float param) { |
1024 | 0 | GLSLF("{\n"); |
1025 | 0 | GLSL(float st1 = 1.2;) |
1026 | 0 | GLSL(vec4 p = HOOKED_tex(HOOKED_pos);) |
1027 | 0 | GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1)) |
1028 | 0 | + HOOKED_texOff(st1 * vec2(+1, -1)) |
1029 | 0 | + HOOKED_texOff(st1 * vec2(-1, +1)) |
1030 | 0 | + HOOKED_texOff(st1 * vec2(-1, -1));) |
1031 | 0 | GLSL(float st2 = 1.5;) |
1032 | 0 | GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1, 0)) |
1033 | 0 | + HOOKED_texOff(st2 * vec2( 0, +1)) |
1034 | 0 | + HOOKED_texOff(st2 * vec2(-1, 0)) |
1035 | 0 | + HOOKED_texOff(st2 * vec2( 0, -1));) |
1036 | 0 | GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) |
1037 | 0 | GLSLF("color = p + t * %f;\n", param); |
1038 | 0 | GLSLF("}\n"); |
1039 | 0 | } |