Coverage Report

Created: 2026-03-12 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mpv/video/out/gpu/video_shaders.c
Line
Count
Source
1
/*
2
 * This file is part of mpv.
3
 *
4
 * mpv is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2.1 of the License, or (at your option) any later version.
8
 *
9
 * mpv is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
 * GNU Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
16
 */
17
18
#include <math.h>
19
20
#include <libplacebo/colorspace.h>
21
22
#include "video_shaders.h"
23
#include "video.h"
24
25
#if PL_API_VER < 362
26
#define PL_COLOR_TRC_SCRGB PL_COLOR_TRC_LINEAR
27
#endif
28
29
0
// Shorthands for appending shader source text. All four expand to a call that
// takes a variable named `sc` as first argument, so `sc` must be in scope
// wherever they are used.
//   GLSL(x)    - stringifies x, appends "\n" and passes it to gl_sc_add().
//   GLSLF(...) - forwards a format string plus arguments to gl_sc_addf().
//   GLSLH(x) / GLSLHF(...) - same, but through gl_sc_hadd() / gl_sc_haddf().
// Note that the expansions of GLSL() and GLSLH() already end in ';' (call
// sites omit it), while GLSLF() and GLSLHF() need one at the call site.
#define GLSL(x) gl_sc_add(sc, #x "\n");
30
0
#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__)
31
0
#define GLSLH(x) gl_sc_hadd(sc, #x "\n");
32
0
#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__)
33
34
// Emit the per-texture preamble shared by the samplers below: rebinds the
// `tex` and `texmap` macros to texture number `tex_num`, then declares `pos`,
// `size` and `pt` from that texture's texcoord, texture_size and pixel_size.
void sampler_prelude(struct gl_shader_cache *sc, int tex_num)
{
    // Drop any earlier binding before pointing the macros at this texture
    GLSLF("#undef tex\n"
          "#undef texmap\n"
          "#define tex texture%d\n"
          "#define texmap texmap%d\n",
          tex_num, tex_num);

    // Variables that the sampling code refers to by their short names
    GLSLF("vec2 pos = texcoord%d;\n"
          "vec2 size = texture_size%d;\n"
          "vec2 pt = pixel_size%d;\n",
          tex_num, tex_num, tex_num);
}
45
46
static void pass_sample_separated_get_weights(struct gl_shader_cache *sc,
47
                                              struct scaler *scaler)
48
0
{
49
0
    gl_sc_uniform_texture(sc, "lut", scaler->lut);
50
0
    GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut->params.h);
51
52
0
    int N = scaler->kernel->size;
53
0
    int width = (N + 3) / 4; // round up
54
55
0
    GLSLF("float weights[%d];\n", N);
56
0
    for (int i = 0; i < N; i++) {
57
0
        if (i % 4 == 0)
58
0
            GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width);
59
0
        GLSLF("weights[%d] = c[%d];\n", i, i % 4);
60
0
    }
61
0
}
62
63
// Handle a single pass (either vertical or horizontal). The direction is given
64
// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is
65
// used instead (samples from texture0 through textureN)
66
void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
67
                               int d_x, int d_y)
68
0
{
69
0
    int N = scaler->kernel->size;
70
0
    bool use_ar = scaler->conf.antiring > 0;
71
0
    bool planar = d_x == 0 && d_y == 0;
72
0
    GLSL(color = vec4(0.0);)
73
0
    GLSLF("{\n");
74
0
    if (!planar) {
75
0
        GLSLF("vec2 dir = vec2(%d.0, %d.0);\n", d_x, d_y);
76
0
        GLSL(pt *= dir;)
77
0
        GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);)
78
0
        GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d.0);\n", N / 2 - 1);
79
0
    }
80
0
    GLSL(vec4 c;)
81
0
    if (use_ar) {
82
0
        GLSL(vec4 hi = vec4(0.0);)
83
0
        GLSL(vec4 lo = vec4(1.0);)
84
0
    }
85
0
    pass_sample_separated_get_weights(sc, scaler);
86
0
    GLSLF("// scaler samples\n");
87
0
    for (int n = 0; n < N; n++) {
88
0
        if (planar) {
89
0
            GLSLF("c = texture(texture%d, texcoord%d);\n", n, n);
90
0
        } else {
91
0
            GLSLF("c = texture(tex, base + pt * vec2(%d.0));\n", n);
92
0
        }
93
0
        GLSLF("color += vec4(weights[%d]) * c;\n", n);
94
0
        if (use_ar && (n == N/2-1 || n == N/2)) {
95
0
            GLSL(lo = min(lo, c);)
96
0
            GLSL(hi = max(hi, c);)
97
0
        }
98
0
    }
99
0
    if (use_ar)
100
0
        GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
101
0
              scaler->conf.antiring);
102
0
    GLSLF("}\n");
103
0
}
104
105
// Subroutine for computing and adding an individual texel contribution
106
// If planar is false, samples directly
107
// If planar is true, takes the pixel from inX[idx] where X is the component and
108
// `idx` must be defined by the caller
109
static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
110
                         int x, int y, int components, bool planar)
111
0
{
112
0
    double radius = scaler->kernel->radius * scaler->kernel->filter_scale;
113
0
    double radius_cutoff = scaler->kernel->radius_cutoff;
114
115
    // Since we can't know the subpixel position in advance, assume a
116
    // worst case scenario
117
0
    int yy = y > 0 ? y-1 : y;
118
0
    int xx = x > 0 ? x-1 : x;
119
0
    double dmax = sqrt(xx*xx + yy*yy);
120
    // Skip samples definitely outside the radius
121
0
    if (dmax >= radius_cutoff)
122
0
        return;
123
0
    GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y);
124
    // Check for samples that might be skippable
125
0
    bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
126
0
    if (maybe_skippable)
127
0
        GLSLF("if (d < %f) {\n", radius_cutoff);
128
129
    // get the weight for this pixel
130
0
    if (scaler->lut->params.dimensions == 1) {
131
0
        GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n",
132
0
              radius, scaler->lut->params.w);
133
0
    } else {
134
0
        GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n",
135
0
              radius, scaler->lut->params.h);
136
0
    }
137
0
    GLSL(wsum += w;)
138
139
0
    if (planar) {
140
0
        for (int n = 0; n < components; n++)
141
0
            GLSLF("color[%d] += w * in%d[idx];\n", n, n);
142
0
    } else {
143
0
        GLSLF("in0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
144
0
        GLSL(color += vec4(w) * in0;)
145
0
    }
146
147
0
    if (maybe_skippable)
148
0
        GLSLF("}\n");
149
0
}
150
151
void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
152
                       int components, bool sup_gather)
153
0
{
154
0
    GLSL(color = vec4(0.0);)
155
0
    GLSLF("{\n");
156
0
    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
157
0
    GLSL(vec2 base = pos - fcoord * pt;)
158
0
    GLSLF("float w, d, wsum = 0.0;\n");
159
0
    for (int n = 0; n < components; n++)
160
0
        GLSLF("vec4 in%d;\n", n);
161
0
    GLSL(int idx;)
162
163
0
    gl_sc_uniform_texture(sc, "lut", scaler->lut);
164
165
0
    GLSLF("// scaler samples\n");
166
0
    int bound = ceil(scaler->kernel->radius_cutoff);
167
0
    for (int y = 1-bound; y <= bound; y += 2) {
168
0
        for (int x = 1-bound; x <= bound; x += 2) {
169
            // First we figure out whether it's more efficient to use direct
170
            // sampling or gathering. The problem is that gathering 4 texels
171
            // only to discard some of them is very wasteful, so only do it if
172
            // we suspect it will be a win rather than a loss. This is the case
173
            // exactly when all four texels are within bounds
174
0
            bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff;
175
176
0
            if (!sup_gather)
177
0
                use_gather = false;
178
179
0
            if (use_gather) {
180
                // Gather the four surrounding texels simultaneously
181
0
                for (int n = 0; n < components; n++) {
182
0
                    GLSLF("in%d = textureGatherOffset(tex, base, "
183
0
                          "ivec2(%d, %d), %d);\n", n, x, y, n);
184
0
                }
185
186
                // Mix in all of the points with their weights
187
0
                for (int p = 0; p < 4; p++) {
188
                    // The four texels are gathered counterclockwise starting
189
                    // from the bottom left
190
0
                    static const int xo[4] = {0, 1, 1, 0};
191
0
                    static const int yo[4] = {1, 1, 0, 0};
192
0
                    if (x+xo[p] > bound || y+yo[p] > bound)
193
0
                        continue;
194
0
                    GLSLF("idx = %d;\n", p);
195
0
                    polar_sample(sc, scaler, x+xo[p], y+yo[p], components, true);
196
0
                }
197
0
            } else {
198
                // switch to direct sampling instead, for efficiency/compatibility
199
0
                for (int yy = y; yy <= bound && yy <= y+1; yy++) {
200
0
                    for (int xx = x; xx <= bound && xx <= x+1; xx++)
201
0
                        polar_sample(sc, scaler, xx, yy, components, false);
202
0
                }
203
0
            }
204
0
        }
205
0
    }
206
207
0
    GLSL(color = color / vec4(wsum);)
208
0
    GLSLF("}\n");
209
0
}
210
211
// bw/bh: block size
212
// iw/ih: input size (pre-calculated to fit all required texels)
213
void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler,
214
                        int components, int bw, int bh, int iw, int ih)
215
0
{
216
0
    int bound = ceil(scaler->kernel->radius_cutoff);
217
0
    int offset = bound - 1; // padding top/left
218
219
0
    GLSL(color = vec4(0.0);)
220
0
    GLSLF("{\n");
221
0
    GLSL(vec2 wpos = texmap(gl_WorkGroupID * gl_WorkGroupSize);)
222
0
    GLSL(vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5));)
223
0
    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
224
0
    GLSL(vec2 base = pos - pt * fcoord;)
225
0
    GLSL(ivec2 rel = ivec2(round((base - wbase) * size));)
226
0
    GLSL(int idx;)
227
0
    GLSLF("float w, d, wsum = 0.0;\n");
228
0
    gl_sc_uniform_texture(sc, "lut", scaler->lut);
229
230
    // Load all relevant texels into shmem
231
0
    for (int c = 0; c < components; c++)
232
0
        GLSLHF("shared float in%d[%d];\n", c, ih * iw);
233
234
0
    GLSL(vec4 c;)
235
0
    GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) {\n", ih, bh);
236
0
    GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw);
237
0
    GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset);
238
0
    for (int c = 0; c < components; c++)
239
0
        GLSLF("in%d[%d * y + x] = c[%d];\n", c, iw, c);
240
0
    GLSLF("}}\n");
241
0
    GLSL(groupMemoryBarrier();)
242
0
    GLSL(barrier();)
243
244
    // Dispatch the actual samples
245
0
    GLSLF("// scaler samples\n");
246
0
    for (int y = 1-bound; y <= bound; y++) {
247
0
        for (int x = 1-bound; x <= bound; x++) {
248
0
            GLSLF("idx = %d * rel.y + rel.x + %d;\n", iw,
249
0
                  iw * (y + offset) + x + offset);
250
0
            polar_sample(sc, scaler, x, y, components, true);
251
0
        }
252
0
    }
253
254
0
    GLSL(color = color / vec4(wsum);)
255
0
    GLSLF("}\n");
256
0
}
257
258
// Emit code that declares a vec4 named `t` and fills it with the bicubic
// weighting parameters derived from the GLSL expression `s`.
//
// How bicubic scaling with only 4 texel fetches works:
//   <https://web.archive.org/web/20180720154854/http://www.mate.tue.nl/mate/pdfs/10318.pdf>
//   'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
// Why this algorithm normally always blurs, even with unit scaling:
//   http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
//   'GPU Prefilter for Accurate Cubic B-spline Interpolation'
static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s)
{
    // Three multiply-add steps, each multiplying by `s` again
    GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s + vec4(1, 0, -0.5, 0.5);\n",
          t, s);
    GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s);
    GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s);

    // Divide xy by zw, then offset the result by 1 +/- s
    GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t);
    GLSLF("%s.xy += vec2(1.0 + %s, 1.0 - %s);\n", t, s, s);
}
274
275
void pass_sample_bicubic_fast(struct gl_shader_cache *sc)
276
0
{
277
0
    GLSLF("{\n");
278
0
    GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));)
279
0
    bicubic_calcweights(sc, "parmx", "fcoord.x");
280
0
    bicubic_calcweights(sc, "parmy", "fcoord.y");
281
0
    GLSL(vec4 cdelta;)
282
0
    GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);)
283
0
    GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);)
284
    // first y-interpolation
285
0
    GLSL(vec4 ar = texture(tex, pos + cdelta.xy);)
286
0
    GLSL(vec4 ag = texture(tex, pos + cdelta.xw);)
287
0
    GLSL(vec4 ab = mix(ag, ar, parmy.b);)
288
    // second y-interpolation
289
0
    GLSL(vec4 br = texture(tex, pos + cdelta.zy);)
290
0
    GLSL(vec4 bg = texture(tex, pos + cdelta.zw);)
291
0
    GLSL(vec4 aa = mix(bg, br, parmy.b);)
292
    // x-interpolation
293
0
    GLSL(color = mix(aa, ab, parmx.b);)
294
0
    GLSLF("}\n");
295
0
}
296
297
void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
298
                                   int w, int h)
299
0
{
300
0
    GLSLF("{\n");
301
0
    GLSL(vec2 pos = pos - vec2(0.5) * pt;) // round to nearest
302
0
    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
303
    // Determine the mixing coefficient vector
304
0
    gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h});
305
0
    GLSL(vec2 coeff = fcoord * output_size/size;)
306
0
    float threshold = scaler->conf.kernel.params[0];
307
0
    threshold = isnan(threshold) ? 0.0 : threshold;
308
0
    GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold);
309
0
    GLSL(coeff = clamp(coeff, 0.0, 1.0);)
310
    // Compute the right blend of colors
311
0
    GLSL(color = texture(tex, pos + pt * (coeff - fcoord));)
312
0
    GLSLF("}\n");
313
0
}
314
315
// SMPTE ST.2084 (PQ / HDR) curve constants
static const float PQ_M1 = 2610./4096 * 1./4;
static const float PQ_M2 = 2523./4096 * 128;
static const float PQ_C1 = 3424./4096;
static const float PQ_C2 = 2413./4096 * 32;
static const float PQ_C3 = 2392./4096 * 32;

// ARIB STD-B67 (HLG) curve constants
static const float HLG_A = 0.17883277;
static const float HLG_B = 0.28466892;
static const float HLG_C = 0.55991073;

// Panasonic V-Log curve constants
static const float VLOG_B = 0.00873;
static const float VLOG_C = 0.241514;
static const float VLOG_D = 0.598206;

// Sony S-Log curve constants
static const float SLOG_A = 0.432699;
static const float SLOG_B = 0.037584;
static const float SLOG_C = 0.616596 + 0.03;
static const float SLOG_P = 3.538813;
static const float SLOG_Q = 0.030001;
static const float SLOG_K2 = 155.0 / 219.0;
339
340
// Linearize (expand), given a TRC as input. In essence, this is the ITU-R
341
// EOTF, calculated on an idealized (reference) monitor with a white point of
342
// MP_REF_WHITE and infinite contrast.
343
//
344
// These functions always output to a normalized scale of [0,1], for
345
// convenience of the video.c code that calls it. To get the values in an
346
// absolute scale, multiply the result by `pl_color_transfer_nominal_peak(trc)`
347
void pass_linearize(struct gl_shader_cache *sc, enum pl_color_transfer trc)
348
0
{
349
0
    if (trc == PL_COLOR_TRC_LINEAR || trc == PL_COLOR_TRC_SCRGB)
350
0
        return;
351
352
0
    GLSLF("// linearize\n");
353
354
    // Note that this clamp may technically violate the definition of
355
    // ITU-R BT.2100, which allows for sub-blacks and super-whites to be
356
    // displayed on the display where such would be possible. That said, the
357
    // problem is that not all gamma curves are well-defined on the values
358
    // outside this range, so we ignore it and just clip anyway for sanity.
359
0
    GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
360
361
0
    switch (trc) {
362
0
    case PL_COLOR_TRC_SRGB:
363
0
        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/12.92),             \n"
364
0
              "                pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), \n"
365
0
              "                %s(lessThan(vec3(0.04045), color.rgb))); \n",
366
0
              gl_sc_bvec(sc, 3));
367
0
        break;
368
0
    case PL_COLOR_TRC_BT_1886:
369
0
        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
370
0
        break;
371
0
    case PL_COLOR_TRC_GAMMA18:
372
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.8));)
373
0
        break;
374
0
    case PL_COLOR_TRC_GAMMA20:
375
0
        GLSL(color.rgb = pow(color.rgb, vec3(2.0));)
376
0
        break;
377
0
    case PL_COLOR_TRC_GAMMA22:
378
0
        GLSL(color.rgb = pow(color.rgb, vec3(2.2));)
379
0
        break;
380
0
    case PL_COLOR_TRC_GAMMA24:
381
0
        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
382
0
        break;
383
0
    case PL_COLOR_TRC_GAMMA26:
384
0
        GLSL(color.rgb = pow(color.rgb, vec3(2.6));)
385
0
        break;
386
0
    case PL_COLOR_TRC_GAMMA28:
387
0
        GLSL(color.rgb = pow(color.rgb, vec3(2.8));)
388
0
        break;
389
0
    case PL_COLOR_TRC_PRO_PHOTO:
390
0
        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/16.0),              \n"
391
0
              "                pow(color.rgb, vec3(1.8)),               \n"
392
0
              "                %s(lessThan(vec3(0.03125), color.rgb))); \n",
393
0
              gl_sc_bvec(sc, 3));
394
0
        break;
395
0
    case PL_COLOR_TRC_PQ:
396
0
        GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2);
397
0
        GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n"
398
0
              "             / (vec3(%f) - vec3(%f) * color.rgb);\n",
399
0
              PQ_C1, PQ_C2, PQ_C3);
400
0
        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", 1.0 / PQ_M1);
401
        // PQ's output range is 0-10000, but we need it to be relative to
402
        // MP_REF_WHITE instead, so rescale
403
0
        GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE);
404
0
        break;
405
0
    case PL_COLOR_TRC_HLG:
406
0
        GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n"
407
0
              "                exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n"
408
0
              "                %s(lessThan(vec3(0.5), color.rgb)));\n",
409
0
              HLG_C, HLG_A, HLG_B, gl_sc_bvec(sc, 3));
410
0
        GLSLF("color.rgb *= vec3(1.0/%f);\n", MP_REF_WHITE_HLG);
411
0
        break;
412
0
    case PL_COLOR_TRC_V_LOG:
413
0
        GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n"
414
0
              "    pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
415
0
              "              - vec3(%f),                                  \n"
416
0
              "    %s(lessThanEqual(vec3(0.181), color.rgb)));            \n",
417
0
              VLOG_D, VLOG_C, VLOG_B, gl_sc_bvec(sc, 3));
418
0
        break;
419
0
    case PL_COLOR_TRC_S_LOG1:
420
0
        GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n"
421
0
              "            - vec3(%f);\n",
422
0
              SLOG_C, SLOG_A, SLOG_B);
423
0
        break;
424
0
    case PL_COLOR_TRC_S_LOG2:
425
0
        GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f),      \n"
426
0
              "    (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
427
0
              "              - vec3(%f)) * vec3(1.0/%f),                   \n"
428
0
              "    %s(lessThanEqual(vec3(%f), color.rgb)));                \n",
429
0
              SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, gl_sc_bvec(sc, 3), SLOG_Q);
430
0
        break;
431
0
    case PL_COLOR_TRC_ST428:
432
0
        GLSL(color.rgb = vec3(52.37/48.0) * pow(color.rgb, vec3(2.6)););
433
0
        break;
434
0
    default:
435
0
        abort();
436
0
    }
437
438
    // Rescale to prevent clipping on non-float textures
439
0
    GLSLF("color.rgb *= vec3(1.0/%f);\n", pl_color_transfer_nominal_peak(trc));
440
0
}
441
442
// Delinearize (compress), given a TRC as output. This corresponds to the
443
// inverse EOTF (not the OETF) in ITU-R terminology, again assuming a
444
// reference monitor.
445
//
446
// Like pass_linearize, this functions ingests values on an normalized scale
447
void pass_delinearize(struct gl_shader_cache *sc, enum pl_color_transfer trc)
448
0
{
449
0
    if (trc == PL_COLOR_TRC_LINEAR || trc == PL_COLOR_TRC_SCRGB)
450
0
        return;
451
452
0
    GLSLF("// delinearize\n");
453
0
    GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
454
0
    GLSLF("color.rgb *= vec3(%f);\n", pl_color_transfer_nominal_peak(trc));
455
456
0
    switch (trc) {
457
0
    case PL_COLOR_TRC_SRGB:
458
0
        GLSLF("color.rgb = mix(color.rgb * vec3(12.92),                       \n"
459
0
              "               vec3(1.055) * pow(color.rgb, vec3(1.0/2.4))     \n"
460
0
              "                   - vec3(0.055),                              \n"
461
0
              "               %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n",
462
0
              gl_sc_bvec(sc, 3));
463
0
        break;
464
0
    case PL_COLOR_TRC_BT_1886:
465
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
466
0
        break;
467
0
    case PL_COLOR_TRC_GAMMA18:
468
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));)
469
0
        break;
470
0
    case PL_COLOR_TRC_GAMMA20:
471
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.0));)
472
0
        break;
473
0
    case PL_COLOR_TRC_GAMMA22:
474
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));)
475
0
        break;
476
0
    case PL_COLOR_TRC_GAMMA24:
477
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
478
0
        break;
479
0
    case PL_COLOR_TRC_GAMMA26:
480
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.6));)
481
0
        break;
482
0
    case PL_COLOR_TRC_GAMMA28:
483
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));)
484
0
        break;
485
0
    case PL_COLOR_TRC_PRO_PHOTO:
486
0
        GLSLF("color.rgb = mix(color.rgb * vec3(16.0),                        \n"
487
0
              "                pow(color.rgb, vec3(1.0/1.8)),                 \n"
488
0
              "                %s(lessThanEqual(vec3(0.001953), color.rgb))); \n",
489
0
              gl_sc_bvec(sc, 3));
490
0
        break;
491
0
    case PL_COLOR_TRC_PQ:
492
0
        GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE);
493
0
        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1);
494
0
        GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n"
495
0
              "             / (vec3(1.0) + vec3(%f) * color.rgb);\n",
496
0
              PQ_C1, PQ_C2, PQ_C3);
497
0
        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2);
498
0
        break;
499
0
    case PL_COLOR_TRC_HLG:
500
0
        GLSLF("color.rgb *= vec3(%f);\n", MP_REF_WHITE_HLG);
501
0
        GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n"
502
0
              "                vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n"
503
0
              "                %s(lessThan(vec3(1.0), color.rgb)));\n",
504
0
              HLG_A, HLG_B, HLG_C, gl_sc_bvec(sc, 3));
505
0
        break;
506
0
    case PL_COLOR_TRC_V_LOG:
507
0
        GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125),   \n"
508
0
              "                vec3(%f) * log(color.rgb + vec3(%f))   \n"
509
0
              "                    + vec3(%f),                        \n"
510
0
              "                %s(lessThanEqual(vec3(0.01), color.rgb))); \n",
511
0
              VLOG_C / M_LN10, VLOG_B, VLOG_D, gl_sc_bvec(sc, 3));
512
0
        break;
513
0
    case PL_COLOR_TRC_S_LOG1:
514
0
        GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n",
515
0
              SLOG_A / M_LN10, SLOG_B, SLOG_C);
516
0
        break;
517
0
    case PL_COLOR_TRC_S_LOG2:
518
0
        GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f),                \n"
519
0
              "                vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n"
520
0
              "                    + vec3(%f),                                 \n"
521
0
              "                %s(lessThanEqual(vec3(0.0), color.rgb)));       \n",
522
0
              SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C, gl_sc_bvec(sc, 3));
523
0
        break;
524
0
    case PL_COLOR_TRC_ST428:
525
0
        GLSL(color.rgb = pow(color.rgb * vec3(48.0/52.37), vec3(1.0/2.6)););
526
0
        break;
527
0
    default:
528
0
        abort();
529
0
    }
530
0
}
531
532
// Apply the OOTF mapping from a given light type to display-referred light.
533
// Assumes absolute scale values. `peak` is used to tune the OOTF where
534
// applicable (currently only HLG).
535
static void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light,
536
                      float peak)
537
0
{
538
0
    if (light == MP_CSP_LIGHT_DISPLAY)
539
0
        return;
540
541
0
    GLSLF("// apply ootf\n");
542
543
0
    switch (light)
544
0
    {
545
0
    case MP_CSP_LIGHT_SCENE_HLG: {
546
        // HLG OOTF from BT.2100, scaled to the chosen display peak
547
0
        float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0));
548
0
        GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), %f));\n",
549
0
              peak / pow(12.0 / MP_REF_WHITE_HLG, gamma), gamma - 1.0);
550
0
        break;
551
0
    }
552
0
    case MP_CSP_LIGHT_SCENE_709_1886:
553
        // This OOTF is defined by encoding the result as 709 and then decoding
554
        // it as 1886; although this is called 709_1886 we actually use the
555
        // more precise (by one decimal) values from BT.2020 instead
556
0
        GLSLF("color.rgb = mix(color.rgb * vec3(4.5),                  \n"
557
0
              "                vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n"
558
0
              "                %s(lessThan(vec3(0.0181), color.rgb))); \n",
559
0
              gl_sc_bvec(sc, 3));
560
0
        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
561
0
        break;
562
0
    case MP_CSP_LIGHT_SCENE_1_2:
563
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.2));)
564
0
        break;
565
0
    default:
566
0
        abort();
567
0
    }
568
0
}
569
570
// Inverse of the function pass_ootf, for completeness' sake.
571
static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light,
572
                              float peak)
573
0
{
574
0
    if (light == MP_CSP_LIGHT_DISPLAY)
575
0
        return;
576
577
0
    GLSLF("// apply inverse ootf\n");
578
579
0
    switch (light)
580
0
    {
581
0
    case MP_CSP_LIGHT_SCENE_HLG: {
582
0
        float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0));
583
0
        GLSLF("color.rgb *= vec3(1.0/%f);\n", peak / pow(12.0 / MP_REF_WHITE_HLG, gamma));
584
0
        GLSLF("color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), %f)));\n",
585
0
              (gamma - 1.0) / gamma);
586
0
        break;
587
0
    }
588
0
    case MP_CSP_LIGHT_SCENE_709_1886:
589
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
590
0
        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5),               \n"
591
0
              "                pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n"
592
0
              "                    vec3(1/0.45)),                       \n"
593
0
              "                %s(lessThan(vec3(0.08145), color.rgb))); \n",
594
0
              gl_sc_bvec(sc, 3));
595
0
        break;
596
0
    case MP_CSP_LIGHT_SCENE_1_2:
597
0
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));)
598
0
        break;
599
0
    default:
600
0
        abort();
601
0
    }
602
0
}
603
604
// Average light level assumed for SDR signals: a signal level of 0.5 under a
// typical presentation gamma of about 2.0 (0.5 * 0.5 = 0.25).
static const float sdr_avg = 0.25f;
607
608
static void hdr_update_peak(struct gl_shader_cache *sc,
609
                            const struct gl_tone_map_opts *opts)
610
0
{
611
    // Update the sig_peak/sig_avg from the old SSBO state
612
0
    GLSL(if (average.y > 0.0) {)
613
0
    GLSL(    sig_avg  = max(1e-3, average.x);)
614
0
    GLSL(    sig_peak = max(1.00, average.y);)
615
0
    GLSL(})
616
617
    // Chosen to avoid overflowing on an 8K buffer
618
0
    const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
619
620
    // For performance, and to avoid overflows, we tally up the sub-results per
621
    // pixel using shared memory first
622
0
    GLSLH(shared int wg_sum;)
623
0
    GLSLH(shared uint wg_max;)
624
0
    GLSL(wg_sum = 0; wg_max = 0u;)
625
0
    GLSL(barrier();)
626
0
    GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min);
627
0
    GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale);
628
0
    GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale);
629
630
    // Have one thread per work group update the global atomics
631
0
    GLSL(memoryBarrierShared();)
632
0
    GLSL(barrier();)
633
0
    GLSL(if (gl_LocalInvocationIndex == 0u) {)
634
0
    GLSL(    int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
635
0
    GLSL(    atomicAdd(frame_sum, wg_avg);)
636
0
    GLSL(    atomicMax(frame_max, wg_max);)
637
0
    GLSL(    memoryBarrierBuffer();)
638
0
    GLSL(})
639
0
    GLSL(barrier();)
640
641
    // Finally, to update the global state, we increment a counter per dispatch
642
0
    GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
643
0
    GLSL(if (gl_LocalInvocationIndex == 0u && atomicAdd(counter, 1u) == num_wg - 1u) {)
644
0
    GLSL(    counter = 0u;)
645
0
    GLSL(    vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
646
0
    GLSLF("  cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale);
647
0
    GLSL(    cur.x = exp(cur.x);)
648
0
    GLSL(    if (average.y == 0.0))
649
0
    GLSL(        average = cur;)
650
651
    // Use an IIR low-pass filter to smooth out the detected values, with a
652
    // configurable decay rate based on the desired time constant (tau)
653
0
    if (opts->decay_rate) {
654
0
        float decay = 1.0f - expf(-1.0f / opts->decay_rate);
655
0
        GLSLF("  average += %f * (cur - average);\n", decay);
656
0
    } else {
657
0
        GLSLF("  average = cur;\n");
658
0
    }
659
660
    // Scene change hysteresis
661
0
    float log_db = 10.0 / log(10.0);
662
0
    GLSLF("  float weight = smoothstep(%f, %f, abs(log(cur.x / average.x)));\n",
663
0
          opts->scene_threshold_low / log_db,
664
0
          opts->scene_threshold_high / log_db);
665
0
    GLSL(    average = mix(average, cur, weight);)
666
667
    // Reset SSBO state for the next frame
668
0
    GLSL(    frame_sum = 0; frame_max = 0u;)
669
0
    GLSL(    memoryBarrierBuffer();)
670
0
    GLSL(})
671
0
}
672
673
static inline float pq_delinearize(float x)
674
0
{
675
0
    x *= MP_REF_WHITE / 10000.0;
676
0
    x = powf(x, PQ_M1);
677
0
    x = (PQ_C1 + PQ_C2 * x) / (1.0 + PQ_C3 * x);
678
0
    x = pow(x, PQ_M2);
679
0
    return x;
680
0
}
681
682
// Tone map from a known peak brightness to the range [0,1]. If ref_peak
683
// is 0, we will use peak detection instead
684
static void pass_tone_map(struct gl_shader_cache *sc,
685
                          float src_peak, float dst_peak,
686
                          const struct gl_tone_map_opts *opts)
687
0
{
688
0
    GLSLF("// HDR tone mapping\n");
689
690
    // To prevent discoloration due to out-of-bounds clipping, we need to make
691
    // sure to reduce the value range as far as necessary to keep the entire
692
    // signal in range, so tone map based on the brightest component.
693
0
    GLSL(int sig_idx = 0;)
694
0
    GLSL(if (color[1] > color[sig_idx]) sig_idx = 1;)
695
0
    GLSL(if (color[2] > color[sig_idx]) sig_idx = 2;)
696
0
    GLSL(float sig_max = color[sig_idx];)
697
0
    GLSLF("float sig_peak = %f;\n", src_peak);
698
0
    GLSLF("float sig_avg = %f;\n", sdr_avg);
699
700
0
    if (opts->compute_peak >= 0)
701
0
        hdr_update_peak(sc, opts);
702
703
    // Always hard-clip the upper bound of the signal range to avoid functions
704
    // exploding on inputs greater than 1.0
705
0
    GLSLF("vec3 sig = min(color.rgb, sig_peak);\n");
706
707
    // This function always operates on an absolute scale, so ignore the
708
    // dst_peak normalization for it
709
0
    float dst_scale = dst_peak;
710
0
    enum tone_mapping curve = opts->curve ? opts->curve : TONE_MAPPING_BT_2390;
711
0
    if (curve == TONE_MAPPING_BT_2390)
712
0
        dst_scale = 1.0;
713
714
    // Rescale the variables in order to bring it into a representation where
715
    // 1.0 represents the dst_peak. This is because all of the tone mapping
716
    // algorithms are defined in such a way that they map to the range [0.0, 1.0].
717
0
    if (dst_scale > 1.0) {
718
0
        GLSLF("sig *= 1.0/%f;\n", dst_scale);
719
0
        GLSLF("sig_peak *= 1.0/%f;\n", dst_scale);
720
0
    }
721
722
0
    GLSL(float sig_orig = sig[sig_idx];)
723
0
    GLSLF("float slope = min(%f, %f / sig_avg);\n", opts->max_boost, sdr_avg);
724
0
    GLSL(sig *= slope;)
725
0
    GLSL(sig_peak *= slope;)
726
727
0
    float param = opts->curve_param;
728
0
    switch (curve) {
729
0
    case TONE_MAPPING_CLIP:
730
0
        GLSLF("sig = min(%f * sig, 1.0);\n", isnan(param) ? 1.0 : param);
731
0
        break;
732
733
0
    case TONE_MAPPING_MOBIUS:
734
0
        GLSLF("if (sig_peak > (1.0 + 1e-6)) {\n");
735
0
        GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param);
736
        // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0
737
        // where M(x) = scale * (x+a)/(x+b)
738
0
        GLSLF("float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak);\n");
739
0
        GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / "
740
0
              "max(1e-6, sig_peak - 1.0);\n");
741
0
        GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n");
742
0
        GLSLF("sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b)),"
743
0
              "          %s(greaterThan(sig, vec3(j))));\n",
744
0
              gl_sc_bvec(sc, 3));
745
0
        GLSLF("}\n");
746
0
        break;
747
748
0
    case TONE_MAPPING_REINHARD: {
749
0
        float contrast = isnan(param) ? 0.5 : param,
750
0
              offset = (1.0 - contrast) / contrast;
751
0
        GLSLF("sig = sig / (sig + vec3(%f));\n", offset);
752
0
        GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset);
753
0
        GLSL(sig *= scale;)
754
0
        break;
755
0
    }
756
757
0
    case TONE_MAPPING_HABLE: {
758
0
        float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;
759
0
        GLSLHF("vec3 hable(vec3 x) {\n");
760
0
        GLSLHF("return (x * (%f*x + vec3(%f)) + vec3(%f)) / "
761
0
               "       (x * (%f*x + vec3(%f)) + vec3(%f)) "
762
0
               "       - vec3(%f);\n",
763
0
               A, C*B, D*E,
764
0
               A, B, D*F,
765
0
               E/F);
766
0
        GLSLHF("}\n");
767
0
        GLSLF("sig = hable(max(vec3(0.0), sig)) / hable(vec3(sig_peak)).x;\n");
768
0
        break;
769
0
    }
770
771
0
    case TONE_MAPPING_GAMMA: {
772
0
        float gamma = isnan(param) ? 1.8 : param;
773
0
        GLSLF("const float cutoff = 0.05, gamma = 1.0/%f;\n", gamma);
774
0
        GLSL(float scale = pow(cutoff / sig_peak, gamma.x) / cutoff;)
775
0
        GLSLF("sig = mix(scale * sig,"
776
0
              "          pow(sig / sig_peak, vec3(gamma)),"
777
0
              "          %s(greaterThan(sig, vec3(cutoff))));\n",
778
0
              gl_sc_bvec(sc, 3));
779
0
        break;
780
0
    }
781
782
0
    case TONE_MAPPING_LINEAR: {
783
0
        float coeff = isnan(param) ? 1.0 : param;
784
0
        GLSLF("sig = min(%f / sig_peak, 1.0) * sig;\n", coeff);
785
0
        break;
786
0
    }
787
788
0
    case TONE_MAPPING_BT_2390:
789
        // We first need to encode both sig and sig_peak into PQ space
790
0
        GLSLF("vec4 sig_pq = vec4(sig.rgb, sig_peak);                           \n"
791
0
              "sig_pq *= vec4(1.0/%f);                                          \n"
792
0
              "sig_pq = pow(sig_pq, vec4(%f));                                  \n"
793
0
              "sig_pq = (vec4(%f) + vec4(%f) * sig_pq)                          \n"
794
0
              "          / (vec4(1.0) + vec4(%f) * sig_pq);                     \n"
795
0
              "sig_pq = pow(sig_pq, vec4(%f));                                  \n",
796
0
              10000.0 / MP_REF_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2);
797
        // Encode both the signal and the target brightness to be relative to
798
        // the source peak brightness, and figure out the target peak in this space
799
0
        GLSLF("float scale = 1.0 / sig_pq.a;                                    \n"
800
0
              "sig_pq.rgb *= vec3(scale);                                       \n"
801
0
              "float maxLum = %f * scale;                                       \n",
802
0
              pq_delinearize(dst_peak));
803
        // Apply piece-wise hermite spline
804
0
        GLSLF("float ks = 1.5 * maxLum - 0.5;                                   \n"
805
0
              "vec3 tb = (sig_pq.rgb - vec3(ks)) / vec3(1.0 - ks);              \n"
806
0
              "vec3 tb2 = tb * tb;                                              \n"
807
0
              "vec3 tb3 = tb2 * tb;                                             \n"
808
0
              "vec3 pb = (2.0 * tb3 - 3.0 * tb2 + vec3(1.0)) * vec3(ks) +       \n"
809
0
              "          (tb3 - 2.0 * tb2 + tb) * vec3(1.0 - ks) +              \n"
810
0
              "          (-2.0 * tb3 + 3.0 * tb2) * vec3(maxLum);               \n"
811
0
              "sig = mix(pb, sig_pq.rgb, %s(lessThan(sig_pq.rgb, vec3(ks))));   \n",
812
0
              gl_sc_bvec(sc, 3));
813
        // Convert back from PQ space to linear light
814
0
        GLSLF("sig *= vec3(sig_pq.a);                                           \n"
815
0
              "sig = pow(sig, vec3(1.0/%f));                                    \n"
816
0
              "sig = max(sig - vec3(%f), 0.0) /                                 \n"
817
0
              "          (vec3(%f) - vec3(%f) * sig);                           \n"
818
0
              "sig = pow(sig, vec3(1.0/%f));                                    \n"
819
0
              "sig *= vec3(%f);                                                 \n",
820
0
              PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / MP_REF_WHITE);
821
0
        break;
822
823
0
    default:
824
0
        abort();
825
0
    }
826
827
0
    GLSLF("float coeff = max(sig[sig_idx] - %f, 1e-6) / \n"
828
0
          "              max(sig[sig_idx], 1.0);        \n"
829
0
          "coeff = %f * pow(coeff / %f, %f);            \n"
830
0
          "color.rgb *= sig[sig_idx] / sig_orig;        \n"
831
0
          "color.rgb = mix(color.rgb, %f * sig, coeff); \n",
832
0
          0.18 / dst_scale, 0.90, dst_scale, 0.20, dst_scale);
833
0
}
834
835
// Map colors from one source space to another. These source spaces must be
836
// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any
837
// auto-guessing. If is_linear is true, we assume the input has already been
838
// linearized (e.g. for linear-scaling). If `opts->compute_peak` is true, we
839
// will detect the peak instead of relying on metadata. Note that this requires
840
// the caller to have already bound the appropriate SSBO and set up the compute
841
// shader metadata
842
void pass_color_map(struct gl_shader_cache *sc, bool is_linear,
843
                    const struct pl_color_space *src, const struct pl_color_space *dst,
844
                    enum mp_csp_light src_light, enum mp_csp_light dst_light,
845
                    const struct gl_tone_map_opts *opts)
846
0
{
847
0
    GLSLF("// color mapping\n");
848
849
    // Some operations need access to the video's luma coefficients, so make
850
    // them available
851
0
    pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(src->primaries));
852
0
    gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz.m[1]);
853
0
    rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(dst->primaries));
854
0
    gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz.m[1]);
855
856
0
    bool need_ootf = src_light != dst_light;
857
0
    if (src_light == MP_CSP_LIGHT_SCENE_HLG && src->hdr.max_luma != dst->hdr.max_luma)
858
0
        need_ootf = true;
859
860
    // All operations from here on require linear light as a starting point,
861
    // so we linearize even if src->gamma == dst->transfer when one of the other
862
    // operations needs it
863
0
    bool need_linear = src->transfer != dst->transfer ||
864
0
                       src->primaries != dst->primaries ||
865
0
                       src->hdr.max_luma != dst->hdr.max_luma ||
866
0
                       need_ootf;
867
868
0
    if (need_linear && !is_linear) {
869
        // We also pull it up so that 1.0 is the reference white
870
0
        pass_linearize(sc, src->transfer);
871
0
        is_linear = true;
872
0
    }
873
874
    // Pre-scale the incoming values into an absolute scale
875
0
    GLSLF("color.rgb *= vec3(%f);\n", pl_color_transfer_nominal_peak(src->transfer));
876
877
0
    if (need_ootf)
878
0
        pass_ootf(sc, src_light, src->hdr.max_luma / MP_REF_WHITE);
879
880
    // Tone map to prevent clipping due to excessive brightness
881
0
    if (src->hdr.max_luma > dst->hdr.max_luma) {
882
0
        pass_tone_map(sc, src->hdr.max_luma / MP_REF_WHITE,
883
0
                      dst->hdr.max_luma / MP_REF_WHITE, opts);
884
0
    }
885
886
    // Adapt to the right colorspace if necessary
887
0
    if (src->primaries != dst->primaries) {
888
0
        const struct pl_raw_primaries *csp_src = pl_raw_primaries_get(src->primaries),
889
0
                                      *csp_dst = pl_raw_primaries_get(dst->primaries);
890
0
        pl_matrix3x3 m = pl_get_color_mapping_matrix(csp_src, csp_dst,
891
0
                                                     PL_INTENT_RELATIVE_COLORIMETRIC);
892
0
        gl_sc_uniform_mat3(sc, "cms_matrix", true, &m.m[0][0]);
893
0
        GLSL(color.rgb = cms_matrix * color.rgb;)
894
895
0
        if (!opts->gamut_mode || opts->gamut_mode == GAMUT_DESATURATE) {
896
0
            GLSL(float cmin = min(min(color.r, color.g), color.b);)
897
0
            GLSL(if (cmin < 0.0) {
898
0
                     float luma = dot(dst_luma, color.rgb);
899
0
                     float coeff = cmin / (cmin - luma);
900
0
                     color.rgb = mix(color.rgb, vec3(luma), coeff);
901
0
                 })
902
0
            GLSLF("float cmax = 1.0/%f * max(max(color.r, color.g), color.b);\n",
903
0
                  dst->hdr.max_luma / MP_REF_WHITE);
904
0
            GLSL(if (cmax > 1.0) color.rgb /= cmax;)
905
0
        }
906
0
    }
907
908
0
    if (need_ootf)
909
0
        pass_inverse_ootf(sc, dst_light, dst->hdr.max_luma / MP_REF_WHITE);
910
911
    // Post-scale the outgoing values from absolute scale to normalized.
912
    // For SDR, we normalize to the chosen signal peak. For HDR, we normalize
913
    // to the encoding range of the transfer function.
914
0
    float dst_range = dst->hdr.max_luma / MP_REF_WHITE;
915
0
    if (pl_color_space_is_hdr(dst))
916
0
        dst_range = pl_color_transfer_nominal_peak(dst->transfer);
917
918
0
    GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range);
919
920
    // Warn for remaining out-of-gamut colors if enabled
921
0
    if (opts->gamut_mode == GAMUT_WARN) {
922
0
        GLSL(if (any(greaterThan(color.rgb, vec3(1.005))) ||
923
0
                 any(lessThan(color.rgb, vec3(-0.005)))))
924
0
            GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert
925
0
    }
926
927
0
    if (is_linear)
928
0
        pass_delinearize(sc, dst->transfer);
929
0
}
930
931
// Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post.
932
// Obtain random numbers by calling rand(h), followed by h = permute(h) to
933
// update the state. Assumes the texture was hooked.
934
// permute() was modified from the original to avoid "large" numbers in
935
// calculations, since low-end mobile GPUs choke on them (overflow).
936
static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg)
937
0
{
938
0
    GLSLH(float mod289(float x)  { return x - floor(x * 1.0/289.0) * 289.0; })
939
0
    GLSLHF("float permute(float x) {\n");
940
0
        GLSLH(return mod289( mod289(34.0*x + 1.0) * (fract(x) + 1.0) );)
941
0
    GLSLHF("}\n");
942
0
    GLSLH(float rand(float x)    { return fract(x * 1.0/41.0); })
943
944
    // Initialize the PRNG by hashing the position + a random uniform
945
0
    GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);)
946
0
    GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);)
947
0
    gl_sc_uniform_dynamic(sc);
948
0
    gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX);
949
0
}
950
951
const struct deband_opts deband_opts_def = {
952
    .iterations = 1,
953
    .threshold = 48.0,
954
    .range = 16.0,
955
    .grain = 32.0,
956
};
957
958
#define OPT_BASE_STRUCT struct deband_opts
959
const struct m_sub_options deband_conf = {
960
    .opts = (const m_option_t[]) {
961
        {"iterations", OPT_INT(iterations), M_RANGE(0, 16)},
962
        {"threshold", OPT_FLOAT(threshold), M_RANGE(0.0, 4096.0)},
963
        {"range", OPT_FLOAT(range), M_RANGE(1.0, 64.0)},
964
        {"grain", OPT_FLOAT(grain), M_RANGE(0.0, 4096.0)},
965
        {0}
966
    },
967
    .size = sizeof(struct deband_opts),
968
    .defaults = &deband_opts_def,
969
};
970
971
// Stochastically sample a debanded result from a hooked texture.
972
void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
973
                        AVLFG *lfg, enum pl_color_transfer trc)
974
0
{
975
    // Initialize the PRNG
976
0
    GLSLF("{\n");
977
0
    prng_init(sc, lfg);
978
979
    // Helper: Compute a stochastic approximation of the avg color around a
980
    // pixel
981
0
    GLSLHF("vec4 average(float range, inout float h) {\n");
982
        // Compute a random rangle and distance
983
0
        GLSLH(float dist = rand(h) * range;     h = permute(h);)
984
0
        GLSLH(float dir  = rand(h) * 6.2831853; h = permute(h);)
985
0
        GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));)
986
987
        // Sample at quarter-turn intervals around the source pixel
988
0
        GLSLH(vec4 ref[4];)
989
0
        GLSLH(ref[0] = HOOKED_texOff(vec2( o.x,  o.y));)
990
0
        GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y,  o.x));)
991
0
        GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));)
992
0
        GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));)
993
994
        // Return the (normalized) average
995
0
        GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;)
996
0
    GLSLHF("}\n");
997
998
    // Sample the source pixel
999
0
    GLSL(color = HOOKED_tex(HOOKED_pos);)
1000
0
    GLSLF("vec4 avg, diff;\n");
1001
0
    for (int i = 1; i <= opts->iterations; i++) {
1002
        // Sample the average pixel and use it instead of the original if
1003
        // the difference is below the given threshold
1004
0
        GLSLF("avg = average(%f, h);\n", i * opts->range);
1005
0
        GLSL(diff = abs(color - avg);)
1006
0
        GLSLF("color = mix(avg, color, %s(greaterThan(diff, vec4(%f))));\n",
1007
0
              gl_sc_bvec(sc, 4), opts->threshold / (i * 16384.0));
1008
0
    }
1009
1010
    // Add some random noise to smooth out residual differences
1011
0
    GLSL(vec3 noise;)
1012
0
    GLSL(noise.x = rand(h); h = permute(h);)
1013
0
    GLSL(noise.y = rand(h); h = permute(h);)
1014
0
    GLSL(noise.z = rand(h); h = permute(h);)
1015
1016
    // Noise is scaled to the signal level to prevent extreme noise for HDR
1017
0
    float gain = opts->grain/8192.0 / pl_color_transfer_nominal_peak(trc);
1018
0
    GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain);
1019
0
    GLSLF("}\n");
1020
0
}
1021
1022
// Assumes the texture was hooked
1023
0
void pass_sample_unsharp(struct gl_shader_cache *sc, float param) {
1024
0
    GLSLF("{\n");
1025
0
    GLSL(float st1 = 1.2;)
1026
0
    GLSL(vec4 p = HOOKED_tex(HOOKED_pos);)
1027
0
    GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1))
1028
0
                   + HOOKED_texOff(st1 * vec2(+1, -1))
1029
0
                   + HOOKED_texOff(st1 * vec2(-1, +1))
1030
0
                   + HOOKED_texOff(st1 * vec2(-1, -1));)
1031
0
    GLSL(float st2 = 1.5;)
1032
0
    GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1,  0))
1033
0
                   + HOOKED_texOff(st2 * vec2( 0, +1))
1034
0
                   + HOOKED_texOff(st2 * vec2(-1,  0))
1035
0
                   + HOOKED_texOff(st2 * vec2( 0, -1));)
1036
0
    GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;)
1037
0
    GLSLF("color = p + t * %f;\n", param);
1038
0
    GLSLF("}\n");
1039
0
}