Coverage Report

Created: 2026-06-25 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mpv/video/out/gpu/shader_cache.c
Line
Count
Source
1
#include <stddef.h>
2
#include <stdint.h>
3
#include <stdlib.h>
4
#include <string.h>
5
#include <stdarg.h>
6
#include <assert.h>
7
8
#include <libavutil/sha.h>
9
#include <libavutil/mem.h>
10
11
#include "osdep/io.h"
12
13
#include "common/common.h"
14
#include "misc/hash.h"
15
#include "misc/io_utils.h"
16
#include "options/path.h"
17
#include "stream/stream.h"
18
#include "shader_cache.h"
19
#include "utils.h"
20
21
// Force a cache flush if more than this number of shaders are created.
22
0
#define SC_MAX_ENTRIES 256
23
24
// Storage for the value of a single shader input. Which member is meaningful
// depends on the ra_vartype stored in the owning uniform's input description.
union uniform_val {
    float f[9];         // RA_VARTYPE_FLOAT (large enough for a mat3)
    int i[4];           // RA_VARTYPE_INT
    struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_*
    struct ra_buf *buf; // RA_VARTYPE_BUF_*
};
30
31
// Mechanism through which a uniform's value is delivered to the shader.
// The numeric values are also used as indices into lookup tables.
enum sc_uniform_type {
    SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM)
    SC_UNIFORM_TYPE_UBO    = 1, // uniform buffer (RA_CAP_BUF_RO)
    SC_UNIFORM_TYPE_PUSHC  = 2, // push constant (ra.max_pushc_size)
};
36
37
struct sc_uniform {
38
    enum sc_uniform_type type;
39
    struct ra_renderpass_input input;
40
    const char *glsl_type;
41
    union uniform_val v;
42
    char *buffer_format;
43
    // for SC_UNIFORM_TYPE_UBO/PUSHC:
44
    struct ra_layout layout;
45
    size_t offset; // byte offset within the buffer
46
};
47
48
struct sc_cached_uniform {
49
    union uniform_val v;
50
    int index; // for ra_renderpass_input_val
51
    bool set; // whether the uniform has ever been set
52
};
53
54
struct sc_entry {
55
    struct ra_renderpass *pass;
56
    struct sc_cached_uniform *cached_uniforms;
57
    int num_cached_uniforms;
58
    bstr total;
59
    struct timer_pool *timer;
60
    struct ra_buf *ubo;
61
    int ubo_index; // for ra_renderpass_input_val.index
62
    void *pushc;
63
};
64
65
struct gl_shader_cache {
66
    struct ra *ra;
67
    struct mp_log *log;
68
69
    // permanent
70
    char **exts;
71
    int num_exts;
72
73
    // this is modified during use (gl_sc_add() etc.) and reset for each shader
74
    bstr prelude_text;
75
    bstr header_text;
76
    bstr text;
77
78
    // Next binding point (texture unit, image unit, buffer binding, etc.)
79
    // In OpenGL these are separate for each input type
80
    int next_binding[RA_VARTYPE_COUNT];
81
    bool next_uniform_dynamic;
82
83
    struct ra_renderpass_params params;
84
85
    struct sc_entry **entries;
86
    int num_entries;
87
88
    struct sc_entry *current_shader; // set by gl_sc_generate()
89
90
    struct sc_uniform *uniforms;
91
    int num_uniforms;
92
93
    int ubo_binding;
94
    size_t ubo_size;
95
    size_t pushc_size;
96
97
    struct ra_renderpass_input_val *values;
98
    int num_values;
99
100
    // For checking that the user is calling gl_sc_reset() properly.
101
    bool needs_reset;
102
103
    bool error_state; // true if an error occurred
104
105
    // temporary buffers (avoids frequent reallocations)
106
    bstr tmp[6];
107
108
    // For the disk-cache.
109
    char *cache_dir;
110
    struct mpv_global *global; // can be NULL
111
};
112
113
struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global,
114
                                     struct mp_log *log)
115
0
{
116
0
    struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc);
117
0
    *sc = (struct gl_shader_cache){
118
0
        .ra = ra,
119
0
        .global = global,
120
0
        .log = log,
121
0
    };
122
0
    gl_sc_reset(sc);
123
0
    return sc;
124
0
}
125
126
// Reset the previous pass. This must be called after gl_sc_generate and before
127
// starting a new shader. It may also be called on errors.
128
void gl_sc_reset(struct gl_shader_cache *sc)
129
0
{
130
0
    sc->prelude_text.len = 0;
131
0
    sc->header_text.len = 0;
132
0
    sc->text.len = 0;
133
0
    for (int n = 0; n < sc->num_uniforms; n++)
134
0
        talloc_free((void *)sc->uniforms[n].input.name);
135
0
    sc->num_uniforms = 0;
136
0
    sc->ubo_binding = 0;
137
0
    sc->ubo_size = 0;
138
0
    sc->pushc_size = 0;
139
0
    for (int i = 0; i < RA_VARTYPE_COUNT; i++)
140
0
        sc->next_binding[i] = 0;
141
0
    sc->next_uniform_dynamic = false;
142
0
    sc->current_shader = NULL;
143
0
    sc->params = (struct ra_renderpass_params){0};
144
0
    sc->needs_reset = false;
145
0
}
146
147
static void sc_flush_cache(struct gl_shader_cache *sc)
148
0
{
149
0
    MP_DBG(sc, "flushing shader cache\n");
150
151
0
    for (int n = 0; n < sc->num_entries; n++) {
152
0
        struct sc_entry *e = sc->entries[n];
153
0
        ra_buf_free(sc->ra, &e->ubo);
154
0
        if (e->pass)
155
0
            sc->ra->fns->renderpass_destroy(sc->ra, e->pass);
156
0
        timer_pool_destroy(e->timer);
157
0
        talloc_free(e);
158
0
    }
159
0
    sc->num_entries = 0;
160
0
}
161
162
// Tear down a shader cache created with gl_sc_create(). Passing NULL is a
// no-op.
void gl_sc_destroy(struct gl_shader_cache *sc)
{
    if (sc) {
        gl_sc_reset(sc);
        sc_flush_cache(sc);
        talloc_free(sc);
    }
}
170
171
bool gl_sc_error_state(struct gl_shader_cache *sc)
172
0
{
173
0
    return sc->error_state;
174
0
}
175
176
void gl_sc_reset_error(struct gl_shader_cache *sc)
177
0
{
178
0
    sc->error_state = false;
179
0
}
180
181
void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name)
182
0
{
183
0
    for (int n = 0; n < sc->num_exts; n++) {
184
0
        if (strcmp(sc->exts[n], name) == 0)
185
0
            return;
186
0
    }
187
0
    MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name));
188
0
}
189
190
void gl_sc_add(struct gl_shader_cache *sc, const char *text)
191
0
{
192
0
    bstr_xappend0(sc, &sc->text, text);
193
0
}
194
195
void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...)
196
0
{
197
0
    va_list ap;
198
0
    va_start(ap, textf);
199
0
    bstr_xappend_vasprintf(sc, &sc->text, textf, ap);
200
0
    va_end(ap);
201
0
}
202
203
void gl_sc_hadd(struct gl_shader_cache *sc, const char *text)
204
0
{
205
0
    bstr_xappend0(sc, &sc->header_text, text);
206
0
}
207
208
void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
209
0
{
210
0
    va_list ap;
211
0
    va_start(ap, textf);
212
0
    bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap);
213
0
    va_end(ap);
214
0
}
215
216
void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text)
217
0
{
218
0
    bstr_xappend(sc, &sc->header_text, text);
219
0
}
220
221
void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
222
0
{
223
0
    va_list ap;
224
0
    va_start(ap, textf);
225
0
    bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap);
226
0
    va_end(ap);
227
0
}
228
229
static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
230
                                       struct bstr name)
231
0
{
232
0
    struct sc_uniform new = {
233
0
        .input = {
234
0
            .dim_v = 1,
235
0
            .dim_m = 1,
236
0
        },
237
0
    };
238
239
0
    for (int n = 0; n < sc->num_uniforms; n++) {
240
0
        struct sc_uniform *u = &sc->uniforms[n];
241
0
        if (bstrcmp(bstr0(u->input.name), name) == 0) {
242
0
            const char *allocname = u->input.name;
243
0
            *u = new;
244
0
            u->input.name = allocname;
245
0
            return u;
246
0
        }
247
0
    }
248
249
    // not found -> add it
250
0
    new.input.name = bstrdup0(NULL, name);
251
0
    MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new);
252
0
    return &sc->uniforms[sc->num_uniforms - 1];
253
0
}
254
255
static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
256
0
{
257
0
    return sc->next_binding[sc->ra->fns->desc_namespace(sc->ra, type)]++;
258
0
}
259
260
void gl_sc_uniform_dynamic(struct gl_shader_cache *sc)
261
0
{
262
0
    sc->next_uniform_dynamic = true;
263
0
}
264
265
// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input
266
// and glsl_type/buffer_format are already set.
267
static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u)
268
0
{
269
0
    bool dynamic = sc->next_uniform_dynamic;
270
0
    sc->next_uniform_dynamic = false;
271
272
    // Try not using push constants for "large" values like matrices, since
273
    // this is likely to both exceed the VGPR budget as well as the pushc size
274
    // budget
275
0
    bool try_pushc = u->input.dim_m == 1 || dynamic;
276
277
    // Attempt using push constants first
278
0
    if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) {
279
0
        struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input);
280
0
        size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align);
281
        // Push constants have limited size, so make sure we don't exceed this
282
0
        size_t new_size = offset + layout.size;
283
0
        if (new_size <= sc->ra->max_pushc_size) {
284
0
            u->type = SC_UNIFORM_TYPE_PUSHC;
285
0
            u->layout = layout;
286
0
            u->offset = offset;
287
0
            sc->pushc_size = new_size;
288
0
            return;
289
0
        }
290
0
    }
291
292
    // Attempt using uniform buffer next. The GLSL version 440 check is due
293
    // to explicit offsets on UBO entries. In theory we could leave away
294
    // the offsets and support UBOs for older GL as well, but this is a nice
295
    // safety net for driver bugs (and also rules out potentially buggy drivers)
296
    // Also avoid UBOs for highly dynamic stuff since that requires synchronizing
297
    // the UBO writes every frame
298
0
    bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic;
299
0
    if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) {
300
0
        u->type = SC_UNIFORM_TYPE_UBO;
301
0
        u->layout = sc->ra->fns->uniform_layout(&u->input);
302
0
        u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
303
0
        sc->ubo_size = u->offset + u->layout.size;
304
0
        return;
305
0
    }
306
307
    // If all else fails, use global uniforms
308
0
    mp_assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
309
0
    u->type = SC_UNIFORM_TYPE_GLOBAL;
310
0
}
311
312
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
313
                           struct ra_tex *tex)
314
0
{
315
0
    const char *glsl_type = "sampler2D";
316
0
    if (tex->params.dimensions == 1) {
317
0
        glsl_type = "sampler1D";
318
0
    } else if (tex->params.dimensions == 3) {
319
0
        glsl_type = "sampler3D";
320
0
    } else if (tex->params.non_normalized) {
321
0
        glsl_type = "sampler2DRect";
322
0
    } else if (tex->params.external_oes) {
323
0
        glsl_type = "samplerExternalOES";
324
0
    } else if (tex->params.format->ctype == RA_CTYPE_UINT) {
325
0
        glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D";
326
0
    }
327
328
0
    struct sc_uniform *u = find_uniform(sc, bstr0(name));
329
0
    u->input.type = RA_VARTYPE_TEX;
330
0
    u->glsl_type = glsl_type;
331
0
    u->input.binding = gl_sc_next_binding(sc, u->input.type);
332
0
    u->v.tex = tex;
333
0
}
334
335
void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name,
336
                              struct ra_tex *tex)
337
0
{
338
0
    gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store");
339
340
0
    struct sc_uniform *u = find_uniform(sc, bstr0(name));
341
0
    u->input.type = RA_VARTYPE_IMG_W;
342
0
    u->glsl_type = sc->ra->glsl_es ? "writeonly highp image2D" : "writeonly image2D";
343
0
    u->input.binding = gl_sc_next_binding(sc, u->input.type);
344
0
    u->v.tex = tex;
345
0
}
346
347
void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf,
348
                char *format, ...)
349
0
{
350
0
    mp_assert(sc->ra->caps & RA_CAP_BUF_RW);
351
0
    gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object");
352
353
0
    struct sc_uniform *u = find_uniform(sc, bstr0(name));
354
0
    u->input.type = RA_VARTYPE_BUF_RW;
355
0
    u->glsl_type = "";
356
0
    u->input.binding = gl_sc_next_binding(sc, u->input.type);
357
0
    u->v.buf = buf;
358
359
0
    va_list ap;
360
0
    va_start(ap, format);
361
0
    u->buffer_format = ta_vasprintf(sc, format, ap);
362
0
    va_end(ap);
363
0
}
364
365
void gl_sc_uniform_f_bstr(struct gl_shader_cache *sc, struct bstr name, float f)
366
0
{
367
0
    struct sc_uniform *u = find_uniform(sc, name);
368
0
    u->input.type = RA_VARTYPE_FLOAT;
369
0
    u->glsl_type = "float";
370
0
    update_uniform_params(sc, u);
371
0
    u->v.f[0] = f;
372
0
}
373
374
void gl_sc_uniform_i_bstr(struct gl_shader_cache *sc, struct bstr name, int i)
375
0
{
376
0
    struct sc_uniform *u = find_uniform(sc, name);
377
0
    u->input.type = RA_VARTYPE_INT;
378
0
    u->glsl_type = "int";
379
0
    update_uniform_params(sc, u);
380
0
    u->v.i[0] = i;
381
0
}
382
383
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
384
0
{
385
0
    gl_sc_uniform_f_bstr(sc, bstr0(name), f);
386
0
}
387
388
void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
389
0
{
390
0
    gl_sc_uniform_i_bstr(sc, bstr0(name), i);
391
0
}
392
393
void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
394
0
{
395
0
    struct sc_uniform *u = find_uniform(sc, bstr0(name));
396
0
    u->input.type = RA_VARTYPE_FLOAT;
397
0
    u->input.dim_v = 2;
398
0
    u->glsl_type = "vec2";
399
0
    update_uniform_params(sc, u);
400
0
    u->v.f[0] = f[0];
401
0
    u->v.f[1] = f[1];
402
0
}
403
404
void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3])
405
0
{
406
0
    struct sc_uniform *u = find_uniform(sc, bstr0(name));
407
0
    u->input.type = RA_VARTYPE_FLOAT;
408
0
    u->input.dim_v = 3;
409
0
    u->glsl_type = "vec3";
410
0
    update_uniform_params(sc, u);
411
0
    u->v.f[0] = f[0];
412
0
    u->v.f[1] = f[1];
413
0
    u->v.f[2] = f[2];
414
0
}
415
416
// Transpose a 2x2 matrix stored as 4 consecutive floats, in place. Only the
// two off-diagonal elements need to trade places.
static void transpose2x2(float r[2 * 2])
{
    float upper = r[1];
    r[1] = r[2];
    r[2] = upper;
}
420
421
void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
422
                        bool transpose, float *v)
423
0
{
424
0
    struct sc_uniform *u = find_uniform(sc, bstr0(name));
425
0
    u->input.type = RA_VARTYPE_FLOAT;
426
0
    u->input.dim_v = 2;
427
0
    u->input.dim_m = 2;
428
0
    u->glsl_type = "mat2";
429
0
    update_uniform_params(sc, u);
430
0
    for (int n = 0; n < 4; n++)
431
0
        u->v.f[n] = v[n];
432
0
    if (transpose)
433
0
        transpose2x2(&u->v.f[0]);
434
0
}
435
436
// Transpose a 3x3 matrix stored as 9 consecutive floats, in place, by
// swapping each element above the diagonal with its mirror below it.
static void transpose3x3(float r[3 * 3])
{
    for (int a = 0; a < 3; a++) {
        for (int b = a + 1; b < 3; b++) {
            float held = r[a + 3 * b];
            r[a + 3 * b] = r[b + 3 * a];
            r[b + 3 * a] = held;
        }
    }
}
442
443
void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
444
                        bool transpose, float *v)
445
0
{
446
0
    struct sc_uniform *u = find_uniform(sc, bstr0(name));
447
0
    u->input.type = RA_VARTYPE_FLOAT;
448
0
    u->input.dim_v = 3;
449
0
    u->input.dim_m = 3;
450
0
    u->glsl_type = "mat3";
451
0
    update_uniform_params(sc, u);
452
0
    for (int n = 0; n < 9; n++)
453
0
        u->v.f[n] = v[n];
454
0
    if (transpose)
455
0
        transpose3x3(&u->v.f[0]);
456
0
}
457
458
void gl_sc_blend(struct gl_shader_cache *sc,
459
                 enum ra_blend blend_src_rgb,
460
                 enum ra_blend blend_dst_rgb,
461
                 enum ra_blend blend_src_alpha,
462
                 enum ra_blend blend_dst_alpha)
463
0
{
464
0
    sc->params.enable_blend = true;
465
0
    sc->params.blend_src_rgb = blend_src_rgb;
466
0
    sc->params.blend_dst_rgb = blend_dst_rgb;
467
0
    sc->params.blend_src_alpha = blend_src_alpha;
468
0
    sc->params.blend_dst_alpha = blend_dst_alpha;
469
0
}
470
471
const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims)
472
0
{
473
0
    static const char *bvecs[] = {
474
0
        [1] = "bool",
475
0
        [2] = "bvec2",
476
0
        [3] = "bvec3",
477
0
        [4] = "bvec4",
478
0
    };
479
480
0
    static const char *vecs[] = {
481
0
        [1] = "float",
482
0
        [2] = "vec2",
483
0
        [3] = "vec3",
484
0
        [4] = "vec4",
485
0
    };
486
487
0
    mp_assert(dims > 0 && dims < MP_ARRAY_SIZE(bvecs));
488
0
    return sc->ra->glsl_version >= 130 ? bvecs[dims] : vecs[dims];
489
0
}
490
491
static const char *vao_glsl_type(const struct ra_renderpass_input *e)
492
0
{
493
    // pretty dumb... too dumb, but works for us
494
0
    switch (e->dim_v) {
495
0
    case 1: return "float";
496
0
    case 2: return "vec2";
497
0
    case 3: return "vec3";
498
0
    case 4: return "vec4";
499
0
    default: MP_ASSERT_UNREACHABLE();
500
0
    }
501
0
}
502
503
static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
504
0
{
505
0
    uintptr_t src = (uintptr_t) &u->v;
506
0
    size_t dst = u->offset;
507
0
    struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
508
0
    struct ra_layout dst_layout = u->layout;
509
510
0
    for (int i = 0; i < u->input.dim_m; i++) {
511
0
        ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride);
512
0
        src += src_layout.stride;
513
0
        dst += dst_layout.stride;
514
0
    }
515
0
}
516
517
static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u)
518
0
{
519
0
    uintptr_t src = (uintptr_t) &u->v;
520
0
    uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset;
521
0
    struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
522
0
    struct ra_layout dst_layout = u->layout;
523
524
0
    for (int i = 0; i < u->input.dim_m; i++) {
525
0
        memcpy((void *)dst, (void *)src, src_layout.stride);
526
0
        src += src_layout.stride;
527
0
        dst += dst_layout.stride;
528
0
    }
529
0
}
530
531
static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
532
                           struct sc_uniform *u, int n)
533
0
{
534
0
    struct sc_cached_uniform *un = &e->cached_uniforms[n];
535
0
    struct ra_layout layout = ra_renderpass_input_layout(&u->input);
536
0
    if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0)
537
0
        return;
538
539
0
    un->v = u->v;
540
0
    un->set = true;
541
542
0
    static const char *desc[] = {
543
0
        [SC_UNIFORM_TYPE_UBO]    = "UBO",
544
0
        [SC_UNIFORM_TYPE_PUSHC]  = "PC",
545
0
        [SC_UNIFORM_TYPE_GLOBAL] = "global",
546
0
    };
547
0
    MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name);
548
549
0
    switch (u->type) {
550
0
    case SC_UNIFORM_TYPE_GLOBAL: {
551
0
        struct ra_renderpass_input_val value = {
552
0
            .index = un->index,
553
0
            .data = &un->v,
554
0
        };
555
0
        MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
556
0
        break;
557
0
    }
558
0
    case SC_UNIFORM_TYPE_UBO:
559
0
        mp_assert(e->ubo);
560
0
        update_ubo(sc->ra, e->ubo, u);
561
0
        break;
562
0
    case SC_UNIFORM_TYPE_PUSHC:
563
0
        mp_assert(e->pushc);
564
0
        update_pushc(sc->ra, e->pushc, u);
565
0
        break;
566
0
    default: MP_ASSERT_UNREACHABLE();
567
0
    }
568
0
}
569
570
void gl_sc_set_cache_dir(struct gl_shader_cache *sc, char *dir)
571
0
{
572
0
    talloc_free(sc->cache_dir);
573
0
    if (dir && dir[0]) {
574
0
        dir = mp_get_user_path(NULL, sc->global, dir);
575
0
    } else {
576
0
        dir = mp_find_user_file(NULL, sc->global, "cache", "");
577
0
    }
578
0
    sc->cache_dir = talloc_strdup(sc, dir);
579
0
    talloc_free(dir);
580
0
}
581
582
static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
583
0
{
584
0
    bool ret = false;
585
586
0
    void *tmp = talloc_new(NULL);
587
0
    struct ra_renderpass_params params = sc->params;
588
589
0
    const char *cache_header = "mpv shader cache v1\n";
590
0
    char *cache_filename = NULL;
591
0
    char *cache_dir = NULL;
592
593
0
    if (sc->cache_dir && sc->cache_dir[0]) {
594
        // Try to load it from a disk cache.
595
0
        cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir);
596
597
0
        bstr hashstr = mp_hash_to_bstr(tmp, entry->total.start, entry->total.len, "SHA256");
598
0
        cache_filename = mp_path_join_bstr(tmp, bstr0(cache_dir), hashstr);
599
0
        if (stat(cache_filename, &(struct stat){0}) == 0) {
600
0
            MP_DBG(sc, "Trying to load shader from disk...\n");
601
0
            struct bstr cachedata =
602
0
                stream_read_file(cache_filename, tmp, sc->global, 1000000000);
603
0
            if (bstr_eatstart0(&cachedata, cache_header))
604
0
                params.cached_program = cachedata;
605
0
        }
606
0
    }
607
608
    // If using a UBO, also make sure to add it as an input value so the RA
609
    // can see it
610
0
    if (sc->ubo_size) {
611
0
        entry->ubo_index = sc->params.num_inputs;
612
0
        struct ra_renderpass_input ubo_input = {
613
0
            .name = "UBO",
614
0
            .type = RA_VARTYPE_BUF_RO,
615
0
            .dim_v = 1,
616
0
            .dim_m = 1,
617
0
            .binding = sc->ubo_binding,
618
0
        };
619
0
        MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input);
620
0
    }
621
622
0
    if (sc->pushc_size) {
623
0
        params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4);
624
0
        entry->pushc = talloc_zero_size(entry, params.push_constants_size);
625
0
    }
626
627
0
    if (sc->ubo_size) {
628
0
        struct ra_buf_params ubo_params = {
629
0
            .type = RA_BUF_TYPE_UNIFORM,
630
0
            .size = sc->ubo_size,
631
0
            .host_mutable = true,
632
0
        };
633
634
0
        entry->ubo = ra_buf_create(sc->ra, &ubo_params);
635
0
        if (!entry->ubo) {
636
0
            MP_ERR(sc, "Failed creating uniform buffer!\n");
637
0
            goto error;
638
0
        }
639
0
    }
640
641
0
    entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params);
642
0
    if (!entry->pass)
643
0
        goto error;
644
645
0
    if (entry->pass && cache_filename) {
646
0
        bstr nc = entry->pass->params.cached_program;
647
0
        if (nc.len && !bstr_equals(params.cached_program, nc)) {
648
0
            mp_mkdirp(cache_dir);
649
650
0
            MP_DBG(sc, "Writing shader cache file: %s\n", cache_filename);
651
0
            bstr out = {0};
652
0
            bstr_xappend0(tmp, &out, cache_header);
653
0
            bstr_xappend(tmp, &out, nc);
654
0
            mp_save_to_file(cache_filename, out.start, out.len);
655
0
        }
656
0
    }
657
658
0
    ret = true;
659
660
0
error:
661
0
    talloc_free(tmp);
662
0
    return ret;
663
0
}
664
665
0
// Shorthands for appending to a bstr. Both expect a variable named `sc` to
// be in scope at the point of use; it is passed as the first argument.
// ADD appends printf-style formatted text, ADD_BSTR appends another bstr.
#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__)
#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s))
667
668
// Emit one "layout(offset=...) type name;" line for every registered uniform
// of the given delivery type.
static void add_block_members(struct gl_shader_cache *sc, bstr *dst,
                              enum sc_uniform_type type)
{
    for (int k = 0; k < sc->num_uniforms; k++) {
        struct sc_uniform *u = &sc->uniforms[k];
        if (u->type == type) {
            ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type,
                u->input.name);
        }
    }
}

// Append the GLSL declarations for all registered uniforms to dst: first the
// UBO block (if any UBO uniforms exist), then the push constant block (if
// any), then one declaration for each remaining global uniform.
static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
{
    // Add all of the UBO entries separately as members of their own buffer
    if (sc->ubo_size > 0) {
        ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding);
        add_block_members(sc, dst, SC_UNIFORM_TYPE_UBO);
        ADD(dst, "};\n");
    }

    // Ditto for push constants
    if (sc->pushc_size > 0) {
        ADD(dst, "layout(std430, push_constant) uniform PushC {\n");
        add_block_members(sc, dst, SC_UNIFORM_TYPE_PUSHC);
        ADD(dst, "};\n");
    }

    bool vulkan = sc->ra->glsl_vulkan;

    for (int k = 0; k < sc->num_uniforms; k++) {
        struct sc_uniform *u = &sc->uniforms[k];
        if (u->type != SC_UNIFORM_TYPE_GLOBAL)
            continue;

        const char *name = u->input.name;
        int binding = u->input.binding;

        switch (u->input.type) {
        case RA_VARTYPE_INT:
        case RA_VARTYPE_FLOAT:
            mp_assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
            MP_FALLTHROUGH;
        case RA_VARTYPE_TEX:
            // Vulkan requires explicitly assigning the bindings in the shader
            // source. For OpenGL it's optional, but requires higher GL version
            // so we don't do it (and instead have ra_gl update the bindings
            // after program creation).
            if (vulkan)
                ADD(dst, "layout(binding=%d) ", binding);
            ADD(dst, "uniform %s %s;\n", u->glsl_type, name);
            break;
        case RA_VARTYPE_BUF_RO:
            ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n",
                binding, name, u->buffer_format);
            break;
        case RA_VARTYPE_BUF_RW:
            ADD(dst, "layout(std430, binding=%d) restrict coherent buffer %s { %s };\n",
                binding, name, u->buffer_format);
            break;
        case RA_VARTYPE_IMG_W: {
            // For better compatibility, we have to explicitly label the
            // type of data we will be reading/writing to this image.
            const char *fmt = u->v.tex->params.format->glsl_format;

            if (vulkan && fmt) {
                ADD(dst, "layout(binding=%d, %s) ", binding, fmt);
            } else if (vulkan) {
                ADD(dst, "layout(binding=%d) ", binding);
            } else if (fmt) {
                ADD(dst, "layout(%s) ", fmt);
            }
            ADD(dst, "uniform restrict %s %s;\n", u->glsl_type, name);
        }
        }
    }
}
741
742
// 1. Generate vertex and fragment shaders from the fragment shader text added
743
//    with gl_sc_add(). The generated shader program is cached (based on the
744
//    text), so actual compilation happens only the first time.
745
// 2. Update the uniforms and textures set with gl_sc_uniform_*.
746
// 3. Make the new shader program current (glUseProgram()).
747
// After that, you render, and then you call gl_sc_reset(), which does:
748
// 1. Unbind the program and all textures.
749
// 2. Reset the sc state and prepare for a new shader program. (All uniforms
750
//    and fragment operations needed for the next program have to be re-added.)
751
static void gl_sc_generate(struct gl_shader_cache *sc,
752
                           enum ra_renderpass_type type,
753
                           const struct ra_format *target_format,
754
                           const struct ra_renderpass_input *vao,
755
                           int vao_len, size_t vertex_stride)
756
0
{
757
0
    int glsl_version = sc->ra->glsl_version;
758
0
    int glsl_es = sc->ra->glsl_es ? glsl_version : 0;
759
760
0
    sc->params.type = type;
761
762
    // gl_sc_reset() must be called after ending the previous render process,
763
    // and before starting a new one.
764
0
    mp_assert(!sc->needs_reset);
765
0
    sc->needs_reset = true;
766
767
    // If using a UBO, pick a binding (needed for shader generation)
768
0
    if (sc->ubo_size)
769
0
        sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO);
770
771
0
    for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++)
772
0
        sc->tmp[n].len = 0;
773
774
    // set up shader text (header + uniforms + body)
775
0
    bstr *header = &sc->tmp[0];
776
0
    ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : "");
777
0
    if (type == RA_RENDERPASS_TYPE_COMPUTE) {
778
        // This extension cannot be enabled in fragment shader. Enable it as
779
        // an exception for compute shader.
780
0
        ADD(header, "#extension GL_ARB_compute_shader : enable\n");
781
0
    }
782
0
    for (int n = 0; n < sc->num_exts; n++)
783
0
        ADD(header, "#extension %s : enable\n", sc->exts[n]);
784
0
    if (glsl_es) {
785
0
        ADD(header, "#ifdef GL_FRAGMENT_PRECISION_HIGH\n");
786
0
        ADD(header, "precision highp float;\n");
787
0
        ADD(header, "#else\n");
788
0
        ADD(header, "precision mediump float;\n");
789
0
        ADD(header, "#endif\n");
790
791
0
        ADD(header, "precision mediump sampler2D;\n");
792
0
        if (sc->ra->caps & RA_CAP_TEX_3D)
793
0
            ADD(header, "precision mediump sampler3D;\n");
794
0
    }
795
796
0
    if (glsl_version >= 130) {
797
0
        ADD(header, "#define tex1D texture\n");
798
0
        ADD(header, "#define tex3D texture\n");
799
0
    } else {
800
0
        ADD(header, "#define tex1D texture1D\n");
801
0
        ADD(header, "#define tex3D texture3D\n");
802
0
        ADD(header, "#define texture texture2D\n");
803
0
    }
804
805
    // Additional helpers.
806
0
    ADD(header, "#define LUT_POS(x, lut_size)"
807
0
                " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n");
808
809
0
    char *vert_in = glsl_version >= 130 ? "in" : "attribute";
810
0
    char *vert_out = glsl_version >= 130 ? "out" : "varying";
811
0
    char *frag_in = glsl_version >= 130 ? "in" : "varying";
812
813
0
    struct bstr *vert = NULL, *frag = NULL, *comp = NULL;
814
815
0
    if (type == RA_RENDERPASS_TYPE_RASTER) {
816
        // vertex shader: we don't use the vertex shader, so just setup a
817
        // dummy, which passes through the vertex array attributes.
818
0
        bstr *vert_head = &sc->tmp[1];
819
0
        ADD_BSTR(vert_head, *header);
820
0
        bstr *vert_body = &sc->tmp[2];
821
0
        ADD(vert_body, "void main() {\n");
822
0
        bstr *frag_vaos = &sc->tmp[3];
823
0
        for (int n = 0; n < vao_len; n++) {
824
0
            const struct ra_renderpass_input *e = &vao[n];
825
0
            const char *glsl_type = vao_glsl_type(e);
826
0
            char loc[32] = {0};
827
0
            if (sc->ra->glsl_vulkan)
828
0
                snprintf(loc, sizeof(loc), "layout(location=%d) ", n);
829
0
            if (strcmp(e->name, "position") == 0) {
830
                // setting raster pos. requires setting gl_Position magic variable
831
0
                mp_assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT);
832
0
                ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in);
833
0
                ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n");
834
0
            } else {
835
0
                ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name);
836
0
                ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name);
837
0
                ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name);
838
0
                ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name);
839
0
            }
840
0
        }
841
0
        ADD(vert_body, "}\n");
842
0
        vert = vert_head;
843
0
        ADD_BSTR(vert, *vert_body);
844
845
        // fragment shader; still requires adding used uniforms and VAO elements
846
0
        frag = &sc->tmp[4];
847
0
        ADD_BSTR(frag, *header);
848
0
        if (glsl_version >= 130) {
849
0
            ADD(frag, "%sout vec4 out_color;\n",
850
0
                sc->ra->glsl_vulkan ? "layout(location=0) " : "");
851
0
        }
852
0
        ADD_BSTR(frag, *frag_vaos);
853
0
        add_uniforms(sc, frag);
854
855
0
        ADD_BSTR(frag, sc->prelude_text);
856
0
        ADD_BSTR(frag, sc->header_text);
857
858
0
        ADD(frag, "void main() {\n");
859
        // we require _all_ frag shaders to write to a "vec4 color"
860
0
        ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n");
861
0
        ADD_BSTR(frag, sc->text);
862
0
        if (glsl_version >= 130) {
863
0
            ADD(frag, "out_color = color;\n");
864
0
        } else {
865
0
            ADD(frag, "gl_FragColor = color;\n");
866
0
        }
867
0
        ADD(frag, "}\n");
868
869
        // We need to fix the format of the render dst at renderpass creation
870
        // time
871
0
        mp_assert(target_format);
872
0
        sc->params.target_format = target_format;
873
0
    }
874
875
0
    if (type == RA_RENDERPASS_TYPE_COMPUTE) {
876
0
        comp = &sc->tmp[4];
877
0
        ADD_BSTR(comp, *header);
878
879
0
        add_uniforms(sc, comp);
880
881
0
        ADD_BSTR(comp, sc->prelude_text);
882
0
        ADD_BSTR(comp, sc->header_text);
883
884
0
        ADD(comp, "void main() {\n");
885
0
        ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience
886
0
        ADD_BSTR(comp, sc->text);
887
0
        ADD(comp, "}\n");
888
0
    }
889
890
0
    bstr *hash_total = &sc->tmp[5];
891
892
0
    ADD(hash_total, "type %d\n", sc->params.type);
893
894
0
    if (frag) {
895
0
        ADD_BSTR(hash_total, *frag);
896
0
        sc->params.frag_shader = frag->start;
897
0
    }
898
0
    ADD(hash_total, "\n");
899
0
    if (vert) {
900
0
        ADD_BSTR(hash_total, *vert);
901
0
        sc->params.vertex_shader = vert->start;
902
0
    }
903
0
    ADD(hash_total, "\n");
904
0
    if (comp) {
905
0
        ADD_BSTR(hash_total, *comp);
906
0
        sc->params.compute_shader = comp->start;
907
0
    }
908
0
    ADD(hash_total, "\n");
909
910
0
    if (sc->params.enable_blend) {
911
0
        ADD(hash_total, "blend %d %d %d %d\n",
912
0
            sc->params.blend_src_rgb, sc->params.blend_dst_rgb,
913
0
            sc->params.blend_src_alpha, sc->params.blend_dst_alpha);
914
0
    }
915
916
0
    if (sc->params.target_format)
917
0
        ADD(hash_total, "format %s\n", sc->params.target_format->name);
918
919
0
    struct sc_entry *entry = NULL;
920
0
    for (int n = 0; n < sc->num_entries; n++) {
921
0
        struct sc_entry *cur = sc->entries[n];
922
0
        if (bstr_equals(cur->total, *hash_total)) {
923
0
            entry = cur;
924
0
            break;
925
0
        }
926
0
    }
927
0
    if (!entry) {
928
0
        if (sc->num_entries == SC_MAX_ENTRIES)
929
0
            sc_flush_cache(sc);
930
0
        entry = talloc_ptrtype(NULL, entry);
931
0
        *entry = (struct sc_entry){
932
0
            .total = bstrdup(entry, *hash_total),
933
0
            .timer = timer_pool_create(sc->ra),
934
0
        };
935
936
        // The vertex shader uses mangled names for the vertex attributes, so
937
        // that the fragment shader can use the "real" names. But the shader is
938
        // expecting the vertex attribute names (at least with older GLSL
939
        // targets for GL).
940
0
        sc->params.vertex_stride = vertex_stride;
941
0
        for (int n = 0; n < vao_len; n++) {
942
0
            struct ra_renderpass_input attrib = vao[n];
943
0
            attrib.name = talloc_asprintf(entry, "vertex_%s", attrib.name);
944
0
            MP_TARRAY_APPEND(sc, sc->params.vertex_attribs,
945
0
                             sc->params.num_vertex_attribs, attrib);
946
0
        }
947
948
0
        for (int n = 0; n < sc->num_uniforms; n++) {
949
0
            struct sc_cached_uniform u = {0};
950
0
            if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) {
951
                // global uniforms need to be made visible to the ra_renderpass
952
0
                u.index = sc->params.num_inputs;
953
0
                MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs,
954
0
                                 sc->uniforms[n].input);
955
0
            }
956
0
            MP_TARRAY_APPEND(entry, entry->cached_uniforms,
957
0
                             entry->num_cached_uniforms, u);
958
0
        }
959
0
        if (!create_pass(sc, entry))
960
0
            sc->error_state = true;
961
0
        MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry);
962
0
    }
963
964
0
    if (!entry->pass) {
965
0
        sc->current_shader = NULL;
966
0
        return;
967
0
    }
968
969
0
    mp_assert(sc->num_uniforms == entry->num_cached_uniforms);
970
971
0
    sc->num_values = 0;
972
0
    for (int n = 0; n < sc->num_uniforms; n++)
973
0
        update_uniform(sc, entry, &sc->uniforms[n], n);
974
975
    // If we're using a UBO, make sure to bind it as well
976
0
    if (sc->ubo_size) {
977
0
        struct ra_renderpass_input_val ubo_val = {
978
0
            .index = entry->ubo_index,
979
0
            .data = &entry->ubo,
980
0
        };
981
0
        MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val);
982
0
    }
983
984
0
    sc->current_shader = entry;
985
0
}
986
987
struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
988
                                        struct ra_tex *target, bool discard,
989
                                        const struct ra_renderpass_input *vao,
990
                                        int vao_len, size_t vertex_stride,
991
                                        void *vertices, size_t num_vertices)
992
0
{
993
0
    struct timer_pool *timer = NULL;
994
995
0
    sc->params.invalidate_target = discard;
996
0
    gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format,
997
0
                   vao, vao_len, vertex_stride);
998
0
    if (!sc->current_shader)
999
0
        goto error;
1000
1001
0
    timer = sc->current_shader->timer;
1002
1003
0
    struct mp_rect full_rc = {0, 0, target->params.w, target->params.h};
1004
1005
0
    struct ra_renderpass_run_params run = {
1006
0
        .pass = sc->current_shader->pass,
1007
0
        .values = sc->values,
1008
0
        .num_values = sc->num_values,
1009
0
        .push_constants = sc->current_shader->pushc,
1010
0
        .target = target,
1011
0
        .vertex_data = vertices,
1012
0
        .vertex_count = num_vertices,
1013
0
        .viewport = full_rc,
1014
0
        .scissors = full_rc,
1015
0
    };
1016
1017
0
    timer_pool_start(timer);
1018
0
    sc->ra->fns->renderpass_run(sc->ra, &run);
1019
0
    timer_pool_stop(timer);
1020
1021
0
error:
1022
0
    gl_sc_reset(sc);
1023
0
    return timer_pool_measure(timer);
1024
0
}
1025
1026
struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
1027
                                           int w, int h, int d)
1028
0
{
1029
0
    struct timer_pool *timer = NULL;
1030
1031
0
    gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL, NULL, 0, 0);
1032
0
    if (!sc->current_shader)
1033
0
        goto error;
1034
1035
0
    timer = sc->current_shader->timer;
1036
1037
0
    struct ra_renderpass_run_params run = {
1038
0
        .pass = sc->current_shader->pass,
1039
0
        .values = sc->values,
1040
0
        .num_values = sc->num_values,
1041
0
        .push_constants = sc->current_shader->pushc,
1042
0
        .compute_groups = {w, h, d},
1043
0
    };
1044
1045
0
    timer_pool_start(timer);
1046
0
    sc->ra->fns->renderpass_run(sc->ra, &run);
1047
0
    timer_pool_stop(timer);
1048
1049
0
error:
1050
0
    gl_sc_reset(sc);
1051
0
    return timer_pool_measure(timer);
1052
0
}