/src/mpv/video/out/gpu/shader_cache.c
Line | Count | Source |
1 | | #include <stddef.h> |
2 | | #include <stdint.h> |
3 | | #include <stdlib.h> |
4 | | #include <string.h> |
5 | | #include <stdarg.h> |
6 | | #include <assert.h> |
7 | | |
8 | | #include <libavutil/sha.h> |
9 | | #include <libavutil/mem.h> |
10 | | |
11 | | #include "osdep/io.h" |
12 | | |
13 | | #include "common/common.h" |
14 | | #include "options/path.h" |
15 | | #include "stream/stream.h" |
16 | | #include "shader_cache.h" |
17 | | #include "utils.h" |
18 | | |
19 | | // Force cache flush if more than this number of shaders is created. |
20 | 0 | #define SC_MAX_ENTRIES 256 |
21 | | |
22 | | union uniform_val { |
23 | | float f[9]; // RA_VARTYPE_FLOAT |
24 | | int i[4]; // RA_VARTYPE_INT |
25 | | struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_* |
26 | | struct ra_buf *buf; // RA_VARTYPE_BUF_* |
27 | | }; |
28 | | |
29 | | enum sc_uniform_type { |
30 | | SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM) |
31 | | SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO) |
32 | | SC_UNIFORM_TYPE_PUSHC = 2, // push constant (ra.max_pushc_size) |
33 | | }; |
34 | | |
35 | | struct sc_uniform { |
36 | | enum sc_uniform_type type; |
37 | | struct ra_renderpass_input input; |
38 | | const char *glsl_type; |
39 | | union uniform_val v; |
40 | | char *buffer_format; |
41 | | // for SC_UNIFORM_TYPE_UBO/PUSHC: |
42 | | struct ra_layout layout; |
43 | | size_t offset; // byte offset within the buffer |
44 | | }; |
45 | | |
46 | | struct sc_cached_uniform { |
47 | | union uniform_val v; |
48 | | int index; // for ra_renderpass_input_val |
49 | | bool set; // whether the uniform has ever been set |
50 | | }; |
51 | | |
52 | | struct sc_entry { |
53 | | struct ra_renderpass *pass; |
54 | | struct sc_cached_uniform *cached_uniforms; |
55 | | int num_cached_uniforms; |
56 | | bstr total; |
57 | | struct timer_pool *timer; |
58 | | struct ra_buf *ubo; |
59 | | int ubo_index; // for ra_renderpass_input_val.index |
60 | | void *pushc; |
61 | | }; |
62 | | |
63 | | struct gl_shader_cache { |
64 | | struct ra *ra; |
65 | | struct mp_log *log; |
66 | | |
67 | | // permanent |
68 | | char **exts; |
69 | | int num_exts; |
70 | | |
71 | | // this is modified during use (gl_sc_add() etc.) and reset for each shader |
72 | | bstr prelude_text; |
73 | | bstr header_text; |
74 | | bstr text; |
75 | | |
76 | | // Next binding point (texture unit, image unit, buffer binding, etc.) |
77 | | // In OpenGL these are separate for each input type |
78 | | int next_binding[RA_VARTYPE_COUNT]; |
79 | | bool next_uniform_dynamic; |
80 | | |
81 | | struct ra_renderpass_params params; |
82 | | |
83 | | struct sc_entry **entries; |
84 | | int num_entries; |
85 | | |
86 | | struct sc_entry *current_shader; // set by gl_sc_generate() |
87 | | |
88 | | struct sc_uniform *uniforms; |
89 | | int num_uniforms; |
90 | | |
91 | | int ubo_binding; |
92 | | size_t ubo_size; |
93 | | size_t pushc_size; |
94 | | |
95 | | struct ra_renderpass_input_val *values; |
96 | | int num_values; |
97 | | |
98 | | // For checking that the user is calling gl_sc_reset() properly. |
99 | | bool needs_reset; |
100 | | |
101 | | bool error_state; // true if an error occurred |
102 | | |
103 | | // temporary buffers (avoids frequent reallocations) |
104 | | bstr tmp[6]; |
105 | | |
106 | | // For the disk-cache. |
107 | | char *cache_dir; |
108 | | struct mpv_global *global; // can be NULL |
109 | | }; |
110 | | |
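
As a rough orientation before the function definitions, here is a minimal sketch of how the fields above are driven by the public API. This is illustrative only; `ra`, `log`, `target`, the texture, the vao/vertex values, and the uniform names are assumptions, not taken from this file:

    // Hedged sketch of the per-pass lifecycle (hypothetical call site).
    struct gl_shader_cache *sc = gl_sc_create(ra, global, log);

    // Accumulate inputs and fragment body text for one pass ...
    gl_sc_uniform_texture(sc, "texture0", src_tex);           // fills sc->uniforms
    gl_sc_uniform_f(sc, "gamma", 1.0f / 2.2f);
    gl_sc_add(sc, "color = texture(texture0, texcoord);\n");  // fills sc->text

    // ... then dispatch. This generates (or reuses) the cached ra_renderpass
    // and resets the per-pass state via gl_sc_reset() afterwards.
    gl_sc_dispatch_draw(sc, target, false, vao, vao_len, vertex_stride,
                        vertices, num_vertices);

    gl_sc_destroy(sc);
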
111 | | struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, |
112 | | struct mp_log *log) |
113 | 0 | { |
114 | 0 | struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc); |
115 | 0 | *sc = (struct gl_shader_cache){ |
116 | 0 | .ra = ra, |
117 | 0 | .global = global, |
118 | 0 | .log = log, |
119 | 0 | }; |
120 | 0 | gl_sc_reset(sc); |
121 | 0 | return sc; |
122 | 0 | } |
123 | | |
124 | | // Reset the previous pass. This must be called after gl_sc_generate and before |
125 | | // starting a new shader. It may also be called on errors. |
126 | | void gl_sc_reset(struct gl_shader_cache *sc) |
127 | 0 | { |
128 | 0 | sc->prelude_text.len = 0; |
129 | 0 | sc->header_text.len = 0; |
130 | 0 | sc->text.len = 0; |
131 | 0 | for (int n = 0; n < sc->num_uniforms; n++) |
132 | 0 | talloc_free((void *)sc->uniforms[n].input.name); |
133 | 0 | sc->num_uniforms = 0; |
134 | 0 | sc->ubo_binding = 0; |
135 | 0 | sc->ubo_size = 0; |
136 | 0 | sc->pushc_size = 0; |
137 | 0 | for (int i = 0; i < RA_VARTYPE_COUNT; i++) |
138 | 0 | sc->next_binding[i] = 0; |
139 | 0 | sc->next_uniform_dynamic = false; |
140 | 0 | sc->current_shader = NULL; |
141 | 0 | sc->params = (struct ra_renderpass_params){0}; |
142 | 0 | sc->needs_reset = false; |
143 | 0 | } |
144 | | |
145 | | static void sc_flush_cache(struct gl_shader_cache *sc) |
146 | 0 | { |
147 | 0 | MP_DBG(sc, "flushing shader cache\n"); |
148 | |
149 | 0 | for (int n = 0; n < sc->num_entries; n++) { |
150 | 0 | struct sc_entry *e = sc->entries[n]; |
151 | 0 | ra_buf_free(sc->ra, &e->ubo); |
152 | 0 | if (e->pass) |
153 | 0 | sc->ra->fns->renderpass_destroy(sc->ra, e->pass); |
154 | 0 | timer_pool_destroy(e->timer); |
155 | 0 | talloc_free(e); |
156 | 0 | } |
157 | 0 | sc->num_entries = 0; |
158 | 0 | } |
159 | | |
160 | | void gl_sc_destroy(struct gl_shader_cache *sc) |
161 | 0 | { |
162 | 0 | if (!sc) |
163 | 0 | return; |
164 | 0 | gl_sc_reset(sc); |
165 | 0 | sc_flush_cache(sc); |
166 | 0 | talloc_free(sc); |
167 | 0 | } |
168 | | |
169 | | bool gl_sc_error_state(struct gl_shader_cache *sc) |
170 | 0 | { |
171 | 0 | return sc->error_state; |
172 | 0 | } |
173 | | |
174 | | void gl_sc_reset_error(struct gl_shader_cache *sc) |
175 | 0 | { |
176 | 0 | sc->error_state = false; |
177 | 0 | } |
178 | | |
179 | | void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name) |
180 | 0 | { |
181 | 0 | for (int n = 0; n < sc->num_exts; n++) { |
182 | 0 | if (strcmp(sc->exts[n], name) == 0) |
183 | 0 | return; |
184 | 0 | } |
185 | 0 | MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name)); |
186 | 0 | } |
187 | | |
188 | | void gl_sc_add(struct gl_shader_cache *sc, const char *text) |
189 | 0 | { |
190 | 0 | bstr_xappend0(sc, &sc->text, text); |
191 | 0 | } |
192 | | |
193 | | void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) |
194 | 0 | { |
195 | 0 | va_list ap; |
196 | 0 | va_start(ap, textf); |
197 | 0 | bstr_xappend_vasprintf(sc, &sc->text, textf, ap); |
198 | 0 | va_end(ap); |
199 | 0 | } |
200 | | |
201 | | void gl_sc_hadd(struct gl_shader_cache *sc, const char *text) |
202 | 0 | { |
203 | 0 | bstr_xappend0(sc, &sc->header_text, text); |
204 | 0 | } |
205 | | |
206 | | void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) |
207 | 0 | { |
208 | 0 | va_list ap; |
209 | 0 | va_start(ap, textf); |
210 | 0 | bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap); |
211 | 0 | va_end(ap); |
212 | 0 | } |
213 | | |
214 | | void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text) |
215 | 0 | { |
216 | 0 | bstr_xappend(sc, &sc->header_text, text); |
217 | 0 | } |
218 | | |
219 | | void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) |
220 | 0 | { |
221 | 0 | va_list ap; |
222 | 0 | va_start(ap, textf); |
223 | 0 | bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap); |
224 | 0 | va_end(ap); |
225 | 0 | } |
226 | | |
227 | | static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, |
228 | | const char *name) |
229 | 0 | { |
230 | 0 | struct sc_uniform new = { |
231 | 0 | .input = { |
232 | 0 | .dim_v = 1, |
233 | 0 | .dim_m = 1, |
234 | 0 | }, |
235 | 0 | }; |
236 | |
237 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
238 | 0 | struct sc_uniform *u = &sc->uniforms[n]; |
239 | 0 | if (strcmp(u->input.name, name) == 0) { |
240 | 0 | const char *allocname = u->input.name; |
241 | 0 | *u = new; |
242 | 0 | u->input.name = allocname; |
243 | 0 | return u; |
244 | 0 | } |
245 | 0 | } |
246 | | |
247 | | // not found -> add it |
248 | 0 | new.input.name = talloc_strdup(NULL, name); |
249 | 0 | MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); |
250 | 0 | return &sc->uniforms[sc->num_uniforms - 1]; |
251 | 0 | } |
252 | | |
253 | | static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) |
254 | 0 | { |
255 | 0 | return sc->next_binding[sc->ra->fns->desc_namespace(sc->ra, type)]++; |
256 | 0 | } |
257 | | |
258 | | void gl_sc_uniform_dynamic(struct gl_shader_cache *sc) |
259 | 0 | { |
260 | 0 | sc->next_uniform_dynamic = true; |
261 | 0 | } |
262 | | |
263 | | // Updates the metadata for the given sc_uniform. Assumes sc_uniform->input |
264 | | // and glsl_type/buffer_format are already set. |
265 | | static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u) |
266 | 0 | { |
267 | 0 | bool dynamic = sc->next_uniform_dynamic; |
268 | 0 | sc->next_uniform_dynamic = false; |
269 | | |
270 | | // Avoid push constants for "large" values like matrices, since |
271 | | // these are likely to exceed both the VGPR budget and the push |
272 | | // constant size budget. |
273 | 0 | bool try_pushc = u->input.dim_m == 1 || dynamic; |
274 | | |
275 | | // Attempt using push constants first |
276 | 0 | if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) { |
277 | 0 | struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input); |
278 | 0 | size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align); |
279 | | // Push constants have limited size, so make sure we don't exceed this |
280 | 0 | size_t new_size = offset + layout.size; |
281 | 0 | if (new_size <= sc->ra->max_pushc_size) { |
282 | 0 | u->type = SC_UNIFORM_TYPE_PUSHC; |
283 | 0 | u->layout = layout; |
284 | 0 | u->offset = offset; |
285 | 0 | sc->pushc_size = new_size; |
286 | 0 | return; |
287 | 0 | } |
288 | 0 | } |
289 | | |
290 | | // Attempt using uniform buffer next. The GLSL version 440 check is due |
291 | | // to explicit offsets on UBO entries. In theory we could leave out |
292 | | // the offsets and support UBOs for older GL as well, but this is a nice |
293 | | // safety net for driver bugs (and also rules out potentially buggy drivers). |
294 | | // Also avoid UBOs for highly dynamic stuff since that requires synchronizing |
295 | | // the UBO writes every frame |
296 | 0 | bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic; |
297 | 0 | if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) { |
298 | 0 | u->type = SC_UNIFORM_TYPE_UBO; |
299 | 0 | u->layout = sc->ra->fns->uniform_layout(&u->input); |
300 | 0 | u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); |
301 | 0 | sc->ubo_size = u->offset + u->layout.size; |
302 | 0 | return; |
303 | 0 | } |
304 | | |
305 | | // If all else fails, use global uniforms |
306 | 0 | mp_assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); |
307 | 0 | u->type = SC_UNIFORM_TYPE_GLOBAL; |
308 | 0 | } |
309 | | |
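
The offset packing above is plain align-then-append arithmetic. A standalone, runnable sketch of the same computation (ALIGN_UP is a local stand-in for mpv's MP_ALIGN_UP; the vec4 size/alignment values are the usual std430 ones, assumed here for illustration):

    #include <stdio.h>
    #include <stddef.h>

    #define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

    int main(void)
    {
        // A push constant block currently holding 20 bytes gets a vec4
        // (size 16, align 16) appended:
        size_t pushc_size = 20;
        size_t offset = ALIGN_UP(pushc_size, 16); // -> 32
        size_t new_size = offset + 16;            // -> 48
        printf("offset=%zu new_size=%zu\n", offset, new_size);
        // new_size is then checked against ra->max_pushc_size; Vulkan only
        // guarantees 128 bytes, which is why "large" values like matrices
        // are steered toward the UBO path instead.
        return 0;
    }
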
310 | | void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, |
311 | | struct ra_tex *tex) |
312 | 0 | { |
313 | 0 | const char *glsl_type = "sampler2D"; |
314 | 0 | if (tex->params.dimensions == 1) { |
315 | 0 | glsl_type = "sampler1D"; |
316 | 0 | } else if (tex->params.dimensions == 3) { |
317 | 0 | glsl_type = "sampler3D"; |
318 | 0 | } else if (tex->params.non_normalized) { |
319 | 0 | glsl_type = "sampler2DRect"; |
320 | 0 | } else if (tex->params.external_oes) { |
321 | 0 | glsl_type = "samplerExternalOES"; |
322 | 0 | } else if (tex->params.format->ctype == RA_CTYPE_UINT) { |
323 | 0 | glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D"; |
324 | 0 | } |
325 | |
326 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
327 | 0 | u->input.type = RA_VARTYPE_TEX; |
328 | 0 | u->glsl_type = glsl_type; |
329 | 0 | u->input.binding = gl_sc_next_binding(sc, u->input.type); |
330 | 0 | u->v.tex = tex; |
331 | 0 | } |
332 | | |
333 | | void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, |
334 | | struct ra_tex *tex) |
335 | 0 | { |
336 | 0 | gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store"); |
337 | |
338 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
339 | 0 | u->input.type = RA_VARTYPE_IMG_W; |
340 | 0 | u->glsl_type = sc->ra->glsl_es ? "writeonly highp image2D" : "writeonly image2D"; |
341 | 0 | u->input.binding = gl_sc_next_binding(sc, u->input.type); |
342 | 0 | u->v.tex = tex; |
343 | 0 | } |
344 | | |
345 | | void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, |
346 | | char *format, ...) |
347 | 0 | { |
348 | 0 | mp_assert(sc->ra->caps & RA_CAP_BUF_RW); |
349 | 0 | gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); |
350 | |
351 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
352 | 0 | u->input.type = RA_VARTYPE_BUF_RW; |
353 | 0 | u->glsl_type = ""; |
354 | 0 | u->input.binding = gl_sc_next_binding(sc, u->input.type); |
355 | 0 | u->v.buf = buf; |
356 | |
357 | 0 | va_list ap; |
358 | 0 | va_start(ap, format); |
359 | 0 | u->buffer_format = ta_vasprintf(sc, format, ap); |
360 | 0 | va_end(ap); |
361 | 0 | } |
362 | | |
363 | | void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f) |
364 | 0 | { |
365 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
366 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
367 | 0 | u->glsl_type = "float"; |
368 | 0 | update_uniform_params(sc, u); |
369 | 0 | u->v.f[0] = f; |
370 | 0 | } |
371 | | |
372 | | void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i) |
373 | 0 | { |
374 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
375 | 0 | u->input.type = RA_VARTYPE_INT; |
376 | 0 | u->glsl_type = "int"; |
377 | 0 | update_uniform_params(sc, u); |
378 | 0 | u->v.i[0] = i; |
379 | 0 | } |
380 | | |
381 | | void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]) |
382 | 0 | { |
383 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
384 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
385 | 0 | u->input.dim_v = 2; |
386 | 0 | u->glsl_type = "vec2"; |
387 | 0 | update_uniform_params(sc, u); |
388 | 0 | u->v.f[0] = f[0]; |
389 | 0 | u->v.f[1] = f[1]; |
390 | 0 | } |
391 | | |
392 | | void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]) |
393 | 0 | { |
394 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
395 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
396 | 0 | u->input.dim_v = 3; |
397 | 0 | u->glsl_type = "vec3"; |
398 | 0 | update_uniform_params(sc, u); |
399 | 0 | u->v.f[0] = f[0]; |
400 | 0 | u->v.f[1] = f[1]; |
401 | 0 | u->v.f[2] = f[2]; |
402 | 0 | } |
403 | | |
404 | | static void transpose2x2(float r[2 * 2]) |
405 | 0 | { |
406 | 0 | MPSWAP(float, r[0+2*1], r[1+2*0]); |
407 | 0 | } |
408 | | |
409 | | void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, |
410 | | bool transpose, float *v) |
411 | 0 | { |
412 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
413 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
414 | 0 | u->input.dim_v = 2; |
415 | 0 | u->input.dim_m = 2; |
416 | 0 | u->glsl_type = "mat2"; |
417 | 0 | update_uniform_params(sc, u); |
418 | 0 | for (int n = 0; n < 4; n++) |
419 | 0 | u->v.f[n] = v[n]; |
420 | 0 | if (transpose) |
421 | 0 | transpose2x2(&u->v.f[0]); |
422 | 0 | } |
423 | | |
424 | | static void transpose3x3(float r[3 * 3]) |
425 | 0 | { |
426 | 0 | MPSWAP(float, r[0+3*1], r[1+3*0]); |
427 | 0 | MPSWAP(float, r[0+3*2], r[2+3*0]); |
428 | 0 | MPSWAP(float, r[1+3*2], r[2+3*1]); |
429 | 0 | } |
430 | | |
431 | | void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, |
432 | | bool transpose, float *v) |
433 | 0 | { |
434 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
435 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
436 | 0 | u->input.dim_v = 3; |
437 | 0 | u->input.dim_m = 3; |
438 | 0 | u->glsl_type = "mat3"; |
439 | 0 | update_uniform_params(sc, u); |
440 | 0 | for (int n = 0; n < 9; n++) |
441 | 0 | u->v.f[n] = v[n]; |
442 | 0 | if (transpose) |
443 | 0 | transpose3x3(&u->v.f[0]); |
444 | 0 | } |
445 | | |
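
GLSL matrix uniforms are consumed column-major, which is why a caller holding row-major data passes transpose=true. A standalone check of the swap pattern used by transpose3x3() above (the swap indices are copied from it; MPSWAP is expanded by hand):

    #include <stdio.h>

    static void transpose3x3(float r[9])
    {
        float t;
        t = r[0+3*1]; r[0+3*1] = r[1+3*0]; r[1+3*0] = t;
        t = r[0+3*2]; r[0+3*2] = r[2+3*0]; r[2+3*0] = t;
        t = r[1+3*2]; r[1+3*2] = r[2+3*1]; r[2+3*1] = t;
    }

    int main(void)
    {
        float m[9] = {1, 2, 3,
                      4, 5, 6,
                      7, 8, 9};
        transpose3x3(m);
        for (int i = 0; i < 9; i++)
            printf("%g%c", m[i], i % 3 == 2 ? '\n' : ' ');
        // Prints:
        // 1 4 7
        // 2 5 8
        // 3 6 9
        return 0;
    }
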
446 | | void gl_sc_blend(struct gl_shader_cache *sc, |
447 | | enum ra_blend blend_src_rgb, |
448 | | enum ra_blend blend_dst_rgb, |
449 | | enum ra_blend blend_src_alpha, |
450 | | enum ra_blend blend_dst_alpha) |
451 | 0 | { |
452 | 0 | sc->params.enable_blend = true; |
453 | 0 | sc->params.blend_src_rgb = blend_src_rgb; |
454 | 0 | sc->params.blend_dst_rgb = blend_dst_rgb; |
455 | 0 | sc->params.blend_src_alpha = blend_src_alpha; |
456 | 0 | sc->params.blend_dst_alpha = blend_dst_alpha; |
457 | 0 | } |
458 | | |
459 | | const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims) |
460 | 0 | { |
461 | 0 | static const char *bvecs[] = { |
462 | 0 | [1] = "bool", |
463 | 0 | [2] = "bvec2", |
464 | 0 | [3] = "bvec3", |
465 | 0 | [4] = "bvec4", |
466 | 0 | }; |
467 | |
468 | 0 | static const char *vecs[] = { |
469 | 0 | [1] = "float", |
470 | 0 | [2] = "vec2", |
471 | 0 | [3] = "vec3", |
472 | 0 | [4] = "vec4", |
473 | 0 | }; |
474 | |
475 | 0 | mp_assert(dims > 0 && dims < MP_ARRAY_SIZE(bvecs)); |
476 | 0 | return sc->ra->glsl_version >= 130 ? bvecs[dims] : vecs[dims]; |
477 | 0 | } |
478 | | |
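
The bvec/vec split exists because the mix() overload with a boolean-vector selector only arrived in GLSL 1.30; older targets get a float vector, which works as a 0.0/1.0 mix() weight. A hedged, hypothetical call site (the sRGB-linearization expression is just an example, not taken from this file):

    gl_sc_addf(sc,
        "color.rgb = mix(color.rgb * vec3(1.0/12.92),"
        " pow((color.rgb + vec3(0.055)) / vec3(1.055), vec3(2.4)),"
        " %s(lessThanEqual(vec3(0.04045), color.rgb)));\n",
        gl_sc_bvec(sc, 3));

On GLSL >= 1.30 the `%s` expands to `bvec3` (an identity constructor around the comparison result); on older targets it expands to `vec3`, converting the bvec to 0.0/1.0 weights.
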
479 | | static const char *vao_glsl_type(const struct ra_renderpass_input *e) |
480 | 0 | { |
481 | | // pretty dumb... too dumb, but works for us |
482 | 0 | switch (e->dim_v) { |
483 | 0 | case 1: return "float"; |
484 | 0 | case 2: return "vec2"; |
485 | 0 | case 3: return "vec3"; |
486 | 0 | case 4: return "vec4"; |
487 | 0 | default: MP_ASSERT_UNREACHABLE(); |
488 | 0 | } |
489 | 0 | } |
490 | | |
491 | | static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u) |
492 | 0 | { |
493 | 0 | uintptr_t src = (uintptr_t) &u->v; |
494 | 0 | size_t dst = u->offset; |
495 | 0 | struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); |
496 | 0 | struct ra_layout dst_layout = u->layout; |
497 | |
498 | 0 | for (int i = 0; i < u->input.dim_m; i++) { |
499 | 0 | ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride); |
500 | 0 | src += src_layout.stride; |
501 | 0 | dst += dst_layout.stride; |
502 | 0 | } |
503 | 0 | } |
504 | | |
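
The per-row copy matters because the two layouts can differ in stride. For example (assuming the usual std140 rules), a mat3 uniform is tightly packed on the CPU side (stride 12 = 3 floats per column) while std140 pads each column to 16 bytes, so the loop above issues dim_m = 3 separate copies:

    i   src offset (packed)   dst offset (u->offset +)   bytes copied (src stride)
    0           0                       0                          12
    1          12                      16                          12
    2          24                      32                          12

A single 36-byte memcpy would land the second and third columns in the padding of the wrong slots.
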
505 | | static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u) |
506 | 0 | { |
507 | 0 | uintptr_t src = (uintptr_t) &u->v; |
508 | 0 | uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset; |
509 | 0 | struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); |
510 | 0 | struct ra_layout dst_layout = u->layout; |
511 | |
512 | 0 | for (int i = 0; i < u->input.dim_m; i++) { |
513 | 0 | memcpy((void *)dst, (void *)src, src_layout.stride); |
514 | 0 | src += src_layout.stride; |
515 | 0 | dst += dst_layout.stride; |
516 | 0 | } |
517 | 0 | } |
518 | | |
519 | | static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, |
520 | | struct sc_uniform *u, int n) |
521 | 0 | { |
522 | 0 | struct sc_cached_uniform *un = &e->cached_uniforms[n]; |
523 | 0 | struct ra_layout layout = ra_renderpass_input_layout(&u->input); |
524 | 0 | if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0) |
525 | 0 | return; |
526 | | |
527 | 0 | un->v = u->v; |
528 | 0 | un->set = true; |
529 | |
530 | 0 | static const char *desc[] = { |
531 | 0 | [SC_UNIFORM_TYPE_UBO] = "UBO", |
532 | 0 | [SC_UNIFORM_TYPE_PUSHC] = "PC", |
533 | 0 | [SC_UNIFORM_TYPE_GLOBAL] = "global", |
534 | 0 | }; |
535 | 0 | MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name); |
536 | |
537 | 0 | switch (u->type) { |
538 | 0 | case SC_UNIFORM_TYPE_GLOBAL: { |
539 | 0 | struct ra_renderpass_input_val value = { |
540 | 0 | .index = un->index, |
541 | 0 | .data = &un->v, |
542 | 0 | }; |
543 | 0 | MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); |
544 | 0 | break; |
545 | 0 | } |
546 | 0 | case SC_UNIFORM_TYPE_UBO: |
547 | 0 | mp_assert(e->ubo); |
548 | 0 | update_ubo(sc->ra, e->ubo, u); |
549 | 0 | break; |
550 | 0 | case SC_UNIFORM_TYPE_PUSHC: |
551 | 0 | mp_assert(e->pushc); |
552 | 0 | update_pushc(sc->ra, e->pushc, u); |
553 | 0 | break; |
554 | 0 | default: MP_ASSERT_UNREACHABLE(); |
555 | 0 | } |
556 | 0 | } |
557 | | |
558 | | void gl_sc_set_cache_dir(struct gl_shader_cache *sc, char *dir) |
559 | 0 | { |
560 | 0 | talloc_free(sc->cache_dir); |
561 | 0 | if (dir && dir[0]) { |
562 | 0 | dir = mp_get_user_path(NULL, sc->global, dir); |
563 | 0 | } else { |
564 | 0 | dir = mp_find_user_file(NULL, sc->global, "cache", ""); |
565 | 0 | } |
566 | 0 | sc->cache_dir = talloc_strdup(sc, dir); |
567 | 0 | talloc_free(dir); |
568 | 0 | } |
569 | | |
570 | | static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) |
571 | 0 | { |
572 | 0 | bool ret = false; |
573 | |
574 | 0 | void *tmp = talloc_new(NULL); |
575 | 0 | struct ra_renderpass_params params = sc->params; |
576 | |
577 | 0 | const char *cache_header = "mpv shader cache v1\n"; |
578 | 0 | char *cache_filename = NULL; |
579 | 0 | char *cache_dir = NULL; |
580 | |
581 | 0 | if (sc->cache_dir && sc->cache_dir[0]) { |
582 | | // Try to load it from a disk cache. |
583 | 0 | cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); |
584 | |
585 | 0 | struct AVSHA *sha = av_sha_alloc(); |
586 | 0 | MP_HANDLE_OOM(sha); |
587 | 0 | av_sha_init(sha, 256); |
588 | 0 | av_sha_update(sha, entry->total.start, entry->total.len); |
589 | |
590 | 0 | uint8_t hash[256 / 8]; |
591 | 0 | av_sha_final(sha, hash); |
592 | 0 | av_free(sha); |
593 | |
594 | 0 | char hashstr[256 / 8 * 2 + 1]; |
595 | 0 | for (int n = 0; n < 256 / 8; n++) |
596 | 0 | snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]); |
597 | |
598 | 0 | cache_filename = mp_path_join(tmp, cache_dir, hashstr); |
599 | 0 | if (stat(cache_filename, &(struct stat){0}) == 0) { |
600 | 0 | MP_DBG(sc, "Trying to load shader from disk...\n"); |
601 | 0 | struct bstr cachedata = |
602 | 0 | stream_read_file(cache_filename, tmp, sc->global, 1000000000); |
603 | 0 | if (bstr_eatstart0(&cachedata, cache_header)) |
604 | 0 | params.cached_program = cachedata; |
605 | 0 | } |
606 | 0 | } |
607 | | |
608 | | // If using a UBO, also make sure to add it as an input value so the RA |
609 | | // can see it |
610 | 0 | if (sc->ubo_size) { |
611 | 0 | entry->ubo_index = sc->params.num_inputs; |
612 | 0 | struct ra_renderpass_input ubo_input = { |
613 | 0 | .name = "UBO", |
614 | 0 | .type = RA_VARTYPE_BUF_RO, |
615 | 0 | .dim_v = 1, |
616 | 0 | .dim_m = 1, |
617 | 0 | .binding = sc->ubo_binding, |
618 | 0 | }; |
619 | 0 | MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input); |
620 | 0 | } |
621 | |
622 | 0 | if (sc->pushc_size) { |
623 | 0 | params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4); |
624 | 0 | entry->pushc = talloc_zero_size(entry, params.push_constants_size); |
625 | 0 | } |
626 | |
627 | 0 | if (sc->ubo_size) { |
628 | 0 | struct ra_buf_params ubo_params = { |
629 | 0 | .type = RA_BUF_TYPE_UNIFORM, |
630 | 0 | .size = sc->ubo_size, |
631 | 0 | .host_mutable = true, |
632 | 0 | }; |
633 | |
634 | 0 | entry->ubo = ra_buf_create(sc->ra, &ubo_params); |
635 | 0 | if (!entry->ubo) { |
636 | 0 | MP_ERR(sc, "Failed creating uniform buffer!\n"); |
637 | 0 | goto error; |
638 | 0 | } |
639 | 0 | } |
640 | | |
641 | 0 | entry->pass = sc->ra->fns->renderpass_create(sc->ra, ¶ms); |
642 | 0 | if (!entry->pass) |
643 | 0 | goto error; |
644 | | |
645 | 0 | if (entry->pass && cache_filename) { |
646 | 0 | bstr nc = entry->pass->params.cached_program; |
647 | 0 | if (nc.len && !bstr_equals(params.cached_program, nc)) { |
648 | 0 | mp_mkdirp(cache_dir); |
649 | |
650 | 0 | MP_DBG(sc, "Writing shader cache file: %s\n", cache_filename); |
651 | 0 | FILE *out = fopen(cache_filename, "wb"); |
652 | 0 | if (out) { |
653 | 0 | fwrite(cache_header, strlen(cache_header), 1, out); |
654 | 0 | fwrite(nc.start, nc.len, 1, out); |
655 | 0 | fclose(out); |
656 | 0 | } |
657 | 0 | } |
658 | 0 | } |
659 | |
660 | 0 | ret = true; |
661 | |
|
662 | 0 | error: |
663 | 0 | talloc_free(tmp); |
664 | 0 | return ret; |
665 | 0 | } |
666 | | |
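
The disk-cache key used in create_pass() is simply SHA-256 over the final shader text, hex-encoded into a file name. A standalone sketch of the same derivation (compile against FFmpeg's libavutil; the sample input string is made up):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <libavutil/sha.h>
    #include <libavutil/mem.h>

    // Derive the 64-hex-char cache file name for a shader text blob.
    static void cache_name(const char *src, char out[256 / 8 * 2 + 1])
    {
        struct AVSHA *sha = av_sha_alloc();
        if (!sha)
            return;
        av_sha_init(sha, 256);
        av_sha_update(sha, (const uint8_t *)src, strlen(src));
        uint8_t hash[256 / 8];
        av_sha_final(sha, hash);
        av_free(sha);
        for (int n = 0; n < 256 / 8; n++)
            snprintf(out + n * 2, 3, "%02X", hash[n]);
    }

    int main(void)
    {
        char name[256 / 8 * 2 + 1];
        cache_name("type 0\nvoid main() {}\n", name);
        printf("%s\n", name); // joined with the cache dir to form the path
        return 0;
    }
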
667 | 0 | #define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__) |
668 | 0 | #define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s)) |
669 | | |
670 | | static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) |
671 | 0 | { |
672 | | // Add all of the UBO entries separately as members of their own buffer |
673 | 0 | if (sc->ubo_size > 0) { |
674 | 0 | ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding); |
675 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
676 | 0 | struct sc_uniform *u = &sc->uniforms[n]; |
677 | 0 | if (u->type != SC_UNIFORM_TYPE_UBO) |
678 | 0 | continue; |
679 | 0 | ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, |
680 | 0 | u->input.name); |
681 | 0 | } |
682 | 0 | ADD(dst, "};\n"); |
683 | 0 | } |
684 | | |
685 | | // Ditto for push constants |
686 | 0 | if (sc->pushc_size > 0) { |
687 | 0 | ADD(dst, "layout(std430, push_constant) uniform PushC {\n"); |
688 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
689 | 0 | struct sc_uniform *u = &sc->uniforms[n]; |
690 | 0 | if (u->type != SC_UNIFORM_TYPE_PUSHC) |
691 | 0 | continue; |
692 | 0 | ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, |
693 | 0 | u->input.name); |
694 | 0 | } |
695 | 0 | ADD(dst, "};\n"); |
696 | 0 | } |
697 | |
698 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
699 | 0 | struct sc_uniform *u = &sc->uniforms[n]; |
700 | 0 | if (u->type != SC_UNIFORM_TYPE_GLOBAL) |
701 | 0 | continue; |
702 | 0 | switch (u->input.type) { |
703 | 0 | case RA_VARTYPE_INT: |
704 | 0 | case RA_VARTYPE_FLOAT: |
705 | 0 | mp_assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); |
706 | 0 | MP_FALLTHROUGH; |
707 | 0 | case RA_VARTYPE_TEX: |
708 | | // Vulkan requires explicitly assigning the bindings in the shader |
709 | | // source. For OpenGL it's optional, but requires higher GL version |
710 | | // so we don't do it (and instead have ra_gl update the bindings |
711 | | // after program creation). |
712 | 0 | if (sc->ra->glsl_vulkan) |
713 | 0 | ADD(dst, "layout(binding=%d) ", u->input.binding); |
714 | 0 | ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name); |
715 | 0 | break; |
716 | 0 | case RA_VARTYPE_BUF_RO: |
717 | 0 | ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n", |
718 | 0 | u->input.binding, u->input.name, u->buffer_format); |
719 | 0 | break; |
720 | 0 | case RA_VARTYPE_BUF_RW: |
721 | 0 | ADD(dst, "layout(std430, binding=%d) restrict coherent buffer %s { %s };\n", |
722 | 0 | u->input.binding, u->input.name, u->buffer_format); |
723 | 0 | break; |
724 | 0 | case RA_VARTYPE_IMG_W: { |
725 | | // For better compatibility, we have to explicitly label the |
726 | | // type of data we will be reading/writing to this image. |
727 | 0 | const char *fmt = u->v.tex->params.format->glsl_format; |
728 | |
729 | 0 | if (sc->ra->glsl_vulkan) { |
730 | 0 | if (fmt) { |
731 | 0 | ADD(dst, "layout(binding=%d, %s) ", u->input.binding, fmt); |
732 | 0 | } else { |
733 | 0 | ADD(dst, "layout(binding=%d) ", u->input.binding); |
734 | 0 | } |
735 | 0 | } else if (fmt) { |
736 | 0 | ADD(dst, "layout(%s) ", fmt); |
737 | 0 | } |
738 | 0 | ADD(dst, "uniform restrict %s %s;\n", u->glsl_type, u->input.name); |
739 | 0 | } |
740 | 0 | } |
741 | 0 | } |
742 | 0 | } |
743 | | |
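
For a feel of the output, assume a Vulkan target with two hypothetical UBO members and one global texture (names, bindings, and offsets invented for illustration; the mat3 occupies 48 bytes under std140, so the next member starts at offset 48). The declarations this function emits would look roughly like:

    layout(std140, binding=2) uniform UBO {
    layout(offset=0) mat3 colormatrix;
    layout(offset=48) vec3 colormatrix_c;
    };
    layout(binding=0) uniform sampler2D texture0;
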
744 | | // 1. Generate vertex and fragment shaders from the fragment shader text added |
745 | | // with gl_sc_add(). The generated shader program is cached (based on the |
746 | | // text), so actual compilation happens only the first time. |
747 | | // 2. Update the uniforms and textures set with gl_sc_uniform_*. |
748 | | // 3. Make the new shader program current (glUseProgram()). |
749 | | // After that, you render, and then you call gl_sc_reset(), which does: |
750 | | // 1. Unbind the program and all textures. |
751 | | // 2. Reset the sc state and prepare for a new shader program. (All uniforms |
752 | | // and fragment operations needed for the next program have to be re-added.) |
753 | | static void gl_sc_generate(struct gl_shader_cache *sc, |
754 | | enum ra_renderpass_type type, |
755 | | const struct ra_format *target_format, |
756 | | const struct ra_renderpass_input *vao, |
757 | | int vao_len, size_t vertex_stride) |
758 | 0 | { |
759 | 0 | int glsl_version = sc->ra->glsl_version; |
760 | 0 | int glsl_es = sc->ra->glsl_es ? glsl_version : 0; |
761 | |
762 | 0 | sc->params.type = type; |
763 | | |
764 | | // gl_sc_reset() must be called after ending the previous render process, |
765 | | // and before starting a new one. |
766 | 0 | mp_assert(!sc->needs_reset); |
767 | 0 | sc->needs_reset = true; |
768 | | |
769 | | // If using a UBO, pick a binding (needed for shader generation) |
770 | 0 | if (sc->ubo_size) |
771 | 0 | sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO); |
772 | |
773 | 0 | for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++) |
774 | 0 | sc->tmp[n].len = 0; |
775 | | |
776 | | // set up shader text (header + uniforms + body) |
777 | 0 | bstr *header = &sc->tmp[0]; |
778 | 0 | ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : ""); |
779 | 0 | if (type == RA_RENDERPASS_TYPE_COMPUTE) { |
780 | | // This extension cannot be enabled in a fragment shader, so enable it |
781 | | // only as an exception for compute shaders. |
782 | 0 | ADD(header, "#extension GL_ARB_compute_shader : enable\n"); |
783 | 0 | } |
784 | 0 | for (int n = 0; n < sc->num_exts; n++) |
785 | 0 | ADD(header, "#extension %s : enable\n", sc->exts[n]); |
786 | 0 | if (glsl_es) { |
787 | 0 | ADD(header, "#ifdef GL_FRAGMENT_PRECISION_HIGH\n"); |
788 | 0 | ADD(header, "precision highp float;\n"); |
789 | 0 | ADD(header, "#else\n"); |
790 | 0 | ADD(header, "precision mediump float;\n"); |
791 | 0 | ADD(header, "#endif\n"); |
792 | |
793 | 0 | ADD(header, "precision mediump sampler2D;\n"); |
794 | 0 | if (sc->ra->caps & RA_CAP_TEX_3D) |
795 | 0 | ADD(header, "precision mediump sampler3D;\n"); |
796 | 0 | } |
797 | |
798 | 0 | if (glsl_version >= 130) { |
799 | 0 | ADD(header, "#define tex1D texture\n"); |
800 | 0 | ADD(header, "#define tex3D texture\n"); |
801 | 0 | } else { |
802 | 0 | ADD(header, "#define tex1D texture1D\n"); |
803 | 0 | ADD(header, "#define tex3D texture3D\n"); |
804 | 0 | ADD(header, "#define texture texture2D\n"); |
805 | 0 | } |
806 | | |
807 | | // Additional helpers. |
808 | 0 | ADD(header, "#define LUT_POS(x, lut_size)" |
809 | 0 | " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n"); |
810 | |
811 | 0 | char *vert_in = glsl_version >= 130 ? "in" : "attribute"; |
812 | 0 | char *vert_out = glsl_version >= 130 ? "out" : "varying"; |
813 | 0 | char *frag_in = glsl_version >= 130 ? "in" : "varying"; |
814 | |
815 | 0 | struct bstr *vert = NULL, *frag = NULL, *comp = NULL; |
816 | |
|
817 | 0 | if (type == RA_RENDERPASS_TYPE_RASTER) { |
818 | | // vertex shader: we don't use the vertex shader, so just set up a |
819 | | // dummy, which passes through the vertex array attributes. |
820 | 0 | bstr *vert_head = &sc->tmp[1]; |
821 | 0 | ADD_BSTR(vert_head, *header); |
822 | 0 | bstr *vert_body = &sc->tmp[2]; |
823 | 0 | ADD(vert_body, "void main() {\n"); |
824 | 0 | bstr *frag_vaos = &sc->tmp[3]; |
825 | 0 | for (int n = 0; n < vao_len; n++) { |
826 | 0 | const struct ra_renderpass_input *e = &vao[n]; |
827 | 0 | const char *glsl_type = vao_glsl_type(e); |
828 | 0 | char loc[32] = {0}; |
829 | 0 | if (sc->ra->glsl_vulkan) |
830 | 0 | snprintf(loc, sizeof(loc), "layout(location=%d) ", n); |
831 | 0 | if (strcmp(e->name, "position") == 0) { |
832 | | // setting the raster position requires writing the gl_Position magic variable |
833 | 0 | mp_assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT); |
834 | 0 | ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in); |
835 | 0 | ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n"); |
836 | 0 | } else { |
837 | 0 | ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name); |
838 | 0 | ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name); |
839 | 0 | ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name); |
840 | 0 | ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name); |
841 | 0 | } |
842 | 0 | } |
843 | 0 | ADD(vert_body, "}\n"); |
844 | 0 | vert = vert_head; |
845 | 0 | ADD_BSTR(vert, *vert_body); |
846 | | |
847 | | // fragment shader; still requires adding used uniforms and VAO elements |
848 | 0 | frag = &sc->tmp[4]; |
849 | 0 | ADD_BSTR(frag, *header); |
850 | 0 | if (glsl_version >= 130) { |
851 | 0 | ADD(frag, "%sout vec4 out_color;\n", |
852 | 0 | sc->ra->glsl_vulkan ? "layout(location=0) " : ""); |
853 | 0 | } |
854 | 0 | ADD_BSTR(frag, *frag_vaos); |
855 | 0 | add_uniforms(sc, frag); |
856 | |
857 | 0 | ADD_BSTR(frag, sc->prelude_text); |
858 | 0 | ADD_BSTR(frag, sc->header_text); |
859 | |
860 | 0 | ADD(frag, "void main() {\n"); |
861 | | // we require _all_ frag shaders to write to a "vec4 color" |
862 | 0 | ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); |
863 | 0 | ADD_BSTR(frag, sc->text); |
864 | 0 | if (glsl_version >= 130) { |
865 | 0 | ADD(frag, "out_color = color;\n"); |
866 | 0 | } else { |
867 | 0 | ADD(frag, "gl_FragColor = color;\n"); |
868 | 0 | } |
869 | 0 | ADD(frag, "}\n"); |
870 | | |
871 | | // We need to fix the format of the render dst at renderpass creation |
872 | | // time |
873 | 0 | mp_assert(target_format); |
874 | 0 | sc->params.target_format = target_format; |
875 | 0 | } |
876 | | |
877 | 0 | if (type == RA_RENDERPASS_TYPE_COMPUTE) { |
878 | 0 | comp = &sc->tmp[4]; |
879 | 0 | ADD_BSTR(comp, *header); |
880 | |
881 | 0 | add_uniforms(sc, comp); |
882 | |
883 | 0 | ADD_BSTR(comp, sc->prelude_text); |
884 | 0 | ADD_BSTR(comp, sc->header_text); |
885 | |
886 | 0 | ADD(comp, "void main() {\n"); |
887 | 0 | ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience |
888 | 0 | ADD_BSTR(comp, sc->text); |
889 | 0 | ADD(comp, "}\n"); |
890 | 0 | } |
891 | |
892 | 0 | bstr *hash_total = &sc->tmp[5]; |
893 | |
894 | 0 | ADD(hash_total, "type %d\n", sc->params.type); |
895 | |
896 | 0 | if (frag) { |
897 | 0 | ADD_BSTR(hash_total, *frag); |
898 | 0 | sc->params.frag_shader = frag->start; |
899 | 0 | } |
900 | 0 | ADD(hash_total, "\n"); |
901 | 0 | if (vert) { |
902 | 0 | ADD_BSTR(hash_total, *vert); |
903 | 0 | sc->params.vertex_shader = vert->start; |
904 | 0 | } |
905 | 0 | ADD(hash_total, "\n"); |
906 | 0 | if (comp) { |
907 | 0 | ADD_BSTR(hash_total, *comp); |
908 | 0 | sc->params.compute_shader = comp->start; |
909 | 0 | } |
910 | 0 | ADD(hash_total, "\n"); |
911 | |
912 | 0 | if (sc->params.enable_blend) { |
913 | 0 | ADD(hash_total, "blend %d %d %d %d\n", |
914 | 0 | sc->params.blend_src_rgb, sc->params.blend_dst_rgb, |
915 | 0 | sc->params.blend_src_alpha, sc->params.blend_dst_alpha); |
916 | 0 | } |
917 | |
918 | 0 | if (sc->params.target_format) |
919 | 0 | ADD(hash_total, "format %s\n", sc->params.target_format->name); |
920 | |
921 | 0 | struct sc_entry *entry = NULL; |
922 | 0 | for (int n = 0; n < sc->num_entries; n++) { |
923 | 0 | struct sc_entry *cur = sc->entries[n]; |
924 | 0 | if (bstr_equals(cur->total, *hash_total)) { |
925 | 0 | entry = cur; |
926 | 0 | break; |
927 | 0 | } |
928 | 0 | } |
929 | 0 | if (!entry) { |
930 | 0 | if (sc->num_entries == SC_MAX_ENTRIES) |
931 | 0 | sc_flush_cache(sc); |
932 | 0 | entry = talloc_ptrtype(NULL, entry); |
933 | 0 | *entry = (struct sc_entry){ |
934 | 0 | .total = bstrdup(entry, *hash_total), |
935 | 0 | .timer = timer_pool_create(sc->ra), |
936 | 0 | }; |
937 | | |
938 | | // The vertex shader uses mangled names for the vertex attributes, so |
939 | | // that the fragment shader can use the "real" names. But the vertex |
940 | | // attributes registered below must use the mangled names, since that |
941 | | // is what the shader declares (at least with older GLSL targets for GL). |
942 | 0 | sc->params.vertex_stride = vertex_stride; |
943 | 0 | for (int n = 0; n < vao_len; n++) { |
944 | 0 | struct ra_renderpass_input attrib = vao[n]; |
945 | 0 | attrib.name = talloc_asprintf(entry, "vertex_%s", attrib.name); |
946 | 0 | MP_TARRAY_APPEND(sc, sc->params.vertex_attribs, |
947 | 0 | sc->params.num_vertex_attribs, attrib); |
948 | 0 | } |
949 | |
950 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
951 | 0 | struct sc_cached_uniform u = {0}; |
952 | 0 | if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) { |
953 | | // global uniforms need to be made visible to the ra_renderpass |
954 | 0 | u.index = sc->params.num_inputs; |
955 | 0 | MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs, |
956 | 0 | sc->uniforms[n].input); |
957 | 0 | } |
958 | 0 | MP_TARRAY_APPEND(entry, entry->cached_uniforms, |
959 | 0 | entry->num_cached_uniforms, u); |
960 | 0 | } |
961 | 0 | if (!create_pass(sc, entry)) |
962 | 0 | sc->error_state = true; |
963 | 0 | MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry); |
964 | 0 | } |
965 | |
966 | 0 | if (!entry->pass) { |
967 | 0 | sc->current_shader = NULL; |
968 | 0 | return; |
969 | 0 | } |
970 | | |
971 | 0 | mp_assert(sc->num_uniforms == entry->num_cached_uniforms); |
972 | | |
973 | 0 | sc->num_values = 0; |
974 | 0 | for (int n = 0; n < sc->num_uniforms; n++) |
975 | 0 | update_uniform(sc, entry, &sc->uniforms[n], n); |
976 | |
978 | 0 | if (sc->ubo_size) { |
979 | 0 | struct ra_renderpass_input_val ubo_val = { |
980 | 0 | .index = entry->ubo_index, |
981 | 0 | .data = &entry->ubo, |
982 | 0 | }; |
983 | 0 | MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val); |
984 | 0 | } |
985 | |
986 | 0 | sc->current_shader = entry; |
987 | 0 | } |
988 | | |
989 | | struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, |
990 | | struct ra_tex *target, bool discard, |
991 | | const struct ra_renderpass_input *vao, |
992 | | int vao_len, size_t vertex_stride, |
993 | | void *vertices, size_t num_vertices) |
994 | 0 | { |
995 | 0 | struct timer_pool *timer = NULL; |
996 | |
997 | 0 | sc->params.invalidate_target = discard; |
998 | 0 | gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format, |
999 | 0 | vao, vao_len, vertex_stride); |
1000 | 0 | if (!sc->current_shader) |
1001 | 0 | goto error; |
1002 | | |
1003 | 0 | timer = sc->current_shader->timer; |
1004 | |
1005 | 0 | struct mp_rect full_rc = {0, 0, target->params.w, target->params.h}; |
1006 | |
|
1007 | 0 | struct ra_renderpass_run_params run = { |
1008 | 0 | .pass = sc->current_shader->pass, |
1009 | 0 | .values = sc->values, |
1010 | 0 | .num_values = sc->num_values, |
1011 | 0 | .push_constants = sc->current_shader->pushc, |
1012 | 0 | .target = target, |
1013 | 0 | .vertex_data = vertices, |
1014 | 0 | .vertex_count = num_vertices, |
1015 | 0 | .viewport = full_rc, |
1016 | 0 | .scissors = full_rc, |
1017 | 0 | }; |
1018 | |
1019 | 0 | timer_pool_start(timer); |
1020 | 0 | sc->ra->fns->renderpass_run(sc->ra, &run); |
1021 | 0 | timer_pool_stop(timer); |
1022 | |
1023 | 0 | error: |
1024 | 0 | gl_sc_reset(sc); |
1025 | 0 | return timer_pool_measure(timer); |
1026 | 0 | } |
1027 | | |
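
A hedged sketch of a gl_sc_dispatch_draw() call site (the vertex layout is invented for illustration, and the ra_renderpass_input field names are assumed from ra.h; real callers build this from their own vertex structs). The shader body must have been added with gl_sc_add() beforehand; see gl_sc_generate() for how "position" becomes gl_Position:

    struct vertex { float position[2]; };
    static const struct ra_renderpass_input vao[] = {
        { .name = "position", .type = RA_VARTYPE_FLOAT,
          .dim_v = 2, .dim_m = 1, .offset = offsetof(struct vertex, position) },
    };
    const struct vertex verts[6] = {  // two triangles covering the target
        {{-1, -1}}, {{ 1, -1}}, {{-1,  1}},
        {{-1,  1}}, {{ 1, -1}}, {{ 1,  1}},
    };
    struct mp_pass_perf perf =
        gl_sc_dispatch_draw(sc, target, false, vao, 1, sizeof(struct vertex),
                            (void *)verts, 6);
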
1028 | | struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, |
1029 | | int w, int h, int d) |
1030 | 0 | { |
1031 | 0 | struct timer_pool *timer = NULL; |
1032 | |
1033 | 0 | gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL, NULL, 0, 0); |
1034 | 0 | if (!sc->current_shader) |
1035 | 0 | goto error; |
1036 | | |
1037 | 0 | timer = sc->current_shader->timer; |
1038 | |
1039 | 0 | struct ra_renderpass_run_params run = { |
1040 | 0 | .pass = sc->current_shader->pass, |
1041 | 0 | .values = sc->values, |
1042 | 0 | .num_values = sc->num_values, |
1043 | 0 | .push_constants = sc->current_shader->pushc, |
1044 | 0 | .compute_groups = {w, h, d}, |
1045 | 0 | }; |
1046 | |
1047 | 0 | timer_pool_start(timer); |
1048 | 0 | sc->ra->fns->renderpass_run(sc->ra, &run); |
1049 | 0 | timer_pool_stop(timer); |
1050 | |
1051 | 0 | error: |
1052 | 0 | gl_sc_reset(sc); |
1053 | 0 | return timer_pool_measure(timer); |
1054 | 0 | } |
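
Finally, a hedged sketch of a compute call site (hypothetical; not from this file). The work-group size must be declared via the header text, and the w/h/d arguments are work-group counts, not pixels, hence the rounding-up division. `color` is the convenience vec4 that gl_sc_generate() declares in the generated main():

    gl_sc_uniform_image2D_wo(sc, "out_image", dst_tex);
    gl_sc_hadd(sc, "layout(local_size_x = 8, local_size_y = 8) in;\n");
    gl_sc_add(sc, "ivec2 pos = ivec2(gl_GlobalInvocationID.xy);\n"
                  "imageStore(out_image, pos, color);\n");
    struct mp_pass_perf perf = gl_sc_dispatch_compute(sc,
            (dst_tex->params.w + 7) / 8, (dst_tex->params.h + 7) / 8, 1);
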