/src/mpv/video/out/gpu/shader_cache.c
Line | Count | Source |
1 | | #include <stddef.h> |
2 | | #include <stdint.h> |
3 | | #include <stdlib.h> |
4 | | #include <string.h> |
5 | | #include <stdarg.h> |
6 | | #include <assert.h> |
7 | | |
8 | | #include <libavutil/sha.h> |
9 | | #include <libavutil/mem.h> |
10 | | |
11 | | #include "osdep/io.h" |
12 | | |
13 | | #include "common/common.h" |
14 | | #include "misc/hash.h" |
15 | | #include "misc/io_utils.h" |
16 | | #include "options/path.h" |
17 | | #include "stream/stream.h" |
18 | | #include "shader_cache.h" |
19 | | #include "utils.h" |
20 | | |
// Upper bound on the number of cached shaders; creating more than this many
// forces a flush of the cache.
#define SC_MAX_ENTRIES 256
23 | | |
// Value storage for a single uniform. Which member is meaningful depends on
// the ra_vartype of the input the value belongs to.
union uniform_val {
    float f[9];             // RA_VARTYPE_FLOAT (large enough for a mat3)
    int i[4];               // RA_VARTYPE_INT
    struct ra_tex *tex;     // RA_VARTYPE_TEX, RA_VARTYPE_IMG_*
    struct ra_buf *buf;     // RA_VARTYPE_BUF_*
};
30 | | |
// The mechanism through which a uniform's value is handed to the shader.
enum sc_uniform_type {
    SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM)
    SC_UNIFORM_TYPE_UBO    = 1, // uniform buffer (RA_CAP_BUF_RO)
    SC_UNIFORM_TYPE_PUSHC  = 2, // push constant (ra.max_pushc_size)
};
36 | | |
37 | | struct sc_uniform { |
38 | | enum sc_uniform_type type; |
39 | | struct ra_renderpass_input input; |
40 | | const char *glsl_type; |
41 | | union uniform_val v; |
42 | | char *buffer_format; |
43 | | // for SC_UNIFORM_TYPE_UBO/PUSHC: |
44 | | struct ra_layout layout; |
45 | | size_t offset; // byte offset within the buffer |
46 | | }; |
47 | | |
48 | | struct sc_cached_uniform { |
49 | | union uniform_val v; |
50 | | int index; // for ra_renderpass_input_val |
51 | | bool set; // whether the uniform has ever been set |
52 | | }; |
53 | | |
54 | | struct sc_entry { |
55 | | struct ra_renderpass *pass; |
56 | | struct sc_cached_uniform *cached_uniforms; |
57 | | int num_cached_uniforms; |
58 | | bstr total; |
59 | | struct timer_pool *timer; |
60 | | struct ra_buf *ubo; |
61 | | int ubo_index; // for ra_renderpass_input_val.index |
62 | | void *pushc; |
63 | | }; |
64 | | |
65 | | struct gl_shader_cache { |
66 | | struct ra *ra; |
67 | | struct mp_log *log; |
68 | | |
69 | | // permanent |
70 | | char **exts; |
71 | | int num_exts; |
72 | | |
73 | | // this is modified during use (gl_sc_add() etc.) and reset for each shader |
74 | | bstr prelude_text; |
75 | | bstr header_text; |
76 | | bstr text; |
77 | | |
78 | | // Next binding point (texture unit, image unit, buffer binding, etc.) |
79 | | // In OpenGL these are separate for each input type |
80 | | int next_binding[RA_VARTYPE_COUNT]; |
81 | | bool next_uniform_dynamic; |
82 | | |
83 | | struct ra_renderpass_params params; |
84 | | |
85 | | struct sc_entry **entries; |
86 | | int num_entries; |
87 | | |
88 | | struct sc_entry *current_shader; // set by gl_sc_generate() |
89 | | |
90 | | struct sc_uniform *uniforms; |
91 | | int num_uniforms; |
92 | | |
93 | | int ubo_binding; |
94 | | size_t ubo_size; |
95 | | size_t pushc_size; |
96 | | |
97 | | struct ra_renderpass_input_val *values; |
98 | | int num_values; |
99 | | |
100 | | // For checking that the user is calling gl_sc_reset() properly. |
101 | | bool needs_reset; |
102 | | |
103 | | bool error_state; // true if an error occurred |
104 | | |
105 | | // temporary buffers (avoids frequent reallocations) |
106 | | bstr tmp[6]; |
107 | | |
108 | | // For the disk-cache. |
109 | | char *cache_dir; |
110 | | struct mpv_global *global; // can be NULL |
111 | | }; |
112 | | |
113 | | struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, |
114 | | struct mp_log *log) |
115 | 0 | { |
116 | 0 | struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc); |
117 | 0 | *sc = (struct gl_shader_cache){ |
118 | 0 | .ra = ra, |
119 | 0 | .global = global, |
120 | 0 | .log = log, |
121 | 0 | }; |
122 | 0 | gl_sc_reset(sc); |
123 | 0 | return sc; |
124 | 0 | } |
125 | | |
126 | | // Reset the previous pass. This must be called after gl_sc_generate and before |
127 | | // starting a new shader. It may also be called on errors. |
128 | | void gl_sc_reset(struct gl_shader_cache *sc) |
129 | 0 | { |
130 | 0 | sc->prelude_text.len = 0; |
131 | 0 | sc->header_text.len = 0; |
132 | 0 | sc->text.len = 0; |
133 | 0 | for (int n = 0; n < sc->num_uniforms; n++) |
134 | 0 | talloc_free((void *)sc->uniforms[n].input.name); |
135 | 0 | sc->num_uniforms = 0; |
136 | 0 | sc->ubo_binding = 0; |
137 | 0 | sc->ubo_size = 0; |
138 | 0 | sc->pushc_size = 0; |
139 | 0 | for (int i = 0; i < RA_VARTYPE_COUNT; i++) |
140 | 0 | sc->next_binding[i] = 0; |
141 | 0 | sc->next_uniform_dynamic = false; |
142 | 0 | sc->current_shader = NULL; |
143 | 0 | sc->params = (struct ra_renderpass_params){0}; |
144 | 0 | sc->needs_reset = false; |
145 | 0 | } |
146 | | |
147 | | static void sc_flush_cache(struct gl_shader_cache *sc) |
148 | 0 | { |
149 | 0 | MP_DBG(sc, "flushing shader cache\n"); |
150 | |
|
151 | 0 | for (int n = 0; n < sc->num_entries; n++) { |
152 | 0 | struct sc_entry *e = sc->entries[n]; |
153 | 0 | ra_buf_free(sc->ra, &e->ubo); |
154 | 0 | if (e->pass) |
155 | 0 | sc->ra->fns->renderpass_destroy(sc->ra, e->pass); |
156 | 0 | timer_pool_destroy(e->timer); |
157 | 0 | talloc_free(e); |
158 | 0 | } |
159 | 0 | sc->num_entries = 0; |
160 | 0 | } |
161 | | |
// Tear down a shader cache: drop the pending shader state, destroy all cached
// shaders, then free the cache itself. Passing NULL is a no-op.
void gl_sc_destroy(struct gl_shader_cache *sc)
{
    if (sc) {
        gl_sc_reset(sc);
        sc_flush_cache(sc);
        talloc_free(sc);
    }
}
170 | | |
171 | | bool gl_sc_error_state(struct gl_shader_cache *sc) |
172 | 0 | { |
173 | 0 | return sc->error_state; |
174 | 0 | } |
175 | | |
176 | | void gl_sc_reset_error(struct gl_shader_cache *sc) |
177 | 0 | { |
178 | 0 | sc->error_state = false; |
179 | 0 | } |
180 | | |
181 | | void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name) |
182 | 0 | { |
183 | 0 | for (int n = 0; n < sc->num_exts; n++) { |
184 | 0 | if (strcmp(sc->exts[n], name) == 0) |
185 | 0 | return; |
186 | 0 | } |
187 | 0 | MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name)); |
188 | 0 | } |
189 | | |
190 | | void gl_sc_add(struct gl_shader_cache *sc, const char *text) |
191 | 0 | { |
192 | 0 | bstr_xappend0(sc, &sc->text, text); |
193 | 0 | } |
194 | | |
195 | | void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) |
196 | 0 | { |
197 | 0 | va_list ap; |
198 | 0 | va_start(ap, textf); |
199 | 0 | bstr_xappend_vasprintf(sc, &sc->text, textf, ap); |
200 | 0 | va_end(ap); |
201 | 0 | } |
202 | | |
203 | | void gl_sc_hadd(struct gl_shader_cache *sc, const char *text) |
204 | 0 | { |
205 | 0 | bstr_xappend0(sc, &sc->header_text, text); |
206 | 0 | } |
207 | | |
208 | | void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) |
209 | 0 | { |
210 | 0 | va_list ap; |
211 | 0 | va_start(ap, textf); |
212 | 0 | bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap); |
213 | 0 | va_end(ap); |
214 | 0 | } |
215 | | |
216 | | void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text) |
217 | 0 | { |
218 | 0 | bstr_xappend(sc, &sc->header_text, text); |
219 | 0 | } |
220 | | |
221 | | void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) |
222 | 0 | { |
223 | 0 | va_list ap; |
224 | 0 | va_start(ap, textf); |
225 | 0 | bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap); |
226 | 0 | va_end(ap); |
227 | 0 | } |
228 | | |
229 | | static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, |
230 | | struct bstr name) |
231 | 0 | { |
232 | 0 | struct sc_uniform new = { |
233 | 0 | .input = { |
234 | 0 | .dim_v = 1, |
235 | 0 | .dim_m = 1, |
236 | 0 | }, |
237 | 0 | }; |
238 | |
|
239 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
240 | 0 | struct sc_uniform *u = &sc->uniforms[n]; |
241 | 0 | if (bstrcmp(bstr0(u->input.name), name) == 0) { |
242 | 0 | const char *allocname = u->input.name; |
243 | 0 | *u = new; |
244 | 0 | u->input.name = allocname; |
245 | 0 | return u; |
246 | 0 | } |
247 | 0 | } |
248 | | |
249 | | // not found -> add it |
250 | 0 | new.input.name = bstrdup0(NULL, name); |
251 | 0 | MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); |
252 | 0 | return &sc->uniforms[sc->num_uniforms - 1]; |
253 | 0 | } |
254 | | |
255 | | static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) |
256 | 0 | { |
257 | 0 | return sc->next_binding[sc->ra->fns->desc_namespace(sc->ra, type)]++; |
258 | 0 | } |
259 | | |
260 | | void gl_sc_uniform_dynamic(struct gl_shader_cache *sc) |
261 | 0 | { |
262 | 0 | sc->next_uniform_dynamic = true; |
263 | 0 | } |
264 | | |
265 | | // Updates the metadata for the given sc_uniform. Assumes sc_uniform->input |
266 | | // and glsl_type/buffer_format are already set. |
267 | | static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u) |
268 | 0 | { |
269 | 0 | bool dynamic = sc->next_uniform_dynamic; |
270 | 0 | sc->next_uniform_dynamic = false; |
271 | | |
272 | | // Try not using push constants for "large" values like matrices, since |
273 | | // this is likely to both exceed the VGPR budget as well as the pushc size |
274 | | // budget |
275 | 0 | bool try_pushc = u->input.dim_m == 1 || dynamic; |
276 | | |
277 | | // Attempt using push constants first |
278 | 0 | if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) { |
279 | 0 | struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input); |
280 | 0 | size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align); |
281 | | // Push constants have limited size, so make sure we don't exceed this |
282 | 0 | size_t new_size = offset + layout.size; |
283 | 0 | if (new_size <= sc->ra->max_pushc_size) { |
284 | 0 | u->type = SC_UNIFORM_TYPE_PUSHC; |
285 | 0 | u->layout = layout; |
286 | 0 | u->offset = offset; |
287 | 0 | sc->pushc_size = new_size; |
288 | 0 | return; |
289 | 0 | } |
290 | 0 | } |
291 | | |
292 | | // Attempt using uniform buffer next. The GLSL version 440 check is due |
293 | | // to explicit offsets on UBO entries. In theory we could leave away |
294 | | // the offsets and support UBOs for older GL as well, but this is a nice |
295 | | // safety net for driver bugs (and also rules out potentially buggy drivers) |
296 | | // Also avoid UBOs for highly dynamic stuff since that requires synchronizing |
297 | | // the UBO writes every frame |
298 | 0 | bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic; |
299 | 0 | if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) { |
300 | 0 | u->type = SC_UNIFORM_TYPE_UBO; |
301 | 0 | u->layout = sc->ra->fns->uniform_layout(&u->input); |
302 | 0 | u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); |
303 | 0 | sc->ubo_size = u->offset + u->layout.size; |
304 | 0 | return; |
305 | 0 | } |
306 | | |
307 | | // If all else fails, use global uniforms |
308 | 0 | mp_assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); |
309 | 0 | u->type = SC_UNIFORM_TYPE_GLOBAL; |
310 | 0 | } |
311 | | |
312 | | void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, |
313 | | struct ra_tex *tex) |
314 | 0 | { |
315 | 0 | const char *glsl_type = "sampler2D"; |
316 | 0 | if (tex->params.dimensions == 1) { |
317 | 0 | glsl_type = "sampler1D"; |
318 | 0 | } else if (tex->params.dimensions == 3) { |
319 | 0 | glsl_type = "sampler3D"; |
320 | 0 | } else if (tex->params.non_normalized) { |
321 | 0 | glsl_type = "sampler2DRect"; |
322 | 0 | } else if (tex->params.external_oes) { |
323 | 0 | glsl_type = "samplerExternalOES"; |
324 | 0 | } else if (tex->params.format->ctype == RA_CTYPE_UINT) { |
325 | 0 | glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D"; |
326 | 0 | } |
327 | |
|
328 | 0 | struct sc_uniform *u = find_uniform(sc, bstr0(name)); |
329 | 0 | u->input.type = RA_VARTYPE_TEX; |
330 | 0 | u->glsl_type = glsl_type; |
331 | 0 | u->input.binding = gl_sc_next_binding(sc, u->input.type); |
332 | 0 | u->v.tex = tex; |
333 | 0 | } |
334 | | |
335 | | void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, |
336 | | struct ra_tex *tex) |
337 | 0 | { |
338 | 0 | gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store"); |
339 | |
|
340 | 0 | struct sc_uniform *u = find_uniform(sc, bstr0(name)); |
341 | 0 | u->input.type = RA_VARTYPE_IMG_W; |
342 | 0 | u->glsl_type = sc->ra->glsl_es ? "writeonly highp image2D" : "writeonly image2D"; |
343 | 0 | u->input.binding = gl_sc_next_binding(sc, u->input.type); |
344 | 0 | u->v.tex = tex; |
345 | 0 | } |
346 | | |
347 | | void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, |
348 | | char *format, ...) |
349 | 0 | { |
350 | 0 | mp_assert(sc->ra->caps & RA_CAP_BUF_RW); |
351 | 0 | gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); |
352 | |
|
353 | 0 | struct sc_uniform *u = find_uniform(sc, bstr0(name)); |
354 | 0 | u->input.type = RA_VARTYPE_BUF_RW; |
355 | 0 | u->glsl_type = ""; |
356 | 0 | u->input.binding = gl_sc_next_binding(sc, u->input.type); |
357 | 0 | u->v.buf = buf; |
358 | |
|
359 | 0 | va_list ap; |
360 | 0 | va_start(ap, format); |
361 | 0 | u->buffer_format = ta_vasprintf(sc, format, ap); |
362 | 0 | va_end(ap); |
363 | 0 | } |
364 | | |
365 | | void gl_sc_uniform_f_bstr(struct gl_shader_cache *sc, struct bstr name, float f) |
366 | 0 | { |
367 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
368 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
369 | 0 | u->glsl_type = "float"; |
370 | 0 | update_uniform_params(sc, u); |
371 | 0 | u->v.f[0] = f; |
372 | 0 | } |
373 | | |
374 | | void gl_sc_uniform_i_bstr(struct gl_shader_cache *sc, struct bstr name, int i) |
375 | 0 | { |
376 | 0 | struct sc_uniform *u = find_uniform(sc, name); |
377 | 0 | u->input.type = RA_VARTYPE_INT; |
378 | 0 | u->glsl_type = "int"; |
379 | 0 | update_uniform_params(sc, u); |
380 | 0 | u->v.i[0] = i; |
381 | 0 | } |
382 | | |
383 | | void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f) |
384 | 0 | { |
385 | 0 | gl_sc_uniform_f_bstr(sc, bstr0(name), f); |
386 | 0 | } |
387 | | |
388 | | void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i) |
389 | 0 | { |
390 | 0 | gl_sc_uniform_i_bstr(sc, bstr0(name), i); |
391 | 0 | } |
392 | | |
393 | | void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]) |
394 | 0 | { |
395 | 0 | struct sc_uniform *u = find_uniform(sc, bstr0(name)); |
396 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
397 | 0 | u->input.dim_v = 2; |
398 | 0 | u->glsl_type = "vec2"; |
399 | 0 | update_uniform_params(sc, u); |
400 | 0 | u->v.f[0] = f[0]; |
401 | 0 | u->v.f[1] = f[1]; |
402 | 0 | } |
403 | | |
404 | | void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]) |
405 | 0 | { |
406 | 0 | struct sc_uniform *u = find_uniform(sc, bstr0(name)); |
407 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
408 | 0 | u->input.dim_v = 3; |
409 | 0 | u->glsl_type = "vec3"; |
410 | 0 | update_uniform_params(sc, u); |
411 | 0 | u->v.f[0] = f[0]; |
412 | 0 | u->v.f[1] = f[1]; |
413 | 0 | u->v.f[2] = f[2]; |
414 | 0 | } |
415 | | |
// Transpose a 2x2 matrix stored as 4 consecutive floats, in place. Only the
// two off-diagonal elements need to trade places.
static void transpose2x2(float r[2 * 2])
{
    float upper = r[0 + 2 * 1];
    r[0 + 2 * 1] = r[1 + 2 * 0];
    r[1 + 2 * 0] = upper;
}
420 | | |
421 | | void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, |
422 | | bool transpose, float *v) |
423 | 0 | { |
424 | 0 | struct sc_uniform *u = find_uniform(sc, bstr0(name)); |
425 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
426 | 0 | u->input.dim_v = 2; |
427 | 0 | u->input.dim_m = 2; |
428 | 0 | u->glsl_type = "mat2"; |
429 | 0 | update_uniform_params(sc, u); |
430 | 0 | for (int n = 0; n < 4; n++) |
431 | 0 | u->v.f[n] = v[n]; |
432 | 0 | if (transpose) |
433 | 0 | transpose2x2(&u->v.f[0]); |
434 | 0 | } |
435 | | |
// Transpose a 3x3 matrix stored as 9 consecutive floats, in place, by
// exchanging each element above the diagonal with its mirror below it.
static void transpose3x3(float r[3 * 3])
{
    for (int a = 0; a < 3; a++) {
        for (int b = a + 1; b < 3; b++) {
            float tmp = r[a + 3 * b];
            r[a + 3 * b] = r[b + 3 * a];
            r[b + 3 * a] = tmp;
        }
    }
}
442 | | |
443 | | void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, |
444 | | bool transpose, float *v) |
445 | 0 | { |
446 | 0 | struct sc_uniform *u = find_uniform(sc, bstr0(name)); |
447 | 0 | u->input.type = RA_VARTYPE_FLOAT; |
448 | 0 | u->input.dim_v = 3; |
449 | 0 | u->input.dim_m = 3; |
450 | 0 | u->glsl_type = "mat3"; |
451 | 0 | update_uniform_params(sc, u); |
452 | 0 | for (int n = 0; n < 9; n++) |
453 | 0 | u->v.f[n] = v[n]; |
454 | 0 | if (transpose) |
455 | 0 | transpose3x3(&u->v.f[0]); |
456 | 0 | } |
457 | | |
458 | | void gl_sc_blend(struct gl_shader_cache *sc, |
459 | | enum ra_blend blend_src_rgb, |
460 | | enum ra_blend blend_dst_rgb, |
461 | | enum ra_blend blend_src_alpha, |
462 | | enum ra_blend blend_dst_alpha) |
463 | 0 | { |
464 | 0 | sc->params.enable_blend = true; |
465 | 0 | sc->params.blend_src_rgb = blend_src_rgb; |
466 | 0 | sc->params.blend_dst_rgb = blend_dst_rgb; |
467 | 0 | sc->params.blend_src_alpha = blend_src_alpha; |
468 | 0 | sc->params.blend_dst_alpha = blend_dst_alpha; |
469 | 0 | } |
470 | | |
471 | | const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims) |
472 | 0 | { |
473 | 0 | static const char *bvecs[] = { |
474 | 0 | [1] = "bool", |
475 | 0 | [2] = "bvec2", |
476 | 0 | [3] = "bvec3", |
477 | 0 | [4] = "bvec4", |
478 | 0 | }; |
479 | |
|
480 | 0 | static const char *vecs[] = { |
481 | 0 | [1] = "float", |
482 | 0 | [2] = "vec2", |
483 | 0 | [3] = "vec3", |
484 | 0 | [4] = "vec4", |
485 | 0 | }; |
486 | |
|
487 | 0 | mp_assert(dims > 0 && dims < MP_ARRAY_SIZE(bvecs)); |
488 | 0 | return sc->ra->glsl_version >= 130 ? bvecs[dims] : vecs[dims]; |
489 | 0 | } |
490 | | |
491 | | static const char *vao_glsl_type(const struct ra_renderpass_input *e) |
492 | 0 | { |
493 | | // pretty dumb... too dumb, but works for us |
494 | 0 | switch (e->dim_v) { |
495 | 0 | case 1: return "float"; |
496 | 0 | case 2: return "vec2"; |
497 | 0 | case 3: return "vec3"; |
498 | 0 | case 4: return "vec4"; |
499 | 0 | default: MP_ASSERT_UNREACHABLE(); |
500 | 0 | } |
501 | 0 | } |
502 | | |
503 | | static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u) |
504 | 0 | { |
505 | 0 | uintptr_t src = (uintptr_t) &u->v; |
506 | 0 | size_t dst = u->offset; |
507 | 0 | struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); |
508 | 0 | struct ra_layout dst_layout = u->layout; |
509 | |
|
510 | 0 | for (int i = 0; i < u->input.dim_m; i++) { |
511 | 0 | ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride); |
512 | 0 | src += src_layout.stride; |
513 | 0 | dst += dst_layout.stride; |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u) |
518 | 0 | { |
519 | 0 | uintptr_t src = (uintptr_t) &u->v; |
520 | 0 | uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset; |
521 | 0 | struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); |
522 | 0 | struct ra_layout dst_layout = u->layout; |
523 | |
|
524 | 0 | for (int i = 0; i < u->input.dim_m; i++) { |
525 | 0 | memcpy((void *)dst, (void *)src, src_layout.stride); |
526 | 0 | src += src_layout.stride; |
527 | 0 | dst += dst_layout.stride; |
528 | 0 | } |
529 | 0 | } |
530 | | |
531 | | static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, |
532 | | struct sc_uniform *u, int n) |
533 | 0 | { |
534 | 0 | struct sc_cached_uniform *un = &e->cached_uniforms[n]; |
535 | 0 | struct ra_layout layout = ra_renderpass_input_layout(&u->input); |
536 | 0 | if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0) |
537 | 0 | return; |
538 | | |
539 | 0 | un->v = u->v; |
540 | 0 | un->set = true; |
541 | |
|
542 | 0 | static const char *desc[] = { |
543 | 0 | [SC_UNIFORM_TYPE_UBO] = "UBO", |
544 | 0 | [SC_UNIFORM_TYPE_PUSHC] = "PC", |
545 | 0 | [SC_UNIFORM_TYPE_GLOBAL] = "global", |
546 | 0 | }; |
547 | 0 | MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name); |
548 | |
|
549 | 0 | switch (u->type) { |
550 | 0 | case SC_UNIFORM_TYPE_GLOBAL: { |
551 | 0 | struct ra_renderpass_input_val value = { |
552 | 0 | .index = un->index, |
553 | 0 | .data = &un->v, |
554 | 0 | }; |
555 | 0 | MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); |
556 | 0 | break; |
557 | 0 | } |
558 | 0 | case SC_UNIFORM_TYPE_UBO: |
559 | 0 | mp_assert(e->ubo); |
560 | 0 | update_ubo(sc->ra, e->ubo, u); |
561 | 0 | break; |
562 | 0 | case SC_UNIFORM_TYPE_PUSHC: |
563 | 0 | mp_assert(e->pushc); |
564 | 0 | update_pushc(sc->ra, e->pushc, u); |
565 | 0 | break; |
566 | 0 | default: MP_ASSERT_UNREACHABLE(); |
567 | 0 | } |
568 | 0 | } |
569 | | |
570 | | void gl_sc_set_cache_dir(struct gl_shader_cache *sc, char *dir) |
571 | 0 | { |
572 | 0 | talloc_free(sc->cache_dir); |
573 | 0 | if (dir && dir[0]) { |
574 | 0 | dir = mp_get_user_path(NULL, sc->global, dir); |
575 | 0 | } else { |
576 | 0 | dir = mp_find_user_file(NULL, sc->global, "cache", ""); |
577 | 0 | } |
578 | 0 | sc->cache_dir = talloc_strdup(sc, dir); |
579 | 0 | talloc_free(dir); |
580 | 0 | } |
581 | | |
582 | | static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) |
583 | 0 | { |
584 | 0 | bool ret = false; |
585 | |
|
586 | 0 | void *tmp = talloc_new(NULL); |
587 | 0 | struct ra_renderpass_params params = sc->params; |
588 | |
|
589 | 0 | const char *cache_header = "mpv shader cache v1\n"; |
590 | 0 | char *cache_filename = NULL; |
591 | 0 | char *cache_dir = NULL; |
592 | |
|
593 | 0 | if (sc->cache_dir && sc->cache_dir[0]) { |
594 | | // Try to load it from a disk cache. |
595 | 0 | cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); |
596 | |
|
597 | 0 | bstr hashstr = mp_hash_to_bstr(tmp, entry->total.start, entry->total.len, "SHA256"); |
598 | 0 | cache_filename = mp_path_join_bstr(tmp, bstr0(cache_dir), hashstr); |
599 | 0 | if (stat(cache_filename, &(struct stat){0}) == 0) { |
600 | 0 | MP_DBG(sc, "Trying to load shader from disk...\n"); |
601 | 0 | struct bstr cachedata = |
602 | 0 | stream_read_file(cache_filename, tmp, sc->global, 1000000000); |
603 | 0 | if (bstr_eatstart0(&cachedata, cache_header)) |
604 | 0 | params.cached_program = cachedata; |
605 | 0 | } |
606 | 0 | } |
607 | | |
608 | | // If using a UBO, also make sure to add it as an input value so the RA |
609 | | // can see it |
610 | 0 | if (sc->ubo_size) { |
611 | 0 | entry->ubo_index = sc->params.num_inputs; |
612 | 0 | struct ra_renderpass_input ubo_input = { |
613 | 0 | .name = "UBO", |
614 | 0 | .type = RA_VARTYPE_BUF_RO, |
615 | 0 | .dim_v = 1, |
616 | 0 | .dim_m = 1, |
617 | 0 | .binding = sc->ubo_binding, |
618 | 0 | }; |
619 | 0 | MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input); |
620 | 0 | } |
621 | |
|
622 | 0 | if (sc->pushc_size) { |
623 | 0 | params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4); |
624 | 0 | entry->pushc = talloc_zero_size(entry, params.push_constants_size); |
625 | 0 | } |
626 | |
|
627 | 0 | if (sc->ubo_size) { |
628 | 0 | struct ra_buf_params ubo_params = { |
629 | 0 | .type = RA_BUF_TYPE_UNIFORM, |
630 | 0 | .size = sc->ubo_size, |
631 | 0 | .host_mutable = true, |
632 | 0 | }; |
633 | |
|
634 | 0 | entry->ubo = ra_buf_create(sc->ra, &ubo_params); |
635 | 0 | if (!entry->ubo) { |
636 | 0 | MP_ERR(sc, "Failed creating uniform buffer!\n"); |
637 | 0 | goto error; |
638 | 0 | } |
639 | 0 | } |
640 | | |
641 | 0 | entry->pass = sc->ra->fns->renderpass_create(sc->ra, ¶ms); |
642 | 0 | if (!entry->pass) |
643 | 0 | goto error; |
644 | | |
645 | 0 | if (entry->pass && cache_filename) { |
646 | 0 | bstr nc = entry->pass->params.cached_program; |
647 | 0 | if (nc.len && !bstr_equals(params.cached_program, nc)) { |
648 | 0 | mp_mkdirp(cache_dir); |
649 | |
|
650 | 0 | MP_DBG(sc, "Writing shader cache file: %s\n", cache_filename); |
651 | 0 | bstr out = {0}; |
652 | 0 | bstr_xappend0(tmp, &out, cache_header); |
653 | 0 | bstr_xappend(tmp, &out, nc); |
654 | 0 | mp_save_to_file(cache_filename, out.start, out.len); |
655 | 0 | } |
656 | 0 | } |
657 | |
|
658 | 0 | ret = true; |
659 | |
|
660 | 0 | error: |
661 | 0 | talloc_free(tmp); |
662 | 0 | return ret; |
663 | 0 | } |
664 | | |
// Shader text helpers. Both append to the bstr pointed to by x and expect a
// variable named "sc" in scope, which is used as the allocation context.
// ADD takes a printf-style format plus arguments, ADD_BSTR takes a bstr.
#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__)
#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s))
667 | | |
668 | | static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) |
669 | 0 | { |
670 | | // Add all of the UBO entries separately as members of their own buffer |
671 | 0 | if (sc->ubo_size > 0) { |
672 | 0 | ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding); |
673 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
674 | 0 | struct sc_uniform *u = &sc->uniforms[n]; |
675 | 0 | if (u->type != SC_UNIFORM_TYPE_UBO) |
676 | 0 | continue; |
677 | 0 | ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, |
678 | 0 | u->input.name); |
679 | 0 | } |
680 | 0 | ADD(dst, "};\n"); |
681 | 0 | } |
682 | | |
683 | | // Ditto for push constants |
684 | 0 | if (sc->pushc_size > 0) { |
685 | 0 | ADD(dst, "layout(std430, push_constant) uniform PushC {\n"); |
686 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
687 | 0 | struct sc_uniform *u = &sc->uniforms[n]; |
688 | 0 | if (u->type != SC_UNIFORM_TYPE_PUSHC) |
689 | 0 | continue; |
690 | 0 | ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, |
691 | 0 | u->input.name); |
692 | 0 | } |
693 | 0 | ADD(dst, "};\n"); |
694 | 0 | } |
695 | |
|
696 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
697 | 0 | struct sc_uniform *u = &sc->uniforms[n]; |
698 | 0 | if (u->type != SC_UNIFORM_TYPE_GLOBAL) |
699 | 0 | continue; |
700 | 0 | switch (u->input.type) { |
701 | 0 | case RA_VARTYPE_INT: |
702 | 0 | case RA_VARTYPE_FLOAT: |
703 | 0 | mp_assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); |
704 | 0 | MP_FALLTHROUGH; |
705 | 0 | case RA_VARTYPE_TEX: |
706 | | // Vulkan requires explicitly assigning the bindings in the shader |
707 | | // source. For OpenGL it's optional, but requires higher GL version |
708 | | // so we don't do it (and instead have ra_gl update the bindings |
709 | | // after program creation). |
710 | 0 | if (sc->ra->glsl_vulkan) |
711 | 0 | ADD(dst, "layout(binding=%d) ", u->input.binding); |
712 | 0 | ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name); |
713 | 0 | break; |
714 | 0 | case RA_VARTYPE_BUF_RO: |
715 | 0 | ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n", |
716 | 0 | u->input.binding, u->input.name, u->buffer_format); |
717 | 0 | break; |
718 | 0 | case RA_VARTYPE_BUF_RW: |
719 | 0 | ADD(dst, "layout(std430, binding=%d) restrict coherent buffer %s { %s };\n", |
720 | 0 | u->input.binding, u->input.name, u->buffer_format); |
721 | 0 | break; |
722 | 0 | case RA_VARTYPE_IMG_W: { |
723 | | // For better compatibility, we have to explicitly label the |
724 | | // type of data we will be reading/writing to this image. |
725 | 0 | const char *fmt = u->v.tex->params.format->glsl_format; |
726 | |
|
727 | 0 | if (sc->ra->glsl_vulkan) { |
728 | 0 | if (fmt) { |
729 | 0 | ADD(dst, "layout(binding=%d, %s) ", u->input.binding, fmt); |
730 | 0 | } else { |
731 | 0 | ADD(dst, "layout(binding=%d) ", u->input.binding); |
732 | 0 | } |
733 | 0 | } else if (fmt) { |
734 | 0 | ADD(dst, "layout(%s) ", fmt); |
735 | 0 | } |
736 | 0 | ADD(dst, "uniform restrict %s %s;\n", u->glsl_type, u->input.name); |
737 | 0 | } |
738 | 0 | } |
739 | 0 | } |
740 | 0 | } |
741 | | |
742 | | // 1. Generate vertex and fragment shaders from the fragment shader text added |
743 | | // with gl_sc_add(). The generated shader program is cached (based on the |
744 | | // text), so actual compilation happens only the first time. |
745 | | // 2. Update the uniforms and textures set with gl_sc_uniform_*. |
746 | | // 3. Make the new shader program current (glUseProgram()). |
747 | | // After that, you render, and then you call gl_sc_reset(), which does: |
748 | | // 1. Unbind the program and all textures. |
749 | | // 2. Reset the sc state and prepare for a new shader program. (All uniforms |
750 | | // and fragment operations needed for the next program have to be re-added.) |
751 | | static void gl_sc_generate(struct gl_shader_cache *sc, |
752 | | enum ra_renderpass_type type, |
753 | | const struct ra_format *target_format, |
754 | | const struct ra_renderpass_input *vao, |
755 | | int vao_len, size_t vertex_stride) |
756 | 0 | { |
757 | 0 | int glsl_version = sc->ra->glsl_version; |
758 | 0 | int glsl_es = sc->ra->glsl_es ? glsl_version : 0; |
759 | |
|
760 | 0 | sc->params.type = type; |
761 | | |
762 | | // gl_sc_reset() must be called after ending the previous render process, |
763 | | // and before starting a new one. |
764 | 0 | mp_assert(!sc->needs_reset); |
765 | 0 | sc->needs_reset = true; |
766 | | |
767 | | // If using a UBO, pick a binding (needed for shader generation) |
768 | 0 | if (sc->ubo_size) |
769 | 0 | sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO); |
770 | |
|
771 | 0 | for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++) |
772 | 0 | sc->tmp[n].len = 0; |
773 | | |
774 | | // set up shader text (header + uniforms + body) |
775 | 0 | bstr *header = &sc->tmp[0]; |
776 | 0 | ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : ""); |
777 | 0 | if (type == RA_RENDERPASS_TYPE_COMPUTE) { |
778 | | // This extension cannot be enabled in fragment shader. Enable it as |
779 | | // an exception for compute shader. |
780 | 0 | ADD(header, "#extension GL_ARB_compute_shader : enable\n"); |
781 | 0 | } |
782 | 0 | for (int n = 0; n < sc->num_exts; n++) |
783 | 0 | ADD(header, "#extension %s : enable\n", sc->exts[n]); |
784 | 0 | if (glsl_es) { |
785 | 0 | ADD(header, "#ifdef GL_FRAGMENT_PRECISION_HIGH\n"); |
786 | 0 | ADD(header, "precision highp float;\n"); |
787 | 0 | ADD(header, "#else\n"); |
788 | 0 | ADD(header, "precision mediump float;\n"); |
789 | 0 | ADD(header, "#endif\n"); |
790 | |
|
791 | 0 | ADD(header, "precision mediump sampler2D;\n"); |
792 | 0 | if (sc->ra->caps & RA_CAP_TEX_3D) |
793 | 0 | ADD(header, "precision mediump sampler3D;\n"); |
794 | 0 | } |
795 | |
|
796 | 0 | if (glsl_version >= 130) { |
797 | 0 | ADD(header, "#define tex1D texture\n"); |
798 | 0 | ADD(header, "#define tex3D texture\n"); |
799 | 0 | } else { |
800 | 0 | ADD(header, "#define tex1D texture1D\n"); |
801 | 0 | ADD(header, "#define tex3D texture3D\n"); |
802 | 0 | ADD(header, "#define texture texture2D\n"); |
803 | 0 | } |
804 | | |
805 | | // Additional helpers. |
806 | 0 | ADD(header, "#define LUT_POS(x, lut_size)" |
807 | 0 | " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n"); |
808 | |
|
809 | 0 | char *vert_in = glsl_version >= 130 ? "in" : "attribute"; |
810 | 0 | char *vert_out = glsl_version >= 130 ? "out" : "varying"; |
811 | 0 | char *frag_in = glsl_version >= 130 ? "in" : "varying"; |
812 | |
|
813 | 0 | struct bstr *vert = NULL, *frag = NULL, *comp = NULL; |
814 | |
|
815 | 0 | if (type == RA_RENDERPASS_TYPE_RASTER) { |
816 | | // vertex shader: we don't use the vertex shader, so just setup a |
817 | | // dummy, which passes through the vertex array attributes. |
818 | 0 | bstr *vert_head = &sc->tmp[1]; |
819 | 0 | ADD_BSTR(vert_head, *header); |
820 | 0 | bstr *vert_body = &sc->tmp[2]; |
821 | 0 | ADD(vert_body, "void main() {\n"); |
822 | 0 | bstr *frag_vaos = &sc->tmp[3]; |
823 | 0 | for (int n = 0; n < vao_len; n++) { |
824 | 0 | const struct ra_renderpass_input *e = &vao[n]; |
825 | 0 | const char *glsl_type = vao_glsl_type(e); |
826 | 0 | char loc[32] = {0}; |
827 | 0 | if (sc->ra->glsl_vulkan) |
828 | 0 | snprintf(loc, sizeof(loc), "layout(location=%d) ", n); |
829 | 0 | if (strcmp(e->name, "position") == 0) { |
830 | | // setting raster pos. requires setting gl_Position magic variable |
831 | 0 | mp_assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT); |
832 | 0 | ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in); |
833 | 0 | ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n"); |
834 | 0 | } else { |
835 | 0 | ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name); |
836 | 0 | ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name); |
837 | 0 | ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name); |
838 | 0 | ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name); |
839 | 0 | } |
840 | 0 | } |
841 | 0 | ADD(vert_body, "}\n"); |
842 | 0 | vert = vert_head; |
843 | 0 | ADD_BSTR(vert, *vert_body); |
844 | | |
845 | | // fragment shader; still requires adding used uniforms and VAO elements |
846 | 0 | frag = &sc->tmp[4]; |
847 | 0 | ADD_BSTR(frag, *header); |
848 | 0 | if (glsl_version >= 130) { |
849 | 0 | ADD(frag, "%sout vec4 out_color;\n", |
850 | 0 | sc->ra->glsl_vulkan ? "layout(location=0) " : ""); |
851 | 0 | } |
852 | 0 | ADD_BSTR(frag, *frag_vaos); |
853 | 0 | add_uniforms(sc, frag); |
854 | |
|
855 | 0 | ADD_BSTR(frag, sc->prelude_text); |
856 | 0 | ADD_BSTR(frag, sc->header_text); |
857 | |
|
858 | 0 | ADD(frag, "void main() {\n"); |
859 | | // we require _all_ frag shaders to write to a "vec4 color" |
860 | 0 | ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); |
861 | 0 | ADD_BSTR(frag, sc->text); |
862 | 0 | if (glsl_version >= 130) { |
863 | 0 | ADD(frag, "out_color = color;\n"); |
864 | 0 | } else { |
865 | 0 | ADD(frag, "gl_FragColor = color;\n"); |
866 | 0 | } |
867 | 0 | ADD(frag, "}\n"); |
868 | | |
869 | | // We need to fix the format of the render dst at renderpass creation |
870 | | // time |
871 | 0 | mp_assert(target_format); |
872 | 0 | sc->params.target_format = target_format; |
873 | 0 | } |
874 | | |
875 | 0 | if (type == RA_RENDERPASS_TYPE_COMPUTE) { |
876 | 0 | comp = &sc->tmp[4]; |
877 | 0 | ADD_BSTR(comp, *header); |
878 | |
|
879 | 0 | add_uniforms(sc, comp); |
880 | |
|
881 | 0 | ADD_BSTR(comp, sc->prelude_text); |
882 | 0 | ADD_BSTR(comp, sc->header_text); |
883 | |
|
884 | 0 | ADD(comp, "void main() {\n"); |
885 | 0 | ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience |
886 | 0 | ADD_BSTR(comp, sc->text); |
887 | 0 | ADD(comp, "}\n"); |
888 | 0 | } |
889 | |
|
890 | 0 | bstr *hash_total = &sc->tmp[5]; |
891 | |
|
892 | 0 | ADD(hash_total, "type %d\n", sc->params.type); |
893 | |
|
894 | 0 | if (frag) { |
895 | 0 | ADD_BSTR(hash_total, *frag); |
896 | 0 | sc->params.frag_shader = frag->start; |
897 | 0 | } |
898 | 0 | ADD(hash_total, "\n"); |
899 | 0 | if (vert) { |
900 | 0 | ADD_BSTR(hash_total, *vert); |
901 | 0 | sc->params.vertex_shader = vert->start; |
902 | 0 | } |
903 | 0 | ADD(hash_total, "\n"); |
904 | 0 | if (comp) { |
905 | 0 | ADD_BSTR(hash_total, *comp); |
906 | 0 | sc->params.compute_shader = comp->start; |
907 | 0 | } |
908 | 0 | ADD(hash_total, "\n"); |
909 | |
|
910 | 0 | if (sc->params.enable_blend) { |
911 | 0 | ADD(hash_total, "blend %d %d %d %d\n", |
912 | 0 | sc->params.blend_src_rgb, sc->params.blend_dst_rgb, |
913 | 0 | sc->params.blend_src_alpha, sc->params.blend_dst_alpha); |
914 | 0 | } |
915 | |
|
916 | 0 | if (sc->params.target_format) |
917 | 0 | ADD(hash_total, "format %s\n", sc->params.target_format->name); |
918 | |
|
919 | 0 | struct sc_entry *entry = NULL; |
920 | 0 | for (int n = 0; n < sc->num_entries; n++) { |
921 | 0 | struct sc_entry *cur = sc->entries[n]; |
922 | 0 | if (bstr_equals(cur->total, *hash_total)) { |
923 | 0 | entry = cur; |
924 | 0 | break; |
925 | 0 | } |
926 | 0 | } |
927 | 0 | if (!entry) { |
928 | 0 | if (sc->num_entries == SC_MAX_ENTRIES) |
929 | 0 | sc_flush_cache(sc); |
930 | 0 | entry = talloc_ptrtype(NULL, entry); |
931 | 0 | *entry = (struct sc_entry){ |
932 | 0 | .total = bstrdup(entry, *hash_total), |
933 | 0 | .timer = timer_pool_create(sc->ra), |
934 | 0 | }; |
935 | | |
936 | | // The vertex shader uses mangled names for the vertex attributes, so |
937 | | // that the fragment shader can use the "real" names. But the shader is |
938 | | // expecting the vertex attribute names (at least with older GLSL |
939 | | // targets for GL). |
940 | 0 | sc->params.vertex_stride = vertex_stride; |
941 | 0 | for (int n = 0; n < vao_len; n++) { |
942 | 0 | struct ra_renderpass_input attrib = vao[n]; |
943 | 0 | attrib.name = talloc_asprintf(entry, "vertex_%s", attrib.name); |
944 | 0 | MP_TARRAY_APPEND(sc, sc->params.vertex_attribs, |
945 | 0 | sc->params.num_vertex_attribs, attrib); |
946 | 0 | } |
947 | |
|
948 | 0 | for (int n = 0; n < sc->num_uniforms; n++) { |
949 | 0 | struct sc_cached_uniform u = {0}; |
950 | 0 | if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) { |
951 | | // global uniforms need to be made visible to the ra_renderpass |
952 | 0 | u.index = sc->params.num_inputs; |
953 | 0 | MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs, |
954 | 0 | sc->uniforms[n].input); |
955 | 0 | } |
956 | 0 | MP_TARRAY_APPEND(entry, entry->cached_uniforms, |
957 | 0 | entry->num_cached_uniforms, u); |
958 | 0 | } |
959 | 0 | if (!create_pass(sc, entry)) |
960 | 0 | sc->error_state = true; |
961 | 0 | MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry); |
962 | 0 | } |
963 | |
|
964 | 0 | if (!entry->pass) { |
965 | 0 | sc->current_shader = NULL; |
966 | 0 | return; |
967 | 0 | } |
968 | | |
969 | 0 | mp_assert(sc->num_uniforms == entry->num_cached_uniforms); |
970 | | |
971 | 0 | sc->num_values = 0; |
972 | 0 | for (int n = 0; n < sc->num_uniforms; n++) |
973 | 0 | update_uniform(sc, entry, &sc->uniforms[n], n); |
974 | | |
975 | | // If we're using a UBO, make sure to bind it as well |
976 | 0 | if (sc->ubo_size) { |
977 | 0 | struct ra_renderpass_input_val ubo_val = { |
978 | 0 | .index = entry->ubo_index, |
979 | 0 | .data = &entry->ubo, |
980 | 0 | }; |
981 | 0 | MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val); |
982 | 0 | } |
983 | |
|
984 | 0 | sc->current_shader = entry; |
985 | 0 | } |
986 | | |
987 | | struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, |
988 | | struct ra_tex *target, bool discard, |
989 | | const struct ra_renderpass_input *vao, |
990 | | int vao_len, size_t vertex_stride, |
991 | | void *vertices, size_t num_vertices) |
992 | 0 | { |
993 | 0 | struct timer_pool *timer = NULL; |
994 | |
|
995 | 0 | sc->params.invalidate_target = discard; |
996 | 0 | gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format, |
997 | 0 | vao, vao_len, vertex_stride); |
998 | 0 | if (!sc->current_shader) |
999 | 0 | goto error; |
1000 | | |
1001 | 0 | timer = sc->current_shader->timer; |
1002 | |
|
1003 | 0 | struct mp_rect full_rc = {0, 0, target->params.w, target->params.h}; |
1004 | |
|
1005 | 0 | struct ra_renderpass_run_params run = { |
1006 | 0 | .pass = sc->current_shader->pass, |
1007 | 0 | .values = sc->values, |
1008 | 0 | .num_values = sc->num_values, |
1009 | 0 | .push_constants = sc->current_shader->pushc, |
1010 | 0 | .target = target, |
1011 | 0 | .vertex_data = vertices, |
1012 | 0 | .vertex_count = num_vertices, |
1013 | 0 | .viewport = full_rc, |
1014 | 0 | .scissors = full_rc, |
1015 | 0 | }; |
1016 | |
|
1017 | 0 | timer_pool_start(timer); |
1018 | 0 | sc->ra->fns->renderpass_run(sc->ra, &run); |
1019 | 0 | timer_pool_stop(timer); |
1020 | |
|
1021 | 0 | error: |
1022 | 0 | gl_sc_reset(sc); |
1023 | 0 | return timer_pool_measure(timer); |
1024 | 0 | } |
1025 | | |
1026 | | struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, |
1027 | | int w, int h, int d) |
1028 | 0 | { |
1029 | 0 | struct timer_pool *timer = NULL; |
1030 | |
|
1031 | 0 | gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL, NULL, 0, 0); |
1032 | 0 | if (!sc->current_shader) |
1033 | 0 | goto error; |
1034 | | |
1035 | 0 | timer = sc->current_shader->timer; |
1036 | |
|
1037 | 0 | struct ra_renderpass_run_params run = { |
1038 | 0 | .pass = sc->current_shader->pass, |
1039 | 0 | .values = sc->values, |
1040 | 0 | .num_values = sc->num_values, |
1041 | 0 | .push_constants = sc->current_shader->pushc, |
1042 | 0 | .compute_groups = {w, h, d}, |
1043 | 0 | }; |
1044 | |
|
1045 | 0 | timer_pool_start(timer); |
1046 | 0 | sc->ra->fns->renderpass_run(sc->ra, &run); |
1047 | 0 | timer_pool_stop(timer); |
1048 | |
|
1049 | 0 | error: |
1050 | 0 | gl_sc_reset(sc); |
1051 | 0 | return timer_pool_measure(timer); |
1052 | 0 | } |