/src/mpv/video/out/opengl/ra_gl.c
#include <libavutil/intreadwrite.h>

#include "formats.h"
#include "utils.h"
#include "ra_gl.h"

static const struct ra_fns ra_fns_gl;

// For ra.priv
struct ra_gl {
    GL *gl;
    bool debug_enable;
    bool timer_active; // hack for GL_TIME_ELAPSED limitations
};

// For ra_tex.priv
struct ra_tex_gl {
    struct ra_buf_pool pbo; // for ra.use_pbo
    bool own_objects;
    GLenum target;
    GLuint texture; // 0 if no texture data associated
    GLuint fbo; // 0 if no rendering requested, or if it's the default framebuffer
    // These 3 fields can be 0 if unknown.
    GLint internal_format;
    GLenum format;
    GLenum type;
};

// For ra_buf.priv
struct ra_buf_gl {
    GLenum target;
    GLuint buffer;
    GLsync fence;
};

// For ra_renderpass.priv
struct ra_renderpass_gl {
    GLuint program;
    // 1 entry for each ra_renderpass_params.inputs[] entry
    GLint *uniform_loc;
    int num_uniform_loc; // == ra_renderpass_params.num_inputs
    struct gl_vao vao;
};

// (Init time only.)
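// Probe the bit depth the driver actually allocated for each component of
// the format, by creating a small dummy texture and reading back the
// GL_TEXTURE_*_SIZE parameters; drivers may pick a different depth than the
// nominal one implied by the internal format.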
static void probe_real_size(GL *gl, struct ra_format *fmt)
{
    const struct gl_format *gl_fmt = fmt->priv;

    if (!gl->GetTexLevelParameteriv)
        return; // GLES

    bool is_la = gl_fmt->format == GL_LUMINANCE ||
                 gl_fmt->format == GL_LUMINANCE_ALPHA;
    if (is_la && gl->es)
        return; // GLES doesn't provide GL_TEXTURE_LUMINANCE_SIZE.

    GLuint tex;
    gl->GenTextures(1, &tex);
    gl->BindTexture(GL_TEXTURE_2D, tex);
    gl->TexImage2D(GL_TEXTURE_2D, 0, gl_fmt->internal_format, 64, 64, 0,
                   gl_fmt->format, gl_fmt->type, NULL);
    for (int i = 0; i < fmt->num_components; i++) {
        const GLenum pnames[] = {
            GL_TEXTURE_RED_SIZE,
            GL_TEXTURE_GREEN_SIZE,
            GL_TEXTURE_BLUE_SIZE,
            GL_TEXTURE_ALPHA_SIZE,
            GL_TEXTURE_LUMINANCE_SIZE,
            GL_TEXTURE_ALPHA_SIZE,
        };
        int comp = is_la ? i + 4 : i;
        mp_assert(comp < MP_ARRAY_SIZE(pnames));
        GLint param = -1;
        gl->GetTexLevelParameteriv(GL_TEXTURE_2D, 0, pnames[comp], &param);
        fmt->component_depth[i] = param > 0 ? param : 0;
    }
    gl->DeleteTextures(1, &tex);
}

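// Initialize the ra instance for the given GL context: verify the minimum
// GL/GLES version, map GL capabilities to RA_CAP_* flags, and register all
// usable entries from the gl_formats table as ra_formats.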
static int ra_init_gl(struct ra *ra, GL *gl)
{
    if (gl->version < 210 && gl->es < 200) {
        MP_ERR(ra, "At least OpenGL 2.1 or OpenGL ES 2.0 required.\n");
        return -1;
    }

    struct ra_gl *p = ra->priv = talloc_zero(NULL, struct ra_gl);
    p->gl = gl;

    ra_gl_set_debug(ra, true);

    ra->fns = &ra_fns_gl;
    ra->glsl_version = gl->glsl_version;
    ra->glsl_es = gl->es > 0;

    static const int caps_map[][2] = {
        {RA_CAP_DIRECT_UPLOAD, 0},
        {RA_CAP_GLOBAL_UNIFORM, 0},
        {RA_CAP_FRAGCOORD, 0},
        {RA_CAP_TEX_1D, MPGL_CAP_1D_TEX},
        {RA_CAP_TEX_3D, MPGL_CAP_3D_TEX},
        {RA_CAP_COMPUTE, MPGL_CAP_COMPUTE_SHADER},
        {RA_CAP_NUM_GROUPS, MPGL_CAP_COMPUTE_SHADER},
        {RA_CAP_NESTED_ARRAY, MPGL_CAP_NESTED_ARRAY},
        {RA_CAP_SLOW_DR, MPGL_CAP_SLOW_DR},
    };

    for (int i = 0; i < MP_ARRAY_SIZE(caps_map); i++) {
        if ((gl->mpgl_caps & caps_map[i][1]) == caps_map[i][1])
            ra->caps |= caps_map[i][0];
    }

    if (gl->BindBufferBase) {
        if (gl->mpgl_caps & MPGL_CAP_UBO)
            ra->caps |= RA_CAP_BUF_RO;
        if (gl->mpgl_caps & MPGL_CAP_SSBO)
            ra->caps |= RA_CAP_BUF_RW;
    }

    // textureGather is only supported in GLSL 400+ / ES 310+
    if (ra->glsl_version >= (ra->glsl_es ? 310 : 400))
        ra->caps |= RA_CAP_GATHER;

    if (gl->BlitFramebuffer)
        ra->caps |= RA_CAP_BLIT;

    // Disable compute shaders for GLSL < 420. This work-around is needed since
    // some buggy OpenGL drivers expose compute shaders for lower GLSL versions,
    // despite the spec requiring 420+.
    if (ra->glsl_version < (ra->glsl_es ? 310 : 420)) {
        ra->caps &= ~RA_CAP_COMPUTE;
    }

    // While we can handle compute shaders on GLES, the spec (intentionally)
    // does not support binding textures for writing, which all uses inside mpv
    // would require. So disable it unconditionally anyway.
    if (ra->glsl_es)
        ra->caps &= ~RA_CAP_COMPUTE;

    int gl_fmt_features = gl_format_feature_flags(gl);

    for (int n = 0; gl_formats[n].internal_format; n++) {
        const struct gl_format *gl_fmt = &gl_formats[n];

        if (!(gl_fmt->flags & gl_fmt_features))
            continue;

        struct ra_format *fmt = talloc_zero(ra, struct ra_format);
        *fmt = (struct ra_format){
            .name = gl_fmt->name,
            .priv = (void *)gl_fmt,
            .ctype = gl_format_type(gl_fmt),
            .num_components = gl_format_components(gl_fmt->format),
            .ordered = gl_fmt->format != GL_RGB_422_APPLE,
            .pixel_size = gl_bytes_per_pixel(gl_fmt->format, gl_fmt->type),
            .luminance_alpha = gl_fmt->format == GL_LUMINANCE_ALPHA,
            .linear_filter = gl_fmt->flags & F_TF,
            .renderable = (gl_fmt->flags & F_CR) &&
                          (gl->mpgl_caps & MPGL_CAP_FB),
            // TODO: Check whether it's a storable format
            // https://www.khronos.org/opengl/wiki/Image_Load_Store
            .storable = true,
        };

        int csize = gl_component_size(gl_fmt->type) * 8;
        int depth = csize;

        if (gl_fmt->flags & F_F16) {
            depth = 16;
            csize = 32; // always upload as GL_FLOAT (simpler for us)
        }

        for (int i = 0; i < fmt->num_components; i++) {
            fmt->component_size[i] = csize;
            fmt->component_depth[i] = depth;
        }

        if (fmt->ctype == RA_CTYPE_UNORM && depth != 8)
            probe_real_size(gl, fmt);

        // Special formats for which OpenGL happens to have direct support.
        if (strcmp(fmt->name, "rgb565") == 0) {
            fmt->special_imgfmt = IMGFMT_RGB565;
            struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc);
            fmt->special_imgfmt_desc = desc;
            desc->num_planes = 1;
            desc->planes[0] = fmt;
            for (int i = 0; i < 3; i++)
                desc->components[0][i] = i + 1;
            desc->chroma_w = desc->chroma_h = 1;
        }
        if (strcmp(fmt->name, "rgb10_a2") == 0) {
            fmt->special_imgfmt = IMGFMT_RGB30;
            struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc);
            fmt->special_imgfmt_desc = desc;
            desc->component_bits = 10;
            desc->num_planes = 1;
            desc->planes[0] = fmt;
            for (int i = 0; i < 3; i++)
                desc->components[0][i] = 3 - i;
            desc->chroma_w = desc->chroma_h = 1;
        }
        if (strcmp(fmt->name, "appleyp") == 0) {
            fmt->special_imgfmt = IMGFMT_UYVY;
            struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc);
            fmt->special_imgfmt_desc = desc;
            desc->num_planes = 1;
            desc->planes[0] = fmt;
            desc->components[0][0] = 3;
            desc->components[0][1] = 1;
            desc->components[0][2] = 2;
            desc->chroma_w = desc->chroma_h = 1;
        }

        fmt->glsl_format = ra_fmt_glsl_format(fmt);

        MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
    }

    GLint ival;
    gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &ival);
    ra->max_texture_wh = ival;

    if (ra->caps & RA_CAP_COMPUTE) {
        gl->GetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &ival);
        ra->max_shmem = ival;
        gl->GetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &ival);
        ra->max_compute_group_threads = ival;
    }

    gl->Disable(GL_DITHER);

    if (!ra_find_unorm_format(ra, 2, 1))
        MP_VERBOSE(ra, "16 bit UNORM textures not available.\n");

    return 0;
}

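// Create a ra instance on top of an already initialized GL context. Returns
// NULL on failure. The caller retains ownership of gl, which must outlive
// the returned ra.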
struct ra *ra_create_gl(GL *gl, struct mp_log *log)
{
    struct ra *ra = talloc_zero(NULL, struct ra);
    ra->log = log;
    if (ra_init_gl(ra, gl) < 0) {
        talloc_free(ra);
        return NULL;
    }
    return ra;
}

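// Minimal usage sketch (hypothetical caller; "gl" and "log" come from the
// embedding context, and ra_free() is assumed from the generic ra API):
//
//   struct ra *ra = ra_create_gl(gl, log);
//   if (!ra)
//       return -1;
//   // ... create textures and run render passes through ra->fns ...
//   ra_free(&ra);
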
static void gl_destroy(struct ra *ra)
{
    talloc_free(ra->priv);
}

void ra_gl_set_debug(struct ra *ra, bool enable)
{
    struct ra_gl *p = ra->priv;
    GL *gl = ra_gl_get(ra);

    p->debug_enable = enable;
    if (gl->debug_context)
        gl_set_debug_logger(gl, enable ? ra->log : NULL);
}

static void gl_tex_destroy(struct ra *ra, struct ra_tex *tex)
{
    GL *gl = ra_gl_get(ra);
    struct ra_tex_gl *tex_gl = tex->priv;

    ra_buf_pool_uninit(ra, &tex_gl->pbo);

    if (tex_gl->own_objects) {
        if (tex_gl->fbo)
            gl->DeleteFramebuffers(1, &tex_gl->fbo);

        gl->DeleteTextures(1, &tex_gl->texture);
    }
    talloc_free(tex_gl);
    talloc_free(tex);
}

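// Allocate a ra_tex and fill in the GL-side metadata (target, formats)
// derived from params, without creating any GL objects yet.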
static struct ra_tex *gl_tex_create_blank(struct ra *ra,
                                          const struct ra_tex_params *params)
{
    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
    tex->params = *params;
    tex->params.initial_data = NULL;
    struct ra_tex_gl *tex_gl = tex->priv = talloc_zero(NULL, struct ra_tex_gl);

    const struct gl_format *fmt = params->format->priv;
    tex_gl->internal_format = fmt->internal_format;
    tex_gl->format = fmt->format;
    tex_gl->type = fmt->type;
    switch (params->dimensions) {
    case 1: tex_gl->target = GL_TEXTURE_1D; break;
    case 2: tex_gl->target = GL_TEXTURE_2D; break;
    case 3: tex_gl->target = GL_TEXTURE_3D; break;
    default: MP_ASSERT_UNREACHABLE();
    }
    if (params->non_normalized) {
        mp_assert(params->dimensions == 2);
        tex_gl->target = GL_TEXTURE_RECTANGLE;
    }
    if (params->external_oes) {
        mp_assert(params->dimensions == 2 && !params->non_normalized);
        tex_gl->target = GL_TEXTURE_EXTERNAL_OES;
    }

    if (params->downloadable && !(params->dimensions == 2 &&
                                  params->format->renderable))
    {
        gl_tex_destroy(ra, tex);
        return NULL;
    }

    return tex;
}

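// Create the actual GL texture (and, if the params require rendering,
// blitting or downloading, a backing FBO), upload initial_data if given,
// and set the filtering/wrapping state.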
static struct ra_tex *gl_tex_create(struct ra *ra,
                                    const struct ra_tex_params *params)
{
    GL *gl = ra_gl_get(ra);
    mp_assert(!params->format->dummy_format);

    struct ra_tex *tex = gl_tex_create_blank(ra, params);
    if (!tex)
        return NULL;
    struct ra_tex_gl *tex_gl = tex->priv;

    tex_gl->own_objects = true;

    gl->GenTextures(1, &tex_gl->texture);
    gl->BindTexture(tex_gl->target, tex_gl->texture);

    GLint filter = params->src_linear ? GL_LINEAR : GL_NEAREST;
    GLint wrap = params->src_repeat ? GL_REPEAT : GL_CLAMP_TO_EDGE;
    gl->TexParameteri(tex_gl->target, GL_TEXTURE_MIN_FILTER, filter);
    gl->TexParameteri(tex_gl->target, GL_TEXTURE_MAG_FILTER, filter);
    gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_S, wrap);
    if (params->dimensions > 1)
        gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_T, wrap);
    if (params->dimensions > 2)
        gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_R, wrap);

    gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
    switch (params->dimensions) {
    case 1:
        gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format, params->w,
                       0, tex_gl->format, tex_gl->type, params->initial_data);
        break;
    case 2:
        gl->TexImage2D(tex_gl->target, 0, tex_gl->internal_format, params->w,
                       params->h, 0, tex_gl->format, tex_gl->type,
                       params->initial_data);
        break;
    case 3:
        gl->TexImage3D(tex_gl->target, 0, tex_gl->internal_format, params->w,
                       params->h, params->d, 0, tex_gl->format, tex_gl->type,
                       params->initial_data);
        break;
    }
    gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4);

    gl->BindTexture(tex_gl->target, 0);

    gl_check_error(gl, ra->log, "after creating texture");

    // Even blitting needs an FBO in OpenGL for strange reasons.
    // Download is handled by reading from an FBO.
    if (tex->params.render_dst || tex->params.blit_src ||
        tex->params.blit_dst || tex->params.downloadable)
    {
        if (!tex->params.format->renderable) {
            MP_ERR(ra, "Trying to create renderable texture with unsupported "
                       "format.\n");
            ra_tex_free(ra, &tex);
            return NULL;
        }

        mp_assert(gl->mpgl_caps & MPGL_CAP_FB);

        gl->GenFramebuffers(1, &tex_gl->fbo);
        gl->BindFramebuffer(GL_FRAMEBUFFER, tex_gl->fbo);
        gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                                 GL_TEXTURE_2D, tex_gl->texture, 0);
        GLenum err = gl->CheckFramebufferStatus(GL_FRAMEBUFFER);
        gl->BindFramebuffer(GL_FRAMEBUFFER, 0);

        if (err != GL_FRAMEBUFFER_COMPLETE) {
            MP_ERR(ra, "Error: framebuffer completeness check failed (error=%d).\n",
                   (int)err);
            ra_tex_free(ra, &tex);
            return NULL;
        }

        gl_check_error(gl, ra->log, "after creating framebuffer");
    }

    return tex;
}

// Create a ra_tex that merely wraps an existing texture. The returned object
// is freed with ra_tex_free(), but this will not delete the texture passed to
// this function.
// Some features are unsupported, e.g. setting params->initial_data or render_dst.
struct ra_tex *ra_create_wrapped_tex(struct ra *ra,
                                     const struct ra_tex_params *params,
                                     GLuint gl_texture)
{
    struct ra_tex *tex = gl_tex_create_blank(ra, params);
    if (!tex)
        return NULL;
    struct ra_tex_gl *tex_gl = tex->priv;
    tex_gl->texture = gl_texture;
    return tex;
}

static const struct ra_format fbo_dummy_format = {
    .name = "unknown_fbo",
    .priv = (void *)&(const struct gl_format){
        .name = "unknown",
        .format = GL_RGBA,
        .flags = F_CR,
    },
    .renderable = true,
    .dummy_format = true,
};

// Create a ra_tex that merely wraps an existing framebuffer. gl_fbo can be 0
// to wrap the default framebuffer.
// The returned object is freed with ra_tex_free(), but this will not delete
// the framebuffer object passed to this function.
struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h)
{
    struct ra_tex *tex = talloc_zero(ra, struct ra_tex);
    *tex = (struct ra_tex){
        .params = {
            .dimensions = 2,
            .w = w, .h = h, .d = 1,
            .format = &fbo_dummy_format,
            .render_dst = true,
            .blit_src = true,
            .blit_dst = true,
        },
    };

    struct ra_tex_gl *tex_gl = tex->priv = talloc_zero(NULL, struct ra_tex_gl);
    *tex_gl = (struct ra_tex_gl){
        .fbo = gl_fbo,
        .internal_format = 0,
        .format = GL_RGBA,
        .type = 0,
    };

    return tex;
}
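
// Minimal usage sketch (hypothetical caller; the size values are
// illustrative): wrapping FBO 0 makes the default framebuffer usable as an
// ordinary render target:
//
//   struct ra_tex *fb = ra_create_wrapped_fb(ra, 0, 1920, 1080);
//   // ... render or blit into fb ...
//   ra_tex_free(ra, &fb);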

GL *ra_gl_get(struct ra *ra)
{
    struct ra_gl *p = ra->priv;
    return p->gl;
}

// Return the associated glTexImage arguments for the given format.
void ra_gl_get_format(const struct ra_format *fmt, GLint *out_internal_format,
                      GLenum *out_format, GLenum *out_type)
{
    const struct gl_format *gl_format = fmt->priv;
    *out_internal_format = gl_format->internal_format;
    *out_format = gl_format->format;
    *out_type = gl_format->type;
}

void ra_gl_get_raw_tex(struct ra *ra, struct ra_tex *tex,
                       GLuint *out_texture, GLenum *out_target)
{
    struct ra_tex_gl *tex_gl = tex->priv;
    *out_texture = tex_gl->texture;
    *out_target = tex_gl->target;
}

// Return whether the ra instance was created with ra_create_gl(). This is the
// _only_ function that can be called on a ra instance of any type.
bool ra_is_gl(struct ra *ra)
{
    return ra->fns == &ra_fns_gl;
}

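// Upload pixel data to tex, either from params->src (routed through the
// internal PBO pool if ra->use_pbo is set) or from a caller-provided buffer
// object. For host-mapped buffers, a fence is installed so the buffer is not
// reused while GL may still be reading from it.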
static bool gl_tex_upload(struct ra *ra,
                          const struct ra_tex_upload_params *params)
{
    GL *gl = ra_gl_get(ra);
    struct ra_tex *tex = params->tex;
    struct ra_buf *buf = params->buf;
    struct ra_tex_gl *tex_gl = tex->priv;
    struct ra_buf_gl *buf_gl = buf ? buf->priv : NULL;
    mp_assert(tex->params.host_mutable);
    mp_assert(!params->buf || !params->src);

    if (ra->use_pbo && !params->buf)
        return ra_tex_upload_pbo(ra, &tex_gl->pbo, params);

    const void *src = params->src;
    if (buf) {
        gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->buffer);
        src = (void *)params->buf_offset;
    }

    gl->BindTexture(tex_gl->target, tex_gl->texture);
    if (params->invalidate && gl->InvalidateTexImage)
        gl->InvalidateTexImage(tex_gl->texture, 0);

    switch (tex->params.dimensions) {
    case 1:
        gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format,
                       tex->params.w, 0, tex_gl->format, tex_gl->type, src);
        break;
    case 2: {
        struct mp_rect rc = {0, 0, tex->params.w, tex->params.h};
        if (params->rc)
            rc = *params->rc;
        gl_upload_tex(gl, tex_gl->target, tex_gl->format, tex_gl->type,
                      src, params->stride, rc.x0, rc.y0, rc.x1 - rc.x0,
                      rc.y1 - rc.y0);
        break;
    }
    case 3:
        gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
        gl->TexImage3D(GL_TEXTURE_3D, 0, tex_gl->internal_format, tex->params.w,
                       tex->params.h, tex->params.d, 0, tex_gl->format,
                       tex_gl->type, src);
        gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4);
        break;
    }

    gl->BindTexture(tex_gl->target, 0);

    if (buf) {
        gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
        if (buf->params.host_mapped) {
            // Make sure the PBO is not reused until GL is done with it. If a
            // previous operation is pending, "update" it by creating a new
            // fence that will cover the previous operation as well.
            gl->DeleteSync(buf_gl->fence);
            buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
        }
    }

    return true;
}

static bool gl_tex_download(struct ra *ra, struct ra_tex_download_params *params)
{
    GL *gl = ra_gl_get(ra);
    struct ra_tex *tex = params->tex;
    struct ra_tex_gl *tex_gl = tex->priv;
    if (!tex_gl->fbo)
        return false;
    return gl_read_fbo_contents(gl, tex_gl->fbo, 1, tex_gl->format, tex_gl->type,
                                tex->params.w, tex->params.h, params->dst,
                                params->stride);
}

static void gl_buf_destroy(struct ra *ra, struct ra_buf *buf)
{
    if (!buf)
        return;

    GL *gl = ra_gl_get(ra);
    struct ra_buf_gl *buf_gl = buf->priv;

    if (buf_gl->fence)
        gl->DeleteSync(buf_gl->fence);

    if (buf->data) {
        gl->BindBuffer(buf_gl->target, buf_gl->buffer);
        gl->UnmapBuffer(buf_gl->target);
        gl->BindBuffer(buf_gl->target, 0);
    }
    gl->DeleteBuffers(1, &buf_gl->buffer);

    talloc_free(buf_gl);
    talloc_free(buf);
}

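// Create a buffer object. host_mapped buffers require glBufferStorage
// (GL 4.4 / ARB_buffer_storage); such a buffer stays persistently and
// coherently mapped for its whole lifetime. Otherwise, a plain mutable
// buffer is allocated with a usage hint matching its type.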
static struct ra_buf *gl_buf_create(struct ra *ra,
                                    const struct ra_buf_params *params)
{
    GL *gl = ra_gl_get(ra);

    if (params->host_mapped && !gl->BufferStorage)
        return NULL;

    struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
    buf->params = *params;
    buf->params.initial_data = NULL;

    struct ra_buf_gl *buf_gl = buf->priv = talloc_zero(NULL, struct ra_buf_gl);
    gl->GenBuffers(1, &buf_gl->buffer);

    switch (params->type) {
    case RA_BUF_TYPE_TEX_UPLOAD: buf_gl->target = GL_PIXEL_UNPACK_BUFFER; break;
    case RA_BUF_TYPE_SHADER_STORAGE: buf_gl->target = GL_SHADER_STORAGE_BUFFER; break;
    case RA_BUF_TYPE_UNIFORM: buf_gl->target = GL_UNIFORM_BUFFER; break;
    default: abort();
    }

    gl->BindBuffer(buf_gl->target, buf_gl->buffer);

    if (params->host_mapped) {
        unsigned flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT |
                         GL_MAP_READ_BIT | GL_MAP_WRITE_BIT;

        unsigned storflags = flags;
        if (params->type == RA_BUF_TYPE_TEX_UPLOAD)
            storflags |= GL_CLIENT_STORAGE_BIT;

        gl->BufferStorage(buf_gl->target, params->size, params->initial_data,
                          storflags);
        buf->data = gl->MapBufferRange(buf_gl->target, 0, params->size, flags);
        if (!buf->data) {
            gl_check_error(gl, ra->log, "mapping buffer");
            // Unbind before destroying: gl_buf_destroy() frees buf_gl, so
            // buf_gl->target must not be dereferenced afterwards.
            gl->BindBuffer(buf_gl->target, 0);
            gl_buf_destroy(ra, buf);
            return NULL;
        }
    } else {
        GLenum hint;
        switch (params->type) {
        case RA_BUF_TYPE_TEX_UPLOAD: hint = GL_STREAM_DRAW; break;
        case RA_BUF_TYPE_SHADER_STORAGE: hint = GL_STREAM_COPY; break;
        case RA_BUF_TYPE_UNIFORM: hint = GL_STATIC_DRAW; break;
        default: MP_ASSERT_UNREACHABLE();
        }

        gl->BufferData(buf_gl->target, params->size, params->initial_data, hint);
    }

    gl->BindBuffer(buf_gl->target, 0);
    return buf;
}

static void gl_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
                          const void *data, size_t size)
{
    GL *gl = ra_gl_get(ra);
    struct ra_buf_gl *buf_gl = buf->priv;
    mp_assert(buf->params.host_mutable);

    gl->BindBuffer(buf_gl->target, buf_gl->buffer);
    gl->BufferSubData(buf_gl->target, offset, size, data);
    gl->BindBuffer(buf_gl->target, 0);
}

static bool gl_buf_poll(struct ra *ra, struct ra_buf *buf)
{
    // Non-persistently mapped buffers are always implicitly reusable in
    // OpenGL; the implementation will create more buffers under the hood if
    // needed.
    if (!buf->data)
        return true;

    GL *gl = ra_gl_get(ra);
    struct ra_buf_gl *buf_gl = buf->priv;

    if (buf_gl->fence) {
        GLenum res = gl->ClientWaitSync(buf_gl->fence, 0, 0); // non-blocking
        if (res == GL_ALREADY_SIGNALED) {
            gl->DeleteSync(buf_gl->fence);
            buf_gl->fence = NULL;
        }
    }

    return !buf_gl->fence;
}

static void gl_clear(struct ra *ra, struct ra_tex *dst, float color[4],
                     struct mp_rect *scissor)
{
    GL *gl = ra_gl_get(ra);

    mp_assert(dst->params.render_dst);
    struct ra_tex_gl *dst_gl = dst->priv;

    gl->BindFramebuffer(GL_FRAMEBUFFER, dst_gl->fbo);

    gl->Scissor(scissor->x0, scissor->y0,
                scissor->x1 - scissor->x0,
                scissor->y1 - scissor->y0);

    gl->Enable(GL_SCISSOR_TEST);
    gl->ClearColor(color[0], color[1], color[2], color[3]);
    gl->Clear(GL_COLOR_BUFFER_BIT);
    gl->Disable(GL_SCISSOR_TEST);

    gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
}

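// Blit a region from src to dst via glBlitFramebuffer. Always samples with
// GL_NEAREST; reversed rectangles (x0 > x1 or y0 > y1) mirror the blit along
// that axis, which glBlitFramebuffer supports natively.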
static void gl_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
                    struct mp_rect *dst_rc, struct mp_rect *src_rc)
{
    GL *gl = ra_gl_get(ra);

    mp_assert(src->params.blit_src);
    mp_assert(dst->params.blit_dst);

    struct ra_tex_gl *src_gl = src->priv;
    struct ra_tex_gl *dst_gl = dst->priv;

    gl->BindFramebuffer(GL_READ_FRAMEBUFFER, src_gl->fbo);
    gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_gl->fbo);
    gl->BlitFramebuffer(src_rc->x0, src_rc->y0, src_rc->x1, src_rc->y1,
                        dst_rc->x0, dst_rc->y0, dst_rc->x1, dst_rc->y1,
                        GL_COLOR_BUFFER_BIT, GL_NEAREST);
    gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
    gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
}

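// In GL, the binding namespaces of the different resource types (textures,
// images, buffers) are disjoint, so the ra_vartype itself can serve as the
// namespace index.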
static int gl_desc_namespace(struct ra *ra, enum ra_vartype type)
{
    return type;
}

static void gl_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
{
    GL *gl = ra_gl_get(ra);
    struct ra_renderpass_gl *pass_gl = pass->priv;
    gl->DeleteProgram(pass_gl->program);
    gl_vao_uninit(&pass_gl->vao);

    talloc_free(pass_gl);
    talloc_free(pass);
}

static const char *shader_typestr(GLenum type)
{
    switch (type) {
    case GL_VERTEX_SHADER: return "vertex";
    case GL_FRAGMENT_SHADER: return "fragment";
    case GL_COMPUTE_SHADER: return "compute";
    default: MP_ASSERT_UNREACHABLE();
    }
}

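// Compile a single shader stage, log its source and info log at an
// appropriate level, and attach it to program. On failure, *ok is cleared.
// The shader object itself is deleted right after attaching, so it is
// released automatically together with the program.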
static void compile_attach_shader(struct ra *ra, GLuint program,
                                  GLenum type, const char *source, bool *ok)
{
    GL *gl = ra_gl_get(ra);

    GLuint shader = gl->CreateShader(type);
    gl->ShaderSource(shader, 1, &source, NULL);
    gl->CompileShader(shader);
    GLint status = 0;
    gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status);
    GLint log_length = 0;
    gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);

    int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
    const char *typestr = shader_typestr(type);
    if (mp_msg_test(ra->log, pri)) {
        MP_MSG(ra, pri, "%s shader source:\n", typestr);
        mp_log_source(ra->log, pri, source);
    }
    if (log_length > 1) {
        GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
        gl->GetShaderInfoLog(shader, log_length, NULL, logstr);
        MP_MSG(ra, pri, "%s shader compile log (status=%d):\n%s\n",
               typestr, status, logstr);
        talloc_free(logstr);
    }
    if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(ra->log, MSGL_DEBUG)) {
        GLint len = 0;
        gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len);
        if (len > 0) {
            GLchar *sstr = talloc_zero_size(NULL, len + 1);
            gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr);
            MP_DBG(ra, "Translated shader:\n");
            mp_log_source(ra->log, MSGL_DEBUG, sstr);
        }
    }

    gl->AttachShader(program, shader);
    gl->DeleteShader(shader);

    *ok &= status;
}

static void link_shader(struct ra *ra, GLuint program, bool *ok)
{
    GL *gl = ra_gl_get(ra);

    gl->LinkProgram(program);
    GLint status = 0;
    gl->GetProgramiv(program, GL_LINK_STATUS, &status);
    GLint log_length = 0;
    gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);

    int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
    if (mp_msg_test(ra->log, pri)) {
        GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
        gl->GetProgramInfoLog(program, log_length, NULL, logstr);
        MP_MSG(ra, pri, "shader link log (status=%d): %s\n", status, logstr);
        talloc_free(logstr);
    }

    *ok &= status;
}

// Either a compute shader, or both a vertex and a fragment shader, must be set.
static GLuint compile_program(struct ra *ra, const struct ra_renderpass_params *p)
{
    GL *gl = ra_gl_get(ra);

    GLuint prog = gl->CreateProgram();
    bool ok = true;
    if (p->type == RA_RENDERPASS_TYPE_COMPUTE)
        compile_attach_shader(ra, prog, GL_COMPUTE_SHADER, p->compute_shader, &ok);
    if (p->type == RA_RENDERPASS_TYPE_RASTER) {
        compile_attach_shader(ra, prog, GL_VERTEX_SHADER, p->vertex_shader, &ok);
        compile_attach_shader(ra, prog, GL_FRAGMENT_SHADER, p->frag_shader, &ok);
        for (int n = 0; n < p->num_vertex_attribs; n++)
            gl->BindAttribLocation(prog, n, p->vertex_attribs[n].name);
    }
    link_shader(ra, prog, &ok);
    if (!ok) {
        gl->DeleteProgram(prog);
        prog = 0;
    }
    return prog;
}

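// Load a program from the cached binary blob if possible, else compile it
// from source and, if glGetProgramBinary is available, emit a new cache
// blob. Blob layout: a 4-byte little-endian GLenum binary format tag,
// followed by the raw program binary (see AV_RL32/AV_WL32 below).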
static GLuint load_program(struct ra *ra, const struct ra_renderpass_params *p,
                           bstr *out_cached_data)
{
    GL *gl = ra_gl_get(ra);

    GLuint prog = 0;

    if (gl->ProgramBinary && p->cached_program.len > 4) {
        GLenum format = AV_RL32(p->cached_program.start);
        prog = gl->CreateProgram();
        gl_check_error(gl, ra->log, "before loading program");
        gl->ProgramBinary(prog, format, p->cached_program.start + 4,
                          p->cached_program.len - 4);
        gl->GetError(); // discard potential useless error
        GLint status = 0;
        gl->GetProgramiv(prog, GL_LINK_STATUS, &status);
        if (status) {
            MP_DBG(ra, "Loading binary program succeeded.\n");
        } else {
            gl->DeleteProgram(prog);
            prog = 0;
        }
    }

    if (!prog) {
        prog = compile_program(ra, p);

        if (gl->GetProgramBinary && prog) {
            GLint size = 0;
            gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size);
            uint8_t *buffer = talloc_size(NULL, size + 4);
            GLsizei actual_size = 0;
            GLenum binary_format = 0;
            if (size > 0) {
                gl->GetProgramBinary(prog, size, &actual_size, &binary_format,
                                     buffer + 4);
            }
            AV_WL32(buffer, binary_format);
            if (actual_size) {
                *out_cached_data = (bstr){buffer, actual_size + 4};
            } else {
                talloc_free(buffer);
            }
        }
    }

    return prog;
}

static struct ra_renderpass *gl_renderpass_create(struct ra *ra,
                                    const struct ra_renderpass_params *params)
{
    GL *gl = ra_gl_get(ra);

    struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
    pass->params = *ra_renderpass_params_copy(pass, params);
    pass->params.cached_program = (bstr){0};
    struct ra_renderpass_gl *pass_gl = pass->priv =
        talloc_zero(NULL, struct ra_renderpass_gl);

    bstr cached = {0};
    pass_gl->program = load_program(ra, params, &cached);
    if (!pass_gl->program) {
        gl_renderpass_destroy(ra, pass);
        return NULL;
    }

    talloc_steal(pass, cached.start);
    pass->params.cached_program = cached;

    gl->UseProgram(pass_gl->program);
    for (int n = 0; n < params->num_inputs; n++) {
        GLint loc =
            gl->GetUniformLocation(pass_gl->program, params->inputs[n].name);
        MP_TARRAY_APPEND(pass_gl, pass_gl->uniform_loc, pass_gl->num_uniform_loc,
                         loc);

        // For compatibility with older OpenGL, we need to explicitly update
        // the texture/image unit bindings after creating the shader program,
        // since specifying it directly requires GLSL 4.20+.
        switch (params->inputs[n].type) {
        case RA_VARTYPE_TEX:
        case RA_VARTYPE_IMG_W:
            gl->Uniform1i(loc, params->inputs[n].binding);
            break;
        }
    }
    gl->UseProgram(0);

    gl_vao_init(&pass_gl->vao, gl, pass->params.vertex_stride,
                pass->params.vertex_attribs, pass->params.num_vertex_attribs);

    return pass;
}

static GLenum map_blend(enum ra_blend blend)
{
    switch (blend) {
    case RA_BLEND_ZERO: return GL_ZERO;
    case RA_BLEND_ONE: return GL_ONE;
    case RA_BLEND_SRC_ALPHA: return GL_SRC_ALPHA;
    case RA_BLEND_ONE_MINUS_SRC_ALPHA: return GL_ONE_MINUS_SRC_ALPHA;
    default: return 0;
    }
}

// Assumes program is current (gl->UseProgram(program)).
static void update_uniform(struct ra *ra, struct ra_renderpass *pass,
                           struct ra_renderpass_input_val *val)
{
    GL *gl = ra_gl_get(ra);
    struct ra_renderpass_gl *pass_gl = pass->priv;

    struct ra_renderpass_input *input = &pass->params.inputs[val->index];
    mp_assert(val->index >= 0 && val->index < pass_gl->num_uniform_loc);
    GLint loc = pass_gl->uniform_loc[val->index];

    switch (input->type) {
    case RA_VARTYPE_INT: {
        mp_assert(input->dim_v * input->dim_m == 1);
        if (loc < 0)
            break;
        gl->Uniform1i(loc, *(int *)val->data);
        break;
    }
    case RA_VARTYPE_FLOAT: {
        float *f = val->data;
        if (loc < 0)
            break;
        if (input->dim_m == 1) {
            switch (input->dim_v) {
            case 1: gl->Uniform1f(loc, f[0]); break;
            case 2: gl->Uniform2f(loc, f[0], f[1]); break;
            case 3: gl->Uniform3f(loc, f[0], f[1], f[2]); break;
            case 4: gl->Uniform4f(loc, f[0], f[1], f[2], f[3]); break;
            default: MP_ASSERT_UNREACHABLE();
            }
        } else if (input->dim_v == 2 && input->dim_m == 2) {
            gl->UniformMatrix2fv(loc, 1, GL_FALSE, f);
        } else if (input->dim_v == 3 && input->dim_m == 3) {
            gl->UniformMatrix3fv(loc, 1, GL_FALSE, f);
        } else {
            MP_ASSERT_UNREACHABLE();
        }
        break;
    }
    case RA_VARTYPE_IMG_W: {
        struct ra_tex *tex = *(struct ra_tex **)val->data;
        struct ra_tex_gl *tex_gl = tex->priv;
        mp_assert(tex->params.storage_dst);
        gl->BindImageTexture(input->binding, tex_gl->texture, 0, GL_FALSE, 0,
                             GL_WRITE_ONLY, tex_gl->internal_format);
        break;
    }
    case RA_VARTYPE_TEX: {
        struct ra_tex *tex = *(struct ra_tex **)val->data;
        struct ra_tex_gl *tex_gl = tex->priv;
        mp_assert(tex->params.render_src);
        gl->ActiveTexture(GL_TEXTURE0 + input->binding);
        gl->BindTexture(tex_gl->target, tex_gl->texture);
        break;
    }
    case RA_VARTYPE_BUF_RO: // fall through
    case RA_VARTYPE_BUF_RW: {
        struct ra_buf *buf = *(struct ra_buf **)val->data;
        struct ra_buf_gl *buf_gl = buf->priv;
        gl->BindBufferBase(buf_gl->target, input->binding, buf_gl->buffer);
        // SSBOs are not implicitly coherent in OpenGL.
        if (input->type == RA_VARTYPE_BUF_RW)
            gl->MemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
        break;
    }
    default:
        MP_ASSERT_UNREACHABLE();
    }
}

static void disable_binding(struct ra *ra, struct ra_renderpass *pass,
                            struct ra_renderpass_input_val *val)
{
    GL *gl = ra_gl_get(ra);

    struct ra_renderpass_input *input = &pass->params.inputs[val->index];

    switch (input->type) {
    case RA_VARTYPE_IMG_W: /* fall through */
    case RA_VARTYPE_TEX: {
        struct ra_tex *tex = *(struct ra_tex **)val->data;
        struct ra_tex_gl *tex_gl = tex->priv;
        mp_assert(tex->params.render_src);
        if (input->type == RA_VARTYPE_TEX) {
            gl->ActiveTexture(GL_TEXTURE0 + input->binding);
            gl->BindTexture(tex_gl->target, 0);
        } else {
            gl->BindImageTexture(input->binding, 0, 0, GL_FALSE, 0,
                                 GL_WRITE_ONLY, tex_gl->internal_format);
        }
        break;
    }
    case RA_VARTYPE_BUF_RW:
        gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, 0);
        break;
    }
}

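// Execute a render pass: bind the program and all inputs, then either draw
// the supplied vertex data into the target FBO (raster) or dispatch compute
// groups, and finally restore the touched GL state.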
static void gl_renderpass_run(struct ra *ra,
                              const struct ra_renderpass_run_params *params)
{
    GL *gl = ra_gl_get(ra);
    struct ra_renderpass *pass = params->pass;
    struct ra_renderpass_gl *pass_gl = pass->priv;

    gl->UseProgram(pass_gl->program);

    for (int n = 0; n < params->num_values; n++)
        update_uniform(ra, pass, &params->values[n]);
    gl->ActiveTexture(GL_TEXTURE0);

    switch (pass->params.type) {
    case RA_RENDERPASS_TYPE_RASTER: {
        struct ra_tex_gl *target_gl = params->target->priv;
        mp_assert(params->target->params.render_dst);
        mp_assert(params->target->params.format == pass->params.target_format);
        gl->BindFramebuffer(GL_FRAMEBUFFER, target_gl->fbo);
        if (pass->params.invalidate_target && gl->InvalidateFramebuffer) {
            GLenum fb = target_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR;
            gl->InvalidateFramebuffer(GL_FRAMEBUFFER, 1, &fb);
        }
        gl->Viewport(params->viewport.x0, params->viewport.y0,
                     mp_rect_w(params->viewport),
                     mp_rect_h(params->viewport));
        gl->Scissor(params->scissors.x0, params->scissors.y0,
                    mp_rect_w(params->scissors),
                    mp_rect_h(params->scissors));
        gl->Enable(GL_SCISSOR_TEST);
        if (pass->params.enable_blend) {
            gl->BlendFuncSeparate(map_blend(pass->params.blend_src_rgb),
                                  map_blend(pass->params.blend_dst_rgb),
                                  map_blend(pass->params.blend_src_alpha),
                                  map_blend(pass->params.blend_dst_alpha));
            gl->Enable(GL_BLEND);
        }
        gl_vao_draw_data(&pass_gl->vao, GL_TRIANGLES, params->vertex_data,
                         params->vertex_count);
        gl->Disable(GL_SCISSOR_TEST);
        gl->Disable(GL_BLEND);
        gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
        break;
    }
    case RA_RENDERPASS_TYPE_COMPUTE: {
        gl->DispatchCompute(params->compute_groups[0],
                            params->compute_groups[1],
                            params->compute_groups[2]);

        gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
        break;
    }
    default: MP_ASSERT_UNREACHABLE();
    }

    for (int n = 0; n < params->num_values; n++)
        disable_binding(ra, pass, &params->values[n]);
    gl->ActiveTexture(GL_TEXTURE0);

    gl->UseProgram(0);
}

// Timers in GL use query objects, and are asynchronous. So pool a few of
// these together. GL_QUERY_OBJECT_NUM should be large enough to avoid this
// ever blocking. We can afford to throw query objects around; there's no
// practical limit on them, and their overhead is small.

#define GL_QUERY_OBJECT_NUM 8

struct gl_timer {
    GLuint query[GL_QUERY_OBJECT_NUM];
    int idx;
    uint64_t result;
    bool active;
};

static ra_timer *gl_timer_create(struct ra *ra)
{
    GL *gl = ra_gl_get(ra);

    if (!gl->GenQueries)
        return NULL;

    struct gl_timer *timer = talloc_zero(NULL, struct gl_timer);
    gl->GenQueries(GL_QUERY_OBJECT_NUM, timer->query);

    return (ra_timer *)timer;
}

static void gl_timer_destroy(struct ra *ra, ra_timer *ratimer)
{
    if (!ratimer)
        return;

    GL *gl = ra_gl_get(ra);
    struct gl_timer *timer = ratimer;

    gl->DeleteQueries(GL_QUERY_OBJECT_NUM, timer->query);
    talloc_free(timer);
}

static void gl_timer_start(struct ra *ra, ra_timer *ratimer)
{
    struct ra_gl *p = ra->priv;
    GL *gl = p->gl;
    struct gl_timer *timer = ratimer;

    // GL_TIME_ELAPSED queries are not re-entrant, so just do nothing instead
    // of crashing. Work-around for shitty GL limitations.
    if (p->timer_active)
        return;

    // If this query object already contains a result, we need to retrieve it
    timer->result = 0;
    if (gl->IsQuery(timer->query[timer->idx])) {
        gl->GetQueryObjectui64v(timer->query[timer->idx], GL_QUERY_RESULT,
                                &timer->result);
    }

    gl->BeginQuery(GL_TIME_ELAPSED, timer->query[timer->idx++]);
    timer->idx %= GL_QUERY_OBJECT_NUM;

    p->timer_active = timer->active = true;
}

static uint64_t gl_timer_stop(struct ra *ra, ra_timer *ratimer)
{
    struct ra_gl *p = ra->priv;
    GL *gl = p->gl;
    struct gl_timer *timer = ratimer;

    if (!timer->active)
        return 0;

    gl->EndQuery(GL_TIME_ELAPSED);
    p->timer_active = timer->active = false;

    return timer->result;
}

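// Despite its name, this does not insert a GL debug marker; it uses the
// debug hook to run a glGetError() check with msg as context, and only when
// debugging was enabled via ra_gl_set_debug().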
static void gl_debug_marker(struct ra *ra, const char *msg)
{
    struct ra_gl *p = ra->priv;

    if (p->debug_enable)
        gl_check_error(p->gl, ra->log, msg);
}

static const struct ra_fns ra_fns_gl = {
    .destroy = gl_destroy,
    .tex_create = gl_tex_create,
    .tex_destroy = gl_tex_destroy,
    .tex_upload = gl_tex_upload,
    .tex_download = gl_tex_download,
    .buf_create = gl_buf_create,
    .buf_destroy = gl_buf_destroy,
    .buf_update = gl_buf_update,
    .buf_poll = gl_buf_poll,
    .clear = gl_clear,
    .blit = gl_blit,
    .uniform_layout = std140_layout,
    .desc_namespace = gl_desc_namespace,
    .renderpass_create = gl_renderpass_create,
    .renderpass_destroy = gl_renderpass_destroy,
    .renderpass_run = gl_renderpass_run,
    .timer_create = gl_timer_create,
    .timer_destroy = gl_timer_destroy,
    .timer_start = gl_timer_start,
    .timer_stop = gl_timer_stop,
    .debug_marker = gl_debug_marker,
};