/src/mpv/video/out/gpu/ra.c
Line | Count | Source |
1 | | #include "common/common.h" |
2 | | #include "common/msg.h" |
3 | | #include "video/img_format.h" |
4 | | |
5 | | #include "ra.h" |
6 | | |
// Register a backend/platform handle under a name so other code can look it
// up later via ra_get_native_resource().
// NOTE(review): 'name' and 'data' are stored as-is (not copied) — the caller
// must keep them valid for the lifetime of 'ra'.
void ra_add_native_resource(struct ra *ra, const char *name, void *data)
{
    struct ra_native_resource r = {
        .name = name,
        .data = data,
    };
    // Array storage is talloc-allocated with 'ra' as parent.
    MP_TARRAY_APPEND(ra, ra->native_resources, ra->num_native_resources, r);
}
15 | | |
16 | | void *ra_get_native_resource(struct ra *ra, const char *name) |
17 | 0 | { |
18 | 0 | for (int n = 0; n < ra->num_native_resources; n++) { |
19 | 0 | struct ra_native_resource *r = &ra->native_resources[n]; |
20 | 0 | if (strcmp(r->name, name) == 0) |
21 | 0 | return r->data; |
22 | 0 | } |
23 | | |
24 | 0 | return NULL; |
25 | 0 | } |
26 | | |
27 | | struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params) |
28 | 0 | { |
29 | 0 | switch (params->dimensions) { |
30 | 0 | case 1: |
31 | 0 | mp_assert(params->h == 1 && params->d == 1); |
32 | 0 | break; |
33 | 0 | case 2: |
34 | 0 | mp_assert(params->d == 1); |
35 | 0 | break; |
36 | 0 | default: |
37 | 0 | mp_assert(params->dimensions >= 1 && params->dimensions <= 3); |
38 | 0 | } |
39 | 0 | return ra->fns->tex_create(ra, params); |
40 | 0 | } |
41 | | |
42 | | void ra_tex_free(struct ra *ra, struct ra_tex **tex) |
43 | 0 | { |
44 | 0 | if (*tex) |
45 | 0 | ra->fns->tex_destroy(ra, *tex); |
46 | 0 | *tex = NULL; |
47 | 0 | } |
48 | | |
49 | | struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params) |
50 | 0 | { |
51 | 0 | return ra->fns->buf_create(ra, params); |
52 | 0 | } |
53 | | |
54 | | void ra_buf_free(struct ra *ra, struct ra_buf **buf) |
55 | 0 | { |
56 | 0 | if (*buf) |
57 | 0 | ra->fns->buf_destroy(ra, *buf); |
58 | 0 | *buf = NULL; |
59 | 0 | } |
60 | | |
61 | | void ra_free(struct ra **ra) |
62 | 0 | { |
63 | 0 | if (*ra) |
64 | 0 | (*ra)->fns->destroy(*ra); |
65 | 0 | talloc_free(*ra); |
66 | 0 | *ra = NULL; |
67 | 0 | } |
68 | | |
69 | | size_t ra_vartype_size(enum ra_vartype type) |
70 | 0 | { |
71 | 0 | switch (type) { |
72 | 0 | case RA_VARTYPE_INT: return sizeof(int); |
73 | 0 | case RA_VARTYPE_FLOAT: return sizeof(float); |
74 | 0 | case RA_VARTYPE_BYTE_UNORM: return 1; |
75 | 0 | default: return 0; |
76 | 0 | } |
77 | 0 | } |
78 | | |
79 | | struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input) |
80 | 0 | { |
81 | 0 | size_t el_size = ra_vartype_size(input->type); |
82 | 0 | if (!el_size) |
83 | 0 | return (struct ra_layout){0}; |
84 | | |
85 | | // host data is always tightly packed |
86 | 0 | return (struct ra_layout) { |
87 | 0 | .align = 1, |
88 | 0 | .stride = el_size * input->dim_v, |
89 | 0 | .size = el_size * input->dim_v * input->dim_m, |
90 | 0 | }; |
91 | 0 | } |
92 | | |
// Deep-copy an array of ra_renderpass_input. The array itself becomes a
// talloc child of ta_parent; each entry's name string is duplicated as a
// child of the new array, so freeing the array frees the names too.
static struct ra_renderpass_input *dup_inputs(void *ta_parent,
    const struct ra_renderpass_input *inputs, int num_inputs)
{
    struct ra_renderpass_input *res =
        talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0]));
    // memdup copied the old name pointers; replace them with owned copies.
    for (int n = 0; n < num_inputs; n++)
        res[n].name = talloc_strdup(res, res[n].name);
    return res;
}
102 | | |
// Return a newly allocated deep-copy of params.
// All nested arrays and strings become talloc children of the returned
// struct, so a single talloc_free() on the result releases everything.
struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent,
    const struct ra_renderpass_params *params)
{
    struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res);
    *res = *params; // shallow copy first, then replace the owned pointers
    res->inputs = dup_inputs(res, res->inputs, res->num_inputs);
    res->vertex_attribs =
        dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs);
    res->cached_program = bstrdup(res, res->cached_program);
    res->vertex_shader = talloc_strdup(res, res->vertex_shader);
    res->frag_shader = talloc_strdup(res, res->frag_shader);
    res->compute_shader = talloc_strdup(res, res->compute_shader);
    return res;
}
118 | | |
// One entry mapping an ra_format's component layout to the matching GLSL
// image format qualifier string.
struct glsl_fmt {
    enum ra_ctype ctype;
    int num_components;
    int component_depth[4];  // bit depth per component; unused entries are 0
    const char *glsl_format; // GLSL format qualifier, e.g. "rgba16f"
};

// List taken from the GLSL specification, sans snorm and sint formats
static const struct glsl_fmt ra_glsl_fmts[] = {
    {RA_CTYPE_FLOAT, 1, {16},             "r16f"},
    {RA_CTYPE_FLOAT, 1, {32},             "r32f"},
    {RA_CTYPE_FLOAT, 2, {16, 16},         "rg16f"},
    {RA_CTYPE_FLOAT, 2, {32, 32},         "rg32f"},
    {RA_CTYPE_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"},
    {RA_CTYPE_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"},
    {RA_CTYPE_FLOAT, 3, {11, 11, 10},     "r11f_g11f_b10f"},

    {RA_CTYPE_UNORM, 1, {8},              "r8"},
    {RA_CTYPE_UNORM, 1, {16},             "r16"},
    {RA_CTYPE_UNORM, 2, {8, 8},           "rg8"},
    {RA_CTYPE_UNORM, 2, {16, 16},         "rg16"},
    {RA_CTYPE_UNORM, 4, {8, 8, 8, 8},     "rgba8"},
    {RA_CTYPE_UNORM, 4, {16, 16, 16, 16}, "rgba16"},
    {RA_CTYPE_UNORM, 4, {10, 10, 10, 2},  "rgb10_a2"},

    {RA_CTYPE_UINT,  1, {8},              "r8ui"},
    {RA_CTYPE_UINT,  1, {16},             "r16ui"},
    {RA_CTYPE_UINT,  1, {32},             "r32ui"},
    {RA_CTYPE_UINT,  2, {8, 8},           "rg8ui"},
    {RA_CTYPE_UINT,  2, {16, 16},         "rg16ui"},
    {RA_CTYPE_UINT,  2, {32, 32},         "rg32ui"},
    {RA_CTYPE_UINT,  4, {8, 8, 8, 8},     "rgba8ui"},
    {RA_CTYPE_UINT,  4, {16, 16, 16, 16}, "rgba16ui"},
    {RA_CTYPE_UINT,  4, {32, 32, 32, 32}, "rgba32ui"},
    {RA_CTYPE_UINT,  4, {10, 10, 10, 2},  "rgb10_a2ui"},
};
155 | | |
156 | | const char *ra_fmt_glsl_format(const struct ra_format *fmt) |
157 | 0 | { |
158 | 0 | for (int n = 0; n < MP_ARRAY_SIZE(ra_glsl_fmts); n++) { |
159 | 0 | const struct glsl_fmt *gfmt = &ra_glsl_fmts[n]; |
160 | |
|
161 | 0 | if (fmt->ctype != gfmt->ctype) |
162 | 0 | continue; |
163 | 0 | if (fmt->num_components != gfmt->num_components) |
164 | 0 | continue; |
165 | | |
166 | 0 | for (int i = 0; i < fmt->num_components; i++) { |
167 | 0 | if (fmt->component_depth[i] != gfmt->component_depth[i]) |
168 | 0 | goto next_fmt; |
169 | 0 | } |
170 | | |
171 | 0 | return gfmt->glsl_format; |
172 | | |
173 | 0 | next_fmt: ; // equivalent to `continue` |
174 | 0 | } |
175 | | |
176 | 0 | return NULL; |
177 | 0 | } |
178 | | |
179 | | // Return whether this is a tightly packed format with no external padding and |
180 | | // with the same bit size/depth in all components, and the shader returns |
181 | | // components in the same order as in memory. |
182 | | static bool ra_format_is_regular(const struct ra_format *fmt) |
183 | 0 | { |
184 | 0 | if (!fmt->pixel_size || !fmt->num_components || !fmt->ordered) |
185 | 0 | return false; |
186 | 0 | for (int n = 1; n < fmt->num_components; n++) { |
187 | 0 | if (fmt->component_size[n] != fmt->component_size[0] || |
188 | 0 | fmt->component_depth[n] != fmt->component_depth[0]) |
189 | 0 | return false; |
190 | 0 | } |
191 | 0 | if (fmt->component_size[0] * fmt->num_components != fmt->pixel_size * 8) |
192 | 0 | return false; |
193 | 0 | return true; |
194 | 0 | } |
195 | | |
196 | | // Return a regular filterable format using RA_CTYPE_UNORM. |
197 | | const struct ra_format *ra_find_unorm_format(struct ra *ra, |
198 | | int bytes_per_component, |
199 | | int n_components) |
200 | 0 | { |
201 | 0 | for (int n = 0; n < ra->num_formats; n++) { |
202 | 0 | const struct ra_format *fmt = ra->formats[n]; |
203 | 0 | if (fmt->ctype == RA_CTYPE_UNORM && fmt->num_components == n_components && |
204 | 0 | fmt->pixel_size == bytes_per_component * n_components && |
205 | 0 | fmt->component_depth[0] == bytes_per_component * 8 && |
206 | 0 | fmt->linear_filter && ra_format_is_regular(fmt)) |
207 | 0 | return fmt; |
208 | 0 | } |
209 | 0 | return NULL; |
210 | 0 | } |
211 | | |
212 | | // Return a regular format using RA_CTYPE_UINT. |
213 | | const struct ra_format *ra_find_uint_format(struct ra *ra, |
214 | | int bytes_per_component, |
215 | | int n_components) |
216 | 0 | { |
217 | 0 | for (int n = 0; n < ra->num_formats; n++) { |
218 | 0 | const struct ra_format *fmt = ra->formats[n]; |
219 | 0 | if (fmt->ctype == RA_CTYPE_UINT && fmt->num_components == n_components && |
220 | 0 | fmt->pixel_size == bytes_per_component * n_components && |
221 | 0 | fmt->component_depth[0] == bytes_per_component * 8 && |
222 | 0 | ra_format_is_regular(fmt)) |
223 | 0 | return fmt; |
224 | 0 | } |
225 | 0 | return NULL; |
226 | 0 | } |
227 | | |
228 | | // Find a float format of any precision that matches the C type of the same |
229 | | // size for upload. |
230 | | // May drop bits from the mantissa (such as selecting float16 even if |
231 | | // bytes_per_component == 32); prefers possibly faster formats first. |
232 | | static const struct ra_format *ra_find_float_format(struct ra *ra, |
233 | | int bytes_per_component, |
234 | | int n_components) |
235 | 0 | { |
236 | | // Assumes ra_format are ordered by performance. |
237 | | // The >=16 check is to avoid catching fringe formats. |
238 | 0 | for (int n = 0; n < ra->num_formats; n++) { |
239 | 0 | const struct ra_format *fmt = ra->formats[n]; |
240 | 0 | if (fmt->ctype == RA_CTYPE_FLOAT && fmt->num_components == n_components && |
241 | 0 | fmt->pixel_size == bytes_per_component * n_components && |
242 | 0 | fmt->component_depth[0] >= 16 && |
243 | 0 | fmt->linear_filter && ra_format_is_regular(fmt)) |
244 | 0 | return fmt; |
245 | 0 | } |
246 | 0 | return NULL; |
247 | 0 | } |
248 | | |
// Return a filterable regular format that uses at least float16 internally,
// and uses a normal C float for transfer on the CPU side. (This is just so we
// don't need 32->16 bit conversion on CPU, which would be messy.)
const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components)
{
    const int transfer_bytes = sizeof(float); // CPU side always uses full floats
    return ra_find_float_format(ra, transfer_bytes, n_components);
}
256 | | |
257 | | const struct ra_format *ra_find_named_format(struct ra *ra, const char *name) |
258 | 0 | { |
259 | 0 | for (int n = 0; n < ra->num_formats; n++) { |
260 | 0 | const struct ra_format *fmt = ra->formats[n]; |
261 | 0 | if (strcmp(fmt->name, name) == 0) |
262 | 0 | return fmt; |
263 | 0 | } |
264 | 0 | return NULL; |
265 | 0 | } |
266 | | |
267 | | // Like ra_find_unorm_format(), but if no fixed point format is available, |
268 | | // return an unsigned integer format. |
269 | | static const struct ra_format *find_plane_format(struct ra *ra, int bytes, |
270 | | int n_channels, |
271 | | enum mp_component_type ctype) |
272 | 0 | { |
273 | 0 | switch (ctype) { |
274 | 0 | case MP_COMPONENT_TYPE_UINT: { |
275 | 0 | const struct ra_format *f = ra_find_unorm_format(ra, bytes, n_channels); |
276 | 0 | if (f) |
277 | 0 | return f; |
278 | 0 | return ra_find_uint_format(ra, bytes, n_channels); |
279 | 0 | } |
280 | 0 | case MP_COMPONENT_TYPE_FLOAT: |
281 | 0 | return ra_find_float_format(ra, bytes, n_channels); |
282 | 0 | default: return NULL; |
283 | 0 | } |
284 | 0 | } |
285 | | |
// Put a mapping of imgfmt to texture formats into *out. Basically it selects
// the correct texture formats needed to represent an imgfmt in a shader, with
// textures using the same memory organization as on the CPU.
// Each plane is represented by a texture, and each texture has a RGBA
// component order. out->components describes the meaning of them.
// May return integer formats for >8 bit formats, if the driver has no
// normalized 16 bit formats.
// Returns false (and *out is not touched) if no format found.
bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out)
{
    struct ra_imgfmt_desc res = {.component_type = RA_CTYPE_UNKNOWN};

    // First try "regular" formats, i.e. layouts that mp_get_regular_imgfmt()
    // can describe generically (per-plane channel counts, sizes, subsampling).
    struct mp_regular_imgfmt regfmt;
    if (mp_get_regular_imgfmt(&regfmt, imgfmt)) {
        res.num_planes = regfmt.num_planes;
        res.component_bits = regfmt.component_size * 8;
        res.component_pad = regfmt.component_pad;
        for (int n = 0; n < regfmt.num_planes; n++) {
            struct mp_regular_imgfmt_plane *plane = &regfmt.planes[n];
            res.planes[n] = find_plane_format(ra, regfmt.component_size,
                                              plane->num_components,
                                              regfmt.component_type);
            if (!res.planes[n])
                return false;
            for (int i = 0; i < plane->num_components; i++)
                res.components[n][i] = plane->components[i];
            // Dropping LSBs when shifting will lead to dropped MSBs.
            if (res.component_bits > res.planes[n]->component_depth[0] &&
                res.component_pad < 0)
                return false;
            // Renderer restriction, but actually an unwanted corner case.
            if (res.component_type != RA_CTYPE_UNKNOWN &&
                res.component_type != res.planes[n]->ctype)
                return false;
            res.component_type = res.planes[n]->ctype;
        }
        res.chroma_w = 1 << regfmt.chroma_xs;
        res.chroma_h = 1 << regfmt.chroma_ys;
        goto supported;
    }

    // Not a regular format: check for formats that carry their own
    // precomputed descriptor (special_imgfmt_desc).
    for (int n = 0; n < ra->num_formats; n++) {
        if (imgfmt && ra->formats[n]->special_imgfmt == imgfmt) {
            res = *ra->formats[n]->special_imgfmt_desc;
            goto supported;
        }
    }

    // Unsupported format
    return false;

supported:

    // *out is written only on success, per the contract above.
    *out = res;
    return true;
}
342 | | |
343 | | static const char *ctype_to_str(enum ra_ctype ctype) |
344 | 0 | { |
345 | 0 | switch (ctype) { |
346 | 0 | case RA_CTYPE_UNORM: return "unorm"; |
347 | 0 | case RA_CTYPE_UINT: return "uint "; |
348 | 0 | case RA_CTYPE_FLOAT: return "float"; |
349 | 0 | default: return "unknown"; |
350 | 0 | } |
351 | 0 | } |
352 | | |
// Log all texture formats the RA supports, one line per format, at message
// level msgl. Does nothing if that level is not enabled.
void ra_dump_tex_formats(struct ra *ra, int msgl)
{
    if (!mp_msg_test(ra->log, msgl))
        return;
    MP_MSG(ra, msgl, "Texture formats:\n");
    MP_MSG(ra, msgl, " NAME COMP*TYPE SIZE DEPTH PER COMP.\n");
    for (int n = 0; n < ra->num_formats; n++) {
        const struct ra_format *fmt = ra->formats[n];
        const char *ctype = ctype_to_str(fmt->ctype);
        // Per-component size list, e.g. "8 8 8"; shows "size/depth" for
        // components whose stored size exceeds the usable depth.
        char cl[40] = "";
        for (int i = 0; i < fmt->num_components; i++) {
            mp_snprintf_cat(cl, sizeof(cl), "%s%d", i ? " " : "",
                            fmt->component_size[i]);
            if (fmt->component_size[i] != fmt->component_depth[i])
                mp_snprintf_cat(cl, sizeof(cl), "/%d", fmt->component_depth[i]);
        }
        MP_MSG(ra, msgl, " %-10s %d*%s %3dB %s %s %s %s {%s}\n", fmt->name,
               fmt->num_components, ctype, fmt->pixel_size,
               fmt->luminance_alpha ? "LA" : " ",
               fmt->linear_filter ? "LF" : " ",
               fmt->renderable ? "CR" : " ",
               fmt->storable ? "ST" : " ", cl);
    }
    // Legend for the flag columns above.
    MP_MSG(ra, msgl, " LA = LUMINANCE_ALPHA hack format\n");
    MP_MSG(ra, msgl, " LF = linear filterable\n");
    MP_MSG(ra, msgl, " CR = can be used for render targets\n");
    MP_MSG(ra, msgl, " ST = can be used for storable images\n");
}
381 | | |
// Log a single ra_imgfmt_desc in compact one-line form:
// "<planes> planes <chroma_w>x<chroma_h> <bits>/<pad> [plane fmts] (letters) [ctype]".
void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc,
                         int msgl)
{
    char pl[80] = ""; // component letters per plane, '/'-separated
    char pf[80] = ""; // texture format name per plane, '/'-separated
    for (int n = 0; n < desc->num_planes; n++) {
        if (n > 0) {
            mp_snprintf_cat(pl, sizeof(pl), "/");
            mp_snprintf_cat(pf, sizeof(pf), "/");
        }
        char t[5] = {0};
        // Map component indices (0 = unused, 1..4 = r/g/b/a) to letters.
        for (int i = 0; i < 4; i++)
            t[i] = "_rgba"[desc->components[n][i]];
        // Trim trailing '_' placeholders, keeping at least one character.
        for (int i = 3; i > 0 && t[i] == '_'; i--)
            t[i] = '\0';
        mp_snprintf_cat(pl, sizeof(pl), "%s", t);
        mp_snprintf_cat(pf, sizeof(pf), "%s", desc->planes[n]->name);
    }
    MP_MSG(ra, msgl, "%d planes %dx%d %d/%d [%s] (%s) [%s]\n",
           desc->num_planes, desc->chroma_w, desc->chroma_h,
           desc->component_bits, desc->component_pad, pf, pl,
           ctype_to_str(desc->component_type));
}
405 | | |
// For every image format ID, log how it maps to texture formats on this RA
// (via ra_get_imgfmt_desc()). Does nothing if msgl is not enabled.
void ra_dump_img_formats(struct ra *ra, int msgl)
{
    if (!mp_msg_test(ra->log, msgl))
        return;
    MP_MSG(ra, msgl, "Image formats:\n");
    for (int imgfmt = IMGFMT_START; imgfmt < IMGFMT_END; imgfmt++) {
        const char *name = mp_imgfmt_to_name(imgfmt);
        // Skip IDs in the range that don't correspond to a real format.
        if (strcmp(name, "unknown") == 0)
            continue;
        MP_MSG(ra, msgl, " %s", name);
        struct ra_imgfmt_desc desc;
        if (ra_get_imgfmt_desc(ra, imgfmt, &desc)) {
            MP_MSG(ra, msgl, " => ");
            ra_dump_imgfmt_desc(ra, &desc, msgl);
        } else {
            // Unsupported: just end the line after the name.
            MP_MSG(ra, msgl, "\n");
        }
    }
}