Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | #include "vcs_version.h" |
30 | | |
31 | | #include <errno.h> |
32 | | #include <string.h> |
33 | | |
34 | | #if defined(__linux__) && HAVE_DLSYM |
35 | | #include <dlfcn.h> |
36 | | #endif |
37 | | |
38 | | #include "dav1d/dav1d.h" |
39 | | #include "dav1d/data.h" |
40 | | |
41 | | #include "common/validate.h" |
42 | | |
43 | | #include "src/cpu.h" |
44 | | #include "src/fg_apply.h" |
45 | | #include "src/internal.h" |
46 | | #include "src/log.h" |
47 | | #include "src/obu.h" |
48 | | #include "src/qm.h" |
49 | | #include "src/ref.h" |
50 | | #include "src/thread_task.h" |
51 | | #include "src/wedge.h" |
52 | | |
53 | 2 | static COLD void init_internal(void) { |
54 | 2 | dav1d_init_cpu(); |
55 | 2 | dav1d_init_ii_wedge_masks(); |
56 | 2 | dav1d_init_intra_edge_tree(); |
57 | 2 | dav1d_init_qm_tables(); |
58 | 2 | dav1d_init_thread(); |
59 | 2 | } |
60 | | |
61 | 23.7k | COLD const char *dav1d_version(void) { |
62 | 23.7k | return DAV1D_VERSION; |
63 | 23.7k | } |
64 | | |
65 | 0 | COLD unsigned dav1d_version_api(void) { |
66 | 0 | return (DAV1D_API_VERSION_MAJOR << 16) | |
67 | 0 | (DAV1D_API_VERSION_MINOR << 8) | |
68 | 0 | (DAV1D_API_VERSION_PATCH << 0); |
69 | 0 | } |
70 | | |
71 | 23.7k | COLD void dav1d_default_settings(Dav1dSettings *const s) { |
72 | 23.7k | s->n_threads = 0; |
73 | 23.7k | s->max_frame_delay = 0; |
74 | 23.7k | s->apply_grain = 1; |
75 | 23.7k | s->allocator.cookie = NULL; |
76 | 23.7k | s->allocator.alloc_picture_callback = dav1d_default_picture_alloc; |
77 | 23.7k | s->allocator.release_picture_callback = dav1d_default_picture_release; |
78 | 23.7k | s->logger.cookie = NULL; |
79 | 23.7k | s->logger.callback = dav1d_log_default_callback; |
80 | 23.7k | s->operating_point = 0; |
81 | 23.7k | s->all_layers = 1; // just until the tests are adjusted |
82 | 23.7k | s->frame_size_limit = 0; |
83 | 23.7k | s->strict_std_compliance = 0; |
84 | 23.7k | s->output_invisible_frames = 0; |
85 | 23.7k | s->inloop_filters = DAV1D_INLOOPFILTER_ALL; |
86 | 23.7k | s->decode_frame_type = DAV1D_DECODEFRAMETYPE_ALL; |
87 | 23.7k | } |
88 | | |
89 | | static void close_internal(Dav1dContext **const c_out, int flush); |
90 | | |
91 | | NO_SANITIZE("cfi-icall") // CFI is broken with dlsym() |
92 | 23.7k | static COLD size_t get_stack_size_internal(const pthread_attr_t *const thread_attr) { |
93 | 23.7k | #if defined(__linux__) && HAVE_DLSYM && defined(__GLIBC__) |
94 | | /* glibc has an issue where the size of the TLS is subtracted from the stack |
95 | | * size instead of allocated separately. As a result the specified stack |
96 | | * size may be insufficient when used in an application with large amounts |
97 | | * of TLS data. The following is a workaround to compensate for that. |
98 | | * See https://sourceware.org/bugzilla/show_bug.cgi?id=11787 */ |
99 | 23.7k | size_t (*const get_minstack)(const pthread_attr_t*) = |
100 | 23.7k | dlsym(RTLD_DEFAULT, "__pthread_get_minstack"); |
101 | 23.7k | if (get_minstack) |
102 | 23.7k | return get_minstack(thread_attr) - PTHREAD_STACK_MIN; |
103 | 0 | #endif |
104 | 0 | return 0; |
105 | 23.7k | } |
106 | | |
107 | | static COLD void get_num_threads(Dav1dContext *const c, const Dav1dSettings *const s, |
108 | | unsigned *n_tc, unsigned *n_fc) |
109 | 23.7k | { |
110 | | /* ceil(sqrt(n)) */ |
111 | 23.7k | static const uint8_t fc_lut[49] = { |
112 | 23.7k | 1, /* 1 */ |
113 | 23.7k | 2, 2, 2, /* 2- 4 */ |
114 | 23.7k | 3, 3, 3, 3, 3, /* 5- 9 */ |
115 | 23.7k | 4, 4, 4, 4, 4, 4, 4, /* 10-16 */ |
116 | 23.7k | 5, 5, 5, 5, 5, 5, 5, 5, 5, /* 17-25 */ |
117 | 23.7k | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, /* 26-36 */ |
118 | 23.7k | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* 37-49 */ |
119 | 23.7k | }; |
120 | 23.7k | *n_tc = s->n_threads ? s->n_threads : |
121 | 23.7k | iclip(dav1d_num_logical_processors(c), 1, DAV1D_MAX_THREADS); |
122 | 23.7k | *n_fc = s->max_frame_delay ? umin(s->max_frame_delay, *n_tc) : |
123 | 23.7k | *n_tc < 50 ? fc_lut[*n_tc - 1] : 8; // min(8, ceil(sqrt(n))) |
124 | 23.7k | } |
125 | | |
126 | 0 | COLD int dav1d_get_frame_delay(const Dav1dSettings *const s) { |
127 | 0 | unsigned n_tc, n_fc; |
128 | 0 | validate_input_or_ret(s != NULL, DAV1D_ERR(EINVAL)); |
129 | 0 | validate_input_or_ret(s->n_threads >= 0 && |
130 | 0 | s->n_threads <= DAV1D_MAX_THREADS, DAV1D_ERR(EINVAL)); |
131 | 0 | validate_input_or_ret(s->max_frame_delay >= 0 && |
132 | 0 | s->max_frame_delay <= DAV1D_MAX_FRAME_DELAY, DAV1D_ERR(EINVAL)); |
133 | |
|
134 | 0 | get_num_threads(NULL, s, &n_tc, &n_fc); |
135 | 0 | return n_fc; |
136 | 0 | } |
137 | | |
138 | 23.7k | COLD int dav1d_open(Dav1dContext **const c_out, const Dav1dSettings *const s) { |
139 | 23.7k | static pthread_once_t initted = PTHREAD_ONCE_INIT; |
140 | 23.7k | pthread_once(&initted, init_internal); |
141 | | |
142 | 23.7k | validate_input_or_ret(c_out != NULL, DAV1D_ERR(EINVAL)); |
143 | 23.7k | validate_input_or_ret(s != NULL, DAV1D_ERR(EINVAL)); |
144 | 23.7k | validate_input_or_ret(s->n_threads >= 0 && |
145 | 23.7k | s->n_threads <= DAV1D_MAX_THREADS, DAV1D_ERR(EINVAL)); |
146 | 23.7k | validate_input_or_ret(s->max_frame_delay >= 0 && |
147 | 23.7k | s->max_frame_delay <= DAV1D_MAX_FRAME_DELAY, DAV1D_ERR(EINVAL)); |
148 | 23.7k | validate_input_or_ret(s->allocator.alloc_picture_callback != NULL, |
149 | 23.7k | DAV1D_ERR(EINVAL)); |
150 | 23.7k | validate_input_or_ret(s->allocator.release_picture_callback != NULL, |
151 | 23.7k | DAV1D_ERR(EINVAL)); |
152 | 23.7k | validate_input_or_ret(s->operating_point >= 0 && |
153 | 23.7k | s->operating_point <= 31, DAV1D_ERR(EINVAL)); |
154 | 23.7k | validate_input_or_ret(s->decode_frame_type >= DAV1D_DECODEFRAMETYPE_ALL && |
155 | 23.7k | s->decode_frame_type <= DAV1D_DECODEFRAMETYPE_KEY, DAV1D_ERR(EINVAL)); |
156 | | |
157 | 23.7k | pthread_attr_t thread_attr; |
158 | 23.7k | if (pthread_attr_init(&thread_attr)) return DAV1D_ERR(ENOMEM); |
159 | 23.7k | size_t stack_size = 1024 * 1024 + get_stack_size_internal(&thread_attr); |
160 | | |
161 | 23.7k | pthread_attr_setstacksize(&thread_attr, stack_size); |
162 | | |
163 | 23.7k | Dav1dContext *const c = *c_out = dav1d_alloc_aligned(ALLOC_COMMON_CTX, sizeof(*c), 64); |
164 | 23.7k | if (!c) goto error; |
165 | 23.7k | memset(c, 0, sizeof(*c)); |
166 | | |
167 | 23.7k | c->allocator = s->allocator; |
168 | 23.7k | c->logger = s->logger; |
169 | 23.7k | c->apply_grain = s->apply_grain; |
170 | 23.7k | c->operating_point = s->operating_point; |
171 | 23.7k | c->all_layers = s->all_layers; |
172 | 23.7k | c->frame_size_limit = s->frame_size_limit; |
173 | 23.7k | c->strict_std_compliance = s->strict_std_compliance; |
174 | 23.7k | c->output_invisible_frames = s->output_invisible_frames; |
175 | 23.7k | c->inloop_filters = s->inloop_filters; |
176 | 23.7k | c->decode_frame_type = s->decode_frame_type; |
177 | | |
178 | 23.7k | dav1d_data_props_set_defaults(&c->cached_error_props); |
179 | | |
180 | 23.7k | if (dav1d_mem_pool_init(ALLOC_OBU_HDR, &c->seq_hdr_pool) || |
181 | 23.7k | dav1d_mem_pool_init(ALLOC_OBU_HDR, &c->frame_hdr_pool) || |
182 | 23.7k | dav1d_mem_pool_init(ALLOC_SEGMAP, &c->segmap_pool) || |
183 | 23.7k | dav1d_mem_pool_init(ALLOC_REFMVS, &c->refmvs_pool) || |
184 | 23.7k | dav1d_mem_pool_init(ALLOC_PIC_CTX, &c->pic_ctx_pool) || |
185 | 23.7k | dav1d_mem_pool_init(ALLOC_CDF, &c->cdf_pool)) |
186 | 0 | { |
187 | 0 | goto error; |
188 | 0 | } |
189 | | |
190 | 23.7k | if (c->allocator.alloc_picture_callback == dav1d_default_picture_alloc && |
191 | 23.7k | c->allocator.release_picture_callback == dav1d_default_picture_release) |
192 | 23.7k | { |
193 | 23.7k | if (c->allocator.cookie) goto error; |
194 | 23.7k | if (dav1d_mem_pool_init(ALLOC_PIC, &c->picture_pool)) goto error; |
195 | 23.7k | c->allocator.cookie = c->picture_pool; |
196 | 23.7k | } else if (c->allocator.alloc_picture_callback == dav1d_default_picture_alloc || |
197 | 0 | c->allocator.release_picture_callback == dav1d_default_picture_release) |
198 | 0 | { |
199 | 0 | goto error; |
200 | 0 | } |
201 | | |
202 | | /* On 32-bit systems extremely large frame sizes can cause overflows in |
203 | | * dav1d_decode_frame() malloc size calculations. Prevent that from occuring |
204 | | * by enforcing a maximum frame size limit, chosen to roughly correspond to |
205 | | * the largest size possible to decode without exhausting virtual memory. */ |
206 | 23.7k | if (sizeof(size_t) < 8 && s->frame_size_limit - 1 >= 8192 * 8192) { |
207 | 0 | c->frame_size_limit = 8192 * 8192; |
208 | 0 | if (s->frame_size_limit) |
209 | 0 | dav1d_log(c, "Frame size limit reduced from %u to %u.\n", |
210 | 0 | s->frame_size_limit, c->frame_size_limit); |
211 | 0 | } |
212 | | |
213 | 23.7k | c->flush = &c->flush_mem; |
214 | 23.7k | atomic_init(c->flush, 0); |
215 | | |
216 | 23.7k | get_num_threads(c, s, &c->n_tc, &c->n_fc); |
217 | | |
218 | 23.7k | c->fc = dav1d_alloc_aligned(ALLOC_THREAD_CTX, sizeof(*c->fc) * c->n_fc, 32); |
219 | 23.7k | if (!c->fc) goto error; |
220 | 23.7k | memset(c->fc, 0, sizeof(*c->fc) * c->n_fc); |
221 | | |
222 | 23.7k | c->tc = dav1d_alloc_aligned(ALLOC_THREAD_CTX, sizeof(*c->tc) * c->n_tc, 64); |
223 | 23.7k | if (!c->tc) goto error; |
224 | 23.7k | memset(c->tc, 0, sizeof(*c->tc) * c->n_tc); |
225 | 23.7k | if (c->n_tc > 1) { |
226 | 13.5k | if (pthread_mutex_init(&c->task_thread.lock, NULL)) goto error; |
227 | 13.5k | if (pthread_cond_init(&c->task_thread.cond, NULL)) { |
228 | 0 | pthread_mutex_destroy(&c->task_thread.lock); |
229 | 0 | goto error; |
230 | 0 | } |
231 | 13.5k | if (pthread_cond_init(&c->task_thread.delayed_fg.cond, NULL)) { |
232 | 0 | pthread_cond_destroy(&c->task_thread.cond); |
233 | 0 | pthread_mutex_destroy(&c->task_thread.lock); |
234 | 0 | goto error; |
235 | 0 | } |
236 | 13.5k | c->task_thread.cur = c->n_fc; |
237 | 13.5k | atomic_init(&c->task_thread.reset_task_cur, UINT_MAX); |
238 | 13.5k | atomic_init(&c->task_thread.cond_signaled, 0); |
239 | 13.5k | c->task_thread.inited = 1; |
240 | 13.5k | } |
241 | | |
242 | 23.7k | if (c->n_fc > 1) { |
243 | 13.5k | const size_t out_delayed_sz = sizeof(*c->frame_thread.out_delayed) * c->n_fc; |
244 | 13.5k | c->frame_thread.out_delayed = |
245 | 13.5k | dav1d_malloc(ALLOC_THREAD_CTX, out_delayed_sz); |
246 | 13.5k | if (!c->frame_thread.out_delayed) goto error; |
247 | 13.5k | memset(c->frame_thread.out_delayed, 0, out_delayed_sz); |
248 | 13.5k | } |
249 | 88.0k | for (unsigned n = 0; n < c->n_fc; n++) { |
250 | 64.3k | Dav1dFrameContext *const f = &c->fc[n]; |
251 | 64.3k | if (c->n_tc > 1) { |
252 | 54.1k | if (pthread_mutex_init(&f->task_thread.lock, NULL)) goto error; |
253 | 54.1k | if (pthread_cond_init(&f->task_thread.cond, NULL)) { |
254 | 0 | pthread_mutex_destroy(&f->task_thread.lock); |
255 | 0 | goto error; |
256 | 0 | } |
257 | 54.1k | if (pthread_mutex_init(&f->task_thread.pending_tasks.lock, NULL)) { |
258 | 0 | pthread_cond_destroy(&f->task_thread.cond); |
259 | 0 | pthread_mutex_destroy(&f->task_thread.lock); |
260 | 0 | goto error; |
261 | 0 | } |
262 | 54.1k | } |
263 | 64.3k | f->c = c; |
264 | 64.3k | f->task_thread.ttd = &c->task_thread; |
265 | 64.3k | f->lf.last_sharpness = -1; |
266 | 64.3k | } |
267 | | |
268 | 88.0k | for (unsigned m = 0; m < c->n_tc; m++) { |
269 | 64.3k | Dav1dTaskContext *const t = &c->tc[m]; |
270 | 64.3k | t->f = &c->fc[0]; |
271 | 64.3k | t->task_thread.ttd = &c->task_thread; |
272 | 64.3k | t->c = c; |
273 | 64.3k | memset(t->cf_16bpc, 0, sizeof(t->cf_16bpc)); |
274 | 64.3k | if (c->n_tc > 1) { |
275 | 54.1k | if (pthread_mutex_init(&t->task_thread.td.lock, NULL)) goto error; |
276 | 54.1k | if (pthread_cond_init(&t->task_thread.td.cond, NULL)) { |
277 | 0 | pthread_mutex_destroy(&t->task_thread.td.lock); |
278 | 0 | goto error; |
279 | 0 | } |
280 | 54.1k | if (pthread_create(&t->task_thread.td.thread, &thread_attr, dav1d_worker_task, t)) { |
281 | 0 | pthread_cond_destroy(&t->task_thread.td.cond); |
282 | 0 | pthread_mutex_destroy(&t->task_thread.td.lock); |
283 | 0 | goto error; |
284 | 0 | } |
285 | 54.1k | t->task_thread.td.inited = 1; |
286 | 54.1k | } |
287 | 64.3k | } |
288 | 23.7k | dav1d_pal_dsp_init(&c->pal_dsp); |
289 | 23.7k | dav1d_refmvs_dsp_init(&c->refmvs_dsp); |
290 | | |
291 | 23.7k | pthread_attr_destroy(&thread_attr); |
292 | | |
293 | 23.7k | return 0; |
294 | | |
295 | 0 | error: |
296 | 0 | if (c) close_internal(c_out, 0); |
297 | 0 | pthread_attr_destroy(&thread_attr); |
298 | 0 | return DAV1D_ERR(ENOMEM); |
299 | 23.7k | } |
300 | | |
301 | | static int has_grain(const Dav1dPicture *const pic) |
302 | 194k | { |
303 | 194k | const Dav1dFilmGrainData *fgdata = &pic->frame_hdr->film_grain.data; |
304 | 194k | return fgdata->num_y_points || fgdata->num_uv_points[0] || |
305 | 179k | fgdata->num_uv_points[1] || (fgdata->clip_to_restricted_range && |
306 | 4.33k | fgdata->chroma_scaling_from_luma); |
307 | 194k | } |
308 | | |
309 | | static int output_image(Dav1dContext *const c, Dav1dPicture *const out) |
310 | 181k | { |
311 | 181k | int res = 0; |
312 | | |
313 | 181k | Dav1dThreadPicture *const in = (c->all_layers || !c->max_spatial_id) |
314 | 181k | ? &c->out : &c->cache; |
315 | 181k | if (!c->apply_grain || !has_grain(&in->p)) { |
316 | 168k | dav1d_picture_move_ref(out, &in->p); |
317 | 168k | dav1d_thread_picture_unref(in); |
318 | 168k | goto end; |
319 | 168k | } |
320 | | |
321 | 12.6k | res = dav1d_apply_grain(c, out, &in->p); |
322 | 12.6k | dav1d_thread_picture_unref(in); |
323 | 181k | end: |
324 | 181k | if (!c->all_layers && c->max_spatial_id && c->out.p.data[0]) { |
325 | 0 | dav1d_thread_picture_move_ref(in, &c->out); |
326 | 0 | } |
327 | 181k | return res; |
328 | 12.6k | } |
329 | | |
330 | 1.75M | static int output_picture_ready(Dav1dContext *const c, const int drain) { |
331 | 1.75M | if (c->cached_error) return 1; |
332 | 1.44M | if (!c->all_layers && c->max_spatial_id) { |
333 | 0 | if (c->out.p.data[0] && c->cache.p.data[0]) { |
334 | 0 | if (c->max_spatial_id == c->cache.p.frame_hdr->spatial_id || |
335 | 0 | c->out.flags & PICTURE_FLAG_NEW_TEMPORAL_UNIT) |
336 | 0 | return 1; |
337 | 0 | dav1d_thread_picture_unref(&c->cache); |
338 | 0 | dav1d_thread_picture_move_ref(&c->cache, &c->out); |
339 | 0 | return 0; |
340 | 0 | } else if (c->cache.p.data[0] && drain) { |
341 | 0 | return 1; |
342 | 0 | } else if (c->out.p.data[0]) { |
343 | 0 | dav1d_thread_picture_move_ref(&c->cache, &c->out); |
344 | 0 | return 0; |
345 | 0 | } |
346 | 0 | } |
347 | | |
348 | 1.44M | return !!c->out.p.data[0]; |
349 | 1.44M | } |
350 | | |
351 | 24.6k | static int drain_picture(Dav1dContext *const c, Dav1dPicture *const out) { |
352 | 24.6k | unsigned drain_count = 0; |
353 | 24.6k | int drained = 0; |
354 | 74.2k | do { |
355 | 74.2k | const unsigned next = c->frame_thread.next; |
356 | 74.2k | Dav1dFrameContext *const f = &c->fc[next]; |
357 | 74.2k | pthread_mutex_lock(&c->task_thread.lock); |
358 | 89.0k | while (f->n_tile_data > 0) |
359 | 14.7k | pthread_cond_wait(&f->task_thread.cond, |
360 | 14.7k | &f->task_thread.ttd->lock); |
361 | 74.2k | Dav1dThreadPicture *const out_delayed = |
362 | 74.2k | &c->frame_thread.out_delayed[next]; |
363 | 74.2k | if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) { |
364 | 17.7k | unsigned first = atomic_load(&c->task_thread.first); |
365 | 17.7k | if (first + 1U < c->n_fc) |
366 | 17.7k | atomic_fetch_add(&c->task_thread.first, 1U); |
367 | 1.08k | else |
368 | 17.7k | atomic_store(&c->task_thread.first, 0); |
369 | 17.7k | atomic_compare_exchange_strong(&c->task_thread.reset_task_cur, |
370 | 17.7k | &first, UINT_MAX); |
371 | 17.7k | if (c->task_thread.cur && c->task_thread.cur < c->n_fc) |
372 | 1.92k | c->task_thread.cur--; |
373 | 17.7k | drained = 1; |
374 | 56.5k | } else if (drained) { |
375 | 523 | pthread_mutex_unlock(&c->task_thread.lock); |
376 | 523 | break; |
377 | 523 | } |
378 | 73.7k | if (++c->frame_thread.next == c->n_fc) |
379 | 18.4k | c->frame_thread.next = 0; |
380 | 73.7k | pthread_mutex_unlock(&c->task_thread.lock); |
381 | 73.7k | const int error = f->task_thread.retval; |
382 | 73.7k | if (error) { |
383 | 5.69k | f->task_thread.retval = 0; |
384 | 5.69k | dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m); |
385 | 5.69k | dav1d_thread_picture_unref(out_delayed); |
386 | 5.69k | return error; |
387 | 5.69k | } |
388 | 68.0k | if (out_delayed->p.data[0]) { |
389 | 12.0k | const unsigned progress = |
390 | 12.0k | atomic_load_explicit(&out_delayed->progress[1], |
391 | 12.0k | memory_order_relaxed); |
392 | 12.0k | if ((out_delayed->visible || c->output_invisible_frames) && |
393 | 10.7k | progress != FRAME_ERROR) |
394 | 10.6k | { |
395 | 10.6k | dav1d_thread_picture_ref(&c->out, out_delayed); |
396 | 10.6k | c->event_flags |= dav1d_picture_get_event_flags(out_delayed); |
397 | 10.6k | } |
398 | 12.0k | dav1d_thread_picture_unref(out_delayed); |
399 | 12.0k | if (output_picture_ready(c, 0)) |
400 | 10.6k | return output_image(c, out); |
401 | 12.0k | } |
402 | 68.0k | } while (++drain_count < c->n_fc); |
403 | | |
404 | 8.32k | if (output_picture_ready(c, 1)) |
405 | 0 | return output_image(c, out); |
406 | | |
407 | 8.32k | return DAV1D_ERR(EAGAIN); |
408 | 8.32k | } |
409 | | |
410 | | static int gen_picture(Dav1dContext *const c) |
411 | 864k | { |
412 | 864k | Dav1dData *const in = &c->in; |
413 | | |
414 | 864k | if (output_picture_ready(c, 0)) |
415 | 308k | return 0; |
416 | | |
417 | 738k | while (in->sz > 0) { |
418 | 603k | const ptrdiff_t res = dav1d_parse_obus(c, in); |
419 | 603k | if (res < 0) { |
420 | 98.1k | dav1d_data_unref_internal(in); |
421 | 505k | } else { |
422 | 505k | assert((size_t)res <= in->sz); |
423 | 505k | in->sz -= res; |
424 | 505k | in->data += res; |
425 | 505k | if (!in->sz) dav1d_data_unref_internal(in); |
426 | 505k | } |
427 | 603k | if (output_picture_ready(c, 0)) |
428 | 323k | break; |
429 | 280k | if (res < 0) |
430 | 97.7k | return (int)res; |
431 | 280k | } |
432 | | |
433 | 457k | return 0; |
434 | 555k | } |
435 | | |
436 | | int dav1d_send_data(Dav1dContext *const c, Dav1dData *const in) |
437 | 471k | { |
438 | 471k | validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL)); |
439 | 471k | validate_input_or_ret(in != NULL, DAV1D_ERR(EINVAL)); |
440 | | |
441 | 471k | if (in->data) { |
442 | 471k | validate_input_or_ret(in->sz > 0 && in->sz <= SIZE_MAX / 2, DAV1D_ERR(EINVAL)); |
443 | 471k | c->drain = 0; |
444 | 471k | } |
445 | 471k | if (c->in.data) |
446 | 24.8k | return DAV1D_ERR(EAGAIN); |
447 | 446k | dav1d_data_ref(&c->in, in); |
448 | | |
449 | 446k | int res = gen_picture(c); |
450 | 446k | if (!res) |
451 | 350k | dav1d_data_unref_internal(in); |
452 | | |
453 | 446k | return res; |
454 | 471k | } |
455 | | |
456 | | int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out) |
457 | 417k | { |
458 | 417k | validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL)); |
459 | 417k | validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL)); |
460 | | |
461 | 417k | const int drain = c->drain; |
462 | 417k | c->drain = 1; |
463 | | |
464 | 417k | int res = gen_picture(c); |
465 | 417k | if (res < 0) |
466 | 1.78k | return res; |
467 | | |
468 | 415k | if (c->cached_error) { |
469 | 152k | const int res = c->cached_error; |
470 | 152k | c->cached_error = 0; |
471 | 152k | return res; |
472 | 152k | } |
473 | | |
474 | 263k | if (output_picture_ready(c, c->n_fc == 1)) |
475 | 170k | return output_image(c, out); |
476 | | |
477 | 92.3k | if (c->n_fc > 1 && drain) |
478 | 24.6k | return drain_picture(c, out); |
479 | | |
480 | 67.7k | return DAV1D_ERR(EAGAIN); |
481 | 92.3k | } |
482 | | |
483 | | int dav1d_apply_grain(Dav1dContext *const c, Dav1dPicture *const out, |
484 | | const Dav1dPicture *const in) |
485 | 12.6k | { |
486 | 12.6k | validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL)); |
487 | 12.6k | validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL)); |
488 | 12.6k | validate_input_or_ret(in != NULL, DAV1D_ERR(EINVAL)); |
489 | | |
490 | 12.6k | if (!has_grain(in)) { |
491 | 0 | dav1d_picture_ref(out, in); |
492 | 0 | return 0; |
493 | 0 | } |
494 | | |
495 | 12.6k | int res = dav1d_picture_alloc_copy(c, out, in->p.w, in); |
496 | 12.6k | if (res < 0) goto error; |
497 | | |
498 | 12.6k | if (c->n_tc > 1) { |
499 | 7.68k | dav1d_task_delayed_fg(c, out, in); |
500 | 7.68k | } else { |
501 | 5.01k | switch (out->p.bpc) { |
502 | 0 | #if CONFIG_8BPC |
503 | 1.60k | case 8: |
504 | 1.60k | dav1d_apply_grain_8bpc(&c->dsp[0].fg, out, in); |
505 | 1.60k | break; |
506 | 0 | #endif |
507 | 0 | #if CONFIG_16BPC |
508 | 3.11k | case 10: |
509 | 3.41k | case 12: |
510 | 3.41k | dav1d_apply_grain_16bpc(&c->dsp[(out->p.bpc >> 1) - 4].fg, out, in); |
511 | 3.41k | break; |
512 | 0 | #endif |
513 | 0 | default: abort(); |
514 | 5.01k | } |
515 | 5.01k | } |
516 | | |
517 | 12.6k | return 0; |
518 | | |
519 | 0 | error: |
520 | 0 | dav1d_picture_unref_internal(out); |
521 | 0 | return res; |
522 | 12.6k | } |
523 | | |
524 | 23.7k | void dav1d_flush(Dav1dContext *const c) { |
525 | 23.7k | dav1d_data_unref_internal(&c->in); |
526 | 23.7k | if (c->out.p.frame_hdr) |
527 | 0 | dav1d_thread_picture_unref(&c->out); |
528 | 23.7k | if (c->cache.p.frame_hdr) |
529 | 0 | dav1d_thread_picture_unref(&c->cache); |
530 | | |
531 | 23.7k | c->drain = 0; |
532 | 23.7k | c->cached_error = 0; |
533 | | |
534 | 213k | for (int i = 0; i < 8; i++) { |
535 | 189k | if (c->refs[i].p.p.frame_hdr) |
536 | 117k | dav1d_thread_picture_unref(&c->refs[i].p); |
537 | 189k | dav1d_ref_dec(&c->refs[i].segmap); |
538 | 189k | dav1d_ref_dec(&c->refs[i].refmvs); |
539 | 189k | dav1d_cdf_thread_unref(&c->cdf[i]); |
540 | 189k | } |
541 | 23.7k | c->frame_hdr = NULL; |
542 | 23.7k | c->seq_hdr = NULL; |
543 | 23.7k | dav1d_ref_dec(&c->seq_hdr_ref); |
544 | | |
545 | 23.7k | c->mastering_display = NULL; |
546 | 23.7k | c->content_light = NULL; |
547 | 23.7k | c->itut_t35 = NULL; |
548 | 23.7k | c->n_itut_t35 = 0; |
549 | 23.7k | dav1d_ref_dec(&c->mastering_display_ref); |
550 | 23.7k | dav1d_ref_dec(&c->content_light_ref); |
551 | 23.7k | dav1d_ref_dec(&c->itut_t35_ref); |
552 | | |
553 | 23.7k | dav1d_data_props_unref_internal(&c->cached_error_props); |
554 | | |
555 | 23.7k | if (c->n_fc == 1 && c->n_tc == 1) return; |
556 | 23.7k | atomic_store(c->flush, 1); |
557 | | |
558 | 13.5k | if (c->n_tc > 1) { |
559 | 13.5k | pthread_mutex_lock(&c->task_thread.lock); |
560 | | // stop running tasks in worker threads |
561 | 67.7k | for (unsigned i = 0; i < c->n_tc; i++) { |
562 | 54.1k | Dav1dTaskContext *const tc = &c->tc[i]; |
563 | 57.2k | while (!tc->task_thread.flushed) { |
564 | 3.09k | pthread_cond_wait(&tc->task_thread.td.cond, &c->task_thread.lock); |
565 | 3.09k | } |
566 | 54.1k | } |
567 | 67.7k | for (unsigned i = 0; i < c->n_fc; i++) { |
568 | 54.1k | c->fc[i].task_thread.task_head = NULL; |
569 | 54.1k | c->fc[i].task_thread.task_tail = NULL; |
570 | 54.1k | c->fc[i].task_thread.task_cur_prev = NULL; |
571 | 54.1k | c->fc[i].task_thread.pending_tasks.head = NULL; |
572 | 54.1k | c->fc[i].task_thread.pending_tasks.tail = NULL; |
573 | 54.1k | atomic_init(&c->fc[i].task_thread.pending_tasks.merge, 0); |
574 | 54.1k | } |
575 | 13.5k | atomic_init(&c->task_thread.first, 0); |
576 | 13.5k | c->task_thread.cur = c->n_fc; |
577 | 13.5k | atomic_store(&c->task_thread.reset_task_cur, UINT_MAX); |
578 | 13.5k | atomic_store(&c->task_thread.cond_signaled, 0); |
579 | 13.5k | pthread_mutex_unlock(&c->task_thread.lock); |
580 | 13.5k | } |
581 | | |
582 | 13.5k | if (c->n_fc > 1) { |
583 | 67.7k | for (unsigned n = 0, next = c->frame_thread.next; n < c->n_fc; n++, next++) { |
584 | 54.1k | if (next == c->n_fc) next = 0; |
585 | 54.1k | Dav1dFrameContext *const f = &c->fc[next]; |
586 | 54.1k | dav1d_decode_frame_exit(f, -1); |
587 | 54.1k | f->n_tile_data = 0; |
588 | 54.1k | f->task_thread.retval = 0; |
589 | 54.1k | f->task_thread.error = 0; |
590 | 54.1k | Dav1dThreadPicture *out_delayed = &c->frame_thread.out_delayed[next]; |
591 | 54.1k | if (out_delayed->p.frame_hdr) { |
592 | 4.21k | dav1d_thread_picture_unref(out_delayed); |
593 | 4.21k | } |
594 | 54.1k | } |
595 | 13.5k | c->frame_thread.next = 0; |
596 | 13.5k | } |
597 | 13.5k | atomic_store(c->flush, 0); |
598 | 13.5k | } |
599 | | |
600 | 23.7k | COLD void dav1d_close(Dav1dContext **const c_out) { |
601 | 23.7k | validate_input(c_out != NULL); |
602 | | #if TRACK_HEAP_ALLOCATIONS |
603 | | dav1d_log_alloc_stats(*c_out); |
604 | | #endif |
605 | 23.7k | close_internal(c_out, 1); |
606 | 23.7k | } |
607 | | |
608 | 23.7k | static COLD void close_internal(Dav1dContext **const c_out, int flush) { |
609 | 23.7k | Dav1dContext *const c = *c_out; |
610 | 23.7k | if (!c) return; |
611 | | |
612 | 23.7k | if (flush) dav1d_flush(c); |
613 | | |
614 | 23.7k | if (c->tc) { |
615 | 23.7k | struct TaskThreadData *ttd = &c->task_thread; |
616 | 23.7k | if (ttd->inited) { |
617 | 13.5k | pthread_mutex_lock(&ttd->lock); |
618 | 67.7k | for (unsigned n = 0; n < c->n_tc && c->tc[n].task_thread.td.inited; n++) |
619 | 54.1k | c->tc[n].task_thread.die = 1; |
620 | 13.5k | pthread_cond_broadcast(&ttd->cond); |
621 | 13.5k | pthread_mutex_unlock(&ttd->lock); |
622 | 67.7k | for (unsigned n = 0; n < c->n_tc; n++) { |
623 | 54.1k | Dav1dTaskContext *const pf = &c->tc[n]; |
624 | 54.1k | if (!pf->task_thread.td.inited) break; |
625 | 54.1k | pthread_join(pf->task_thread.td.thread, NULL); |
626 | 54.1k | pthread_cond_destroy(&pf->task_thread.td.cond); |
627 | 54.1k | pthread_mutex_destroy(&pf->task_thread.td.lock); |
628 | 54.1k | } |
629 | 13.5k | pthread_cond_destroy(&ttd->delayed_fg.cond); |
630 | 13.5k | pthread_cond_destroy(&ttd->cond); |
631 | 13.5k | pthread_mutex_destroy(&ttd->lock); |
632 | 13.5k | } |
633 | 23.7k | dav1d_free_aligned(c->tc); |
634 | 23.7k | } |
635 | | |
636 | 88.0k | for (unsigned n = 0; c->fc && n < c->n_fc; n++) { |
637 | 64.3k | Dav1dFrameContext *const f = &c->fc[n]; |
638 | | |
639 | | // clean-up threading stuff |
640 | 64.3k | if (c->n_fc > 1) { |
641 | 54.1k | dav1d_free(f->tile_thread.lowest_pixel_mem); |
642 | 54.1k | dav1d_free(f->frame_thread.b); |
643 | 54.1k | dav1d_free_aligned(f->frame_thread.cbi); |
644 | 54.1k | dav1d_free_aligned(f->frame_thread.pal_idx); |
645 | 54.1k | dav1d_free_aligned(f->frame_thread.cf); |
646 | 54.1k | dav1d_free(f->frame_thread.tile_start_off); |
647 | 54.1k | dav1d_free_aligned(f->frame_thread.pal); |
648 | 54.1k | } |
649 | 64.3k | if (c->n_tc > 1) { |
650 | 54.1k | pthread_mutex_destroy(&f->task_thread.pending_tasks.lock); |
651 | 54.1k | pthread_cond_destroy(&f->task_thread.cond); |
652 | 54.1k | pthread_mutex_destroy(&f->task_thread.lock); |
653 | 54.1k | } |
654 | 64.3k | dav1d_free(f->frame_thread.frame_progress); |
655 | 64.3k | dav1d_free(f->task_thread.tasks); |
656 | 64.3k | dav1d_free(f->task_thread.tile_tasks[0]); |
657 | 64.3k | dav1d_free_aligned(f->ts); |
658 | 64.3k | dav1d_free_aligned(f->ipred_edge[0]); |
659 | 64.3k | dav1d_free(f->a); |
660 | 64.3k | dav1d_free(f->tile); |
661 | 64.3k | dav1d_free(f->lf.mask); |
662 | 64.3k | dav1d_free(f->lf.level); |
663 | 64.3k | dav1d_free(f->lf.lr_mask); |
664 | 64.3k | dav1d_free(f->lf.tx_lpf_right_edge[0]); |
665 | 64.3k | dav1d_free(f->lf.start_of_tile_row); |
666 | 64.3k | dav1d_free_aligned(f->rf.r); |
667 | 64.3k | dav1d_free_aligned(f->lf.cdef_line_buf); |
668 | 64.3k | dav1d_free_aligned(f->lf.lr_line_buf); |
669 | 64.3k | } |
670 | 23.7k | dav1d_free_aligned(c->fc); |
671 | 23.7k | if (c->n_fc > 1 && c->frame_thread.out_delayed) { |
672 | 67.7k | for (unsigned n = 0; n < c->n_fc; n++) |
673 | 54.1k | if (c->frame_thread.out_delayed[n].p.frame_hdr) |
674 | 0 | dav1d_thread_picture_unref(&c->frame_thread.out_delayed[n]); |
675 | 13.5k | dav1d_free(c->frame_thread.out_delayed); |
676 | 13.5k | } |
677 | 23.9k | for (int n = 0; n < c->n_tile_data; n++) |
678 | 239 | dav1d_data_unref_internal(&c->tile[n].data); |
679 | 23.7k | dav1d_free(c->tile); |
680 | 213k | for (int n = 0; n < 8; n++) { |
681 | 189k | dav1d_cdf_thread_unref(&c->cdf[n]); |
682 | 189k | if (c->refs[n].p.p.frame_hdr) |
683 | 0 | dav1d_thread_picture_unref(&c->refs[n].p); |
684 | 189k | dav1d_ref_dec(&c->refs[n].refmvs); |
685 | 189k | dav1d_ref_dec(&c->refs[n].segmap); |
686 | 189k | } |
687 | 23.7k | dav1d_ref_dec(&c->seq_hdr_ref); |
688 | 23.7k | dav1d_ref_dec(&c->frame_hdr_ref); |
689 | | |
690 | 23.7k | dav1d_ref_dec(&c->mastering_display_ref); |
691 | 23.7k | dav1d_ref_dec(&c->content_light_ref); |
692 | 23.7k | dav1d_ref_dec(&c->itut_t35_ref); |
693 | | |
694 | 23.7k | dav1d_mem_pool_end(c->seq_hdr_pool); |
695 | 23.7k | dav1d_mem_pool_end(c->frame_hdr_pool); |
696 | 23.7k | dav1d_mem_pool_end(c->segmap_pool); |
697 | 23.7k | dav1d_mem_pool_end(c->refmvs_pool); |
698 | 23.7k | dav1d_mem_pool_end(c->cdf_pool); |
699 | 23.7k | dav1d_mem_pool_end(c->picture_pool); |
700 | 23.7k | dav1d_mem_pool_end(c->pic_ctx_pool); |
701 | | |
702 | 23.7k | dav1d_freep_aligned(c_out); |
703 | 23.7k | } |
704 | | |
705 | 0 | int dav1d_get_event_flags(Dav1dContext *const c, enum Dav1dEventFlags *const flags) { |
706 | 0 | validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL)); |
707 | 0 | validate_input_or_ret(flags != NULL, DAV1D_ERR(EINVAL)); |
708 | |
|
709 | 0 | *flags = c->event_flags; |
710 | 0 | c->event_flags = 0; |
711 | 0 | return 0; |
712 | 0 | } |
713 | | |
714 | 0 | int dav1d_get_decode_error_data_props(Dav1dContext *const c, Dav1dDataProps *const out) { |
715 | 0 | validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL)); |
716 | 0 | validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL)); |
717 | |
|
718 | 0 | dav1d_data_props_unref_internal(out); |
719 | 0 | *out = c->cached_error_props; |
720 | 0 | dav1d_data_props_set_defaults(&c->cached_error_props); |
721 | |
|
722 | 0 | return 0; |
723 | 0 | } |
724 | | |
725 | 181k | void dav1d_picture_unref(Dav1dPicture *const p) { |
726 | 181k | dav1d_picture_unref_internal(p); |
727 | 181k | } |
728 | | |
729 | 447k | uint8_t *dav1d_data_create(Dav1dData *const buf, const size_t sz) { |
730 | 447k | return dav1d_data_create_internal(buf, sz); |
731 | 447k | } |
732 | | |
733 | | int dav1d_data_wrap(Dav1dData *const buf, const uint8_t *const ptr, |
734 | | const size_t sz, |
735 | | void (*const free_callback)(const uint8_t *data, |
736 | | void *user_data), |
737 | | void *const user_data) |
738 | 0 | { |
739 | 0 | return dav1d_data_wrap_internal(buf, ptr, sz, free_callback, user_data); |
740 | 0 | } |
741 | | |
742 | | int dav1d_data_wrap_user_data(Dav1dData *const buf, |
743 | | const uint8_t *const user_data, |
744 | | void (*const free_callback)(const uint8_t *user_data, |
745 | | void *cookie), |
746 | | void *const cookie) |
747 | 0 | { |
748 | 0 | return dav1d_data_wrap_user_data_internal(buf, |
749 | 0 | user_data, |
750 | 0 | free_callback, |
751 | 0 | cookie); |
752 | 0 | } |
753 | | |
754 | 96.8k | void dav1d_data_unref(Dav1dData *const buf) { |
755 | 96.8k | dav1d_data_unref_internal(buf); |
756 | 96.8k | } |
757 | | |
758 | 0 | void dav1d_data_props_unref(Dav1dDataProps *const props) { |
759 | 0 | dav1d_data_props_unref_internal(props); |
760 | 0 | } |