/work/dav1d/src/thread_task.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include "common/frame.h" |
31 | | |
32 | | #include "src/thread_task.h" |
33 | | #include "src/fg_apply.h" |
34 | | |
35 | | // This function resets the cur pointer to the first frame theoretically |
36 | | // executable after a task completed (ie. each time we update some progress or |
37 | | // insert some tasks in the queue). |
38 | | // When frame_idx is set, it can be either from a completed task, or from tasks |
39 | | // inserted in the queue, in which case we have to make sure the cur pointer |
40 | | // isn't past this insert. |
41 | | // The special case where frame_idx is UINT_MAX is to handle the reset after |
42 | | // completing a task and locklessly signaling progress. In this case we don't |
43 | | // enter a critical section, which is needed for this function, so we set an |
44 | | // atomic for a delayed handling, happening here. Meaning we can call this |
45 | | // function without any actual update other than what's in the atomic, hence |
46 | | // this special case. |
47 | | static inline int reset_task_cur(const Dav1dContext *const c, |
48 | | struct TaskThreadData *const ttd, |
49 | | unsigned frame_idx) |
50 | 5.21M | { |
51 | 5.21M | const unsigned first = atomic_load(&ttd->first); |
52 | 5.21M | unsigned reset_frame_idx = atomic_exchange(&ttd->reset_task_cur, UINT_MAX); |
53 | 5.21M | if (reset_frame_idx < first) { |
54 | 0 | if (frame_idx == UINT_MAX) return 0; |
55 | 0 | reset_frame_idx = UINT_MAX; |
56 | 0 | } |
57 | 5.21M | if (!ttd->cur && c->fc[first].task_thread.task_cur_prev == NULL) |
58 | 1.04M | return 0; |
59 | 4.16M | if (reset_frame_idx != UINT_MAX) { |
60 | 98.7k | if (frame_idx == UINT_MAX) { |
61 | 44.7k | if (reset_frame_idx > first + ttd->cur) |
62 | 48 | return 0; |
63 | 44.7k | ttd->cur = reset_frame_idx - first; |
64 | 44.7k | goto cur_found; |
65 | 44.7k | } |
66 | 4.07M | } else if (frame_idx == UINT_MAX) |
67 | 3.55M | return 0; |
68 | 570k | if (frame_idx < first) frame_idx += c->n_fc; |
69 | 570k | const unsigned min_frame_idx = umin(reset_frame_idx, frame_idx); |
70 | 570k | const unsigned cur_frame_idx = first + ttd->cur; |
71 | 570k | if (ttd->cur < c->n_fc && cur_frame_idx < min_frame_idx) |
72 | 3.64k | return 0; |
73 | 1.07M | for (ttd->cur = min_frame_idx - first; ttd->cur < c->n_fc; ttd->cur++) |
74 | 994k | if (c->fc[(first + ttd->cur) % c->n_fc].task_thread.task_head) |
75 | 482k | break; |
76 | 611k | cur_found: |
77 | 3.57M | for (unsigned i = ttd->cur; i < c->n_fc; i++) |
78 | 2.95M | c->fc[(first + i) % c->n_fc].task_thread.task_cur_prev = NULL; |
79 | 611k | return 1; |
80 | 566k | } |
81 | | |
82 | | static inline void reset_task_cur_async(struct TaskThreadData *const ttd, |
83 | | unsigned frame_idx, unsigned n_frames) |
84 | 446k | { |
85 | 446k | const unsigned first = atomic_load(&ttd->first); |
86 | 446k | if (frame_idx < first) frame_idx += n_frames; |
87 | 446k | unsigned last_idx = frame_idx; |
88 | 448k | do { |
89 | 448k | frame_idx = last_idx; |
90 | 448k | last_idx = atomic_exchange(&ttd->reset_task_cur, frame_idx); |
91 | 448k | } while (last_idx < frame_idx); |
92 | 446k | if (frame_idx == first && atomic_load(&ttd->first) != first) { |
93 | 0 | unsigned expected = frame_idx; |
94 | 0 | atomic_compare_exchange_strong(&ttd->reset_task_cur, &expected, UINT_MAX); |
95 | 0 | } |
96 | 446k | } |
97 | | |
98 | | static void insert_tasks_between(Dav1dFrameContext *const f, |
99 | | Dav1dTask *const first, Dav1dTask *const last, |
100 | | Dav1dTask *const a, Dav1dTask *const b, |
101 | | const int cond_signal) |
102 | 793k | { |
103 | 793k | struct TaskThreadData *const ttd = f->task_thread.ttd; |
104 | 793k | if (atomic_load(f->c->flush)) return; |
105 | 793k | assert(!a || a->next == b); |
106 | 793k | if (!a) f->task_thread.task_head = first; |
107 | 596k | else a->next = first; |
108 | 793k | if (!b) f->task_thread.task_tail = last; |
109 | 793k | last->next = b; |
110 | 793k | reset_task_cur(f->c, ttd, first->frame_idx); |
111 | 793k | if (cond_signal && !atomic_fetch_or(&ttd->cond_signaled, 1)) |
112 | 38.8k | pthread_cond_signal(&ttd->cond); |
113 | 793k | } |
114 | | |
115 | | static void insert_tasks(Dav1dFrameContext *const f, |
116 | | Dav1dTask *const first, Dav1dTask *const last, |
117 | | const int cond_signal) |
118 | 793k | { |
119 | | // insert task back into task queue |
120 | 793k | Dav1dTask *t_ptr, *prev_t = NULL; |
121 | 793k | for (t_ptr = f->task_thread.task_head; |
122 | 4.63M | t_ptr; prev_t = t_ptr, t_ptr = t_ptr->next) |
123 | 4.00M | { |
124 | | // entropy coding precedes other steps |
125 | 4.00M | if (t_ptr->type == DAV1D_TASK_TYPE_TILE_ENTROPY) { |
126 | 313k | if (first->type > DAV1D_TASK_TYPE_TILE_ENTROPY) continue; |
127 | | // both are entropy |
128 | 46.8k | if (first->sby > t_ptr->sby) continue; |
129 | 14.5k | if (first->sby < t_ptr->sby) { |
130 | 583 | insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal); |
131 | 583 | return; |
132 | 583 | } |
133 | | // same sby |
134 | 3.69M | } else { |
135 | 3.69M | if (first->type == DAV1D_TASK_TYPE_TILE_ENTROPY) { |
136 | 85.7k | insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal); |
137 | 85.7k | return; |
138 | 85.7k | } |
139 | 3.60M | if (first->sby > t_ptr->sby) continue; |
140 | 431k | if (first->sby < t_ptr->sby) { |
141 | 74.1k | insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal); |
142 | 74.1k | return; |
143 | 74.1k | } |
144 | | // same sby |
145 | 357k | if (first->type > t_ptr->type) continue; |
146 | 17.6k | if (first->type < t_ptr->type) { |
147 | 3.04k | insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal); |
148 | 3.04k | return; |
149 | 3.04k | } |
150 | | // same task type |
151 | 17.6k | } |
152 | | |
153 | | // sort by tile-id |
154 | 28.5k | assert(first->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION || |
155 | 28.5k | first->type == DAV1D_TASK_TYPE_TILE_ENTROPY); |
156 | 28.5k | assert(first->type == t_ptr->type); |
157 | 28.5k | assert(t_ptr->sby == first->sby); |
158 | 28.5k | const int p = first->type == DAV1D_TASK_TYPE_TILE_ENTROPY; |
159 | 28.5k | const int t_tile_idx = (int) (first - f->task_thread.tile_tasks[p]); |
160 | 28.5k | const int p_tile_idx = (int) (t_ptr - f->task_thread.tile_tasks[p]); |
161 | 28.5k | assert(t_tile_idx != p_tile_idx); |
162 | 28.5k | if (t_tile_idx > p_tile_idx) continue; |
163 | 15 | insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal); |
164 | 15 | return; |
165 | 28.5k | } |
166 | | // append at the end |
167 | 630k | insert_tasks_between(f, first, last, prev_t, NULL, cond_signal); |
168 | 630k | } |
169 | | |
170 | | static inline void insert_task(Dav1dFrameContext *const f, |
171 | | Dav1dTask *const t, const int cond_signal) |
172 | 793k | { |
173 | 793k | insert_tasks(f, t, t, cond_signal); |
174 | 793k | } |
175 | | |
176 | 166k | static inline void add_pending(Dav1dFrameContext *const f, Dav1dTask *const t) { |
177 | 166k | pthread_mutex_lock(&f->task_thread.pending_tasks.lock); |
178 | 166k | t->next = NULL; |
179 | 166k | if (!f->task_thread.pending_tasks.head) |
180 | 162k | f->task_thread.pending_tasks.head = t; |
181 | 4.21k | else |
182 | 4.21k | f->task_thread.pending_tasks.tail->next = t; |
183 | 166k | f->task_thread.pending_tasks.tail = t; |
184 | 166k | atomic_store(&f->task_thread.pending_tasks.merge, 1); |
185 | 166k | pthread_mutex_unlock(&f->task_thread.pending_tasks.lock); |
186 | 166k | } |
187 | | |
188 | 29.8M | static inline int merge_pending_frame(Dav1dFrameContext *const f) { |
189 | 29.8M | int const merge = atomic_load(&f->task_thread.pending_tasks.merge); |
190 | 29.8M | if (merge) { |
191 | 213k | pthread_mutex_lock(&f->task_thread.pending_tasks.lock); |
192 | 213k | Dav1dTask *t = f->task_thread.pending_tasks.head; |
193 | 213k | f->task_thread.pending_tasks.head = NULL; |
194 | 213k | f->task_thread.pending_tasks.tail = NULL; |
195 | 213k | atomic_store(&f->task_thread.pending_tasks.merge, 0); |
196 | 213k | pthread_mutex_unlock(&f->task_thread.pending_tasks.lock); |
197 | 599k | while (t) { |
198 | 386k | Dav1dTask *const tmp = t->next; |
199 | 386k | insert_task(f, t, 0); |
200 | 386k | t = tmp; |
201 | 386k | } |
202 | 213k | } |
203 | 29.8M | return merge; |
204 | 29.8M | } |
205 | | |
206 | 4.60M | static inline int merge_pending(const Dav1dContext *const c) { |
207 | 4.60M | int res = 0; |
208 | 32.2M | for (unsigned i = 0; i < c->n_fc; i++) |
209 | 27.6M | res |= merge_pending_frame(&c->fc[i]); |
210 | 4.60M | return res; |
211 | 4.60M | } |
212 | | |
213 | | static int create_filter_sbrow(Dav1dFrameContext *const f, |
214 | | const int pass, Dav1dTask **res_t) |
215 | 103k | { |
216 | 103k | const int has_deblock = f->frame_hdr->loopfilter.level_y[0] || |
217 | 46.5k | f->frame_hdr->loopfilter.level_y[1]; |
218 | 103k | const int has_cdef = f->seq_hdr->cdef; |
219 | 103k | const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1]; |
220 | 103k | const int has_lr = f->lf.restore_planes; |
221 | | |
222 | 103k | Dav1dTask *tasks = f->task_thread.tasks; |
223 | 103k | const int uses_2pass = f->c->n_fc > 1; |
224 | 103k | int num_tasks = f->sbh * (1 + uses_2pass); |
225 | 103k | if (num_tasks > f->task_thread.num_tasks) { |
226 | 51.6k | const size_t size = sizeof(Dav1dTask) * num_tasks; |
227 | 51.6k | tasks = dav1d_realloc(ALLOC_COMMON_CTX, f->task_thread.tasks, size); |
228 | 51.6k | if (!tasks) return -1; |
229 | 51.6k | memset(tasks, 0, size); |
230 | 51.6k | f->task_thread.tasks = tasks; |
231 | 51.6k | f->task_thread.num_tasks = num_tasks; |
232 | 51.6k | } |
233 | 103k | tasks += f->sbh * (pass & 1); |
234 | | |
235 | 103k | if (pass & 1) { |
236 | 51.6k | f->frame_thread.entropy_progress = 0; |
237 | 51.6k | } else { |
238 | 51.6k | const int prog_sz = ((f->sbh + 31) & ~31) >> 5; |
239 | 51.6k | if (prog_sz > f->frame_thread.prog_sz) { |
240 | 51.6k | atomic_uint *const prog = dav1d_realloc(ALLOC_COMMON_CTX, f->frame_thread.frame_progress, |
241 | 51.6k | 2 * prog_sz * sizeof(*prog)); |
242 | 51.6k | if (!prog) return -1; |
243 | 51.6k | f->frame_thread.frame_progress = prog; |
244 | 51.6k | f->frame_thread.copy_lpf_progress = prog + prog_sz; |
245 | 51.6k | } |
246 | 51.6k | f->frame_thread.prog_sz = prog_sz; |
247 | 51.6k | memset(f->frame_thread.frame_progress, 0, prog_sz * sizeof(atomic_uint)); |
248 | 51.6k | memset(f->frame_thread.copy_lpf_progress, 0, prog_sz * sizeof(atomic_uint)); |
249 | 51.6k | atomic_store(&f->frame_thread.deblock_progress, 0); |
250 | 51.6k | } |
251 | 103k | f->frame_thread.next_tile_row[pass & 1] = 0; |
252 | | |
253 | 103k | Dav1dTask *t = &tasks[0]; |
254 | 103k | t->sby = 0; |
255 | 103k | t->recon_progress = 1; |
256 | 103k | t->deblock_progress = 0; |
257 | 103k | t->type = pass == 1 ? DAV1D_TASK_TYPE_ENTROPY_PROGRESS : |
258 | 103k | has_deblock ? DAV1D_TASK_TYPE_DEBLOCK_COLS : |
259 | 51.5k | has_cdef || has_lr /* i.e. LR backup */ ? DAV1D_TASK_TYPE_DEBLOCK_ROWS : |
260 | 19.0k | has_resize ? DAV1D_TASK_TYPE_SUPER_RESOLUTION : |
261 | 8.78k | DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS; |
262 | 103k | t->frame_idx = (int)(f - f->c->fc); |
263 | | |
264 | 103k | *res_t = t; |
265 | 103k | return 0; |
266 | 103k | } |
267 | | |
268 | | int dav1d_task_create_tile_sbrow(Dav1dFrameContext *const f, const int pass, |
269 | | const int cond_signal) |
270 | 103k | { |
271 | 103k | Dav1dTask *tasks = f->task_thread.tile_tasks[0]; |
272 | 103k | const int uses_2pass = f->c->n_fc > 1; |
273 | 103k | const int n_tasks_per_pass = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows; |
274 | 103k | const int n_tasks = n_tasks_per_pass * (1 + uses_2pass); |
275 | 103k | if (pass < 2) { |
276 | 51.5k | if (n_tasks > f->task_thread.num_tile_tasks) { |
277 | 51.5k | const size_t size = sizeof(Dav1dTask) * n_tasks; |
278 | 51.5k | tasks = dav1d_realloc(ALLOC_COMMON_CTX, f->task_thread.tile_tasks[0], size); |
279 | 51.5k | if (!tasks) return -1; |
280 | 51.5k | memset(tasks, 0, size); |
281 | 51.5k | f->task_thread.tile_tasks[0] = tasks; |
282 | 51.5k | f->task_thread.num_tile_tasks = n_tasks; |
283 | 51.5k | } |
284 | 51.5k | f->task_thread.tile_tasks[1] = tasks + n_tasks_per_pass; |
285 | 51.5k | } |
286 | 103k | assert(n_tasks <= f->task_thread.num_tile_tasks); |
287 | | |
288 | 103k | Dav1dTask *pf_t; |
289 | 103k | if (create_filter_sbrow(f, pass, &pf_t)) |
290 | 0 | return -1; |
291 | | |
292 | 103k | Dav1dTask *const p1_tasks = f->task_thread.tile_tasks[1]; |
293 | 103k | Dav1dTask *prev_t = NULL; |
294 | 103k | if (pass == 2) { |
295 | 51.6k | prev_t = &p1_tasks[n_tasks_per_pass - 1]; |
296 | | // PF task is scheduled after the last sby=0 TILE task |
297 | 51.6k | if (f->frame_hdr->tiling.rows == 1) |
298 | 50.0k | prev_t = prev_t->next; |
299 | 51.6k | } |
300 | 103k | tasks += (pass & 1) * n_tasks_per_pass; |
301 | 220k | for (int tile_idx = 0; tile_idx < n_tasks_per_pass; tile_idx++) { |
302 | 117k | Dav1dTileState *const ts = &f->ts[tile_idx]; |
303 | 117k | Dav1dTask *t = &tasks[tile_idx]; |
304 | 117k | t->sby = ts->tiling.row_start >> f->sb_shift; |
305 | 117k | if (pf_t && t->sby) { |
306 | 3.05k | prev_t->next = pf_t; |
307 | 3.05k | prev_t = pf_t; |
308 | 3.05k | pf_t = NULL; |
309 | 3.05k | } |
310 | 117k | t->recon_progress = 0; |
311 | 117k | t->deblock_progress = 0; |
312 | 117k | t->deps_skip = 0; |
313 | 117k | t->type = pass != 1 ? DAV1D_TASK_TYPE_TILE_RECONSTRUCTION : |
314 | 117k | DAV1D_TASK_TYPE_TILE_ENTROPY; |
315 | 117k | t->frame_idx = (int)(f - f->c->fc); |
316 | 117k | if (prev_t) prev_t->next = t; |
317 | 117k | prev_t = t; |
318 | 117k | } |
319 | 103k | if (pf_t) { |
320 | 100k | prev_t->next = pf_t; |
321 | 100k | prev_t = pf_t; |
322 | 100k | } |
323 | 103k | prev_t->next = NULL; |
324 | | |
325 | 103k | atomic_store(&f->task_thread.done[pass & 1], 0); |
326 | | |
327 | | // XXX in theory this could be done locklessly, at this point they are no |
328 | | // tasks in the frameQ, so no other runner should be using this lock, but |
329 | | // we must add both passes at once |
330 | 103k | if (!(pass & 1)) { |
331 | 51.6k | pthread_mutex_lock(&f->task_thread.pending_tasks.lock); |
332 | 51.6k | assert(f->task_thread.pending_tasks.head == NULL); |
333 | 51.6k | f->task_thread.pending_tasks.head = f->task_thread.tile_tasks[pass == 2]; |
334 | 51.6k | f->task_thread.pending_tasks.tail = prev_t; |
335 | 51.6k | atomic_store(&f->task_thread.pending_tasks.merge, 1); |
336 | 51.6k | atomic_store(&f->task_thread.init_done, 1); |
337 | 51.6k | pthread_mutex_unlock(&f->task_thread.pending_tasks.lock); |
338 | 51.6k | } |
339 | 103k | return 0; |
340 | 103k | } |
341 | | |
342 | 56.2k | void dav1d_task_frame_init(Dav1dFrameContext *const f) { |
343 | 56.2k | const Dav1dContext *const c = f->c; |
344 | | |
345 | 56.2k | atomic_store(&f->task_thread.init_done, 0); |
346 | | // schedule init task, which will schedule the remaining tasks |
347 | 56.2k | Dav1dTask *const t = &f->task_thread.init_task; |
348 | 56.2k | t->type = DAV1D_TASK_TYPE_INIT; |
349 | 56.2k | t->frame_idx = (int)(f - c->fc); |
350 | 56.2k | t->sby = 0; |
351 | 56.2k | t->recon_progress = t->deblock_progress = 0; |
352 | 56.2k | insert_task(f, t, 1); |
353 | 56.2k | } |
354 | | |
355 | | void dav1d_task_delayed_fg(Dav1dContext *const c, Dav1dPicture *const out, |
356 | | const Dav1dPicture *const in) |
357 | 2.73k | { |
358 | 2.73k | struct TaskThreadData *const ttd = &c->task_thread; |
359 | 2.73k | ttd->delayed_fg.in = in; |
360 | 2.73k | ttd->delayed_fg.out = out; |
361 | 2.73k | ttd->delayed_fg.type = DAV1D_TASK_TYPE_FG_PREP; |
362 | 2.73k | atomic_init(&ttd->delayed_fg.progress[0], 0); |
363 | 2.73k | atomic_init(&ttd->delayed_fg.progress[1], 0); |
364 | 2.73k | pthread_mutex_lock(&ttd->lock); |
365 | 2.73k | ttd->delayed_fg.exec = 1; |
366 | 2.73k | ttd->delayed_fg.finished = 0; |
367 | 2.73k | pthread_cond_signal(&ttd->cond); |
368 | 2.73k | do { |
369 | 2.73k | pthread_cond_wait(&ttd->delayed_fg.cond, &ttd->lock); |
370 | 2.73k | } while (!ttd->delayed_fg.finished); |
371 | 2.73k | pthread_mutex_unlock(&ttd->lock); |
372 | 2.73k | } |
373 | | |
374 | | static inline int ensure_progress(struct TaskThreadData *const ttd, |
375 | | Dav1dFrameContext *const f, |
376 | | Dav1dTask *const t, const enum TaskType type, |
377 | | atomic_int *const state, int *const target) |
378 | 102k | { |
379 | | // deblock_rows (non-LR portion) depends on deblock of previous sbrow, |
380 | | // so ensure that completed. if not, re-add to task-queue; else, fall-through |
381 | 102k | int p1 = atomic_load(state); |
382 | 102k | if (p1 < t->sby) { |
383 | 7.25k | t->type = type; |
384 | 7.25k | t->recon_progress = t->deblock_progress = 0; |
385 | 7.25k | *target = t->sby; |
386 | 7.25k | add_pending(f, t); |
387 | 7.25k | pthread_mutex_lock(&ttd->lock); |
388 | 7.25k | return 1; |
389 | 7.25k | } |
390 | 95.2k | return 0; |
391 | 102k | } |
392 | | |
393 | | static inline int check_tile(Dav1dTask *const t, Dav1dFrameContext *const f, |
394 | | const int frame_mt) |
395 | 718k | { |
396 | 718k | const int tp = t->type == DAV1D_TASK_TYPE_TILE_ENTROPY; |
397 | 718k | const int tile_idx = (int)(t - f->task_thread.tile_tasks[tp]); |
398 | 718k | Dav1dTileState *const ts = &f->ts[tile_idx]; |
399 | 718k | const int p1 = atomic_load(&ts->progress[tp]); |
400 | 718k | if (p1 < t->sby) return 1; |
401 | 566k | int error = p1 == TILE_ERROR; |
402 | 566k | error |= atomic_fetch_or(&f->task_thread.error, error); |
403 | 566k | if (!error && frame_mt && !tp) { |
404 | 181k | const int p2 = atomic_load(&ts->progress[1]); |
405 | 181k | if (p2 <= t->sby) return 1; |
406 | 110k | error = p2 == TILE_ERROR; |
407 | 110k | error |= atomic_fetch_or(&f->task_thread.error, error); |
408 | 110k | } |
409 | 496k | if (!error && frame_mt && !IS_KEY_OR_INTRA(f->frame_hdr)) { |
410 | | // check reference state |
411 | 50.9k | const Dav1dThreadPicture *p = &f->sr_cur; |
412 | 50.9k | const int ss_ver = p->p.p.layout == DAV1D_PIXEL_LAYOUT_I420; |
413 | 50.9k | const unsigned p_b = (t->sby + 1) << (f->sb_shift + 2); |
414 | 50.9k | const int tile_sby = t->sby - (ts->tiling.row_start >> f->sb_shift); |
415 | 50.9k | const int (*const lowest_px)[2] = ts->lowest_pixel[tile_sby]; |
416 | 262k | for (int n = t->deps_skip; n < 7; n++, t->deps_skip++) { |
417 | 232k | unsigned lowest; |
418 | 232k | if (tp) { |
419 | | // if temporal mv refs are disabled, we only need this |
420 | | // for the primary ref; if segmentation is disabled, we |
421 | | // don't even need that |
422 | 135k | lowest = p_b; |
423 | 135k | } else { |
424 | | // +8 is postfilter-induced delay |
425 | 96.2k | const int y = lowest_px[n][0] == INT_MIN ? INT_MIN : |
426 | 96.2k | lowest_px[n][0] + 8; |
427 | 96.2k | const int uv = lowest_px[n][1] == INT_MIN ? INT_MIN : |
428 | 96.2k | lowest_px[n][1] * (1 << ss_ver) + 8; |
429 | 96.2k | const int max = imax(y, uv); |
430 | 96.2k | if (max == INT_MIN) continue; |
431 | 31.9k | lowest = iclip(max, 1, f->refp[n].p.p.h); |
432 | 31.9k | } |
433 | 167k | const unsigned p3 = atomic_load(&f->refp[n].progress[!tp]); |
434 | 167k | if (p3 < lowest) return 1; |
435 | 167k | atomic_fetch_or(&f->task_thread.error, p3 == FRAME_ERROR); |
436 | 146k | } |
437 | 50.9k | } |
438 | 475k | return 0; |
439 | 496k | } |
440 | | |
441 | | static inline int get_frame_progress(const Dav1dContext *const c, |
442 | | const Dav1dFrameContext *const f) |
443 | 222k | { |
444 | 222k | unsigned frame_prog = c->n_fc > 1 ? atomic_load(&f->sr_cur.progress[1]) : 0; |
445 | 222k | if (frame_prog >= FRAME_ERROR) |
446 | 116k | return f->sbh - 1; |
447 | 105k | int idx = frame_prog >> (f->sb_shift + 7); |
448 | 105k | int prog; |
449 | 106k | do { |
450 | 106k | atomic_uint *state = &f->frame_thread.frame_progress[idx]; |
451 | 106k | const unsigned val = ~atomic_load(state); |
452 | 106k | prog = val ? ctz(val) : 32; |
453 | 106k | if (prog != 32) break; |
454 | 892 | prog = 0; |
455 | 892 | } while (++idx < f->frame_thread.prog_sz); |
456 | 105k | return ((idx << 5) | prog) - 1; |
457 | 222k | } |
458 | | |
459 | 2.25k | static inline void abort_frame(Dav1dFrameContext *const f, const int error) { |
460 | 2.25k | atomic_store(&f->task_thread.error, error == DAV1D_ERR(EINVAL) ? 1 : -1); |
461 | 2.25k | atomic_store(&f->task_thread.task_counter, 0); |
462 | 2.25k | atomic_store(&f->task_thread.done[0], 1); |
463 | 2.25k | atomic_store(&f->task_thread.done[1], 1); |
464 | 2.25k | atomic_store(&f->sr_cur.progress[0], FRAME_ERROR); |
465 | 2.25k | atomic_store(&f->sr_cur.progress[1], FRAME_ERROR); |
466 | 2.25k | dav1d_decode_frame_exit(f, error); |
467 | 2.25k | f->n_tile_data = 0; |
468 | 2.25k | pthread_cond_signal(&f->task_thread.cond); |
469 | 2.25k | } |
470 | | |
471 | | static inline void delayed_fg_task(const Dav1dContext *const c, |
472 | | struct TaskThreadData *const ttd) |
473 | 11.5k | { |
474 | 11.5k | const Dav1dPicture *const in = ttd->delayed_fg.in; |
475 | 11.5k | Dav1dPicture *const out = ttd->delayed_fg.out; |
476 | 11.5k | #if CONFIG_16BPC |
477 | 11.5k | int off; |
478 | 11.5k | if (out->p.bpc != 8) |
479 | 6.43k | off = (out->p.bpc >> 1) - 4; |
480 | 11.5k | #endif |
481 | 11.5k | switch (ttd->delayed_fg.type) { |
482 | 2.73k | case DAV1D_TASK_TYPE_FG_PREP: |
483 | 2.73k | ttd->delayed_fg.exec = 0; |
484 | 2.73k | if (atomic_load(&ttd->cond_signaled)) |
485 | 3 | pthread_cond_signal(&ttd->cond); |
486 | 2.73k | pthread_mutex_unlock(&ttd->lock); |
487 | 2.73k | switch (out->p.bpc) { |
488 | 0 | #if CONFIG_8BPC |
489 | 1.27k | case 8: |
490 | 1.27k | dav1d_prep_grain_8bpc(&c->dsp[0].fg, out, in, |
491 | 1.27k | ttd->delayed_fg.scaling_8bpc, |
492 | 1.27k | ttd->delayed_fg.grain_lut_8bpc); |
493 | 1.27k | break; |
494 | 0 | #endif |
495 | 0 | #if CONFIG_16BPC |
496 | 1.33k | case 10: |
497 | 1.46k | case 12: |
498 | 1.46k | dav1d_prep_grain_16bpc(&c->dsp[off].fg, out, in, |
499 | 1.46k | ttd->delayed_fg.scaling_16bpc, |
500 | 1.46k | ttd->delayed_fg.grain_lut_16bpc); |
501 | 1.46k | break; |
502 | 0 | #endif |
503 | 0 | default: abort(); |
504 | 2.73k | } |
505 | 2.73k | ttd->delayed_fg.type = DAV1D_TASK_TYPE_FG_APPLY; |
506 | 2.73k | pthread_mutex_lock(&ttd->lock); |
507 | 2.73k | ttd->delayed_fg.exec = 1; |
508 | | // fall-through |
509 | 11.5k | case DAV1D_TASK_TYPE_FG_APPLY:; |
510 | 11.5k | int row = atomic_fetch_add(&ttd->delayed_fg.progress[0], 1); |
511 | 11.5k | pthread_mutex_unlock(&ttd->lock); |
512 | 11.5k | int progmax = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE; |
513 | 34.1k | while (row < progmax) { |
514 | 22.8k | if (row + 1 < progmax) |
515 | 20.1k | pthread_cond_signal(&ttd->cond); |
516 | 2.73k | else { |
517 | 2.73k | pthread_mutex_lock(&ttd->lock); |
518 | 2.73k | ttd->delayed_fg.exec = 0; |
519 | 2.73k | pthread_mutex_unlock(&ttd->lock); |
520 | 2.73k | } |
521 | 22.8k | switch (out->p.bpc) { |
522 | 0 | #if CONFIG_8BPC |
523 | 9.94k | case 8: |
524 | 9.94k | dav1d_apply_grain_row_8bpc(&c->dsp[0].fg, out, in, |
525 | 9.94k | ttd->delayed_fg.scaling_8bpc, |
526 | 9.94k | ttd->delayed_fg.grain_lut_8bpc, row); |
527 | 9.94k | break; |
528 | 0 | #endif |
529 | 0 | #if CONFIG_16BPC |
530 | 12.2k | case 10: |
531 | 12.9k | case 12: |
532 | 12.9k | dav1d_apply_grain_row_16bpc(&c->dsp[off].fg, out, in, |
533 | 12.9k | ttd->delayed_fg.scaling_16bpc, |
534 | 12.9k | ttd->delayed_fg.grain_lut_16bpc, row); |
535 | 12.9k | break; |
536 | 0 | #endif |
537 | 0 | default: abort(); |
538 | 22.8k | } |
539 | 22.6k | row = atomic_fetch_add(&ttd->delayed_fg.progress[0], 1); |
540 | 22.6k | atomic_fetch_add(&ttd->delayed_fg.progress[1], 1); |
541 | 22.6k | } |
542 | 11.2k | pthread_mutex_lock(&ttd->lock); |
543 | 11.2k | ttd->delayed_fg.exec = 0; |
544 | 11.2k | int done = atomic_fetch_add(&ttd->delayed_fg.progress[1], 1) + 1; |
545 | 11.2k | progmax = atomic_load(&ttd->delayed_fg.progress[0]); |
546 | | // signal for completion only once the last runner reaches this |
547 | 11.2k | if (done >= progmax) { |
548 | 2.73k | ttd->delayed_fg.finished = 1; |
549 | 2.73k | pthread_cond_signal(&ttd->delayed_fg.cond); |
550 | 2.73k | } |
551 | 11.2k | break; |
552 | 0 | default: abort(); |
553 | 11.5k | } |
554 | 11.5k | } |
555 | | |
556 | 1.26M | void *dav1d_worker_task(void *data) { |
557 | 1.26M | Dav1dTaskContext *const tc = data; |
558 | 1.26M | const Dav1dContext *const c = tc->c; |
559 | 1.26M | struct TaskThreadData *const ttd = tc->task_thread.ttd; |
560 | | |
561 | 1.26M | dav1d_set_thread_name("dav1d-worker"); |
562 | | |
563 | 1.26M | pthread_mutex_lock(&ttd->lock); |
564 | 4.00M | for (;;) { |
565 | 4.00M | if (tc->task_thread.die) break; |
566 | 2.74M | if (atomic_load(c->flush)) goto park; |
567 | | |
568 | 2.70M | merge_pending(c); |
569 | 2.70M | if (ttd->delayed_fg.exec) { // run delayed film grain first |
570 | 11.5k | delayed_fg_task(c, ttd); |
571 | 11.5k | continue; |
572 | 11.5k | } |
573 | 2.69M | Dav1dFrameContext *f; |
574 | 2.69M | Dav1dTask *t, *prev_t = NULL; |
575 | 2.69M | if (c->n_fc > 1) { // run init tasks second |
576 | 18.5M | for (unsigned i = 0; i < c->n_fc; i++) { |
577 | 15.8M | const unsigned first = atomic_load(&ttd->first); |
578 | 15.8M | f = &c->fc[(first + i) % c->n_fc]; |
579 | 15.8M | if (atomic_load(&f->task_thread.init_done)) continue; |
580 | 13.9M | t = f->task_thread.task_head; |
581 | 13.9M | if (!t) continue; |
582 | 67.4k | if (t->type == DAV1D_TASK_TYPE_INIT) goto found; |
583 | 13.4k | if (t->type == DAV1D_TASK_TYPE_INIT_CDF) { |
584 | | // XXX This can be a simple else, if adding tasks of both |
585 | | // passes at once (in dav1d_task_create_tile_sbrow). |
586 | | // Adding the tasks to the pending Q can result in a |
587 | | // thread merging them before setting init_done. |
588 | | // We will need to set init_done before adding to the |
589 | | // pending Q, so maybe return the tasks, set init_done, |
590 | | // and add to pending Q only then. |
591 | 13.4k | const int p1 = f->in_cdf.progress ? |
592 | 13.4k | atomic_load(f->in_cdf.progress) : 1; |
593 | 13.4k | if (p1) { |
594 | 2.09k | atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR); |
595 | 2.09k | goto found; |
596 | 2.09k | } |
597 | 13.4k | } |
598 | 13.4k | } |
599 | 2.69M | } |
600 | 4.09M | while (ttd->cur < c->n_fc) { // run decoding tasks last |
601 | 2.18M | const unsigned first = atomic_load(&ttd->first); |
602 | 2.18M | f = &c->fc[(first + ttd->cur) % c->n_fc]; |
603 | 2.18M | merge_pending_frame(f); |
604 | 2.18M | prev_t = f->task_thread.task_cur_prev; |
605 | 2.18M | t = prev_t ? prev_t->next : f->task_thread.task_head; |
606 | 6.09M | while (t) { |
607 | 4.63M | if (t->type == DAV1D_TASK_TYPE_INIT_CDF) goto next; |
608 | 4.63M | else if (t->type == DAV1D_TASK_TYPE_TILE_ENTROPY || |
609 | 4.47M | t->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION) |
610 | 357k | { |
611 | | // if not bottom sbrow of tile, this task will be re-added |
612 | | // after it's finished |
613 | 357k | if (!check_tile(t, f, c->n_fc > 1)) |
614 | 265k | goto found; |
615 | 4.27M | } else if (t->recon_progress) { |
616 | 838k | const int p = t->type == DAV1D_TASK_TYPE_ENTROPY_PROGRESS; |
617 | 838k | int error = atomic_load(&f->task_thread.error); |
618 | 838k | assert(!atomic_load(&f->task_thread.done[p]) || error); |
619 | 838k | const int tile_row_base = f->frame_hdr->tiling.cols * |
620 | 838k | f->frame_thread.next_tile_row[p]; |
621 | 838k | if (p) { |
622 | 343k | atomic_int *const prog = &f->frame_thread.entropy_progress; |
623 | 343k | const int p1 = atomic_load(prog); |
624 | 343k | if (p1 < t->sby) goto next; |
625 | 343k | atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR); |
626 | 325k | } |
627 | 1.29M | for (int tc = 0; tc < f->frame_hdr->tiling.cols; tc++) { |
628 | 849k | Dav1dTileState *const ts = &f->ts[tile_row_base + tc]; |
629 | 849k | const int p2 = atomic_load(&ts->progress[p]); |
630 | 849k | if (p2 < t->recon_progress) goto next; |
631 | 849k | atomic_fetch_or(&f->task_thread.error, p2 == TILE_ERROR); |
632 | 476k | } |
633 | 447k | if (t->sby + 1 < f->sbh) { |
634 | | // add sby+1 to list to replace this one |
635 | 351k | Dav1dTask *next_t = &t[1]; |
636 | 351k | *next_t = *t; |
637 | 351k | next_t->sby++; |
638 | 351k | const int ntr = f->frame_thread.next_tile_row[p] + 1; |
639 | 351k | const int start = f->frame_hdr->tiling.row_start_sb[ntr]; |
640 | 351k | if (next_t->sby == start) |
641 | 6.23k | f->frame_thread.next_tile_row[p] = ntr; |
642 | 351k | next_t->recon_progress = next_t->sby + 1; |
643 | 351k | insert_task(f, next_t, 0); |
644 | 351k | } |
645 | 447k | goto found; |
646 | 3.43M | } else if (t->type == DAV1D_TASK_TYPE_CDEF) { |
647 | 38.4k | atomic_uint *prog = f->frame_thread.copy_lpf_progress; |
648 | 38.4k | const int p1 = atomic_load(&prog[(t->sby - 1) >> 5]); |
649 | 38.4k | if (p1 & (1U << ((t->sby - 1) & 31))) |
650 | 5.31k | goto found; |
651 | 3.39M | } else { |
652 | 3.39M | assert(t->deblock_progress); |
653 | 3.39M | const int p1 = atomic_load(&f->frame_thread.deblock_progress); |
654 | 3.39M | if (p1 >= t->deblock_progress) { |
655 | 7.26k | atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR); |
656 | 7.26k | goto found; |
657 | 7.26k | } |
658 | 3.39M | } |
659 | 3.90M | next: |
660 | 3.90M | prev_t = t; |
661 | 3.90M | t = t->next; |
662 | 3.90M | f->task_thread.task_cur_prev = prev_t; |
663 | 3.90M | } |
664 | 1.46M | ttd->cur++; |
665 | 1.46M | } |
666 | 1.90M | if (reset_task_cur(c, ttd, UINT_MAX)) continue; |
667 | 1.90M | if (merge_pending(c)) continue; |
668 | 1.94M | park: |
669 | 1.94M | tc->task_thread.flushed = 1; |
670 | 1.94M | pthread_cond_signal(&tc->task_thread.td.cond); |
671 | | // we want to be woken up next time progress is signaled |
672 | 1.94M | atomic_store(&ttd->cond_signaled, 0); |
673 | 1.94M | pthread_cond_wait(&ttd->cond, &ttd->lock); |
674 | 1.94M | tc->task_thread.flushed = 0; |
675 | 1.94M | reset_task_cur(c, ttd, UINT_MAX); |
676 | 1.94M | continue; |
677 | | |
678 | 781k | found: |
679 | | // remove t from list |
680 | 781k | if (prev_t) prev_t->next = t->next; |
681 | 657k | else f->task_thread.task_head = t->next; |
682 | 781k | if (!t->next) f->task_thread.task_tail = prev_t; |
683 | 781k | if (t->type > DAV1D_TASK_TYPE_INIT_CDF && !f->task_thread.task_head) |
684 | 46.8k | ttd->cur++; |
685 | 781k | t->next = NULL; |
686 | | // we don't need to check cond_signaled here, since we found a task |
687 | | // after the last signal so we want to re-signal the next waiting thread |
688 | | // and again won't need to signal after that |
689 | 781k | atomic_store(&ttd->cond_signaled, 1); |
690 | 781k | pthread_cond_signal(&ttd->cond); |
691 | 781k | pthread_mutex_unlock(&ttd->lock); |
692 | 1.04M | found_unlocked:; |
693 | 1.04M | const int flush = atomic_load(c->flush); |
694 | 1.04M | int error = atomic_fetch_or(&f->task_thread.error, flush) | flush; |
695 | | |
696 | | // run it |
697 | 1.04M | tc->f = f; |
698 | 1.04M | int sby = t->sby; |
699 | 1.04M | switch (t->type) { |
700 | 53.9k | case DAV1D_TASK_TYPE_INIT: { |
701 | 53.9k | assert(c->n_fc > 1); |
702 | 53.9k | int res = dav1d_decode_frame_init(f); |
703 | 53.9k | int p1 = f->in_cdf.progress ? atomic_load(f->in_cdf.progress) : 1; |
704 | 53.9k | if (res || p1 == TILE_ERROR) { |
705 | 45 | pthread_mutex_lock(&ttd->lock); |
706 | 45 | abort_frame(f, res ? res : DAV1D_ERR(EINVAL)); |
707 | 45 | reset_task_cur(c, ttd, t->frame_idx); |
708 | 53.8k | } else { |
709 | 53.8k | t->type = DAV1D_TASK_TYPE_INIT_CDF; |
710 | 53.8k | if (p1) goto found_unlocked; |
711 | 2.15k | add_pending(f, t); |
712 | 2.15k | pthread_mutex_lock(&ttd->lock); |
713 | 2.15k | } |
714 | 2.19k | continue; |
715 | 53.9k | } |
716 | 53.8k | case DAV1D_TASK_TYPE_INIT_CDF: { |
717 | 53.8k | assert(c->n_fc > 1); |
718 | 53.8k | int res = DAV1D_ERR(EINVAL); |
719 | 53.8k | if (!atomic_load(&f->task_thread.error)) |
720 | 52.7k | res = dav1d_decode_frame_init_cdf(f); |
721 | 53.8k | if (f->frame_hdr->refresh_context && !f->task_thread.update_set) |
722 | 53.8k | atomic_store(f->out_cdf.progress, res < 0 ? TILE_ERROR : 1); |
723 | 157k | for (int p = 1; p <= 2 && !res; p++) |
724 | 103k | res = dav1d_task_create_tile_sbrow(f, p, 0); |
725 | 53.8k | pthread_mutex_lock(&ttd->lock); |
726 | 53.8k | if (res) { |
727 | 2.21k | abort_frame(f, DAV1D_ERR(ENOMEM)); |
728 | 2.21k | reset_task_cur(c, ttd, t->frame_idx); |
729 | 2.21k | atomic_store(&f->task_thread.init_done, 1); |
730 | 2.21k | } |
731 | 53.8k | continue; |
732 | 53.8k | } |
733 | 239k | case DAV1D_TASK_TYPE_TILE_ENTROPY: |
734 | 476k | case DAV1D_TASK_TYPE_TILE_RECONSTRUCTION: { |
735 | 476k | const int p = t->type == DAV1D_TASK_TYPE_TILE_ENTROPY; |
736 | 476k | const int tile_idx = (int)(t - f->task_thread.tile_tasks[p]); |
737 | 476k | Dav1dTileState *const ts = &f->ts[tile_idx]; |
738 | | |
739 | 476k | tc->ts = ts; |
740 | 476k | tc->by = sby << f->sb_shift; |
741 | 476k | const int uses_2pass = c->n_fc > 1; |
742 | 476k | tc->frame_thread.pass = !uses_2pass ? 0 : |
743 | 476k | 1 + (t->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION); |
744 | 476k | if (!error) error = dav1d_decode_tile_sbrow(tc); |
745 | 476k | const int progress = error ? TILE_ERROR : 1 + sby; |
746 | | |
747 | | // signal progress |
748 | 476k | atomic_fetch_or(&f->task_thread.error, error); |
749 | 476k | if (((sby + 1) << f->sb_shift) < ts->tiling.row_end) { |
750 | 362k | t->sby++; |
751 | 362k | t->deps_skip = 0; |
752 | 362k | if (!check_tile(t, f, uses_2pass)) { |
753 | 210k | atomic_store(&ts->progress[p], progress); |
754 | 210k | reset_task_cur_async(ttd, t->frame_idx, c->n_fc); |
755 | 210k | if (!atomic_fetch_or(&ttd->cond_signaled, 1)) |
756 | 60 | pthread_cond_signal(&ttd->cond); |
757 | 210k | goto found_unlocked; |
758 | 210k | } |
759 | 362k | atomic_store(&ts->progress[p], progress); |
760 | 152k | add_pending(f, t); |
761 | 152k | pthread_mutex_lock(&ttd->lock); |
762 | 152k | } else { |
763 | 113k | pthread_mutex_lock(&ttd->lock); |
764 | 113k | atomic_store(&ts->progress[p], progress); |
765 | 113k | reset_task_cur(c, ttd, t->frame_idx); |
766 | 113k | error = atomic_load(&f->task_thread.error); |
767 | 113k | if (f->frame_hdr->refresh_context && |
768 | 14.8k | tc->frame_thread.pass <= 1 && f->task_thread.update_set && |
769 | 7.68k | f->frame_hdr->tiling.update == tile_idx) |
770 | 7.36k | { |
771 | 7.36k | if (!error) |
772 | 5.97k | dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf, |
773 | 5.97k | &f->ts[f->frame_hdr->tiling.update].cdf); |
774 | 7.36k | if (c->n_fc > 1) |
775 | 7.36k | atomic_store(f->out_cdf.progress, error ? TILE_ERROR : 1); |
776 | 7.36k | } |
777 | 113k | if (atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1 == 0 && |
778 | 113k | atomic_load(&f->task_thread.done[0]) && |
779 | 913 | (!uses_2pass || atomic_load(&f->task_thread.done[1]))) |
780 | 913 | { |
781 | 913 | error = atomic_load(&f->task_thread.error); |
782 | 913 | dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) : |
783 | 913 | error ? DAV1D_ERR(ENOMEM) : 0); |
784 | 913 | f->n_tile_data = 0; |
785 | 913 | pthread_cond_signal(&f->task_thread.cond); |
786 | 913 | } |
787 | 113k | assert(atomic_load(&f->task_thread.task_counter) >= 0); |
788 | 113k | if (!atomic_fetch_or(&ttd->cond_signaled, 1)) |
789 | 71.3k | pthread_cond_signal(&ttd->cond); |
790 | 113k | } |
791 | 265k | continue; |
792 | 476k | } |
793 | 265k | case DAV1D_TASK_TYPE_DEBLOCK_COLS: |
794 | 102k | if (!atomic_load(&f->task_thread.error)) |
795 | 61.7k | f->bd_fn.filter_sbrow_deblock_cols(f, sby); |
796 | 102k | if (ensure_progress(ttd, f, t, DAV1D_TASK_TYPE_DEBLOCK_ROWS, |
797 | 102k | &f->frame_thread.deblock_progress, |
798 | 102k | &t->deblock_progress)) continue; |
799 | | // fall-through |
800 | 177k | case DAV1D_TASK_TYPE_DEBLOCK_ROWS: |
801 | 177k | if (!atomic_load(&f->task_thread.error)) |
802 | 83.7k | f->bd_fn.filter_sbrow_deblock_rows(f, sby); |
803 | | // signal deblock progress |
804 | 177k | if (f->frame_hdr->loopfilter.level_y[0] || |
805 | 93.1k | f->frame_hdr->loopfilter.level_y[1]) |
806 | 102k | { |
807 | 102k | error = atomic_load(&f->task_thread.error); |
808 | 102k | atomic_store(&f->frame_thread.deblock_progress, |
809 | 102k | error ? TILE_ERROR : sby + 1); |
810 | 102k | reset_task_cur_async(ttd, t->frame_idx, c->n_fc); |
811 | 102k | if (!atomic_fetch_or(&ttd->cond_signaled, 1)) |
812 | 23.5k | pthread_cond_signal(&ttd->cond); |
813 | 102k | } else if (f->seq_hdr->cdef || f->lf.restore_planes) { |
814 | 74.7k | atomic_fetch_or(&f->frame_thread.copy_lpf_progress[sby >> 5], |
815 | 74.7k | 1U << (sby & 31)); |
816 | | // CDEF needs the top buffer to be saved by lr_copy_lpf of the |
817 | | // previous sbrow |
818 | 74.7k | if (sby) { |
819 | 64.9k | int prog = atomic_load(&f->frame_thread.copy_lpf_progress[(sby - 1) >> 5]); |
820 | 64.9k | if (~prog & (1U << ((sby - 1) & 31))) { |
821 | 5.31k | t->type = DAV1D_TASK_TYPE_CDEF; |
822 | 5.31k | t->recon_progress = t->deblock_progress = 0; |
823 | 5.31k | add_pending(f, t); |
824 | 5.31k | pthread_mutex_lock(&ttd->lock); |
825 | 5.31k | continue; |
826 | 5.31k | } |
827 | 64.9k | } |
828 | 74.7k | } |
829 | | // fall-through |
830 | 177k | case DAV1D_TASK_TYPE_CDEF: |
831 | 177k | if (f->seq_hdr->cdef) { |
832 | 133k | if (!atomic_load(&f->task_thread.error)) |
833 | 59.3k | f->bd_fn.filter_sbrow_cdef(tc, sby); |
834 | 133k | reset_task_cur_async(ttd, t->frame_idx, c->n_fc); |
835 | 133k | if (!atomic_fetch_or(&ttd->cond_signaled, 1)) |
836 | 23.4k | pthread_cond_signal(&ttd->cond); |
837 | 133k | } |
838 | | // fall-through |
839 | 183k | case DAV1D_TASK_TYPE_SUPER_RESOLUTION: |
840 | 183k | if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) |
841 | 64.1k | if (!atomic_load(&f->task_thread.error)) |
842 | 20.9k | f->bd_fn.filter_sbrow_resize(f, sby); |
843 | | // fall-through |
844 | 183k | case DAV1D_TASK_TYPE_LOOP_RESTORATION: |
845 | 183k | if (!atomic_load(&f->task_thread.error) && f->lf.restore_planes) |
846 | 26.5k | f->bd_fn.filter_sbrow_lr(f, sby); |
847 | | // fall-through |
848 | 222k | case DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS: |
849 | | // dummy to cover for no post-filters |
850 | 447k | case DAV1D_TASK_TYPE_ENTROPY_PROGRESS: |
851 | | // dummy to convert tile progress to frame |
852 | 447k | break; |
853 | 0 | default: abort(); |
854 | 1.04M | } |
855 | | // if task completed [typically LR], signal picture progress as per below |
856 | 447k | const int uses_2pass = c->n_fc > 1; |
857 | 447k | const int sbh = f->sbh; |
858 | 447k | const int sbsz = f->sb_step * 4; |
859 | 447k | if (t->type == DAV1D_TASK_TYPE_ENTROPY_PROGRESS) { |
860 | 225k | error = atomic_load(&f->task_thread.error); |
861 | 225k | const unsigned y = sby + 1 == sbh ? UINT_MAX : (unsigned)(sby + 1) * sbsz; |
862 | 225k | assert(c->n_fc > 1); |
863 | 225k | if (f->sr_cur.p.data[0] /* upon flush, this can be free'ed already */) |
864 | 225k | atomic_store(&f->sr_cur.progress[0], error ? FRAME_ERROR : y); |
865 | 225k | atomic_store(&f->frame_thread.entropy_progress, |
866 | 225k | error ? TILE_ERROR : sby + 1); |
867 | 225k | if (sby + 1 == sbh) |
868 | 225k | atomic_store(&f->task_thread.done[1], 1); |
869 | 225k | pthread_mutex_lock(&ttd->lock); |
870 | 225k | const int num_tasks = atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1; |
871 | 225k | if (sby + 1 < sbh && num_tasks) { |
872 | 174k | reset_task_cur(c, ttd, t->frame_idx); |
873 | 174k | continue; |
874 | 174k | } |
875 | 51.0k | if (!num_tasks && atomic_load(&f->task_thread.done[0]) && |
876 | 51.0k | atomic_load(&f->task_thread.done[1])) |
877 | 6.57k | { |
878 | 6.57k | error = atomic_load(&f->task_thread.error); |
879 | 6.57k | dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) : |
880 | 6.57k | error ? DAV1D_ERR(ENOMEM) : 0); |
881 | 6.57k | f->n_tile_data = 0; |
882 | 6.57k | pthread_cond_signal(&f->task_thread.cond); |
883 | 6.57k | } |
884 | 51.0k | reset_task_cur(c, ttd, t->frame_idx); |
885 | 51.0k | continue; |
886 | 225k | } |
887 | | // t->type != DAV1D_TASK_TYPE_ENTROPY_PROGRESS |
888 | 447k | atomic_fetch_or(&f->frame_thread.frame_progress[sby >> 5], |
889 | 222k | 1U << (sby & 31)); |
890 | 222k | pthread_mutex_lock(&f->task_thread.lock); |
891 | 222k | sby = get_frame_progress(c, f); |
892 | 222k | error = atomic_load(&f->task_thread.error); |
893 | 222k | const unsigned y = sby + 1 == sbh ? UINT_MAX : (unsigned)(sby + 1) * sbsz; |
894 | 222k | if (c->n_fc > 1 && f->sr_cur.p.data[0] /* upon flush, this can be free'ed already */) |
895 | 222k | atomic_store(&f->sr_cur.progress[1], error ? FRAME_ERROR : y); |
896 | 222k | pthread_mutex_unlock(&f->task_thread.lock); |
897 | 222k | if (sby + 1 == sbh) |
898 | 222k | atomic_store(&f->task_thread.done[0], 1); |
899 | 222k | pthread_mutex_lock(&ttd->lock); |
900 | 222k | const int num_tasks = atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1; |
901 | 222k | if (sby + 1 < sbh && num_tasks) { |
902 | 65.7k | reset_task_cur(c, ttd, t->frame_idx); |
903 | 65.7k | continue; |
904 | 65.7k | } |
905 | 156k | if (!num_tasks && atomic_load(&f->task_thread.done[0]) && |
906 | 39.0k | (!uses_2pass || atomic_load(&f->task_thread.done[1]))) |
907 | 39.0k | { |
908 | 39.0k | error = atomic_load(&f->task_thread.error); |
909 | 39.0k | dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) : |
910 | 39.0k | error ? DAV1D_ERR(ENOMEM) : 0); |
911 | 39.0k | f->n_tile_data = 0; |
912 | 39.0k | pthread_cond_signal(&f->task_thread.cond); |
913 | 39.0k | } |
914 | 156k | reset_task_cur(c, ttd, t->frame_idx); |
915 | 156k | } |
916 | 1.26M | pthread_mutex_unlock(&ttd->lock); |
917 | | |
918 | | return NULL; |
919 | 1.26M | } |