Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/thread_task.c
Line
Count
Source
1
/*
2
 * Copyright © 2018, VideoLAN and dav1d authors
3
 * Copyright © 2018, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include "common/frame.h"
31
32
#include "src/thread_task.h"
33
#include "src/fg_apply.h"
34
35
// This function resets the cur pointer to the first frame theoretically
36
// executable after a task completed (ie. each time we update some progress or
37
// insert some tasks in the queue).
38
// When frame_idx is set, it can be either from a completed task, or from tasks
39
// inserted in the queue, in which case we have to make sure the cur pointer
40
// isn't past this insert.
41
// The special case where frame_idx is UINT_MAX is to handle the reset after
42
// completing a task and locklessly signaling progress. In this case we don't
43
// enter a critical section, which is needed for this function, so we set an
44
// atomic for a delayed handling, happening here. Meaning we can call this
45
// function without any actual update other than what's in the atomic, hence
46
// this special case.
47
static inline int reset_task_cur(const Dav1dContext *const c,
48
                                 struct TaskThreadData *const ttd,
49
                                 unsigned frame_idx)
50
12.3M
{
51
12.3M
    const unsigned first = atomic_load(&ttd->first);
52
12.3M
    unsigned reset_frame_idx = atomic_exchange(&ttd->reset_task_cur, UINT_MAX);
53
12.3M
    if (reset_frame_idx < first) {
54
0
        if (frame_idx == UINT_MAX) return 0;
55
0
        reset_frame_idx = UINT_MAX;
56
0
    }
57
12.3M
    if (!ttd->cur && c->fc[first].task_thread.task_cur_prev == NULL)
58
2.78M
        return 0;
59
9.59M
    if (reset_frame_idx != UINT_MAX) {
60
274k
        if (frame_idx == UINT_MAX) {
61
133k
            if (reset_frame_idx > first + ttd->cur)
62
609
                return 0;
63
132k
            ttd->cur = reset_frame_idx - first;
64
132k
            goto cur_found;
65
133k
        }
66
9.31M
    } else if (frame_idx == UINT_MAX)
67
7.72M
        return 0;
68
1.72M
    if (frame_idx < first) frame_idx += c->n_fc;
69
1.72M
    const unsigned min_frame_idx = umin(reset_frame_idx, frame_idx);
70
1.72M
    const unsigned cur_frame_idx = first + ttd->cur;
71
1.72M
    if (ttd->cur < c->n_fc && cur_frame_idx < min_frame_idx)
72
127k
        return 0;
73
2.77M
    for (ttd->cur = min_frame_idx - first; ttd->cur < c->n_fc; ttd->cur++)
74
2.58M
        if (c->fc[(first + ttd->cur) % c->n_fc].task_thread.task_head)
75
1.41M
            break;
76
1.73M
cur_found:
77
9.50M
    for (unsigned i = ttd->cur; i < c->n_fc; i++)
78
7.76M
        c->fc[(first + i) % c->n_fc].task_thread.task_cur_prev = NULL;
79
1.73M
    return 1;
80
1.60M
}
81
82
static inline void reset_task_cur_async(struct TaskThreadData *const ttd,
83
                                        unsigned frame_idx, unsigned n_frames)
84
1.33M
{
85
1.33M
    const unsigned first = atomic_load(&ttd->first);
86
1.33M
    if (frame_idx < first) frame_idx += n_frames;
87
1.33M
    unsigned last_idx = frame_idx;
88
1.33M
    do {
89
1.33M
        frame_idx = last_idx;
90
1.33M
        last_idx = atomic_exchange(&ttd->reset_task_cur, frame_idx);
91
1.33M
    } while (last_idx < frame_idx);
92
1.33M
    if (frame_idx == first && atomic_load(&ttd->first) != first) {
93
0
        unsigned expected = frame_idx;
94
0
        atomic_compare_exchange_strong(&ttd->reset_task_cur, &expected, UINT_MAX);
95
0
    }
96
1.33M
}
97
98
static void insert_tasks_between(Dav1dFrameContext *const f,
99
                                 Dav1dTask *const first, Dav1dTask *const last,
100
                                 Dav1dTask *const a, Dav1dTask *const b,
101
                                 const int cond_signal)
102
2.26M
{
103
2.26M
    struct TaskThreadData *const ttd = f->task_thread.ttd;
104
2.26M
    if (atomic_load(f->c->flush)) return;
105
2.26M
    assert(!a || a->next == b);
106
2.26M
    if (!a) f->task_thread.task_head = first;
107
1.71M
    else a->next = first;
108
2.26M
    if (!b) f->task_thread.task_tail = last;
109
2.26M
    last->next = b;
110
2.26M
    reset_task_cur(f->c, ttd, first->frame_idx);
111
2.26M
    if (cond_signal && !atomic_fetch_or(&ttd->cond_signaled, 1))
112
90.9k
        pthread_cond_signal(&ttd->cond);
113
2.26M
}
114
115
static void insert_tasks(Dav1dFrameContext *const f,
116
                         Dav1dTask *const first, Dav1dTask *const last,
117
                         const int cond_signal)
118
2.26M
{
119
    // insert task back into task queue
120
2.26M
    Dav1dTask *t_ptr, *prev_t = NULL;
121
2.26M
    for (t_ptr = f->task_thread.task_head;
122
8.25M
         t_ptr; prev_t = t_ptr, t_ptr = t_ptr->next)
123
6.43M
    {
124
        // entropy coding precedes other steps
125
6.43M
        if (t_ptr->type == DAV1D_TASK_TYPE_TILE_ENTROPY) {
126
892k
            if (first->type > DAV1D_TASK_TYPE_TILE_ENTROPY) continue;
127
            // both are entropy
128
147k
            if (first->sby > t_ptr->sby) continue;
129
48.2k
            if (first->sby < t_ptr->sby) {
130
1.62k
                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
131
1.62k
                return;
132
1.62k
            }
133
            // same sby
134
5.54M
        } else {
135
5.54M
            if (first->type == DAV1D_TASK_TYPE_TILE_ENTROPY) {
136
226k
                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
137
226k
                return;
138
226k
            }
139
5.31M
            if (first->sby > t_ptr->sby) continue;
140
1.22M
            if (first->sby < t_ptr->sby) {
141
216k
                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
142
216k
                return;
143
216k
            }
144
            // same sby
145
1.00M
            if (first->type > t_ptr->type) continue;
146
53.0k
            if (first->type < t_ptr->type) {
147
6.96k
                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
148
6.96k
                return;
149
6.96k
            }
150
            // same task type
151
53.0k
        }
152
153
        // sort by tile-id
154
92.6k
        assert(first->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION ||
155
92.6k
               first->type == DAV1D_TASK_TYPE_TILE_ENTROPY);
156
92.6k
        assert(first->type == t_ptr->type);
157
92.6k
        assert(t_ptr->sby == first->sby);
158
92.6k
        const int p = first->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
159
92.6k
        const int t_tile_idx = (int) (first - f->task_thread.tile_tasks[p]);
160
92.6k
        const int p_tile_idx = (int) (t_ptr - f->task_thread.tile_tasks[p]);
161
92.6k
        assert(t_tile_idx != p_tile_idx);
162
92.6k
        if (t_tile_idx > p_tile_idx) continue;
163
177
        insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
164
177
        return;
165
92.6k
    }
166
    // append at the end
167
1.81M
    insert_tasks_between(f, first, last, prev_t, NULL, cond_signal);
168
1.81M
}
169
170
static inline void insert_task(Dav1dFrameContext *const f,
171
                               Dav1dTask *const t, const int cond_signal)
172
2.26M
{
173
2.26M
    insert_tasks(f, t, t, cond_signal);
174
2.26M
}
175
176
467k
static inline void add_pending(Dav1dFrameContext *const f, Dav1dTask *const t) {
177
467k
    pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
178
467k
    t->next = NULL;
179
467k
    if (!f->task_thread.pending_tasks.head)
180
453k
        f->task_thread.pending_tasks.head = t;
181
13.6k
    else
182
13.6k
        f->task_thread.pending_tasks.tail->next = t;
183
467k
    f->task_thread.pending_tasks.tail = t;
184
467k
    atomic_store(&f->task_thread.pending_tasks.merge, 1);
185
467k
    pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);
186
467k
}
187
188
69.9M
static inline int merge_pending_frame(Dav1dFrameContext *const f) {
189
69.9M
    int const merge = atomic_load(&f->task_thread.pending_tasks.merge);
190
69.9M
    if (merge) {
191
592k
        pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
192
592k
        Dav1dTask *t = f->task_thread.pending_tasks.head;
193
592k
        f->task_thread.pending_tasks.head = NULL;
194
592k
        f->task_thread.pending_tasks.tail = NULL;
195
592k
        atomic_store(&f->task_thread.pending_tasks.merge, 0);
196
592k
        pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);
197
1.64M
        while (t) {
198
1.05M
            Dav1dTask *const tmp = t->next;
199
1.05M
            insert_task(f, t, 0);
200
1.05M
            t = tmp;
201
1.05M
        }
202
592k
    }
203
69.9M
    return merge;
204
69.9M
}
205
206
10.6M
static inline int merge_pending(const Dav1dContext *const c) {
207
10.6M
    int res = 0;
208
74.7M
    for (unsigned i = 0; i < c->n_fc; i++)
209
64.0M
        res |= merge_pending_frame(&c->fc[i]);
210
10.6M
    return res;
211
10.6M
}
212
213
static int create_filter_sbrow(Dav1dFrameContext *const f,
214
                               const int pass, Dav1dTask **res_t)
215
280k
{
216
280k
    const int has_deblock = f->frame_hdr->loopfilter.level_y[0] ||
217
79.4k
                            f->frame_hdr->loopfilter.level_y[1];
218
280k
    const int has_cdef = f->seq_hdr->cdef;
219
280k
    const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
220
280k
    const int has_lr = f->lf.restore_planes;
221
222
280k
    Dav1dTask *tasks = f->task_thread.tasks;
223
280k
    const int uses_2pass = f->c->n_fc > 1;
224
280k
    int num_tasks = f->sbh * (1 + uses_2pass);
225
280k
    if (num_tasks > f->task_thread.num_tasks) {
226
99.5k
        const size_t size = sizeof(Dav1dTask) * num_tasks;
227
99.5k
        tasks = dav1d_realloc(ALLOC_COMMON_CTX, f->task_thread.tasks, size);
228
99.5k
        if (!tasks) return -1;
229
99.5k
        memset(tasks, 0, size);
230
99.5k
        f->task_thread.tasks = tasks;
231
99.5k
        f->task_thread.num_tasks = num_tasks;
232
99.5k
    }
233
280k
    tasks += f->sbh * (pass & 1);
234
235
280k
    if (pass & 1) {
236
140k
        f->frame_thread.entropy_progress = 0;
237
140k
    } else {
238
140k
        const int prog_sz = ((f->sbh + 31) & ~31) >> 5;
239
140k
        if (prog_sz > f->frame_thread.prog_sz) {
240
99.4k
            atomic_uint *const prog = dav1d_realloc(ALLOC_COMMON_CTX, f->frame_thread.frame_progress,
241
99.4k
                                                    2 * prog_sz * sizeof(*prog));
242
99.4k
            if (!prog) return -1;
243
99.4k
            f->frame_thread.frame_progress = prog;
244
99.4k
            f->frame_thread.copy_lpf_progress = prog + prog_sz;
245
99.4k
        }
246
140k
        f->frame_thread.prog_sz = prog_sz;
247
140k
        memset(f->frame_thread.frame_progress, 0, prog_sz * sizeof(atomic_uint));
248
140k
        memset(f->frame_thread.copy_lpf_progress, 0, prog_sz * sizeof(atomic_uint));
249
140k
        atomic_store(&f->frame_thread.deblock_progress, 0);
250
140k
    }
251
280k
    f->frame_thread.next_tile_row[pass & 1] = 0;
252
253
280k
    Dav1dTask *t = &tasks[0];
254
280k
    t->sby = 0;
255
280k
    t->recon_progress = 1;
256
280k
    t->deblock_progress = 0;
257
280k
    t->type = pass == 1 ? DAV1D_TASK_TYPE_ENTROPY_PROGRESS :
258
280k
              has_deblock ? DAV1D_TASK_TYPE_DEBLOCK_COLS :
259
140k
              has_cdef || has_lr /* i.e. LR backup */ ? DAV1D_TASK_TYPE_DEBLOCK_ROWS :
260
32.5k
              has_resize ? DAV1D_TASK_TYPE_SUPER_RESOLUTION :
261
14.6k
              DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS;
262
280k
    t->frame_idx = (int)(f - f->c->fc);
263
264
280k
    *res_t = t;
265
280k
    return 0;
266
280k
}
267
268
int dav1d_task_create_tile_sbrow(Dav1dFrameContext *const f, const int pass,
269
                                 const int cond_signal)
270
280k
{
271
280k
    Dav1dTask *tasks = f->task_thread.tile_tasks[0];
272
280k
    const int uses_2pass = f->c->n_fc > 1;
273
280k
    const int n_tasks_per_pass = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
274
280k
    const int n_tasks = n_tasks_per_pass * (1 + uses_2pass);
275
280k
    if (pass < 2) {
276
140k
        if (n_tasks > f->task_thread.num_tile_tasks) {
277
99.4k
            const size_t size = sizeof(Dav1dTask) * n_tasks;
278
99.4k
            tasks = dav1d_realloc(ALLOC_COMMON_CTX, f->task_thread.tile_tasks[0], size);
279
99.4k
            if (!tasks) return -1;
280
99.4k
            memset(tasks, 0, size);
281
99.4k
            f->task_thread.tile_tasks[0] = tasks;
282
99.4k
            f->task_thread.num_tile_tasks = n_tasks;
283
99.4k
        }
284
140k
        f->task_thread.tile_tasks[1] = tasks + n_tasks_per_pass;
285
140k
    }
286
280k
    assert(n_tasks <= f->task_thread.num_tile_tasks);
287
288
280k
    Dav1dTask *pf_t;
289
280k
    if (create_filter_sbrow(f, pass, &pf_t))
290
0
        return -1;
291
292
280k
    Dav1dTask *const p1_tasks = f->task_thread.tile_tasks[1];
293
280k
    Dav1dTask *prev_t = NULL;
294
280k
    if (pass == 2) {
295
140k
        prev_t = &p1_tasks[n_tasks_per_pass - 1];
296
        // PF task is scheduled after the last sby=0 TILE task
297
140k
        if (f->frame_hdr->tiling.rows == 1)
298
137k
            prev_t = prev_t->next;
299
140k
    }
300
280k
    tasks += (pass & 1) * n_tasks_per_pass;
301
590k
    for (int tile_idx = 0; tile_idx < n_tasks_per_pass; tile_idx++) {
302
310k
        Dav1dTileState *const ts = &f->ts[tile_idx];
303
310k
        Dav1dTask *t = &tasks[tile_idx];
304
310k
        t->sby = ts->tiling.row_start >> f->sb_shift;
305
310k
        if (pf_t && t->sby) {
306
4.78k
            prev_t->next = pf_t;
307
4.78k
            prev_t = pf_t;
308
4.78k
            pf_t = NULL;
309
4.78k
        }
310
310k
        t->recon_progress = 0;
311
310k
        t->deblock_progress = 0;
312
310k
        t->deps_skip = 0;
313
310k
        t->type = pass != 1 ? DAV1D_TASK_TYPE_TILE_RECONSTRUCTION :
314
310k
                              DAV1D_TASK_TYPE_TILE_ENTROPY;
315
310k
        t->frame_idx = (int)(f - f->c->fc);
316
310k
        if (prev_t) prev_t->next = t;
317
310k
        prev_t = t;
318
310k
    }
319
280k
    if (pf_t) {
320
275k
        prev_t->next = pf_t;
321
275k
        prev_t = pf_t;
322
275k
    }
323
280k
    prev_t->next = NULL;
324
325
280k
    atomic_store(&f->task_thread.done[pass & 1], 0);
326
327
    // XXX in theory this could be done locklessly, at this point they are no
328
    // tasks in the frameQ, so no other runner should be using this lock, but
329
    // we must add both passes at once
330
280k
    if (!(pass & 1)) {
331
140k
        pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
332
140k
        assert(f->task_thread.pending_tasks.head == NULL);
333
140k
        f->task_thread.pending_tasks.head = f->task_thread.tile_tasks[pass == 2];
334
140k
        f->task_thread.pending_tasks.tail = prev_t;
335
140k
        atomic_store(&f->task_thread.pending_tasks.merge, 1);
336
140k
        atomic_store(&f->task_thread.init_done, 1);
337
140k
        pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);
338
140k
    }
339
280k
    return 0;
340
280k
}
341
342
148k
void dav1d_task_frame_init(Dav1dFrameContext *const f) {
343
148k
    const Dav1dContext *const c = f->c;
344
345
148k
    atomic_store(&f->task_thread.init_done, 0);
346
    // schedule init task, which will schedule the remaining tasks
347
148k
    Dav1dTask *const t = &f->task_thread.init_task;
348
148k
    t->type = DAV1D_TASK_TYPE_INIT;
349
148k
    t->frame_idx = (int)(f - c->fc);
350
148k
    t->sby = 0;
351
148k
    t->recon_progress = t->deblock_progress = 0;
352
148k
    insert_task(f, t, 1);
353
148k
}
354
355
void dav1d_task_delayed_fg(Dav1dContext *const c, Dav1dPicture *const out,
356
                           const Dav1dPicture *const in)
357
4.57k
{
358
4.57k
    struct TaskThreadData *const ttd = &c->task_thread;
359
4.57k
    ttd->delayed_fg.in = in;
360
4.57k
    ttd->delayed_fg.out = out;
361
4.57k
    ttd->delayed_fg.type = DAV1D_TASK_TYPE_FG_PREP;
362
4.57k
    atomic_init(&ttd->delayed_fg.progress[0], 0);
363
4.57k
    atomic_init(&ttd->delayed_fg.progress[1], 0);
364
4.57k
    pthread_mutex_lock(&ttd->lock);
365
4.57k
    ttd->delayed_fg.exec = 1;
366
4.57k
    ttd->delayed_fg.finished = 0;
367
4.57k
    pthread_cond_signal(&ttd->cond);
368
4.57k
    do {
369
4.57k
        pthread_cond_wait(&ttd->delayed_fg.cond, &ttd->lock);
370
4.57k
    } while (!ttd->delayed_fg.finished);
371
4.57k
    pthread_mutex_unlock(&ttd->lock);
372
4.57k
}
373
374
static inline int ensure_progress(struct TaskThreadData *const ttd,
375
                                  Dav1dFrameContext *const f,
376
                                  Dav1dTask *const t, const enum TaskType type,
377
                                  atomic_int *const state, int *const target)
378
332k
{
379
    // deblock_rows (non-LR portion) depends on deblock of previous sbrow,
380
    // so ensure that completed. if not, re-add to task-queue; else, fall-through
381
332k
    int p1 = atomic_load(state);
382
332k
    if (p1 < t->sby) {
383
14.9k
        t->type = type;
384
14.9k
        t->recon_progress = t->deblock_progress = 0;
385
14.9k
        *target = t->sby;
386
14.9k
        add_pending(f, t);
387
14.9k
        pthread_mutex_lock(&ttd->lock);
388
14.9k
        return 1;
389
14.9k
    }
390
317k
    return 0;
391
332k
}
392
393
static inline int check_tile(Dav1dTask *const t, Dav1dFrameContext *const f,
394
                             const int frame_mt)
395
2.42M
{
396
2.42M
    const int tp = t->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
397
2.42M
    const int tile_idx = (int)(t - f->task_thread.tile_tasks[tp]);
398
2.42M
    Dav1dTileState *const ts = &f->ts[tile_idx];
399
2.42M
    const int p1 = atomic_load(&ts->progress[tp]);
400
2.42M
    if (p1 < t->sby) return 1;
401
2.01M
    int error = p1 == TILE_ERROR;
402
2.01M
    error |= atomic_fetch_or(&f->task_thread.error, error);
403
2.01M
    if (!error && frame_mt && !tp) {
404
790k
        const int p2 = atomic_load(&ts->progress[1]);
405
790k
        if (p2 <= t->sby) return 1;
406
458k
        error = p2 == TILE_ERROR;
407
458k
        error |= atomic_fetch_or(&f->task_thread.error, error);
408
458k
    }
409
1.67M
    if (!error && frame_mt && !IS_KEY_OR_INTRA(f->frame_hdr)) {
410
        // check reference state
411
419k
        const Dav1dThreadPicture *p = &f->sr_cur;
412
419k
        const int ss_ver = p->p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
413
419k
        const unsigned p_b = (t->sby + 1) << (f->sb_shift + 2);
414
419k
        const int tile_sby = t->sby - (ts->tiling.row_start >> f->sb_shift);
415
419k
        const int (*const lowest_px)[2] = ts->lowest_pixel[tile_sby];
416
1.44M
        for (int n = t->deps_skip; n < 7; n++, t->deps_skip++) {
417
1.30M
            unsigned lowest;
418
1.30M
            if (tp) {
419
                // if temporal mv refs are disabled, we only need this
420
                // for the primary ref; if segmentation is disabled, we
421
                // don't even need that
422
659k
                lowest = p_b;
423
659k
            } else {
424
                // +8 is postfilter-induced delay
425
642k
                const int y = lowest_px[n][0] == INT_MIN ? INT_MIN :
426
642k
                              lowest_px[n][0] + 8;
427
642k
                const int uv = lowest_px[n][1] == INT_MIN ? INT_MIN :
428
642k
                               lowest_px[n][1] * (1 << ss_ver) + 8;
429
642k
                const int max = imax(y, uv);
430
642k
                if (max == INT_MIN) continue;
431
257k
                lowest = iclip(max, 1, f->refp[n].p.p.h);
432
257k
            }
433
916k
            const unsigned p3 = atomic_load(&f->refp[n].progress[!tp]);
434
916k
            if (p3 < lowest) return 1;
435
916k
            atomic_fetch_or(&f->task_thread.error, p3 == FRAME_ERROR);
436
643k
        }
437
419k
    }
438
1.40M
    return 0;
439
1.67M
}
440
441
static inline int get_frame_progress(const Dav1dContext *const c,
442
                                     const Dav1dFrameContext *const f)
443
663k
{
444
663k
    unsigned frame_prog = c->n_fc > 1 ? atomic_load(&f->sr_cur.progress[1]) : 0;
445
663k
    if (frame_prog >= FRAME_ERROR)
446
358k
        return f->sbh - 1;
447
304k
    int idx = frame_prog >> (f->sb_shift + 7);
448
304k
    int prog;
449
307k
    do {
450
307k
        atomic_uint *state = &f->frame_thread.frame_progress[idx];
451
307k
        const unsigned val = ~atomic_load(state);
452
307k
        prog = val ? ctz(val) : 32;
453
307k
        if (prog != 32) break;
454
3.28k
        prog = 0;
455
3.28k
    } while (++idx < f->frame_thread.prog_sz);
456
304k
    return ((idx << 5) | prog) - 1;
457
663k
}
458
459
4.73k
static inline void abort_frame(Dav1dFrameContext *const f, const int error) {
460
4.73k
    atomic_store(&f->task_thread.error, error == DAV1D_ERR(EINVAL) ? 1 : -1);
461
4.73k
    atomic_store(&f->task_thread.task_counter, 0);
462
4.73k
    atomic_store(&f->task_thread.done[0], 1);
463
4.73k
    atomic_store(&f->task_thread.done[1], 1);
464
4.73k
    atomic_store(&f->sr_cur.progress[0], FRAME_ERROR);
465
4.73k
    atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
466
4.73k
    dav1d_decode_frame_exit(f, error);
467
4.73k
    f->n_tile_data = 0;
468
4.73k
    pthread_cond_signal(&f->task_thread.cond);
469
4.73k
}
470
471
static inline void delayed_fg_task(const Dav1dContext *const c,
472
                                   struct TaskThreadData *const ttd)
473
21.8k
{
474
21.8k
    const Dav1dPicture *const in = ttd->delayed_fg.in;
475
21.8k
    Dav1dPicture *const out = ttd->delayed_fg.out;
476
21.8k
#if CONFIG_16BPC
477
21.8k
    int off;
478
21.8k
    if (out->p.bpc != 8)
479
10.2k
        off = (out->p.bpc >> 1) - 4;
480
21.8k
#endif
481
21.8k
    switch (ttd->delayed_fg.type) {
482
4.57k
    case DAV1D_TASK_TYPE_FG_PREP:
483
4.57k
        ttd->delayed_fg.exec = 0;
484
4.57k
        if (atomic_load(&ttd->cond_signaled))
485
15
            pthread_cond_signal(&ttd->cond);
486
4.57k
        pthread_mutex_unlock(&ttd->lock);
487
4.57k
        switch (out->p.bpc) {
488
0
#if CONFIG_8BPC
489
2.19k
        case 8:
490
2.19k
            dav1d_prep_grain_8bpc(&c->dsp[0].fg, out, in,
491
2.19k
                                  ttd->delayed_fg.scaling_8bpc,
492
2.19k
                                  ttd->delayed_fg.grain_lut_8bpc);
493
2.19k
            break;
494
0
#endif
495
0
#if CONFIG_16BPC
496
2.11k
        case 10:
497
2.37k
        case 12:
498
2.37k
            dav1d_prep_grain_16bpc(&c->dsp[off].fg, out, in,
499
2.37k
                                   ttd->delayed_fg.scaling_16bpc,
500
2.37k
                                   ttd->delayed_fg.grain_lut_16bpc);
501
2.37k
            break;
502
0
#endif
503
0
        default: abort();
504
4.57k
        }
505
4.57k
        ttd->delayed_fg.type = DAV1D_TASK_TYPE_FG_APPLY;
506
4.57k
        pthread_mutex_lock(&ttd->lock);
507
4.57k
        ttd->delayed_fg.exec = 1;
508
        // fall-through
509
21.8k
    case DAV1D_TASK_TYPE_FG_APPLY:;
510
21.8k
        int row = atomic_fetch_add(&ttd->delayed_fg.progress[0], 1);
511
21.8k
        pthread_mutex_unlock(&ttd->lock);
512
21.8k
        int progmax = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
513
66.1k
        while (row < progmax) {
514
45.1k
            if (row + 1 < progmax)
515
40.5k
                pthread_cond_signal(&ttd->cond);
516
4.57k
            else {
517
4.57k
                pthread_mutex_lock(&ttd->lock);
518
4.57k
                ttd->delayed_fg.exec = 0;
519
4.57k
                pthread_mutex_unlock(&ttd->lock);
520
4.57k
            }
521
45.1k
            switch (out->p.bpc) {
522
0
#if CONFIG_8BPC
523
24.7k
            case 8:
524
24.7k
                dav1d_apply_grain_row_8bpc(&c->dsp[0].fg, out, in,
525
24.7k
                                           ttd->delayed_fg.scaling_8bpc,
526
24.7k
                                           ttd->delayed_fg.grain_lut_8bpc, row);
527
24.7k
                break;
528
0
#endif
529
0
#if CONFIG_16BPC
530
18.2k
            case 10:
531
20.3k
            case 12:
532
20.3k
                dav1d_apply_grain_row_16bpc(&c->dsp[off].fg, out, in,
533
20.3k
                                            ttd->delayed_fg.scaling_16bpc,
534
20.3k
                                            ttd->delayed_fg.grain_lut_16bpc, row);
535
20.3k
                break;
536
0
#endif
537
0
            default: abort();
538
45.1k
            }
539
44.3k
            row = atomic_fetch_add(&ttd->delayed_fg.progress[0], 1);
540
44.3k
            atomic_fetch_add(&ttd->delayed_fg.progress[1], 1);
541
44.3k
        }
542
21.0k
        pthread_mutex_lock(&ttd->lock);
543
21.0k
        ttd->delayed_fg.exec = 0;
544
21.0k
        int done = atomic_fetch_add(&ttd->delayed_fg.progress[1], 1) + 1;
545
21.0k
        progmax = atomic_load(&ttd->delayed_fg.progress[0]);
546
        // signal for completion only once the last runner reaches this
547
21.0k
        if (done >= progmax) {
548
4.57k
            ttd->delayed_fg.finished = 1;
549
4.57k
            pthread_cond_signal(&ttd->delayed_fg.cond);
550
4.57k
        }
551
21.0k
        break;
552
0
    default: abort();
553
21.8k
    }
554
21.8k
}
555
556
2.46M
void *dav1d_worker_task(void *data) {
557
2.46M
    Dav1dTaskContext *const tc = data;
558
2.46M
    const Dav1dContext *const c = tc->c;
559
2.46M
    struct TaskThreadData *const ttd = tc->task_thread.ttd;
560
561
2.46M
    dav1d_set_thread_name("dav1d-worker");
562
563
2.46M
    pthread_mutex_lock(&ttd->lock);
564
9.01M
    for (;;) {
565
9.01M
        if (tc->task_thread.die) break;
566
6.55M
        if (atomic_load(c->flush)) goto park;
567
568
6.47M
        merge_pending(c);
569
6.47M
        if (ttd->delayed_fg.exec) { // run delayed film grain first
570
21.8k
            delayed_fg_task(c, ttd);
571
21.8k
            continue;
572
21.8k
        }
573
6.45M
        Dav1dFrameContext *f;
574
6.45M
        Dav1dTask *t, *prev_t = NULL;
575
6.45M
        if (c->n_fc > 1) { // run init tasks second
576
44.5M
            for (unsigned i = 0; i < c->n_fc; i++) {
577
38.2M
                const unsigned first = atomic_load(&ttd->first);
578
38.2M
                f = &c->fc[(first + i) % c->n_fc];
579
38.2M
                if (atomic_load(&f->task_thread.init_done)) continue;
580
30.1M
                t = f->task_thread.task_head;
581
30.1M
                if (!t) continue;
582
529k
                if (t->type == DAV1D_TASK_TYPE_INIT) goto found;
583
384k
                if (t->type == DAV1D_TASK_TYPE_INIT_CDF) {
584
                    // XXX This can be a simple else, if adding tasks of both
585
                    // passes at once (in dav1d_task_create_tile_sbrow).
586
                    // Adding the tasks to the pending Q can result in a
587
                    // thread merging them before setting init_done.
588
                    // We will need to set init_done before adding to the
589
                    // pending Q, so maybe return the tasks, set init_done,
590
                    // and add to pending Q only then.
591
384k
                    const int p1 = f->in_cdf.progress ?
592
384k
                        atomic_load(f->in_cdf.progress) : 1;
593
384k
                    if (p1) {
594
23.0k
                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
595
23.0k
                        goto found;
596
23.0k
                    }
597
384k
                }
598
384k
            }
599
6.45M
        }
600
10.1M
        while (ttd->cur < c->n_fc) { // run decoding tasks last
601
5.94M
            const unsigned first = atomic_load(&ttd->first);
602
5.94M
            f = &c->fc[(first + ttd->cur) % c->n_fc];
603
5.94M
            merge_pending_frame(f);
604
5.94M
            prev_t = f->task_thread.task_cur_prev;
605
5.94M
            t = prev_t ? prev_t->next : f->task_thread.task_head;
606
11.0M
            while (t) {
607
7.21M
                if (t->type == DAV1D_TASK_TYPE_INIT_CDF) goto next;
608
7.12M
                else if (t->type == DAV1D_TASK_TYPE_TILE_ENTROPY ||
609
6.63M
                         t->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION)
610
1.31M
                {
611
                    // if not bottom sbrow of tile, this task will be re-added
612
                    // after it's finished
613
1.31M
                    if (!check_tile(t, f, c->n_fc > 1))
614
714k
                        goto found;
615
5.80M
                } else if (t->recon_progress) {
616
2.92M
                    const int p = t->type == DAV1D_TASK_TYPE_ENTROPY_PROGRESS;
617
2.92M
                    int error = atomic_load(&f->task_thread.error);
618
2.92M
                    assert(!atomic_load(&f->task_thread.done[p]) || error);
619
2.92M
                    const int tile_row_base = f->frame_hdr->tiling.cols *
620
2.92M
                                              f->frame_thread.next_tile_row[p];
621
2.92M
                    if (p) {
622
1.14M
                        atomic_int *const prog = &f->frame_thread.entropy_progress;
623
1.14M
                        const int p1 = atomic_load(prog);
624
1.14M
                        if (p1 < t->sby) goto next;
625
1.14M
                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
626
1.08M
                    }
627
4.27M
                    for (int tc = 0; tc < f->frame_hdr->tiling.cols; tc++) {
628
2.94M
                        Dav1dTileState *const ts = &f->ts[tile_row_base + tc];
629
2.94M
                        const int p2 = atomic_load(&ts->progress[p]);
630
2.94M
                        if (p2 < t->recon_progress) goto next;
631
2.94M
                        atomic_fetch_or(&f->task_thread.error, p2 == TILE_ERROR);
632
1.40M
                    }
633
1.33M
                    if (t->sby + 1 < f->sbh) {
634
                        // add sby+1 to list to replace this one
635
1.06M
                        Dav1dTask *next_t = &t[1];
636
1.06M
                        *next_t = *t;
637
1.06M
                        next_t->sby++;
638
1.06M
                        const int ntr = f->frame_thread.next_tile_row[p] + 1;
639
1.06M
                        const int start = f->frame_hdr->tiling.row_start_sb[ntr];
640
1.06M
                        if (next_t->sby == start)
641
11.5k
                            f->frame_thread.next_tile_row[p] = ntr;
642
1.06M
                        next_t->recon_progress = next_t->sby + 1;
643
1.06M
                        insert_task(f, next_t, 0);
644
1.06M
                    }
645
1.33M
                    goto found;
646
2.87M
                } else if (t->type == DAV1D_TASK_TYPE_CDEF) {
647
112k
                    atomic_uint *prog = f->frame_thread.copy_lpf_progress;
648
112k
                    const int p1 = atomic_load(&prog[(t->sby - 1) >> 5]);
649
112k
                    if (p1 & (1U << ((t->sby - 1) & 31)))
650
17.4k
                        goto found;
651
2.76M
                } else {
652
2.76M
                    assert(t->deblock_progress);
653
2.76M
                    const int p1 = atomic_load(&f->frame_thread.deblock_progress);
654
2.76M
                    if (p1 >= t->deblock_progress) {
655
14.9k
                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
656
14.9k
                        goto found;
657
14.9k
                    }
658
2.76M
                }
659
5.13M
            next:
660
5.13M
                prev_t = t;
661
5.13M
                t = t->next;
662
5.13M
                f->task_thread.task_cur_prev = prev_t;
663
5.13M
            }
664
3.87M
            ttd->cur++;
665
3.87M
        }
666
4.21M
        if (reset_task_cur(c, ttd, UINT_MAX)) continue;
667
4.19M
        if (merge_pending(c)) continue;
668
4.25M
    park:
669
4.25M
        tc->task_thread.flushed = 1;
670
4.25M
        pthread_cond_signal(&tc->task_thread.td.cond);
671
        // we want to be woken up next time progress is signaled
672
4.25M
        atomic_store(&ttd->cond_signaled, 0);
673
4.25M
        pthread_cond_wait(&ttd->cond, &ttd->lock);
674
4.25M
        tc->task_thread.flushed = 0;
675
4.25M
        reset_task_cur(c, ttd, UINT_MAX);
676
4.25M
        continue;
677
678
2.24M
    found:
679
        // remove t from list
680
2.24M
        if (prev_t) prev_t->next = t->next;
681
1.90M
        else f->task_thread.task_head = t->next;
682
2.24M
        if (!t->next) f->task_thread.task_tail = prev_t;
683
2.24M
        if (t->type > DAV1D_TASK_TYPE_INIT_CDF && !f->task_thread.task_head)
684
132k
            ttd->cur++;
685
2.24M
        t->next = NULL;
686
        // we don't need to check cond_signaled here, since we found a task
687
        // after the last signal so we want to re-signal the next waiting thread
688
        // and again won't need to signal after that
689
2.24M
        atomic_store(&ttd->cond_signaled, 1);
690
2.24M
        pthread_cond_signal(&ttd->cond);
691
2.24M
        pthread_mutex_unlock(&ttd->lock);
692
3.05M
    found_unlocked:;
693
3.05M
        const int flush = atomic_load(c->flush);
694
3.05M
        int error = atomic_fetch_or(&f->task_thread.error, flush) | flush;
695
696
        // run it
697
3.05M
        tc->f = f;
698
3.05M
        int sby = t->sby;
699
3.05M
        switch (t->type) {
700
145k
        case DAV1D_TASK_TYPE_INIT: {
701
145k
            assert(c->n_fc > 1);
702
145k
            int res = dav1d_decode_frame_init(f);
703
145k
            int p1 = f->in_cdf.progress ? atomic_load(f->in_cdf.progress) : 1;
704
145k
            if (res || p1 == TILE_ERROR) {
705
321
                pthread_mutex_lock(&ttd->lock);
706
321
                abort_frame(f, res ? res : DAV1D_ERR(EINVAL));
707
321
                reset_task_cur(c, ttd, t->frame_idx);
708
144k
            } else {
709
144k
                t->type = DAV1D_TASK_TYPE_INIT_CDF;
710
144k
                if (p1) goto found_unlocked;
711
23.5k
                add_pending(f, t);
712
23.5k
                pthread_mutex_lock(&ttd->lock);
713
23.5k
            }
714
23.8k
            continue;
715
145k
        }
716
144k
        case DAV1D_TASK_TYPE_INIT_CDF: {
717
144k
            assert(c->n_fc > 1);
718
144k
            int res = DAV1D_ERR(EINVAL);
719
144k
            if (!atomic_load(&f->task_thread.error))
720
141k
                res = dav1d_decode_frame_init_cdf(f);
721
144k
            if (f->frame_hdr->refresh_context && !f->task_thread.update_set)
722
144k
                atomic_store(f->out_cdf.progress, res < 0 ? TILE_ERROR : 1);
723
424k
            for (int p = 1; p <= 2 && !res; p++)
724
280k
                res = dav1d_task_create_tile_sbrow(f, p, 0);
725
144k
            pthread_mutex_lock(&ttd->lock);
726
144k
            if (res) {
727
4.41k
                abort_frame(f, DAV1D_ERR(ENOMEM));
728
4.41k
                reset_task_cur(c, ttd, t->frame_idx);
729
4.41k
                atomic_store(&f->task_thread.init_done, 1);
730
4.41k
            }
731
144k
            continue;
732
144k
        }
733
706k
        case DAV1D_TASK_TYPE_TILE_ENTROPY:
734
1.40M
        case DAV1D_TASK_TYPE_TILE_RECONSTRUCTION: {
735
1.40M
            const int p = t->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
736
1.40M
            const int tile_idx = (int)(t - f->task_thread.tile_tasks[p]);
737
1.40M
            Dav1dTileState *const ts = &f->ts[tile_idx];
738
739
1.40M
            tc->ts = ts;
740
1.40M
            tc->by = sby << f->sb_shift;
741
1.40M
            const int uses_2pass = c->n_fc > 1;
742
1.40M
            tc->frame_thread.pass = !uses_2pass ? 0 :
743
1.40M
                1 + (t->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION);
744
1.40M
            if (!error) error = dav1d_decode_tile_sbrow(tc);
745
1.40M
            const int progress = error ? TILE_ERROR : 1 + sby;
746
747
            // signal progress
748
1.40M
            atomic_fetch_or(&f->task_thread.error, error);
749
1.40M
            if (((sby + 1) << f->sb_shift) < ts->tiling.row_end) {
750
1.10M
                t->sby++;
751
1.10M
                t->deps_skip = 0;
752
1.10M
                if (!check_tile(t, f, uses_2pass)) {
753
691k
                    atomic_store(&ts->progress[p], progress);
754
691k
                    reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
755
691k
                    if (!atomic_fetch_or(&ttd->cond_signaled, 1))
756
126
                        pthread_cond_signal(&ttd->cond);
757
691k
                    goto found_unlocked;
758
691k
                }
759
1.10M
                atomic_store(&ts->progress[p], progress);
760
411k
                add_pending(f, t);
761
411k
                pthread_mutex_lock(&ttd->lock);
762
411k
            } else {
763
303k
                pthread_mutex_lock(&ttd->lock);
764
303k
                atomic_store(&ts->progress[p], progress);
765
303k
                reset_task_cur(c, ttd, t->frame_idx);
766
303k
                error = atomic_load(&f->task_thread.error);
767
303k
                if (f->frame_hdr->refresh_context &&
768
98.5k
                    tc->frame_thread.pass <= 1 && f->task_thread.update_set &&
769
49.9k
                    f->frame_hdr->tiling.update == tile_idx)
770
48.7k
                {
771
48.7k
                    if (!error)
772
45.4k
                        dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
773
45.4k
                                                &f->ts[f->frame_hdr->tiling.update].cdf);
774
48.7k
                    if (c->n_fc > 1)
775
48.7k
                        atomic_store(f->out_cdf.progress, error ? TILE_ERROR : 1);
776
48.7k
                }
777
303k
                if (atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1 == 0 &&
778
303k
                    atomic_load(&f->task_thread.done[0]) &&
779
1.45k
                    (!uses_2pass || atomic_load(&f->task_thread.done[1])))
780
1.45k
                {
781
1.45k
                    error = atomic_load(&f->task_thread.error);
782
1.45k
                    dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
783
1.45k
                                            error ? DAV1D_ERR(ENOMEM) : 0);
784
1.45k
                    f->n_tile_data = 0;
785
1.45k
                    pthread_cond_signal(&f->task_thread.cond);
786
1.45k
                }
787
303k
                assert(atomic_load(&f->task_thread.task_counter) >= 0);
788
303k
                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
789
200k
                    pthread_cond_signal(&ttd->cond);
790
303k
            }
791
714k
            continue;
792
1.40M
        }
793
714k
        case DAV1D_TASK_TYPE_DEBLOCK_COLS:
794
332k
            if (!atomic_load(&f->task_thread.error))
795
185k
                f->bd_fn.filter_sbrow_deblock_cols(f, sby);
796
332k
            if (ensure_progress(ttd, f, t, DAV1D_TASK_TYPE_DEBLOCK_ROWS,
797
332k
                                &f->frame_thread.deblock_progress,
798
332k
                                &t->deblock_progress)) continue;
799
            // fall-through
800
504k
        case DAV1D_TASK_TYPE_DEBLOCK_ROWS:
801
504k
            if (!atomic_load(&f->task_thread.error))
802
242k
                f->bd_fn.filter_sbrow_deblock_rows(f, sby);
803
            // signal deblock progress
804
504k
            if (f->frame_hdr->loopfilter.level_y[0] ||
805
211k
                f->frame_hdr->loopfilter.level_y[1])
806
332k
            {
807
332k
                error = atomic_load(&f->task_thread.error);
808
332k
                atomic_store(&f->frame_thread.deblock_progress,
809
332k
                             error ? TILE_ERROR : sby + 1);
810
332k
                reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
811
332k
                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
812
75.4k
                    pthread_cond_signal(&ttd->cond);
813
332k
            } else if (f->seq_hdr->cdef || f->lf.restore_planes) {
814
172k
                atomic_fetch_or(&f->frame_thread.copy_lpf_progress[sby >> 5],
815
172k
                                1U << (sby & 31));
816
                // CDEF needs the top buffer to be saved by lr_copy_lpf of the
817
                // previous sbrow
818
172k
                if (sby) {
819
155k
                    int prog = atomic_load(&f->frame_thread.copy_lpf_progress[(sby - 1) >> 5]);
820
155k
                    if (~prog & (1U << ((sby - 1) & 31))) {
821
17.4k
                        t->type = DAV1D_TASK_TYPE_CDEF;
822
17.4k
                        t->recon_progress = t->deblock_progress = 0;
823
17.4k
                        add_pending(f, t);
824
17.4k
                        pthread_mutex_lock(&ttd->lock);
825
17.4k
                        continue;
826
17.4k
                    }
827
155k
                }
828
172k
            }
829
            // fall-through
830
504k
        case DAV1D_TASK_TYPE_CDEF:
831
504k
            if (f->seq_hdr->cdef) {
832
307k
                if (!atomic_load(&f->task_thread.error))
833
160k
                    f->bd_fn.filter_sbrow_cdef(tc, sby);
834
307k
                reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
835
307k
                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
836
61.1k
                    pthread_cond_signal(&ttd->cond);
837
307k
            }
838
            // fall-through
839
509k
        case DAV1D_TASK_TYPE_SUPER_RESOLUTION:
840
509k
            if (f->frame_hdr->width[0] != f->frame_hdr->width[1])
841
77.8k
                if (!atomic_load(&f->task_thread.error))
842
46.0k
                    f->bd_fn.filter_sbrow_resize(f, sby);
843
            // fall-through
844
509k
        case DAV1D_TASK_TYPE_LOOP_RESTORATION:
845
509k
            if (!atomic_load(&f->task_thread.error) && f->lf.restore_planes)
846
110k
                f->bd_fn.filter_sbrow_lr(f, sby);
847
            // fall-through
848
662k
        case DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS:
849
            // dummy to cover for no post-filters
850
1.33M
        case DAV1D_TASK_TYPE_ENTROPY_PROGRESS:
851
            // dummy to convert tile progress to frame
852
1.33M
            break;
853
0
        default: abort();
854
3.05M
        }
855
        // if task completed [typically LR], signal picture progress as per below
856
1.33M
        const int uses_2pass = c->n_fc > 1;
857
1.33M
        const int sbh = f->sbh;
858
1.33M
        const int sbsz = f->sb_step * 4;
859
1.33M
        if (t->type == DAV1D_TASK_TYPE_ENTROPY_PROGRESS) {
860
668k
            error = atomic_load(&f->task_thread.error);
861
668k
            const unsigned y = sby + 1 == sbh ? UINT_MAX : (unsigned)(sby + 1) * sbsz;
862
668k
            assert(c->n_fc > 1);
863
668k
            if (f->sr_cur.p.data[0] /* upon flush, this can be free'ed already */)
864
668k
                atomic_store(&f->sr_cur.progress[0], error ? FRAME_ERROR : y);
865
668k
            atomic_store(&f->frame_thread.entropy_progress,
866
668k
                         error ? TILE_ERROR : sby + 1);
867
668k
            if (sby + 1 == sbh)
868
668k
                atomic_store(&f->task_thread.done[1], 1);
869
668k
            pthread_mutex_lock(&ttd->lock);
870
668k
            const int num_tasks = atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1;
871
668k
            if (sby + 1 < sbh && num_tasks) {
872
529k
                reset_task_cur(c, ttd, t->frame_idx);
873
529k
                continue;
874
529k
            }
875
138k
            if (!num_tasks && atomic_load(&f->task_thread.done[0]) &&
876
138k
                atomic_load(&f->task_thread.done[1]))
877
16.6k
            {
878
16.6k
                error = atomic_load(&f->task_thread.error);
879
16.6k
                dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
880
16.6k
                                        error ? DAV1D_ERR(ENOMEM) : 0);
881
16.6k
                f->n_tile_data = 0;
882
16.6k
                pthread_cond_signal(&f->task_thread.cond);
883
16.6k
            }
884
138k
            reset_task_cur(c, ttd, t->frame_idx);
885
138k
            continue;
886
668k
        }
887
    // t->type != DAV1D_TASK_TYPE_ENTROPY_PROGRESS
888
1.33M
        atomic_fetch_or(&f->frame_thread.frame_progress[sby >> 5],
889
662k
                        1U << (sby & 31));
890
662k
        pthread_mutex_lock(&f->task_thread.lock);
891
662k
        sby = get_frame_progress(c, f);
892
662k
        error = atomic_load(&f->task_thread.error);
893
662k
        const unsigned y = sby + 1 == sbh ? UINT_MAX : (unsigned)(sby + 1) * sbsz;
894
663k
        if (c->n_fc > 1 && f->sr_cur.p.data[0] /* upon flush, this can be free'ed already */)
895
663k
            atomic_store(&f->sr_cur.progress[1], error ? FRAME_ERROR : y);
896
662k
        pthread_mutex_unlock(&f->task_thread.lock);
897
662k
        if (sby + 1 == sbh)
898
662k
            atomic_store(&f->task_thread.done[0], 1);
899
662k
        pthread_mutex_lock(&ttd->lock);
900
662k
        const int num_tasks = atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1;
901
662k
        if (sby + 1 < sbh && num_tasks) {
902
183k
            reset_task_cur(c, ttd, t->frame_idx);
903
183k
            continue;
904
183k
        }
905
478k
        if (!num_tasks && atomic_load(&f->task_thread.done[0]) &&
906
113k
            (!uses_2pass || atomic_load(&f->task_thread.done[1])))
907
113k
        {
908
113k
            error = atomic_load(&f->task_thread.error);
909
113k
            dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
910
113k
                                    error ? DAV1D_ERR(ENOMEM) : 0);
911
113k
            f->n_tile_data = 0;
912
113k
            pthread_cond_signal(&f->task_thread.cond);
913
113k
        }
914
478k
        reset_task_cur(c, ttd, t->frame_idx);
915
478k
    }
916
2.46M
    pthread_mutex_unlock(&ttd->lock);
917
918
    return NULL;
919
2.46M
}