Coverage Report

Created: 2026-02-14 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libavcodec/vvc/thread.c
Line
Count
Source
1
/*
2
 * VVC thread logic
3
 *
4
 * Copyright (C) 2023 Nuo Mi
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
23
#include <stdatomic.h>
24
25
#include "libavcodec/executor.h"
26
#include "libavutil/mem.h"
27
#include "libavutil/thread.h"
28
29
#include "thread.h"
30
#include "ctu.h"
31
#include "filter.h"
32
#include "inter.h"
33
#include "intra.h"
34
#include "refs.h"
35
36
typedef struct ProgressListener {
37
    VVCProgressListener l;
38
    struct VVCTask *task;
39
    VVCContext *s;
40
} ProgressListener;
41
42
typedef enum VVCTaskStage {
43
    VVC_TASK_STAGE_INIT,                    // for CTU(0, 0) only
44
    VVC_TASK_STAGE_PARSE,
45
    VVC_TASK_STAGE_DEBLOCK_BS,
46
    VVC_TASK_STAGE_INTER,
47
    VVC_TASK_STAGE_RECON,
48
    VVC_TASK_STAGE_LMCS,
49
    VVC_TASK_STAGE_DEBLOCK_V,
50
    VVC_TASK_STAGE_DEBLOCK_H,
51
    VVC_TASK_STAGE_SAO,
52
    VVC_TASK_STAGE_ALF,
53
    VVC_TASK_STAGE_LAST
54
} VVCTaskStage;
55
56
typedef struct VVCTask {
57
    union {
58
        struct VVCTask *next;                //for executor debug only
59
        FFTask task;
60
    } u;
61
62
    VVCTaskStage stage;
63
64
    // ctu x, y, and raster scan order
65
    int rx, ry, rs;
66
    VVCFrameContext *fc;
67
68
    ProgressListener col_listener;
69
    ProgressListener listener[2][VVC_MAX_REF_ENTRIES];
70
71
    // for parse task only
72
    SliceContext *sc;
73
    EntryPoint *ep;
74
    int ctu_idx;                    //ctu idx in the current slice
75
76
    // tasks with target scores met are ready for scheduling
77
    atomic_uchar score[VVC_TASK_STAGE_LAST];
78
    atomic_uchar target_inter_score;
79
} VVCTask;
80
81
typedef struct VVCRowThread {
82
    atomic_int col_progress[VVC_PROGRESS_LAST];
83
} VVCRowThread;
84
85
typedef struct VVCFrameThread {
86
    // error return for tasks
87
    atomic_int ret;
88
89
    VVCRowThread *rows;
90
    VVCTask *tasks;
91
92
    int ctu_size;
93
    int ctu_width;
94
    int ctu_height;
95
    int ctu_count;
96
97
    //protected by lock
98
    atomic_int nb_scheduled_tasks;
99
    atomic_int nb_scheduled_listeners;
100
101
    int row_progress[VVC_PROGRESS_LAST];
102
103
    AVMutex lock;
104
    AVCond  cond;
105
} VVCFrameThread;
106
107
1.71M
#define PRIORITY_LOWEST 2
108
static void add_task(VVCContext *s, VVCTask *t)
109
1.70M
{
110
1.70M
    VVCFrameThread *ft     = t->fc->ft;
111
1.70M
    FFTask *task           = &t->u.task;
112
1.70M
    const int priorities[] = {
113
1.70M
        0,                  // VVC_TASK_STAGE_INIT,
114
1.70M
        0,                  // VVC_TASK_STAGE_PARSE,
115
1.70M
        1,                  // VVC_TASK_STAGE_DEBLOCK_BS
116
        // For an 8K clip, a CTU line completed in the reference frame may trigger 64 and more inter tasks.
117
        // We assign these tasks the lowest priority to avoid being overwhelmed with inter tasks.
118
1.70M
        PRIORITY_LOWEST,    // VVC_TASK_STAGE_INTER
119
1.70M
        1,                  // VVC_TASK_STAGE_RECON,
120
1.70M
        1,                  // VVC_TASK_STAGE_LMCS,
121
1.70M
        1,                  // VVC_TASK_STAGE_DEBLOCK_V,
122
1.70M
        1,                  // VVC_TASK_STAGE_DEBLOCK_H,
123
1.70M
        1,                  // VVC_TASK_STAGE_SAO,
124
1.70M
        1,                  // VVC_TASK_STAGE_ALF,
125
1.70M
    };
126
127
1.70M
    atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
128
1.70M
    task->priority = priorities[t->stage];
129
1.70M
    ff_executor_execute(s->executor, task);
130
1.70M
}
131
132
static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
133
3.25M
{
134
3.25M
    memset(t, 0, sizeof(*t));
135
3.25M
    t->stage = stage;
136
3.25M
    t->fc    = fc;
137
3.25M
    t->rx    = rx;
138
3.25M
    t->ry    = ry;
139
3.25M
    t->rs    = ry * fc->ft->ctu_width + rx;
140
35.7M
    for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++)
141
32.5M
        atomic_store(t->score + i, 0);
142
3.25M
    atomic_store(&t->target_inter_score, 0);
143
3.25M
}
144
145
static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
146
1.66M
{
147
1.66M
    if (t->sc) {
148
        // the task already inited, error bitstream
149
1.15k
        return AVERROR_INVALIDDATA;
150
1.15k
    }
151
1.66M
    t->sc      = sc;
152
1.66M
    t->ep      = ep;
153
1.66M
    t->ctu_idx = ctu_idx;
154
155
1.66M
    return 0;
156
1.66M
}
157
158
static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
159
41.7M
{
160
41.7M
    return atomic_fetch_add(&t->score[stage], 1) + 1;
161
41.7M
}
162
163
static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage)
164
14.2M
{
165
14.2M
    return atomic_load(&t->score[stage]);
166
14.2M
}
167
168
//first row in tile or slice
169
static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry)
170
1.56M
{
171
1.56M
    const VVCFrameThread *ft = fc->ft;
172
1.56M
    const VVCPPS *pps        = fc->ps.pps;
173
174
1.56M
    if (ry != pps->ctb_to_row_bd[ry]) {
175
0
        const int rs = ry * ft->ctu_width + rx;
176
0
        return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width];
177
0
    }
178
1.56M
    return 1;
179
1.56M
}
180
181
static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score)
182
55.9M
{
183
    // l:left, r:right, t: top, b: bottom
184
55.9M
    static const uint8_t target_score[] =
185
55.9M
    {
186
55.9M
        2,          //VVC_TASK_STAGE_DEBLOCK_BS,need l + t parse
187
55.9M
        0,          //VVC_TASK_STAGE_INTER,     not used
188
55.9M
        2,          //VVC_TASK_STAGE_RECON,     need l + rt recon
189
55.9M
        3,          //VVC_TASK_STAGE_LMCS,      need r + b + rb recon
190
55.9M
        1,          //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v
191
55.9M
        2,          //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h
192
55.9M
        5,          //VVC_TASK_STAGE_SAO,       need l + r + lb + b + rb deblock h
193
55.9M
        8,          //VVC_TASK_STAGE_ALF,       need sao around the ctu
194
55.9M
    };
195
55.9M
    uint8_t target = 0;
196
55.9M
    VVCFrameContext *fc = t->fc;
197
198
55.9M
    if (stage == VVC_TASK_STAGE_INIT)
199
1.53M
        return 1;
200
201
54.4M
    if (stage == VVC_TASK_STAGE_PARSE) {
202
4.78M
        const H266RawSPS *rsps   = fc->ps.sps->r;
203
4.78M
        const int wpp            = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
204
4.78M
        const int no_prev_stage  = t->rs > 0;
205
4.78M
        target = 2 + wpp - no_prev_stage;                           //left parse + colocation + wpp - no_prev_stage
206
49.6M
    } else if (stage == VVC_TASK_STAGE_INTER) {
207
1.62M
        target = atomic_load(&t->target_inter_score);
208
48.0M
    } else {
209
48.0M
        target = target_score[stage - VVC_TASK_STAGE_DEBLOCK_BS];
210
48.0M
    }
211
212
    //+1 for previous stage
213
54.4M
    av_assert0(score <= target + 1);
214
54.4M
    return score == target + 1;
215
54.4M
}
216
217
static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft,
218
    const int rx, const int ry, const VVCTaskStage stage)
219
296M
{
220
296M
    VVCTask *t = ft->tasks + ft->ctu_width * ry + rx;
221
296M
    uint8_t score;
222
223
296M
    if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height)
224
254M
        return;
225
226
41.7M
    score = task_add_score(t, stage);
227
41.7M
    if (task_has_target_score(t, stage, score)) {
228
1.70M
        av_assert0(s);
229
1.70M
        av_assert0(stage == t->stage);
230
1.70M
        add_task(s, t);
231
1.70M
    }
232
41.7M
}
233
234
static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled)
235
1.76M
{
236
1.76M
    if (atomic_fetch_sub(scheduled, 1) == 1) {
237
1.59M
        ff_mutex_lock(&ft->lock);
238
1.59M
        ff_cond_signal(&ft->cond);
239
1.59M
        ff_mutex_unlock(&ft->lock);
240
1.59M
    }
241
1.76M
}
242
243
static void progress_done(VVCProgressListener *_l, const int type)
244
56.9k
{
245
56.9k
    const ProgressListener *l = (ProgressListener *)_l;
246
56.9k
    const VVCTask *t          = l->task;
247
56.9k
    VVCFrameThread *ft        = t->fc->ft;
248
249
56.9k
    frame_thread_add_score(l->s, ft, t->rx, t->ry, type);
250
56.9k
    sheduled_done(ft, &ft->nb_scheduled_listeners);
251
56.9k
}
252
253
static void pixel_done(VVCProgressListener *l)
254
16.8k
{
255
16.8k
    progress_done(l, VVC_TASK_STAGE_INTER);
256
16.8k
}
257
258
static void mv_done(VVCProgressListener *l)
259
40.0k
{
260
40.0k
    progress_done(l, VVC_TASK_STAGE_PARSE);
261
40.0k
}
262
263
static void listener_init(ProgressListener *l,  VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
264
56.9k
{
265
56.9k
    const int is_inter = vp == VVC_PROGRESS_PIXEL;
266
267
56.9k
    l->task = t;
268
56.9k
    l->s    = s;
269
56.9k
    l->l.vp = vp;
270
56.9k
    l->l.y  = y;
271
56.9k
    l->l.progress_done = is_inter ? pixel_done : mv_done;
272
56.9k
    if (is_inter)
273
56.9k
        atomic_fetch_add(&t->target_inter_score, 1);
274
56.9k
}
275
276
static void add_progress_listener(VVCFrame *ref, ProgressListener *l,
277
    VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
278
56.9k
{
279
56.9k
    VVCFrameThread *ft = t->fc->ft;
280
281
56.9k
    atomic_fetch_add(&ft->nb_scheduled_listeners, 1);
282
56.9k
    listener_init(l, t, s, vp, y);
283
56.9k
    ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l);
284
56.9k
}
285
286
static void ep_init_wpp(EntryPoint *next, const EntryPoint *ep, const VVCSPS *sps)
287
0
{
288
0
    memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state));
289
0
    memcpy(next->pp, ep->pp, sizeof(next->pp));
290
0
    ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag);
291
0
}
292
293
static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t)
294
170k
{
295
170k
    VVCFrameThread *ft = fc->ft;
296
170k
    EntryPoint *ep     = t->ep;
297
170k
    const VVCSPS *sps  = fc->ps.sps;
298
299
170k
    if (sps->r->sps_entropy_coding_sync_enabled_flag) {
300
24.5k
        if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) {
301
24.5k
            EntryPoint *next = ep + 1;
302
24.5k
            if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1))
303
0
                ep_init_wpp(next, ep, sps);
304
24.5k
        }
305
24.5k
        if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1))
306
0
            frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE);
307
24.5k
    }
308
309
170k
    if (t->ctu_idx + 1 < t->ep->ctu_end) {
310
74.7k
        const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1];
311
74.7k
        const int next_rx = next_rs % ft->ctu_width;
312
74.7k
        const int next_ry = next_rs / ft->ctu_width;
313
74.7k
        frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE);
314
74.7k
    }
315
170k
}
316
317
static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs)
318
170k
{
319
170k
    const VVCSH *sh = &sc->sh;
320
321
170k
    if (!IS_I(sh->r)) {
322
18.8k
        CTU *ctu = fc->tab.ctus + rs;
323
56.5k
        for (int lx = 0; lx < 2; lx++) {
324
70.0k
            for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
325
32.4k
                int y = ctu->max_y[lx][i];
326
32.4k
                VVCRefPic *refp = sc->rpl[lx].refs + i;
327
32.4k
                VVCFrame *ref   = refp->ref;
328
32.4k
                if (ref && y >= 0) {
329
16.8k
                    if (refp->is_scaled)
330
0
                        y = y * refp->scale[1] >> 14;
331
16.8k
                    add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
332
16.8k
                }
333
32.4k
            }
334
37.6k
        }
335
18.8k
    }
336
170k
}
337
338
static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry)
339
170k
{
340
170k
    VVCFrameThread *ft  = fc->ft;
341
170k
    const int rs        = ry * ft->ctu_width + rx;
342
170k
    const int slice_idx = fc->tab.slice_idx[rs];
343
170k
    VVCTask *t          = ft->tasks + rs;
344
170k
    const SliceContext *sc = fc->slices[slice_idx];
345
346
170k
    schedule_next_parse(s, fc, sc, t);
347
170k
    schedule_inter(s, fc, sc, t, rs);
348
170k
}
349
350
static void task_stage_done(const VVCTask *t, VVCContext *s)
351
115M
{
352
115M
    VVCFrameContext *fc      = t->fc;
353
115M
    VVCFrameThread *ft       = fc->ft;
354
115M
    const VVCTaskStage stage = t->stage;
355
356
291M
#define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage)
357
358
    //this is a reserve map of ready_score, ordered by zigzag
359
115M
    if (stage == VVC_TASK_STAGE_PARSE) {
360
12.7M
        ADD( 0,  1, VVC_TASK_STAGE_DEBLOCK_BS);
361
12.7M
        ADD( 1,  0, VVC_TASK_STAGE_DEBLOCK_BS);
362
12.7M
        if (t->rx < 0 || t->rx >= ft->ctu_width || t->ry < 0 || t->ry >= ft->ctu_height)
363
12.5M
            return;
364
170k
        parse_task_done(s, fc, t->rx, t->ry);
365
102M
    } else if (stage == VVC_TASK_STAGE_RECON) {
366
12.7M
        ADD(-1,  1, VVC_TASK_STAGE_RECON);
367
12.7M
        ADD( 1,  0, VVC_TASK_STAGE_RECON);
368
12.7M
        ADD(-1, -1, VVC_TASK_STAGE_LMCS);
369
12.7M
        ADD( 0, -1, VVC_TASK_STAGE_LMCS);
370
12.7M
        ADD(-1,  0, VVC_TASK_STAGE_LMCS);
371
90.1M
    } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) {
372
12.6M
        ADD( 1,  0,  VVC_TASK_STAGE_DEBLOCK_V);
373
12.6M
        ADD(-1,  0,  VVC_TASK_STAGE_DEBLOCK_H);
374
77.4M
    } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) {
375
12.6M
        ADD( 0,  1,  VVC_TASK_STAGE_DEBLOCK_H);
376
12.6M
        ADD(-1, -1,  VVC_TASK_STAGE_SAO);
377
12.6M
        ADD( 0, -1,  VVC_TASK_STAGE_SAO);
378
12.6M
        ADD(-1,  0,  VVC_TASK_STAGE_SAO);
379
12.6M
        ADD( 1, -1,  VVC_TASK_STAGE_SAO);
380
12.6M
        ADD( 1,  0,  VVC_TASK_STAGE_SAO);
381
64.8M
    } else if (stage == VVC_TASK_STAGE_SAO) {
382
12.6M
        ADD(-1, -1,  VVC_TASK_STAGE_ALF);
383
12.6M
        ADD( 0, -1,  VVC_TASK_STAGE_ALF);
384
12.6M
        ADD(-1,  0,  VVC_TASK_STAGE_ALF);
385
12.6M
        ADD( 1, -1,  VVC_TASK_STAGE_ALF);
386
12.6M
        ADD(-1,  1,  VVC_TASK_STAGE_ALF);
387
12.6M
        ADD( 1,  0,  VVC_TASK_STAGE_ALF);
388
12.6M
        ADD( 0,  1,  VVC_TASK_STAGE_ALF);
389
12.6M
        ADD( 1,  1,  VVC_TASK_STAGE_ALF);
390
12.6M
    }
391
115M
}
392
393
static int task_is_stage_ready(VVCTask *t, int add)
394
15.7M
{
395
15.7M
    const VVCTaskStage stage = t->stage;
396
15.7M
    uint8_t score;
397
15.7M
    if (stage > VVC_TASK_STAGE_ALF)
398
1.54M
        return 0;
399
14.2M
    score = task_get_score(t, stage) + add;
400
14.2M
    return task_has_target_score(t, stage, score);
401
15.7M
}
402
403
static void check_colocation(VVCContext *s, VVCTask *t)
404
1.63M
{
405
1.63M
    const VVCFrameContext *fc = t->fc;
406
407
1.63M
    if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
408
790k
        VVCFrame *col       = fc->ref->collocated_ref;
409
790k
        const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
410
790k
        if (col && first_col) {
411
            //we depend on bottom and right boundary, do not - 1 for y
412
40.0k
            const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
413
40.0k
            add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
414
40.0k
            return;
415
40.0k
        }
416
790k
    }
417
1.59M
    frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
418
1.59M
}
419
420
static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
421
1.53M
{
422
1.53M
    const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
423
1.53M
    VVCTask *t   = ft->tasks + rs;
424
425
1.53M
    frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
426
1.53M
}
427
428
static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
429
1.53M
{
430
1.53M
    VVCFrameContext *fc = lc->fc;
431
1.53M
    VVCFrameThread *ft  = fc->ft;
432
1.53M
    const int ret       = ff_vvc_per_frame_init(fc);
433
434
1.53M
    if (ret < 0)
435
0
        return ret;
436
437
3.07M
    for (int i = 0; i < fc->nb_slices; i++) {
438
1.53M
        SliceContext *sc = fc->slices[i];
439
3.07M
        for (int j = 0; j < sc->nb_eps; j++) {
440
1.53M
            EntryPoint *ep = sc->eps + j;
441
3.16M
            for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
442
1.63M
                const int rs = sc->sh.ctb_addr_in_curr_slice[k];
443
1.63M
                VVCTask *t   = ft->tasks + rs;
444
1.63M
                check_colocation(s, t);
445
1.63M
            }
446
1.53M
            submit_entry_point(s, ft, sc, ep);
447
1.53M
        }
448
1.53M
    }
449
1.53M
    return 0;
450
1.53M
}
451
452
static void report_frame_progress(VVCFrameContext *fc,
453
   const int ry, const VVCProgress idx)
454
272k
{
455
272k
    VVCFrameThread *ft = fc->ft;
456
272k
    const int ctu_size = ft->ctu_size;
457
272k
    int old;
458
459
272k
    if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
460
195k
        int y;
461
195k
        ff_mutex_lock(&ft->lock);
462
195k
        y = old = ft->row_progress[idx];
463
391k
        while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
464
195k
            y++;
465
195k
        if (old != y)
466
195k
            ft->row_progress[idx] = y;
467
        // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock
468
        // We need to unlock ft->lock first
469
195k
        ff_mutex_unlock(&ft->lock);
470
471
195k
        if (old != y) {
472
195k
            const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
473
195k
            ff_vvc_report_progress(fc->ref, idx, progress);
474
195k
        }
475
195k
    }
476
272k
}
477
478
static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
479
1.61M
{
480
1.61M
    int ret;
481
1.61M
    VVCFrameContext *fc = lc->fc;
482
1.61M
    const int rs        = t->rs;
483
1.61M
    const CTU *ctu      = fc->tab.ctus + rs;
484
485
1.61M
    lc->ep = t->ep;
486
487
1.61M
    ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry);
488
1.61M
    if (ret < 0)
489
1.44M
        return ret;
490
491
170k
    if (!ctu->has_dmvr)
492
170k
        report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV);
493
494
170k
    return 0;
495
1.61M
}
496
497
static int run_deblock_bs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
498
170k
{
499
170k
    if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag)
500
163k
        ff_vvc_deblock_bs(lc, t->rx, t->ry, t->rs);
501
502
170k
    return 0;
503
170k
}
504
505
static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
506
170k
{
507
170k
    VVCFrameContext *fc = lc->fc;
508
170k
    const CTU *ctu      = fc->tab.ctus + t->rs;
509
170k
    int ret;
510
511
170k
    ret = ff_vvc_predict_inter(lc, t->rs);
512
170k
    if (ret < 0)
513
0
        return ret;
514
515
170k
    if (ctu->has_dmvr)
516
0
        report_frame_progress(fc, t->ry, VVC_PROGRESS_MV);
517
518
170k
    return 0;
519
170k
}
520
521
static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
522
170k
{
523
170k
    return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
524
170k
}
525
526
static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
527
101k
{
528
101k
    VVCFrameContext *fc = lc->fc;
529
101k
    VVCFrameThread *ft  = fc->ft;
530
101k
    const int ctu_size  = ft->ctu_size;
531
101k
    const int x0        = t->rx * ctu_size;
532
101k
    const int y0        = t->ry * ctu_size;
533
534
101k
    ff_vvc_lmcs_filter(lc, x0, y0);
535
536
101k
    return 0;
537
101k
}
538
539
static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
540
101k
{
541
101k
    VVCFrameContext *fc = lc->fc;
542
101k
    VVCFrameThread *ft  = fc->ft;
543
101k
    const int ctb_size  = ft->ctu_size;
544
101k
    const int x0        = t->rx * ctb_size;
545
101k
    const int y0        = t->ry * ctb_size;
546
547
101k
    if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
548
95.7k
        ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
549
95.7k
        ff_vvc_deblock_vertical(lc, x0, y0, t->rs);
550
95.7k
    }
551
552
101k
    return 0;
553
101k
}
554
555
static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
556
101k
{
557
101k
    VVCFrameContext *fc = lc->fc;
558
101k
    VVCFrameThread *ft  = fc->ft;
559
101k
    const int ctb_size  = ft->ctu_size;
560
101k
    const int x0        = t->rx * ctb_size;
561
101k
    const int y0        = t->ry * ctb_size;
562
563
101k
    if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
564
95.7k
        ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
565
95.7k
        ff_vvc_deblock_horizontal(lc, x0, y0, t->rs);
566
95.7k
    }
567
101k
    if (fc->ps.sps->r->sps_sao_enabled_flag)
568
58.8k
        ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1);
569
570
101k
    return 0;
571
101k
}
572
573
static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
574
101k
{
575
101k
    VVCFrameContext *fc = lc->fc;
576
101k
    VVCFrameThread *ft  = fc->ft;
577
101k
    const int ctb_size  = ft->ctu_size;
578
101k
    const int x0        = t->rx * ctb_size;
579
101k
    const int y0        = t->ry * ctb_size;
580
581
101k
    if (fc->ps.sps->r->sps_sao_enabled_flag) {
582
58.8k
        ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
583
58.8k
        ff_vvc_sao_filter(lc, x0, y0);
584
58.8k
    }
585
586
101k
    if (fc->ps.sps->r->sps_alf_enabled_flag)
587
49.4k
        ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0);
588
589
101k
    return 0;
590
101k
}
591
592
static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
593
101k
{
594
101k
    VVCFrameContext *fc = lc->fc;
595
101k
    VVCFrameThread *ft  = fc->ft;
596
101k
    const int ctu_size  = ft->ctu_size;
597
101k
    const int x0        = t->rx * ctu_size;
598
101k
    const int y0        = t->ry * ctu_size;
599
600
101k
    if (fc->ps.sps->r->sps_alf_enabled_flag) {
601
49.4k
        ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
602
49.4k
        ff_vvc_alf_filter(lc, x0, y0);
603
49.4k
    }
604
101k
    report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL);
605
606
101k
    return 0;
607
101k
}
608
609
const static char* task_name[] = {
610
    "INIT",
611
    "P",
612
    "B",
613
    "I",
614
    "R",
615
    "L",
616
    "V",
617
    "H",
618
    "S",
619
    "A"
620
};
621
622
typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t);
623
624
static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
625
15.7M
{
626
15.7M
    int ret;
627
15.7M
    VVCFrameContext *fc      = t->fc;
628
15.7M
    VVCFrameThread *ft       = fc->ft;
629
15.7M
    const VVCTaskStage stage = t->stage;
630
15.7M
    static const run_func run[] = {
631
15.7M
        run_init,
632
15.7M
        run_parse,
633
15.7M
        run_deblock_bs,
634
15.7M
        run_inter,
635
15.7M
        run_recon,
636
15.7M
        run_lmcs,
637
15.7M
        run_deblock_v,
638
15.7M
        run_deblock_h,
639
15.7M
        run_sao,
640
15.7M
        run_alf,
641
15.7M
    };
642
643
15.7M
    ff_dlog(s->avctx, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry);
644
645
15.7M
    lc->sc = t->sc;
646
647
15.7M
    if (!atomic_load(&ft->ret)) {
648
4.16M
        if ((ret = run[stage](s, lc, t)) < 0) {
649
#ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H
650
            intptr_t zero = 0;
651
#else
652
1.44M
            int zero = 0;
653
1.44M
#endif
654
1.44M
            atomic_compare_exchange_strong(&ft->ret, &zero, ret);
655
1.44M
            av_log(s->avctx, AV_LOG_ERROR,
656
1.44M
                "frame %5d, %s(%3d, %3d) failed with %d\r\n",
657
1.44M
                (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret);
658
1.44M
        }
659
4.16M
        if (!ret)
660
2.72M
            task_stage_done(t, s);
661
4.16M
    }
662
15.7M
    return;
663
15.7M
}
664
665
static int task_run(FFTask *_t, void *local_context, void *user_data)
666
1.70M
{
667
1.70M
    VVCTask *t          = (VVCTask*)_t;
668
1.70M
    VVCContext *s       = (VVCContext *)user_data;
669
1.70M
    VVCLocalContext *lc = local_context;
670
1.70M
    VVCFrameThread *ft  = t->fc->ft;
671
672
1.70M
    lc->fc = t->fc;
673
674
15.7M
    do {
675
15.7M
        task_run_stage(t, s, lc);
676
15.7M
        t->stage++;
677
15.7M
    } while (task_is_stage_ready(t, 1));
678
679
1.70M
    if (t->stage != VVC_TASK_STAGE_LAST)
680
161k
        frame_thread_add_score(s, ft, t->rx, t->ry, t->stage);
681
682
1.70M
    sheduled_done(ft, &ft->nb_scheduled_tasks);
683
684
1.70M
    return 0;
685
1.70M
}
686
687
av_cold FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
688
15.4k
{
689
15.4k
    FFTaskCallbacks callbacks = {
690
15.4k
        s,
691
15.4k
        sizeof(VVCLocalContext),
692
15.4k
        PRIORITY_LOWEST + 1,
693
15.4k
        task_run,
694
15.4k
    };
695
15.4k
    return ff_executor_alloc(&callbacks, thread_count);
696
15.4k
}
697
698
av_cold void ff_vvc_executor_free(FFExecutor **e)
699
15.4k
{
700
15.4k
    ff_executor_free(e);
701
15.4k
}
702
703
void ff_vvc_frame_thread_free(VVCFrameContext *fc)
704
316k
{
705
316k
    VVCFrameThread *ft = fc->ft;
706
707
316k
    if (!ft)
708
246k
        return;
709
710
69.8k
    ff_mutex_destroy(&ft->lock);
711
69.8k
    ff_cond_destroy(&ft->cond);
712
69.8k
    av_freep(&ft->rows);
713
69.8k
    av_freep(&ft->tasks);
714
69.8k
    av_freep(&ft);
715
69.8k
}
716
717
static void frame_thread_init_score(VVCFrameContext *fc)
718
1.56M
{
719
1.56M
    const VVCFrameThread *ft = fc->ft;
720
1.56M
    VVCTask task;
721
722
1.56M
    task_init(&task, VVC_TASK_STAGE_PARSE, fc, 0, 0);
723
724
15.6M
    for (int i = VVC_TASK_STAGE_PARSE; i < VVC_TASK_STAGE_LAST; i++) {
725
14.0M
        task.stage = i;
726
727
56.3M
        for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) {
728
42.2M
            task.ry = -1;                           //top
729
42.2M
            task_stage_done(&task, NULL);
730
42.2M
            task.ry = ft->ctu_height;               //bottom
731
42.2M
            task_stage_done(&task, NULL);
732
42.2M
        }
733
734
28.1M
        for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) {
735
14.1M
            task.rx = -1;                           //left
736
14.1M
            task_stage_done(&task, NULL);
737
14.1M
            task.rx = ft->ctu_width;                //right
738
14.1M
            task_stage_done(&task, NULL);
739
14.1M
        }
740
14.0M
    }
741
1.56M
}
742
743
int ff_vvc_frame_thread_init(VVCFrameContext *fc)
744
1.56M
{
745
1.56M
    const VVCSPS *sps  = fc->ps.sps;
746
1.56M
    const VVCPPS *pps  = fc->ps.pps;
747
1.56M
    VVCFrameThread *ft = fc->ft;
748
1.56M
    int ret;
749
750
1.56M
    if (!ft || ft->ctu_width != pps->ctb_width ||
751
1.49M
        ft->ctu_height != pps->ctb_height ||
752
1.49M
        ft->ctu_size != sps->ctb_size_y) {
753
754
69.8k
        ff_vvc_frame_thread_free(fc);
755
69.8k
        ft = av_calloc(1, sizeof(*fc->ft));
756
69.8k
        if (!ft)
757
0
            return AVERROR(ENOMEM);
758
759
69.8k
        ft->ctu_width  = fc->ps.pps->ctb_width;
760
69.8k
        ft->ctu_height = fc->ps.pps->ctb_height;
761
69.8k
        ft->ctu_count  = fc->ps.pps->ctb_count;
762
69.8k
        ft->ctu_size   = fc->ps.sps->ctb_size_y;
763
764
69.8k
        ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows));
765
69.8k
        if (!ft->rows)
766
0
            goto fail;
767
768
69.8k
        ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks));
769
69.8k
        if (!ft->tasks)
770
0
            goto fail;
771
772
69.8k
        if ((ret = ff_cond_init(&ft->cond, NULL)))
773
0
            goto fail;
774
775
69.8k
        if ((ret = ff_mutex_init(&ft->lock, NULL))) {
776
0
            ff_cond_destroy(&ft->cond);
777
0
            goto fail;
778
0
        }
779
69.8k
    }
780
1.56M
    fc->ft = ft;
781
1.56M
    ft->ret = 0;
782
3.13M
    for (int y = 0; y < ft->ctu_height; y++) {
783
1.56M
        VVCRowThread *row = ft->rows + y;
784
1.56M
        memset(row->col_progress, 0, sizeof(row->col_progress));
785
1.56M
    }
786
787
3.25M
    for (int rs = 0; rs < ft->ctu_count; rs++) {
788
1.69M
        VVCTask *t = ft->tasks + rs;
789
1.69M
        task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width);
790
1.69M
    }
791
792
1.56M
    memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
793
794
1.56M
    frame_thread_init_score(fc);
795
796
1.56M
    return 0;
797
798
0
fail:
799
0
    if (ft) {
800
0
        av_freep(&ft->rows);
801
0
        av_freep(&ft->tasks);
802
0
        av_freep(&ft);
803
0
    }
804
805
0
    return AVERROR(ENOMEM);
806
1.56M
}
807
808
int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
809
1.53M
{
810
1.53M
    VVCFrameThread *ft = fc->ft;
811
812
3.07M
    for (int i = 0; i < fc->nb_slices; i++) {
813
1.53M
        SliceContext *sc = fc->slices[i];
814
3.07M
        for (int j = 0; j < sc->nb_eps; j++) {
815
1.53M
            EntryPoint *ep = sc->eps + j;
816
3.20M
            for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
817
1.66M
                const int rs = sc->sh.ctb_addr_in_curr_slice[k];
818
1.66M
                VVCTask *t   = ft->tasks + rs;
819
1.66M
                const int ret = task_init_parse(t, sc, ep, k);
820
1.66M
                if (ret < 0)
821
1.15k
                    return ret;
822
1.66M
            }
823
1.53M
        }
824
1.53M
    }
825
3.16M
    for (int rs = 0; rs < ft->ctu_count; rs++) {
826
1.63M
        const VVCTask *t = ft->tasks + rs;
827
1.63M
        if (!t->sc) {
828
0
            av_log(s->avctx, AV_LOG_ERROR, "frame %5d, CTU(%d, %d) not belong to any slice\r\n", (int)fc->decode_order, t->rx, t->ry);
829
0
            return AVERROR_INVALIDDATA;
830
0
        }
831
1.63M
    }
832
1.53M
    frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
833
834
1.53M
    return 0;
835
1.53M
}
836
837
int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
838
1.53M
{
839
1.53M
    VVCFrameThread *ft = fc->ft;
840
841
1.53M
    ff_mutex_lock(&ft->lock);
842
843
1.53M
    while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners))
844
0
        ff_cond_wait(&ft->cond, &ft->lock);
845
846
1.53M
    ff_mutex_unlock(&ft->lock);
847
1.53M
    ff_vvc_report_frame_finished(fc->ref);
848
849
1.53M
    ff_dlog(s->avctx, "frame %5d done\r\n", (int)fc->decode_order);
850
1.53M
    return ft->ret;
851
1.53M
}