Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/decode.c
Line
Count
Source
1
/*
2
 * Copyright © 2018-2021, VideoLAN and dav1d authors
3
 * Copyright © 2018, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <errno.h>
31
#include <limits.h>
32
#include <string.h>
33
#include <stdio.h>
34
#include <inttypes.h>
35
36
#include "dav1d/data.h"
37
38
#include "common/frame.h"
39
#include "common/intops.h"
40
41
#include "src/ctx.h"
42
#include "src/decode.h"
43
#include "src/dequant_tables.h"
44
#include "src/env.h"
45
#include "src/filmgrain.h"
46
#include "src/log.h"
47
#include "src/qm.h"
48
#include "src/recon.h"
49
#include "src/ref.h"
50
#include "src/tables.h"
51
#include "src/thread_task.h"
52
#include "src/warpmv.h"
53
54
static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
55
                              const Dav1dFrameHeader *const frame_hdr,
56
                              const int qidx, uint16_t (*dq)[3][2])
57
64.3k
{
58
350k
    for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
59
286k
        const int yac = frame_hdr->segmentation.enabled ?
60
253k
            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
61
286k
        const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
62
286k
        const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
63
286k
        const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
64
286k
        const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
65
286k
        const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);
66
67
286k
        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
68
286k
        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
69
286k
        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
70
286k
        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
71
286k
        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
72
286k
        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];
73
286k
    }
74
64.3k
}
75
76
static int read_mv_component_diff(MsacContext *const msac,
77
                                  CdfMvComponent *const mv_comp,
78
                                  const int mv_prec)
79
173k
{
80
173k
    const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
81
173k
    const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
82
173k
    int up, fp = 3, hp = 1;
83
84
173k
    if (!cl) {
85
113k
        up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
86
113k
        if (mv_prec >= 0) {  // !force_integer_mv
87
34.4k
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
88
34.4k
            if (mv_prec > 0) // allow_high_precision_mv
89
15.1k
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
90
34.4k
        }
91
113k
    } else {
92
59.8k
        up = 1 << cl;
93
510k
        for (int n = 0; n < cl; n++)
94
450k
            up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
95
59.8k
        if (mv_prec >= 0) {  // !force_integer_mv
96
6.35k
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
97
6.35k
            if (mv_prec > 0) // allow_high_precision_mv
98
2.59k
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
99
6.35k
        }
100
59.8k
    }
101
102
173k
    const int diff = ((up << 3) | (fp << 1) | hp) + 1;
103
104
173k
    return sign ? -diff : diff;
105
173k
}
106
107
static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv,
108
                             const int mv_prec)
109
120k
{
110
120k
    MsacContext *const msac = &ts->msac;
111
120k
    const enum MVJoint mv_joint =
112
120k
        dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
113
120k
    if (mv_joint & MV_JOINT_V)
114
87.4k
        ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
115
120k
    if (mv_joint & MV_JOINT_H)
116
86.1k
        ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
117
120k
}
118
119
static void read_tx_tree(Dav1dTaskContext *const t,
120
                         const enum RectTxfmSize from,
121
                         const int depth, uint16_t *const masks,
122
                         const int x_off, const int y_off)
123
55.8k
{
124
55.8k
    const Dav1dFrameContext *const f = t->f;
125
55.8k
    const int bx4 = t->bx & 31, by4 = t->by & 31;
126
55.8k
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
127
55.8k
    const int txw = t_dim->lw, txh = t_dim->lh;
128
55.8k
    int is_split;
129
130
55.8k
    if (depth < 2 && from > (int) TX_4X4) {
131
46.7k
        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
132
46.7k
        const int a = t->a->tx[bx4] < txw;
133
46.7k
        const int l = t->l.tx[by4] < txh;
134
135
46.7k
        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
136
46.7k
                       t->ts->cdf.m.txpart[cat][a + l]);
137
46.7k
        if (is_split)
138
13.0k
            masks[depth] |= 1 << (y_off * 4 + x_off);
139
46.7k
    } else {
140
9.10k
        is_split = 0;
141
9.10k
    }
142
143
55.8k
    if (is_split && t_dim->max > TX_8X8) {
144
9.34k
        const enum RectTxfmSize sub = t_dim->sub;
145
9.34k
        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
146
9.34k
        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
147
148
9.34k
        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);
149
9.34k
        t->bx += txsw;
150
9.34k
        if (txw >= txh && t->bx < f->bw)
151
6.77k
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);
152
9.34k
        t->bx -= txsw;
153
9.34k
        t->by += txsh;
154
9.34k
        if (txh >= txw && t->by < f->bh) {
155
5.94k
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);
156
5.94k
            t->bx += txsw;
157
5.94k
            if (txw >= txh && t->bx < f->bw)
158
3.55k
                read_tx_tree(t, sub, depth + 1, masks,
159
3.55k
                             x_off * 2 + 1, y_off * 2 + 1);
160
5.94k
            t->bx -= txsw;
161
5.94k
        }
162
9.34k
        t->by -= txsh;
163
46.5k
    } else {
164
46.5k
        dav1d_memset_pow2[t_dim->lw](&t->a->tx[bx4], is_split ? TX_4X4 : txw);
165
46.5k
        dav1d_memset_pow2[t_dim->lh](&t->l.tx[by4], is_split ? TX_4X4 : txh);
166
46.5k
    }
167
55.8k
}
168
169
939k
static int neg_deinterleave(int diff, int ref, int max) {
170
939k
    if (!ref) return diff;
171
778k
    if (ref >= (max - 1)) return max - diff - 1;
172
724k
    if (2 * ref < max) {
173
430k
        if (diff <= 2 * ref) {
174
372k
            if (diff & 1)
175
52.1k
                return ref + ((diff + 1) >> 1);
176
320k
            else
177
320k
                return ref - (diff >> 1);
178
372k
        }
179
58.0k
        return diff;
180
430k
    } else {
181
294k
        if (diff <= 2 * (max - ref - 1)) {
182
258k
            if (diff & 1)
183
39.9k
                return ref + ((diff + 1) >> 1);
184
218k
            else
185
218k
                return ref - (diff >> 1);
186
258k
        }
187
35.3k
        return max - (diff + 1);
188
294k
    }
189
724k
}
190
191
static void find_matching_ref(const Dav1dTaskContext *const t,
192
                              const enum EdgeFlags intra_edge_flags,
193
                              const int bw4, const int bh4,
194
                              const int w4, const int h4,
195
                              const int have_left, const int have_top,
196
                              const int ref, uint64_t masks[2])
197
42.5k
{
198
42.5k
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
199
42.5k
    int count = 0;
200
42.5k
    int have_topleft = have_top && have_left;
201
42.5k
    int have_topright = imax(bw4, bh4) < 32 &&
202
39.7k
                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
203
26.1k
                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);
204
205
97.7k
#define bs(rp) dav1d_block_dimensions[(rp)->bs]
206
108k
#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
207
208
42.5k
    if (have_top) {
209
32.1k
        const refmvs_block *r2 = &r[-1][t->bx];
210
32.1k
        if (matches(r2)) {
211
30.6k
            masks[0] |= 1;
212
30.6k
            count = 1;
213
30.6k
        }
214
32.1k
        int aw4 = bs(r2)[0];
215
32.1k
        if (aw4 >= bw4) {
216
28.5k
            const int off = t->bx & (aw4 - 1);
217
28.5k
            if (off) have_topleft = 0;
218
28.5k
            if (aw4 - off > bw4) have_topright = 0;
219
28.5k
        } else {
220
3.62k
            unsigned mask = 1 << aw4;
221
8.61k
            for (int x = aw4; x < w4; x += aw4) {
222
5.00k
                r2 += aw4;
223
5.00k
                if (matches(r2)) {
224
4.69k
                    masks[0] |= mask;
225
4.69k
                    if (++count >= 8) return;
226
4.69k
                }
227
4.98k
                aw4 = bs(r2)[0];
228
4.98k
                mask <<= aw4;
229
4.98k
            }
230
3.62k
        }
231
32.1k
    }
232
42.5k
    if (have_left) {
233
37.7k
        /*const*/ refmvs_block *const *r2 = r;
234
37.7k
        if (matches(&r2[0][t->bx - 1])) {
235
36.3k
            masks[1] |= 1;
236
36.3k
            if (++count >= 8) return;
237
36.3k
        }
238
37.7k
        int lh4 = bs(&r2[0][t->bx - 1])[1];
239
37.7k
        if (lh4 >= bh4) {
240
32.1k
            if (t->by & (lh4 - 1)) have_topleft = 0;
241
32.1k
        } else {
242
5.61k
            unsigned mask = 1 << lh4;
243
12.8k
            for (int y = lh4; y < h4; y += lh4) {
244
7.36k
                r2 += lh4;
245
7.36k
                if (matches(&r2[0][t->bx - 1])) {
246
6.95k
                    masks[1] |= mask;
247
6.95k
                    if (++count >= 8) return;
248
6.95k
                }
249
7.28k
                lh4 = bs(&r2[0][t->bx - 1])[1];
250
7.28k
                mask <<= lh4;
251
7.28k
            }
252
5.61k
        }
253
37.7k
    }
254
42.4k
    if (have_topleft && matches(&r[-1][t->bx - 1])) {
255
14.8k
        masks[1] |= 1ULL << 32;
256
14.8k
        if (++count >= 8) return;
257
14.8k
    }
258
42.4k
    if (have_topright && matches(&r[-1][t->bx + bw4])) {
259
9.78k
        masks[0] |= 1ULL << 32;
260
9.78k
    }
261
42.4k
#undef matches
262
42.4k
}
263
264
static void derive_warpmv(const Dav1dTaskContext *const t,
265
                          const int bw4, const int bh4,
266
                          const uint64_t masks[2], const union mv mv,
267
                          Dav1dWarpedMotionParams *const wmp)
268
2.34k
{
269
2.34k
    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
270
2.34k
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
271
272
7.03k
#define add_sample(dx, dy, sx, sy, rp) do { \
273
7.03k
    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
274
7.03k
    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
275
7.03k
    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
276
7.03k
    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
277
7.03k
    np++; \
278
7.03k
} while (0)
279
280
    // use masks[] to find the projectable motion vectors in the edges
281
2.34k
    if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) {
282
728
        const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1);
283
728
        add_sample(-off, 0, 1, -1, &r[-1][t->bx]);
284
3.36k
    } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top
285
1.75k
        const int tz = ctz(xmask);
286
1.75k
        off += tz;
287
1.75k
        xmask >>= tz;
288
1.75k
        add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);
289
1.75k
        xmask &= ~1;
290
1.75k
    }
291
2.34k
    if (np < 8 && masks[1] == 1) {
292
760
        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
293
760
        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
294
3.31k
    } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left
295
1.73k
        const int tz = ctz(ymask);
296
1.73k
        off += tz;
297
1.73k
        ymask >>= tz;
298
1.73k
        add_sample(0, off, -1, 1, &r[off][t->bx - 1]);
299
1.73k
        ymask &= ~1;
300
1.73k
    }
301
2.34k
    if (np < 8 && masks[1] >> 32) // top/left
302
1.25k
        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
303
2.34k
    if (np < 8 && masks[0] >> 32) // top/right
304
810
        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
305
2.34k
    assert(np > 0 && np <= 8);
306
2.34k
#undef bs
307
308
    // select according to motion vector difference against a threshold
309
2.34k
    int mvd[8], ret = 0;
310
2.34k
    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
311
9.37k
    for (int i = 0; i < np; i++) {
312
7.03k
        mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) +
313
7.03k
                 abs(pts[i][1][1] - pts[i][0][1] - mv.y);
314
7.03k
        if (mvd[i] > thresh)
315
2.52k
            mvd[i] = -1;
316
4.50k
        else
317
4.50k
            ret++;
318
7.03k
    }
319
2.34k
    if (!ret) {
320
342
        ret = 1;
321
2.61k
    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
322
2.27k
        while (mvd[i] != -1) i++;
323
2.17k
        while (mvd[j] == -1) j--;
324
1.22k
        assert(i != j);
325
1.22k
        if (i > j) break;
326
        // replace the discarded samples;
327
612
        mvd[i] = mvd[j];
328
612
        memcpy(pts[i], pts[j], sizeof(*pts));
329
612
    }
330
331
2.34k
    if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
332
2.28k
        !dav1d_get_shear_params(wmp))
333
2.10k
    {
334
2.10k
        wmp->type = DAV1D_WM_TYPE_AFFINE;
335
2.10k
    } else
336
240
        wmp->type = DAV1D_WM_TYPE_IDENTITY;
337
2.34k
}
338
339
42.8k
static inline int findoddzero(const uint8_t *buf, int len) {
340
43.3k
    for (int n = 0; n < len; n++)
341
43.0k
        if (!buf[n * 2]) return 1;
342
317
    return 0;
343
42.8k
}
344
345
// meant to be SIMD'able, so that theoretical complexity of this function
346
// times block size goes from w4*h4 to w4+h4-1
347
// a and b are previous two lines containing (a) top/left entries or (b)
348
// top/left entries, with a[0] being either the first top or first left entry,
349
// depending on top_offset being 1 or 0, and b being the first top/left entry
350
// for whichever has one. left_offset indicates whether the (len-1)th entry
351
// has a left neighbour.
352
// output is order[] and ctx for each member of this diagonal.
353
static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride,
354
                          const int i, const int first, const int last,
355
                          uint8_t (*const order)[8], uint8_t *const ctx)
356
362k
{
357
362k
    int have_top = i > first;
358
359
362k
    assert(pal_idx);
360
362k
    pal_idx += first + (i - first) * stride;
361
3.20M
    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
362
2.84M
        const int have_left = j > 0;
363
364
2.84M
        assert(have_left || have_top);
365
366
4.06M
#define add(v_in) do { \
367
4.06M
        const int v = v_in; \
368
4.06M
        assert((unsigned)v < 8U); \
369
4.06M
        order[n][o_idx++] = v; \
370
4.06M
        mask |= 1 << v; \
371
4.06M
    } while (0)
372
373
2.84M
        unsigned mask = 0;
374
2.84M
        int o_idx = 0;
375
2.84M
        if (!have_left) {
376
166k
            ctx[n] = 0;
377
166k
            add(pal_idx[-stride]);
378
2.67M
        } else if (!have_top) {
379
195k
            ctx[n] = 0;
380
195k
            add(pal_idx[-1]);
381
2.48M
        } else {
382
2.48M
            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
383
2.48M
            const int same_t_l = t == l;
384
2.48M
            const int same_t_tl = t == tl;
385
2.48M
            const int same_l_tl = l == tl;
386
2.48M
            const int same_all = same_t_l & same_t_tl & same_l_tl;
387
388
2.48M
            if (same_all) {
389
1.44M
                ctx[n] = 4;
390
1.44M
                add(t);
391
1.44M
            } else if (same_t_l) {
392
72.1k
                ctx[n] = 3;
393
72.1k
                add(t);
394
72.1k
                add(tl);
395
966k
            } else if (same_t_tl | same_l_tl) {
396
786k
                ctx[n] = 2;
397
786k
                add(tl);
398
786k
                add(same_t_tl ? l : t);
399
786k
            } else {
400
179k
                ctx[n] = 1;
401
179k
                add(imin(t, l));
402
179k
                add(imax(t, l));
403
179k
                add(tl);
404
179k
            }
405
2.48M
        }
406
25.5M
        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)
407
22.7M
            if (!(mask & m))
408
18.6M
                order[n][o_idx++] = bit;
409
2.84M
        assert(o_idx == 8);
410
2.84M
#undef add
411
2.84M
    }
412
362k
}
413
414
static void read_pal_indices(Dav1dTaskContext *const t,
415
                             uint8_t *const pal_idx,
416
                             const int pal_sz, const int pl,
417
                             const int w4, const int h4,
418
                             const int bw4, const int bh4)
419
16.9k
{
420
16.9k
    Dav1dTileState *const ts = t->ts;
421
16.9k
    const ptrdiff_t stride = bw4 * 4;
422
16.9k
    assert(pal_idx);
423
16.9k
    uint8_t *const pal_tmp = t->scratch.pal_idx_uv;
424
16.9k
    pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, pal_sz);
425
16.9k
    uint16_t (*const color_map_cdf)[8] =
426
16.9k
        ts->cdf.m.color_map[pl][pal_sz - 2];
427
16.9k
    uint8_t (*const order)[8] = t->scratch.pal_order;
428
16.9k
    uint8_t *const ctx = t->scratch.pal_ctx;
429
379k
    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
430
        // top/left-to-bottom/right diagonals ("wave-front")
431
362k
        const int first = imin(i, w4 * 4 - 1);
432
362k
        const int last = imax(0, i - h4 * 4 + 1);
433
362k
        order_palette(pal_tmp, stride, i, first, last, order, ctx);
434
3.20M
        for (int j = first, m = 0; j >= last; j--, m++) {
435
2.84M
            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
436
2.84M
                                      color_map_cdf[ctx[m]], pal_sz - 1);
437
2.84M
            pal_tmp[(i - j) * stride + j] = order[m][color_idx];
438
2.84M
        }
439
362k
    }
440
441
16.9k
    t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4,
442
16.9k
                                 w4 * 4, h4 * 4);
443
16.9k
}
444
445
static void read_vartx_tree(Dav1dTaskContext *const t,
446
                            Av1Block *const b, const enum BlockSize bs,
447
                            const int bx4, const int by4)
448
206k
{
449
206k
    const Dav1dFrameContext *const f = t->f;
450
206k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
451
206k
    const int bw4 = b_dim[0], bh4 = b_dim[1];
452
453
    // var-tx tree coding
454
206k
    uint16_t tx_split[2] = { 0 };
455
206k
    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
456
206k
    if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] ||
457
100k
                     b->max_ytx == TX_4X4))
458
14.3k
    {
459
14.3k
        b->max_ytx = b->uvtx = TX_4X4;
460
14.3k
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
461
10.4k
            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], TX_4X4);
462
10.4k
            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], TX_4X4);
463
10.4k
        }
464
192k
    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
465
163k
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
466
25.3k
            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], b_dim[2 + 0]);
467
25.3k
            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], b_dim[2 + 1]);
468
25.3k
        }
469
163k
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
470
163k
    } else {
471
28.9k
        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
472
28.9k
        int y, x, y_off, x_off;
473
28.9k
        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
474
58.8k
        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
475
60.1k
            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
476
30.2k
                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
477
                // contexts are updated inside read_tx_tree()
478
30.2k
                t->bx += ytx->w;
479
30.2k
            }
480
29.8k
            t->bx -= x;
481
29.8k
            t->by += ytx->h;
482
29.8k
        }
483
28.9k
        t->by -= y;
484
28.9k
        if (DEBUG_BLOCK_INFO)
485
0
            printf("Post-vartxtree[%x/%x]: r=%d\n",
486
0
                   tx_split[0], tx_split[1], t->ts->msac.rng);
487
28.9k
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
488
28.9k
    }
489
206k
    assert(!(tx_split[0] & ~0x33));
490
206k
    b->tx_split0 = (uint8_t)tx_split[0];
491
206k
    b->tx_split1 = tx_split[1];
492
206k
}
493
494
static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f,
495
                                            const int by, const int bx,
496
                                            const int w4, int h4,
497
                                            const uint8_t *ref_seg_map,
498
                                            const ptrdiff_t stride)
499
76.7k
{
500
76.7k
    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
501
502
76.7k
    unsigned seg_id = 8;
503
76.7k
    ref_seg_map += by * stride + bx;
504
140k
    do {
505
1.06M
        for (int x = 0; x < w4; x++)
506
924k
            seg_id = imin(seg_id, ref_seg_map[x]);
507
140k
        ref_seg_map += stride;
508
140k
    } while (--h4 > 0 && seg_id);
509
76.7k
    assert(seg_id < 8);
510
511
76.7k
    return seg_id;
512
76.7k
}
513
514
static inline void splat_oneref_mv(const Dav1dContext *const c,
515
                                   Dav1dTaskContext *const t,
516
                                   const enum BlockSize bs,
517
                                   const Av1Block *const b,
518
                                   const int bw4, const int bh4)
519
107k
{
520
107k
    const enum InterPredMode mode = b->inter_mode;
521
107k
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
522
107k
        .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 },
523
107k
        .mv.mv[0] = b->mv[0],
524
107k
        .bs = bs,
525
107k
        .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2),
526
107k
    };
527
107k
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
528
107k
}
529
530
static inline void splat_intrabc_mv(const Dav1dContext *const c,
531
                                    Dav1dTaskContext *const t,
532
                                    const enum BlockSize bs,
533
                                    const Av1Block *const b,
534
                                    const int bw4, const int bh4)
535
85.0k
{
536
85.0k
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
537
85.0k
        .ref.ref = { 0, -1 },
538
85.0k
        .mv.mv[0] = b->mv[0],
539
85.0k
        .bs = bs,
540
85.0k
        .mf = 0,
541
85.0k
    };
542
85.0k
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
543
85.0k
}
544
545
static inline void splat_tworef_mv(const Dav1dContext *const c,
546
                                   Dav1dTaskContext *const t,
547
                                   const enum BlockSize bs,
548
                                   const Av1Block *const b,
549
                                   const int bw4, const int bh4)
550
13.8k
{
551
13.8k
    assert(bw4 >= 2 && bh4 >= 2);
552
13.8k
    const enum CompInterPredMode mode = b->inter_mode;
553
13.8k
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
554
13.8k
        .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 },
555
13.8k
        .mv.mv = { b->mv[0], b->mv[1] },
556
13.8k
        .bs = bs,
557
13.8k
        .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2,
558
13.8k
    };
559
13.8k
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
560
13.8k
}
561
562
static inline void splat_intraref(const Dav1dContext *const c,
563
                                  Dav1dTaskContext *const t,
564
                                  const enum BlockSize bs,
565
                                  const int bw4, const int bh4)
566
222k
{
567
222k
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
568
222k
        .ref.ref = { 0, -1 },
569
222k
        .mv.mv[0].n = INVALID_MV,
570
222k
        .bs = bs,
571
222k
        .mf = 0,
572
222k
    };
573
222k
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
574
222k
}
575
576
static void mc_lowest_px(int *const dst, const int by4, const int bh4,
577
                         const int mvy, const int ss_ver,
578
                         const struct ScalableMotionParams *const smp)
579
322k
{
580
322k
    const int v_mul = 4 >> ss_ver;
581
322k
    if (!smp->scale) {
582
269k
        const int my = mvy >> (3 + ss_ver), dy = mvy & (15 >> !ss_ver);
583
269k
        *dst = imax(*dst, (by4 + bh4) * v_mul + my + 4 * !!dy);
584
269k
    } else {
585
53.7k
        int y = (by4 * v_mul << 4) + mvy * (1 << !ss_ver);
586
53.7k
        const int64_t tmp = (int64_t)(y) * smp->scale + (smp->scale - 0x4000) * 8;
587
53.7k
        y = apply_sign64((int)((llabs(tmp) + 128) >> 8), tmp) + 32;
588
53.7k
        const int bottom = ((y + (bh4 * v_mul - 1) * smp->step) >> 10) + 1 + 4;
589
53.7k
        *dst = imax(*dst, bottom);
590
53.7k
    }
591
322k
}
592
593
static ALWAYS_INLINE void affine_lowest_px(Dav1dTaskContext *const t, int *const dst,
594
                                           const uint8_t *const b_dim,
595
                                           const Dav1dWarpedMotionParams *const wmp,
596
                                           const int ss_ver, const int ss_hor)
597
12.8k
{
598
12.8k
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
599
12.8k
    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
600
12.8k
    const int32_t *const mat = wmp->matrix;
601
12.8k
    const int y = b_dim[1] * v_mul - 8; // lowest y
602
603
12.8k
    const int src_y = t->by * 4 + ((y + 4) << ss_ver);
604
12.8k
    const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
605
    // check left- and right-most blocks
606
31.2k
    for (int x = 0; x < b_dim[0] * h_mul; x += imax(8, b_dim[0] * h_mul - 8)) {
607
        // calculate transformation relative to center of 8x8 block in
608
        // luma pixel units
609
18.4k
        const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
610
18.4k
        const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
611
18.4k
        const int dy = (int) (mvy >> 16) - 4;
612
18.4k
        *dst = imax(*dst, dy + 4 + 8);
613
18.4k
    }
614
12.8k
}
615
616
static NOINLINE void affine_lowest_px_luma(Dav1dTaskContext *const t, int *const dst,
617
                                           const uint8_t *const b_dim,
618
                                           const Dav1dWarpedMotionParams *const wmp)
619
12.6k
{
620
12.6k
    affine_lowest_px(t, dst, b_dim, wmp, 0, 0);
621
12.6k
}
622
623
static NOINLINE void affine_lowest_px_chroma(Dav1dTaskContext *const t, int *const dst,
624
                                             const uint8_t *const b_dim,
625
                                             const Dav1dWarpedMotionParams *const wmp)
626
6.32k
{
627
6.32k
    const Dav1dFrameContext *const f = t->f;
628
6.32k
    assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400);
629
6.32k
    if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444)
630
6.20k
        affine_lowest_px_luma(t, dst, b_dim, wmp);
631
119
    else
632
119
        affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1);
633
6.32k
}
634
635
static void obmc_lowest_px(Dav1dTaskContext *const t,
636
                           int (*const dst)[2], const int is_chroma,
637
                           const uint8_t *const b_dim,
638
                           const int bx4, const int by4, const int w4, const int h4)
639
41.1k
{
640
41.1k
    assert(!(t->bx & 1) && !(t->by & 1));
641
41.1k
    const Dav1dFrameContext *const f = t->f;
642
41.1k
    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
643
41.1k
    const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
644
41.1k
    const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
645
41.1k
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
646
647
41.1k
    if (t->by > t->ts->tiling.row_start &&
648
34.8k
        (!is_chroma || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
649
34.6k
    {
650
72.4k
        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
651
            // only odd blocks are considered for overlap handling, hence +1
652
37.8k
            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
653
37.8k
            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];
654
655
37.8k
            if (a_r->ref.ref[0] > 0) {
656
37.7k
                const int oh4 = imin(b_dim[1], 16) >> 1;
657
37.7k
                mc_lowest_px(&dst[a_r->ref.ref[0] - 1][is_chroma], t->by,
658
37.7k
                             (oh4 * 3 + 3) >> 2, a_r->mv.mv[0].y, ss_ver,
659
37.7k
                             &f->svc[a_r->ref.ref[0] - 1][1]);
660
37.7k
                i++;
661
37.7k
            }
662
37.8k
            x += imax(a_b_dim[0], 2);
663
37.8k
        }
664
34.6k
    }
665
666
41.1k
    if (t->bx > t->ts->tiling.col_start)
667
75.3k
        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
668
            // only odd blocks are considered for overlap handling, hence +1
669
38.8k
            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
670
38.8k
            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];
671
672
38.8k
            if (l_r->ref.ref[0] > 0) {
673
38.7k
                const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
674
38.7k
                mc_lowest_px(&dst[l_r->ref.ref[0] - 1][is_chroma],
675
38.7k
                             t->by + y, oh4, l_r->mv.mv[0].y, ss_ver,
676
38.7k
                             &f->svc[l_r->ref.ref[0] - 1][1]);
677
38.7k
                i++;
678
38.7k
            }
679
38.8k
            y += imax(l_b_dim[1], 2);
680
38.8k
        }
681
41.1k
}
682
683
static int decode_b(Dav1dTaskContext *const t,
684
                    const enum BlockLevel bl,
685
                    const enum BlockSize bs,
686
                    const enum BlockPartition bp,
687
3.05M
                    const enum EdgeFlags intra_edge_flags) {
688
3.05M
    Dav1dTileState *const ts = t->ts;
689
3.05M
    const Dav1dFrameContext *const f = t->f;
690
3.05M
    Av1Block b_mem, *const b = t->frame_thread.pass ?
691
18.4E
        &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
692
3.05M
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
693
3.05M
    const int bx4 = t->bx & 31, by4 = t->by & 31;
694
3.05M
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
695
3.05M
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
696
3.05M
    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
697
3.05M
    const int bw4 = b_dim[0], bh4 = b_dim[1];
698
3.05M
    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
699
3.05M
    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
700
3.05M
    const int have_left = t->bx > ts->tiling.col_start;
701
3.05M
    const int have_top = t->by > ts->tiling.row_start;
702
3.05M
    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
703
2.94M
                           (bw4 > ss_hor || t->bx & 1) &&
704
2.75M
                           (bh4 > ss_ver || t->by & 1);
705
706
3.05M
    if (t->frame_thread.pass == 2) {
707
1.30M
        if (b->intra) {
708
1.15M
            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
709
710
1.15M
            const enum IntraPredMode y_mode_nofilt =
711
1.15M
                b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
712
1.15M
#define set_ctx(rep_macro) \
713
2.30M
            rep_macro(edge->mode, off, y_mode_nofilt); \
714
2.30M
            rep_macro(edge->intra, off, 1)
715
1.15M
            BlockContext *edge = t->a;
716
3.45M
            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
717
2.30M
                case_set(b_dim[2 + i]);
718
2.30M
            }
719
1.15M
#undef set_ctx
720
1.15M
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
721
9.85k
                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
722
54.4k
                for (int x = 0; x < bw4; x++) {
723
44.6k
                    r[x].ref.ref[0] = 0;
724
44.6k
                    r[x].bs = bs;
725
44.6k
                }
726
9.85k
                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
727
75.0k
                for (int y = 0; y < bh4 - 1; y++) {
728
65.1k
                    rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
729
65.1k
                    rr[y][t->bx + bw4 - 1].bs = bs;
730
65.1k
                }
731
9.85k
            }
732
733
1.15M
            if (has_chroma) {
734
954k
                uint8_t uv_mode = b->uv_mode;
735
954k
                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
736
954k
                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
737
954k
            }
738
1.15M
        } else {
739
154k
            if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
740
110k
                b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
741
2.20k
            {
742
2.20k
                if (b->matrix[0] == INT16_MIN) {
743
236
                    t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;
744
1.96k
                } else {
745
1.96k
                    t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
746
1.96k
                    t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
747
1.96k
                    t->warpmv.matrix[3] = b->matrix[1];
748
1.96k
                    t->warpmv.matrix[4] = b->matrix[2];
749
1.96k
                    t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
750
1.96k
                    dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,
751
1.96k
                                          t->bx, t->by);
752
1.96k
                    dav1d_get_shear_params(&t->warpmv);
753
1.96k
#define signabs(v) v < 0 ? '-' : ' ', abs(v)
754
1.96k
                    if (DEBUG_BLOCK_INFO)
755
0
                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
756
0
                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n",
757
0
                               signabs(t->warpmv.matrix[0]),
758
0
                               signabs(t->warpmv.matrix[1]),
759
0
                               signabs(t->warpmv.matrix[2]),
760
0
                               signabs(t->warpmv.matrix[3]),
761
0
                               signabs(t->warpmv.matrix[4]),
762
0
                               signabs(t->warpmv.matrix[5]),
763
0
                               signabs(t->warpmv.u.p.alpha),
764
0
                               signabs(t->warpmv.u.p.beta),
765
0
                               signabs(t->warpmv.u.p.gamma),
766
0
                               signabs(t->warpmv.u.p.delta),
767
0
                               b->mv2d.y, b->mv2d.x);
768
1.96k
#undef signabs
769
1.96k
                }
770
2.20k
            }
771
154k
            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
772
773
154k
            const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
774
154k
            BlockContext *edge = t->a;
775
462k
            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
776
308k
#define set_ctx(rep_macro) \
777
308k
                rep_macro(edge->filter[0], off, filter[0]); \
778
308k
                rep_macro(edge->filter[1], off, filter[1]); \
779
308k
                rep_macro(edge->intra, off, 0)
780
308k
                case_set(b_dim[2 + i]);
781
308k
#undef set_ctx
782
308k
            }
783
784
154k
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
785
110k
                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
786
767k
                for (int x = 0; x < bw4; x++) {
787
656k
                    r[x].ref.ref[0] = b->ref[0] + 1;
788
656k
                    r[x].mv.mv[0] = b->mv[0];
789
656k
                    r[x].bs = bs;
790
656k
                }
791
110k
                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
792
818k
                for (int y = 0; y < bh4 - 1; y++) {
793
708k
                    rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
794
708k
                    rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
795
708k
                    rr[y][t->bx + bw4 - 1].bs = bs;
796
708k
                }
797
110k
            }
798
799
154k
            if (has_chroma) {
800
131k
                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
801
131k
                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
802
131k
            }
803
154k
        }
804
1.30M
        return 0;
805
1.30M
    }
806
807
1.74M
    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
808
809
1.74M
    b->bl = bl;
810
1.74M
    b->bp = bp;
811
1.74M
    b->bs = bs;
812
813
1.74M
    const Dav1dSegmentationData *seg = NULL;
814
815
    // segment_id (if seg_feature for skip/ref/gmv is enabled)
816
1.74M
    int seg_pred = 0;
817
1.74M
    if (f->frame_hdr->segmentation.enabled) {
818
1.04M
        if (!f->frame_hdr->segmentation.update_map) {
819
94.6k
            if (f->prev_segmap) {
820
76.2k
                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
821
76.2k
                                                       f->prev_segmap,
822
76.2k
                                                       f->b4_stride);
823
76.2k
                if (seg_id >= 8) return -1;
824
76.2k
                b->seg_id = seg_id;
825
76.2k
            } else {
826
18.4k
                b->seg_id = 0;
827
18.4k
            }
828
94.6k
            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
829
950k
        } else if (f->frame_hdr->segmentation.seg_data.preskip) {
830
930k
            if (f->frame_hdr->segmentation.temporal &&
831
1.09k
                (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
832
1.09k
                                ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
833
1.09k
                                t->l.seg_pred[by4]])))
834
531
            {
835
                // temporal predicted seg_id
836
531
                if (f->prev_segmap) {
837
495
                    unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
838
495
                                                           w4, h4,
839
495
                                                           f->prev_segmap,
840
495
                                                           f->b4_stride);
841
495
                    if (seg_id >= 8) return -1;
842
495
                    b->seg_id = seg_id;
843
495
                } else {
844
36
                    b->seg_id = 0;
845
36
                }
846
929k
            } else {
847
929k
                int seg_ctx;
848
929k
                const unsigned pred_seg_id =
849
929k
                    get_cur_frame_segid(t->by, t->bx, have_top, have_left,
850
929k
                                        &seg_ctx, f->cur_segmap, f->b4_stride);
851
929k
                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
852
929k
                                          ts->cdf.m.seg_id[seg_ctx],
853
929k
                                          DAV1D_MAX_SEGMENTS - 1);
854
929k
                const unsigned last_active_seg_id =
855
929k
                    f->frame_hdr->segmentation.seg_data.last_active_segid;
856
929k
                b->seg_id = neg_deinterleave(diff, pred_seg_id,
857
929k
                                             last_active_seg_id + 1);
858
929k
                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
859
929k
                if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
860
929k
            }
861
862
930k
            if (DEBUG_BLOCK_INFO)
863
0
                printf("Post-segid[preskip;%d]: r=%d\n",
864
0
                       b->seg_id, ts->msac.rng);
865
866
930k
            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
867
930k
        }
868
1.04M
    } else {
869
702k
        b->seg_id = 0;
870
702k
    }
871
872
    // skip_mode
873
1.74M
    if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
874
821k
        f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
875
842
    {
876
842
        const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
877
842
        b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
878
842
                           ts->cdf.m.skip_mode[smctx]);
879
842
        if (DEBUG_BLOCK_INFO)
880
0
            printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
881
1.74M
    } else {
882
1.74M
        b->skip_mode = 0;
883
1.74M
    }
884
885
    // skip
886
1.75M
    if (b->skip_mode || (seg && seg->skip)) {
887
741k
        b->skip = 1;
888
1.00M
    } else {
889
1.00M
        const int sctx = t->a->skip[bx4] + t->l.skip[by4];
890
1.00M
        b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
891
1.00M
        if (DEBUG_BLOCK_INFO)
892
0
            printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
893
1.00M
    }
894
895
    // segment_id
896
1.74M
    if (f->frame_hdr->segmentation.enabled &&
897
1.04M
        f->frame_hdr->segmentation.update_map &&
898
950k
        !f->frame_hdr->segmentation.seg_data.preskip)
899
20.0k
    {
900
20.0k
        if (!b->skip && f->frame_hdr->segmentation.temporal &&
901
277
            (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
902
277
                            ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
903
277
                            t->l.seg_pred[by4]])))
904
114
        {
905
            // temporal predicted seg_id
906
114
            if (f->prev_segmap) {
907
12
                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
908
12
                                                       f->prev_segmap,
909
12
                                                       f->b4_stride);
910
12
                if (seg_id >= 8) return -1;
911
12
                b->seg_id = seg_id;
912
102
            } else {
913
102
                b->seg_id = 0;
914
102
            }
915
19.8k
        } else {
916
19.8k
            int seg_ctx;
917
19.8k
            const unsigned pred_seg_id =
918
19.8k
                get_cur_frame_segid(t->by, t->bx, have_top, have_left,
919
19.8k
                                    &seg_ctx, f->cur_segmap, f->b4_stride);
920
19.8k
            if (b->skip) {
921
10.0k
                b->seg_id = pred_seg_id;
922
10.0k
            } else {
923
9.88k
                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
924
9.88k
                                          ts->cdf.m.seg_id[seg_ctx],
925
9.88k
                                          DAV1D_MAX_SEGMENTS - 1);
926
9.88k
                const unsigned last_active_seg_id =
927
9.88k
                    f->frame_hdr->segmentation.seg_data.last_active_segid;
928
9.88k
                b->seg_id = neg_deinterleave(diff, pred_seg_id,
929
9.88k
                                             last_active_seg_id + 1);
930
9.88k
                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
931
9.88k
            }
932
19.8k
            if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
933
19.8k
        }
934
935
20.0k
        seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
936
937
20.0k
        if (DEBUG_BLOCK_INFO)
938
0
            printf("Post-segid[postskip;%d]: r=%d\n",
939
0
                   b->seg_id, ts->msac.rng);
940
20.0k
    }
941
942
    // cdef index
943
1.74M
    if (!b->skip) {
944
681k
        const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
945
367k
                                           ((t->by & 16) >> 3) : 0;
946
681k
        if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
947
155k
            const int v = dav1d_msac_decode_bools(&ts->msac,
948
155k
                              f->frame_hdr->cdef.n_bits);
949
155k
            t->cur_sb_cdef_idx_ptr[idx] = v;
950
155k
            if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
951
155k
            if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
952
155k
            if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
953
954
155k
            if (DEBUG_BLOCK_INFO)
955
0
                printf("Post-cdef_idx[%d]: r=%d\n",
956
0
                        *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
957
155k
        }
958
681k
    }
959
960
    // delta-q/lf
961
1.74M
    if (!((t->bx | t->by) & (31 >> !f->seq_hdr->sb128))) {
962
246k
        const int prev_qidx = ts->last_qidx;
963
246k
        const int have_delta_q = f->frame_hdr->delta.q.present &&
964
79.7k
            (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);
965
966
246k
        uint32_t prev_delta_lf = ts->last_delta_lf.u32;
967
968
246k
        if (have_delta_q) {
969
73.7k
            int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
970
73.7k
                                                          ts->cdf.m.delta_q, 3);
971
73.7k
            if (delta_q == 3) {
972
3.25k
                const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
973
3.25k
                delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
974
3.25k
                          1 + (1 << n_bits);
975
3.25k
            }
976
73.7k
            if (delta_q) {
977
11.9k
                if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
978
11.9k
                delta_q *= 1 << f->frame_hdr->delta.q.res_log2;
979
11.9k
            }
980
73.7k
            ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
981
73.7k
            if (have_delta_q && DEBUG_BLOCK_INFO)
982
0
                printf("Post-delta_q[%d->%d]: r=%d\n",
983
0
                       delta_q, ts->last_qidx, ts->msac.rng);
984
985
73.7k
            if (f->frame_hdr->delta.lf.present) {
986
18.5k
                const int n_lfs = f->frame_hdr->delta.lf.multi ?
987
13.5k
                    f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;
988
989
75.5k
                for (int i = 0; i < n_lfs; i++) {
990
56.9k
                    int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
991
56.9k
                        ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
992
56.9k
                    if (delta_lf == 3) {
993
2.95k
                        const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
994
2.95k
                        delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
995
2.95k
                                   1 + (1 << n_bits);
996
2.95k
                    }
997
56.9k
                    if (delta_lf) {
998
8.86k
                        if (dav1d_msac_decode_bool_equi(&ts->msac))
999
5.34k
                            delta_lf = -delta_lf;
1000
8.86k
                        delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
1001
8.86k
                    }
1002
56.9k
                    ts->last_delta_lf.i8[i] =
1003
56.9k
                        iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
1004
56.9k
                    if (have_delta_q && DEBUG_BLOCK_INFO)
1005
0
                        printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
1006
0
                               ts->msac.rng);
1007
56.9k
                }
1008
18.5k
            }
1009
73.7k
        }
1010
246k
        if (ts->last_qidx == f->frame_hdr->quant.yac) {
1011
            // assign frame-wide q values to this sb
1012
205k
            ts->dq = f->dq;
1013
205k
        } else if (ts->last_qidx != prev_qidx) {
1014
            // find sb-specific quant parameters
1015
8.86k
            init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
1016
8.86k
            ts->dq = ts->dqmem;
1017
8.86k
        }
1018
246k
        if (!ts->last_delta_lf.u32) {
1019
            // assign frame-wide lf values to this sb
1020
230k
            ts->lflvl = f->lf.lvl;
1021
230k
        } else if (ts->last_delta_lf.u32 != prev_delta_lf) {
1022
            // find sb-specific lf lvl parameters
1023
5.24k
            ts->lflvl = ts->lflvlmem;
1024
5.24k
            dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
1025
5.24k
        }
1026
246k
    }
1027
1028
1.74M
    if (b->skip_mode) {
1029
4
        b->intra = 0;
1030
1.74M
    } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
1031
132k
        if (seg && (seg->ref >= 0 || seg->globalmv)) {
1032
77.2k
            b->intra = !seg->ref;
1033
77.2k
        } else {
1034
54.7k
            const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
1035
54.7k
                                           have_top, have_left);
1036
54.7k
            b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
1037
54.7k
                            ts->cdf.m.intra[ictx]);
1038
54.7k
            if (DEBUG_BLOCK_INFO)
1039
0
                printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);
1040
54.7k
        }
1041
1.61M
    } else if (f->frame_hdr->allow_intrabc) {
1042
297k
        b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
1043
297k
        if (DEBUG_BLOCK_INFO)
1044
0
            printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);
1045
1.31M
    } else {
1046
1.31M
        b->intra = 1;
1047
1.31M
    }
1048
1049
    // intra/inter-specific stuff
1050
1.74M
    if (b->intra) {
1051
1.54M
        uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
1052
10.3k
            ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
1053
1.54M
            ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
1054
1.53M
                        [dav1d_intra_mode_context[t->l.mode[by4]]];
1055
1.54M
        b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
1056
1.54M
                                                     N_INTRA_PRED_MODES - 1);
1057
1.54M
        if (DEBUG_BLOCK_INFO)
1058
0
            printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);
1059
1060
        // angle delta
1061
1.54M
        if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
1062
713k
            b->y_mode <= VERT_LEFT_PRED)
1063
407k
        {
1064
407k
            uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
1065
407k
            const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
1066
407k
            b->y_angle = angle - 3;
1067
1.14M
        } else {
1068
1.14M
            b->y_angle = 0;
1069
1.14M
        }
1070
1071
1.54M
        if (has_chroma) {
1072
1.27M
            const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
1073
1.27M
                cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
1074
1.27M
            uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
1075
1.27M
            b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
1076
1.27M
                             N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
1077
1.27M
            if (DEBUG_BLOCK_INFO)
1078
0
                printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);
1079
1080
1.27M
            b->uv_angle = 0;
1081
1.27M
            if (b->uv_mode == CFL_PRED) {
1082
274k
#define SIGN(a) (!!(a) + ((a) > 0))
1083
274k
                const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
1084
274k
                                     ts->cdf.m.cfl_sign, 7) + 1;
1085
274k
                const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
1086
274k
                assert(sign_u == sign / 3);
1087
274k
                if (sign_u) {
1088
256k
                    const int ctx = (sign_u == 2) * 3 + sign_v;
1089
256k
                    b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1090
256k
                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
1091
256k
                    if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
1092
256k
                } else {
1093
18.8k
                    b->cfl_alpha[0] = 0;
1094
18.8k
                }
1095
274k
                if (sign_v) {
1096
186k
                    const int ctx = (sign_v == 2) * 3 + sign_u;
1097
186k
                    b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1098
186k
                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
1099
186k
                    if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
1100
186k
                } else {
1101
88.1k
                    b->cfl_alpha[1] = 0;
1102
88.1k
                }
1103
274k
#undef SIGN
1104
274k
                if (DEBUG_BLOCK_INFO)
1105
0
                    printf("Post-uvalphas[%d/%d]: r=%d\n",
1106
0
                           b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
1107
1.00M
            } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
1108
541k
                       b->uv_mode <= VERT_LEFT_PRED)
1109
275k
            {
1110
275k
                uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
1111
275k
                const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
1112
275k
                b->uv_angle = angle - 3;
1113
275k
            }
1114
1.27M
        }
1115
1116
1.54M
        b->pal_sz[0] = b->pal_sz[1] = 0;
1117
1.54M
        if (f->frame_hdr->allow_screen_content_tools &&
1118
404k
            imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
1119
318k
        {
1120
318k
            const int sz_ctx = b_dim[2] + b_dim[3] - 2;
1121
318k
            if (b->y_mode == DC_PRED) {
1122
132k
                const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
1123
132k
                const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
1124
132k
                                          ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
1125
132k
                if (DEBUG_BLOCK_INFO)
1126
0
                    printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);
1127
132k
                if (use_y_pal)
1128
11.7k
                    f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
1129
132k
            }
1130
1131
318k
            if (has_chroma && b->uv_mode == DC_PRED) {
1132
83.7k
                const int pal_ctx = b->pal_sz[0] > 0;
1133
83.7k
                const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
1134
83.7k
                                           ts->cdf.m.pal_uv[pal_ctx]);
1135
83.7k
                if (DEBUG_BLOCK_INFO)
1136
0
                    printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
1137
83.7k
                if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
1138
5.23k
                    f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4);
1139
83.7k
            }
1140
318k
        }
1141
1142
1.54M
        if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
1143
609k
            imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)
1144
429k
        {
1145
429k
            const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
1146
429k
                                      ts->cdf.m.use_filter_intra[bs]);
1147
429k
            if (is_filter) {
1148
286k
                b->y_mode = FILTER_PRED;
1149
286k
                b->y_angle = dav1d_msac_decode_symbol_adapt8(&ts->msac,
1150
286k
                                 ts->cdf.m.filter_intra, 4);
1151
286k
            }
1152
429k
            if (DEBUG_BLOCK_INFO)
1153
0
                printf("Post-filterintramode[%d/%d]: r=%d\n",
1154
0
                       b->y_mode, b->y_angle, ts->msac.rng);
1155
429k
        }
1156
1157
1.54M
        if (b->pal_sz[0]) {
1158
11.7k
            uint8_t *pal_idx;
1159
11.7k
            if (t->frame_thread.pass) {
1160
11.7k
                const int p = t->frame_thread.pass & 1;
1161
11.7k
                assert(ts->frame_thread[p].pal_idx);
1162
11.7k
                pal_idx = ts->frame_thread[p].pal_idx;
1163
11.7k
                ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
1164
11.7k
            } else
1165
0
                pal_idx = t->scratch.pal_idx_y;
1166
11.7k
            read_pal_indices(t, pal_idx, b->pal_sz[0], 0, w4, h4, bw4, bh4);
1167
11.7k
            if (DEBUG_BLOCK_INFO)
1168
0
                printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);
1169
11.7k
        }
1170
1171
1.54M
        if (has_chroma && b->pal_sz[1]) {
1172
5.23k
            uint8_t *pal_idx;
1173
5.23k
            if (t->frame_thread.pass) {
1174
5.23k
                const int p = t->frame_thread.pass & 1;
1175
5.23k
                assert(ts->frame_thread[p].pal_idx);
1176
5.23k
                pal_idx = ts->frame_thread[p].pal_idx;
1177
5.23k
                ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
1178
5.23k
            } else
1179
0
                pal_idx = t->scratch.pal_idx_uv;
1180
5.23k
            read_pal_indices(t, pal_idx, b->pal_sz[1], 1, cw4, ch4, cbw4, cbh4);
1181
5.23k
            if (DEBUG_BLOCK_INFO)
1182
0
                printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);
1183
5.23k
        }
1184
1185
1.54M
        const TxfmInfo *t_dim;
1186
1.54M
        if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
1187
6.21k
            b->tx = b->uvtx = (int) TX_4X4;
1188
6.21k
            t_dim = &dav1d_txfm_dimensions[TX_4X4];
1189
1.54M
        } else {
1190
1.54M
            b->tx = dav1d_max_txfm_size_for_bs[bs][0];
1191
1.54M
            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
1192
1.54M
            t_dim = &dav1d_txfm_dimensions[b->tx];
1193
1.54M
            if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
1194
368k
                const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
1195
368k
                uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
1196
368k
                int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
1197
368k
                                imin(t_dim->max, 2));
1198
1199
614k
                while (depth--) {
1200
246k
                    b->tx = t_dim->sub;
1201
246k
                    t_dim = &dav1d_txfm_dimensions[b->tx];
1202
246k
                }
1203
368k
            }
1204
1.54M
            if (DEBUG_BLOCK_INFO)
1205
0
                printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);
1206
1.54M
        }
1207
1208
        // reconstruction
1209
1.54M
        if (t->frame_thread.pass == 1) {
1210
1.54M
            f->bd_fn.read_coef_blocks(t, bs, b);
1211
1.54M
        } else {
1212
68
            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
1213
68
        }
1214
1215
1.54M
        if (f->frame_hdr->loopfilter.level_y[0] ||
1216
292k
            f->frame_hdr->loopfilter.level_y[1])
1217
1.28M
        {
1218
1.28M
            dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
1219
1.28M
                                       (const uint8_t (*)[8][2])
1220
1.28M
                                       &ts->lflvl[b->seg_id][0][0][0],
1221
1.28M
                                       t->bx, t->by, f->w4, f->h4, bs,
1222
1.28M
                                       b->tx, b->uvtx, f->cur.p.layout,
1223
1.28M
                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
1224
1.28M
                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
1225
1.28M
                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
1226
1.28M
        }
1227
        // update contexts
1228
1.54M
        const enum IntraPredMode y_mode_nofilt =
1229
1.54M
            b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
1230
1.54M
        BlockContext *edge = t->a;
1231
4.64M
        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
1232
3.09M
            int t_lsz = ((uint8_t *) &t_dim->lw)[i]; // lw then lh
1233
3.09M
#define set_ctx(rep_macro) \
1234
3.09M
            rep_macro(edge->tx_intra, off, t_lsz); \
1235
3.09M
            rep_macro(edge->tx, off, t_lsz); \
1236
3.09M
            rep_macro(edge->mode, off, y_mode_nofilt); \
1237
3.09M
            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
1238
3.09M
            rep_macro(edge->seg_pred, off, seg_pred); \
1239
3.09M
            rep_macro(edge->skip_mode, off, 0); \
1240
3.09M
            rep_macro(edge->intra, off, 1); \
1241
3.09M
            rep_macro(edge->skip, off, b->skip); \
1242
            /* see aomedia bug 2183 for why we use luma coordinates here */ \
1243
3.09M
            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
1244
3.09M
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
1245
20.7k
                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
1246
20.7k
                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
1247
20.7k
                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
1248
20.7k
                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
1249
20.7k
                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
1250
20.7k
            }
1251
3.09M
            case_set(b_dim[2 + i]);
1252
3.09M
#undef set_ctx
1253
3.09M
        }
1254
1.54M
        if (b->pal_sz[0])
1255
11.7k
            f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4);
1256
1.54M
        if (has_chroma) {
1257
1.27M
            uint8_t uv_mode = b->uv_mode;
1258
1.27M
            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
1259
1.27M
            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
1260
1.27M
            if (b->pal_sz[1])
1261
5.23k
                f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4);
1262
1.27M
        }
1263
1.54M
        if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc)
1264
222k
            splat_intraref(f->c, t, bs, bw4, bh4);
1265
1.54M
    } else if (IS_KEY_OR_INTRA(f->frame_hdr)) {
1266
        // intra block copy
1267
85.0k
        refmvs_candidate mvstack[8];
1268
85.0k
        int n_mvs, ctx;
1269
85.0k
        dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1270
85.0k
                          (union refmvs_refpair) { .ref = { 0, -1 }},
1271
85.0k
                          bs, intra_edge_flags, t->by, t->bx);
1272
1273
85.0k
        if (mvstack[0].mv.mv[0].n)
1274
74.9k
            b->mv[0] = mvstack[0].mv.mv[0];
1275
10.1k
        else if (mvstack[1].mv.mv[0].n)
1276
0
            b->mv[0] = mvstack[1].mv.mv[0];
1277
10.1k
        else {
1278
10.1k
            if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {
1279
4.96k
                b->mv[0].y = 0;
1280
4.96k
                b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;
1281
5.13k
            } else {
1282
5.13k
                b->mv[0].y = -(512 << f->seq_hdr->sb128);
1283
5.13k
                b->mv[0].x = 0;
1284
5.13k
            }
1285
10.1k
        }
1286
1287
85.0k
        const union mv ref = b->mv[0];
1288
85.0k
        read_mv_residual(ts, &b->mv[0], -1);
1289
1290
        // clip intrabc motion vector to decoded parts of current tile
1291
85.0k
        int border_left = ts->tiling.col_start * 4;
1292
85.0k
        int border_top  = ts->tiling.row_start * 4;
1293
85.0k
        if (has_chroma) {
1294
65.4k
            if (bw4 < 2 &&  ss_hor)
1295
4.97k
                border_left += 4;
1296
65.4k
            if (bh4 < 2 &&  ss_ver)
1297
5.91k
                border_top  += 4;
1298
65.4k
        }
1299
85.0k
        int src_left   = t->bx * 4 + (b->mv[0].x >> 3);
1300
85.0k
        int src_top    = t->by * 4 + (b->mv[0].y >> 3);
1301
85.0k
        int src_right  = src_left + bw4 * 4;
1302
85.0k
        int src_bottom = src_top  + bh4 * 4;
1303
85.0k
        const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;
1304
1305
        // check against left or right tile boundary and adjust if necessary
1306
85.0k
        if (src_left < border_left) {
1307
22.5k
            src_right += border_left - src_left;
1308
22.5k
            src_left  += border_left - src_left;
1309
62.5k
        } else if (src_right > border_right) {
1310
1.35k
            src_left  -= src_right - border_right;
1311
1.35k
            src_right -= src_right - border_right;
1312
1.35k
        }
1313
        // check against top tile boundary and adjust if necessary
1314
85.0k
        if (src_top < border_top) {
1315
20.4k
            src_bottom += border_top - src_top;
1316
20.4k
            src_top    += border_top - src_top;
1317
20.4k
        }
1318
1319
85.0k
        const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
1320
85.0k
        const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
1321
85.0k
        const int sb_size = 1 << (6 + f->seq_hdr->sb128);
1322
        // check for overlap with current superblock
1323
85.0k
        if (src_bottom > sby && src_right > sbx) {
1324
2.33k
            if (src_top - border_top >= src_bottom - sby) {
1325
                // if possible move src up into the previous superblock row
1326
2.22k
                src_top    -= src_bottom - sby;
1327
2.22k
                src_bottom -= src_bottom - sby;
1328
2.22k
            } else if (src_left - border_left >= src_right - sbx) {
1329
                // if possible move src left into the previous superblock
1330
58
                src_left  -= src_right - sbx;
1331
58
                src_right -= src_right - sbx;
1332
58
            }
1333
2.33k
        }
1334
        // move src up if it is below current superblock row
1335
85.0k
        if (src_bottom > sby + sb_size) {
1336
3.92k
            src_top    -= src_bottom - (sby + sb_size);
1337
3.92k
            src_bottom -= src_bottom - (sby + sb_size);
1338
3.92k
        }
1339
        // error out if mv still overlaps with the current superblock
1340
85.0k
        if (src_bottom > sby && src_right > sbx)
1341
49
            return -1;
1342
1343
85.0k
        b->mv[0].x = (src_left - t->bx * 4) * 8;
1344
85.0k
        b->mv[0].y = (src_top  - t->by * 4) * 8;
1345
1346
85.0k
        if (DEBUG_BLOCK_INFO)
1347
0
            printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
1348
0
                   b->mv[0].y, b->mv[0].x, ref.y, ref.x,
1349
0
                   mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
1350
85.0k
        read_vartx_tree(t, b, bs, bx4, by4);
1351
1352
        // reconstruction
1353
85.0k
        if (t->frame_thread.pass == 1) {
1354
85.0k
            f->bd_fn.read_coef_blocks(t, bs, b);
1355
85.0k
            b->filter2d = FILTER_2D_BILINEAR;
1356
18.4E
        } else {
1357
18.4E
            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
1358
18.4E
        }
1359
1360
85.0k
        splat_intrabc_mv(f->c, t, bs, b, bw4, bh4);
1361
85.0k
        BlockContext *edge = t->a;
1362
255k
        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
1363
170k
#define set_ctx(rep_macro) \
1364
170k
            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
1365
170k
            rep_macro(edge->mode, off, DC_PRED); \
1366
170k
            rep_macro(edge->pal_sz, off, 0); \
1367
            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
1368
170k
            rep_macro(t->pal_sz_uv[i], off, 0); \
1369
170k
            rep_macro(edge->seg_pred, off, seg_pred); \
1370
170k
            rep_macro(edge->skip_mode, off, 0); \
1371
170k
            rep_macro(edge->intra, off, 0); \
1372
170k
            rep_macro(edge->skip, off, b->skip)
1373
170k
            case_set(b_dim[2 + i]);
1374
170k
#undef set_ctx
1375
170k
        }
1376
85.0k
        if (has_chroma) {
1377
65.4k
            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
1378
65.4k
            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
1379
65.4k
        }
1380
115k
    } else {
1381
        // inter-specific mode/mv coding
1382
115k
        int is_comp, has_subpel_filter;
1383
1384
115k
        if (b->skip_mode) {
1385
4
            is_comp = 1;
1386
115k
        } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
1387
52.1k
                   f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)
1388
28.1k
        {
1389
28.1k
            const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
1390
28.1k
                                         have_top, have_left);
1391
28.1k
            is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
1392
28.1k
                          ts->cdf.m.comp[ctx]);
1393
28.1k
            if (DEBUG_BLOCK_INFO)
1394
0
                printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);
1395
87.0k
        } else {
1396
87.0k
            is_comp = 0;
1397
87.0k
        }
1398
1399
115k
        if (b->skip_mode) {
1400
4
            b->ref[0] = f->frame_hdr->skip_mode_refs[0];
1401
4
            b->ref[1] = f->frame_hdr->skip_mode_refs[1];
1402
4
            b->comp_type = COMP_INTER_AVG;
1403
4
            b->inter_mode = NEARESTMV_NEARESTMV;
1404
4
            b->drl_idx = NEAREST_DRL;
1405
4
            has_subpel_filter = 0;
1406
1407
4
            refmvs_candidate mvstack[8];
1408
4
            int n_mvs, ctx;
1409
4
            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1410
4
                              (union refmvs_refpair) { .ref = {
1411
4
                                    b->ref[0] + 1, b->ref[1] + 1 }},
1412
4
                              bs, intra_edge_flags, t->by, t->bx);
1413
1414
4
            b->mv[0] = mvstack[0].mv.mv[0];
1415
4
            b->mv[1] = mvstack[0].mv.mv[1];
1416
4
            fix_mv_precision(f->frame_hdr, &b->mv[0]);
1417
4
            fix_mv_precision(f->frame_hdr, &b->mv[1]);
1418
4
            if (DEBUG_BLOCK_INFO)
1419
0
                printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
1420
0
                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
1421
0
                       b->ref[0], b->ref[1]);
1422
115k
        } else if (is_comp) {
1423
13.8k
            const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
1424
13.8k
                                                 have_top, have_left);
1425
13.8k
            if (dav1d_msac_decode_bool_adapt(&ts->msac,
1426
13.8k
                    ts->cdf.m.comp_dir[dir_ctx]))
1427
11.4k
            {
1428
                // bidir - first reference (fw)
1429
11.4k
                const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
1430
11.4k
                                                     have_top, have_left);
1431
11.4k
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
1432
11.4k
                        ts->cdf.m.comp_fwd_ref[0][ctx1]))
1433
4.70k
                {
1434
4.70k
                    const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
1435
4.70k
                                                           have_top, have_left);
1436
4.70k
                    b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
1437
4.70k
                                        ts->cdf.m.comp_fwd_ref[2][ctx2]);
1438
6.78k
                } else {
1439
6.78k
                    const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
1440
6.78k
                                                           have_top, have_left);
1441
6.78k
                    b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
1442
6.78k
                                    ts->cdf.m.comp_fwd_ref[1][ctx2]);
1443
6.78k
                }
1444
1445
                // second reference (bw)
1446
11.4k
                const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
1447
11.4k
                                                     have_top, have_left);
1448
11.4k
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
1449
11.4k
                        ts->cdf.m.comp_bwd_ref[0][ctx3]))
1450
4.75k
                {
1451
4.75k
                    b->ref[1] = 6;
1452
6.73k
                } else {
1453
6.73k
                    const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
1454
6.73k
                                                           have_top, have_left);
1455
6.73k
                    b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
1456
6.73k
                                        ts->cdf.m.comp_bwd_ref[1][ctx4]);
1457
6.73k
                }
1458
11.4k
            } else {
1459
                // unidir
1460
2.38k
                const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
1461
2.38k
                                                     have_top, have_left);
1462
2.38k
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
1463
2.38k
                        ts->cdf.m.comp_uni_ref[0][uctx_p]))
1464
640
                {
1465
640
                    b->ref[0] = 4;
1466
640
                    b->ref[1] = 6;
1467
1.74k
                } else {
1468
1.74k
                    const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
1469
1.74k
                                                           have_top, have_left);
1470
1.74k
                    b->ref[0] = 0;
1471
1.74k
                    b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
1472
1.74k
                                        ts->cdf.m.comp_uni_ref[1][uctx_p1]);
1473
1.74k
                    if (b->ref[1] == 2) {
1474
1.08k
                        const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
1475
1.08k
                                                               have_top, have_left);
1476
1.08k
                        b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
1477
1.08k
                                         ts->cdf.m.comp_uni_ref[2][uctx_p2]);
1478
1.08k
                    }
1479
1.74k
                }
1480
2.38k
            }
1481
13.8k
            if (DEBUG_BLOCK_INFO)
1482
0
                printf("Post-refs[%d/%d]: r=%d\n",
1483
0
                       b->ref[0], b->ref[1], ts->msac.rng);
1484
1485
13.8k
            refmvs_candidate mvstack[8];
1486
13.8k
            int n_mvs, ctx;
1487
13.8k
            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1488
13.8k
                              (union refmvs_refpair) { .ref = {
1489
13.8k
                                    b->ref[0] + 1, b->ref[1] + 1 }},
1490
13.8k
                              bs, intra_edge_flags, t->by, t->bx);
1491
1492
13.8k
            b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
1493
13.8k
                                ts->cdf.m.comp_inter_mode[ctx],
1494
13.8k
                                N_COMP_INTER_PRED_MODES - 1);
1495
13.8k
            if (DEBUG_BLOCK_INFO)
1496
0
                printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
1497
0
                       b->inter_mode, ctx, n_mvs, ts->msac.rng);
1498
1499
13.8k
            const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
1500
13.8k
            b->drl_idx = NEAREST_DRL;
1501
13.8k
            if (b->inter_mode == NEWMV_NEWMV) {
1502
2.42k
                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
1503
2.42k
                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
1504
2.42k
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1505
2.42k
                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
1506
2.42k
                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
1507
42
                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1508
42
                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1509
42
                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
1510
42
                    }
1511
2.42k
                    if (DEBUG_BLOCK_INFO)
1512
0
                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
1513
0
                               b->drl_idx, n_mvs, ts->msac.rng);
1514
2.42k
                }
1515
11.4k
            } else if (im[0] == NEARMV || im[1] == NEARMV) {
1516
3.33k
                b->drl_idx = NEARER_DRL;
1517
3.33k
                if (n_mvs > 2) { // NEAR or NEARISH
1518
65
                    const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1519
65
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1520
65
                                      ts->cdf.m.drl_bit[drl_ctx_v2]);
1521
65
                    if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
1522
12
                        const int drl_ctx_v3 = get_drl_context(mvstack, 2);
1523
12
                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1524
12
                                          ts->cdf.m.drl_bit[drl_ctx_v3]);
1525
12
                    }
1526
65
                    if (DEBUG_BLOCK_INFO)
1527
0
                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
1528
0
                               b->drl_idx, n_mvs, ts->msac.rng);
1529
65
                }
1530
3.33k
            }
1531
13.8k
            assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
1532
1533
13.8k
#define assign_comp_mv(idx) \
1534
27.7k
            switch (im[idx]) { \
1535
5.41k
            case NEARMV: \
1536
14.9k
            case NEARESTMV: \
1537
14.9k
                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
1538
14.9k
                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
1539
14.9k
                break; \
1540
5.41k
            case GLOBALMV: \
1541
5.39k
                has_subpel_filter |= \
1542
5.39k
                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
1543
5.39k
                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
1544
5.39k
                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
1545
5.39k
                break; \
1546
7.38k
            case NEWMV: \
1547
7.38k
                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
1548
7.38k
                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
1549
7.38k
                read_mv_residual(ts, &b->mv[idx], mv_prec); \
1550
7.38k
                break; \
1551
27.7k
            }
1552
13.8k
            has_subpel_filter = imin(bw4, bh4) == 1 ||
1553
13.8k
                                b->inter_mode != GLOBALMV_GLOBALMV;
1554
13.8k
            assign_comp_mv(0);
1555
13.8k
            assign_comp_mv(1);
1556
13.8k
#undef assign_comp_mv
1557
13.8k
            if (DEBUG_BLOCK_INFO)
1558
0
                printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n",
1559
0
                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
1560
0
                       ts->msac.rng);
1561
1562
            // jnt_comp vs. seg vs. wedge
1563
13.8k
            int is_segwedge = 0;
1564
13.8k
            if (f->seq_hdr->masked_compound) {
1565
10.6k
                const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4);
1566
1567
10.6k
                is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac,
1568
10.6k
                                  ts->cdf.m.mask_comp[mask_ctx]);
1569
10.6k
                if (DEBUG_BLOCK_INFO)
1570
0
                    printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n",
1571
0
                           is_segwedge, mask_ctx, ts->msac.rng);
1572
10.6k
            }
1573
1574
13.8k
            if (!is_segwedge) {
1575
12.0k
                if (f->seq_hdr->jnt_comp) {
1576
1.36k
                    const int jnt_ctx =
1577
1.36k
                        get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits,
1578
1.36k
                                         f->cur.frame_hdr->frame_offset,
1579
1.36k
                                         f->refp[b->ref[0]].p.frame_hdr->frame_offset,
1580
1.36k
                                         f->refp[b->ref[1]].p.frame_hdr->frame_offset,
1581
1.36k
                                         t->a, &t->l, by4, bx4);
1582
1.36k
                    b->comp_type = COMP_INTER_WEIGHTED_AVG +
1583
1.36k
                                   dav1d_msac_decode_bool_adapt(&ts->msac,
1584
1.36k
                                       ts->cdf.m.jnt_comp[jnt_ctx]);
1585
1.36k
                    if (DEBUG_BLOCK_INFO)
1586
0
                        printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n",
1587
0
                               b->comp_type == COMP_INTER_AVG,
1588
0
                               jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4],
1589
0
                               t->l.comp_type[by4], t->l.ref[0][by4],
1590
0
                               ts->msac.rng);
1591
10.6k
                } else {
1592
10.6k
                    b->comp_type = COMP_INTER_AVG;
1593
10.6k
                }
1594
12.0k
            } else {
1595
1.83k
                if (wedge_allowed_mask & (1 << bs)) {
1596
1.20k
                    const int ctx = dav1d_wedge_ctx_lut[bs];
1597
1.20k
                    b->comp_type = COMP_INTER_WEDGE -
1598
1.20k
                                   dav1d_msac_decode_bool_adapt(&ts->msac,
1599
1.20k
                                       ts->cdf.m.wedge_comp[ctx]);
1600
1.20k
                    if (b->comp_type == COMP_INTER_WEDGE)
1601
518
                        b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1602
518
                                           ts->cdf.m.wedge_idx[ctx], 15);
1603
1.20k
                } else {
1604
633
                    b->comp_type = COMP_INTER_SEG;
1605
633
                }
1606
1.83k
                b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac);
1607
1.83k
                if (DEBUG_BLOCK_INFO)
1608
0
                    printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n",
1609
0
                           b->comp_type == COMP_INTER_WEDGE,
1610
0
                           b->wedge_idx, b->mask_sign, ts->msac.rng);
1611
1.83k
            }
1612
101k
        } else {
1613
101k
            b->comp_type = COMP_INTER_NONE;
1614
1615
            // ref
1616
101k
            if (seg && seg->ref > 0) {
1617
21.3k
                b->ref[0] = seg->ref - 1;
1618
80.0k
            } else if (seg && (seg->globalmv || seg->skip)) {
1619
48.1k
                b->ref[0] = 0;
1620
48.1k
            } else {
1621
31.8k
                const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4,
1622
31.8k
                                                 have_top, have_left);
1623
31.8k
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
1624
31.8k
                                                 ts->cdf.m.ref[0][ctx1]))
1625
13.3k
                {
1626
13.3k
                    const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4,
1627
13.3k
                                                       have_top, have_left);
1628
13.3k
                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
1629
13.3k
                                                     ts->cdf.m.ref[1][ctx2]))
1630
8.10k
                    {
1631
8.10k
                        b->ref[0] = 6;
1632
8.10k
                    } else {
1633
5.25k
                        const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4,
1634
5.25k
                                                           have_top, have_left);
1635
5.25k
                        b->ref[0] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
1636
5.25k
                                            ts->cdf.m.ref[5][ctx3]);
1637
5.25k
                    }
1638
18.4k
                } else {
1639
18.4k
                    const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4,
1640
18.4k
                                                       have_top, have_left);
1641
18.4k
                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
1642
18.4k
                                                     ts->cdf.m.ref[2][ctx2]))
1643
9.77k
                    {
1644
9.77k
                        const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4,
1645
9.77k
                                                           have_top, have_left);
1646
9.77k
                        b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
1647
9.77k
                                            ts->cdf.m.ref[4][ctx3]);
1648
9.77k
                    } else {
1649
8.69k
                        const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4,
1650
8.69k
                                                           have_top, have_left);
1651
8.69k
                        b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
1652
8.69k
                                        ts->cdf.m.ref[3][ctx3]);
1653
8.69k
                    }
1654
18.4k
                }
1655
31.8k
                if (DEBUG_BLOCK_INFO)
1656
0
                    printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng);
1657
31.8k
            }
1658
101k
            b->ref[1] = -1;
1659
1660
101k
            refmvs_candidate mvstack[8];
1661
101k
            int n_mvs, ctx;
1662
101k
            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1663
101k
                              (union refmvs_refpair) { .ref = { b->ref[0] + 1, -1 }},
1664
101k
                              bs, intra_edge_flags, t->by, t->bx);
1665
1666
            // mode parsing and mv derivation from ref_mvs
1667
101k
            if ((seg && (seg->skip || seg->globalmv)) ||
1668
49.3k
                dav1d_msac_decode_bool_adapt(&ts->msac,
1669
49.3k
                                             ts->cdf.m.newmv_mode[ctx & 7]))
1670
79.5k
            {
1671
79.5k
                if ((seg && (seg->skip || seg->globalmv)) ||
1672
21.1k
                    !dav1d_msac_decode_bool_adapt(&ts->msac,
1673
21.1k
                         ts->cdf.m.globalmv_mode[(ctx >> 3) & 1]))
1674
59.7k
                {
1675
59.7k
                    b->inter_mode = GLOBALMV;
1676
59.7k
                    b->mv[0] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[0]],
1677
59.7k
                                          t->bx, t->by, bw4, bh4, f->frame_hdr);
1678
59.7k
                    has_subpel_filter = imin(bw4, bh4) == 1 ||
1679
42.9k
                        f->frame_hdr->gmv[b->ref[0]].type == DAV1D_WM_TYPE_TRANSLATION;
1680
59.7k
                } else {
1681
19.8k
                    has_subpel_filter = 1;
1682
19.8k
                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
1683
19.8k
                            ts->cdf.m.refmv_mode[(ctx >> 4) & 15]))
1684
6.84k
                    { // NEAREST, NEARER, NEAR or NEARISH
1685
6.84k
                        b->inter_mode = NEARMV;
1686
6.84k
                        b->drl_idx = NEARER_DRL;
1687
6.84k
                        if (n_mvs > 2) { // NEARER, NEAR or NEARISH
1688
2.35k
                            const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1689
2.35k
                            b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1690
2.35k
                                              ts->cdf.m.drl_bit[drl_ctx_v2]);
1691
2.35k
                            if (b->drl_idx == NEAR_DRL && n_mvs > 3) { // NEAR or NEARISH
1692
884
                                const int drl_ctx_v3 =
1693
884
                                    get_drl_context(mvstack, 2);
1694
884
                                b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1695
884
                                                  ts->cdf.m.drl_bit[drl_ctx_v3]);
1696
884
                            }
1697
2.35k
                        }
1698
12.9k
                    } else {
1699
12.9k
                        b->inter_mode = NEARESTMV;
1700
12.9k
                        b->drl_idx = NEAREST_DRL;
1701
12.9k
                    }
1702
19.8k
                    assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
1703
19.8k
                    b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
1704
19.8k
                    if (b->drl_idx < NEAR_DRL)
1705
18.3k
                        fix_mv_precision(f->frame_hdr, &b->mv[0]);
1706
19.8k
                }
1707
1708
79.5k
                if (DEBUG_BLOCK_INFO)
1709
0
                    printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n",
1710
0
                           b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs,
1711
0
                           ts->msac.rng);
1712
79.5k
            } else {
1713
21.7k
                has_subpel_filter = 1;
1714
21.7k
                b->inter_mode = NEWMV;
1715
21.7k
                b->drl_idx = NEAREST_DRL;
1716
21.7k
                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
1717
5.41k
                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
1718
5.41k
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1719
5.41k
                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
1720
5.41k
                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) { // NEAR or NEARISH
1721
1.80k
                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1722
1.80k
                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1723
1.80k
                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
1724
1.80k
                    }
1725
5.41k
                }
1726
21.7k
                assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
1727
21.7k
                if (n_mvs > 1) {
1728
5.41k
                    b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
1729
16.3k
                } else {
1730
16.3k
                    assert(!b->drl_idx);
1731
16.3k
                    b->mv[0] = mvstack[0].mv.mv[0];
1732
16.3k
                    fix_mv_precision(f->frame_hdr, &b->mv[0]);
1733
16.3k
                }
1734
21.7k
                if (DEBUG_BLOCK_INFO)
1735
0
                    printf("Post-intermode[%d,drl=%d]: r=%d\n",
1736
0
                           b->inter_mode, b->drl_idx, ts->msac.rng);
1737
21.7k
                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv;
1738
21.7k
                read_mv_residual(ts, &b->mv[0], mv_prec);
1739
21.7k
                if (DEBUG_BLOCK_INFO)
1740
0
                    printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
1741
0
                           b->mv[0].y, b->mv[0].x, ts->msac.rng);
1742
21.7k
            }
1743
1744
            // interintra flags
1745
101k
            const int ii_sz_grp = dav1d_ymode_size_context[bs];
1746
101k
            if (f->seq_hdr->inter_intra &&
1747
16.6k
                interintra_allowed_mask & (1 << bs) &&
1748
10.6k
                dav1d_msac_decode_bool_adapt(&ts->msac,
1749
10.6k
                                             ts->cdf.m.interintra[ii_sz_grp]))
1750
2.19k
            {
1751
2.19k
                b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1752
2.19k
                                         ts->cdf.m.interintra_mode[ii_sz_grp],
1753
2.19k
                                         N_INTER_INTRA_PRED_MODES - 1);
1754
2.19k
                const int wedge_ctx = dav1d_wedge_ctx_lut[bs];
1755
2.19k
                b->interintra_type = INTER_INTRA_BLEND +
1756
2.19k
                                     dav1d_msac_decode_bool_adapt(&ts->msac,
1757
2.19k
                                         ts->cdf.m.interintra_wedge[wedge_ctx]);
1758
2.19k
                if (b->interintra_type == INTER_INTRA_WEDGE)
1759
1.16k
                    b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1760
1.16k
                                       ts->cdf.m.wedge_idx[wedge_ctx], 15);
1761
99.1k
            } else {
1762
99.1k
                b->interintra_type = INTER_INTRA_NONE;
1763
99.1k
            }
1764
101k
            if (DEBUG_BLOCK_INFO && f->seq_hdr->inter_intra &&
1765
0
                interintra_allowed_mask & (1 << bs))
1766
0
            {
1767
0
                printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n",
1768
0
                       b->interintra_type, b->interintra_mode,
1769
0
                       b->wedge_idx, ts->msac.rng);
1770
0
            }
1771
1772
            // motion variation
1773
101k
            if (f->frame_hdr->switchable_motion_mode &&
1774
91.0k
                b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 &&
1775
                // is not warped global motion
1776
63.5k
                !(!f->frame_hdr->force_integer_mv && b->inter_mode == GLOBALMV &&
1777
40.2k
                  f->frame_hdr->gmv[b->ref[0]].type > DAV1D_WM_TYPE_TRANSLATION) &&
1778
                // has overlappable neighbours
1779
60.9k
                ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) ||
1780
23.3k
                 (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1))))
1781
42.5k
            {
1782
                // reaching here means the block allows obmc - check warp by
1783
                // finding matching-ref blocks in top/left edges
1784
42.5k
                uint64_t mask[2] = { 0, 0 };
1785
42.5k
                find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
1786
42.5k
                                  have_left, have_top, b->ref[0], mask);
1787
42.5k
                const int allow_warp = !f->svc[b->ref[0]][0].scale &&
1788
36.7k
                    !f->frame_hdr->force_integer_mv &&
1789
36.4k
                    f->frame_hdr->warp_motion && (mask[0] | mask[1]);
1790
1791
42.5k
                b->motion_mode = allow_warp ?
1792
8.32k
                    dav1d_msac_decode_symbol_adapt4(&ts->msac,
1793
8.32k
                        ts->cdf.m.motion_mode[bs], 2) :
1794
42.5k
                    dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]);
1795
42.5k
                if (b->motion_mode == MM_WARP) {
1796
2.34k
                    has_subpel_filter = 0;
1797
2.34k
                    derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
1798
2.34k
#define signabs(v) v < 0 ? '-' : ' ', abs(v)
1799
2.34k
                    if (DEBUG_BLOCK_INFO)
1800
0
                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
1801
0
                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, "
1802
0
                               "mv=y:%d,x:%d\n",
1803
0
                               signabs(t->warpmv.matrix[0]),
1804
0
                               signabs(t->warpmv.matrix[1]),
1805
0
                               signabs(t->warpmv.matrix[2]),
1806
0
                               signabs(t->warpmv.matrix[3]),
1807
0
                               signabs(t->warpmv.matrix[4]),
1808
0
                               signabs(t->warpmv.matrix[5]),
1809
0
                               signabs(t->warpmv.u.p.alpha),
1810
0
                               signabs(t->warpmv.u.p.beta),
1811
0
                               signabs(t->warpmv.u.p.gamma),
1812
0
                               signabs(t->warpmv.u.p.delta),
1813
0
                               b->mv[0].y, b->mv[0].x);
1814
2.34k
#undef signabs
1815
2.34k
                    if (t->frame_thread.pass) {
1816
2.34k
                        if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) {
1817
2.10k
                            b->matrix[0] = t->warpmv.matrix[2] - 0x10000;
1818
2.10k
                            b->matrix[1] = t->warpmv.matrix[3];
1819
2.10k
                            b->matrix[2] = t->warpmv.matrix[4];
1820
2.10k
                            b->matrix[3] = t->warpmv.matrix[5] - 0x10000;
1821
2.10k
                        } else {
1822
240
                            b->matrix[0] = INT16_MIN;
1823
240
                        }
1824
2.34k
                    }
1825
2.34k
                }
1826
1827
42.5k
                if (DEBUG_BLOCK_INFO)
1828
0
                    printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIx64 "/0x%"
1829
0
                           PRIx64 "]\n", b->motion_mode, ts->msac.rng, mask[0],
1830
0
                            mask[1]);
1831
58.7k
            } else {
1832
58.7k
                b->motion_mode = MM_TRANSLATION;
1833
58.7k
            }
1834
101k
        }
1835
1836
        // subpel filter
1837
115k
        enum Dav1dFilterMode filter[2];
1838
115k
        if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) {
1839
89.0k
            if (has_subpel_filter) {
1840
50.5k
                const int comp = b->comp_type != COMP_INTER_NONE;
1841
50.5k
                const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0],
1842
50.5k
                                                by4, bx4);
1843
50.5k
                filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1844
50.5k
                               ts->cdf.m.filter[0][ctx1],
1845
50.5k
                               DAV1D_N_SWITCHABLE_FILTERS - 1);
1846
50.5k
                if (f->seq_hdr->dual_filter) {
1847
7.85k
                    const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,
1848
7.85k
                                                    b->ref[0], by4, bx4);
1849
7.85k
                    if (DEBUG_BLOCK_INFO)
1850
0
                        printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n",
1851
0
                               filter[0], ctx1, ts->msac.rng);
1852
7.85k
                    filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1853
7.85k
                                    ts->cdf.m.filter[1][ctx2],
1854
7.85k
                                    DAV1D_N_SWITCHABLE_FILTERS - 1);
1855
7.85k
                    if (DEBUG_BLOCK_INFO)
1856
0
                        printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n",
1857
0
                               filter[1], ctx2, ts->msac.rng);
1858
42.7k
                } else {
1859
42.7k
                    filter[1] = filter[0];
1860
42.7k
                    if (DEBUG_BLOCK_INFO)
1861
0
                        printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n",
1862
0
                               filter[0], ctx1, ts->msac.rng);
1863
42.7k
                }
1864
50.5k
            } else {
1865
38.4k
                filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR;
1866
38.4k
            }
1867
89.0k
        } else {
1868
26.1k
            filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode;
1869
26.1k
        }
1870
115k
        b->filter2d = dav1d_filter_2d[filter[1]][filter[0]];
1871
1872
115k
        read_vartx_tree(t, b, bs, bx4, by4);
1873
1874
        // reconstruction
1875
121k
        if (t->frame_thread.pass == 1) {
1876
121k
            f->bd_fn.read_coef_blocks(t, bs, b);
1877
18.4E
        } else {
1878
18.4E
            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
1879
18.4E
        }
1880
1881
115k
        if (f->frame_hdr->loopfilter.level_y[0] ||
1882
958
            f->frame_hdr->loopfilter.level_y[1])
1883
120k
        {
1884
120k
            const int is_globalmv =
1885
120k
                b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV);
1886
120k
            const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2])
1887
120k
                &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv];
1888
120k
            const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
1889
120k
            enum RectTxfmSize ytx = b->max_ytx, uvtx = b->uvtx;
1890
120k
            if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
1891
10.3k
                ytx  = (enum RectTxfmSize) TX_4X4;
1892
10.3k
                uvtx = (enum RectTxfmSize) TX_4X4;
1893
10.3k
            }
1894
120k
            dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, lf_lvls,
1895
120k
                                       t->bx, t->by, f->w4, f->h4, b->skip, bs,
1896
120k
                                       ytx, tx_split, uvtx, f->cur.p.layout,
1897
120k
                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
1898
120k
                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
1899
120k
                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
1900
120k
        }
1901
1902
        // context updates
1903
115k
        if (is_comp)
1904
13.8k
            splat_tworef_mv(f->c, t, bs, b, bw4, bh4);
1905
101k
        else
1906
101k
            splat_oneref_mv(f->c, t, bs, b, bw4, bh4);
1907
115k
        BlockContext *edge = t->a;
1908
358k
        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
1909
243k
#define set_ctx(rep_macro) \
1910
243k
            rep_macro(edge->seg_pred, off, seg_pred); \
1911
243k
            rep_macro(edge->skip_mode, off, b->skip_mode); \
1912
243k
            rep_macro(edge->intra, off, 0); \
1913
243k
            rep_macro(edge->skip, off, b->skip); \
1914
243k
            rep_macro(edge->pal_sz, off, 0); \
1915
            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
1916
243k
            rep_macro(t->pal_sz_uv[i], off, 0); \
1917
243k
            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
1918
243k
            rep_macro(edge->comp_type, off, b->comp_type); \
1919
243k
            rep_macro(edge->filter[0], off, filter[0]); \
1920
243k
            rep_macro(edge->filter[1], off, filter[1]); \
1921
243k
            rep_macro(edge->mode, off, b->inter_mode); \
1922
243k
            rep_macro(edge->ref[0], off, b->ref[0]); \
1923
243k
            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
1924
243k
            case_set(b_dim[2 + i]);
1925
243k
#undef set_ctx
1926
243k
        }
1927
115k
        if (has_chroma) {
1928
109k
            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
1929
109k
            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
1930
109k
        }
1931
115k
    }
1932
1933
    // update contexts
1934
1.74M
    if (f->frame_hdr->segmentation.enabled &&
1935
1.04M
        f->frame_hdr->segmentation.update_map)
1936
950k
    {
1937
950k
        uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx];
1938
950k
#define set_ctx(rep_macro) \
1939
5.01M
        for (int y = 0; y < bh4; y++) { \
1940
4.06M
            rep_macro(seg_ptr, 0, b->seg_id); \
1941
4.06M
            seg_ptr += f->b4_stride; \
1942
4.06M
        }
1943
950k
        case_set(b_dim[2]);
1944
950k
#undef set_ctx
1945
950k
    }
1946
1.74M
    if (!b->skip) {
1947
681k
        uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1];
1948
681k
        const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15);
1949
681k
        const int bx_idx = (bx4 & 16) >> 4;
1950
2.46M
        for (int y = 0; y < bh4; y += 2, noskip_mask++) {
1951
1.78M
            (*noskip_mask)[bx_idx] |= mask;
1952
1.78M
            if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway
1953
219k
                (*noskip_mask)[1] |= mask;
1954
1.78M
        }
1955
681k
    }
1956
1957
1.75M
    if (t->frame_thread.pass == 1 && !b->intra && IS_INTER_OR_SWITCH(f->frame_hdr)) {
1958
121k
        const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
1959
121k
        int (*const lowest_px)[2] = ts->lowest_pixel[sby];
1960
1961
        // keep track of motion vectors for each reference
1962
121k
        if (b->comp_type == COMP_INTER_NONE) {
1963
            // y
1964
107k
            if (imin(bw4, bh4) > 1 &&
1965
74.2k
                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
1966
71.8k
                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
1967
4.52k
            {
1968
4.52k
                affine_lowest_px_luma(t, &lowest_px[b->ref[0]][0], b_dim,
1969
4.52k
                                      b->motion_mode == MM_WARP ? &t->warpmv :
1970
4.52k
                                      &f->frame_hdr->gmv[b->ref[0]]);
1971
103k
            } else {
1972
103k
                mc_lowest_px(&lowest_px[b->ref[0]][0], t->by, bh4, b->mv[0].y,
1973
103k
                             0, &f->svc[b->ref[0]][1]);
1974
103k
                if (b->motion_mode == MM_OBMC) {
1975
20.6k
                    obmc_lowest_px(t, lowest_px, 0, b_dim, bx4, by4, w4, h4);
1976
20.6k
                }
1977
103k
            }
1978
1979
            // uv
1980
107k
            if (has_chroma) {
1981
                // sub8x8 derivation
1982
98.0k
                int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
1983
98.0k
                refmvs_block *const *r;
1984
98.0k
                if (is_sub8x8) {
1985
1.60k
                    assert(ss_hor == 1);
1986
1.60k
                    r = &t->rt.r[(t->by & 31) + 5];
1987
1.60k
                    if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
1988
1.60k
                    if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
1989
1.60k
                    if (bw4 == 1 && bh4 == ss_ver)
1990
300
                        is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
1991
1.60k
                }
1992
1993
                // chroma prediction
1994
98.0k
                if (is_sub8x8) {
1995
1.59k
                    assert(ss_hor == 1);
1996
1.59k
                    if (bw4 == 1 && bh4 == ss_ver) {
1997
300
                        const refmvs_block *const rr = &r[-1][t->bx - 1];
1998
300
                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
1999
300
                                     t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
2000
300
                                     &f->svc[rr->ref.ref[0] - 1][1]);
2001
300
                    }
2002
1.59k
                    if (bw4 == 1) {
2003
633
                        const refmvs_block *const rr = &r[0][t->bx - 1];
2004
633
                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
2005
633
                                     t->by, bh4, rr->mv.mv[0].y, ss_ver,
2006
633
                                     &f->svc[rr->ref.ref[0] - 1][1]);
2007
633
                    }
2008
1.59k
                    if (bh4 == ss_ver) {
2009
1.25k
                        const refmvs_block *const rr = &r[-1][t->bx];
2010
1.25k
                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
2011
1.25k
                                     t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
2012
1.25k
                                     &f->svc[rr->ref.ref[0] - 1][1]);
2013
1.25k
                    }
2014
1.59k
                    mc_lowest_px(&lowest_px[b->ref[0]][1], t->by, bh4,
2015
1.59k
                                 b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
2016
96.4k
                } else {
2017
96.4k
                    if (imin(cbw4, cbh4) > 1 &&
2018
64.5k
                        ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
2019
62.1k
                         (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
2020
4.46k
                    {
2021
4.46k
                        affine_lowest_px_chroma(t, &lowest_px[b->ref[0]][1], b_dim,
2022
4.46k
                                                b->motion_mode == MM_WARP ? &t->warpmv :
2023
4.46k
                                                &f->frame_hdr->gmv[b->ref[0]]);
2024
91.9k
                    } else {
2025
91.9k
                        mc_lowest_px(&lowest_px[b->ref[0]][1],
2026
91.9k
                                     t->by & ~ss_ver, bh4 << (bh4 == ss_ver),
2027
91.9k
                                     b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
2028
91.9k
                        if (b->motion_mode == MM_OBMC) {
2029
20.5k
                            obmc_lowest_px(t, lowest_px, 1, b_dim, bx4, by4, w4, h4);
2030
20.5k
                        }
2031
91.9k
                    }
2032
96.4k
                }
2033
98.0k
            }
2034
107k
        } else {
2035
            // y
2036
41.6k
            for (int i = 0; i < 2; i++) {
2037
27.7k
                if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
2038
1.96k
                    affine_lowest_px_luma(t, &lowest_px[b->ref[i]][0], b_dim,
2039
1.96k
                                          &f->frame_hdr->gmv[b->ref[i]]);
2040
25.7k
                } else {
2041
25.7k
                    mc_lowest_px(&lowest_px[b->ref[i]][0], t->by, bh4,
2042
25.7k
                                 b->mv[i].y, 0, &f->svc[b->ref[i]][1]);
2043
25.7k
                }
2044
27.7k
            }
2045
2046
            // uv
2047
35.3k
            if (has_chroma) for (int i = 0; i < 2; i++) {
2048
23.5k
                if (b->inter_mode == GLOBALMV_GLOBALMV &&
2049
4.72k
                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
2050
1.85k
                {
2051
1.85k
                    affine_lowest_px_chroma(t, &lowest_px[b->ref[i]][1], b_dim,
2052
1.85k
                                            &f->frame_hdr->gmv[b->ref[i]]);
2053
21.7k
                } else {
2054
21.7k
                    mc_lowest_px(&lowest_px[b->ref[i]][1], t->by, bh4,
2055
21.7k
                                 b->mv[i].y, ss_ver, &f->svc[b->ref[i]][1]);
2056
21.7k
                }
2057
23.5k
            }
2058
13.8k
        }
2059
121k
    }
2060
2061
1.74M
    return 0;
2062
1.74M
}
2063
2064
#if __has_feature(memory_sanitizer)
2065
2066
#include <sanitizer/msan_interface.h>
2067
2068
/*
 * MemorySanitizer wrapper around decode_b().
 *
 * Forwards every argument to decode_b(). If that call succeeds and the low
 * bit of t->frame_thread.pass is clear, each pixel row covered by the block in
 * f->cur is handed to MSan; a row that fails the shadow test is reported on
 * stderr before __msan_check_mem_is_initialized() is invoked on it.
 *
 * Returns the value produced by decode_b(), unchanged.
 */
static int checked_decode_b(Dav1dTaskContext *const t,
                            const enum BlockLevel bl,
                            const enum BlockSize bs,
                            const enum BlockPartition bp,
                            const enum EdgeFlags intra_edge_flags)
{
    const Dav1dFrameContext *const f = t->f;
    const int err = decode_b(t, bl, bs, bp, intra_edge_flags);

    // nothing to verify on failure, or when the low pass bit is set
    if (err != 0 || (t->frame_thread.pass & 1))
        return err;

    // chroma subsampling flags of the current picture
    const int is_420  = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int not_444 = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

    const uint8_t *const dims = dav1d_block_dimensions[bs];
    const int bw4 = dims[0], bh4 = dims[1];
    // block dimensions clipped against the frame's right/bottom edge
    const int w4 = imin(bw4, f->bw - t->bx);
    const int h4 = imin(bh4, f->bh - t->by);
    const int has_chroma = f->seq_hdr->layout != DAV1D_PIXEL_LAYOUT_I400 &&
                           (bw4 > not_444 || t->bx & 1) &&
                           (bh4 > is_420 || t->by & 1);
    const int n_planes = has_chroma ? 3 : 1;
    // 1 when samples take two bytes, 0 otherwise
    const int hbd = !!f->seq_hdr->hbd;

    for (int pl = 0; pl < n_planes; pl++) {
        // subsampling only applies to the chroma planes (pl != 0)
        const int pl_ss_ver = pl && is_420;
        const int pl_ss_hor = pl && not_444;
        const ptrdiff_t stride = f->cur.stride[!!pl];
        const int bx = t->bx & ~pl_ss_hor;
        const int by = t->by & ~pl_ss_ver;
        const int width  = w4 << (2 - pl_ss_hor + (bw4 == pl_ss_hor));
        const int height = h4 << (2 - pl_ss_ver + (bh4 == pl_ss_ver));
        const size_t line_sz = width << hbd;

        // first byte of the block's top row in this plane
        const uint8_t *row = f->cur.data[pl];
        row += (by << (2 - pl_ss_ver)) * stride;
        row += bx << (2 - pl_ss_hor + hbd);

        for (int y = 0; y < height; y++, row += stride) {
            if (__msan_test_shadow(row, line_sz) == -1)
                continue; // row passed the shadow test

            fprintf(stderr, "B[%d](%d, %d) w4:%d, h4:%d, row:%d\n",
                    pl, bx, by, w4, h4, y);
            __msan_check_mem_is_initialized(row, line_sz);
        }
    }

    return err;
}
2112
2113
#define decode_b checked_decode_b
2114
2115
#endif /* __has_feature(memory_sanitizer) */
2116
2117
static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,
2118
                     const EdgeNode *const node)
2119
2.46M
{
2120
2.46M
    const Dav1dFrameContext *const f = t->f;
2121
2.46M
    Dav1dTileState *const ts = t->ts;
2122
2.46M
    const int hsz = 16 >> bl;
2123
2.46M
    const int have_h_split = f->bw > t->bx + hsz;
2124
2.46M
    const int have_v_split = f->bh > t->by + hsz;
2125
2126
2.46M
    if (!have_h_split && !have_v_split) {
2127
176k
        assert(bl < BL_8X8);
2128
176k
        return decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0));
2129
176k
    }
2130
2131
2.28M
    uint16_t *pc;
2132
2.28M
    enum BlockPartition bp;
2133
2.28M
    int ctx, bx8, by8;
2134
2.28M
    if (t->frame_thread.pass != 2) {
2135
1.29M
        if (0 && bl == BL_64X64)
2136
0
            printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n",
2137
0
                   f->frame_hdr->frame_offset, t->by, t->bx, bl, ts->msac.rng);
2138
1.29M
        bx8 = (t->bx & 31) >> 1;
2139
1.29M
        by8 = (t->by & 31) >> 1;
2140
1.29M
        ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8);
2141
1.29M
        pc = ts->cdf.m.partition[bl][ctx];
2142
1.29M
    }
2143
2144
2.28M
    if (have_h_split && have_v_split) {
2145
1.88M
        if (t->frame_thread.pass == 2) {
2146
807k
            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
2147
807k
            bp = b->bl == bl ? b->bp : PARTITION_SPLIT;
2148
1.08M
        } else {
2149
1.08M
            bp = dav1d_msac_decode_symbol_adapt16(&ts->msac, pc,
2150
1.08M
                                                  dav1d_partition_type_count[bl]);
2151
1.08M
            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
2152
681
                (bp == PARTITION_V || bp == PARTITION_V4 ||
2153
651
                 bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
2154
46
            {
2155
46
                return 1;
2156
46
            }
2157
1.08M
            if (DEBUG_BLOCK_INFO)
2158
0
                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2159
0
                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, bp,
2160
0
                       ts->msac.rng);
2161
1.08M
        }
2162
1.88M
        const uint8_t *const b = dav1d_block_sizes[bl][bp];
2163
2164
1.88M
        switch (bp) {
2165
763k
        case PARTITION_NONE:
2166
763k
            if (decode_b(t, bl, b[0], PARTITION_NONE, node->o))
2167
4
                return -1;
2168
763k
            break;
2169
763k
        case PARTITION_H:
2170
242k
            if (decode_b(t, bl, b[0], PARTITION_H, node->h[0]))
2171
2
                return -1;
2172
242k
            t->by += hsz;
2173
242k
            if (decode_b(t, bl, b[0], PARTITION_H, node->h[1]))
2174
2
                return -1;
2175
242k
            t->by -= hsz;
2176
242k
            break;
2177
165k
        case PARTITION_V:
2178
165k
            if (decode_b(t, bl, b[0], PARTITION_V, node->v[0]))
2179
4
                return -1;
2180
165k
            t->bx += hsz;
2181
165k
            if (decode_b(t, bl, b[0], PARTITION_V, node->v[1]))
2182
2
                return -1;
2183
165k
            t->bx -= hsz;
2184
165k
            break;
2185
428k
        case PARTITION_SPLIT:
2186
428k
            if (bl == BL_8X8) {
2187
62.0k
                const EdgeTip *const tip = (const EdgeTip *) node;
2188
62.0k
                assert(hsz == 1);
2189
62.0k
                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL))
2190
2
                    return -1;
2191
62.0k
                const enum Filter2d tl_filter = t->tl_4x4_filter;
2192
62.0k
                t->bx++;
2193
62.0k
                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
2194
2
                    return -1;
2195
62.0k
                t->bx--;
2196
62.0k
                t->by++;
2197
62.0k
                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
2198
3
                    return -1;
2199
62.0k
                t->bx++;
2200
62.0k
                t->tl_4x4_filter = tl_filter;
2201
62.0k
                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
2202
2
                    return -1;
2203
62.0k
                t->bx--;
2204
62.0k
                t->by--;
2205
62.0k
#if ARCH_X86_64
2206
62.0k
                if (t->frame_thread.pass) {
2207
                    /* In 8-bit mode with 2-pass decoding the coefficient buffer
2208
                     * can end up misaligned due to skips here. Work around
2209
                     * the issue by explicitly realigning the buffer. */
2210
62.0k
                    const int p = t->frame_thread.pass & 1;
2211
62.0k
                    ts->frame_thread[p].cf =
2212
62.0k
                        (void*)(((uintptr_t)ts->frame_thread[p].cf + 63) & ~63);
2213
62.0k
                }
2214
62.0k
#endif
2215
366k
            } else {
2216
366k
                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0)))
2217
31
                    return 1;
2218
366k
                t->bx += hsz;
2219
366k
                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1)))
2220
32
                    return 1;
2221
366k
                t->bx -= hsz;
2222
366k
                t->by += hsz;
2223
366k
                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2)))
2224
24
                    return 1;
2225
366k
                t->bx += hsz;
2226
366k
                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 3)))
2227
31
                    return 1;
2228
365k
                t->bx -= hsz;
2229
365k
                t->by -= hsz;
2230
365k
            }
2231
427k
            break;
2232
427k
        case PARTITION_T_TOP_SPLIT: {
2233
37.7k
            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL))
2234
2
                return -1;
2235
37.7k
            t->bx += hsz;
2236
37.7k
            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1]))
2237
0
                return -1;
2238
37.7k
            t->bx -= hsz;
2239
37.7k
            t->by += hsz;
2240
37.7k
            if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1]))
2241
0
                return -1;
2242
37.7k
            t->by -= hsz;
2243
37.7k
            break;
2244
37.7k
        }
2245
41.5k
        case PARTITION_T_BOTTOM_SPLIT: {
2246
41.5k
            if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0]))
2247
2
                return -1;
2248
41.5k
            t->by += hsz;
2249
41.5k
            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0]))
2250
0
                return -1;
2251
41.5k
            t->bx += hsz;
2252
41.5k
            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0))
2253
0
                return -1;
2254
41.5k
            t->bx -= hsz;
2255
41.5k
            t->by -= hsz;
2256
41.5k
            break;
2257
41.5k
        }
2258
30.2k
        case PARTITION_T_LEFT_SPLIT: {
2259
30.2k
            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL))
2260
0
                return -1;
2261
30.2k
            t->by += hsz;
2262
30.2k
            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1]))
2263
2
                return -1;
2264
30.2k
            t->by -= hsz;
2265
30.2k
            t->bx += hsz;
2266
30.2k
            if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1]))
2267
2
                return -1;
2268
30.2k
            t->bx -= hsz;
2269
30.2k
            break;
2270
30.2k
        }
2271
33.1k
        case PARTITION_T_RIGHT_SPLIT: {
2272
33.1k
            if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0]))
2273
2
                return -1;
2274
33.1k
            t->bx += hsz;
2275
33.1k
            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0]))
2276
2
                return -1;
2277
33.1k
            t->by += hsz;
2278
33.1k
            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0))
2279
2
                return -1;
2280
33.1k
            t->by -= hsz;
2281
33.1k
            t->bx -= hsz;
2282
33.1k
            break;
2283
33.1k
        }
2284
88.0k
        case PARTITION_H4: {
2285
88.0k
            const EdgeBranch *const branch = (const EdgeBranch *) node;
2286
88.0k
            if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0]))
2287
4
                return -1;
2288
87.9k
            t->by += hsz >> 1;
2289
87.9k
            if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4))
2290
0
                return -1;
2291
87.9k
            t->by += hsz >> 1;
2292
87.9k
            if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM))
2293
2
                return -1;
2294
87.9k
            t->by += hsz >> 1;
2295
87.9k
            if (t->by < f->bh)
2296
87.9k
                if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1]))
2297
0
                    return -1;
2298
87.9k
            t->by -= hsz * 3 >> 1;
2299
87.9k
            break;
2300
87.9k
        }
2301
60.3k
        case PARTITION_V4: {
2302
60.3k
            const EdgeBranch *const branch = (const EdgeBranch *) node;
2303
60.3k
            if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0]))
2304
1
                return -1;
2305
60.3k
            t->bx += hsz >> 1;
2306
60.3k
            if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4))
2307
0
                return -1;
2308
60.3k
            t->bx += hsz >> 1;
2309
60.3k
            if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT))
2310
2
                return -1;
2311
60.3k
            t->bx += hsz >> 1;
2312
60.3k
            if (t->bx < f->bw)
2313
59.7k
                if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1]))
2314
2
                    return -1;
2315
60.3k
            t->bx -= hsz * 3 >> 1;
2316
60.3k
            break;
2317
60.3k
        }
2318
0
        default: assert(0);
2319
1.88M
        }
2320
1.88M
    } else if (have_h_split) {
2321
173k
        unsigned is_split;
2322
173k
        if (t->frame_thread.pass == 2) {
2323
81.1k
            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
2324
81.1k
            is_split = b->bl != bl;
2325
92.1k
        } else {
2326
92.1k
            is_split = dav1d_msac_decode_bool(&ts->msac,
2327
92.1k
                           gather_top_partition_prob(pc, bl));
2328
92.1k
            if (DEBUG_BLOCK_INFO)
2329
0
                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2330
0
                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
2331
0
                       is_split ? PARTITION_SPLIT : PARTITION_H, ts->msac.rng);
2332
92.1k
        }
2333
2334
173k
        assert(bl < BL_8X8);
2335
173k
        if (is_split) {
2336
88.3k
            bp = PARTITION_SPLIT;
2337
88.3k
            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
2338
88.3k
            t->bx += hsz;
2339
88.3k
            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) return 1;
2340
88.2k
            t->bx -= hsz;
2341
88.2k
        } else {
2342
85.0k
            bp = PARTITION_H;
2343
85.0k
            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_H][0],
2344
85.0k
                         PARTITION_H, node->h[0]))
2345
0
                return -1;
2346
85.0k
        }
2347
222k
    } else {
2348
222k
        assert(have_v_split);
2349
222k
        unsigned is_split;
2350
222k
        if (t->frame_thread.pass == 2) {
2351
102k
            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
2352
102k
            is_split = b->bl != bl;
2353
119k
        } else {
2354
119k
            is_split = dav1d_msac_decode_bool(&ts->msac,
2355
119k
                           gather_left_partition_prob(pc, bl));
2356
119k
            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
2357
4
                return 1;
2358
119k
            if (DEBUG_BLOCK_INFO)
2359
0
                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2360
0
                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
2361
0
                       is_split ? PARTITION_SPLIT : PARTITION_V, ts->msac.rng);
2362
119k
        }
2363
2364
222k
        assert(bl < BL_8X8);
2365
222k
        if (is_split) {
2366
96.1k
            bp = PARTITION_SPLIT;
2367
96.1k
            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
2368
96.1k
            t->by += hsz;
2369
96.1k
            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) return 1;
2370
96.1k
            t->by -= hsz;
2371
125k
        } else {
2372
125k
            bp = PARTITION_V;
2373
125k
            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_V][0],
2374
125k
                         PARTITION_V, node->v[0]))
2375
1
                return -1;
2376
125k
        }
2377
222k
    }
2378
2379
2.28M
    if (t->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) {
2380
983k
#define set_ctx(rep_macro) \
2381
983k
        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
2382
983k
        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
2383
983k
        case_set_upto16(ulog2(hsz));
2384
983k
#undef set_ctx
2385
983k
    }
2386
2387
2.28M
    return 0;
2388
2.28M
}
2389
2390
352k
/*
 * Fill the arrays of a BlockContext with their starting values.
 *
 * ctx      - context to reset
 * keyframe - non-zero selects the key-frame defaults: `intra` is filled with
 *            this value, `mode` becomes DC_PRED and the inter-only fields
 *            (`ref`, `comp_type`) are left alone
 * pass     - when equal to 2, only `intra`, `uvmode` and (for key frames)
 *            `mode` are written; every other field keeps its current value
 */
static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) {
    // fields that are reset regardless of the pass
    memset(ctx->intra, keyframe, sizeof ctx->intra);
    memset(ctx->uvmode, DC_PRED, sizeof ctx->uvmode);
    if (keyframe) {
        memset(ctx->mode, DC_PRED, sizeof ctx->mode);
    }

    if (pass != 2) {
        // fields cleared to zero
        memset(ctx->partition, 0, sizeof ctx->partition);
        memset(ctx->skip, 0, sizeof ctx->skip);
        memset(ctx->skip_mode, 0, sizeof ctx->skip_mode);
        memset(ctx->seg_pred, 0, sizeof ctx->seg_pred);
        memset(ctx->pal_sz, 0, sizeof ctx->pal_sz);

        // transform-related defaults
        memset(ctx->tx_lpf_y, 2, sizeof ctx->tx_lpf_y);
        memset(ctx->tx_lpf_uv, 1, sizeof ctx->tx_lpf_uv);
        memset(ctx->tx_intra, -1, sizeof ctx->tx_intra);
        memset(ctx->tx, TX_64X64, sizeof ctx->tx);

        // coefficient contexts and filter defaults
        memset(ctx->lcoef, 0x40, sizeof ctx->lcoef);
        memset(ctx->ccoef, 0x40, sizeof ctx->ccoef);
        memset(ctx->filter, DAV1D_N_SWITCHABLE_FILTERS, sizeof ctx->filter);

        // inter-only state, written for non-key frames only
        if (!keyframe) {
            memset(ctx->ref, -1, sizeof ctx->ref);
            memset(ctx->comp_type, 0, sizeof ctx->comp_type);
            memset(ctx->mode, NEARESTMV, sizeof ctx->mode);
        }
    }
}
2416
2417
// { Y+U+V, Y+U } * 4
2418
static const uint8_t ss_size_mul[4][2] = {
2419
    [DAV1D_PIXEL_LAYOUT_I400] = {  4, 4 },
2420
    [DAV1D_PIXEL_LAYOUT_I420] = {  6, 5 },
2421
    [DAV1D_PIXEL_LAYOUT_I422] = {  8, 6 },
2422
    [DAV1D_PIXEL_LAYOUT_I444] = { 12, 8 },
2423
};
2424
2425
static void setup_tile(Dav1dTileState *const ts,
2426
                       const Dav1dFrameContext *const f,
2427
                       const uint8_t *const data, const size_t sz,
2428
                       const int tile_row, const int tile_col,
2429
                       const unsigned tile_start_off)
2430
56.3k
{
2431
56.3k
    const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
2432
56.3k
    const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
2433
56.3k
    const int col_sb_end = f->frame_hdr->tiling.col_start_sb[tile_col + 1];
2434
56.3k
    const int row_sb_start = f->frame_hdr->tiling.row_start_sb[tile_row];
2435
56.3k
    const int row_sb_end = f->frame_hdr->tiling.row_start_sb[tile_row + 1];
2436
56.3k
    const int sb_shift = f->sb_shift;
2437
2438
56.3k
    const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
2439
169k
    for (int p = 0; p < 2; p++) {
2440
112k
        ts->frame_thread[p].pal_idx = f->frame_thread.pal_idx ?
2441
49.8k
            &f->frame_thread.pal_idx[(size_t)tile_start_off * size_mul[1] / 8] :
2442
112k
            NULL;
2443
112k
        ts->frame_thread[p].cbi = f->frame_thread.cbi ?
2444
112k
            &f->frame_thread.cbi[(size_t)tile_start_off * size_mul[0] / 64] :
2445
112k
            NULL;
2446
112k
        ts->frame_thread[p].cf = f->frame_thread.cf ?
2447
112k
            (uint8_t*)f->frame_thread.cf +
2448
112k
                (((size_t)tile_start_off * size_mul[0]) >> !f->seq_hdr->hbd) :
2449
112k
            NULL;
2450
112k
    }
2451
2452
56.3k
    dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
2453
56.3k
    ts->last_qidx = f->frame_hdr->quant.yac;
2454
56.3k
    ts->last_delta_lf.u32 = 0;
2455
2456
56.3k
    dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update);
2457
2458
56.3k
    ts->tiling.row = tile_row;
2459
56.3k
    ts->tiling.col = tile_col;
2460
56.3k
    ts->tiling.col_start = col_sb_start << sb_shift;
2461
56.3k
    ts->tiling.col_end = imin(col_sb_end << sb_shift, f->bw);
2462
56.3k
    ts->tiling.row_start = row_sb_start << sb_shift;
2463
56.3k
    ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);
2464
2465
    // Reference Restoration Unit (used for exp coding)
2466
56.3k
    int sb_idx, unit_idx;
2467
56.3k
    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
2468
        // vertical components only
2469
3.19k
        sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w;
2470
3.19k
        unit_idx = (ts->tiling.row_start & 16) >> 3;
2471
53.1k
    } else {
2472
53.1k
        sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
2473
53.1k
        unit_idx = ((ts->tiling.row_start & 16) >> 3) +
2474
53.1k
                   ((ts->tiling.col_start & 16) >> 4);
2475
53.1k
    }
2476
225k
    for (int p = 0; p < 3; p++) {
2477
169k
        if (!((f->lf.restore_planes >> p) & 1U))
2478
97.6k
            continue;
2479
2480
71.4k
        if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
2481
2.32k
            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
2482
2.32k
            const int d = f->frame_hdr->super_res.width_scale_denominator;
2483
2.32k
            const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
2484
2.32k
            const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3;
2485
2.32k
            const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift;
2486
2.32k
            const int px_x = x << (unit_size_log2 + ss_hor);
2487
2.32k
            const int u_idx = unit_idx + ((px_x & 64) >> 6);
2488
2.32k
            const int sb128x = px_x >> 7;
2489
2.32k
            if (sb128x >= f->sr_sb128w) continue;
2490
2.29k
            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + sb128x].lr[p][u_idx];
2491
69.1k
        } else {
2492
69.1k
            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
2493
69.1k
        }
2494
2495
71.3k
        ts->lr_ref[p]->filter_v[0] = 3;
2496
71.3k
        ts->lr_ref[p]->filter_v[1] = -7;
2497
71.3k
        ts->lr_ref[p]->filter_v[2] = 15;
2498
71.3k
        ts->lr_ref[p]->filter_h[0] = 3;
2499
71.3k
        ts->lr_ref[p]->filter_h[1] = -7;
2500
71.3k
        ts->lr_ref[p]->filter_h[2] = 15;
2501
71.3k
        ts->lr_ref[p]->sgr_weights[0] = -32;
2502
71.3k
        ts->lr_ref[p]->sgr_weights[1] = 31;
2503
71.3k
    }
2504
2505
56.3k
    if (f->c->n_tc > 1) {
2506
169k
        for (int p = 0; p < 2; p++)
2507
112k
            atomic_init(&ts->progress[p], row_sb_start);
2508
56.3k
    }
2509
56.3k
}
2510
2511
static void read_restoration_info(Dav1dTaskContext *const t,
2512
                                  Av1RestorationUnit *const lr, const int p,
2513
                                  const enum Dav1dRestorationType frame_type)
2514
209k
{
2515
209k
    const Dav1dFrameContext *const f = t->f;
2516
209k
    Dav1dTileState *const ts = t->ts;
2517
2518
209k
    if (frame_type == DAV1D_RESTORATION_SWITCHABLE) {
2519
107k
        const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac,
2520
107k
                               ts->cdf.m.restore_switchable, 2);
2521
107k
        lr->type = filter + !!filter; /* NONE/WIENER/SGRPROJ */
2522
107k
    } else {
2523
101k
        const unsigned type =
2524
101k
            dav1d_msac_decode_bool_adapt(&ts->msac,
2525
101k
                frame_type == DAV1D_RESTORATION_WIENER ?
2526
55.7k
                ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj);
2527
101k
        lr->type = type ? frame_type : DAV1D_RESTORATION_NONE;
2528
101k
    }
2529
2530
209k
    if (lr->type == DAV1D_RESTORATION_WIENER) {
2531
41.7k
        lr->filter_v[0] = p ? 0 :
2532
41.7k
            dav1d_msac_decode_subexp(&ts->msac,
2533
16.3k
                ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5;
2534
41.7k
        lr->filter_v[1] =
2535
41.7k
            dav1d_msac_decode_subexp(&ts->msac,
2536
41.7k
                ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23;
2537
41.7k
        lr->filter_v[2] =
2538
41.7k
            dav1d_msac_decode_subexp(&ts->msac,
2539
41.7k
                ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17;
2540
2541
41.7k
        lr->filter_h[0] = p ? 0 :
2542
41.7k
            dav1d_msac_decode_subexp(&ts->msac,
2543
16.3k
                ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5;
2544
41.7k
        lr->filter_h[1] =
2545
41.7k
            dav1d_msac_decode_subexp(&ts->msac,
2546
41.7k
                ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23;
2547
41.7k
        lr->filter_h[2] =
2548
41.7k
            dav1d_msac_decode_subexp(&ts->msac,
2549
41.7k
                ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17;
2550
41.7k
        memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
2551
41.7k
        ts->lr_ref[p] = lr;
2552
41.7k
        if (DEBUG_BLOCK_INFO)
2553
0
            printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
2554
0
                   p, lr->filter_v[0], lr->filter_v[1],
2555
0
                   lr->filter_v[2], lr->filter_h[0],
2556
0
                   lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
2557
167k
    } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) {
2558
37.5k
        const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4);
2559
37.5k
        const uint16_t *const sgr_params = dav1d_sgr_params[idx];
2560
37.5k
        lr->type += idx;
2561
37.5k
        lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac,
2562
31.0k
            ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0;
2563
37.5k
        lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac,
2564
30.4k
            ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95;
2565
37.5k
        memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
2566
37.5k
        memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
2567
37.5k
        ts->lr_ref[p] = lr;
2568
37.5k
        if (DEBUG_BLOCK_INFO)
2569
0
            printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
2570
0
                   p, idx, lr->sgr_weights[0],
2571
0
                   lr->sgr_weights[1], ts->msac.rng);
2572
37.5k
    }
2573
209k
}
2574
2575
// modeled after the equivalent function in aomdec:decodeframe.c
2576
0
static int check_trailing_bits_after_symbol_coder(const MsacContext *const msac) {
2577
    // check marker bit (single 1), followed by zeroes
2578
0
    const int n_bits = -(msac->cnt + 14);
2579
0
    assert(n_bits <= 0); // this assumes we errored out when cnt <= -15 in caller
2580
0
    const int n_bytes = (n_bits + 7) >> 3;
2581
0
    const uint8_t *p = &msac->buf_pos[n_bytes];
2582
0
    const int pattern = 128 >> ((n_bits - 1) & 7);
2583
0
    if ((p[-1] & (2 * pattern - 1)) != pattern)
2584
0
        return 1;
2585
2586
    // check remainder zero bytes
2587
0
    for (; p < msac->buf_end; p++)
2588
0
        if (*p)
2589
0
            return 1;
2590
2591
0
    return 0;
2592
0
}
2593
2594
177k
/*
 * Decode one superblock row of the tile attached to the task context.
 *
 * t - task context; t->ts selects the tile, t->by the superblock row and
 *     t->frame_thread.pass the frame-threading pass.
 *
 * In pass 2 only the superblocks are decoded (followed by the intra-edge
 * backup). Otherwise the per-superblock CDEF indices are invalidated, the
 * loop-restoration units starting in each superblock are read, and the
 * superblock is decoded; afterwards temporal MVs and the tile-edge
 * transform sizes needed by the loopfilter are saved.
 *
 * Returns 0 on success and 1 on failure: a flush request, a decode_sb()
 * error, a symbol decoder overread, or - in strict compliance mode, on the
 * last superblock row of the tile - invalid trailing bits.
 */
int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) {
    const Dav1dFrameContext *const f = t->f;
    const Dav1dContext *const c = f->c;
    Dav1dTileState *const ts = t->ts;
    const enum BlockLevel root_bl = f->seq_hdr->sb128 ? BL_128X128 : BL_64X64;
    const int sb_step = f->sb_step;
    const int tile_row = ts->tiling.row, tile_col = ts->tiling.col;
    const int first_sb_col = f->frame_hdr->tiling.col_start_sb[tile_col];
    const int first_sb128_col = first_sb_col >> !f->seq_hdr->sb128;

    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
        dav1d_refmvs_tile_sbrow_init(&t->rt, &f->rf, ts->tiling.col_start,
                                     ts->tiling.col_end, ts->tiling.row_start,
                                     ts->tiling.row_end, t->by >> f->sb_shift,
                                     ts->tiling.row, t->frame_thread.pass);
    }

    // With frame threading, reset this row's lowest-referenced-pixel table
    if (IS_INTER_OR_SWITCH(f->frame_hdr) && c->n_fc > 1) {
        const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
        int (*const lowest_px)[2] = ts->lowest_pixel[sby];
        for (int n = 0; n < 7; n++) {
            lowest_px[n][0] = INT_MIN;
            lowest_px[n][1] = INT_MIN;
        }
    }

    reset_context(&t->l, IS_KEY_OR_INTRA(f->frame_hdr), t->frame_thread.pass);

    if (t->frame_thread.pass == 2) {
        // Pass 2 uses a second set of above contexts when tile threading
        // is enabled
        const int off_2pass = c->n_tc > 1 ? f->sb128w * f->frame_hdr->tiling.rows : 0;
        t->bx = ts->tiling.col_start;
        t->a = f->a + off_2pass + first_sb128_col + tile_row * f->sb128w;
        while (t->bx < ts->tiling.col_end) {
            if (atomic_load_explicit(c->flush, memory_order_acquire))
                return 1;
            if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
                return 1;
            // advance the above context once per 128-wide column
            if (t->bx & 16 || f->seq_hdr->sb128)
                t->a++;
            t->bx += sb_step;
        }
        f->bd_fn.backup_ipred_edge(t);
        return 0;
    }

    if (c->n_tc > 1 && f->frame_hdr->use_ref_frame_mvs) {
        c->refmvs_dsp.load_tmvs(&f->rf, ts->tiling.row,
                                ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
                                t->by >> 1, (t->by + sb_step) >> 1);
    }
    memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv));

    const int sb128y = t->by >> 5;
    t->bx = ts->tiling.col_start;
    t->a = f->a + first_sb128_col + tile_row * f->sb128w;
    t->lf_mask = f->lf.mask + sb128y * f->sb128w + first_sb128_col;
    for (; t->bx < ts->tiling.col_end; t->bx += sb_step) {
        if (atomic_load_explicit(c->flush, memory_order_acquire))
            return 1;

        // Mark the CDEF index (or all four, for 128x128 superblocks) as
        // not yet read
        if (root_bl == BL_128X128) {
            t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx;
            for (int i = 0; i < 4; i++)
                t->cur_sb_cdef_idx_ptr[i] = -1;
        } else {
            const int cdef_pos = ((t->bx & 16) >> 4) + ((t->by & 16) >> 3);
            t->cur_sb_cdef_idx_ptr = &t->lf_mask->cdef_idx[cdef_pos];
            t->cur_sb_cdef_idx_ptr[0] = -1;
        }

        // Restoration filter
        for (int p = 0; p < 3; p++) {
            if (!((f->lf.restore_planes >> p) & 1U))
                continue;

            const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
            const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
            const int unit_size = 1 << unit_size_log2;
            const int half_unit = unit_size >> 1;
            const unsigned mask = unit_size - 1;

            // Only rows that start a restoration unit carry parameters
            const int plane_y = (t->by * 4) >> ss_ver;
            const int plane_h = (f->cur.p.h + ss_ver) >> ss_ver;
            if (plane_y & mask) continue;
            // Round half up at frame boundaries, if there's more than one
            // restoration unit
            if (plane_y && plane_y + half_unit > plane_h) continue;

            const enum Dav1dRestorationType frame_type = f->frame_hdr->restoration.type[p];

            if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
                // Super-resolution: units are laid out in the upscaled
                // frame, so one superblock may cover zero or several units
                const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
                const int n_units = imax(1, (w + half_unit) >> unit_size_log2);

                const int d = f->frame_hdr->super_res.width_scale_denominator;
                const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;
                const int x0 = (((4 *  t->bx            * d) >> ss_hor) + rnd) >> shift;
                const int x1 = (((4 * (t->bx + sb_step) * d) >> ss_hor) + rnd) >> shift;
                const int x_end = imin(x1, n_units);

                for (int x = x0; x < x_end; x++) {
                    const int px_x = x << (unit_size_log2 + ss_hor);
                    const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);
                    const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);
                    Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];

                    read_restoration_info(t, lr, p, frame_type);
                }
            } else {
                const int plane_x = (4 * t->bx) >> ss_hor;
                if (plane_x & mask) continue;
                const int plane_w = (f->cur.p.w + ss_hor) >> ss_hor;
                // Round half up at frame boundaries, if there's more than one
                // restoration unit
                if (plane_x && plane_x + half_unit > plane_w) continue;
                const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);
                const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
                Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];

                read_restoration_info(t, lr, p, frame_type);
            }
        }

        if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
            return 1;

        // advance the per-128-column state once per 128-wide column
        if (t->bx & 16 || f->seq_hdr->sb128) {
            t->a++;
            t->lf_mask++;
        }
    }

    if (f->seq_hdr->ref_frame_mvs && c->n_tc > 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) {
        dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
                               ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
                               t->by >> 1, (t->by + sb_step) >> 1);
    }

    // backup pre-loopfilter pixels for intra prediction of the next sbrow
    if (t->frame_thread.pass != 1)
        f->bd_fn.backup_ipred_edge(t);

    // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix"
    // up the initial value in neighbour tiles when running the loopfilter
    const int align_h_y = (f->bh + 31) & ~31;
    memcpy(&f->lf.tx_lpf_right_edge[0][align_h_y * tile_col + t->by],
           &t->l.tx_lpf_y[t->by & 16], sb_step);
    const int chroma_ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int align_h_uv = align_h_y >> chroma_ss_ver;
    memcpy(&f->lf.tx_lpf_right_edge[1][align_h_uv * tile_col + (t->by >> chroma_ss_ver)],
           &t->l.tx_lpf_uv[(t->by & 16) >> chroma_ss_ver], sb_step >> chroma_ss_ver);

    // error out on symbol decoder overread
    if (ts->msac.cnt <= -15) return 1;

    // Trailing bits are only verified in strict mode, and only once the
    // last superblock row of the tile has been decoded
    if (!c->strict_std_compliance)
        return 0;
    if ((t->by >> f->sb_shift) + 1 < f->frame_hdr->tiling.row_start_sb[tile_row + 1])
        return 0;
    return check_trailing_bits_after_symbol_coder(&ts->msac) != 0;
}
2749
2750
55.5k
int dav1d_decode_frame_init(Dav1dFrameContext *const f) {
2751
55.5k
    const Dav1dContext *const c = f->c;
2752
55.5k
    int retval = DAV1D_ERR(ENOMEM);
2753
2754
55.5k
    if (f->sbh > f->lf.start_of_tile_row_sz) {
2755
17.7k
        dav1d_free(f->lf.start_of_tile_row);
2756
17.7k
        f->lf.start_of_tile_row = dav1d_malloc(ALLOC_TILE, f->sbh * sizeof(uint8_t));
2757
17.7k
        if (!f->lf.start_of_tile_row) {
2758
0
            f->lf.start_of_tile_row_sz = 0;
2759
0
            goto error;
2760
0
        }
2761
17.7k
        f->lf.start_of_tile_row_sz = f->sbh;
2762
17.7k
    }
2763
55.5k
    int sby = 0;
2764
113k
    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
2765
58.2k
        f->lf.start_of_tile_row[sby++] = tile_row;
2766
162k
        while (sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1])
2767
104k
            f->lf.start_of_tile_row[sby++] = 0;
2768
58.2k
    }
2769
2770
55.5k
    const int n_ts = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
2771
55.5k
    if (n_ts != f->n_ts) {
2772
17.7k
        if (c->n_fc > 1) {
2773
17.7k
            dav1d_free(f->frame_thread.tile_start_off);
2774
17.7k
            f->frame_thread.tile_start_off =
2775
17.7k
                dav1d_malloc(ALLOC_TILE, sizeof(*f->frame_thread.tile_start_off) * n_ts);
2776
17.7k
            if (!f->frame_thread.tile_start_off) {
2777
0
                f->n_ts = 0;
2778
0
                goto error;
2779
0
            }
2780
17.7k
        }
2781
17.7k
        dav1d_free_aligned(f->ts);
2782
17.7k
        f->ts = dav1d_alloc_aligned(ALLOC_TILE, sizeof(*f->ts) * n_ts, 32);
2783
17.7k
        if (!f->ts) goto error;
2784
17.7k
        f->n_ts = n_ts;
2785
17.7k
    }
2786
2787
55.5k
    const int a_sz = f->sb128w * f->frame_hdr->tiling.rows * (1 + (c->n_fc > 1 && c->n_tc > 1));
2788
55.5k
    if (a_sz != f->a_sz) {
2789
17.8k
        dav1d_free(f->a);
2790
17.8k
        f->a = dav1d_malloc(ALLOC_TILE, sizeof(*f->a) * a_sz);
2791
17.8k
        if (!f->a) {
2792
0
            f->a_sz = 0;
2793
0
            goto error;
2794
0
        }
2795
17.8k
        f->a_sz = a_sz;
2796
17.8k
    }
2797
2798
55.5k
    const int num_sb128 = f->sb128w * f->sb128h;
2799
55.5k
    const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
2800
55.5k
    const int hbd = !!f->seq_hdr->hbd;
2801
55.5k
    if (c->n_fc > 1) {
2802
55.5k
        const unsigned sb_step4 = f->sb_step * 4;
2803
55.5k
        int tile_idx = 0;
2804
113k
        for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
2805
58.2k
            const unsigned row_off = f->frame_hdr->tiling.row_start_sb[tile_row] *
2806
58.2k
                                     sb_step4 * f->sb128w * 128;
2807
58.2k
            const unsigned b_diff = (f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
2808
58.2k
                                     f->frame_hdr->tiling.row_start_sb[tile_row]) * sb_step4;
2809
132k
            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
2810
74.6k
                f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff *
2811
74.6k
                    f->frame_hdr->tiling.col_start_sb[tile_col] * sb_step4;
2812
74.6k
            }
2813
58.2k
        }
2814
2815
55.5k
        const int lowest_pixel_mem_sz = f->frame_hdr->tiling.cols * f->sbh;
2816
55.5k
        if (lowest_pixel_mem_sz != f->tile_thread.lowest_pixel_mem_sz) {
2817
17.8k
            dav1d_free(f->tile_thread.lowest_pixel_mem);
2818
17.8k
            f->tile_thread.lowest_pixel_mem =
2819
17.8k
                dav1d_malloc(ALLOC_TILE, lowest_pixel_mem_sz *
2820
17.8k
                             sizeof(*f->tile_thread.lowest_pixel_mem));
2821
17.8k
            if (!f->tile_thread.lowest_pixel_mem) {
2822
0
                f->tile_thread.lowest_pixel_mem_sz = 0;
2823
0
                goto error;
2824
0
            }
2825
17.8k
            f->tile_thread.lowest_pixel_mem_sz = lowest_pixel_mem_sz;
2826
17.8k
        }
2827
55.5k
        int (*lowest_pixel_ptr)[7][2] = f->tile_thread.lowest_pixel_mem;
2828
113k
        for (int tile_row = 0, tile_row_base = 0; tile_row < f->frame_hdr->tiling.rows;
2829
58.2k
             tile_row++, tile_row_base += f->frame_hdr->tiling.cols)
2830
58.2k
        {
2831
58.2k
            const int tile_row_sb_h = f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
2832
58.2k
                                      f->frame_hdr->tiling.row_start_sb[tile_row];
2833
132k
            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
2834
74.5k
                f->ts[tile_row_base + tile_col].lowest_pixel = lowest_pixel_ptr;
2835
74.5k
                lowest_pixel_ptr += tile_row_sb_h;
2836
74.5k
            }
2837
58.2k
        }
2838
2839
55.5k
        const int cbi_sz = num_sb128 * size_mul[0];
2840
55.5k
        if (cbi_sz != f->frame_thread.cbi_sz) {
2841
17.8k
            dav1d_free_aligned(f->frame_thread.cbi);
2842
17.8k
            f->frame_thread.cbi =
2843
17.8k
                dav1d_alloc_aligned(ALLOC_BLOCK, sizeof(*f->frame_thread.cbi) *
2844
17.8k
                                    cbi_sz * 32 * 32 / 4, 64);
2845
17.8k
            if (!f->frame_thread.cbi) {
2846
0
                f->frame_thread.cbi_sz = 0;
2847
0
                goto error;
2848
0
            }
2849
17.8k
            f->frame_thread.cbi_sz = cbi_sz;
2850
17.8k
        }
2851
2852
55.5k
        const int cf_sz = (num_sb128 * size_mul[0]) << hbd;
2853
55.5k
        if (cf_sz != f->frame_thread.cf_sz) {
2854
17.8k
            dav1d_free_aligned(f->frame_thread.cf);
2855
17.8k
            f->frame_thread.cf =
2856
17.8k
                dav1d_alloc_aligned(ALLOC_COEF, (size_t)cf_sz * 128 * 128 / 2, 64);
2857
17.8k
            if (!f->frame_thread.cf) {
2858
0
                f->frame_thread.cf_sz = 0;
2859
0
                goto error;
2860
0
            }
2861
17.8k
            memset(f->frame_thread.cf, 0, (size_t)cf_sz * 128 * 128 / 2);
2862
17.8k
            f->frame_thread.cf_sz = cf_sz;
2863
17.8k
        }
2864
2865
55.5k
        if (f->frame_hdr->allow_screen_content_tools) {
2866
25.6k
            const int pal_sz = num_sb128 << hbd;
2867
25.6k
            if (pal_sz != f->frame_thread.pal_sz) {
2868
8.83k
                dav1d_free_aligned(f->frame_thread.pal);
2869
8.83k
                f->frame_thread.pal =
2870
8.83k
                    dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal) *
2871
8.83k
                                        pal_sz * 16 * 16, 64);
2872
8.83k
                if (!f->frame_thread.pal) {
2873
0
                    f->frame_thread.pal_sz = 0;
2874
0
                    goto error;
2875
0
                }
2876
8.83k
                f->frame_thread.pal_sz = pal_sz;
2877
8.83k
            }
2878
2879
25.6k
            const int pal_idx_sz = num_sb128 * size_mul[1];
2880
25.6k
            if (pal_idx_sz != f->frame_thread.pal_idx_sz) {
2881
8.83k
                dav1d_free_aligned(f->frame_thread.pal_idx);
2882
8.83k
                f->frame_thread.pal_idx =
2883
8.83k
                    dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal_idx) *
2884
8.83k
                                        pal_idx_sz * 128 * 128 / 8, 64);
2885
8.83k
                if (!f->frame_thread.pal_idx) {
2886
0
                    f->frame_thread.pal_idx_sz = 0;
2887
0
                    goto error;
2888
0
                }
2889
8.83k
                f->frame_thread.pal_idx_sz = pal_idx_sz;
2890
8.83k
            }
2891
29.8k
        } else if (f->frame_thread.pal) {
2892
252
            dav1d_freep_aligned(&f->frame_thread.pal);
2893
252
            dav1d_freep_aligned(&f->frame_thread.pal_idx);
2894
252
            f->frame_thread.pal_sz = f->frame_thread.pal_idx_sz = 0;
2895
252
        }
2896
55.5k
    }
2897
2898
    // update allocation of block contexts for above
2899
55.5k
    ptrdiff_t y_stride = f->cur.stride[0], uv_stride = f->cur.stride[1];
2900
55.5k
    const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
2901
55.5k
    const int need_cdef_lpf_copy = c->n_tc > 1 && has_resize;
2902
55.5k
    if (y_stride * f->sbh * 4 != f->lf.cdef_buf_plane_sz[0] ||
2903
37.5k
        uv_stride * f->sbh * 8 != f->lf.cdef_buf_plane_sz[1] ||
2904
37.5k
        need_cdef_lpf_copy != f->lf.need_cdef_lpf_copy ||
2905
37.3k
        f->sbh != f->lf.cdef_buf_sbh)
2906
18.1k
    {
2907
18.1k
        dav1d_free_aligned(f->lf.cdef_line_buf);
2908
18.1k
        size_t alloc_sz = 64;
2909
18.1k
        alloc_sz += (size_t)llabs(y_stride) * 4 * f->sbh << need_cdef_lpf_copy;
2910
18.1k
        alloc_sz += (size_t)llabs(uv_stride) * 8 * f->sbh << need_cdef_lpf_copy;
2911
18.1k
        uint8_t *ptr = f->lf.cdef_line_buf = dav1d_alloc_aligned(ALLOC_CDEF, alloc_sz, 32);
2912
18.1k
        if (!ptr) {
2913
0
            f->lf.cdef_buf_plane_sz[0] = f->lf.cdef_buf_plane_sz[1] = 0;
2914
0
            goto error;
2915
0
        }
2916
2917
18.1k
        ptr += 32;
2918
18.1k
        if (y_stride < 0) {
2919
0
            f->lf.cdef_line[0][0] = ptr - y_stride * (f->sbh * 4 - 1);
2920
0
            f->lf.cdef_line[1][0] = ptr - y_stride * (f->sbh * 4 - 3);
2921
18.1k
        } else {
2922
18.1k
            f->lf.cdef_line[0][0] = ptr + y_stride * 0;
2923
18.1k
            f->lf.cdef_line[1][0] = ptr + y_stride * 2;
2924
18.1k
        }
2925
18.1k
        ptr += llabs(y_stride) * f->sbh * 4;
2926
18.1k
        if (uv_stride < 0) {
2927
0
            f->lf.cdef_line[0][1] = ptr - uv_stride * (f->sbh * 8 - 1);
2928
0
            f->lf.cdef_line[0][2] = ptr - uv_stride * (f->sbh * 8 - 3);
2929
0
            f->lf.cdef_line[1][1] = ptr - uv_stride * (f->sbh * 8 - 5);
2930
0
            f->lf.cdef_line[1][2] = ptr - uv_stride * (f->sbh * 8 - 7);
2931
18.1k
        } else {
2932
18.1k
            f->lf.cdef_line[0][1] = ptr + uv_stride * 0;
2933
18.1k
            f->lf.cdef_line[0][2] = ptr + uv_stride * 2;
2934
18.1k
            f->lf.cdef_line[1][1] = ptr + uv_stride * 4;
2935
18.1k
            f->lf.cdef_line[1][2] = ptr + uv_stride * 6;
2936
18.1k
        }
2937
2938
18.1k
        if (need_cdef_lpf_copy) {
2939
1.43k
            ptr += llabs(uv_stride) * f->sbh * 8;
2940
1.43k
            if (y_stride < 0)
2941
0
                f->lf.cdef_lpf_line[0] = ptr - y_stride * (f->sbh * 4 - 1);
2942
1.43k
            else
2943
1.43k
                f->lf.cdef_lpf_line[0] = ptr;
2944
1.43k
            ptr += llabs(y_stride) * f->sbh * 4;
2945
1.43k
            if (uv_stride < 0) {
2946
0
                f->lf.cdef_lpf_line[1] = ptr - uv_stride * (f->sbh * 4 - 1);
2947
0
                f->lf.cdef_lpf_line[2] = ptr - uv_stride * (f->sbh * 8 - 1);
2948
1.43k
            } else {
2949
1.43k
                f->lf.cdef_lpf_line[1] = ptr;
2950
1.43k
                f->lf.cdef_lpf_line[2] = ptr + uv_stride * f->sbh * 4;
2951
1.43k
            }
2952
1.43k
        }
2953
2954
18.1k
        f->lf.cdef_buf_plane_sz[0] = (int) y_stride * f->sbh * 4;
2955
18.1k
        f->lf.cdef_buf_plane_sz[1] = (int) uv_stride * f->sbh * 8;
2956
18.1k
        f->lf.need_cdef_lpf_copy = need_cdef_lpf_copy;
2957
18.1k
        f->lf.cdef_buf_sbh = f->sbh;
2958
18.1k
    }
2959
2960
55.5k
    const int sb128 = f->seq_hdr->sb128;
2961
18.4E
    const int num_lines = c->n_tc > 1 ? f->sbh * 4 << sb128 : 12;
2962
55.5k
    y_stride = f->sr_cur.p.stride[0], uv_stride = f->sr_cur.p.stride[1];
2963
55.5k
    if (y_stride * num_lines != f->lf.lr_buf_plane_sz[0] ||
2964
37.5k
        uv_stride * num_lines * 2 != f->lf.lr_buf_plane_sz[1])
2965
18.0k
    {
2966
18.0k
        dav1d_free_aligned(f->lf.lr_line_buf);
2967
        // lr simd may overread the input, so slightly over-allocate the lpf buffer
2968
18.0k
        size_t alloc_sz = 128;
2969
18.0k
        alloc_sz += (size_t)llabs(y_stride) * num_lines;
2970
18.0k
        alloc_sz += (size_t)llabs(uv_stride) * num_lines * 2;
2971
18.0k
        uint8_t *ptr = f->lf.lr_line_buf = dav1d_alloc_aligned(ALLOC_LR, alloc_sz, 64);
2972
18.0k
        if (!ptr) {
2973
0
            f->lf.lr_buf_plane_sz[0] = f->lf.lr_buf_plane_sz[1] = 0;
2974
0
            goto error;
2975
0
        }
2976
2977
18.0k
        ptr += 64;
2978
18.0k
        if (y_stride < 0)
2979
0
            f->lf.lr_lpf_line[0] = ptr - y_stride * (num_lines - 1);
2980
18.0k
        else
2981
18.0k
            f->lf.lr_lpf_line[0] = ptr;
2982
18.0k
        ptr += llabs(y_stride) * num_lines;
2983
18.0k
        if (uv_stride < 0) {
2984
0
            f->lf.lr_lpf_line[1] = ptr - uv_stride * (num_lines * 1 - 1);
2985
0
            f->lf.lr_lpf_line[2] = ptr - uv_stride * (num_lines * 2 - 1);
2986
18.0k
        } else {
2987
18.0k
            f->lf.lr_lpf_line[1] = ptr;
2988
18.0k
            f->lf.lr_lpf_line[2] = ptr + uv_stride * num_lines;
2989
18.0k
        }
2990
2991
18.0k
        f->lf.lr_buf_plane_sz[0] = (int) y_stride * num_lines;
2992
18.0k
        f->lf.lr_buf_plane_sz[1] = (int) uv_stride * num_lines * 2;
2993
18.0k
    }
2994
2995
    // update allocation for loopfilter masks
2996
55.5k
    if (num_sb128 != f->lf.mask_sz) {
2997
17.8k
        dav1d_free(f->lf.mask);
2998
17.8k
        dav1d_free(f->lf.level);
2999
17.8k
        f->lf.mask = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.mask) * num_sb128);
3000
        // over-allocate by 3 bytes since some of the SIMD implementations
3001
        // index this from the level type and can thus over-read by up to 3
3002
17.8k
        f->lf.level = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.level) * num_sb128 * 32 * 32 + 3);
3003
17.8k
        if (!f->lf.mask || !f->lf.level) {
3004
0
            f->lf.mask_sz = 0;
3005
0
            goto error;
3006
0
        }
3007
17.8k
        if (c->n_fc > 1) {
3008
17.8k
            dav1d_free(f->frame_thread.b);
3009
17.8k
            f->frame_thread.b = dav1d_malloc(ALLOC_BLOCK, sizeof(*f->frame_thread.b) *
3010
17.8k
                                             num_sb128 * 32 * 32);
3011
17.8k
            if (!f->frame_thread.b) {
3012
0
                f->lf.mask_sz = 0;
3013
0
                goto error;
3014
0
            }
3015
17.8k
        }
3016
17.8k
        f->lf.mask_sz = num_sb128;
3017
17.8k
    }
3018
3019
55.5k
    f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7;
3020
55.5k
    const int lr_mask_sz = f->sr_sb128w * f->sb128h;
3021
55.5k
    if (lr_mask_sz != f->lf.lr_mask_sz) {
3022
17.8k
        dav1d_free(f->lf.lr_mask);
3023
17.8k
        f->lf.lr_mask = dav1d_malloc(ALLOC_LR, sizeof(*f->lf.lr_mask) * lr_mask_sz);
3024
17.8k
        if (!f->lf.lr_mask) {
3025
0
            f->lf.lr_mask_sz = 0;
3026
0
            goto error;
3027
0
        }
3028
17.8k
        f->lf.lr_mask_sz = lr_mask_sz;
3029
17.8k
    }
3030
55.5k
    f->lf.restore_planes =
3031
55.5k
        ((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) +
3032
55.5k
        ((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) +
3033
55.5k
        ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2);
3034
55.5k
    if (f->frame_hdr->loopfilter.sharpness != f->lf.last_sharpness) {
3035
25.8k
        dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr->loopfilter.sharpness);
3036
25.8k
        f->lf.last_sharpness = f->frame_hdr->loopfilter.sharpness;
3037
25.8k
    }
3038
55.5k
    dav1d_calc_lf_values(f->lf.lvl, f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 });
3039
55.5k
    memset(f->lf.mask, 0, sizeof(*f->lf.mask) * num_sb128);
3040
3041
55.5k
    const int ipred_edge_sz = f->sbh * f->sb128w << hbd;
3042
55.5k
    if (ipred_edge_sz != f->ipred_edge_sz) {
3043
18.0k
        dav1d_free_aligned(f->ipred_edge[0]);
3044
18.0k
        uint8_t *ptr = f->ipred_edge[0] =
3045
18.0k
            dav1d_alloc_aligned(ALLOC_IPRED, ipred_edge_sz * 128 * 3, 64);
3046
18.0k
        if (!ptr) {
3047
0
            f->ipred_edge_sz = 0;
3048
0
            goto error;
3049
0
        }
3050
18.0k
        f->ipred_edge[1] = ptr + ipred_edge_sz * 128 * 1;
3051
18.0k
        f->ipred_edge[2] = ptr + ipred_edge_sz * 128 * 2;
3052
18.0k
        f->ipred_edge_sz = ipred_edge_sz;
3053
18.0k
    }
3054
3055
55.5k
    const int re_sz = f->sb128h * f->frame_hdr->tiling.cols;
3056
55.5k
    if (re_sz != f->lf.re_sz) {
3057
17.6k
        dav1d_free(f->lf.tx_lpf_right_edge[0]);
3058
17.6k
        f->lf.tx_lpf_right_edge[0] = dav1d_malloc(ALLOC_LF, re_sz * 32 * 2);
3059
17.6k
        if (!f->lf.tx_lpf_right_edge[0]) {
3060
0
            f->lf.re_sz = 0;
3061
0
            goto error;
3062
0
        }
3063
17.6k
        f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] + re_sz * 32;
3064
17.6k
        f->lf.re_sz = re_sz;
3065
17.6k
    }
3066
3067
    // init ref mvs
3068
55.5k
    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
3069
50.1k
        const int ret =
3070
50.1k
            dav1d_refmvs_init_frame(&f->rf, f->seq_hdr, f->frame_hdr,
3071
50.1k
                                    f->refpoc, f->mvs, f->refrefpoc, f->ref_mvs,
3072
50.1k
                                    f->c->n_tc, f->c->n_fc);
3073
50.1k
        if (ret < 0) goto error;
3074
50.1k
    }
3075
3076
    // setup dequant tables
3077
55.5k
    init_quant_tables(f->seq_hdr, f->frame_hdr, f->frame_hdr->quant.yac, f->dq);
3078
55.5k
    if (f->frame_hdr->quant.qm)
3079
1.00M
        for (int i = 0; i < N_RECT_TX_SIZES; i++) {
3080
956k
            f->qm[i][0] = dav1d_qm_tbl[f->frame_hdr->quant.qm_y][0][i];
3081
956k
            f->qm[i][1] = dav1d_qm_tbl[f->frame_hdr->quant.qm_u][1][i];
3082
956k
            f->qm[i][2] = dav1d_qm_tbl[f->frame_hdr->quant.qm_v][1][i];
3083
956k
        }
3084
5.14k
    else
3085
5.14k
        memset(f->qm, 0, sizeof(f->qm));
3086
3087
    // setup jnt_comp weights
3088
55.5k
    if (f->frame_hdr->switchable_comp_refs) {
3089
281k
        for (int i = 0; i < 7; i++) {
3090
245k
            const unsigned ref0poc = f->refp[i].p.frame_hdr->frame_offset;
3091
3092
983k
            for (int j = i + 1; j < 7; j++) {
3093
737k
                const unsigned ref1poc = f->refp[j].p.frame_hdr->frame_offset;
3094
3095
737k
                const unsigned d1 =
3096
737k
                    imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref0poc,
3097
737k
                                          f->cur.frame_hdr->frame_offset)), 31);
3098
737k
                const unsigned d0 =
3099
737k
                    imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref1poc,
3100
737k
                                          f->cur.frame_hdr->frame_offset)), 31);
3101
737k
                const int order = d0 <= d1;
3102
3103
737k
                static const uint8_t quant_dist_weight[3][2] = {
3104
737k
                    { 2, 3 }, { 2, 5 }, { 2, 7 }
3105
737k
                };
3106
737k
                static const uint8_t quant_dist_lookup_table[4][2] = {
3107
737k
                    { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
3108
737k
                };
3109
3110
737k
                int k;
3111
2.81M
                for (k = 0; k < 3; k++) {
3112
2.12M
                    const int c0 = quant_dist_weight[k][order];
3113
2.12M
                    const int c1 = quant_dist_weight[k][!order];
3114
2.12M
                    const int d0_c0 = d0 * c0;
3115
2.12M
                    const int d1_c1 = d1 * c1;
3116
2.12M
                    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
3117
2.12M
                }
3118
3119
737k
                f->jnt_weights[i][j] = quant_dist_lookup_table[k][order];
3120
737k
            }
3121
245k
        }
3122
35.1k
    }
3123
3124
    /* Init loopfilter pointers. Increasing NULL pointers is technically UB,
3125
     * so just point the chroma pointers in 4:0:0 to the luma plane here to
3126
     * avoid having additional in-loop branches in various places. We never
3127
     * dereference those pointers so it doesn't really matter what they
3128
     * point at, as long as the pointers are valid. */
3129
55.5k
    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
3130
55.5k
    f->lf.p[0] = f->cur.data[0];
3131
55.5k
    f->lf.p[1] = f->cur.data[has_chroma ? 1 : 0];
3132
55.5k
    f->lf.p[2] = f->cur.data[has_chroma ? 2 : 0];
3133
55.5k
    f->lf.sr_p[0] = f->sr_cur.p.data[0];
3134
55.5k
    f->lf.sr_p[1] = f->sr_cur.p.data[has_chroma ? 1 : 0];
3135
55.5k
    f->lf.sr_p[2] = f->sr_cur.p.data[has_chroma ? 2 : 0];
3136
3137
55.5k
    retval = 0;
3138
55.5k
error:
3139
55.5k
    return retval;
3140
55.5k
}
3141
3142
53.8k
int dav1d_decode_frame_init_cdf(Dav1dFrameContext *const f) {
3143
53.8k
    const Dav1dContext *const c = f->c;
3144
53.8k
    int retval = DAV1D_ERR(EINVAL);
3145
3146
53.8k
    if (f->frame_hdr->refresh_context)
3147
37.7k
        dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);
3148
3149
    // parse individual tiles per tile group
3150
53.8k
    int tile_row = 0, tile_col = 0;
3151
53.8k
    f->task_thread.update_set = 0;
3152
107k
    for (int i = 0; i < f->n_tile_data; i++) {
3153
53.8k
        const uint8_t *data = f->tile[i].data.data;
3154
53.8k
        size_t size = f->tile[i].data.sz;
3155
3156
110k
        for (int j = f->tile[i].start; j <= f->tile[i].end; j++) {
3157
56.5k
            size_t tile_sz;
3158
56.5k
            if (j == f->tile[i].end) {
3159
53.6k
                tile_sz = size;
3160
53.6k
            } else {
3161
2.92k
                if (f->frame_hdr->tiling.n_bytes > size) goto error;
3162
2.90k
                tile_sz = 0;
3163
6.36k
                for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++)
3164
3.45k
                    tile_sz |= (unsigned)*data++ << (k * 8);
3165
2.90k
                tile_sz++;
3166
2.90k
                size -= f->frame_hdr->tiling.n_bytes;
3167
2.90k
                if (tile_sz > size) goto error;
3168
2.90k
            }
3169
3170
56.3k
            setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++,
3171
18.4E
                       c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0);
3172
3173
56.3k
            if (tile_col == f->frame_hdr->tiling.cols) {
3174
54.3k
                tile_col = 0;
3175
54.3k
                tile_row++;
3176
54.3k
            }
3177
56.3k
            if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context)
3178
37.7k
                f->task_thread.update_set = 1;
3179
56.3k
            data += tile_sz;
3180
56.3k
            size -= tile_sz;
3181
56.3k
        }
3182
53.8k
    }
3183
3184
53.6k
    if (c->n_tc > 1) {
3185
53.6k
        const int uses_2pass = c->n_fc > 1;
3186
228k
        for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows * (1 + uses_2pass); n++)
3187
175k
            reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr),
3188
18.4E
                          uses_2pass ? 1 + (n >= f->sb128w * f->frame_hdr->tiling.rows) : 0);
3189
53.6k
    }
3190
3191
53.6k
    retval = 0;
3192
53.8k
error:
3193
53.8k
    return retval;
3194
53.6k
}
3195
3196
0
int dav1d_decode_frame_main(Dav1dFrameContext *const f) {
3197
0
    const Dav1dContext *const c = f->c;
3198
0
    int retval = DAV1D_ERR(EINVAL);
3199
3200
0
    assert(f->c->n_tc == 1);
3201
3202
0
    Dav1dTaskContext *const t = &c->tc[f - c->fc];
3203
0
    t->f = f;
3204
0
    t->frame_thread.pass = 0;
3205
3206
0
    for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows; n++)
3207
0
        reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), 0);
3208
3209
    // no threading - we explicitly interleave tile/sbrow decoding
3210
    // and post-filtering, so that the full process runs in-line
3211
0
    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
3212
0
        const int sbh_end =
3213
0
            imin(f->frame_hdr->tiling.row_start_sb[tile_row + 1], f->sbh);
3214
0
        for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row];
3215
0
             sby < sbh_end; sby++)
3216
0
        {
3217
0
            t->by = sby << (4 + f->seq_hdr->sb128);
3218
0
            const int by_end = (t->by + f->sb_step) >> 1;
3219
0
            if (f->frame_hdr->use_ref_frame_mvs) {
3220
0
                f->c->refmvs_dsp.load_tmvs(&f->rf, tile_row,
3221
0
                                           0, f->bw >> 1, t->by >> 1, by_end);
3222
0
            }
3223
0
            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
3224
0
                t->ts = &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col];
3225
0
                if (dav1d_decode_tile_sbrow(t)) goto error;
3226
0
            }
3227
0
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
3228
0
                dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
3229
0
                                       0, f->bw >> 1, t->by >> 1, by_end);
3230
0
            }
3231
3232
            // loopfilter + cdef + restoration
3233
0
            f->bd_fn.filter_sbrow(f, sby);
3234
0
        }
3235
0
    }
3236
3237
0
    retval = 0;
3238
0
error:
3239
0
    return retval;
3240
0
}
3241
3242
93.6k
void dav1d_decode_frame_exit(Dav1dFrameContext *const f, int retval) {
3243
93.6k
    const Dav1dContext *const c = f->c;
3244
3245
93.6k
    if (f->sr_cur.p.data[0])
3246
93.6k
        atomic_init(&f->task_thread.error, 0);
3247
3248
93.6k
    if (c->n_fc > 1 && retval && f->frame_thread.cf) {
3249
21.1k
        memset(f->frame_thread.cf, 0,
3250
21.1k
               (size_t)f->frame_thread.cf_sz * 128 * 128 / 2);
3251
21.1k
    }
3252
749k
    for (int i = 0; i < 7; i++) {
3253
655k
        if (f->refp[i].p.frame_hdr) {
3254
348k
            if (!retval && c->n_fc > 1 && c->strict_std_compliance &&
3255
348k
                atomic_load(&f->refp[i].progress[1]) == FRAME_ERROR)
3256
0
            {
3257
0
                retval = DAV1D_ERR(EINVAL);
3258
0
                atomic_store(&f->task_thread.error, 1);
3259
0
                atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
3260
0
            }
3261
348k
            dav1d_thread_picture_unref(&f->refp[i]);
3262
348k
        }
3263
655k
        dav1d_ref_dec(&f->ref_mvs_ref[i]);
3264
655k
    }
3265
3266
93.6k
    dav1d_picture_unref_internal(&f->cur);
3267
93.6k
    dav1d_thread_picture_unref(&f->sr_cur);
3268
93.6k
    dav1d_cdf_thread_unref(&f->in_cdf);
3269
93.6k
    if (f->frame_hdr && f->frame_hdr->refresh_context) {
3270
47.4k
        if (f->out_cdf.progress)
3271
47.4k
            atomic_store(f->out_cdf.progress, retval == 0 ? 1 : TILE_ERROR);
3272
47.4k
        dav1d_cdf_thread_unref(&f->out_cdf);
3273
47.4k
    }
3274
93.6k
    dav1d_ref_dec(&f->cur_segmap_ref);
3275
93.6k
    dav1d_ref_dec(&f->prev_segmap_ref);
3276
93.6k
    dav1d_ref_dec(&f->mvs_ref);
3277
93.6k
    dav1d_ref_dec(&f->seq_hdr_ref);
3278
93.6k
    dav1d_ref_dec(&f->frame_hdr_ref);
3279
3280
150k
    for (int i = 0; i < f->n_tile_data; i++)
3281
56.5k
        dav1d_data_unref_internal(&f->tile[i].data);
3282
93.6k
    f->task_thread.retval = retval;
3283
93.6k
}
3284
3285
0
int dav1d_decode_frame(Dav1dFrameContext *const f) {
3286
0
    assert(f->c->n_fc == 1);
3287
    // if n_tc > 1 (but n_fc == 1), we could run init/exit in the task
3288
    // threads also. Not sure it makes a measurable difference.
3289
0
    int res = dav1d_decode_frame_init(f);
3290
0
    if (!res) res = dav1d_decode_frame_init_cdf(f);
3291
    // wait until all threads have completed
3292
0
    if (!res) {
3293
0
        if (f->c->n_tc > 1) {
3294
0
            res = dav1d_task_create_tile_sbrow(f, 0, 1);
3295
0
            pthread_mutex_lock(&f->task_thread.ttd->lock);
3296
0
            pthread_cond_signal(&f->task_thread.ttd->cond);
3297
0
            if (!res) {
3298
0
                while (!f->task_thread.done[0] ||
3299
0
                       atomic_load(&f->task_thread.task_counter) > 0)
3300
0
                {
3301
0
                    pthread_cond_wait(&f->task_thread.cond,
3302
0
                                      &f->task_thread.ttd->lock);
3303
0
                }
3304
0
            }
3305
0
            pthread_mutex_unlock(&f->task_thread.ttd->lock);
3306
0
            res = f->task_thread.retval;
3307
0
        } else {
3308
0
            res = dav1d_decode_frame_main(f);
3309
0
            if (!res && f->frame_hdr->refresh_context && f->task_thread.update_set) {
3310
0
                dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
3311
0
                                        &f->ts[f->frame_hdr->tiling.update].cdf);
3312
0
            }
3313
0
        }
3314
0
    }
3315
0
    dav1d_decode_frame_exit(f, res);
3316
0
    res = f->task_thread.retval;
3317
0
    f->n_tile_data = 0;
3318
0
    return res;
3319
0
}
3320
3321
5.94k
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
3322
5.94k
    const int err = out_w * step - (in_w << 14);
3323
5.94k
    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2);
3324
5.94k
    return x0 & 0x3fff;
3325
5.94k
}
3326
3327
56.6k
int dav1d_submit_frame(Dav1dContext *const c) {
3328
56.6k
    Dav1dFrameContext *f;
3329
56.6k
    int res = -1;
3330
3331
    // wait for c->out_delayed[next] and move into c->out if visible
3332
56.6k
    Dav1dThreadPicture *out_delayed;
3333
56.6k
    if (c->n_fc > 1) {
3334
56.6k
        pthread_mutex_lock(&c->task_thread.lock);
3335
56.6k
        const unsigned next = c->frame_thread.next++;
3336
56.6k
        if (c->frame_thread.next == c->n_fc)
3337
7.73k
            c->frame_thread.next = 0;
3338
3339
56.6k
        f = &c->fc[next];
3340
72.7k
        while (f->n_tile_data > 0)
3341
16.0k
            pthread_cond_wait(&f->task_thread.cond,
3342
16.0k
                              &c->task_thread.lock);
3343
56.6k
        out_delayed = &c->frame_thread.out_delayed[next];
3344
56.6k
        if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
3345
38.4k
            unsigned first = atomic_load(&c->task_thread.first);
3346
38.4k
            if (first + 1U < c->n_fc)
3347
38.4k
                atomic_fetch_add(&c->task_thread.first, 1U);
3348
5.58k
            else
3349
38.4k
                atomic_store(&c->task_thread.first, 0);
3350
38.4k
            atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
3351
38.4k
                                           &first, UINT_MAX);
3352
38.4k
            if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
3353
17.9k
                c->task_thread.cur--;
3354
38.4k
        }
3355
56.6k
        const int error = f->task_thread.retval;
3356
56.6k
        if (error) {
3357
156
            f->task_thread.retval = 0;
3358
156
            c->cached_error = error;
3359
156
            dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
3360
156
            dav1d_thread_picture_unref(out_delayed);
3361
56.5k
        } else if (out_delayed->p.data[0]) {
3362
38.2k
            const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
3363
38.2k
                                                           memory_order_relaxed);
3364
38.2k
            if ((out_delayed->visible || c->output_invisible_frames) &&
3365
37.7k
                progress != FRAME_ERROR)
3366
37.7k
            {
3367
37.7k
                dav1d_thread_picture_ref(&c->out, out_delayed);
3368
37.7k
                c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
3369
37.7k
            }
3370
38.2k
            dav1d_thread_picture_unref(out_delayed);
3371
38.2k
        }
3372
56.6k
    } else {
3373
0
        f = c->fc;
3374
0
    }
3375
3376
56.6k
    f->seq_hdr = c->seq_hdr;
3377
56.6k
    f->seq_hdr_ref = c->seq_hdr_ref;
3378
56.6k
    dav1d_ref_inc(f->seq_hdr_ref);
3379
56.6k
    f->frame_hdr = c->frame_hdr;
3380
56.6k
    f->frame_hdr_ref = c->frame_hdr_ref;
3381
56.6k
    c->frame_hdr = NULL;
3382
56.6k
    c->frame_hdr_ref = NULL;
3383
56.6k
    f->dsp = &c->dsp[f->seq_hdr->hbd];
3384
3385
56.6k
    const int bpc = 8 + 2 * f->seq_hdr->hbd;
3386
3387
56.6k
    if (!f->dsp->ipred.intra_pred[DC_PRED]) {
3388
5.12k
        Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd];
3389
3390
5.12k
        switch (bpc) {
3391
0
#define assign_bitdepth_case(bd) \
3392
5.12k
            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
3393
5.12k
            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
3394
5.12k
            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
3395
5.12k
            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
3396
5.12k
            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
3397
5.12k
            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
3398
5.12k
            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
3399
5.12k
            break
3400
0
#if CONFIG_8BPC
3401
2.71k
        case 8:
3402
2.71k
            assign_bitdepth_case(8);
3403
0
#endif
3404
0
#if CONFIG_16BPC
3405
1.94k
        case 10:
3406
2.41k
        case 12:
3407
2.41k
            assign_bitdepth_case(16);
3408
0
#endif
3409
0
#undef assign_bitdepth_case
3410
0
        default:
3411
0
            dav1d_log(c, "Compiled without support for %d-bit decoding\n",
3412
0
                    8 + 2 * f->seq_hdr->hbd);
3413
0
            res = DAV1D_ERR(ENOPROTOOPT);
3414
0
            goto error;
3415
5.12k
        }
3416
5.12k
    }
3417
3418
56.6k
#define assign_bitdepth_case(bd) \
3419
56.6k
        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
3420
56.6k
        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
3421
56.6k
        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
3422
56.6k
        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
3423
56.6k
        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
3424
56.6k
        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
3425
56.6k
        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
3426
56.6k
        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
3427
56.6k
        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
3428
56.6k
        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
3429
56.6k
        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
3430
56.6k
        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
3431
56.6k
        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
3432
56.6k
        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
3433
56.6k
    if (!f->seq_hdr->hbd) {
3434
32.9k
#if CONFIG_8BPC
3435
32.9k
        assign_bitdepth_case(8);
3436
32.9k
#endif
3437
32.9k
    } else {
3438
23.6k
#if CONFIG_16BPC
3439
23.6k
        assign_bitdepth_case(16);
3440
23.6k
#endif
3441
23.6k
    }
3442
56.6k
#undef assign_bitdepth_case
3443
3444
56.6k
    int ref_coded_width[7];
3445
56.6k
    if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
3446
49.8k
        if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) {
3447
44.1k
            const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
3448
44.1k
            if (!c->refs[pri_ref].p.p.data[0]) {
3449
3
                res = DAV1D_ERR(EINVAL);
3450
3
                goto error;
3451
3
            }
3452
44.1k
        }
3453
398k
        for (int i = 0; i < 7; i++) {
3454
348k
            const int refidx = f->frame_hdr->refidx[i];
3455
348k
            if (!c->refs[refidx].p.p.data[0] ||
3456
348k
                f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w ||
3457
348k
                f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h ||
3458
348k
                f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 ||
3459
348k
                f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 ||
3460
348k
                f->seq_hdr->layout != c->refs[refidx].p.p.p.layout ||
3461
348k
                bpc != c->refs[refidx].p.p.p.bpc)
3462
125
            {
3463
151
                for (int j = 0; j < i; j++)
3464
26
                    dav1d_thread_picture_unref(&f->refp[j]);
3465
125
                res = DAV1D_ERR(EINVAL);
3466
125
                goto error;
3467
125
            }
3468
348k
            dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
3469
348k
            ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0];
3470
348k
            if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w ||
3471
273k
                f->frame_hdr->height != c->refs[refidx].p.p.p.h)
3472
91.4k
            {
3473
91.4k
#define scale_fac(ref_sz, this_sz) \
3474
188k
    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
3475
91.4k
                f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
3476
91.4k
                                               f->frame_hdr->width[0]);
3477
91.4k
                f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
3478
91.4k
                                               f->frame_hdr->height);
3479
91.4k
                f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
3480
91.4k
                f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
3481
256k
            } else {
3482
256k
                f->svc[i][0].scale = f->svc[i][1].scale = 0;
3483
256k
            }
3484
348k
            f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION &&
3485
131k
                                     !f->frame_hdr->force_integer_mv &&
3486
106k
                                     !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) &&
3487
105k
                                     !f->svc[i][0].scale;
3488
348k
        }
3489
49.8k
    }
3490
3491
    // setup entropy
3492
56.5k
    if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
3493
12.4k
        dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
3494
44.0k
    } else {
3495
44.0k
        const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
3496
44.0k
        dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
3497
44.0k
    }
3498
56.5k
    if (f->frame_hdr->refresh_context) {
3499
39.8k
        res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1);
3500
39.8k
        if (res < 0) goto error;
3501
39.8k
    }
3502
3503
    // FIXME qsort so tiles are in order (for frame threading)
3504
56.5k
    if (f->n_tile_data_alloc < c->n_tile_data) {
3505
18.2k
        dav1d_free(f->tile);
3506
18.2k
        assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile));
3507
18.2k
        f->tile = dav1d_malloc(ALLOC_TILE, c->n_tile_data * sizeof(*f->tile));
3508
18.2k
        if (!f->tile) {
3509
0
            f->n_tile_data_alloc = f->n_tile_data = 0;
3510
0
            res = DAV1D_ERR(ENOMEM);
3511
0
            goto error;
3512
0
        }
3513
18.2k
        f->n_tile_data_alloc = c->n_tile_data;
3514
18.2k
    }
3515
56.5k
    memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile));
3516
56.5k
    memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile));
3517
56.5k
    f->n_tile_data = c->n_tile_data;
3518
56.5k
    c->n_tile_data = 0;
3519
3520
    // allocate frame
3521
56.5k
    res = dav1d_thread_picture_alloc(c, f, bpc);
3522
56.5k
    if (res < 0) goto error;
3523
3524
56.5k
    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
3525
2.97k
        res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p);
3526
2.97k
        if (res < 0) goto error;
3527
53.5k
    } else {
3528
53.5k
        dav1d_picture_ref(&f->cur, &f->sr_cur.p);
3529
53.5k
    }
3530
3531
56.5k
    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
3532
2.97k
        f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);
3533
2.97k
        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
3534
2.97k
        const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;
3535
2.97k
        const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
3536
2.97k
        f->resize_step[1] = scale_fac(in_cw, out_cw);
3537
2.97k
#undef scale_fac
3538
2.97k
        f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);
3539
2.97k
        f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);
3540
2.97k
    }
3541
3542
    // move f->cur into output queue
3543
56.5k
    if (c->n_fc == 1) {
3544
0
        if (f->frame_hdr->show_frame || c->output_invisible_frames) {
3545
0
            dav1d_thread_picture_ref(&c->out, &f->sr_cur);
3546
0
            c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur);
3547
0
        }
3548
56.5k
    } else {
3549
56.5k
        dav1d_thread_picture_ref(out_delayed, &f->sr_cur);
3550
56.5k
    }
3551
3552
56.5k
    f->w4 = (f->frame_hdr->width[0] + 3) >> 2;
3553
56.5k
    f->h4 = (f->frame_hdr->height + 3) >> 2;
3554
56.5k
    f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1;
3555
56.5k
    f->bh = ((f->frame_hdr->height + 7) >> 3) << 1;
3556
56.5k
    f->sb128w = (f->bw + 31) >> 5;
3557
56.5k
    f->sb128h = (f->bh + 31) >> 5;
3558
56.5k
    f->sb_shift = 4 + f->seq_hdr->sb128;
3559
56.5k
    f->sb_step = 16 << f->seq_hdr->sb128;
3560
56.5k
    f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
3561
56.5k
    f->b4_stride = (f->bw + 31) & ~31;
3562
56.5k
    f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
3563
56.5k
    atomic_init(&f->task_thread.error, 0);
3564
56.5k
    const int uses_2pass = c->n_fc > 1;
3565
56.5k
    const int cols = f->frame_hdr->tiling.cols;
3566
56.5k
    const int rows = f->frame_hdr->tiling.rows;
3567
56.5k
    atomic_store(&f->task_thread.task_counter,
3568
56.5k
                 (cols * rows + f->sbh) << uses_2pass);
3569
3570
    // ref_mvs
3571
56.5k
    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
3572
50.8k
        f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool,
3573
50.8k
            sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1));
3574
50.8k
        if (!f->mvs_ref) {
3575
0
            res = DAV1D_ERR(ENOMEM);
3576
0
            goto error;
3577
0
        }
3578
50.8k
        f->mvs = f->mvs_ref->data;
3579
50.8k
        if (!f->frame_hdr->allow_intrabc) {
3580
398k
            for (int i = 0; i < 7; i++)
3581
348k
                f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset;
3582
49.7k
        } else {
3583
1.10k
            memset(f->refpoc, 0, sizeof(f->refpoc));
3584
1.10k
        }
3585
50.8k
        if (f->frame_hdr->use_ref_frame_mvs) {
3586
29.3k
            for (int i = 0; i < 7; i++) {
3587
25.6k
                const int refidx = f->frame_hdr->refidx[i];
3588
25.6k
                const int ref_w = ((ref_coded_width[i] + 7) >> 3) << 1;
3589
25.6k
                const int ref_h = ((f->refp[i].p.p.h + 7) >> 3) << 1;
3590
25.6k
                if (c->refs[refidx].refmvs != NULL &&
3591
16.9k
                    ref_w == f->bw && ref_h == f->bh)
3592
15.5k
                {
3593
15.5k
                    f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
3594
15.5k
                    dav1d_ref_inc(f->ref_mvs_ref[i]);
3595
15.5k
                    f->ref_mvs[i] = c->refs[refidx].refmvs->data;
3596
15.5k
                } else {
3597
10.1k
                    f->ref_mvs[i] = NULL;
3598
10.1k
                    f->ref_mvs_ref[i] = NULL;
3599
10.1k
                }
3600
25.6k
                memcpy(f->refrefpoc[i], c->refs[refidx].refpoc,
3601
25.6k
                       sizeof(*f->refrefpoc));
3602
25.6k
            }
3603
47.1k
        } else {
3604
47.1k
            memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
3605
47.1k
        }
3606
50.8k
    } else {
3607
5.68k
        f->mvs_ref = NULL;
3608
5.68k
        memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
3609
5.68k
    }
3610
3611
    // segmap
3612
56.5k
    if (f->frame_hdr->segmentation.enabled) {
3613
        // By default, the previous segmentation map is not initialised.
3614
25.7k
        f->prev_segmap_ref = NULL;
3615
25.7k
        f->prev_segmap = NULL;
3616
3617
        // We might need a previous frame's segmentation map. This
3618
        // happens if there is either no update or a temporal update.
3619
25.7k
        if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) {
3620
22.2k
            const int pri_ref = f->frame_hdr->primary_ref_frame;
3621
22.2k
            assert(pri_ref != DAV1D_PRIMARY_REF_NONE);
3622
22.2k
            const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1;
3623
22.2k
            const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;
3624
22.2k
            if (ref_w == f->bw && ref_h == f->bh) {
3625
20.6k
                f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap;
3626
20.6k
                if (f->prev_segmap_ref) {
3627
18.1k
                    dav1d_ref_inc(f->prev_segmap_ref);
3628
18.1k
                    f->prev_segmap = f->prev_segmap_ref->data;
3629
18.1k
                }
3630
20.6k
            }
3631
22.2k
        }
3632
3633
25.7k
        if (f->frame_hdr->segmentation.update_map) {
3634
            // We're updating an existing map, but need somewhere to
3635
            // put the new values. Allocate them here (the data
3636
            // actually gets set elsewhere)
3637
3.80k
            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool,
3638
3.80k
                sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h);
3639
3.80k
            if (!f->cur_segmap_ref) {
3640
0
                dav1d_ref_dec(&f->prev_segmap_ref);
3641
0
                res = DAV1D_ERR(ENOMEM);
3642
0
                goto error;
3643
0
            }
3644
3.80k
            f->cur_segmap = f->cur_segmap_ref->data;
3645
21.9k
        } else if (f->prev_segmap_ref) {
3646
            // We're not updating an existing map, and we have a valid
3647
            // reference. Use that.
3648
18.0k
            f->cur_segmap_ref = f->prev_segmap_ref;
3649
18.0k
            dav1d_ref_inc(f->cur_segmap_ref);
3650
18.0k
            f->cur_segmap = f->prev_segmap_ref->data;
3651
18.0k
        } else {
3652
            // We need to make a new map. Allocate one here and zero it out.
3653
3.96k
            const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h;
3654
3.96k
            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size);
3655
3.96k
            if (!f->cur_segmap_ref) {
3656
0
                res = DAV1D_ERR(ENOMEM);
3657
0
                goto error;
3658
0
            }
3659
3.96k
            f->cur_segmap = f->cur_segmap_ref->data;
3660
3.96k
            memset(f->cur_segmap, 0, segmap_size);
3661
3.96k
        }
3662
30.7k
    } else {
3663
30.7k
        f->cur_segmap = NULL;
3664
30.7k
        f->cur_segmap_ref = NULL;
3665
30.7k
        f->prev_segmap_ref = NULL;
3666
30.7k
    }
3667
3668
    // update references etc.
3669
56.5k
    const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags;
3670
508k
    for (int i = 0; i < 8; i++) {
3671
452k
        if (refresh_frame_flags & (1 << i)) {
3672
203k
            if (c->refs[i].p.p.frame_hdr)
3673
162k
                dav1d_thread_picture_unref(&c->refs[i].p);
3674
203k
            dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);
3675
3676
203k
            dav1d_cdf_thread_unref(&c->cdf[i]);
3677
203k
            if (f->frame_hdr->refresh_context) {
3678
127k
                dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
3679
127k
            } else {
3680
75.4k
                dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf);
3681
75.4k
            }
3682
3683
203k
            dav1d_ref_dec(&c->refs[i].segmap);
3684
203k
            c->refs[i].segmap = f->cur_segmap_ref;
3685
203k
            if (f->cur_segmap_ref)
3686
90.3k
                dav1d_ref_inc(f->cur_segmap_ref);
3687
203k
            dav1d_ref_dec(&c->refs[i].refmvs);
3688
203k
            if (!f->frame_hdr->allow_intrabc) {
3689
194k
                c->refs[i].refmvs = f->mvs_ref;
3690
194k
                if (f->mvs_ref)
3691
150k
                    dav1d_ref_inc(f->mvs_ref);
3692
194k
            }
3693
203k
            memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc));
3694
203k
        }
3695
452k
    }
3696
3697
56.5k
    if (c->n_fc == 1) {
3698
0
        if ((res = dav1d_decode_frame(f)) < 0) {
3699
0
            dav1d_thread_picture_unref(&c->out);
3700
0
            for (int i = 0; i < 8; i++) {
3701
0
                if (refresh_frame_flags & (1 << i)) {
3702
0
                    if (c->refs[i].p.p.frame_hdr)
3703
0
                        dav1d_thread_picture_unref(&c->refs[i].p);
3704
0
                    dav1d_cdf_thread_unref(&c->cdf[i]);
3705
0
                    dav1d_ref_dec(&c->refs[i].segmap);
3706
0
                    dav1d_ref_dec(&c->refs[i].refmvs);
3707
0
                }
3708
0
            }
3709
0
            goto error;
3710
0
        }
3711
56.5k
    } else {
3712
56.5k
        dav1d_task_frame_init(f);
3713
56.5k
        pthread_mutex_unlock(&c->task_thread.lock);
3714
56.5k
    }
3715
3716
56.5k
    return 0;
3717
128
error:
3718
128
    atomic_init(&f->task_thread.error, 1);
3719
128
    dav1d_cdf_thread_unref(&f->in_cdf);
3720
128
    if (f->frame_hdr->refresh_context)
3721
107
        dav1d_cdf_thread_unref(&f->out_cdf);
3722
1.02k
    for (int i = 0; i < 7; i++) {
3723
896
        if (f->refp[i].p.frame_hdr)
3724
0
            dav1d_thread_picture_unref(&f->refp[i]);
3725
896
        dav1d_ref_dec(&f->ref_mvs_ref[i]);
3726
896
    }
3727
128
    if (c->n_fc == 1)
3728
0
        dav1d_thread_picture_unref(&c->out);
3729
128
    else
3730
128
        dav1d_thread_picture_unref(out_delayed);
3731
128
    dav1d_picture_unref_internal(&f->cur);
3732
128
    dav1d_thread_picture_unref(&f->sr_cur);
3733
128
    dav1d_ref_dec(&f->mvs_ref);
3734
128
    dav1d_ref_dec(&f->seq_hdr_ref);
3735
128
    dav1d_ref_dec(&f->frame_hdr_ref);
3736
128
    dav1d_data_props_copy(&c->cached_error_props, &c->in.m);
3737
3738
128
    for (int i = 0; i < f->n_tile_data; i++)
3739
0
        dav1d_data_unref_internal(&f->tile[i].data);
3740
128
    f->n_tile_data = 0;
3741
3742
128
    if (c->n_fc > 1)
3743
128
        pthread_mutex_unlock(&c->task_thread.lock);
3744
3745
128
    return res;
3746
56.5k
}