Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018-2021, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <errno.h> |
31 | | #include <limits.h> |
32 | | #include <string.h> |
33 | | #include <stdio.h> |
34 | | #include <inttypes.h> |
35 | | |
36 | | #include "dav1d/data.h" |
37 | | |
38 | | #include "common/frame.h" |
39 | | #include "common/intops.h" |
40 | | |
41 | | #include "src/ctx.h" |
42 | | #include "src/decode.h" |
43 | | #include "src/dequant_tables.h" |
44 | | #include "src/env.h" |
45 | | #include "src/filmgrain.h" |
46 | | #include "src/log.h" |
47 | | #include "src/qm.h" |
48 | | #include "src/recon.h" |
49 | | #include "src/ref.h" |
50 | | #include "src/tables.h" |
51 | | #include "src/thread_task.h" |
52 | | #include "src/warpmv.h" |
53 | | |
54 | | static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr, |
55 | | const Dav1dFrameHeader *const frame_hdr, |
56 | | const int qidx, uint16_t (*dq)[3][2]) |
57 | 0 | { |
58 | 0 | for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) { |
59 | 0 | const int yac = frame_hdr->segmentation.enabled ? |
60 | 0 | iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx; |
61 | 0 | const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta); |
62 | 0 | const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta); |
63 | 0 | const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta); |
64 | 0 | const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta); |
65 | 0 | const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta); |
66 | |
|
67 | 0 | dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0]; |
68 | 0 | dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1]; |
69 | 0 | dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0]; |
70 | 0 | dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1]; |
71 | 0 | dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0]; |
72 | 0 | dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1]; |
73 | 0 | } |
74 | 0 | } |
75 | | |
76 | | static int read_mv_component_diff(MsacContext *const msac, |
77 | | CdfMvComponent *const mv_comp, |
78 | | const int mv_prec) |
79 | 0 | { |
80 | 0 | const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign); |
81 | 0 | const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10); |
82 | 0 | int up, fp = 3, hp = 1; |
83 | |
|
84 | 0 | if (!cl) { |
85 | 0 | up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0); |
86 | 0 | if (mv_prec >= 0) { // !force_integer_mv |
87 | 0 | fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3); |
88 | 0 | if (mv_prec > 0) // allow_high_precision_mv |
89 | 0 | hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp); |
90 | 0 | } |
91 | 0 | } else { |
92 | 0 | up = 1 << cl; |
93 | 0 | for (int n = 0; n < cl; n++) |
94 | 0 | up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n; |
95 | 0 | if (mv_prec >= 0) { // !force_integer_mv |
96 | 0 | fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3); |
97 | 0 | if (mv_prec > 0) // allow_high_precision_mv |
98 | 0 | hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp); |
99 | 0 | } |
100 | 0 | } |
101 | |
|
102 | 0 | const int diff = ((up << 3) | (fp << 1) | hp) + 1; |
103 | |
|
104 | 0 | return sign ? -diff : diff; |
105 | 0 | } |
106 | | |
107 | | static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv, |
108 | | const int mv_prec) |
109 | 0 | { |
110 | 0 | MsacContext *const msac = &ts->msac; |
111 | 0 | const enum MVJoint mv_joint = |
112 | 0 | dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1); |
113 | 0 | if (mv_joint & MV_JOINT_V) |
114 | 0 | ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec); |
115 | 0 | if (mv_joint & MV_JOINT_H) |
116 | 0 | ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec); |
117 | 0 | } |
118 | | |
119 | | static void read_tx_tree(Dav1dTaskContext *const t, |
120 | | const enum RectTxfmSize from, |
121 | | const int depth, uint16_t *const masks, |
122 | | const int x_off, const int y_off) |
123 | 0 | { |
124 | 0 | const Dav1dFrameContext *const f = t->f; |
125 | 0 | const int bx4 = t->bx & 31, by4 = t->by & 31; |
126 | 0 | const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from]; |
127 | 0 | const int txw = t_dim->lw, txh = t_dim->lh; |
128 | 0 | int is_split; |
129 | |
|
130 | 0 | if (depth < 2 && from > (int) TX_4X4) { |
131 | 0 | const int cat = 2 * (TX_64X64 - t_dim->max) - depth; |
132 | 0 | const int a = t->a->tx[bx4] < txw; |
133 | 0 | const int l = t->l.tx[by4] < txh; |
134 | |
|
135 | 0 | is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac, |
136 | 0 | t->ts->cdf.m.txpart[cat][a + l]); |
137 | 0 | if (is_split) |
138 | 0 | masks[depth] |= 1 << (y_off * 4 + x_off); |
139 | 0 | } else { |
140 | 0 | is_split = 0; |
141 | 0 | } |
142 | |
|
143 | 0 | if (is_split && t_dim->max > TX_8X8) { |
144 | 0 | const enum RectTxfmSize sub = t_dim->sub; |
145 | 0 | const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub]; |
146 | 0 | const int txsw = sub_t_dim->w, txsh = sub_t_dim->h; |
147 | |
|
148 | 0 | read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0); |
149 | 0 | t->bx += txsw; |
150 | 0 | if (txw >= txh && t->bx < f->bw) |
151 | 0 | read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0); |
152 | 0 | t->bx -= txsw; |
153 | 0 | t->by += txsh; |
154 | 0 | if (txh >= txw && t->by < f->bh) { |
155 | 0 | read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1); |
156 | 0 | t->bx += txsw; |
157 | 0 | if (txw >= txh && t->bx < f->bw) |
158 | 0 | read_tx_tree(t, sub, depth + 1, masks, |
159 | 0 | x_off * 2 + 1, y_off * 2 + 1); |
160 | 0 | t->bx -= txsw; |
161 | 0 | } |
162 | 0 | t->by -= txsh; |
163 | 0 | } else { |
164 | 0 | dav1d_memset_pow2[t_dim->lw](&t->a->tx[bx4], is_split ? TX_4X4 : txw); |
165 | 0 | dav1d_memset_pow2[t_dim->lh](&t->l.tx[by4], is_split ? TX_4X4 : txh); |
166 | 0 | } |
167 | 0 | } |
168 | | |
/*
 * Map a coded value `diff` back to a value in [0, max) relative to a
 * predicted value `ref`.
 *
 * - ref == 0:        diff is returned unchanged.
 * - ref >= max - 1:  the result is max - diff - 1.
 * - otherwise, values of diff up to a limit alternate around ref
 *   (odd diff -> above ref, even diff -> below ref); the limit is 2 * ref
 *   when 2 * ref < max, else 2 * (max - ref - 1). Past the limit the result
 *   is diff itself (first case) or max - (diff + 1) (second case).
 */
static int neg_deinterleave(int diff, int ref, int max) {
    if (ref == 0)
        return diff;
    if (ref >= max - 1)
        return max - diff - 1;

    const int ref_in_lower_half = 2 * ref < max;
    const int limit = ref_in_lower_half ? 2 * ref : 2 * (max - ref - 1);

    if (diff > limit)
        return ref_in_lower_half ? diff : max - (diff + 1);

    // interleaved region: odd values go above ref, even values below
    return (diff & 1) ? ref + ((diff + 1) >> 1)
                      : ref - (diff >> 1);
}
190 | | |
191 | | static void find_matching_ref(const Dav1dTaskContext *const t, |
192 | | const enum EdgeFlags intra_edge_flags, |
193 | | const int bw4, const int bh4, |
194 | | const int w4, const int h4, |
195 | | const int have_left, const int have_top, |
196 | | const int ref, uint64_t masks[2]) |
197 | 0 | { |
198 | 0 | /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5]; |
199 | 0 | int count = 0; |
200 | 0 | int have_topleft = have_top && have_left; |
201 | 0 | int have_topright = imax(bw4, bh4) < 32 && |
202 | 0 | have_top && t->bx + bw4 < t->ts->tiling.col_end && |
203 | 0 | (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT); |
204 | |
|
205 | 0 | #define bs(rp) dav1d_block_dimensions[(rp)->bs] |
206 | 0 | #define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1) |
207 | |
|
208 | 0 | if (have_top) { |
209 | 0 | const refmvs_block *r2 = &r[-1][t->bx]; |
210 | 0 | if (matches(r2)) { |
211 | 0 | masks[0] |= 1; |
212 | 0 | count = 1; |
213 | 0 | } |
214 | 0 | int aw4 = bs(r2)[0]; |
215 | 0 | if (aw4 >= bw4) { |
216 | 0 | const int off = t->bx & (aw4 - 1); |
217 | 0 | if (off) have_topleft = 0; |
218 | 0 | if (aw4 - off > bw4) have_topright = 0; |
219 | 0 | } else { |
220 | 0 | unsigned mask = 1 << aw4; |
221 | 0 | for (int x = aw4; x < w4; x += aw4) { |
222 | 0 | r2 += aw4; |
223 | 0 | if (matches(r2)) { |
224 | 0 | masks[0] |= mask; |
225 | 0 | if (++count >= 8) return; |
226 | 0 | } |
227 | 0 | aw4 = bs(r2)[0]; |
228 | 0 | mask <<= aw4; |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } |
232 | 0 | if (have_left) { |
233 | 0 | /*const*/ refmvs_block *const *r2 = r; |
234 | 0 | if (matches(&r2[0][t->bx - 1])) { |
235 | 0 | masks[1] |= 1; |
236 | 0 | if (++count >= 8) return; |
237 | 0 | } |
238 | 0 | int lh4 = bs(&r2[0][t->bx - 1])[1]; |
239 | 0 | if (lh4 >= bh4) { |
240 | 0 | if (t->by & (lh4 - 1)) have_topleft = 0; |
241 | 0 | } else { |
242 | 0 | unsigned mask = 1 << lh4; |
243 | 0 | for (int y = lh4; y < h4; y += lh4) { |
244 | 0 | r2 += lh4; |
245 | 0 | if (matches(&r2[0][t->bx - 1])) { |
246 | 0 | masks[1] |= mask; |
247 | 0 | if (++count >= 8) return; |
248 | 0 | } |
249 | 0 | lh4 = bs(&r2[0][t->bx - 1])[1]; |
250 | 0 | mask <<= lh4; |
251 | 0 | } |
252 | 0 | } |
253 | 0 | } |
254 | 0 | if (have_topleft && matches(&r[-1][t->bx - 1])) { |
255 | 0 | masks[1] |= 1ULL << 32; |
256 | 0 | if (++count >= 8) return; |
257 | 0 | } |
258 | 0 | if (have_topright && matches(&r[-1][t->bx + bw4])) { |
259 | 0 | masks[0] |= 1ULL << 32; |
260 | 0 | } |
261 | 0 | #undef matches |
262 | 0 | } |
263 | | |
264 | | static void derive_warpmv(const Dav1dTaskContext *const t, |
265 | | const int bw4, const int bh4, |
266 | | const uint64_t masks[2], const union mv mv, |
267 | | Dav1dWarpedMotionParams *const wmp) |
268 | 0 | { |
269 | 0 | int pts[8][2 /* in, out */][2 /* x, y */], np = 0; |
270 | 0 | /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5]; |
271 | |
|
272 | 0 | #define add_sample(dx, dy, sx, sy, rp) do { \ |
273 | 0 | pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \ |
274 | 0 | pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \ |
275 | 0 | pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \ |
276 | 0 | pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \ |
277 | 0 | np++; \ |
278 | 0 | } while (0) |
279 | | |
280 | | // use masks[] to find the projectable motion vectors in the edges |
281 | 0 | if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) { |
282 | 0 | const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1); |
283 | 0 | add_sample(-off, 0, 1, -1, &r[-1][t->bx]); |
284 | 0 | } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top |
285 | 0 | const int tz = ctz(xmask); |
286 | 0 | off += tz; |
287 | 0 | xmask >>= tz; |
288 | 0 | add_sample(off, 0, 1, -1, &r[-1][t->bx + off]); |
289 | 0 | xmask &= ~1; |
290 | 0 | } |
291 | 0 | if (np < 8 && masks[1] == 1) { |
292 | 0 | const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1); |
293 | 0 | add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]); |
294 | 0 | } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left |
295 | 0 | const int tz = ctz(ymask); |
296 | 0 | off += tz; |
297 | 0 | ymask >>= tz; |
298 | 0 | add_sample(0, off, -1, 1, &r[off][t->bx - 1]); |
299 | 0 | ymask &= ~1; |
300 | 0 | } |
301 | 0 | if (np < 8 && masks[1] >> 32) // top/left |
302 | 0 | add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]); |
303 | 0 | if (np < 8 && masks[0] >> 32) // top/right |
304 | 0 | add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]); |
305 | 0 | assert(np > 0 && np <= 8); |
306 | 0 | #undef bs |
307 | | |
308 | | // select according to motion vector difference against a threshold |
309 | 0 | int mvd[8], ret = 0; |
310 | 0 | const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28); |
311 | 0 | for (int i = 0; i < np; i++) { |
312 | 0 | mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) + |
313 | 0 | abs(pts[i][1][1] - pts[i][0][1] - mv.y); |
314 | 0 | if (mvd[i] > thresh) |
315 | 0 | mvd[i] = -1; |
316 | 0 | else |
317 | 0 | ret++; |
318 | 0 | } |
319 | 0 | if (!ret) { |
320 | 0 | ret = 1; |
321 | 0 | } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) { |
322 | 0 | while (mvd[i] != -1) i++; |
323 | 0 | while (mvd[j] == -1) j--; |
324 | 0 | assert(i != j); |
325 | 0 | if (i > j) break; |
326 | | // replace the discarded samples; |
327 | 0 | mvd[i] = mvd[j]; |
328 | 0 | memcpy(pts[i], pts[j], sizeof(*pts)); |
329 | 0 | } |
330 | | |
331 | 0 | if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) && |
332 | 0 | !dav1d_get_shear_params(wmp)) |
333 | 0 | { |
334 | 0 | wmp->type = DAV1D_WM_TYPE_AFFINE; |
335 | 0 | } else |
336 | 0 | wmp->type = DAV1D_WM_TYPE_IDENTITY; |
337 | 0 | } |
338 | | |
/*
 * Return 1 if any of buf[0], buf[2], ..., buf[2 * (len - 1)] is zero,
 * otherwise 0. Only every second byte is inspected; len is the number of
 * bytes inspected, not the buffer length. A len <= 0 yields 0.
 */
static inline int findoddzero(const uint8_t *buf, int len) {
    int found = 0;

    for (int n = 0; n < len && !found; n++)
        found = buf[2 * n] == 0;

    return found;
}
344 | | |
// Compute, for every entry on one anti-diagonal of the palette index map,
// the colour ordering and the context derived from its already-decoded
// neighbours.
//
// pal_idx/stride describe the index map; i is the diagonal number and the
// diagonal covers x positions first down to last (inclusive), with
// y = i - x. Entry n of the outputs corresponds to x = first - n.
//
// For each entry the left, top and top-left neighbours (as available) are
// compared:
//   ctx = 0: only one of left/top exists
//   ctx = 4: top, left and top-left are all equal
//   ctx = 3: top == left (but not top-left)
//   ctx = 2: top-left equals exactly one of top/left
//   ctx = 1: all three differ
// order[n][] receives the neighbour colours first (in the priority shown in
// the code below), followed by all remaining values of 0..7 in ascending
// order, so each order[n] is a permutation of 0..7.
static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride,
                          const int i, const int first, const int last,
                          uint8_t (*const order)[8], uint8_t *const ctx)
{
    int have_top = i > first;
    int n = 0;

    assert(pal_idx);
    pal_idx += first + (i - first) * stride;

#define add(v_in) do { \
        const int v = v_in; \
        assert((unsigned)v < 8U); \
        order[n][o_idx++] = v; \
        mask |= 1 << v; \
    } while (0)

    for (int j = first; j >= last; j--) {
        const int have_left = j > 0;
        unsigned mask = 0; // bit v set once colour v has been emitted
        int o_idx = 0;

        assert(have_left || have_top);

        if (have_left && have_top) {
            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
            const int same_t_l = t == l;
            const int same_t_tl = t == tl;
            const int same_l_tl = l == tl;
            const int same_all = same_t_l & same_t_tl & same_l_tl;

            if (same_all) {
                ctx[n] = 4;
                add(t);
            } else if (same_t_l) {
                ctx[n] = 3;
                add(t);
                add(tl);
            } else if (same_t_tl | same_l_tl) {
                ctx[n] = 2;
                add(tl);
                add(same_t_tl ? l : t);
            } else {
                ctx[n] = 1;
                add(imin(t, l));
                add(imax(t, l));
                add(tl);
            }
        } else {
            // exactly one neighbour is available
            ctx[n] = 0;
            if (have_left)
                add(pal_idx[-1]);
            else
                add(pal_idx[-stride]);
        }

        // append every colour not emitted yet, in ascending order
        for (int bit = 0; bit < 8; bit++)
            if (!((mask >> bit) & 1))
                order[n][o_idx++] = bit;
        assert(o_idx == 8);

        // step to the next entry on the diagonal: one row down, one column left
        have_top = 1;
        n++;
        pal_idx += stride - 1;
    }
#undef add
}
413 | | |
414 | | static void read_pal_indices(Dav1dTaskContext *const t, |
415 | | uint8_t *const pal_idx, |
416 | | const int pal_sz, const int pl, |
417 | | const int w4, const int h4, |
418 | | const int bw4, const int bh4) |
419 | 0 | { |
420 | 0 | Dav1dTileState *const ts = t->ts; |
421 | 0 | const ptrdiff_t stride = bw4 * 4; |
422 | 0 | assert(pal_idx); |
423 | 0 | uint8_t *const pal_tmp = t->scratch.pal_idx_uv; |
424 | 0 | pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, pal_sz); |
425 | 0 | uint16_t (*const color_map_cdf)[8] = |
426 | 0 | ts->cdf.m.color_map[pl][pal_sz - 2]; |
427 | 0 | uint8_t (*const order)[8] = t->scratch.pal_order; |
428 | 0 | uint8_t *const ctx = t->scratch.pal_ctx; |
429 | 0 | for (int i = 1; i < 4 * (w4 + h4) - 1; i++) { |
430 | | // top/left-to-bottom/right diagonals ("wave-front") |
431 | 0 | const int first = imin(i, w4 * 4 - 1); |
432 | 0 | const int last = imax(0, i - h4 * 4 + 1); |
433 | 0 | order_palette(pal_tmp, stride, i, first, last, order, ctx); |
434 | 0 | for (int j = first, m = 0; j >= last; j--, m++) { |
435 | 0 | const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac, |
436 | 0 | color_map_cdf[ctx[m]], pal_sz - 1); |
437 | 0 | pal_tmp[(i - j) * stride + j] = order[m][color_idx]; |
438 | 0 | } |
439 | 0 | } |
440 | |
|
441 | 0 | t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4, |
442 | 0 | w4 * 4, h4 * 4); |
443 | 0 | } |
444 | | |
445 | | static void read_vartx_tree(Dav1dTaskContext *const t, |
446 | | Av1Block *const b, const enum BlockSize bs, |
447 | | const int bx4, const int by4) |
448 | 0 | { |
449 | 0 | const Dav1dFrameContext *const f = t->f; |
450 | 0 | const uint8_t *const b_dim = dav1d_block_dimensions[bs]; |
451 | 0 | const int bw4 = b_dim[0], bh4 = b_dim[1]; |
452 | | |
453 | | // var-tx tree coding |
454 | 0 | uint16_t tx_split[2] = { 0 }; |
455 | 0 | b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0]; |
456 | 0 | if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] || |
457 | 0 | b->max_ytx == TX_4X4)) |
458 | 0 | { |
459 | 0 | b->max_ytx = b->uvtx = TX_4X4; |
460 | 0 | if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) { |
461 | 0 | dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], TX_4X4); |
462 | 0 | dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], TX_4X4); |
463 | 0 | } |
464 | 0 | } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) { |
465 | 0 | if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) { |
466 | 0 | dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], b_dim[2 + 0]); |
467 | 0 | dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], b_dim[2 + 1]); |
468 | 0 | } |
469 | 0 | b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout]; |
470 | 0 | } else { |
471 | 0 | assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64); |
472 | 0 | int y, x, y_off, x_off; |
473 | 0 | const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx]; |
474 | 0 | for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) { |
475 | 0 | for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) { |
476 | 0 | read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off); |
477 | | // contexts are updated inside read_tx_tree() |
478 | 0 | t->bx += ytx->w; |
479 | 0 | } |
480 | 0 | t->bx -= x; |
481 | 0 | t->by += ytx->h; |
482 | 0 | } |
483 | 0 | t->by -= y; |
484 | 0 | if (DEBUG_BLOCK_INFO) |
485 | 0 | printf("Post-vartxtree[%x/%x]: r=%d\n", |
486 | 0 | tx_split[0], tx_split[1], t->ts->msac.rng); |
487 | 0 | b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout]; |
488 | 0 | } |
489 | 0 | assert(!(tx_split[0] & ~0x33)); |
490 | 0 | b->tx_split0 = (uint8_t)tx_split[0]; |
491 | 0 | b->tx_split1 = tx_split[1]; |
492 | 0 | } |
493 | | |
494 | | static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f, |
495 | | const int by, const int bx, |
496 | | const int w4, int h4, |
497 | | const uint8_t *ref_seg_map, |
498 | | const ptrdiff_t stride) |
499 | 0 | { |
500 | 0 | assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE); |
501 | | |
502 | 0 | unsigned seg_id = 8; |
503 | 0 | ref_seg_map += by * stride + bx; |
504 | 0 | do { |
505 | 0 | for (int x = 0; x < w4; x++) |
506 | 0 | seg_id = imin(seg_id, ref_seg_map[x]); |
507 | 0 | ref_seg_map += stride; |
508 | 0 | } while (--h4 > 0 && seg_id); |
509 | 0 | assert(seg_id < 8); |
510 | | |
511 | 0 | return seg_id; |
512 | 0 | } |
513 | | |
514 | | static inline void splat_oneref_mv(const Dav1dContext *const c, |
515 | | Dav1dTaskContext *const t, |
516 | | const enum BlockSize bs, |
517 | | const Av1Block *const b, |
518 | | const int bw4, const int bh4) |
519 | 0 | { |
520 | 0 | const enum InterPredMode mode = b->inter_mode; |
521 | 0 | const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { |
522 | 0 | .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 }, |
523 | 0 | .mv.mv[0] = b->mv[0], |
524 | 0 | .bs = bs, |
525 | 0 | .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2), |
526 | 0 | }; |
527 | 0 | c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); |
528 | 0 | } |
529 | | |
530 | | static inline void splat_intrabc_mv(const Dav1dContext *const c, |
531 | | Dav1dTaskContext *const t, |
532 | | const enum BlockSize bs, |
533 | | const Av1Block *const b, |
534 | | const int bw4, const int bh4) |
535 | 0 | { |
536 | 0 | const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { |
537 | 0 | .ref.ref = { 0, -1 }, |
538 | 0 | .mv.mv[0] = b->mv[0], |
539 | 0 | .bs = bs, |
540 | 0 | .mf = 0, |
541 | 0 | }; |
542 | 0 | c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); |
543 | 0 | } |
544 | | |
545 | | static inline void splat_tworef_mv(const Dav1dContext *const c, |
546 | | Dav1dTaskContext *const t, |
547 | | const enum BlockSize bs, |
548 | | const Av1Block *const b, |
549 | | const int bw4, const int bh4) |
550 | 0 | { |
551 | 0 | assert(bw4 >= 2 && bh4 >= 2); |
552 | 0 | const enum CompInterPredMode mode = b->inter_mode; |
553 | 0 | const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { |
554 | 0 | .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 }, |
555 | 0 | .mv.mv = { b->mv[0], b->mv[1] }, |
556 | 0 | .bs = bs, |
557 | 0 | .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2, |
558 | 0 | }; |
559 | 0 | c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); |
560 | 0 | } |
561 | | |
562 | | static inline void splat_intraref(const Dav1dContext *const c, |
563 | | Dav1dTaskContext *const t, |
564 | | const enum BlockSize bs, |
565 | | const int bw4, const int bh4) |
566 | 0 | { |
567 | 0 | const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { |
568 | 0 | .ref.ref = { 0, -1 }, |
569 | 0 | .mv.mv[0].n = INVALID_MV, |
570 | 0 | .bs = bs, |
571 | 0 | .mf = 0, |
572 | 0 | }; |
573 | 0 | c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); |
574 | 0 | } |
575 | | |
576 | | static void mc_lowest_px(int *const dst, const int by4, const int bh4, |
577 | | const int mvy, const int ss_ver, |
578 | | const struct ScalableMotionParams *const smp) |
579 | 0 | { |
580 | 0 | const int v_mul = 4 >> ss_ver; |
581 | 0 | if (!smp->scale) { |
582 | 0 | const int my = mvy >> (3 + ss_ver), dy = mvy & (15 >> !ss_ver); |
583 | 0 | *dst = imax(*dst, (by4 + bh4) * v_mul + my + 4 * !!dy); |
584 | 0 | } else { |
585 | 0 | int y = (by4 * v_mul << 4) + mvy * (1 << !ss_ver); |
586 | 0 | const int64_t tmp = (int64_t)(y) * smp->scale + (smp->scale - 0x4000) * 8; |
587 | 0 | y = apply_sign64((int)((llabs(tmp) + 128) >> 8), tmp) + 32; |
588 | 0 | const int bottom = ((y + (bh4 * v_mul - 1) * smp->step) >> 10) + 1 + 4; |
589 | 0 | *dst = imax(*dst, bottom); |
590 | 0 | } |
591 | 0 | } |
592 | | |
593 | | static ALWAYS_INLINE void affine_lowest_px(Dav1dTaskContext *const t, int *const dst, |
594 | | const uint8_t *const b_dim, |
595 | | const Dav1dWarpedMotionParams *const wmp, |
596 | | const int ss_ver, const int ss_hor) |
597 | 0 | { |
598 | 0 | const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver; |
599 | 0 | assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7)); |
600 | 0 | const int32_t *const mat = wmp->matrix; |
601 | 0 | const int y = b_dim[1] * v_mul - 8; // lowest y |
602 | |
|
603 | 0 | const int src_y = t->by * 4 + ((y + 4) << ss_ver); |
604 | 0 | const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1]; |
605 | | // check left- and right-most blocks |
606 | 0 | for (int x = 0; x < b_dim[0] * h_mul; x += imax(8, b_dim[0] * h_mul - 8)) { |
607 | | // calculate transformation relative to center of 8x8 block in |
608 | | // luma pixel units |
609 | 0 | const int src_x = t->bx * 4 + ((x + 4) << ss_hor); |
610 | 0 | const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver; |
611 | 0 | const int dy = (int) (mvy >> 16) - 4; |
612 | 0 | *dst = imax(*dst, dy + 4 + 8); |
613 | 0 | } |
614 | 0 | } |
615 | | |
616 | | static NOINLINE void affine_lowest_px_luma(Dav1dTaskContext *const t, int *const dst, |
617 | | const uint8_t *const b_dim, |
618 | | const Dav1dWarpedMotionParams *const wmp) |
619 | 0 | { |
620 | 0 | affine_lowest_px(t, dst, b_dim, wmp, 0, 0); |
621 | 0 | } |
622 | | |
623 | | static NOINLINE void affine_lowest_px_chroma(Dav1dTaskContext *const t, int *const dst, |
624 | | const uint8_t *const b_dim, |
625 | | const Dav1dWarpedMotionParams *const wmp) |
626 | 0 | { |
627 | 0 | const Dav1dFrameContext *const f = t->f; |
628 | 0 | assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400); |
629 | 0 | if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444) |
630 | 0 | affine_lowest_px_luma(t, dst, b_dim, wmp); |
631 | 0 | else |
632 | 0 | affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1); |
633 | 0 | } |
634 | | |
635 | | static void obmc_lowest_px(Dav1dTaskContext *const t, |
636 | | int (*const dst)[2], const int is_chroma, |
637 | | const uint8_t *const b_dim, |
638 | | const int bx4, const int by4, const int w4, const int h4) |
639 | 0 | { |
640 | 0 | assert(!(t->bx & 1) && !(t->by & 1)); |
641 | 0 | const Dav1dFrameContext *const f = t->f; |
642 | 0 | /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5]; |
643 | 0 | const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; |
644 | 0 | const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; |
645 | 0 | const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver; |
646 | |
|
647 | 0 | if (t->by > t->ts->tiling.row_start && |
648 | 0 | (!is_chroma || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16)) |
649 | 0 | { |
650 | 0 | for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) { |
651 | | // only odd blocks are considered for overlap handling, hence +1 |
652 | 0 | const refmvs_block *const a_r = &r[-1][t->bx + x + 1]; |
653 | 0 | const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs]; |
654 | |
|
655 | 0 | if (a_r->ref.ref[0] > 0) { |
656 | 0 | const int oh4 = imin(b_dim[1], 16) >> 1; |
657 | 0 | mc_lowest_px(&dst[a_r->ref.ref[0] - 1][is_chroma], t->by, |
658 | 0 | (oh4 * 3 + 3) >> 2, a_r->mv.mv[0].y, ss_ver, |
659 | 0 | &f->svc[a_r->ref.ref[0] - 1][1]); |
660 | 0 | i++; |
661 | 0 | } |
662 | 0 | x += imax(a_b_dim[0], 2); |
663 | 0 | } |
664 | 0 | } |
665 | |
|
666 | 0 | if (t->bx > t->ts->tiling.col_start) |
667 | 0 | for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) { |
668 | | // only odd blocks are considered for overlap handling, hence +1 |
669 | 0 | const refmvs_block *const l_r = &r[y + 1][t->bx - 1]; |
670 | 0 | const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs]; |
671 | |
|
672 | 0 | if (l_r->ref.ref[0] > 0) { |
673 | 0 | const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]); |
674 | 0 | mc_lowest_px(&dst[l_r->ref.ref[0] - 1][is_chroma], |
675 | 0 | t->by + y, oh4, l_r->mv.mv[0].y, ss_ver, |
676 | 0 | &f->svc[l_r->ref.ref[0] - 1][1]); |
677 | 0 | i++; |
678 | 0 | } |
679 | 0 | y += imax(l_b_dim[1], 2); |
680 | 0 | } |
681 | 0 | } |
682 | | |
683 | | static int decode_b(Dav1dTaskContext *const t, |
684 | | const enum BlockLevel bl, |
685 | | const enum BlockSize bs, |
686 | | const enum BlockPartition bp, |
687 | 0 | const enum EdgeFlags intra_edge_flags) { |
688 | 0 | Dav1dTileState *const ts = t->ts; |
689 | 0 | const Dav1dFrameContext *const f = t->f; |
690 | 0 | Av1Block b_mem, *const b = t->frame_thread.pass ? |
691 | 0 | &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem; |
692 | 0 | const uint8_t *const b_dim = dav1d_block_dimensions[bs]; |
693 | 0 | const int bx4 = t->bx & 31, by4 = t->by & 31; |
694 | 0 | const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; |
695 | 0 | const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; |
696 | 0 | const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver; |
697 | 0 | const int bw4 = b_dim[0], bh4 = b_dim[1]; |
698 | 0 | const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by); |
699 | 0 | const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver; |
700 | 0 | const int have_left = t->bx > ts->tiling.col_start; |
701 | 0 | const int have_top = t->by > ts->tiling.row_start; |
702 | 0 | const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 && |
703 | 0 | (bw4 > ss_hor || t->bx & 1) && |
704 | 0 | (bh4 > ss_ver || t->by & 1); |
705 | |
|
706 | 0 | if (t->frame_thread.pass == 2) { |
707 | 0 | if (b->intra) { |
708 | 0 | f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b); |
709 | |
|
710 | 0 | const enum IntraPredMode y_mode_nofilt = |
711 | 0 | b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode; |
712 | 0 | #define set_ctx(rep_macro) \ |
713 | 0 | rep_macro(edge->mode, off, y_mode_nofilt); \ |
714 | 0 | rep_macro(edge->intra, off, 1) |
715 | 0 | BlockContext *edge = t->a; |
716 | 0 | for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { |
717 | 0 | case_set(b_dim[2 + i]); |
718 | 0 | } |
719 | 0 | #undef set_ctx |
720 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr)) { |
721 | 0 | refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx]; |
722 | 0 | for (int x = 0; x < bw4; x++) { |
723 | 0 | r[x].ref.ref[0] = 0; |
724 | 0 | r[x].bs = bs; |
725 | 0 | } |
726 | 0 | refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5]; |
727 | 0 | for (int y = 0; y < bh4 - 1; y++) { |
728 | 0 | rr[y][t->bx + bw4 - 1].ref.ref[0] = 0; |
729 | 0 | rr[y][t->bx + bw4 - 1].bs = bs; |
730 | 0 | } |
731 | 0 | } |
732 | |
|
733 | 0 | if (has_chroma) { |
734 | 0 | uint8_t uv_mode = b->uv_mode; |
735 | 0 | dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode); |
736 | 0 | dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode); |
737 | 0 | } |
738 | 0 | } else { |
739 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ && |
740 | 0 | b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP) |
741 | 0 | { |
742 | 0 | if (b->matrix[0] == INT16_MIN) { |
743 | 0 | t->warpmv.type = DAV1D_WM_TYPE_IDENTITY; |
744 | 0 | } else { |
745 | 0 | t->warpmv.type = DAV1D_WM_TYPE_AFFINE; |
746 | 0 | t->warpmv.matrix[2] = b->matrix[0] + 0x10000; |
747 | 0 | t->warpmv.matrix[3] = b->matrix[1]; |
748 | 0 | t->warpmv.matrix[4] = b->matrix[2]; |
749 | 0 | t->warpmv.matrix[5] = b->matrix[3] + 0x10000; |
750 | 0 | dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv, |
751 | 0 | t->bx, t->by); |
752 | 0 | dav1d_get_shear_params(&t->warpmv); |
753 | 0 | #define signabs(v) v < 0 ? '-' : ' ', abs(v) |
754 | 0 | if (DEBUG_BLOCK_INFO) |
755 | 0 | printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n" |
756 | 0 | "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n", |
757 | 0 | signabs(t->warpmv.matrix[0]), |
758 | 0 | signabs(t->warpmv.matrix[1]), |
759 | 0 | signabs(t->warpmv.matrix[2]), |
760 | 0 | signabs(t->warpmv.matrix[3]), |
761 | 0 | signabs(t->warpmv.matrix[4]), |
762 | 0 | signabs(t->warpmv.matrix[5]), |
763 | 0 | signabs(t->warpmv.u.p.alpha), |
764 | 0 | signabs(t->warpmv.u.p.beta), |
765 | 0 | signabs(t->warpmv.u.p.gamma), |
766 | 0 | signabs(t->warpmv.u.p.delta), |
767 | 0 | b->mv2d.y, b->mv2d.x); |
768 | 0 | #undef signabs |
769 | 0 | } |
770 | 0 | } |
771 | 0 | if (f->bd_fn.recon_b_inter(t, bs, b)) return -1; |
772 | | |
773 | 0 | const uint8_t *const filter = dav1d_filter_dir[b->filter2d]; |
774 | 0 | BlockContext *edge = t->a; |
775 | 0 | for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { |
776 | 0 | #define set_ctx(rep_macro) \ |
777 | 0 | rep_macro(edge->filter[0], off, filter[0]); \ |
778 | 0 | rep_macro(edge->filter[1], off, filter[1]); \ |
779 | 0 | rep_macro(edge->intra, off, 0) |
780 | 0 | case_set(b_dim[2 + i]); |
781 | 0 | #undef set_ctx |
782 | 0 | } |
783 | | |
784 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr)) { |
785 | 0 | refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx]; |
786 | 0 | for (int x = 0; x < bw4; x++) { |
787 | 0 | r[x].ref.ref[0] = b->ref[0] + 1; |
788 | 0 | r[x].mv.mv[0] = b->mv[0]; |
789 | 0 | r[x].bs = bs; |
790 | 0 | } |
791 | 0 | refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5]; |
792 | 0 | for (int y = 0; y < bh4 - 1; y++) { |
793 | 0 | rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1; |
794 | 0 | rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0]; |
795 | 0 | rr[y][t->bx + bw4 - 1].bs = bs; |
796 | 0 | } |
797 | 0 | } |
798 | |
|
799 | 0 | if (has_chroma) { |
800 | 0 | dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED); |
801 | 0 | dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED); |
802 | 0 | } |
803 | 0 | } |
804 | 0 | return 0; |
805 | 0 | } |
806 | | |
807 | 0 | const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver; |
808 | |
|
809 | 0 | b->bl = bl; |
810 | 0 | b->bp = bp; |
811 | 0 | b->bs = bs; |
812 | |
|
813 | 0 | const Dav1dSegmentationData *seg = NULL; |
814 | | |
815 | | // segment_id (if seg_feature for skip/ref/gmv is enabled) |
816 | 0 | int seg_pred = 0; |
817 | 0 | if (f->frame_hdr->segmentation.enabled) { |
818 | 0 | if (!f->frame_hdr->segmentation.update_map) { |
819 | 0 | if (f->prev_segmap) { |
820 | 0 | unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4, |
821 | 0 | f->prev_segmap, |
822 | 0 | f->b4_stride); |
823 | 0 | if (seg_id >= 8) return -1; |
824 | 0 | b->seg_id = seg_id; |
825 | 0 | } else { |
826 | 0 | b->seg_id = 0; |
827 | 0 | } |
828 | 0 | seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id]; |
829 | 0 | } else if (f->frame_hdr->segmentation.seg_data.preskip) { |
830 | 0 | if (f->frame_hdr->segmentation.temporal && |
831 | 0 | (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac, |
832 | 0 | ts->cdf.m.seg_pred[t->a->seg_pred[bx4] + |
833 | 0 | t->l.seg_pred[by4]]))) |
834 | 0 | { |
835 | | // temporal predicted seg_id |
836 | 0 | if (f->prev_segmap) { |
837 | 0 | unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, |
838 | 0 | w4, h4, |
839 | 0 | f->prev_segmap, |
840 | 0 | f->b4_stride); |
841 | 0 | if (seg_id >= 8) return -1; |
842 | 0 | b->seg_id = seg_id; |
843 | 0 | } else { |
844 | 0 | b->seg_id = 0; |
845 | 0 | } |
846 | 0 | } else { |
847 | 0 | int seg_ctx; |
848 | 0 | const unsigned pred_seg_id = |
849 | 0 | get_cur_frame_segid(t->by, t->bx, have_top, have_left, |
850 | 0 | &seg_ctx, f->cur_segmap, f->b4_stride); |
851 | 0 | const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac, |
852 | 0 | ts->cdf.m.seg_id[seg_ctx], |
853 | 0 | DAV1D_MAX_SEGMENTS - 1); |
854 | 0 | const unsigned last_active_seg_id = |
855 | 0 | f->frame_hdr->segmentation.seg_data.last_active_segid; |
856 | 0 | b->seg_id = neg_deinterleave(diff, pred_seg_id, |
857 | 0 | last_active_seg_id + 1); |
858 | 0 | if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error? |
859 | 0 | if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error? |
860 | 0 | } |
861 | | |
862 | 0 | if (DEBUG_BLOCK_INFO) |
863 | 0 | printf("Post-segid[preskip;%d]: r=%d\n", |
864 | 0 | b->seg_id, ts->msac.rng); |
865 | |
|
866 | 0 | seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id]; |
867 | 0 | } |
868 | 0 | } else { |
869 | 0 | b->seg_id = 0; |
870 | 0 | } |
871 | | |
872 | | // skip_mode |
873 | 0 | if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) && |
874 | 0 | f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1) |
875 | 0 | { |
876 | 0 | const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4]; |
877 | 0 | b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac, |
878 | 0 | ts->cdf.m.skip_mode[smctx]); |
879 | 0 | if (DEBUG_BLOCK_INFO) |
880 | 0 | printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng); |
881 | 0 | } else { |
882 | 0 | b->skip_mode = 0; |
883 | 0 | } |
884 | | |
885 | | // skip |
886 | 0 | if (b->skip_mode || (seg && seg->skip)) { |
887 | 0 | b->skip = 1; |
888 | 0 | } else { |
889 | 0 | const int sctx = t->a->skip[bx4] + t->l.skip[by4]; |
890 | 0 | b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]); |
891 | 0 | if (DEBUG_BLOCK_INFO) |
892 | 0 | printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng); |
893 | 0 | } |
894 | | |
895 | | // segment_id |
896 | 0 | if (f->frame_hdr->segmentation.enabled && |
897 | 0 | f->frame_hdr->segmentation.update_map && |
898 | 0 | !f->frame_hdr->segmentation.seg_data.preskip) |
899 | 0 | { |
900 | 0 | if (!b->skip && f->frame_hdr->segmentation.temporal && |
901 | 0 | (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac, |
902 | 0 | ts->cdf.m.seg_pred[t->a->seg_pred[bx4] + |
903 | 0 | t->l.seg_pred[by4]]))) |
904 | 0 | { |
905 | | // temporal predicted seg_id |
906 | 0 | if (f->prev_segmap) { |
907 | 0 | unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4, |
908 | 0 | f->prev_segmap, |
909 | 0 | f->b4_stride); |
910 | 0 | if (seg_id >= 8) return -1; |
911 | 0 | b->seg_id = seg_id; |
912 | 0 | } else { |
913 | 0 | b->seg_id = 0; |
914 | 0 | } |
915 | 0 | } else { |
916 | 0 | int seg_ctx; |
917 | 0 | const unsigned pred_seg_id = |
918 | 0 | get_cur_frame_segid(t->by, t->bx, have_top, have_left, |
919 | 0 | &seg_ctx, f->cur_segmap, f->b4_stride); |
920 | 0 | if (b->skip) { |
921 | 0 | b->seg_id = pred_seg_id; |
922 | 0 | } else { |
923 | 0 | const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac, |
924 | 0 | ts->cdf.m.seg_id[seg_ctx], |
925 | 0 | DAV1D_MAX_SEGMENTS - 1); |
926 | 0 | const unsigned last_active_seg_id = |
927 | 0 | f->frame_hdr->segmentation.seg_data.last_active_segid; |
928 | 0 | b->seg_id = neg_deinterleave(diff, pred_seg_id, |
929 | 0 | last_active_seg_id + 1); |
930 | 0 | if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error? |
931 | 0 | } |
932 | 0 | if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error? |
933 | 0 | } |
934 | | |
935 | 0 | seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id]; |
936 | |
|
937 | 0 | if (DEBUG_BLOCK_INFO) |
938 | 0 | printf("Post-segid[postskip;%d]: r=%d\n", |
939 | 0 | b->seg_id, ts->msac.rng); |
940 | 0 | } |
941 | | |
942 | | // cdef index |
943 | 0 | if (!b->skip) { |
944 | 0 | const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) + |
945 | 0 | ((t->by & 16) >> 3) : 0; |
946 | 0 | if (t->cur_sb_cdef_idx_ptr[idx] == -1) { |
947 | 0 | const int v = dav1d_msac_decode_bools(&ts->msac, |
948 | 0 | f->frame_hdr->cdef.n_bits); |
949 | 0 | t->cur_sb_cdef_idx_ptr[idx] = v; |
950 | 0 | if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v; |
951 | 0 | if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v; |
952 | 0 | if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v; |
953 | |
|
954 | 0 | if (DEBUG_BLOCK_INFO) |
955 | 0 | printf("Post-cdef_idx[%d]: r=%d\n", |
956 | 0 | *t->cur_sb_cdef_idx_ptr, ts->msac.rng); |
957 | 0 | } |
958 | 0 | } |
959 | | |
960 | | // delta-q/lf |
961 | 0 | if (!((t->bx | t->by) & (31 >> !f->seq_hdr->sb128))) { |
962 | 0 | const int prev_qidx = ts->last_qidx; |
963 | 0 | const int have_delta_q = f->frame_hdr->delta.q.present && |
964 | 0 | (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip); |
965 | |
|
966 | 0 | uint32_t prev_delta_lf = ts->last_delta_lf.u32; |
967 | |
|
968 | 0 | if (have_delta_q) { |
969 | 0 | int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac, |
970 | 0 | ts->cdf.m.delta_q, 3); |
971 | 0 | if (delta_q == 3) { |
972 | 0 | const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3); |
973 | 0 | delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) + |
974 | 0 | 1 + (1 << n_bits); |
975 | 0 | } |
976 | 0 | if (delta_q) { |
977 | 0 | if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q; |
978 | 0 | delta_q *= 1 << f->frame_hdr->delta.q.res_log2; |
979 | 0 | } |
980 | 0 | ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255); |
981 | 0 | if (have_delta_q && DEBUG_BLOCK_INFO) |
982 | 0 | printf("Post-delta_q[%d->%d]: r=%d\n", |
983 | 0 | delta_q, ts->last_qidx, ts->msac.rng); |
984 | |
|
985 | 0 | if (f->frame_hdr->delta.lf.present) { |
986 | 0 | const int n_lfs = f->frame_hdr->delta.lf.multi ? |
987 | 0 | f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1; |
988 | |
|
989 | 0 | for (int i = 0; i < n_lfs; i++) { |
990 | 0 | int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac, |
991 | 0 | ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3); |
992 | 0 | if (delta_lf == 3) { |
993 | 0 | const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3); |
994 | 0 | delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) + |
995 | 0 | 1 + (1 << n_bits); |
996 | 0 | } |
997 | 0 | if (delta_lf) { |
998 | 0 | if (dav1d_msac_decode_bool_equi(&ts->msac)) |
999 | 0 | delta_lf = -delta_lf; |
1000 | 0 | delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2; |
1001 | 0 | } |
1002 | 0 | ts->last_delta_lf.i8[i] = |
1003 | 0 | iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63); |
1004 | 0 | if (have_delta_q && DEBUG_BLOCK_INFO) |
1005 | 0 | printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf, |
1006 | 0 | ts->msac.rng); |
1007 | 0 | } |
1008 | 0 | } |
1009 | 0 | } |
1010 | 0 | if (ts->last_qidx == f->frame_hdr->quant.yac) { |
1011 | | // assign frame-wide q values to this sb |
1012 | 0 | ts->dq = f->dq; |
1013 | 0 | } else if (ts->last_qidx != prev_qidx) { |
1014 | | // find sb-specific quant parameters |
1015 | 0 | init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem); |
1016 | 0 | ts->dq = ts->dqmem; |
1017 | 0 | } |
1018 | 0 | if (!ts->last_delta_lf.u32) { |
1019 | | // assign frame-wide lf values to this sb |
1020 | 0 | ts->lflvl = f->lf.lvl; |
1021 | 0 | } else if (ts->last_delta_lf.u32 != prev_delta_lf) { |
1022 | | // find sb-specific lf lvl parameters |
1023 | 0 | ts->lflvl = ts->lflvlmem; |
1024 | 0 | dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8); |
1025 | 0 | } |
1026 | 0 | } |
1027 | |
|
1028 | 0 | if (b->skip_mode) { |
1029 | 0 | b->intra = 0; |
1030 | 0 | } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) { |
1031 | 0 | if (seg && (seg->ref >= 0 || seg->globalmv)) { |
1032 | 0 | b->intra = !seg->ref; |
1033 | 0 | } else { |
1034 | 0 | const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4, |
1035 | 0 | have_top, have_left); |
1036 | 0 | b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, |
1037 | 0 | ts->cdf.m.intra[ictx]); |
1038 | 0 | if (DEBUG_BLOCK_INFO) |
1039 | 0 | printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng); |
1040 | 0 | } |
1041 | 0 | } else if (f->frame_hdr->allow_intrabc) { |
1042 | 0 | b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc); |
1043 | 0 | if (DEBUG_BLOCK_INFO) |
1044 | 0 | printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng); |
1045 | 0 | } else { |
1046 | 0 | b->intra = 1; |
1047 | 0 | } |
1048 | | |
1049 | | // intra/inter-specific stuff |
1050 | 0 | if (b->intra) { |
1051 | 0 | uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ? |
1052 | 0 | ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] : |
1053 | 0 | ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]] |
1054 | 0 | [dav1d_intra_mode_context[t->l.mode[by4]]]; |
1055 | 0 | b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf, |
1056 | 0 | N_INTRA_PRED_MODES - 1); |
1057 | 0 | if (DEBUG_BLOCK_INFO) |
1058 | 0 | printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng); |
1059 | | |
1060 | | // angle delta |
1061 | 0 | if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED && |
1062 | 0 | b->y_mode <= VERT_LEFT_PRED) |
1063 | 0 | { |
1064 | 0 | uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED]; |
1065 | 0 | const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6); |
1066 | 0 | b->y_angle = angle - 3; |
1067 | 0 | } else { |
1068 | 0 | b->y_angle = 0; |
1069 | 0 | } |
1070 | |
|
1071 | 0 | if (has_chroma) { |
1072 | 0 | const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ? |
1073 | 0 | cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs)); |
1074 | 0 | uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode]; |
1075 | 0 | b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf, |
1076 | 0 | N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed); |
1077 | 0 | if (DEBUG_BLOCK_INFO) |
1078 | 0 | printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng); |
1079 | |
|
1080 | 0 | b->uv_angle = 0; |
1081 | 0 | if (b->uv_mode == CFL_PRED) { |
1082 | 0 | #define SIGN(a) (!!(a) + ((a) > 0)) |
1083 | 0 | const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac, |
1084 | 0 | ts->cdf.m.cfl_sign, 7) + 1; |
1085 | 0 | const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3; |
1086 | 0 | assert(sign_u == sign / 3); |
1087 | 0 | if (sign_u) { |
1088 | 0 | const int ctx = (sign_u == 2) * 3 + sign_v; |
1089 | 0 | b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac, |
1090 | 0 | ts->cdf.m.cfl_alpha[ctx], 15) + 1; |
1091 | 0 | if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0]; |
1092 | 0 | } else { |
1093 | 0 | b->cfl_alpha[0] = 0; |
1094 | 0 | } |
1095 | 0 | if (sign_v) { |
1096 | 0 | const int ctx = (sign_v == 2) * 3 + sign_u; |
1097 | 0 | b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac, |
1098 | 0 | ts->cdf.m.cfl_alpha[ctx], 15) + 1; |
1099 | 0 | if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1]; |
1100 | 0 | } else { |
1101 | 0 | b->cfl_alpha[1] = 0; |
1102 | 0 | } |
1103 | 0 | #undef SIGN |
1104 | 0 | if (DEBUG_BLOCK_INFO) |
1105 | 0 | printf("Post-uvalphas[%d/%d]: r=%d\n", |
1106 | 0 | b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng); |
1107 | 0 | } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED && |
1108 | 0 | b->uv_mode <= VERT_LEFT_PRED) |
1109 | 0 | { |
1110 | 0 | uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED]; |
1111 | 0 | const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6); |
1112 | 0 | b->uv_angle = angle - 3; |
1113 | 0 | } |
1114 | 0 | } |
1115 | | |
1116 | 0 | b->pal_sz[0] = b->pal_sz[1] = 0; |
1117 | 0 | if (f->frame_hdr->allow_screen_content_tools && |
1118 | 0 | imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4) |
1119 | 0 | { |
1120 | 0 | const int sz_ctx = b_dim[2] + b_dim[3] - 2; |
1121 | 0 | if (b->y_mode == DC_PRED) { |
1122 | 0 | const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0); |
1123 | 0 | const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac, |
1124 | 0 | ts->cdf.m.pal_y[sz_ctx][pal_ctx]); |
1125 | 0 | if (DEBUG_BLOCK_INFO) |
1126 | 0 | printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng); |
1127 | 0 | if (use_y_pal) |
1128 | 0 | f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4); |
1129 | 0 | } |
1130 | |
|
1131 | 0 | if (has_chroma && b->uv_mode == DC_PRED) { |
1132 | 0 | const int pal_ctx = b->pal_sz[0] > 0; |
1133 | 0 | const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac, |
1134 | 0 | ts->cdf.m.pal_uv[pal_ctx]); |
1135 | 0 | if (DEBUG_BLOCK_INFO) |
1136 | 0 | printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng); |
1137 | 0 | if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates |
1138 | 0 | f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4); |
1139 | 0 | } |
1140 | 0 | } |
1141 | |
|
1142 | 0 | if (b->y_mode == DC_PRED && !b->pal_sz[0] && |
1143 | 0 | imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra) |
1144 | 0 | { |
1145 | 0 | const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac, |
1146 | 0 | ts->cdf.m.use_filter_intra[bs]); |
1147 | 0 | if (is_filter) { |
1148 | 0 | b->y_mode = FILTER_PRED; |
1149 | 0 | b->y_angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, |
1150 | 0 | ts->cdf.m.filter_intra, 4); |
1151 | 0 | } |
1152 | 0 | if (DEBUG_BLOCK_INFO) |
1153 | 0 | printf("Post-filterintramode[%d/%d]: r=%d\n", |
1154 | 0 | b->y_mode, b->y_angle, ts->msac.rng); |
1155 | 0 | } |
1156 | |
|
1157 | 0 | if (b->pal_sz[0]) { |
1158 | 0 | uint8_t *pal_idx; |
1159 | 0 | if (t->frame_thread.pass) { |
1160 | 0 | const int p = t->frame_thread.pass & 1; |
1161 | 0 | assert(ts->frame_thread[p].pal_idx); |
1162 | 0 | pal_idx = ts->frame_thread[p].pal_idx; |
1163 | 0 | ts->frame_thread[p].pal_idx += bw4 * bh4 * 8; |
1164 | 0 | } else |
1165 | 0 | pal_idx = t->scratch.pal_idx_y; |
1166 | 0 | read_pal_indices(t, pal_idx, b->pal_sz[0], 0, w4, h4, bw4, bh4); |
1167 | 0 | if (DEBUG_BLOCK_INFO) |
1168 | 0 | printf("Post-y-pal-indices: r=%d\n", ts->msac.rng); |
1169 | 0 | } |
1170 | | |
1171 | 0 | if (has_chroma && b->pal_sz[1]) { |
1172 | 0 | uint8_t *pal_idx; |
1173 | 0 | if (t->frame_thread.pass) { |
1174 | 0 | const int p = t->frame_thread.pass & 1; |
1175 | 0 | assert(ts->frame_thread[p].pal_idx); |
1176 | 0 | pal_idx = ts->frame_thread[p].pal_idx; |
1177 | 0 | ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8; |
1178 | 0 | } else |
1179 | 0 | pal_idx = t->scratch.pal_idx_uv; |
1180 | 0 | read_pal_indices(t, pal_idx, b->pal_sz[1], 1, cw4, ch4, cbw4, cbh4); |
1181 | 0 | if (DEBUG_BLOCK_INFO) |
1182 | 0 | printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng); |
1183 | 0 | } |
1184 | | |
1185 | 0 | const TxfmInfo *t_dim; |
1186 | 0 | if (f->frame_hdr->segmentation.lossless[b->seg_id]) { |
1187 | 0 | b->tx = b->uvtx = (int) TX_4X4; |
1188 | 0 | t_dim = &dav1d_txfm_dimensions[TX_4X4]; |
1189 | 0 | } else { |
1190 | 0 | b->tx = dav1d_max_txfm_size_for_bs[bs][0]; |
1191 | 0 | b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout]; |
1192 | 0 | t_dim = &dav1d_txfm_dimensions[b->tx]; |
1193 | 0 | if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) { |
1194 | 0 | const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4); |
1195 | 0 | uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx]; |
1196 | 0 | int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf, |
1197 | 0 | imin(t_dim->max, 2)); |
1198 | |
|
1199 | 0 | while (depth--) { |
1200 | 0 | b->tx = t_dim->sub; |
1201 | 0 | t_dim = &dav1d_txfm_dimensions[b->tx]; |
1202 | 0 | } |
1203 | 0 | } |
1204 | 0 | if (DEBUG_BLOCK_INFO) |
1205 | 0 | printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng); |
1206 | 0 | } |
1207 | | |
1208 | | // reconstruction |
1209 | 0 | if (t->frame_thread.pass == 1) { |
1210 | 0 | f->bd_fn.read_coef_blocks(t, bs, b); |
1211 | 0 | } else { |
1212 | 0 | f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b); |
1213 | 0 | } |
1214 | |
|
1215 | 0 | if (f->frame_hdr->loopfilter.level_y[0] || |
1216 | 0 | f->frame_hdr->loopfilter.level_y[1]) |
1217 | 0 | { |
1218 | 0 | dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride, |
1219 | 0 | (const uint8_t (*)[8][2]) |
1220 | 0 | &ts->lflvl[b->seg_id][0][0][0], |
1221 | 0 | t->bx, t->by, f->w4, f->h4, bs, |
1222 | 0 | b->tx, b->uvtx, f->cur.p.layout, |
1223 | 0 | &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4], |
1224 | 0 | has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL, |
1225 | 0 | has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL); |
1226 | 0 | } |
1227 | | // update contexts |
1228 | 0 | const enum IntraPredMode y_mode_nofilt = |
1229 | 0 | b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode; |
1230 | 0 | BlockContext *edge = t->a; |
1231 | 0 | for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { |
1232 | 0 | int t_lsz = ((uint8_t *) &t_dim->lw)[i]; // lw then lh |
1233 | 0 | #define set_ctx(rep_macro) \ |
1234 | 0 | rep_macro(edge->tx_intra, off, t_lsz); \ |
1235 | 0 | rep_macro(edge->tx, off, t_lsz); \ |
1236 | 0 | rep_macro(edge->mode, off, y_mode_nofilt); \ |
1237 | 0 | rep_macro(edge->pal_sz, off, b->pal_sz[0]); \ |
1238 | 0 | rep_macro(edge->seg_pred, off, seg_pred); \ |
1239 | 0 | rep_macro(edge->skip_mode, off, 0); \ |
1240 | 0 | rep_macro(edge->intra, off, 1); \ |
1241 | 0 | rep_macro(edge->skip, off, b->skip); \ |
1242 | | /* see aomedia bug 2183 for why we use luma coordinates here */ \ |
1243 | 0 | rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \ |
1244 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \ |
1245 | 0 | rep_macro(edge->comp_type, off, COMP_INTER_NONE); \ |
1246 | 0 | rep_macro(edge->ref[0], off, ((uint8_t) -1)); \ |
1247 | 0 | rep_macro(edge->ref[1], off, ((uint8_t) -1)); \ |
1248 | 0 | rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \ |
1249 | 0 | rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \ |
1250 | 0 | } |
1251 | 0 | case_set(b_dim[2 + i]); |
1252 | 0 | #undef set_ctx |
1253 | 0 | } |
1254 | 0 | if (b->pal_sz[0]) |
1255 | 0 | f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4); |
1256 | 0 | if (has_chroma) { |
1257 | 0 | uint8_t uv_mode = b->uv_mode; |
1258 | 0 | dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode); |
1259 | 0 | dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode); |
1260 | 0 | if (b->pal_sz[1]) |
1261 | 0 | f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4); |
1262 | 0 | } |
1263 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) |
1264 | 0 | splat_intraref(f->c, t, bs, bw4, bh4); |
1265 | 0 | } else if (IS_KEY_OR_INTRA(f->frame_hdr)) { |
1266 | | // intra block copy |
1267 | 0 | refmvs_candidate mvstack[8]; |
1268 | 0 | int n_mvs, ctx; |
1269 | 0 | dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx, |
1270 | 0 | (union refmvs_refpair) { .ref = { 0, -1 }}, |
1271 | 0 | bs, intra_edge_flags, t->by, t->bx); |
1272 | |
|
1273 | 0 | if (mvstack[0].mv.mv[0].n) |
1274 | 0 | b->mv[0] = mvstack[0].mv.mv[0]; |
1275 | 0 | else if (mvstack[1].mv.mv[0].n) |
1276 | 0 | b->mv[0] = mvstack[1].mv.mv[0]; |
1277 | 0 | else { |
1278 | 0 | if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) { |
1279 | 0 | b->mv[0].y = 0; |
1280 | 0 | b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048; |
1281 | 0 | } else { |
1282 | 0 | b->mv[0].y = -(512 << f->seq_hdr->sb128); |
1283 | 0 | b->mv[0].x = 0; |
1284 | 0 | } |
1285 | 0 | } |
1286 | |
|
1287 | 0 | const union mv ref = b->mv[0]; |
1288 | 0 | read_mv_residual(ts, &b->mv[0], -1); |
1289 | | |
1290 | | // clip intrabc motion vector to decoded parts of current tile |
1291 | 0 | int border_left = ts->tiling.col_start * 4; |
1292 | 0 | int border_top = ts->tiling.row_start * 4; |
1293 | 0 | if (has_chroma) { |
1294 | 0 | if (bw4 < 2 && ss_hor) |
1295 | 0 | border_left += 4; |
1296 | 0 | if (bh4 < 2 && ss_ver) |
1297 | 0 | border_top += 4; |
1298 | 0 | } |
1299 | 0 | int src_left = t->bx * 4 + (b->mv[0].x >> 3); |
1300 | 0 | int src_top = t->by * 4 + (b->mv[0].y >> 3); |
1301 | 0 | int src_right = src_left + bw4 * 4; |
1302 | 0 | int src_bottom = src_top + bh4 * 4; |
1303 | 0 | const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4; |
1304 | | |
1305 | | // check against left or right tile boundary and adjust if necessary |
1306 | 0 | if (src_left < border_left) { |
1307 | 0 | src_right += border_left - src_left; |
1308 | 0 | src_left += border_left - src_left; |
1309 | 0 | } else if (src_right > border_right) { |
1310 | 0 | src_left -= src_right - border_right; |
1311 | 0 | src_right -= src_right - border_right; |
1312 | 0 | } |
1313 | | // check against top tile boundary and adjust if necessary |
1314 | 0 | if (src_top < border_top) { |
1315 | 0 | src_bottom += border_top - src_top; |
1316 | 0 | src_top += border_top - src_top; |
1317 | 0 | } |
1318 | |
|
1319 | 0 | const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128); |
1320 | 0 | const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128); |
1321 | 0 | const int sb_size = 1 << (6 + f->seq_hdr->sb128); |
1322 | | // check for overlap with current superblock |
1323 | 0 | if (src_bottom > sby && src_right > sbx) { |
1324 | 0 | if (src_top - border_top >= src_bottom - sby) { |
1325 | | // if possible move src up into the previous superblock row |
1326 | 0 | src_top -= src_bottom - sby; |
1327 | 0 | src_bottom -= src_bottom - sby; |
1328 | 0 | } else if (src_left - border_left >= src_right - sbx) { |
1329 | | // if possible move src left into the previous superblock |
1330 | 0 | src_left -= src_right - sbx; |
1331 | 0 | src_right -= src_right - sbx; |
1332 | 0 | } |
1333 | 0 | } |
1334 | | // move src up if it is below current superblock row |
1335 | 0 | if (src_bottom > sby + sb_size) { |
1336 | 0 | src_top -= src_bottom - (sby + sb_size); |
1337 | 0 | src_bottom -= src_bottom - (sby + sb_size); |
1338 | 0 | } |
1339 | | // error out if mv still overlaps with the current superblock |
1340 | 0 | if (src_bottom > sby && src_right > sbx) |
1341 | 0 | return -1; |
1342 | | |
1343 | 0 | b->mv[0].x = (src_left - t->bx * 4) * 8; |
1344 | 0 | b->mv[0].y = (src_top - t->by * 4) * 8; |
1345 | |
|
1346 | 0 | if (DEBUG_BLOCK_INFO) |
1347 | 0 | printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n", |
1348 | 0 | b->mv[0].y, b->mv[0].x, ref.y, ref.x, |
1349 | 0 | mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng); |
1350 | 0 | read_vartx_tree(t, b, bs, bx4, by4); |
1351 | | |
1352 | | // reconstruction |
1353 | 0 | if (t->frame_thread.pass == 1) { |
1354 | 0 | f->bd_fn.read_coef_blocks(t, bs, b); |
1355 | 0 | b->filter2d = FILTER_2D_BILINEAR; |
1356 | 0 | } else { |
1357 | 0 | if (f->bd_fn.recon_b_inter(t, bs, b)) return -1; |
1358 | 0 | } |
1359 | | |
1360 | 0 | splat_intrabc_mv(f->c, t, bs, b, bw4, bh4); |
1361 | 0 | BlockContext *edge = t->a; |
1362 | 0 | for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { |
1363 | 0 | #define set_ctx(rep_macro) \ |
1364 | 0 | rep_macro(edge->tx_intra, off, b_dim[2 + i]); \ |
1365 | 0 | rep_macro(edge->mode, off, DC_PRED); \ |
1366 | 0 | rep_macro(edge->pal_sz, off, 0); \ |
1367 | | /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \ |
1368 | 0 | rep_macro(t->pal_sz_uv[i], off, 0); \ |
1369 | 0 | rep_macro(edge->seg_pred, off, seg_pred); \ |
1370 | 0 | rep_macro(edge->skip_mode, off, 0); \ |
1371 | 0 | rep_macro(edge->intra, off, 0); \ |
1372 | 0 | rep_macro(edge->skip, off, b->skip) |
1373 | 0 | case_set(b_dim[2 + i]); |
1374 | 0 | #undef set_ctx |
1375 | 0 | } |
1376 | 0 | if (has_chroma) { |
1377 | 0 | dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED); |
1378 | 0 | dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED); |
1379 | 0 | } |
1380 | 0 | } else { |
1381 | | // inter-specific mode/mv coding |
1382 | 0 | int is_comp, has_subpel_filter; |
1383 | |
|
1384 | 0 | if (b->skip_mode) { |
1385 | 0 | is_comp = 1; |
1386 | 0 | } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) && |
1387 | 0 | f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1) |
1388 | 0 | { |
1389 | 0 | const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4, |
1390 | 0 | have_top, have_left); |
1391 | 0 | is_comp = dav1d_msac_decode_bool_adapt(&ts->msac, |
1392 | 0 | ts->cdf.m.comp[ctx]); |
1393 | 0 | if (DEBUG_BLOCK_INFO) |
1394 | 0 | printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng); |
1395 | 0 | } else { |
1396 | 0 | is_comp = 0; |
1397 | 0 | } |
1398 | |
|
1399 | 0 | if (b->skip_mode) { |
1400 | 0 | b->ref[0] = f->frame_hdr->skip_mode_refs[0]; |
1401 | 0 | b->ref[1] = f->frame_hdr->skip_mode_refs[1]; |
1402 | 0 | b->comp_type = COMP_INTER_AVG; |
1403 | 0 | b->inter_mode = NEARESTMV_NEARESTMV; |
1404 | 0 | b->drl_idx = NEAREST_DRL; |
1405 | 0 | has_subpel_filter = 0; |
1406 | |
|
1407 | 0 | refmvs_candidate mvstack[8]; |
1408 | 0 | int n_mvs, ctx; |
1409 | 0 | dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx, |
1410 | 0 | (union refmvs_refpair) { .ref = { |
1411 | 0 | b->ref[0] + 1, b->ref[1] + 1 }}, |
1412 | 0 | bs, intra_edge_flags, t->by, t->bx); |
1413 | |
|
1414 | 0 | b->mv[0] = mvstack[0].mv.mv[0]; |
1415 | 0 | b->mv[1] = mvstack[0].mv.mv[1]; |
1416 | 0 | fix_mv_precision(f->frame_hdr, &b->mv[0]); |
1417 | 0 | fix_mv_precision(f->frame_hdr, &b->mv[1]); |
1418 | 0 | if (DEBUG_BLOCK_INFO) |
1419 | 0 | printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n", |
1420 | 0 | b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x, |
1421 | 0 | b->ref[0], b->ref[1]); |
1422 | 0 | } else if (is_comp) { |
1423 | 0 | const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4, |
1424 | 0 | have_top, have_left); |
1425 | 0 | if (dav1d_msac_decode_bool_adapt(&ts->msac, |
1426 | 0 | ts->cdf.m.comp_dir[dir_ctx])) |
1427 | 0 | { |
1428 | | // bidir - first reference (fw) |
1429 | 0 | const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4, |
1430 | 0 | have_top, have_left); |
1431 | 0 | if (dav1d_msac_decode_bool_adapt(&ts->msac, |
1432 | 0 | ts->cdf.m.comp_fwd_ref[0][ctx1])) |
1433 | 0 | { |
1434 | 0 | const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4, |
1435 | 0 | have_top, have_left); |
1436 | 0 | b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac, |
1437 | 0 | ts->cdf.m.comp_fwd_ref[2][ctx2]); |
1438 | 0 | } else { |
1439 | 0 | const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4, |
1440 | 0 | have_top, have_left); |
1441 | 0 | b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac, |
1442 | 0 | ts->cdf.m.comp_fwd_ref[1][ctx2]); |
1443 | 0 | } |
1444 | | |
1445 | | // second reference (bw) |
1446 | 0 | const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4, |
1447 | 0 | have_top, have_left); |
1448 | 0 | if (dav1d_msac_decode_bool_adapt(&ts->msac, |
1449 | 0 | ts->cdf.m.comp_bwd_ref[0][ctx3])) |
1450 | 0 | { |
1451 | 0 | b->ref[1] = 6; |
1452 | 0 | } else { |
1453 | 0 | const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4, |
1454 | 0 | have_top, have_left); |
1455 | 0 | b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac, |
1456 | 0 | ts->cdf.m.comp_bwd_ref[1][ctx4]); |
1457 | 0 | } |
1458 | 0 | } else { |
1459 | | // unidir |
1460 | 0 | const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4, |
1461 | 0 | have_top, have_left); |
1462 | 0 | if (dav1d_msac_decode_bool_adapt(&ts->msac, |
1463 | 0 | ts->cdf.m.comp_uni_ref[0][uctx_p])) |
1464 | 0 | { |
1465 | 0 | b->ref[0] = 4; |
1466 | 0 | b->ref[1] = 6; |
1467 | 0 | } else { |
1468 | 0 | const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4, |
1469 | 0 | have_top, have_left); |
1470 | 0 | b->ref[0] = 0; |
1471 | 0 | b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac, |
1472 | 0 | ts->cdf.m.comp_uni_ref[1][uctx_p1]); |
1473 | 0 | if (b->ref[1] == 2) { |
1474 | 0 | const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4, |
1475 | 0 | have_top, have_left); |
1476 | 0 | b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac, |
1477 | 0 | ts->cdf.m.comp_uni_ref[2][uctx_p2]); |
1478 | 0 | } |
1479 | 0 | } |
1480 | 0 | } |
1481 | 0 | if (DEBUG_BLOCK_INFO) |
1482 | 0 | printf("Post-refs[%d/%d]: r=%d\n", |
1483 | 0 | b->ref[0], b->ref[1], ts->msac.rng); |
1484 | |
|
1485 | 0 | refmvs_candidate mvstack[8]; |
1486 | 0 | int n_mvs, ctx; |
1487 | 0 | dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx, |
1488 | 0 | (union refmvs_refpair) { .ref = { |
1489 | 0 | b->ref[0] + 1, b->ref[1] + 1 }}, |
1490 | 0 | bs, intra_edge_flags, t->by, t->bx); |
1491 | |
|
1492 | 0 | b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac, |
1493 | 0 | ts->cdf.m.comp_inter_mode[ctx], |
1494 | 0 | N_COMP_INTER_PRED_MODES - 1); |
1495 | 0 | if (DEBUG_BLOCK_INFO) |
1496 | 0 | printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n", |
1497 | 0 | b->inter_mode, ctx, n_mvs, ts->msac.rng); |
1498 | |
|
1499 | 0 | const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode]; |
1500 | 0 | b->drl_idx = NEAREST_DRL; |
1501 | 0 | if (b->inter_mode == NEWMV_NEWMV) { |
1502 | 0 | if (n_mvs > 1) { // NEARER, NEAR or NEARISH |
1503 | 0 | const int drl_ctx_v1 = get_drl_context(mvstack, 0); |
1504 | 0 | b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, |
1505 | 0 | ts->cdf.m.drl_bit[drl_ctx_v1]); |
1506 | 0 | if (b->drl_idx == NEARER_DRL && n_mvs > 2) { |
1507 | 0 | const int drl_ctx_v2 = get_drl_context(mvstack, 1); |
1508 | 0 | b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, |
1509 | 0 | ts->cdf.m.drl_bit[drl_ctx_v2]); |
1510 | 0 | } |
1511 | 0 | if (DEBUG_BLOCK_INFO) |
1512 | 0 | printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n", |
1513 | 0 | b->drl_idx, n_mvs, ts->msac.rng); |
1514 | 0 | } |
1515 | 0 | } else if (im[0] == NEARMV || im[1] == NEARMV) { |
1516 | 0 | b->drl_idx = NEARER_DRL; |
1517 | 0 | if (n_mvs > 2) { // NEAR or NEARISH |
1518 | 0 | const int drl_ctx_v2 = get_drl_context(mvstack, 1); |
1519 | 0 | b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, |
1520 | 0 | ts->cdf.m.drl_bit[drl_ctx_v2]); |
1521 | 0 | if (b->drl_idx == NEAR_DRL && n_mvs > 3) { |
1522 | 0 | const int drl_ctx_v3 = get_drl_context(mvstack, 2); |
1523 | 0 | b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, |
1524 | 0 | ts->cdf.m.drl_bit[drl_ctx_v3]); |
1525 | 0 | } |
1526 | 0 | if (DEBUG_BLOCK_INFO) |
1527 | 0 | printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n", |
1528 | 0 | b->drl_idx, n_mvs, ts->msac.rng); |
1529 | 0 | } |
1530 | 0 | } |
1531 | 0 | assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL); |
1532 | | |
1533 | 0 | #define assign_comp_mv(idx) \ |
1534 | 0 | switch (im[idx]) { \ |
1535 | 0 | case NEARMV: \ |
1536 | 0 | case NEARESTMV: \ |
1537 | 0 | b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \ |
1538 | 0 | fix_mv_precision(f->frame_hdr, &b->mv[idx]); \ |
1539 | 0 | break; \ |
1540 | 0 | case GLOBALMV: \ |
1541 | 0 | has_subpel_filter |= \ |
1542 | 0 | f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \ |
1543 | 0 | b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \ |
1544 | 0 | t->bx, t->by, bw4, bh4, f->frame_hdr); \ |
1545 | 0 | break; \ |
1546 | 0 | case NEWMV: \ |
1547 | 0 | b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \ |
1548 | 0 | const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \ |
1549 | 0 | read_mv_residual(ts, &b->mv[idx], mv_prec); \ |
1550 | 0 | break; \ |
1551 | 0 | } |
1552 | 0 | has_subpel_filter = imin(bw4, bh4) == 1 || |
1553 | 0 | b->inter_mode != GLOBALMV_GLOBALMV; |
1554 | 0 | assign_comp_mv(0); |
1555 | 0 | assign_comp_mv(1); |
1556 | 0 | #undef assign_comp_mv |
1557 | 0 | if (DEBUG_BLOCK_INFO) |
1558 | 0 | printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n", |
1559 | 0 | b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x, |
1560 | 0 | ts->msac.rng); |
1561 | | |
1562 | | // jnt_comp vs. seg vs. wedge |
1563 | 0 | int is_segwedge = 0; |
1564 | 0 | if (f->seq_hdr->masked_compound) { |
1565 | 0 | const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4); |
1566 | |
|
1567 | 0 | is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac, |
1568 | 0 | ts->cdf.m.mask_comp[mask_ctx]); |
1569 | 0 | if (DEBUG_BLOCK_INFO) |
1570 | 0 | printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n", |
1571 | 0 | is_segwedge, mask_ctx, ts->msac.rng); |
1572 | 0 | } |
1573 | |
|
1574 | 0 | if (!is_segwedge) { |
1575 | 0 | if (f->seq_hdr->jnt_comp) { |
1576 | 0 | const int jnt_ctx = |
1577 | 0 | get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits, |
1578 | 0 | f->cur.frame_hdr->frame_offset, |
1579 | 0 | f->refp[b->ref[0]].p.frame_hdr->frame_offset, |
1580 | 0 | f->refp[b->ref[1]].p.frame_hdr->frame_offset, |
1581 | 0 | t->a, &t->l, by4, bx4); |
1582 | 0 | b->comp_type = COMP_INTER_WEIGHTED_AVG + |
1583 | 0 | dav1d_msac_decode_bool_adapt(&ts->msac, |
1584 | 0 | ts->cdf.m.jnt_comp[jnt_ctx]); |
1585 | 0 | if (DEBUG_BLOCK_INFO) |
1586 | 0 | printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n", |
1587 | 0 | b->comp_type == COMP_INTER_AVG, |
1588 | 0 | jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4], |
1589 | 0 | t->l.comp_type[by4], t->l.ref[0][by4], |
1590 | 0 | ts->msac.rng); |
1591 | 0 | } else { |
1592 | 0 | b->comp_type = COMP_INTER_AVG; |
1593 | 0 | } |
1594 | 0 | } else { |
1595 | 0 | if (wedge_allowed_mask & (1 << bs)) { |
1596 | 0 | const int ctx = dav1d_wedge_ctx_lut[bs]; |
1597 | 0 | b->comp_type = COMP_INTER_WEDGE - |
1598 | 0 | dav1d_msac_decode_bool_adapt(&ts->msac, |
1599 | 0 | ts->cdf.m.wedge_comp[ctx]); |
1600 | 0 | if (b->comp_type == COMP_INTER_WEDGE) |
1601 | 0 | b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac, |
1602 | 0 | ts->cdf.m.wedge_idx[ctx], 15); |
1603 | 0 | } else { |
1604 | 0 | b->comp_type = COMP_INTER_SEG; |
1605 | 0 | } |
1606 | 0 | b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac); |
1607 | 0 | if (DEBUG_BLOCK_INFO) |
1608 | 0 | printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n", |
1609 | 0 | b->comp_type == COMP_INTER_WEDGE, |
1610 | 0 | b->wedge_idx, b->mask_sign, ts->msac.rng); |
1611 | 0 | } |
1612 | 0 | } else { |
1613 | 0 | b->comp_type = COMP_INTER_NONE; |
1614 | | |
1615 | | // ref |
1616 | 0 | if (seg && seg->ref > 0) { |
1617 | 0 | b->ref[0] = seg->ref - 1; |
1618 | 0 | } else if (seg && (seg->globalmv || seg->skip)) { |
1619 | 0 | b->ref[0] = 0; |
1620 | 0 | } else { |
1621 | 0 | const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4, |
1622 | 0 | have_top, have_left); |
1623 | 0 | if (dav1d_msac_decode_bool_adapt(&ts->msac, |
1624 | 0 | ts->cdf.m.ref[0][ctx1])) |
1625 | 0 | { |
1626 | 0 | const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4, |
1627 | 0 | have_top, have_left); |
1628 | 0 | if (dav1d_msac_decode_bool_adapt(&ts->msac, |
1629 | 0 | ts->cdf.m.ref[1][ctx2])) |
1630 | 0 | { |
1631 | 0 | b->ref[0] = 6; |
1632 | 0 | } else { |
1633 | 0 | const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4, |
1634 | 0 | have_top, have_left); |
1635 | 0 | b->ref[0] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac, |
1636 | 0 | ts->cdf.m.ref[5][ctx3]); |
1637 | 0 | } |
1638 | 0 | } else { |
1639 | 0 | const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4, |
1640 | 0 | have_top, have_left); |
1641 | 0 | if (dav1d_msac_decode_bool_adapt(&ts->msac, |
1642 | 0 | ts->cdf.m.ref[2][ctx2])) |
1643 | 0 | { |
1644 | 0 | const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4, |
1645 | 0 | have_top, have_left); |
1646 | 0 | b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac, |
1647 | 0 | ts->cdf.m.ref[4][ctx3]); |
1648 | 0 | } else { |
1649 | 0 | const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4, |
1650 | 0 | have_top, have_left); |
1651 | 0 | b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac, |
1652 | 0 | ts->cdf.m.ref[3][ctx3]); |
1653 | 0 | } |
1654 | 0 | } |
1655 | 0 | if (DEBUG_BLOCK_INFO) |
1656 | 0 | printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng); |
1657 | 0 | } |
1658 | 0 | b->ref[1] = -1; |
1659 | |
|
1660 | 0 | refmvs_candidate mvstack[8]; |
1661 | 0 | int n_mvs, ctx; |
1662 | 0 | dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx, |
1663 | 0 | (union refmvs_refpair) { .ref = { b->ref[0] + 1, -1 }}, |
1664 | 0 | bs, intra_edge_flags, t->by, t->bx); |
1665 | | |
1666 | | // mode parsing and mv derivation from ref_mvs |
1667 | 0 | if ((seg && (seg->skip || seg->globalmv)) || |
1668 | 0 | dav1d_msac_decode_bool_adapt(&ts->msac, |
1669 | 0 | ts->cdf.m.newmv_mode[ctx & 7])) |
1670 | 0 | { |
1671 | 0 | if ((seg && (seg->skip || seg->globalmv)) || |
1672 | 0 | !dav1d_msac_decode_bool_adapt(&ts->msac, |
1673 | 0 | ts->cdf.m.globalmv_mode[(ctx >> 3) & 1])) |
1674 | 0 | { |
1675 | 0 | b->inter_mode = GLOBALMV; |
1676 | 0 | b->mv[0] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[0]], |
1677 | 0 | t->bx, t->by, bw4, bh4, f->frame_hdr); |
1678 | 0 | has_subpel_filter = imin(bw4, bh4) == 1 || |
1679 | 0 | f->frame_hdr->gmv[b->ref[0]].type == DAV1D_WM_TYPE_TRANSLATION; |
1680 | 0 | } else { |
1681 | 0 | has_subpel_filter = 1; |
1682 | 0 | if (dav1d_msac_decode_bool_adapt(&ts->msac, |
1683 | 0 | ts->cdf.m.refmv_mode[(ctx >> 4) & 15])) |
1684 | 0 | { // NEAREST, NEARER, NEAR or NEARISH |
1685 | 0 | b->inter_mode = NEARMV; |
1686 | 0 | b->drl_idx = NEARER_DRL; |
1687 | 0 | if (n_mvs > 2) { // NEARER, NEAR or NEARISH |
1688 | 0 | const int drl_ctx_v2 = get_drl_context(mvstack, 1); |
1689 | 0 | b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, |
1690 | 0 | ts->cdf.m.drl_bit[drl_ctx_v2]); |
1691 | 0 | if (b->drl_idx == NEAR_DRL && n_mvs > 3) { // NEAR or NEARISH |
1692 | 0 | const int drl_ctx_v3 = |
1693 | 0 | get_drl_context(mvstack, 2); |
1694 | 0 | b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, |
1695 | 0 | ts->cdf.m.drl_bit[drl_ctx_v3]); |
1696 | 0 | } |
1697 | 0 | } |
1698 | 0 | } else { |
1699 | 0 | b->inter_mode = NEARESTMV; |
1700 | 0 | b->drl_idx = NEAREST_DRL; |
1701 | 0 | } |
1702 | 0 | assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL); |
1703 | 0 | b->mv[0] = mvstack[b->drl_idx].mv.mv[0]; |
1704 | 0 | if (b->drl_idx < NEAR_DRL) |
1705 | 0 | fix_mv_precision(f->frame_hdr, &b->mv[0]); |
1706 | 0 | } |
1707 | | |
1708 | 0 | if (DEBUG_BLOCK_INFO) |
1709 | 0 | printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n", |
1710 | 0 | b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs, |
1711 | 0 | ts->msac.rng); |
1712 | 0 | } else { |
1713 | 0 | has_subpel_filter = 1; |
1714 | 0 | b->inter_mode = NEWMV; |
1715 | 0 | b->drl_idx = NEAREST_DRL; |
1716 | 0 | if (n_mvs > 1) { // NEARER, NEAR or NEARISH |
1717 | 0 | const int drl_ctx_v1 = get_drl_context(mvstack, 0); |
1718 | 0 | b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, |
1719 | 0 | ts->cdf.m.drl_bit[drl_ctx_v1]); |
1720 | 0 | if (b->drl_idx == NEARER_DRL && n_mvs > 2) { // NEAR or NEARISH |
1721 | 0 | const int drl_ctx_v2 = get_drl_context(mvstack, 1); |
1722 | 0 | b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, |
1723 | 0 | ts->cdf.m.drl_bit[drl_ctx_v2]); |
1724 | 0 | } |
1725 | 0 | } |
1726 | 0 | assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL); |
1727 | 0 | if (n_mvs > 1) { |
1728 | 0 | b->mv[0] = mvstack[b->drl_idx].mv.mv[0]; |
1729 | 0 | } else { |
1730 | 0 | assert(!b->drl_idx); |
1731 | 0 | b->mv[0] = mvstack[0].mv.mv[0]; |
1732 | 0 | fix_mv_precision(f->frame_hdr, &b->mv[0]); |
1733 | 0 | } |
1734 | 0 | if (DEBUG_BLOCK_INFO) |
1735 | 0 | printf("Post-intermode[%d,drl=%d]: r=%d\n", |
1736 | 0 | b->inter_mode, b->drl_idx, ts->msac.rng); |
1737 | 0 | const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; |
1738 | 0 | read_mv_residual(ts, &b->mv[0], mv_prec); |
1739 | 0 | if (DEBUG_BLOCK_INFO) |
1740 | 0 | printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n", |
1741 | 0 | b->mv[0].y, b->mv[0].x, ts->msac.rng); |
1742 | 0 | } |
1743 | | |
1744 | | // interintra flags |
1745 | 0 | const int ii_sz_grp = dav1d_ymode_size_context[bs]; |
1746 | 0 | if (f->seq_hdr->inter_intra && |
1747 | 0 | interintra_allowed_mask & (1 << bs) && |
1748 | 0 | dav1d_msac_decode_bool_adapt(&ts->msac, |
1749 | 0 | ts->cdf.m.interintra[ii_sz_grp])) |
1750 | 0 | { |
1751 | 0 | b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac, |
1752 | 0 | ts->cdf.m.interintra_mode[ii_sz_grp], |
1753 | 0 | N_INTER_INTRA_PRED_MODES - 1); |
1754 | 0 | const int wedge_ctx = dav1d_wedge_ctx_lut[bs]; |
1755 | 0 | b->interintra_type = INTER_INTRA_BLEND + |
1756 | 0 | dav1d_msac_decode_bool_adapt(&ts->msac, |
1757 | 0 | ts->cdf.m.interintra_wedge[wedge_ctx]); |
1758 | 0 | if (b->interintra_type == INTER_INTRA_WEDGE) |
1759 | 0 | b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac, |
1760 | 0 | ts->cdf.m.wedge_idx[wedge_ctx], 15); |
1761 | 0 | } else { |
1762 | 0 | b->interintra_type = INTER_INTRA_NONE; |
1763 | 0 | } |
1764 | 0 | if (DEBUG_BLOCK_INFO && f->seq_hdr->inter_intra && |
1765 | 0 | interintra_allowed_mask & (1 << bs)) |
1766 | 0 | { |
1767 | 0 | printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n", |
1768 | 0 | b->interintra_type, b->interintra_mode, |
1769 | 0 | b->wedge_idx, ts->msac.rng); |
1770 | 0 | } |
1771 | | |
1772 | | // motion variation |
1773 | 0 | if (f->frame_hdr->switchable_motion_mode && |
1774 | 0 | b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 && |
1775 | | // is not warped global motion |
1776 | 0 | !(!f->frame_hdr->force_integer_mv && b->inter_mode == GLOBALMV && |
1777 | 0 | f->frame_hdr->gmv[b->ref[0]].type > DAV1D_WM_TYPE_TRANSLATION) && |
1778 | | // has overlappable neighbours |
1779 | 0 | ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) || |
1780 | 0 | (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1)))) |
1781 | 0 | { |
1782 | | // reaching here means the block allows obmc - check warp by |
1783 | | // finding matching-ref blocks in top/left edges |
1784 | 0 | uint64_t mask[2] = { 0, 0 }; |
1785 | 0 | find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4, |
1786 | 0 | have_left, have_top, b->ref[0], mask); |
1787 | 0 | const int allow_warp = !f->svc[b->ref[0]][0].scale && |
1788 | 0 | !f->frame_hdr->force_integer_mv && |
1789 | 0 | f->frame_hdr->warp_motion && (mask[0] | mask[1]); |
1790 | |
|
1791 | 0 | b->motion_mode = allow_warp ? |
1792 | 0 | dav1d_msac_decode_symbol_adapt4(&ts->msac, |
1793 | 0 | ts->cdf.m.motion_mode[bs], 2) : |
1794 | 0 | dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]); |
1795 | 0 | if (b->motion_mode == MM_WARP) { |
1796 | 0 | has_subpel_filter = 0; |
1797 | 0 | derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv); |
1798 | 0 | #define signabs(v) v < 0 ? '-' : ' ', abs(v) |
1799 | 0 | if (DEBUG_BLOCK_INFO) |
1800 | 0 | printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n" |
1801 | 0 | "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, " |
1802 | 0 | "mv=y:%d,x:%d\n", |
1803 | 0 | signabs(t->warpmv.matrix[0]), |
1804 | 0 | signabs(t->warpmv.matrix[1]), |
1805 | 0 | signabs(t->warpmv.matrix[2]), |
1806 | 0 | signabs(t->warpmv.matrix[3]), |
1807 | 0 | signabs(t->warpmv.matrix[4]), |
1808 | 0 | signabs(t->warpmv.matrix[5]), |
1809 | 0 | signabs(t->warpmv.u.p.alpha), |
1810 | 0 | signabs(t->warpmv.u.p.beta), |
1811 | 0 | signabs(t->warpmv.u.p.gamma), |
1812 | 0 | signabs(t->warpmv.u.p.delta), |
1813 | 0 | b->mv[0].y, b->mv[0].x); |
1814 | 0 | #undef signabs |
1815 | 0 | if (t->frame_thread.pass) { |
1816 | 0 | if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) { |
1817 | 0 | b->matrix[0] = t->warpmv.matrix[2] - 0x10000; |
1818 | 0 | b->matrix[1] = t->warpmv.matrix[3]; |
1819 | 0 | b->matrix[2] = t->warpmv.matrix[4]; |
1820 | 0 | b->matrix[3] = t->warpmv.matrix[5] - 0x10000; |
1821 | 0 | } else { |
1822 | 0 | b->matrix[0] = INT16_MIN; |
1823 | 0 | } |
1824 | 0 | } |
1825 | 0 | } |
1826 | |
|
1827 | 0 | if (DEBUG_BLOCK_INFO) |
1828 | 0 | printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIx64 "/0x%" |
1829 | 0 | PRIx64 "]\n", b->motion_mode, ts->msac.rng, mask[0], |
1830 | 0 | mask[1]); |
1831 | 0 | } else { |
1832 | 0 | b->motion_mode = MM_TRANSLATION; |
1833 | 0 | } |
1834 | 0 | } |
1835 | | |
1836 | | // subpel filter |
1837 | 0 | enum Dav1dFilterMode filter[2]; |
1838 | 0 | if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) { |
1839 | 0 | if (has_subpel_filter) { |
1840 | 0 | const int comp = b->comp_type != COMP_INTER_NONE; |
1841 | 0 | const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0], |
1842 | 0 | by4, bx4); |
1843 | 0 | filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac, |
1844 | 0 | ts->cdf.m.filter[0][ctx1], |
1845 | 0 | DAV1D_N_SWITCHABLE_FILTERS - 1); |
1846 | 0 | if (f->seq_hdr->dual_filter) { |
1847 | 0 | const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1, |
1848 | 0 | b->ref[0], by4, bx4); |
1849 | 0 | if (DEBUG_BLOCK_INFO) |
1850 | 0 | printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n", |
1851 | 0 | filter[0], ctx1, ts->msac.rng); |
1852 | 0 | filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac, |
1853 | 0 | ts->cdf.m.filter[1][ctx2], |
1854 | 0 | DAV1D_N_SWITCHABLE_FILTERS - 1); |
1855 | 0 | if (DEBUG_BLOCK_INFO) |
1856 | 0 | printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n", |
1857 | 0 | filter[1], ctx2, ts->msac.rng); |
1858 | 0 | } else { |
1859 | 0 | filter[1] = filter[0]; |
1860 | 0 | if (DEBUG_BLOCK_INFO) |
1861 | 0 | printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n", |
1862 | 0 | filter[0], ctx1, ts->msac.rng); |
1863 | 0 | } |
1864 | 0 | } else { |
1865 | 0 | filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR; |
1866 | 0 | } |
1867 | 0 | } else { |
1868 | 0 | filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode; |
1869 | 0 | } |
1870 | 0 | b->filter2d = dav1d_filter_2d[filter[1]][filter[0]]; |
1871 | |
|
1872 | 0 | read_vartx_tree(t, b, bs, bx4, by4); |
1873 | | |
1874 | | // reconstruction |
1875 | 0 | if (t->frame_thread.pass == 1) { |
1876 | 0 | f->bd_fn.read_coef_blocks(t, bs, b); |
1877 | 0 | } else { |
1878 | 0 | if (f->bd_fn.recon_b_inter(t, bs, b)) return -1; |
1879 | 0 | } |
1880 | | |
1881 | 0 | if (f->frame_hdr->loopfilter.level_y[0] || |
1882 | 0 | f->frame_hdr->loopfilter.level_y[1]) |
1883 | 0 | { |
1884 | 0 | const int is_globalmv = |
1885 | 0 | b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV); |
1886 | 0 | const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2]) |
1887 | 0 | &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv]; |
1888 | 0 | const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 }; |
1889 | 0 | enum RectTxfmSize ytx = b->max_ytx, uvtx = b->uvtx; |
1890 | 0 | if (f->frame_hdr->segmentation.lossless[b->seg_id]) { |
1891 | 0 | ytx = (enum RectTxfmSize) TX_4X4; |
1892 | 0 | uvtx = (enum RectTxfmSize) TX_4X4; |
1893 | 0 | } |
1894 | 0 | dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, lf_lvls, |
1895 | 0 | t->bx, t->by, f->w4, f->h4, b->skip, bs, |
1896 | 0 | ytx, tx_split, uvtx, f->cur.p.layout, |
1897 | 0 | &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4], |
1898 | 0 | has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL, |
1899 | 0 | has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL); |
1900 | 0 | } |
1901 | | |
1902 | | // context updates |
1903 | 0 | if (is_comp) |
1904 | 0 | splat_tworef_mv(f->c, t, bs, b, bw4, bh4); |
1905 | 0 | else |
1906 | 0 | splat_oneref_mv(f->c, t, bs, b, bw4, bh4); |
1907 | 0 | BlockContext *edge = t->a; |
1908 | 0 | for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { |
1909 | 0 | #define set_ctx(rep_macro) \ |
1910 | 0 | rep_macro(edge->seg_pred, off, seg_pred); \ |
1911 | 0 | rep_macro(edge->skip_mode, off, b->skip_mode); \ |
1912 | 0 | rep_macro(edge->intra, off, 0); \ |
1913 | 0 | rep_macro(edge->skip, off, b->skip); \ |
1914 | 0 | rep_macro(edge->pal_sz, off, 0); \ |
1915 | | /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \ |
1916 | 0 | rep_macro(t->pal_sz_uv[i], off, 0); \ |
1917 | 0 | rep_macro(edge->tx_intra, off, b_dim[2 + i]); \ |
1918 | 0 | rep_macro(edge->comp_type, off, b->comp_type); \ |
1919 | 0 | rep_macro(edge->filter[0], off, filter[0]); \ |
1920 | 0 | rep_macro(edge->filter[1], off, filter[1]); \ |
1921 | 0 | rep_macro(edge->mode, off, b->inter_mode); \ |
1922 | 0 | rep_macro(edge->ref[0], off, b->ref[0]); \ |
1923 | 0 | rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1])) |
1924 | 0 | case_set(b_dim[2 + i]); |
1925 | 0 | #undef set_ctx |
1926 | 0 | } |
1927 | 0 | if (has_chroma) { |
1928 | 0 | dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED); |
1929 | 0 | dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED); |
1930 | 0 | } |
1931 | 0 | } |
1932 | | |
1933 | | // update contexts |
1934 | 0 | if (f->frame_hdr->segmentation.enabled && |
1935 | 0 | f->frame_hdr->segmentation.update_map) |
1936 | 0 | { |
1937 | 0 | uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx]; |
1938 | 0 | #define set_ctx(rep_macro) \ |
1939 | 0 | for (int y = 0; y < bh4; y++) { \ |
1940 | 0 | rep_macro(seg_ptr, 0, b->seg_id); \ |
1941 | 0 | seg_ptr += f->b4_stride; \ |
1942 | 0 | } |
1943 | 0 | case_set(b_dim[2]); |
1944 | 0 | #undef set_ctx |
1945 | 0 | } |
1946 | 0 | if (!b->skip) { |
1947 | 0 | uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1]; |
1948 | 0 | const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15); |
1949 | 0 | const int bx_idx = (bx4 & 16) >> 4; |
1950 | 0 | for (int y = 0; y < bh4; y += 2, noskip_mask++) { |
1951 | 0 | (*noskip_mask)[bx_idx] |= mask; |
1952 | 0 | if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway |
1953 | 0 | (*noskip_mask)[1] |= mask; |
1954 | 0 | } |
1955 | 0 | } |
1956 | |
|
1957 | 0 | if (t->frame_thread.pass == 1 && !b->intra && IS_INTER_OR_SWITCH(f->frame_hdr)) { |
1958 | 0 | const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift; |
1959 | 0 | int (*const lowest_px)[2] = ts->lowest_pixel[sby]; |
1960 | | |
1961 | | // keep track of motion vectors for each reference |
1962 | 0 | if (b->comp_type == COMP_INTER_NONE) { |
1963 | | // y |
1964 | 0 | if (imin(bw4, bh4) > 1 && |
1965 | 0 | ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) || |
1966 | 0 | (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION))) |
1967 | 0 | { |
1968 | 0 | affine_lowest_px_luma(t, &lowest_px[b->ref[0]][0], b_dim, |
1969 | 0 | b->motion_mode == MM_WARP ? &t->warpmv : |
1970 | 0 | &f->frame_hdr->gmv[b->ref[0]]); |
1971 | 0 | } else { |
1972 | 0 | mc_lowest_px(&lowest_px[b->ref[0]][0], t->by, bh4, b->mv[0].y, |
1973 | 0 | 0, &f->svc[b->ref[0]][1]); |
1974 | 0 | if (b->motion_mode == MM_OBMC) { |
1975 | 0 | obmc_lowest_px(t, lowest_px, 0, b_dim, bx4, by4, w4, h4); |
1976 | 0 | } |
1977 | 0 | } |
1978 | | |
1979 | | // uv |
1980 | 0 | if (has_chroma) { |
1981 | | // sub8x8 derivation |
1982 | 0 | int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver; |
1983 | 0 | refmvs_block *const *r; |
1984 | 0 | if (is_sub8x8) { |
1985 | 0 | assert(ss_hor == 1); |
1986 | 0 | r = &t->rt.r[(t->by & 31) + 5]; |
1987 | 0 | if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0; |
1988 | 0 | if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0; |
1989 | 0 | if (bw4 == 1 && bh4 == ss_ver) |
1990 | 0 | is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0; |
1991 | 0 | } |
1992 | | |
1993 | | // chroma prediction |
1994 | 0 | if (is_sub8x8) { |
1995 | 0 | assert(ss_hor == 1); |
1996 | 0 | if (bw4 == 1 && bh4 == ss_ver) { |
1997 | 0 | const refmvs_block *const rr = &r[-1][t->bx - 1]; |
1998 | 0 | mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1], |
1999 | 0 | t->by - 1, bh4, rr->mv.mv[0].y, ss_ver, |
2000 | 0 | &f->svc[rr->ref.ref[0] - 1][1]); |
2001 | 0 | } |
2002 | 0 | if (bw4 == 1) { |
2003 | 0 | const refmvs_block *const rr = &r[0][t->bx - 1]; |
2004 | 0 | mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1], |
2005 | 0 | t->by, bh4, rr->mv.mv[0].y, ss_ver, |
2006 | 0 | &f->svc[rr->ref.ref[0] - 1][1]); |
2007 | 0 | } |
2008 | 0 | if (bh4 == ss_ver) { |
2009 | 0 | const refmvs_block *const rr = &r[-1][t->bx]; |
2010 | 0 | mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1], |
2011 | 0 | t->by - 1, bh4, rr->mv.mv[0].y, ss_ver, |
2012 | 0 | &f->svc[rr->ref.ref[0] - 1][1]); |
2013 | 0 | } |
2014 | 0 | mc_lowest_px(&lowest_px[b->ref[0]][1], t->by, bh4, |
2015 | 0 | b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]); |
2016 | 0 | } else { |
2017 | 0 | if (imin(cbw4, cbh4) > 1 && |
2018 | 0 | ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) || |
2019 | 0 | (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION))) |
2020 | 0 | { |
2021 | 0 | affine_lowest_px_chroma(t, &lowest_px[b->ref[0]][1], b_dim, |
2022 | 0 | b->motion_mode == MM_WARP ? &t->warpmv : |
2023 | 0 | &f->frame_hdr->gmv[b->ref[0]]); |
2024 | 0 | } else { |
2025 | 0 | mc_lowest_px(&lowest_px[b->ref[0]][1], |
2026 | 0 | t->by & ~ss_ver, bh4 << (bh4 == ss_ver), |
2027 | 0 | b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]); |
2028 | 0 | if (b->motion_mode == MM_OBMC) { |
2029 | 0 | obmc_lowest_px(t, lowest_px, 1, b_dim, bx4, by4, w4, h4); |
2030 | 0 | } |
2031 | 0 | } |
2032 | 0 | } |
2033 | 0 | } |
2034 | 0 | } else { |
2035 | | // y |
2036 | 0 | for (int i = 0; i < 2; i++) { |
2037 | 0 | if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) { |
2038 | 0 | affine_lowest_px_luma(t, &lowest_px[b->ref[i]][0], b_dim, |
2039 | 0 | &f->frame_hdr->gmv[b->ref[i]]); |
2040 | 0 | } else { |
2041 | 0 | mc_lowest_px(&lowest_px[b->ref[i]][0], t->by, bh4, |
2042 | 0 | b->mv[i].y, 0, &f->svc[b->ref[i]][1]); |
2043 | 0 | } |
2044 | 0 | } |
2045 | | |
2046 | | // uv |
2047 | 0 | if (has_chroma) for (int i = 0; i < 2; i++) { |
2048 | 0 | if (b->inter_mode == GLOBALMV_GLOBALMV && |
2049 | 0 | imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]]) |
2050 | 0 | { |
2051 | 0 | affine_lowest_px_chroma(t, &lowest_px[b->ref[i]][1], b_dim, |
2052 | 0 | &f->frame_hdr->gmv[b->ref[i]]); |
2053 | 0 | } else { |
2054 | 0 | mc_lowest_px(&lowest_px[b->ref[i]][1], t->by, bh4, |
2055 | 0 | b->mv[i].y, ss_ver, &f->svc[b->ref[i]][1]); |
2056 | 0 | } |
2057 | 0 | } |
2058 | 0 | } |
2059 | 0 | } |
2060 | | |
2061 | 0 | return 0; |
2062 | 0 | } |
2063 | | |
2064 | | #if __has_feature(memory_sanitizer) |
2065 | | |
2066 | | #include <sanitizer/msan_interface.h> |
2067 | | |
2068 | | static int checked_decode_b(Dav1dTaskContext *const t, |
2069 | | const enum BlockLevel bl, |
2070 | | const enum BlockSize bs, |
2071 | | const enum BlockPartition bp, |
2072 | | const enum EdgeFlags intra_edge_flags) |
2073 | | { |
2074 | | const Dav1dFrameContext *const f = t->f; |
2075 | | const int err = decode_b(t, bl, bs, bp, intra_edge_flags); |
2076 | | |
2077 | | if (err == 0 && !(t->frame_thread.pass & 1)) { |
2078 | | const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; |
2079 | | const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; |
2080 | | const uint8_t *const b_dim = dav1d_block_dimensions[bs]; |
2081 | | const int bw4 = b_dim[0], bh4 = b_dim[1]; |
2082 | | const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by); |
2083 | | const int has_chroma = f->seq_hdr->layout != DAV1D_PIXEL_LAYOUT_I400 && |
2084 | | (bw4 > ss_hor || t->bx & 1) && |
2085 | | (bh4 > ss_ver || t->by & 1); |
2086 | | |
2087 | | for (int p = 0; p < 1 + 2 * has_chroma; p++) { |
2088 | | const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; |
2089 | | const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; |
2090 | | const ptrdiff_t stride = f->cur.stride[!!p]; |
2091 | | const int bx = t->bx & ~ss_hor; |
2092 | | const int by = t->by & ~ss_ver; |
2093 | | const int width = w4 << (2 - ss_hor + (bw4 == ss_hor)); |
2094 | | const int height = h4 << (2 - ss_ver + (bh4 == ss_ver)); |
2095 | | |
2096 | | const uint8_t *data = f->cur.data[p] + (by << (2 - ss_ver)) * stride + |
2097 | | (bx << (2 - ss_hor + !!f->seq_hdr->hbd)); |
2098 | | |
2099 | | for (int y = 0; y < height; data += stride, y++) { |
2100 | | const size_t line_sz = width << !!f->seq_hdr->hbd; |
2101 | | if (__msan_test_shadow(data, line_sz) != -1) { |
2102 | | fprintf(stderr, "B[%d](%d, %d) w4:%d, h4:%d, row:%d\n", |
2103 | | p, bx, by, w4, h4, y); |
2104 | | __msan_check_mem_is_initialized(data, line_sz); |
2105 | | } |
2106 | | } |
2107 | | } |
2108 | | } |
2109 | | |
2110 | | return err; |
2111 | | } |
2112 | | |
2113 | | #define decode_b checked_decode_b |
2114 | | |
#endif /* __has_feature(memory_sanitizer) */
2116 | | |
2117 | | static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl, |
2118 | | const EdgeNode *const node) |
2119 | 0 | { |
2120 | 0 | const Dav1dFrameContext *const f = t->f; |
2121 | 0 | Dav1dTileState *const ts = t->ts; |
2122 | 0 | const int hsz = 16 >> bl; |
2123 | 0 | const int have_h_split = f->bw > t->bx + hsz; |
2124 | 0 | const int have_v_split = f->bh > t->by + hsz; |
2125 | |
|
2126 | 0 | if (!have_h_split && !have_v_split) { |
2127 | 0 | assert(bl < BL_8X8); |
2128 | 0 | return decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0)); |
2129 | 0 | } |
2130 | | |
2131 | 0 | uint16_t *pc; |
2132 | 0 | enum BlockPartition bp; |
2133 | 0 | int ctx, bx8, by8; |
2134 | 0 | if (t->frame_thread.pass != 2) { |
2135 | 0 | if (0 && bl == BL_64X64) |
2136 | 0 | printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n", |
2137 | 0 | f->frame_hdr->frame_offset, t->by, t->bx, bl, ts->msac.rng); |
2138 | 0 | bx8 = (t->bx & 31) >> 1; |
2139 | 0 | by8 = (t->by & 31) >> 1; |
2140 | 0 | ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8); |
2141 | 0 | pc = ts->cdf.m.partition[bl][ctx]; |
2142 | 0 | } |
2143 | |
|
2144 | 0 | if (have_h_split && have_v_split) { |
2145 | 0 | if (t->frame_thread.pass == 2) { |
2146 | 0 | const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx]; |
2147 | 0 | bp = b->bl == bl ? b->bp : PARTITION_SPLIT; |
2148 | 0 | } else { |
2149 | 0 | bp = dav1d_msac_decode_symbol_adapt16(&ts->msac, pc, |
2150 | 0 | dav1d_partition_type_count[bl]); |
2151 | 0 | if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && |
2152 | 0 | (bp == PARTITION_V || bp == PARTITION_V4 || |
2153 | 0 | bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT)) |
2154 | 0 | { |
2155 | 0 | return 1; |
2156 | 0 | } |
2157 | 0 | if (DEBUG_BLOCK_INFO) |
2158 | 0 | printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n", |
2159 | 0 | f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, bp, |
2160 | 0 | ts->msac.rng); |
2161 | 0 | } |
2162 | 0 | const uint8_t *const b = dav1d_block_sizes[bl][bp]; |
2163 | |
|
2164 | 0 | switch (bp) { |
2165 | 0 | case PARTITION_NONE: |
2166 | 0 | if (decode_b(t, bl, b[0], PARTITION_NONE, node->o)) |
2167 | 0 | return -1; |
2168 | 0 | break; |
2169 | 0 | case PARTITION_H: |
2170 | 0 | if (decode_b(t, bl, b[0], PARTITION_H, node->h[0])) |
2171 | 0 | return -1; |
2172 | 0 | t->by += hsz; |
2173 | 0 | if (decode_b(t, bl, b[0], PARTITION_H, node->h[1])) |
2174 | 0 | return -1; |
2175 | 0 | t->by -= hsz; |
2176 | 0 | break; |
2177 | 0 | case PARTITION_V: |
2178 | 0 | if (decode_b(t, bl, b[0], PARTITION_V, node->v[0])) |
2179 | 0 | return -1; |
2180 | 0 | t->bx += hsz; |
2181 | 0 | if (decode_b(t, bl, b[0], PARTITION_V, node->v[1])) |
2182 | 0 | return -1; |
2183 | 0 | t->bx -= hsz; |
2184 | 0 | break; |
2185 | 0 | case PARTITION_SPLIT: |
2186 | 0 | if (bl == BL_8X8) { |
2187 | 0 | const EdgeTip *const tip = (const EdgeTip *) node; |
2188 | 0 | assert(hsz == 1); |
2189 | 0 | if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL)) |
2190 | 0 | return -1; |
2191 | 0 | const enum Filter2d tl_filter = t->tl_4x4_filter; |
2192 | 0 | t->bx++; |
2193 | 0 | if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0])) |
2194 | 0 | return -1; |
2195 | 0 | t->bx--; |
2196 | 0 | t->by++; |
2197 | 0 | if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1])) |
2198 | 0 | return -1; |
2199 | 0 | t->bx++; |
2200 | 0 | t->tl_4x4_filter = tl_filter; |
2201 | 0 | if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2])) |
2202 | 0 | return -1; |
2203 | 0 | t->bx--; |
2204 | 0 | t->by--; |
2205 | 0 | #if ARCH_X86_64 |
2206 | 0 | if (t->frame_thread.pass) { |
2207 | | /* In 8-bit mode with 2-pass decoding the coefficient buffer |
2208 | | * can end up misaligned due to skips here. Work around |
2209 | | * the issue by explicitly realigning the buffer. */ |
2210 | 0 | const int p = t->frame_thread.pass & 1; |
2211 | 0 | ts->frame_thread[p].cf = |
2212 | 0 | (void*)(((uintptr_t)ts->frame_thread[p].cf + 63) & ~63); |
2213 | 0 | } |
2214 | 0 | #endif |
2215 | 0 | } else { |
2216 | 0 | if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) |
2217 | 0 | return 1; |
2218 | 0 | t->bx += hsz; |
2219 | 0 | if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) |
2220 | 0 | return 1; |
2221 | 0 | t->bx -= hsz; |
2222 | 0 | t->by += hsz; |
2223 | 0 | if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) |
2224 | 0 | return 1; |
2225 | 0 | t->bx += hsz; |
2226 | 0 | if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 3))) |
2227 | 0 | return 1; |
2228 | 0 | t->bx -= hsz; |
2229 | 0 | t->by -= hsz; |
2230 | 0 | } |
2231 | 0 | break; |
2232 | 0 | case PARTITION_T_TOP_SPLIT: { |
2233 | 0 | if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL)) |
2234 | 0 | return -1; |
2235 | 0 | t->bx += hsz; |
2236 | 0 | if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1])) |
2237 | 0 | return -1; |
2238 | 0 | t->bx -= hsz; |
2239 | 0 | t->by += hsz; |
2240 | 0 | if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1])) |
2241 | 0 | return -1; |
2242 | 0 | t->by -= hsz; |
2243 | 0 | break; |
2244 | 0 | } |
2245 | 0 | case PARTITION_T_BOTTOM_SPLIT: { |
2246 | 0 | if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0])) |
2247 | 0 | return -1; |
2248 | 0 | t->by += hsz; |
2249 | 0 | if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0])) |
2250 | 0 | return -1; |
2251 | 0 | t->bx += hsz; |
2252 | 0 | if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0)) |
2253 | 0 | return -1; |
2254 | 0 | t->bx -= hsz; |
2255 | 0 | t->by -= hsz; |
2256 | 0 | break; |
2257 | 0 | } |
2258 | 0 | case PARTITION_T_LEFT_SPLIT: { |
2259 | 0 | if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL)) |
2260 | 0 | return -1; |
2261 | 0 | t->by += hsz; |
2262 | 0 | if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1])) |
2263 | 0 | return -1; |
2264 | 0 | t->by -= hsz; |
2265 | 0 | t->bx += hsz; |
2266 | 0 | if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1])) |
2267 | 0 | return -1; |
2268 | 0 | t->bx -= hsz; |
2269 | 0 | break; |
2270 | 0 | } |
2271 | 0 | case PARTITION_T_RIGHT_SPLIT: { |
2272 | 0 | if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0])) |
2273 | 0 | return -1; |
2274 | 0 | t->bx += hsz; |
2275 | 0 | if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0])) |
2276 | 0 | return -1; |
2277 | 0 | t->by += hsz; |
2278 | 0 | if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0)) |
2279 | 0 | return -1; |
2280 | 0 | t->by -= hsz; |
2281 | 0 | t->bx -= hsz; |
2282 | 0 | break; |
2283 | 0 | } |
2284 | 0 | case PARTITION_H4: { |
2285 | 0 | const EdgeBranch *const branch = (const EdgeBranch *) node; |
2286 | 0 | if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0])) |
2287 | 0 | return -1; |
2288 | 0 | t->by += hsz >> 1; |
2289 | 0 | if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4)) |
2290 | 0 | return -1; |
2291 | 0 | t->by += hsz >> 1; |
2292 | 0 | if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM)) |
2293 | 0 | return -1; |
2294 | 0 | t->by += hsz >> 1; |
2295 | 0 | if (t->by < f->bh) |
2296 | 0 | if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1])) |
2297 | 0 | return -1; |
2298 | 0 | t->by -= hsz * 3 >> 1; |
2299 | 0 | break; |
2300 | 0 | } |
2301 | 0 | case PARTITION_V4: { |
2302 | 0 | const EdgeBranch *const branch = (const EdgeBranch *) node; |
2303 | 0 | if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0])) |
2304 | 0 | return -1; |
2305 | 0 | t->bx += hsz >> 1; |
2306 | 0 | if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4)) |
2307 | 0 | return -1; |
2308 | 0 | t->bx += hsz >> 1; |
2309 | 0 | if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT)) |
2310 | 0 | return -1; |
2311 | 0 | t->bx += hsz >> 1; |
2312 | 0 | if (t->bx < f->bw) |
2313 | 0 | if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1])) |
2314 | 0 | return -1; |
2315 | 0 | t->bx -= hsz * 3 >> 1; |
2316 | 0 | break; |
2317 | 0 | } |
2318 | 0 | default: assert(0); |
2319 | 0 | } |
2320 | 0 | } else if (have_h_split) { |
2321 | 0 | unsigned is_split; |
2322 | 0 | if (t->frame_thread.pass == 2) { |
2323 | 0 | const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx]; |
2324 | 0 | is_split = b->bl != bl; |
2325 | 0 | } else { |
2326 | 0 | is_split = dav1d_msac_decode_bool(&ts->msac, |
2327 | 0 | gather_top_partition_prob(pc, bl)); |
2328 | 0 | if (DEBUG_BLOCK_INFO) |
2329 | 0 | printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n", |
2330 | 0 | f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, |
2331 | 0 | is_split ? PARTITION_SPLIT : PARTITION_H, ts->msac.rng); |
2332 | 0 | } |
2333 | |
|
2334 | 0 | assert(bl < BL_8X8); |
2335 | 0 | if (is_split) { |
2336 | 0 | bp = PARTITION_SPLIT; |
2337 | 0 | if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1; |
2338 | 0 | t->bx += hsz; |
2339 | 0 | if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) return 1; |
2340 | 0 | t->bx -= hsz; |
2341 | 0 | } else { |
2342 | 0 | bp = PARTITION_H; |
2343 | 0 | if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_H][0], |
2344 | 0 | PARTITION_H, node->h[0])) |
2345 | 0 | return -1; |
2346 | 0 | } |
2347 | 0 | } else { |
2348 | 0 | assert(have_v_split); |
2349 | 0 | unsigned is_split; |
2350 | 0 | if (t->frame_thread.pass == 2) { |
2351 | 0 | const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx]; |
2352 | 0 | is_split = b->bl != bl; |
2353 | 0 | } else { |
2354 | 0 | is_split = dav1d_msac_decode_bool(&ts->msac, |
2355 | 0 | gather_left_partition_prob(pc, bl)); |
2356 | 0 | if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split) |
2357 | 0 | return 1; |
2358 | 0 | if (DEBUG_BLOCK_INFO) |
2359 | 0 | printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n", |
2360 | 0 | f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, |
2361 | 0 | is_split ? PARTITION_SPLIT : PARTITION_V, ts->msac.rng); |
2362 | 0 | } |
2363 | | |
2364 | 0 | assert(bl < BL_8X8); |
2365 | 0 | if (is_split) { |
2366 | 0 | bp = PARTITION_SPLIT; |
2367 | 0 | if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1; |
2368 | 0 | t->by += hsz; |
2369 | 0 | if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) return 1; |
2370 | 0 | t->by -= hsz; |
2371 | 0 | } else { |
2372 | 0 | bp = PARTITION_V; |
2373 | 0 | if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_V][0], |
2374 | 0 | PARTITION_V, node->v[0])) |
2375 | 0 | return -1; |
2376 | 0 | } |
2377 | 0 | } |
2378 | | |
2379 | 0 | if (t->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) { |
2380 | 0 | #define set_ctx(rep_macro) \ |
2381 | 0 | rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \ |
2382 | 0 | rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp]) |
2383 | 0 | case_set_upto16(ulog2(hsz)); |
2384 | 0 | #undef set_ctx |
2385 | 0 | } |
2386 | | |
2387 | 0 | return 0; |
2388 | 0 | } |
2389 | | |
2390 | 0 | static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) { |
2391 | 0 | memset(ctx->intra, keyframe, sizeof(ctx->intra)); |
2392 | 0 | memset(ctx->uvmode, DC_PRED, sizeof(ctx->uvmode)); |
2393 | 0 | if (keyframe) |
2394 | 0 | memset(ctx->mode, DC_PRED, sizeof(ctx->mode)); |
2395 | |
|
2396 | 0 | if (pass == 2) return; |
2397 | | |
2398 | 0 | memset(ctx->partition, 0, sizeof(ctx->partition)); |
2399 | 0 | memset(ctx->skip, 0, sizeof(ctx->skip)); |
2400 | 0 | memset(ctx->skip_mode, 0, sizeof(ctx->skip_mode)); |
2401 | 0 | memset(ctx->tx_lpf_y, 2, sizeof(ctx->tx_lpf_y)); |
2402 | 0 | memset(ctx->tx_lpf_uv, 1, sizeof(ctx->tx_lpf_uv)); |
2403 | 0 | memset(ctx->tx_intra, -1, sizeof(ctx->tx_intra)); |
2404 | 0 | memset(ctx->tx, TX_64X64, sizeof(ctx->tx)); |
2405 | 0 | if (!keyframe) { |
2406 | 0 | memset(ctx->ref, -1, sizeof(ctx->ref)); |
2407 | 0 | memset(ctx->comp_type, 0, sizeof(ctx->comp_type)); |
2408 | 0 | memset(ctx->mode, NEARESTMV, sizeof(ctx->mode)); |
2409 | 0 | } |
2410 | 0 | memset(ctx->lcoef, 0x40, sizeof(ctx->lcoef)); |
2411 | 0 | memset(ctx->ccoef, 0x40, sizeof(ctx->ccoef)); |
2412 | 0 | memset(ctx->filter, DAV1D_N_SWITCHABLE_FILTERS, sizeof(ctx->filter)); |
2413 | 0 | memset(ctx->seg_pred, 0, sizeof(ctx->seg_pred)); |
2414 | 0 | memset(ctx->pal_sz, 0, sizeof(ctx->pal_sz)); |
2415 | 0 | } |
2416 | | |
2417 | | // { Y+U+V, Y+U } * 4 |
2418 | | static const uint8_t ss_size_mul[4][2] = { |
2419 | | [DAV1D_PIXEL_LAYOUT_I400] = { 4, 4 }, |
2420 | | [DAV1D_PIXEL_LAYOUT_I420] = { 6, 5 }, |
2421 | | [DAV1D_PIXEL_LAYOUT_I422] = { 8, 6 }, |
2422 | | [DAV1D_PIXEL_LAYOUT_I444] = { 12, 8 }, |
2423 | | }; |
2424 | | |
2425 | | static void setup_tile(Dav1dTileState *const ts, |
2426 | | const Dav1dFrameContext *const f, |
2427 | | const uint8_t *const data, const size_t sz, |
2428 | | const int tile_row, const int tile_col, |
2429 | | const unsigned tile_start_off) |
2430 | 0 | { |
2431 | 0 | const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col]; |
2432 | 0 | const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128; |
2433 | 0 | const int col_sb_end = f->frame_hdr->tiling.col_start_sb[tile_col + 1]; |
2434 | 0 | const int row_sb_start = f->frame_hdr->tiling.row_start_sb[tile_row]; |
2435 | 0 | const int row_sb_end = f->frame_hdr->tiling.row_start_sb[tile_row + 1]; |
2436 | 0 | const int sb_shift = f->sb_shift; |
2437 | |
|
2438 | 0 | const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout]; |
2439 | 0 | for (int p = 0; p < 2; p++) { |
2440 | 0 | ts->frame_thread[p].pal_idx = f->frame_thread.pal_idx ? |
2441 | 0 | &f->frame_thread.pal_idx[(size_t)tile_start_off * size_mul[1] / 8] : |
2442 | 0 | NULL; |
2443 | 0 | ts->frame_thread[p].cbi = f->frame_thread.cbi ? |
2444 | 0 | &f->frame_thread.cbi[(size_t)tile_start_off * size_mul[0] / 64] : |
2445 | 0 | NULL; |
2446 | 0 | ts->frame_thread[p].cf = f->frame_thread.cf ? |
2447 | 0 | (uint8_t*)f->frame_thread.cf + |
2448 | 0 | (((size_t)tile_start_off * size_mul[0]) >> !f->seq_hdr->hbd) : |
2449 | 0 | NULL; |
2450 | 0 | } |
2451 | |
|
2452 | 0 | dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf); |
2453 | 0 | ts->last_qidx = f->frame_hdr->quant.yac; |
2454 | 0 | ts->last_delta_lf.u32 = 0; |
2455 | |
|
2456 | 0 | dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update); |
2457 | |
|
2458 | 0 | ts->tiling.row = tile_row; |
2459 | 0 | ts->tiling.col = tile_col; |
2460 | 0 | ts->tiling.col_start = col_sb_start << sb_shift; |
2461 | 0 | ts->tiling.col_end = imin(col_sb_end << sb_shift, f->bw); |
2462 | 0 | ts->tiling.row_start = row_sb_start << sb_shift; |
2463 | 0 | ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh); |
2464 | | |
2465 | | // Reference Restoration Unit (used for exp coding) |
2466 | 0 | int sb_idx, unit_idx; |
2467 | 0 | if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { |
2468 | | // vertical components only |
2469 | 0 | sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w; |
2470 | 0 | unit_idx = (ts->tiling.row_start & 16) >> 3; |
2471 | 0 | } else { |
2472 | 0 | sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start; |
2473 | 0 | unit_idx = ((ts->tiling.row_start & 16) >> 3) + |
2474 | 0 | ((ts->tiling.col_start & 16) >> 4); |
2475 | 0 | } |
2476 | 0 | for (int p = 0; p < 3; p++) { |
2477 | 0 | if (!((f->lf.restore_planes >> p) & 1U)) |
2478 | 0 | continue; |
2479 | | |
2480 | 0 | if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { |
2481 | 0 | const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; |
2482 | 0 | const int d = f->frame_hdr->super_res.width_scale_denominator; |
2483 | 0 | const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p]; |
2484 | 0 | const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3; |
2485 | 0 | const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift; |
2486 | 0 | const int px_x = x << (unit_size_log2 + ss_hor); |
2487 | 0 | const int u_idx = unit_idx + ((px_x & 64) >> 6); |
2488 | 0 | const int sb128x = px_x >> 7; |
2489 | 0 | if (sb128x >= f->sr_sb128w) continue; |
2490 | 0 | ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + sb128x].lr[p][u_idx]; |
2491 | 0 | } else { |
2492 | 0 | ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx]; |
2493 | 0 | } |
2494 | | |
2495 | 0 | ts->lr_ref[p]->filter_v[0] = 3; |
2496 | 0 | ts->lr_ref[p]->filter_v[1] = -7; |
2497 | 0 | ts->lr_ref[p]->filter_v[2] = 15; |
2498 | 0 | ts->lr_ref[p]->filter_h[0] = 3; |
2499 | 0 | ts->lr_ref[p]->filter_h[1] = -7; |
2500 | 0 | ts->lr_ref[p]->filter_h[2] = 15; |
2501 | 0 | ts->lr_ref[p]->sgr_weights[0] = -32; |
2502 | 0 | ts->lr_ref[p]->sgr_weights[1] = 31; |
2503 | 0 | } |
2504 | |
|
2505 | 0 | if (f->c->n_tc > 1) { |
2506 | 0 | for (int p = 0; p < 2; p++) |
2507 | 0 | atomic_init(&ts->progress[p], row_sb_start); |
2508 | 0 | } |
2509 | 0 | } |
2510 | | |
2511 | | static void read_restoration_info(Dav1dTaskContext *const t, |
2512 | | Av1RestorationUnit *const lr, const int p, |
2513 | | const enum Dav1dRestorationType frame_type) |
2514 | 0 | { |
2515 | 0 | const Dav1dFrameContext *const f = t->f; |
2516 | 0 | Dav1dTileState *const ts = t->ts; |
2517 | |
|
2518 | 0 | if (frame_type == DAV1D_RESTORATION_SWITCHABLE) { |
2519 | 0 | const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac, |
2520 | 0 | ts->cdf.m.restore_switchable, 2); |
2521 | 0 | lr->type = filter + !!filter; /* NONE/WIENER/SGRPROJ */ |
2522 | 0 | } else { |
2523 | 0 | const unsigned type = |
2524 | 0 | dav1d_msac_decode_bool_adapt(&ts->msac, |
2525 | 0 | frame_type == DAV1D_RESTORATION_WIENER ? |
2526 | 0 | ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj); |
2527 | 0 | lr->type = type ? frame_type : DAV1D_RESTORATION_NONE; |
2528 | 0 | } |
2529 | |
|
2530 | 0 | if (lr->type == DAV1D_RESTORATION_WIENER) { |
2531 | 0 | lr->filter_v[0] = p ? 0 : |
2532 | 0 | dav1d_msac_decode_subexp(&ts->msac, |
2533 | 0 | ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5; |
2534 | 0 | lr->filter_v[1] = |
2535 | 0 | dav1d_msac_decode_subexp(&ts->msac, |
2536 | 0 | ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23; |
2537 | 0 | lr->filter_v[2] = |
2538 | 0 | dav1d_msac_decode_subexp(&ts->msac, |
2539 | 0 | ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17; |
2540 | |
|
2541 | 0 | lr->filter_h[0] = p ? 0 : |
2542 | 0 | dav1d_msac_decode_subexp(&ts->msac, |
2543 | 0 | ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5; |
2544 | 0 | lr->filter_h[1] = |
2545 | 0 | dav1d_msac_decode_subexp(&ts->msac, |
2546 | 0 | ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23; |
2547 | 0 | lr->filter_h[2] = |
2548 | 0 | dav1d_msac_decode_subexp(&ts->msac, |
2549 | 0 | ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17; |
2550 | 0 | memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights)); |
2551 | 0 | ts->lr_ref[p] = lr; |
2552 | 0 | if (DEBUG_BLOCK_INFO) |
2553 | 0 | printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n", |
2554 | 0 | p, lr->filter_v[0], lr->filter_v[1], |
2555 | 0 | lr->filter_v[2], lr->filter_h[0], |
2556 | 0 | lr->filter_h[1], lr->filter_h[2], ts->msac.rng); |
2557 | 0 | } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) { |
2558 | 0 | const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4); |
2559 | 0 | const uint16_t *const sgr_params = dav1d_sgr_params[idx]; |
2560 | 0 | lr->type += idx; |
2561 | 0 | lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac, |
2562 | 0 | ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0; |
2563 | 0 | lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac, |
2564 | 0 | ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95; |
2565 | 0 | memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v)); |
2566 | 0 | memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h)); |
2567 | 0 | ts->lr_ref[p] = lr; |
2568 | 0 | if (DEBUG_BLOCK_INFO) |
2569 | 0 | printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n", |
2570 | 0 | p, idx, lr->sgr_weights[0], |
2571 | 0 | lr->sgr_weights[1], ts->msac.rng); |
2572 | 0 | } |
2573 | 0 | } |
2574 | | |
2575 | | // modeled after the equivalent function in aomdec:decodeframe.c |
2576 | 0 | static int check_trailing_bits_after_symbol_coder(const MsacContext *const msac) { |
2577 | | // check marker bit (single 1), followed by zeroes |
2578 | 0 | const int n_bits = -(msac->cnt + 14); |
2579 | 0 | assert(n_bits <= 0); // this assumes we errored out when cnt <= -15 in caller |
2580 | 0 | const int n_bytes = (n_bits + 7) >> 3; |
2581 | 0 | const uint8_t *p = &msac->buf_pos[n_bytes]; |
2582 | 0 | const int pattern = 128 >> ((n_bits - 1) & 7); |
2583 | 0 | if ((p[-1] & (2 * pattern - 1)) != pattern) |
2584 | 0 | return 1; |
2585 | | |
2586 | | // check remainder zero bytes |
2587 | 0 | for (; p < msac->buf_end; p++) |
2588 | 0 | if (*p) |
2589 | 0 | return 1; |
2590 | | |
2591 | 0 | return 0; |
2592 | 0 | } |
2593 | | |
2594 | 0 | int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) { |
2595 | 0 | const Dav1dFrameContext *const f = t->f; |
2596 | 0 | const enum BlockLevel root_bl = f->seq_hdr->sb128 ? BL_128X128 : BL_64X64; |
2597 | 0 | Dav1dTileState *const ts = t->ts; |
2598 | 0 | const Dav1dContext *const c = f->c; |
2599 | 0 | const int sb_step = f->sb_step; |
2600 | 0 | const int tile_row = ts->tiling.row, tile_col = ts->tiling.col; |
2601 | 0 | const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col]; |
2602 | 0 | const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128; |
2603 | |
|
2604 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) { |
2605 | 0 | dav1d_refmvs_tile_sbrow_init(&t->rt, &f->rf, ts->tiling.col_start, |
2606 | 0 | ts->tiling.col_end, ts->tiling.row_start, |
2607 | 0 | ts->tiling.row_end, t->by >> f->sb_shift, |
2608 | 0 | ts->tiling.row, t->frame_thread.pass); |
2609 | 0 | } |
2610 | |
|
2611 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr) && c->n_fc > 1) { |
2612 | 0 | const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift; |
2613 | 0 | int (*const lowest_px)[2] = ts->lowest_pixel[sby]; |
2614 | 0 | for (int n = 0; n < 7; n++) |
2615 | 0 | for (int m = 0; m < 2; m++) |
2616 | 0 | lowest_px[n][m] = INT_MIN; |
2617 | 0 | } |
2618 | |
|
2619 | 0 | reset_context(&t->l, IS_KEY_OR_INTRA(f->frame_hdr), t->frame_thread.pass); |
2620 | 0 | if (t->frame_thread.pass == 2) { |
2621 | 0 | const int off_2pass = c->n_tc > 1 ? f->sb128w * f->frame_hdr->tiling.rows : 0; |
2622 | 0 | for (t->bx = ts->tiling.col_start, |
2623 | 0 | t->a = f->a + off_2pass + col_sb128_start + tile_row * f->sb128w; |
2624 | 0 | t->bx < ts->tiling.col_end; t->bx += sb_step) |
2625 | 0 | { |
2626 | 0 | if (atomic_load_explicit(c->flush, memory_order_acquire)) |
2627 | 0 | return 1; |
2628 | 0 | if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl])) |
2629 | 0 | return 1; |
2630 | 0 | if (t->bx & 16 || f->seq_hdr->sb128) |
2631 | 0 | t->a++; |
2632 | 0 | } |
2633 | 0 | f->bd_fn.backup_ipred_edge(t); |
2634 | 0 | return 0; |
2635 | 0 | } |
2636 | | |
2637 | 0 | if (f->c->n_tc > 1 && f->frame_hdr->use_ref_frame_mvs) { |
2638 | 0 | f->c->refmvs_dsp.load_tmvs(&f->rf, ts->tiling.row, |
2639 | 0 | ts->tiling.col_start >> 1, ts->tiling.col_end >> 1, |
2640 | 0 | t->by >> 1, (t->by + sb_step) >> 1); |
2641 | 0 | } |
2642 | 0 | memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv)); |
2643 | 0 | const int sb128y = t->by >> 5; |
2644 | 0 | for (t->bx = ts->tiling.col_start, t->a = f->a + col_sb128_start + tile_row * f->sb128w, |
2645 | 0 | t->lf_mask = f->lf.mask + sb128y * f->sb128w + col_sb128_start; |
2646 | 0 | t->bx < ts->tiling.col_end; t->bx += sb_step) |
2647 | 0 | { |
2648 | 0 | if (atomic_load_explicit(c->flush, memory_order_acquire)) |
2649 | 0 | return 1; |
2650 | 0 | if (root_bl == BL_128X128) { |
2651 | 0 | t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx; |
2652 | 0 | t->cur_sb_cdef_idx_ptr[0] = -1; |
2653 | 0 | t->cur_sb_cdef_idx_ptr[1] = -1; |
2654 | 0 | t->cur_sb_cdef_idx_ptr[2] = -1; |
2655 | 0 | t->cur_sb_cdef_idx_ptr[3] = -1; |
2656 | 0 | } else { |
2657 | 0 | t->cur_sb_cdef_idx_ptr = |
2658 | 0 | &t->lf_mask->cdef_idx[((t->bx & 16) >> 4) + |
2659 | 0 | ((t->by & 16) >> 3)]; |
2660 | 0 | t->cur_sb_cdef_idx_ptr[0] = -1; |
2661 | 0 | } |
2662 | | // Restoration filter |
2663 | 0 | for (int p = 0; p < 3; p++) { |
2664 | 0 | if (!((f->lf.restore_planes >> p) & 1U)) |
2665 | 0 | continue; |
2666 | | |
2667 | 0 | const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; |
2668 | 0 | const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; |
2669 | 0 | const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p]; |
2670 | 0 | const int y = t->by * 4 >> ss_ver; |
2671 | 0 | const int h = (f->cur.p.h + ss_ver) >> ss_ver; |
2672 | |
|
2673 | 0 | const int unit_size = 1 << unit_size_log2; |
2674 | 0 | const unsigned mask = unit_size - 1; |
2675 | 0 | if (y & mask) continue; |
2676 | 0 | const int half_unit = unit_size >> 1; |
2677 | | // Round half up at frame boundaries, if there's more than one |
2678 | | // restoration unit |
2679 | 0 | if (y && y + half_unit > h) continue; |
2680 | | |
2681 | 0 | const enum Dav1dRestorationType frame_type = f->frame_hdr->restoration.type[p]; |
2682 | |
|
2683 | 0 | if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { |
2684 | 0 | const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor; |
2685 | 0 | const int n_units = imax(1, (w + half_unit) >> unit_size_log2); |
2686 | |
|
2687 | 0 | const int d = f->frame_hdr->super_res.width_scale_denominator; |
2688 | 0 | const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3; |
2689 | 0 | const int x0 = ((4 * t->bx * d >> ss_hor) + rnd) >> shift; |
2690 | 0 | const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift; |
2691 | |
|
2692 | 0 | for (int x = x0; x < imin(x1, n_units); x++) { |
2693 | 0 | const int px_x = x << (unit_size_log2 + ss_hor); |
2694 | 0 | const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7); |
2695 | 0 | const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6); |
2696 | 0 | Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx]; |
2697 | |
|
2698 | 0 | read_restoration_info(t, lr, p, frame_type); |
2699 | 0 | } |
2700 | 0 | } else { |
2701 | 0 | const int x = 4 * t->bx >> ss_hor; |
2702 | 0 | if (x & mask) continue; |
2703 | 0 | const int w = (f->cur.p.w + ss_hor) >> ss_hor; |
2704 | | // Round half up at frame boundaries, if there's more than one |
2705 | | // restoration unit |
2706 | 0 | if (x && x + half_unit > w) continue; |
2707 | 0 | const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5); |
2708 | 0 | const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4); |
2709 | 0 | Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx]; |
2710 | |
|
2711 | 0 | read_restoration_info(t, lr, p, frame_type); |
2712 | 0 | } |
2713 | 0 | } |
2714 | 0 | if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl])) |
2715 | 0 | return 1; |
2716 | 0 | if (t->bx & 16 || f->seq_hdr->sb128) { |
2717 | 0 | t->a++; |
2718 | 0 | t->lf_mask++; |
2719 | 0 | } |
2720 | 0 | } |
2721 | | |
2722 | 0 | if (f->seq_hdr->ref_frame_mvs && f->c->n_tc > 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) { |
2723 | 0 | dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt, |
2724 | 0 | ts->tiling.col_start >> 1, ts->tiling.col_end >> 1, |
2725 | 0 | t->by >> 1, (t->by + sb_step) >> 1); |
2726 | 0 | } |
2727 | | |
2728 | | // backup pre-loopfilter pixels for intra prediction of the next sbrow |
2729 | 0 | if (t->frame_thread.pass != 1) |
2730 | 0 | f->bd_fn.backup_ipred_edge(t); |
2731 | | |
2732 | | // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix" |
2733 | | // up the initial value in neighbour tiles when running the loopfilter |
2734 | 0 | int align_h = (f->bh + 31) & ~31; |
2735 | 0 | memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by], |
2736 | 0 | &t->l.tx_lpf_y[t->by & 16], sb_step); |
2737 | 0 | const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; |
2738 | 0 | align_h >>= ss_ver; |
2739 | 0 | memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)], |
2740 | 0 | &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver); |
2741 | | |
2742 | | // error out on symbol decoder overread |
2743 | 0 | if (ts->msac.cnt <= -15) return 1; |
2744 | | |
2745 | 0 | return c->strict_std_compliance && |
2746 | 0 | (t->by >> f->sb_shift) + 1 >= f->frame_hdr->tiling.row_start_sb[tile_row + 1] && |
2747 | 0 | check_trailing_bits_after_symbol_coder(&ts->msac); |
2748 | 0 | } |
2749 | | |
2750 | 0 | int dav1d_decode_frame_init(Dav1dFrameContext *const f) { |
2751 | 0 | const Dav1dContext *const c = f->c; |
2752 | 0 | int retval = DAV1D_ERR(ENOMEM); |
2753 | |
|
2754 | 0 | if (f->sbh > f->lf.start_of_tile_row_sz) { |
2755 | 0 | dav1d_free(f->lf.start_of_tile_row); |
2756 | 0 | f->lf.start_of_tile_row = dav1d_malloc(ALLOC_TILE, f->sbh * sizeof(uint8_t)); |
2757 | 0 | if (!f->lf.start_of_tile_row) { |
2758 | 0 | f->lf.start_of_tile_row_sz = 0; |
2759 | 0 | goto error; |
2760 | 0 | } |
2761 | 0 | f->lf.start_of_tile_row_sz = f->sbh; |
2762 | 0 | } |
2763 | 0 | int sby = 0; |
2764 | 0 | for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) { |
2765 | 0 | f->lf.start_of_tile_row[sby++] = tile_row; |
2766 | 0 | while (sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1]) |
2767 | 0 | f->lf.start_of_tile_row[sby++] = 0; |
2768 | 0 | } |
2769 | |
|
2770 | 0 | const int n_ts = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows; |
2771 | 0 | if (n_ts != f->n_ts) { |
2772 | 0 | if (c->n_fc > 1) { |
2773 | 0 | dav1d_free(f->frame_thread.tile_start_off); |
2774 | 0 | f->frame_thread.tile_start_off = |
2775 | 0 | dav1d_malloc(ALLOC_TILE, sizeof(*f->frame_thread.tile_start_off) * n_ts); |
2776 | 0 | if (!f->frame_thread.tile_start_off) { |
2777 | 0 | f->n_ts = 0; |
2778 | 0 | goto error; |
2779 | 0 | } |
2780 | 0 | } |
2781 | 0 | dav1d_free_aligned(f->ts); |
2782 | 0 | f->ts = dav1d_alloc_aligned(ALLOC_TILE, sizeof(*f->ts) * n_ts, 32); |
2783 | 0 | if (!f->ts) goto error; |
2784 | 0 | f->n_ts = n_ts; |
2785 | 0 | } |
2786 | | |
2787 | 0 | const int a_sz = f->sb128w * f->frame_hdr->tiling.rows * (1 + (c->n_fc > 1 && c->n_tc > 1)); |
2788 | 0 | if (a_sz != f->a_sz) { |
2789 | 0 | dav1d_free(f->a); |
2790 | 0 | f->a = dav1d_malloc(ALLOC_TILE, sizeof(*f->a) * a_sz); |
2791 | 0 | if (!f->a) { |
2792 | 0 | f->a_sz = 0; |
2793 | 0 | goto error; |
2794 | 0 | } |
2795 | 0 | f->a_sz = a_sz; |
2796 | 0 | } |
2797 | | |
2798 | 0 | const int num_sb128 = f->sb128w * f->sb128h; |
2799 | 0 | const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout]; |
2800 | 0 | const int hbd = !!f->seq_hdr->hbd; |
2801 | 0 | if (c->n_fc > 1) { |
2802 | 0 | const unsigned sb_step4 = f->sb_step * 4; |
2803 | 0 | int tile_idx = 0; |
2804 | 0 | for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) { |
2805 | 0 | const unsigned row_off = f->frame_hdr->tiling.row_start_sb[tile_row] * |
2806 | 0 | sb_step4 * f->sb128w * 128; |
2807 | 0 | const unsigned b_diff = (f->frame_hdr->tiling.row_start_sb[tile_row + 1] - |
2808 | 0 | f->frame_hdr->tiling.row_start_sb[tile_row]) * sb_step4; |
2809 | 0 | for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) { |
2810 | 0 | f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff * |
2811 | 0 | f->frame_hdr->tiling.col_start_sb[tile_col] * sb_step4; |
2812 | 0 | } |
2813 | 0 | } |
2814 | |
|
2815 | 0 | const int lowest_pixel_mem_sz = f->frame_hdr->tiling.cols * f->sbh; |
2816 | 0 | if (lowest_pixel_mem_sz != f->tile_thread.lowest_pixel_mem_sz) { |
2817 | 0 | dav1d_free(f->tile_thread.lowest_pixel_mem); |
2818 | 0 | f->tile_thread.lowest_pixel_mem = |
2819 | 0 | dav1d_malloc(ALLOC_TILE, lowest_pixel_mem_sz * |
2820 | 0 | sizeof(*f->tile_thread.lowest_pixel_mem)); |
2821 | 0 | if (!f->tile_thread.lowest_pixel_mem) { |
2822 | 0 | f->tile_thread.lowest_pixel_mem_sz = 0; |
2823 | 0 | goto error; |
2824 | 0 | } |
2825 | 0 | f->tile_thread.lowest_pixel_mem_sz = lowest_pixel_mem_sz; |
2826 | 0 | } |
2827 | 0 | int (*lowest_pixel_ptr)[7][2] = f->tile_thread.lowest_pixel_mem; |
2828 | 0 | for (int tile_row = 0, tile_row_base = 0; tile_row < f->frame_hdr->tiling.rows; |
2829 | 0 | tile_row++, tile_row_base += f->frame_hdr->tiling.cols) |
2830 | 0 | { |
2831 | 0 | const int tile_row_sb_h = f->frame_hdr->tiling.row_start_sb[tile_row + 1] - |
2832 | 0 | f->frame_hdr->tiling.row_start_sb[tile_row]; |
2833 | 0 | for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) { |
2834 | 0 | f->ts[tile_row_base + tile_col].lowest_pixel = lowest_pixel_ptr; |
2835 | 0 | lowest_pixel_ptr += tile_row_sb_h; |
2836 | 0 | } |
2837 | 0 | } |
2838 | |
|
2839 | 0 | const int cbi_sz = num_sb128 * size_mul[0]; |
2840 | 0 | if (cbi_sz != f->frame_thread.cbi_sz) { |
2841 | 0 | dav1d_free_aligned(f->frame_thread.cbi); |
2842 | 0 | f->frame_thread.cbi = |
2843 | 0 | dav1d_alloc_aligned(ALLOC_BLOCK, sizeof(*f->frame_thread.cbi) * |
2844 | 0 | cbi_sz * 32 * 32 / 4, 64); |
2845 | 0 | if (!f->frame_thread.cbi) { |
2846 | 0 | f->frame_thread.cbi_sz = 0; |
2847 | 0 | goto error; |
2848 | 0 | } |
2849 | 0 | f->frame_thread.cbi_sz = cbi_sz; |
2850 | 0 | } |
2851 | | |
2852 | 0 | const int cf_sz = (num_sb128 * size_mul[0]) << hbd; |
2853 | 0 | if (cf_sz != f->frame_thread.cf_sz) { |
2854 | 0 | dav1d_free_aligned(f->frame_thread.cf); |
2855 | 0 | f->frame_thread.cf = |
2856 | 0 | dav1d_alloc_aligned(ALLOC_COEF, (size_t)cf_sz * 128 * 128 / 2, 64); |
2857 | 0 | if (!f->frame_thread.cf) { |
2858 | 0 | f->frame_thread.cf_sz = 0; |
2859 | 0 | goto error; |
2860 | 0 | } |
2861 | 0 | memset(f->frame_thread.cf, 0, (size_t)cf_sz * 128 * 128 / 2); |
2862 | 0 | f->frame_thread.cf_sz = cf_sz; |
2863 | 0 | } |
2864 | | |
2865 | 0 | if (f->frame_hdr->allow_screen_content_tools) { |
2866 | 0 | const int pal_sz = num_sb128 << hbd; |
2867 | 0 | if (pal_sz != f->frame_thread.pal_sz) { |
2868 | 0 | dav1d_free_aligned(f->frame_thread.pal); |
2869 | 0 | f->frame_thread.pal = |
2870 | 0 | dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal) * |
2871 | 0 | pal_sz * 16 * 16, 64); |
2872 | 0 | if (!f->frame_thread.pal) { |
2873 | 0 | f->frame_thread.pal_sz = 0; |
2874 | 0 | goto error; |
2875 | 0 | } |
2876 | 0 | f->frame_thread.pal_sz = pal_sz; |
2877 | 0 | } |
2878 | | |
2879 | 0 | const int pal_idx_sz = num_sb128 * size_mul[1]; |
2880 | 0 | if (pal_idx_sz != f->frame_thread.pal_idx_sz) { |
2881 | 0 | dav1d_free_aligned(f->frame_thread.pal_idx); |
2882 | 0 | f->frame_thread.pal_idx = |
2883 | 0 | dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal_idx) * |
2884 | 0 | pal_idx_sz * 128 * 128 / 8, 64); |
2885 | 0 | if (!f->frame_thread.pal_idx) { |
2886 | 0 | f->frame_thread.pal_idx_sz = 0; |
2887 | 0 | goto error; |
2888 | 0 | } |
2889 | 0 | f->frame_thread.pal_idx_sz = pal_idx_sz; |
2890 | 0 | } |
2891 | 0 | } else if (f->frame_thread.pal) { |
2892 | 0 | dav1d_freep_aligned(&f->frame_thread.pal); |
2893 | 0 | dav1d_freep_aligned(&f->frame_thread.pal_idx); |
2894 | 0 | f->frame_thread.pal_sz = f->frame_thread.pal_idx_sz = 0; |
2895 | 0 | } |
2896 | 0 | } |
2897 | | |
2898 | | // update allocation of block contexts for above |
2899 | 0 | ptrdiff_t y_stride = f->cur.stride[0], uv_stride = f->cur.stride[1]; |
2900 | 0 | const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1]; |
2901 | 0 | const int need_cdef_lpf_copy = c->n_tc > 1 && has_resize; |
2902 | 0 | if (y_stride * f->sbh * 4 != f->lf.cdef_buf_plane_sz[0] || |
2903 | 0 | uv_stride * f->sbh * 8 != f->lf.cdef_buf_plane_sz[1] || |
2904 | 0 | need_cdef_lpf_copy != f->lf.need_cdef_lpf_copy || |
2905 | 0 | f->sbh != f->lf.cdef_buf_sbh) |
2906 | 0 | { |
2907 | 0 | dav1d_free_aligned(f->lf.cdef_line_buf); |
2908 | 0 | size_t alloc_sz = 64; |
2909 | 0 | alloc_sz += (size_t)llabs(y_stride) * 4 * f->sbh << need_cdef_lpf_copy; |
2910 | 0 | alloc_sz += (size_t)llabs(uv_stride) * 8 * f->sbh << need_cdef_lpf_copy; |
2911 | 0 | uint8_t *ptr = f->lf.cdef_line_buf = dav1d_alloc_aligned(ALLOC_CDEF, alloc_sz, 32); |
2912 | 0 | if (!ptr) { |
2913 | 0 | f->lf.cdef_buf_plane_sz[0] = f->lf.cdef_buf_plane_sz[1] = 0; |
2914 | 0 | goto error; |
2915 | 0 | } |
2916 | | |
2917 | 0 | ptr += 32; |
2918 | 0 | if (y_stride < 0) { |
2919 | 0 | f->lf.cdef_line[0][0] = ptr - y_stride * (f->sbh * 4 - 1); |
2920 | 0 | f->lf.cdef_line[1][0] = ptr - y_stride * (f->sbh * 4 - 3); |
2921 | 0 | } else { |
2922 | 0 | f->lf.cdef_line[0][0] = ptr + y_stride * 0; |
2923 | 0 | f->lf.cdef_line[1][0] = ptr + y_stride * 2; |
2924 | 0 | } |
2925 | 0 | ptr += llabs(y_stride) * f->sbh * 4; |
2926 | 0 | if (uv_stride < 0) { |
2927 | 0 | f->lf.cdef_line[0][1] = ptr - uv_stride * (f->sbh * 8 - 1); |
2928 | 0 | f->lf.cdef_line[0][2] = ptr - uv_stride * (f->sbh * 8 - 3); |
2929 | 0 | f->lf.cdef_line[1][1] = ptr - uv_stride * (f->sbh * 8 - 5); |
2930 | 0 | f->lf.cdef_line[1][2] = ptr - uv_stride * (f->sbh * 8 - 7); |
2931 | 0 | } else { |
2932 | 0 | f->lf.cdef_line[0][1] = ptr + uv_stride * 0; |
2933 | 0 | f->lf.cdef_line[0][2] = ptr + uv_stride * 2; |
2934 | 0 | f->lf.cdef_line[1][1] = ptr + uv_stride * 4; |
2935 | 0 | f->lf.cdef_line[1][2] = ptr + uv_stride * 6; |
2936 | 0 | } |
2937 | |
|
2938 | 0 | if (need_cdef_lpf_copy) { |
2939 | 0 | ptr += llabs(uv_stride) * f->sbh * 8; |
2940 | 0 | if (y_stride < 0) |
2941 | 0 | f->lf.cdef_lpf_line[0] = ptr - y_stride * (f->sbh * 4 - 1); |
2942 | 0 | else |
2943 | 0 | f->lf.cdef_lpf_line[0] = ptr; |
2944 | 0 | ptr += llabs(y_stride) * f->sbh * 4; |
2945 | 0 | if (uv_stride < 0) { |
2946 | 0 | f->lf.cdef_lpf_line[1] = ptr - uv_stride * (f->sbh * 4 - 1); |
2947 | 0 | f->lf.cdef_lpf_line[2] = ptr - uv_stride * (f->sbh * 8 - 1); |
2948 | 0 | } else { |
2949 | 0 | f->lf.cdef_lpf_line[1] = ptr; |
2950 | 0 | f->lf.cdef_lpf_line[2] = ptr + uv_stride * f->sbh * 4; |
2951 | 0 | } |
2952 | 0 | } |
2953 | |
|
2954 | 0 | f->lf.cdef_buf_plane_sz[0] = (int) y_stride * f->sbh * 4; |
2955 | 0 | f->lf.cdef_buf_plane_sz[1] = (int) uv_stride * f->sbh * 8; |
2956 | 0 | f->lf.need_cdef_lpf_copy = need_cdef_lpf_copy; |
2957 | 0 | f->lf.cdef_buf_sbh = f->sbh; |
2958 | 0 | } |
2959 | | |
2960 | 0 | const int sb128 = f->seq_hdr->sb128; |
2961 | 0 | const int num_lines = c->n_tc > 1 ? f->sbh * 4 << sb128 : 12; |
2962 | 0 | y_stride = f->sr_cur.p.stride[0], uv_stride = f->sr_cur.p.stride[1]; |
2963 | 0 | if (y_stride * num_lines != f->lf.lr_buf_plane_sz[0] || |
2964 | 0 | uv_stride * num_lines * 2 != f->lf.lr_buf_plane_sz[1]) |
2965 | 0 | { |
2966 | 0 | dav1d_free_aligned(f->lf.lr_line_buf); |
2967 | | // lr simd may overread the input, so slightly over-allocate the lpf buffer |
2968 | 0 | size_t alloc_sz = 128; |
2969 | 0 | alloc_sz += (size_t)llabs(y_stride) * num_lines; |
2970 | 0 | alloc_sz += (size_t)llabs(uv_stride) * num_lines * 2; |
2971 | 0 | uint8_t *ptr = f->lf.lr_line_buf = dav1d_alloc_aligned(ALLOC_LR, alloc_sz, 64); |
2972 | 0 | if (!ptr) { |
2973 | 0 | f->lf.lr_buf_plane_sz[0] = f->lf.lr_buf_plane_sz[1] = 0; |
2974 | 0 | goto error; |
2975 | 0 | } |
2976 | | |
2977 | 0 | ptr += 64; |
2978 | 0 | if (y_stride < 0) |
2979 | 0 | f->lf.lr_lpf_line[0] = ptr - y_stride * (num_lines - 1); |
2980 | 0 | else |
2981 | 0 | f->lf.lr_lpf_line[0] = ptr; |
2982 | 0 | ptr += llabs(y_stride) * num_lines; |
2983 | 0 | if (uv_stride < 0) { |
2984 | 0 | f->lf.lr_lpf_line[1] = ptr - uv_stride * (num_lines * 1 - 1); |
2985 | 0 | f->lf.lr_lpf_line[2] = ptr - uv_stride * (num_lines * 2 - 1); |
2986 | 0 | } else { |
2987 | 0 | f->lf.lr_lpf_line[1] = ptr; |
2988 | 0 | f->lf.lr_lpf_line[2] = ptr + uv_stride * num_lines; |
2989 | 0 | } |
2990 | |
|
2991 | 0 | f->lf.lr_buf_plane_sz[0] = (int) y_stride * num_lines; |
2992 | 0 | f->lf.lr_buf_plane_sz[1] = (int) uv_stride * num_lines * 2; |
2993 | 0 | } |
2994 | | |
2995 | | // update allocation for loopfilter masks |
2996 | 0 | if (num_sb128 != f->lf.mask_sz) { |
2997 | 0 | dav1d_free(f->lf.mask); |
2998 | 0 | dav1d_free(f->lf.level); |
2999 | 0 | f->lf.mask = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.mask) * num_sb128); |
3000 | | // over-allocate by 3 bytes since some of the SIMD implementations |
3001 | | // index this from the level type and can thus over-read by up to 3 |
3002 | 0 | f->lf.level = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.level) * num_sb128 * 32 * 32 + 3); |
3003 | 0 | if (!f->lf.mask || !f->lf.level) { |
3004 | 0 | f->lf.mask_sz = 0; |
3005 | 0 | goto error; |
3006 | 0 | } |
3007 | 0 | if (c->n_fc > 1) { |
3008 | 0 | dav1d_free(f->frame_thread.b); |
3009 | 0 | f->frame_thread.b = dav1d_malloc(ALLOC_BLOCK, sizeof(*f->frame_thread.b) * |
3010 | 0 | num_sb128 * 32 * 32); |
3011 | 0 | if (!f->frame_thread.b) { |
3012 | 0 | f->lf.mask_sz = 0; |
3013 | 0 | goto error; |
3014 | 0 | } |
3015 | 0 | } |
3016 | 0 | f->lf.mask_sz = num_sb128; |
3017 | 0 | } |
3018 | | |
3019 | 0 | f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7; |
3020 | 0 | const int lr_mask_sz = f->sr_sb128w * f->sb128h; |
3021 | 0 | if (lr_mask_sz != f->lf.lr_mask_sz) { |
3022 | 0 | dav1d_free(f->lf.lr_mask); |
3023 | 0 | f->lf.lr_mask = dav1d_malloc(ALLOC_LR, sizeof(*f->lf.lr_mask) * lr_mask_sz); |
3024 | 0 | if (!f->lf.lr_mask) { |
3025 | 0 | f->lf.lr_mask_sz = 0; |
3026 | 0 | goto error; |
3027 | 0 | } |
3028 | 0 | f->lf.lr_mask_sz = lr_mask_sz; |
3029 | 0 | } |
3030 | 0 | f->lf.restore_planes = |
3031 | 0 | ((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) + |
3032 | 0 | ((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) + |
3033 | 0 | ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2); |
3034 | 0 | if (f->frame_hdr->loopfilter.sharpness != f->lf.last_sharpness) { |
3035 | 0 | dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr->loopfilter.sharpness); |
3036 | 0 | f->lf.last_sharpness = f->frame_hdr->loopfilter.sharpness; |
3037 | 0 | } |
3038 | 0 | dav1d_calc_lf_values(f->lf.lvl, f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 }); |
3039 | 0 | memset(f->lf.mask, 0, sizeof(*f->lf.mask) * num_sb128); |
3040 | |
|
3041 | 0 | const int ipred_edge_sz = f->sbh * f->sb128w << hbd; |
3042 | 0 | if (ipred_edge_sz != f->ipred_edge_sz) { |
3043 | 0 | dav1d_free_aligned(f->ipred_edge[0]); |
3044 | 0 | uint8_t *ptr = f->ipred_edge[0] = |
3045 | 0 | dav1d_alloc_aligned(ALLOC_IPRED, ipred_edge_sz * 128 * 3, 64); |
3046 | 0 | if (!ptr) { |
3047 | 0 | f->ipred_edge_sz = 0; |
3048 | 0 | goto error; |
3049 | 0 | } |
3050 | 0 | f->ipred_edge[1] = ptr + ipred_edge_sz * 128 * 1; |
3051 | 0 | f->ipred_edge[2] = ptr + ipred_edge_sz * 128 * 2; |
3052 | 0 | f->ipred_edge_sz = ipred_edge_sz; |
3053 | 0 | } |
3054 | | |
3055 | 0 | const int re_sz = f->sb128h * f->frame_hdr->tiling.cols; |
3056 | 0 | if (re_sz != f->lf.re_sz) { |
3057 | 0 | dav1d_free(f->lf.tx_lpf_right_edge[0]); |
3058 | 0 | f->lf.tx_lpf_right_edge[0] = dav1d_malloc(ALLOC_LF, re_sz * 32 * 2); |
3059 | 0 | if (!f->lf.tx_lpf_right_edge[0]) { |
3060 | 0 | f->lf.re_sz = 0; |
3061 | 0 | goto error; |
3062 | 0 | } |
3063 | 0 | f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] + re_sz * 32; |
3064 | 0 | f->lf.re_sz = re_sz; |
3065 | 0 | } |
3066 | | |
3067 | | // init ref mvs |
3068 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) { |
3069 | 0 | const int ret = |
3070 | 0 | dav1d_refmvs_init_frame(&f->rf, f->seq_hdr, f->frame_hdr, |
3071 | 0 | f->refpoc, f->mvs, f->refrefpoc, f->ref_mvs, |
3072 | 0 | f->c->n_tc, f->c->n_fc); |
3073 | 0 | if (ret < 0) goto error; |
3074 | 0 | } |
3075 | | |
3076 | | // setup dequant tables |
3077 | 0 | init_quant_tables(f->seq_hdr, f->frame_hdr, f->frame_hdr->quant.yac, f->dq); |
3078 | 0 | if (f->frame_hdr->quant.qm) |
3079 | 0 | for (int i = 0; i < N_RECT_TX_SIZES; i++) { |
3080 | 0 | f->qm[i][0] = dav1d_qm_tbl[f->frame_hdr->quant.qm_y][0][i]; |
3081 | 0 | f->qm[i][1] = dav1d_qm_tbl[f->frame_hdr->quant.qm_u][1][i]; |
3082 | 0 | f->qm[i][2] = dav1d_qm_tbl[f->frame_hdr->quant.qm_v][1][i]; |
3083 | 0 | } |
3084 | 0 | else |
3085 | 0 | memset(f->qm, 0, sizeof(f->qm)); |
3086 | | |
3087 | | // setup jnt_comp weights |
3088 | 0 | if (f->frame_hdr->switchable_comp_refs) { |
3089 | 0 | for (int i = 0; i < 7; i++) { |
3090 | 0 | const unsigned ref0poc = f->refp[i].p.frame_hdr->frame_offset; |
3091 | |
|
3092 | 0 | for (int j = i + 1; j < 7; j++) { |
3093 | 0 | const unsigned ref1poc = f->refp[j].p.frame_hdr->frame_offset; |
3094 | |
|
3095 | 0 | const unsigned d1 = |
3096 | 0 | imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref0poc, |
3097 | 0 | f->cur.frame_hdr->frame_offset)), 31); |
3098 | 0 | const unsigned d0 = |
3099 | 0 | imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref1poc, |
3100 | 0 | f->cur.frame_hdr->frame_offset)), 31); |
3101 | 0 | const int order = d0 <= d1; |
3102 | |
|
3103 | 0 | static const uint8_t quant_dist_weight[3][2] = { |
3104 | 0 | { 2, 3 }, { 2, 5 }, { 2, 7 } |
3105 | 0 | }; |
3106 | 0 | static const uint8_t quant_dist_lookup_table[4][2] = { |
3107 | 0 | { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 } |
3108 | 0 | }; |
3109 | |
|
3110 | 0 | int k; |
3111 | 0 | for (k = 0; k < 3; k++) { |
3112 | 0 | const int c0 = quant_dist_weight[k][order]; |
3113 | 0 | const int c1 = quant_dist_weight[k][!order]; |
3114 | 0 | const int d0_c0 = d0 * c0; |
3115 | 0 | const int d1_c1 = d1 * c1; |
3116 | 0 | if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break; |
3117 | 0 | } |
3118 | |
|
3119 | 0 | f->jnt_weights[i][j] = quant_dist_lookup_table[k][order]; |
3120 | 0 | } |
3121 | 0 | } |
3122 | 0 | } |
3123 | | |
3124 | | /* Init loopfilter pointers. Increasing NULL pointers is technically UB, |
3125 | | * so just point the chroma pointers in 4:0:0 to the luma plane here to |
3126 | | * avoid having additional in-loop branches in various places. We never |
3127 | | * dereference those pointers so it doesn't really matter what they |
3128 | | * point at, as long as the pointers are valid. */ |
3129 | 0 | const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400; |
3130 | 0 | f->lf.p[0] = f->cur.data[0]; |
3131 | 0 | f->lf.p[1] = f->cur.data[has_chroma ? 1 : 0]; |
3132 | 0 | f->lf.p[2] = f->cur.data[has_chroma ? 2 : 0]; |
3133 | 0 | f->lf.sr_p[0] = f->sr_cur.p.data[0]; |
3134 | 0 | f->lf.sr_p[1] = f->sr_cur.p.data[has_chroma ? 1 : 0]; |
3135 | 0 | f->lf.sr_p[2] = f->sr_cur.p.data[has_chroma ? 2 : 0]; |
3136 | |
|
3137 | 0 | retval = 0; |
3138 | 0 | error: |
3139 | 0 | return retval; |
3140 | 0 | } |
3141 | | |
3142 | 0 | int dav1d_decode_frame_init_cdf(Dav1dFrameContext *const f) { |
3143 | 0 | const Dav1dContext *const c = f->c; |
3144 | 0 | int retval = DAV1D_ERR(EINVAL); |
3145 | |
|
3146 | 0 | if (f->frame_hdr->refresh_context) |
3147 | 0 | dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf); |
3148 | | |
3149 | | // parse individual tiles per tile group |
3150 | 0 | int tile_row = 0, tile_col = 0; |
3151 | 0 | f->task_thread.update_set = 0; |
3152 | 0 | for (int i = 0; i < f->n_tile_data; i++) { |
3153 | 0 | const uint8_t *data = f->tile[i].data.data; |
3154 | 0 | size_t size = f->tile[i].data.sz; |
3155 | |
|
3156 | 0 | for (int j = f->tile[i].start; j <= f->tile[i].end; j++) { |
3157 | 0 | size_t tile_sz; |
3158 | 0 | if (j == f->tile[i].end) { |
3159 | 0 | tile_sz = size; |
3160 | 0 | } else { |
3161 | 0 | if (f->frame_hdr->tiling.n_bytes > size) goto error; |
3162 | 0 | tile_sz = 0; |
3163 | 0 | for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++) |
3164 | 0 | tile_sz |= (unsigned)*data++ << (k * 8); |
3165 | 0 | tile_sz++; |
3166 | 0 | size -= f->frame_hdr->tiling.n_bytes; |
3167 | 0 | if (tile_sz > size) goto error; |
3168 | 0 | } |
3169 | | |
3170 | 0 | setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++, |
3171 | 0 | c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0); |
3172 | |
|
3173 | 0 | if (tile_col == f->frame_hdr->tiling.cols) { |
3174 | 0 | tile_col = 0; |
3175 | 0 | tile_row++; |
3176 | 0 | } |
3177 | 0 | if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context) |
3178 | 0 | f->task_thread.update_set = 1; |
3179 | 0 | data += tile_sz; |
3180 | 0 | size -= tile_sz; |
3181 | 0 | } |
3182 | 0 | } |
3183 | | |
3184 | 0 | if (c->n_tc > 1) { |
3185 | 0 | const int uses_2pass = c->n_fc > 1; |
3186 | 0 | for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows * (1 + uses_2pass); n++) |
3187 | 0 | reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), |
3188 | 0 | uses_2pass ? 1 + (n >= f->sb128w * f->frame_hdr->tiling.rows) : 0); |
3189 | 0 | } |
3190 | |
|
3191 | 0 | retval = 0; |
3192 | 0 | error: |
3193 | 0 | return retval; |
3194 | 0 | } |
3195 | | |
3196 | 0 | int dav1d_decode_frame_main(Dav1dFrameContext *const f) { |
3197 | 0 | const Dav1dContext *const c = f->c; |
3198 | 0 | int retval = DAV1D_ERR(EINVAL); |
3199 | |
|
3200 | 0 | assert(f->c->n_tc == 1); |
3201 | | |
3202 | 0 | Dav1dTaskContext *const t = &c->tc[f - c->fc]; |
3203 | 0 | t->f = f; |
3204 | 0 | t->frame_thread.pass = 0; |
3205 | |
|
3206 | 0 | for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows; n++) |
3207 | 0 | reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), 0); |
3208 | | |
3209 | | // no threading - we explicitly interleave tile/sbrow decoding |
3210 | | // and post-filtering, so that the full process runs in-line |
3211 | 0 | for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) { |
3212 | 0 | const int sbh_end = |
3213 | 0 | imin(f->frame_hdr->tiling.row_start_sb[tile_row + 1], f->sbh); |
3214 | 0 | for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row]; |
3215 | 0 | sby < sbh_end; sby++) |
3216 | 0 | { |
3217 | 0 | t->by = sby << (4 + f->seq_hdr->sb128); |
3218 | 0 | const int by_end = (t->by + f->sb_step) >> 1; |
3219 | 0 | if (f->frame_hdr->use_ref_frame_mvs) { |
3220 | 0 | f->c->refmvs_dsp.load_tmvs(&f->rf, tile_row, |
3221 | 0 | 0, f->bw >> 1, t->by >> 1, by_end); |
3222 | 0 | } |
3223 | 0 | for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) { |
3224 | 0 | t->ts = &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col]; |
3225 | 0 | if (dav1d_decode_tile_sbrow(t)) goto error; |
3226 | 0 | } |
3227 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr)) { |
3228 | 0 | dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt, |
3229 | 0 | 0, f->bw >> 1, t->by >> 1, by_end); |
3230 | 0 | } |
3231 | | |
3232 | | // loopfilter + cdef + restoration |
3233 | 0 | f->bd_fn.filter_sbrow(f, sby); |
3234 | 0 | } |
3235 | 0 | } |
3236 | | |
3237 | 0 | retval = 0; |
3238 | 0 | error: |
3239 | 0 | return retval; |
3240 | 0 | } |
3241 | | |
3242 | 0 | void dav1d_decode_frame_exit(Dav1dFrameContext *const f, int retval) { |
3243 | 0 | const Dav1dContext *const c = f->c; |
3244 | |
|
3245 | 0 | if (f->sr_cur.p.data[0]) |
3246 | 0 | atomic_init(&f->task_thread.error, 0); |
3247 | |
|
3248 | 0 | if (c->n_fc > 1 && retval && f->frame_thread.cf) { |
3249 | 0 | memset(f->frame_thread.cf, 0, |
3250 | 0 | (size_t)f->frame_thread.cf_sz * 128 * 128 / 2); |
3251 | 0 | } |
3252 | 0 | for (int i = 0; i < 7; i++) { |
3253 | 0 | if (f->refp[i].p.frame_hdr) { |
3254 | 0 | if (!retval && c->n_fc > 1 && c->strict_std_compliance && |
3255 | 0 | atomic_load(&f->refp[i].progress[1]) == FRAME_ERROR) |
3256 | 0 | { |
3257 | 0 | retval = DAV1D_ERR(EINVAL); |
3258 | 0 | atomic_store(&f->task_thread.error, 1); |
3259 | 0 | atomic_store(&f->sr_cur.progress[1], FRAME_ERROR); |
3260 | 0 | } |
3261 | 0 | dav1d_thread_picture_unref(&f->refp[i]); |
3262 | 0 | } |
3263 | 0 | dav1d_ref_dec(&f->ref_mvs_ref[i]); |
3264 | 0 | } |
3265 | |
|
3266 | 0 | dav1d_picture_unref_internal(&f->cur); |
3267 | 0 | dav1d_thread_picture_unref(&f->sr_cur); |
3268 | 0 | dav1d_cdf_thread_unref(&f->in_cdf); |
3269 | 0 | if (f->frame_hdr && f->frame_hdr->refresh_context) { |
3270 | 0 | if (f->out_cdf.progress) |
3271 | 0 | atomic_store(f->out_cdf.progress, retval == 0 ? 1 : TILE_ERROR); |
3272 | 0 | dav1d_cdf_thread_unref(&f->out_cdf); |
3273 | 0 | } |
3274 | 0 | dav1d_ref_dec(&f->cur_segmap_ref); |
3275 | 0 | dav1d_ref_dec(&f->prev_segmap_ref); |
3276 | 0 | dav1d_ref_dec(&f->mvs_ref); |
3277 | 0 | dav1d_ref_dec(&f->seq_hdr_ref); |
3278 | 0 | dav1d_ref_dec(&f->frame_hdr_ref); |
3279 | |
|
3280 | 0 | for (int i = 0; i < f->n_tile_data; i++) |
3281 | 0 | dav1d_data_unref_internal(&f->tile[i].data); |
3282 | 0 | f->task_thread.retval = retval; |
3283 | 0 | } |
3284 | | |
3285 | 0 | int dav1d_decode_frame(Dav1dFrameContext *const f) { |
3286 | 0 | assert(f->c->n_fc == 1); |
3287 | | // if n_tc > 1 (but n_fc == 1), we could run init/exit in the task |
3288 | | // threads also. Not sure it makes a measurable difference. |
3289 | 0 | int res = dav1d_decode_frame_init(f); |
3290 | 0 | if (!res) res = dav1d_decode_frame_init_cdf(f); |
3291 | | // wait until all threads have completed |
3292 | 0 | if (!res) { |
3293 | 0 | if (f->c->n_tc > 1) { |
3294 | 0 | res = dav1d_task_create_tile_sbrow(f, 0, 1); |
3295 | 0 | pthread_mutex_lock(&f->task_thread.ttd->lock); |
3296 | 0 | pthread_cond_signal(&f->task_thread.ttd->cond); |
3297 | 0 | if (!res) { |
3298 | 0 | while (!f->task_thread.done[0] || |
3299 | 0 | atomic_load(&f->task_thread.task_counter) > 0) |
3300 | 0 | { |
3301 | 0 | pthread_cond_wait(&f->task_thread.cond, |
3302 | 0 | &f->task_thread.ttd->lock); |
3303 | 0 | } |
3304 | 0 | } |
3305 | 0 | pthread_mutex_unlock(&f->task_thread.ttd->lock); |
3306 | 0 | res = f->task_thread.retval; |
3307 | 0 | } else { |
3308 | 0 | res = dav1d_decode_frame_main(f); |
3309 | 0 | if (!res && f->frame_hdr->refresh_context && f->task_thread.update_set) { |
3310 | 0 | dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf, |
3311 | 0 | &f->ts[f->frame_hdr->tiling.update].cdf); |
3312 | 0 | } |
3313 | 0 | } |
3314 | 0 | } |
3315 | 0 | dav1d_decode_frame_exit(f, res); |
3316 | 0 | res = f->task_thread.retval; |
3317 | 0 | f->n_tile_data = 0; |
3318 | 0 | return res; |
3319 | 0 | } |
3320 | | |
/*
 * Compute the start offset used when horizontally upscaling a row of in_w
 * pixels to out_w pixels with the per-output-pixel increment `step`.
 *
 * The arithmetic is fixed-point with 14 fractional bits (in_w is scaled by
 * 1 << 14); only the low 14 bits of the result are returned, so the return
 * value is always in [0, 0x3fff].
 */
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
    // how far out_w steps over- or undershoot the scaled input width
    const int step_err = out_w * step - (in_w << 14);
    // -(out_w - in_w) / 2 in 14-bit fixed point, divided by out_w after
    // adding a bias of out_w / 2
    const int centering = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w;
    const int start = centering + 128 - (step_err / 2);
    return start & 0x3fff;
}
3326 | | |
3327 | 0 | int dav1d_submit_frame(Dav1dContext *const c) { |
3328 | 0 | Dav1dFrameContext *f; |
3329 | 0 | int res = -1; |
3330 | | |
3331 | | // wait for c->out_delayed[next] and move into c->out if visible |
3332 | 0 | Dav1dThreadPicture *out_delayed; |
3333 | 0 | if (c->n_fc > 1) { |
3334 | 0 | pthread_mutex_lock(&c->task_thread.lock); |
3335 | 0 | const unsigned next = c->frame_thread.next++; |
3336 | 0 | if (c->frame_thread.next == c->n_fc) |
3337 | 0 | c->frame_thread.next = 0; |
3338 | |
|
3339 | 0 | f = &c->fc[next]; |
3340 | 0 | while (f->n_tile_data > 0) |
3341 | 0 | pthread_cond_wait(&f->task_thread.cond, |
3342 | 0 | &c->task_thread.lock); |
3343 | 0 | out_delayed = &c->frame_thread.out_delayed[next]; |
3344 | 0 | if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) { |
3345 | 0 | unsigned first = atomic_load(&c->task_thread.first); |
3346 | 0 | if (first + 1U < c->n_fc) |
3347 | 0 | atomic_fetch_add(&c->task_thread.first, 1U); |
3348 | 0 | else |
3349 | 0 | atomic_store(&c->task_thread.first, 0); |
3350 | 0 | atomic_compare_exchange_strong(&c->task_thread.reset_task_cur, |
3351 | 0 | &first, UINT_MAX); |
3352 | 0 | if (c->task_thread.cur && c->task_thread.cur < c->n_fc) |
3353 | 0 | c->task_thread.cur--; |
3354 | 0 | } |
3355 | 0 | const int error = f->task_thread.retval; |
3356 | 0 | if (error) { |
3357 | 0 | f->task_thread.retval = 0; |
3358 | 0 | c->cached_error = error; |
3359 | 0 | dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m); |
3360 | 0 | dav1d_thread_picture_unref(out_delayed); |
3361 | 0 | } else if (out_delayed->p.data[0]) { |
3362 | 0 | const unsigned progress = atomic_load_explicit(&out_delayed->progress[1], |
3363 | 0 | memory_order_relaxed); |
3364 | 0 | if ((out_delayed->visible || c->output_invisible_frames) && |
3365 | 0 | progress != FRAME_ERROR) |
3366 | 0 | { |
3367 | 0 | dav1d_thread_picture_ref(&c->out, out_delayed); |
3368 | 0 | c->event_flags |= dav1d_picture_get_event_flags(out_delayed); |
3369 | 0 | } |
3370 | 0 | dav1d_thread_picture_unref(out_delayed); |
3371 | 0 | } |
3372 | 0 | } else { |
3373 | 0 | f = c->fc; |
3374 | 0 | } |
3375 | |
|
3376 | 0 | f->seq_hdr = c->seq_hdr; |
3377 | 0 | f->seq_hdr_ref = c->seq_hdr_ref; |
3378 | 0 | dav1d_ref_inc(f->seq_hdr_ref); |
3379 | 0 | f->frame_hdr = c->frame_hdr; |
3380 | 0 | f->frame_hdr_ref = c->frame_hdr_ref; |
3381 | 0 | c->frame_hdr = NULL; |
3382 | 0 | c->frame_hdr_ref = NULL; |
3383 | 0 | f->dsp = &c->dsp[f->seq_hdr->hbd]; |
3384 | |
|
3385 | 0 | const int bpc = 8 + 2 * f->seq_hdr->hbd; |
3386 | |
|
3387 | 0 | if (!f->dsp->ipred.intra_pred[DC_PRED]) { |
3388 | 0 | Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd]; |
3389 | |
|
3390 | 0 | switch (bpc) { |
3391 | 0 | #define assign_bitdepth_case(bd) \ |
3392 | 0 | dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \ |
3393 | 0 | dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \ |
3394 | 0 | dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \ |
3395 | 0 | dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \ |
3396 | 0 | dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \ |
3397 | 0 | dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \ |
3398 | 0 | dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \ |
3399 | 0 | break |
3400 | 0 | #if CONFIG_8BPC |
3401 | 0 | case 8: |
3402 | 0 | assign_bitdepth_case(8); |
3403 | 0 | #endif |
3404 | 0 | #if CONFIG_16BPC |
3405 | 0 | case 10: |
3406 | 0 | case 12: |
3407 | 0 | assign_bitdepth_case(16); |
3408 | 0 | #endif |
3409 | 0 | #undef assign_bitdepth_case |
3410 | 0 | default: |
3411 | 0 | dav1d_log(c, "Compiled without support for %d-bit decoding\n", |
3412 | 0 | 8 + 2 * f->seq_hdr->hbd); |
3413 | 0 | res = DAV1D_ERR(ENOPROTOOPT); |
3414 | 0 | goto error; |
3415 | 0 | } |
3416 | 0 | } |
3417 | | |
3418 | 0 | #define assign_bitdepth_case(bd) \ |
3419 | 0 | f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \ |
3420 | 0 | f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \ |
3421 | 0 | f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \ |
3422 | 0 | f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \ |
3423 | 0 | f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \ |
3424 | 0 | f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \ |
3425 | 0 | f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \ |
3426 | 0 | f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \ |
3427 | 0 | f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \ |
3428 | 0 | f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \ |
3429 | 0 | f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \ |
3430 | 0 | f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \ |
3431 | 0 | f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \ |
3432 | 0 | f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc |
3433 | 0 | if (!f->seq_hdr->hbd) { |
3434 | 0 | #if CONFIG_8BPC |
3435 | 0 | assign_bitdepth_case(8); |
3436 | 0 | #endif |
3437 | 0 | } else { |
3438 | 0 | #if CONFIG_16BPC |
3439 | 0 | assign_bitdepth_case(16); |
3440 | 0 | #endif |
3441 | 0 | } |
3442 | 0 | #undef assign_bitdepth_case |
3443 | |
|
3444 | 0 | int ref_coded_width[7]; |
3445 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr)) { |
3446 | 0 | if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) { |
3447 | 0 | const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame]; |
3448 | 0 | if (!c->refs[pri_ref].p.p.data[0]) { |
3449 | 0 | res = DAV1D_ERR(EINVAL); |
3450 | 0 | goto error; |
3451 | 0 | } |
3452 | 0 | } |
3453 | 0 | for (int i = 0; i < 7; i++) { |
3454 | 0 | const int refidx = f->frame_hdr->refidx[i]; |
3455 | 0 | if (!c->refs[refidx].p.p.data[0] || |
3456 | 0 | f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w || |
3457 | 0 | f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h || |
3458 | 0 | f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 || |
3459 | 0 | f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 || |
3460 | 0 | f->seq_hdr->layout != c->refs[refidx].p.p.p.layout || |
3461 | 0 | bpc != c->refs[refidx].p.p.p.bpc) |
3462 | 0 | { |
3463 | 0 | for (int j = 0; j < i; j++) |
3464 | 0 | dav1d_thread_picture_unref(&f->refp[j]); |
3465 | 0 | res = DAV1D_ERR(EINVAL); |
3466 | 0 | goto error; |
3467 | 0 | } |
3468 | 0 | dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p); |
3469 | 0 | ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0]; |
3470 | 0 | if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w || |
3471 | 0 | f->frame_hdr->height != c->refs[refidx].p.p.p.h) |
3472 | 0 | { |
3473 | 0 | #define scale_fac(ref_sz, this_sz) \ |
3474 | 0 | ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz)) |
3475 | 0 | f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w, |
3476 | 0 | f->frame_hdr->width[0]); |
3477 | 0 | f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h, |
3478 | 0 | f->frame_hdr->height); |
3479 | 0 | f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4; |
3480 | 0 | f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4; |
3481 | 0 | } else { |
3482 | 0 | f->svc[i][0].scale = f->svc[i][1].scale = 0; |
3483 | 0 | } |
3484 | 0 | f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION && |
3485 | 0 | !f->frame_hdr->force_integer_mv && |
3486 | 0 | !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) && |
3487 | 0 | !f->svc[i][0].scale; |
3488 | 0 | } |
3489 | 0 | } |
3490 | | |
3491 | | // setup entropy |
3492 | 0 | if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) { |
3493 | 0 | dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac); |
3494 | 0 | } else { |
3495 | 0 | const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame]; |
3496 | 0 | dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]); |
3497 | 0 | } |
3498 | 0 | if (f->frame_hdr->refresh_context) { |
3499 | 0 | res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1); |
3500 | 0 | if (res < 0) goto error; |
3501 | 0 | } |
3502 | | |
3503 | | // FIXME qsort so tiles are in order (for frame threading) |
3504 | 0 | if (f->n_tile_data_alloc < c->n_tile_data) { |
3505 | 0 | dav1d_free(f->tile); |
3506 | 0 | assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile)); |
3507 | 0 | f->tile = dav1d_malloc(ALLOC_TILE, c->n_tile_data * sizeof(*f->tile)); |
3508 | 0 | if (!f->tile) { |
3509 | 0 | f->n_tile_data_alloc = f->n_tile_data = 0; |
3510 | 0 | res = DAV1D_ERR(ENOMEM); |
3511 | 0 | goto error; |
3512 | 0 | } |
3513 | 0 | f->n_tile_data_alloc = c->n_tile_data; |
3514 | 0 | } |
3515 | 0 | memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile)); |
3516 | 0 | memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile)); |
3517 | 0 | f->n_tile_data = c->n_tile_data; |
3518 | 0 | c->n_tile_data = 0; |
3519 | | |
3520 | | // allocate frame |
3521 | 0 | res = dav1d_thread_picture_alloc(c, f, bpc); |
3522 | 0 | if (res < 0) goto error; |
3523 | | |
3524 | 0 | if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { |
3525 | 0 | res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p); |
3526 | 0 | if (res < 0) goto error; |
3527 | 0 | } else { |
3528 | 0 | dav1d_picture_ref(&f->cur, &f->sr_cur.p); |
3529 | 0 | } |
3530 | | |
3531 | 0 | if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { |
3532 | 0 | f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w); |
3533 | 0 | const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; |
3534 | 0 | const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor; |
3535 | 0 | const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor; |
3536 | 0 | f->resize_step[1] = scale_fac(in_cw, out_cw); |
3537 | 0 | #undef scale_fac |
3538 | 0 | f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]); |
3539 | 0 | f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]); |
3540 | 0 | } |
3541 | | |
3542 | | // move f->cur into output queue |
3543 | 0 | if (c->n_fc == 1) { |
3544 | 0 | if (f->frame_hdr->show_frame || c->output_invisible_frames) { |
3545 | 0 | dav1d_thread_picture_ref(&c->out, &f->sr_cur); |
3546 | 0 | c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur); |
3547 | 0 | } |
3548 | 0 | } else { |
3549 | 0 | dav1d_thread_picture_ref(out_delayed, &f->sr_cur); |
3550 | 0 | } |
3551 | |
|
3552 | 0 | f->w4 = (f->frame_hdr->width[0] + 3) >> 2; |
3553 | 0 | f->h4 = (f->frame_hdr->height + 3) >> 2; |
3554 | 0 | f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1; |
3555 | 0 | f->bh = ((f->frame_hdr->height + 7) >> 3) << 1; |
3556 | 0 | f->sb128w = (f->bw + 31) >> 5; |
3557 | 0 | f->sb128h = (f->bh + 31) >> 5; |
3558 | 0 | f->sb_shift = 4 + f->seq_hdr->sb128; |
3559 | 0 | f->sb_step = 16 << f->seq_hdr->sb128; |
3560 | 0 | f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift; |
3561 | 0 | f->b4_stride = (f->bw + 31) & ~31; |
3562 | 0 | f->bitdepth_max = (1 << f->cur.p.bpc) - 1; |
3563 | 0 | atomic_init(&f->task_thread.error, 0); |
3564 | 0 | const int uses_2pass = c->n_fc > 1; |
3565 | 0 | const int cols = f->frame_hdr->tiling.cols; |
3566 | 0 | const int rows = f->frame_hdr->tiling.rows; |
3567 | 0 | atomic_store(&f->task_thread.task_counter, |
3568 | 0 | (cols * rows + f->sbh) << uses_2pass); |
3569 | | |
3570 | | // ref_mvs |
3571 | 0 | if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) { |
3572 | 0 | f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool, |
3573 | 0 | sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1)); |
3574 | 0 | if (!f->mvs_ref) { |
3575 | 0 | res = DAV1D_ERR(ENOMEM); |
3576 | 0 | goto error; |
3577 | 0 | } |
3578 | 0 | f->mvs = f->mvs_ref->data; |
3579 | 0 | if (!f->frame_hdr->allow_intrabc) { |
3580 | 0 | for (int i = 0; i < 7; i++) |
3581 | 0 | f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset; |
3582 | 0 | } else { |
3583 | 0 | memset(f->refpoc, 0, sizeof(f->refpoc)); |
3584 | 0 | } |
3585 | 0 | if (f->frame_hdr->use_ref_frame_mvs) { |
3586 | 0 | for (int i = 0; i < 7; i++) { |
3587 | 0 | const int refidx = f->frame_hdr->refidx[i]; |
3588 | 0 | const int ref_w = ((ref_coded_width[i] + 7) >> 3) << 1; |
3589 | 0 | const int ref_h = ((f->refp[i].p.p.h + 7) >> 3) << 1; |
3590 | 0 | if (c->refs[refidx].refmvs != NULL && |
3591 | 0 | ref_w == f->bw && ref_h == f->bh) |
3592 | 0 | { |
3593 | 0 | f->ref_mvs_ref[i] = c->refs[refidx].refmvs; |
3594 | 0 | dav1d_ref_inc(f->ref_mvs_ref[i]); |
3595 | 0 | f->ref_mvs[i] = c->refs[refidx].refmvs->data; |
3596 | 0 | } else { |
3597 | 0 | f->ref_mvs[i] = NULL; |
3598 | 0 | f->ref_mvs_ref[i] = NULL; |
3599 | 0 | } |
3600 | 0 | memcpy(f->refrefpoc[i], c->refs[refidx].refpoc, |
3601 | 0 | sizeof(*f->refrefpoc)); |
3602 | 0 | } |
3603 | 0 | } else { |
3604 | 0 | memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref)); |
3605 | 0 | } |
3606 | 0 | } else { |
3607 | 0 | f->mvs_ref = NULL; |
3608 | 0 | memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref)); |
3609 | 0 | } |
3610 | | |
3611 | | // segmap |
3612 | 0 | if (f->frame_hdr->segmentation.enabled) { |
3613 | | // By default, the previous segmentation map is not initialised. |
3614 | 0 | f->prev_segmap_ref = NULL; |
3615 | 0 | f->prev_segmap = NULL; |
3616 | | |
3617 | | // We might need a previous frame's segmentation map. This |
3618 | | // happens if there is either no update or a temporal update. |
3619 | 0 | if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) { |
3620 | 0 | const int pri_ref = f->frame_hdr->primary_ref_frame; |
3621 | 0 | assert(pri_ref != DAV1D_PRIMARY_REF_NONE); |
3622 | 0 | const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1; |
3623 | 0 | const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1; |
3624 | 0 | if (ref_w == f->bw && ref_h == f->bh) { |
3625 | 0 | f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap; |
3626 | 0 | if (f->prev_segmap_ref) { |
3627 | 0 | dav1d_ref_inc(f->prev_segmap_ref); |
3628 | 0 | f->prev_segmap = f->prev_segmap_ref->data; |
3629 | 0 | } |
3630 | 0 | } |
3631 | 0 | } |
3632 | | |
3633 | 0 | if (f->frame_hdr->segmentation.update_map) { |
3634 | | // We're updating an existing map, but need somewhere to |
3635 | | // put the new values. Allocate them here (the data |
3636 | | // actually gets set elsewhere) |
3637 | 0 | f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, |
3638 | 0 | sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h); |
3639 | 0 | if (!f->cur_segmap_ref) { |
3640 | 0 | dav1d_ref_dec(&f->prev_segmap_ref); |
3641 | 0 | res = DAV1D_ERR(ENOMEM); |
3642 | 0 | goto error; |
3643 | 0 | } |
3644 | 0 | f->cur_segmap = f->cur_segmap_ref->data; |
3645 | 0 | } else if (f->prev_segmap_ref) { |
3646 | | // We're not updating an existing map, and we have a valid |
3647 | | // reference. Use that. |
3648 | 0 | f->cur_segmap_ref = f->prev_segmap_ref; |
3649 | 0 | dav1d_ref_inc(f->cur_segmap_ref); |
3650 | 0 | f->cur_segmap = f->prev_segmap_ref->data; |
3651 | 0 | } else { |
3652 | | // We need to make a new map. Allocate one here and zero it out. |
3653 | 0 | const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h; |
3654 | 0 | f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size); |
3655 | 0 | if (!f->cur_segmap_ref) { |
3656 | 0 | res = DAV1D_ERR(ENOMEM); |
3657 | 0 | goto error; |
3658 | 0 | } |
3659 | 0 | f->cur_segmap = f->cur_segmap_ref->data; |
3660 | 0 | memset(f->cur_segmap, 0, segmap_size); |
3661 | 0 | } |
3662 | 0 | } else { |
3663 | 0 | f->cur_segmap = NULL; |
3664 | 0 | f->cur_segmap_ref = NULL; |
3665 | 0 | f->prev_segmap_ref = NULL; |
3666 | 0 | } |
3667 | | |
3668 | | // update references etc. |
3669 | 0 | const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags; |
3670 | 0 | for (int i = 0; i < 8; i++) { |
3671 | 0 | if (refresh_frame_flags & (1 << i)) { |
3672 | 0 | if (c->refs[i].p.p.frame_hdr) |
3673 | 0 | dav1d_thread_picture_unref(&c->refs[i].p); |
3674 | 0 | dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur); |
3675 | |
|
3676 | 0 | dav1d_cdf_thread_unref(&c->cdf[i]); |
3677 | 0 | if (f->frame_hdr->refresh_context) { |
3678 | 0 | dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf); |
3679 | 0 | } else { |
3680 | 0 | dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf); |
3681 | 0 | } |
3682 | |
|
3683 | 0 | dav1d_ref_dec(&c->refs[i].segmap); |
3684 | 0 | c->refs[i].segmap = f->cur_segmap_ref; |
3685 | 0 | if (f->cur_segmap_ref) |
3686 | 0 | dav1d_ref_inc(f->cur_segmap_ref); |
3687 | 0 | dav1d_ref_dec(&c->refs[i].refmvs); |
3688 | 0 | if (!f->frame_hdr->allow_intrabc) { |
3689 | 0 | c->refs[i].refmvs = f->mvs_ref; |
3690 | 0 | if (f->mvs_ref) |
3691 | 0 | dav1d_ref_inc(f->mvs_ref); |
3692 | 0 | } |
3693 | 0 | memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc)); |
3694 | 0 | } |
3695 | 0 | } |
3696 | |
|
3697 | 0 | if (c->n_fc == 1) { |
3698 | 0 | if ((res = dav1d_decode_frame(f)) < 0) { |
3699 | 0 | dav1d_thread_picture_unref(&c->out); |
3700 | 0 | for (int i = 0; i < 8; i++) { |
3701 | 0 | if (refresh_frame_flags & (1 << i)) { |
3702 | 0 | if (c->refs[i].p.p.frame_hdr) |
3703 | 0 | dav1d_thread_picture_unref(&c->refs[i].p); |
3704 | 0 | dav1d_cdf_thread_unref(&c->cdf[i]); |
3705 | 0 | dav1d_ref_dec(&c->refs[i].segmap); |
3706 | 0 | dav1d_ref_dec(&c->refs[i].refmvs); |
3707 | 0 | } |
3708 | 0 | } |
3709 | 0 | goto error; |
3710 | 0 | } |
3711 | 0 | } else { |
3712 | 0 | dav1d_task_frame_init(f); |
3713 | 0 | pthread_mutex_unlock(&c->task_thread.lock); |
3714 | 0 | } |
3715 | | |
3716 | 0 | return 0; |
3717 | 0 | error: |
3718 | 0 | atomic_init(&f->task_thread.error, 1); |
3719 | 0 | dav1d_cdf_thread_unref(&f->in_cdf); |
3720 | 0 | if (f->frame_hdr->refresh_context) |
3721 | 0 | dav1d_cdf_thread_unref(&f->out_cdf); |
3722 | 0 | for (int i = 0; i < 7; i++) { |
3723 | 0 | if (f->refp[i].p.frame_hdr) |
3724 | 0 | dav1d_thread_picture_unref(&f->refp[i]); |
3725 | 0 | dav1d_ref_dec(&f->ref_mvs_ref[i]); |
3726 | 0 | } |
3727 | 0 | if (c->n_fc == 1) |
3728 | 0 | dav1d_thread_picture_unref(&c->out); |
3729 | 0 | else |
3730 | 0 | dav1d_thread_picture_unref(out_delayed); |
3731 | 0 | dav1d_picture_unref_internal(&f->cur); |
3732 | 0 | dav1d_thread_picture_unref(&f->sr_cur); |
3733 | 0 | dav1d_ref_dec(&f->mvs_ref); |
3734 | 0 | dav1d_ref_dec(&f->seq_hdr_ref); |
3735 | 0 | dav1d_ref_dec(&f->frame_hdr_ref); |
3736 | 0 | dav1d_data_props_copy(&c->cached_error_props, &c->in.m); |
3737 | |
|
3738 | 0 | for (int i = 0; i < f->n_tile_data; i++) |
3739 | 0 | dav1d_data_unref_internal(&f->tile[i].data); |
3740 | 0 | f->n_tile_data = 0; |
3741 | |
|
3742 | 0 | if (c->n_fc > 1) |
3743 | 0 | pthread_mutex_unlock(&c->task_thread.lock); |
3744 | |
|
3745 | 0 | return res; |
3746 | 0 | } |