Coverage Report

Created: 2025-08-28 07:12

/src/ffmpeg/libavcodec/vp9.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * VP9 compatible video decoder
3
 *
4
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6
 *
7
 * This file is part of FFmpeg.
8
 *
9
 * FFmpeg is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * FFmpeg is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with FFmpeg; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23
24
#include "config_components.h"
25
26
#include "avcodec.h"
27
#include "codec_internal.h"
28
#include "decode.h"
29
#include "get_bits.h"
30
#include "hwaccel_internal.h"
31
#include "hwconfig.h"
32
#include "profiles.h"
33
#include "progressframe.h"
34
#include "libavutil/refstruct.h"
35
#include "thread.h"
36
#include "pthread_internal.h"
37
38
#include "videodsp.h"
39
#include "vp89_rac.h"
40
#include "vp9.h"
41
#include "vp9data.h"
42
#include "vp9dec.h"
43
#include "vpx_rac.h"
44
#include "libavutil/avassert.h"
45
#include "libavutil/mem.h"
46
#include "libavutil/pixdesc.h"
47
#include "libavutil/video_enc_params.h"
48
49
20.6k
#define VP9_SYNCCODE 0x498342
50
51
#if HAVE_THREADS
52
DEFINE_OFFSET_ARRAY(VP9Context, vp9_context, pthread_init_cnt,
53
                    (offsetof(VP9Context, progress_mutex)),
54
                    (offsetof(VP9Context, progress_cond)));
55
56
9.94k
static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
57
9.94k
    VP9Context *s = avctx->priv_data;
58
59
9.94k
    if (avctx->active_thread_type & FF_THREAD_SLICE)  {
60
0
        if (s->entries)
61
0
            av_freep(&s->entries);
62
63
0
        s->entries = av_malloc_array(n, sizeof(atomic_int));
64
0
        if (!s->entries)
65
0
            return AVERROR(ENOMEM);
66
0
    }
67
9.94k
    return 0;
68
9.94k
}
69
70
0
static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
71
0
    pthread_mutex_lock(&s->progress_mutex);
72
0
    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
73
0
    pthread_cond_signal(&s->progress_cond);
74
0
    pthread_mutex_unlock(&s->progress_mutex);
75
0
}
76
77
0
static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
78
0
    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
79
0
        return;
80
81
0
    pthread_mutex_lock(&s->progress_mutex);
82
0
    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
83
0
        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
84
0
    pthread_mutex_unlock(&s->progress_mutex);
85
0
}
86
#else
87
static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
88
#endif
89
90
static void vp9_tile_data_free(VP9TileData *td)
91
43.2k
{
92
43.2k
    av_freep(&td->b_base);
93
43.2k
    av_freep(&td->block_base);
94
43.2k
    av_freep(&td->block_structure);
95
43.2k
}
96
97
static void vp9_frame_unref(VP9Frame *f)
98
717k
{
99
717k
    ff_progress_frame_unref(&f->tf);
100
717k
    av_refstruct_unref(&f->header_ref);
101
717k
    av_refstruct_unref(&f->extradata);
102
717k
    av_refstruct_unref(&f->hwaccel_picture_private);
103
717k
    f->segmentation_map = NULL;
104
717k
}
105
106
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
107
171k
{
108
171k
    VP9Context *s = avctx->priv_data;
109
171k
    int ret, sz;
110
111
171k
    ret = ff_progress_frame_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
112
171k
    if (ret < 0)
113
448
        return ret;
114
115
170k
    sz = 64 * s->sb_cols * s->sb_rows;
116
170k
    if (sz != s->frame_extradata_pool_size) {
117
14.7k
        av_refstruct_pool_uninit(&s->frame_extradata_pool);
118
14.7k
        s->frame_extradata_pool = av_refstruct_pool_alloc(sz * (1 + sizeof(VP9mvrefPair)),
119
14.7k
                                                          AV_REFSTRUCT_POOL_FLAG_ZERO_EVERY_TIME);
120
14.7k
        if (!s->frame_extradata_pool) {
121
0
            s->frame_extradata_pool_size = 0;
122
0
            ret = AVERROR(ENOMEM);
123
0
            goto fail;
124
0
        }
125
14.7k
        s->frame_extradata_pool_size = sz;
126
14.7k
    }
127
170k
    f->extradata = av_refstruct_pool_get(s->frame_extradata_pool);
128
170k
    if (!f->extradata) {
129
0
        ret = AVERROR(ENOMEM);
130
0
        goto fail;
131
0
    }
132
133
170k
    f->segmentation_map = f->extradata;
134
170k
    f->mv = (VP9mvrefPair *) ((char*)f->extradata + sz);
135
136
170k
    ret = ff_hwaccel_frame_priv_alloc(avctx, &f->hwaccel_picture_private);
137
170k
    if (ret < 0)
138
0
        goto fail;
139
140
170k
    return 0;
141
142
0
fail:
143
0
    vp9_frame_unref(f);
144
0
    return ret;
145
170k
}
146
147
static void vp9_frame_replace(VP9Frame *dst, const VP9Frame *src)
148
331k
{
149
331k
    av_refstruct_replace(&dst->header_ref, src->header_ref);
150
331k
    dst->frame_header = src->frame_header;
151
152
331k
    ff_progress_frame_replace(&dst->tf, &src->tf);
153
154
331k
    av_refstruct_replace(&dst->extradata, src->extradata);
155
156
331k
    dst->segmentation_map = src->segmentation_map;
157
331k
    dst->mv = src->mv;
158
331k
    dst->uses_2pass = src->uses_2pass;
159
160
331k
    av_refstruct_replace(&dst->hwaccel_picture_private,
161
331k
                          src->hwaccel_picture_private);
162
331k
}
163
164
static int update_size(AVCodecContext *avctx, int w, int h)
165
186k
{
166
186k
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
167
186k
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
168
186k
                     CONFIG_VP9_D3D12VA_HWACCEL + \
169
186k
                     CONFIG_VP9_NVDEC_HWACCEL + \
170
186k
                     CONFIG_VP9_VAAPI_HWACCEL + \
171
186k
                     CONFIG_VP9_VDPAU_HWACCEL + \
172
186k
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL + \
173
186k
                     CONFIG_VP9_VULKAN_HWACCEL)
174
186k
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
175
186k
    VP9Context *s = avctx->priv_data;
176
186k
    uint8_t *p;
177
186k
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
178
186k
    int lflvl_len, i;
179
180
186k
    av_assert0(w > 0 && h > 0);
181
182
186k
    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
183
23.7k
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
184
438
            return ret;
185
186
23.3k
        switch (s->pix_fmt) {
187
3.48k
        case AV_PIX_FMT_YUV420P:
188
5.03k
        case AV_PIX_FMT_YUV420P10:
189
#if CONFIG_VP9_DXVA2_HWACCEL
190
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
191
#endif
192
#if CONFIG_VP9_D3D11VA_HWACCEL
193
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
194
            *fmtp++ = AV_PIX_FMT_D3D11;
195
#endif
196
#if CONFIG_VP9_D3D12VA_HWACCEL
197
            *fmtp++ = AV_PIX_FMT_D3D12;
198
#endif
199
#if CONFIG_VP9_NVDEC_HWACCEL
200
            *fmtp++ = AV_PIX_FMT_CUDA;
201
#endif
202
#if CONFIG_VP9_VAAPI_HWACCEL
203
            *fmtp++ = AV_PIX_FMT_VAAPI;
204
#endif
205
#if CONFIG_VP9_VDPAU_HWACCEL
206
            *fmtp++ = AV_PIX_FMT_VDPAU;
207
#endif
208
#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
209
            *fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
210
#endif
211
#if CONFIG_VP9_VULKAN_HWACCEL
212
            *fmtp++ = AV_PIX_FMT_VULKAN;
213
#endif
214
5.03k
            break;
215
909
        case AV_PIX_FMT_YUV420P12:
216
#if CONFIG_VP9_NVDEC_HWACCEL
217
            *fmtp++ = AV_PIX_FMT_CUDA;
218
#endif
219
#if CONFIG_VP9_VAAPI_HWACCEL
220
            *fmtp++ = AV_PIX_FMT_VAAPI;
221
#endif
222
#if CONFIG_VP9_VDPAU_HWACCEL
223
            *fmtp++ = AV_PIX_FMT_VDPAU;
224
#endif
225
#if CONFIG_VP9_VULKAN_HWACCEL
226
            *fmtp++ = AV_PIX_FMT_VULKAN;
227
#endif
228
909
            break;
229
4.41k
        case AV_PIX_FMT_YUV444P:
230
5.94k
        case AV_PIX_FMT_YUV444P10:
231
8.17k
        case AV_PIX_FMT_YUV444P12:
232
#if CONFIG_VP9_VAAPI_HWACCEL
233
            *fmtp++ = AV_PIX_FMT_VAAPI;
234
#endif
235
#if CONFIG_VP9_VULKAN_HWACCEL
236
            *fmtp++ = AV_PIX_FMT_VULKAN;
237
#endif
238
8.17k
            break;
239
21
        case AV_PIX_FMT_GBRP:
240
61
        case AV_PIX_FMT_GBRP10:
241
68
        case AV_PIX_FMT_GBRP12:
242
#if CONFIG_VP9_VAAPI_HWACCEL
243
            *fmtp++ = AV_PIX_FMT_VAAPI;
244
#endif
245
#if CONFIG_VP9_VULKAN_HWACCEL
246
            *fmtp++ = AV_PIX_FMT_VULKAN;
247
#endif
248
68
            break;
249
23.3k
        }
250
251
23.3k
        *fmtp++ = s->pix_fmt;
252
23.3k
        *fmtp = AV_PIX_FMT_NONE;
253
254
23.3k
        ret = ff_get_format(avctx, pix_fmts);
255
23.3k
        if (ret < 0)
256
0
            return ret;
257
258
23.3k
        avctx->pix_fmt = ret;
259
23.3k
        s->gf_fmt  = s->pix_fmt;
260
23.3k
        s->w = w;
261
23.3k
        s->h = h;
262
23.3k
    }
263
264
185k
    cols = (w + 7) >> 3;
265
185k
    rows = (h + 7) >> 3;
266
267
185k
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
268
162k
        return 0;
269
270
23.2k
    s->last_fmt  = s->pix_fmt;
271
23.2k
    s->sb_cols   = (w + 63) >> 6;
272
23.2k
    s->sb_rows   = (h + 63) >> 6;
273
23.2k
    s->cols      = (w + 7) >> 3;
274
23.2k
    s->rows      = (h + 7) >> 3;
275
23.2k
    lflvl_len    = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
276
277
394k
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
278
23.2k
    av_freep(&s->intra_pred_data[0]);
279
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
280
    // bit of padding shouldn't affect performance...
281
23.2k
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
282
23.2k
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
283
23.2k
    if (!p)
284
0
        return AVERROR(ENOMEM);
285
23.2k
    assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
286
23.2k
    assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
287
23.2k
    assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
288
23.2k
    assign(s->above_y_nnz_ctx,     uint8_t *,             16);
289
23.2k
    assign(s->above_mode_ctx,      uint8_t *,             16);
290
23.2k
    assign(s->above_mv_ctx,        VP9mv(*)[2],           16);
291
23.2k
    assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
292
23.2k
    assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
293
23.2k
    assign(s->above_partition_ctx, uint8_t *,              8);
294
23.2k
    assign(s->above_skip_ctx,      uint8_t *,              8);
295
23.2k
    assign(s->above_txfm_ctx,      uint8_t *,              8);
296
23.2k
    assign(s->above_segpred_ctx,   uint8_t *,              8);
297
23.2k
    assign(s->above_intra_ctx,     uint8_t *,              8);
298
23.2k
    assign(s->above_comp_ctx,      uint8_t *,              8);
299
23.2k
    assign(s->above_ref_ctx,       uint8_t *,              8);
300
23.2k
    assign(s->above_filter_ctx,    uint8_t *,              8);
301
23.2k
    assign(s->lflvl,               VP9Filter *,            lflvl_len);
302
23.2k
#undef assign
303
304
23.2k
    if (s->td) {
305
28.7k
        for (i = 0; i < s->active_tile_cols; i++)
306
14.3k
            vp9_tile_data_free(&s->td[i]);
307
14.3k
    }
308
309
23.2k
    if (s->s.h.bpp != s->last_bpp) {
310
9.42k
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
311
9.42k
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
312
9.42k
        s->last_bpp = s->s.h.bpp;
313
9.42k
    }
314
315
23.2k
    return 0;
316
23.2k
}
317
318
static int update_block_buffers(AVCodecContext *avctx)
319
170k
{
320
170k
    int i;
321
170k
    VP9Context *s = avctx->priv_data;
322
170k
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
323
170k
    VP9TileData *td = &s->td[0];
324
325
170k
    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
326
151k
        return 0;
327
328
18.9k
    vp9_tile_data_free(td);
329
18.9k
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
330
18.9k
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
331
18.9k
    if (s->s.frames[CUR_FRAME].uses_2pass) {
332
0
        int sbs = s->sb_cols * s->sb_rows;
333
334
0
        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
335
0
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
336
0
                                    16 * 16 + 2 * chroma_eobs) * sbs);
337
0
        if (!td->b_base || !td->block_base)
338
0
            return AVERROR(ENOMEM);
339
0
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
340
0
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
341
0
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
342
0
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
343
0
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
344
345
0
        if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
346
0
            td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
347
0
            if (!td->block_structure)
348
0
                return AVERROR(ENOMEM);
349
0
        }
350
18.9k
    } else {
351
18.9k
        for (i = 1; i < s->active_tile_cols; i++)
352
0
            vp9_tile_data_free(&s->td[i]);
353
354
37.8k
        for (i = 0; i < s->active_tile_cols; i++) {
355
18.9k
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
356
18.9k
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
357
18.9k
                                       16 * 16 + 2 * chroma_eobs);
358
18.9k
            if (!s->td[i].b_base || !s->td[i].block_base)
359
0
                return AVERROR(ENOMEM);
360
18.9k
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
361
18.9k
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
362
18.9k
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
363
18.9k
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
364
18.9k
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
365
366
18.9k
            if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
367
6.65k
                s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
368
6.65k
                if (!s->td[i].block_structure)
369
0
                    return AVERROR(ENOMEM);
370
6.65k
            }
371
18.9k
        }
372
18.9k
    }
373
18.9k
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
374
375
18.9k
    return 0;
376
18.9k
}
377
378
// The sign bit is at the end, not the start, of a bit sequence
379
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
380
637k
{
381
637k
    int v = get_bits(gb, n);
382
637k
    return get_bits1(gb) ? -v : v;
383
637k
}
384
385
static av_always_inline int inv_recenter_nonneg(int v, int m)
386
722k
{
387
722k
    if (v > 2 * m)
388
264k
        return v;
389
458k
    if (v & 1)
390
91.8k
        return m - ((v + 1) >> 1);
391
366k
    return m + (v >> 1);
392
458k
}
393
394
// differential forward probability updates
395
static int update_prob(VPXRangeCoder *c, int p)
396
722k
{
397
722k
    static const uint8_t inv_map_table[255] = {
398
722k
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
399
722k
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
400
722k
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
401
722k
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
402
722k
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
403
722k
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
404
722k
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
405
722k
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
406
722k
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
407
722k
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
408
722k
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
409
722k
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
410
722k
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
411
722k
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
412
722k
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
413
722k
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
414
722k
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
415
722k
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
416
722k
        252, 253, 253,
417
722k
    };
418
722k
    int d;
419
420
    /* This code is trying to do a differential probability update. For a
421
     * current probability A in the range [1, 255], the difference to a new
422
     * probability of any value can be expressed differentially as 1-A, 255-A
423
     * where some part of this (absolute range) exists both in positive as
424
     * well as the negative part, whereas another part only exists in one
425
     * half. We're trying to code this shared part differentially, i.e.
426
     * times two where the value of the lowest bit specifies the sign, and
427
     * the single part is then coded on top of this. This absolute difference
428
     * then again has a value of [0, 254], but a bigger value in this range
429
     * indicates that we're further away from the original value A, so we
430
     * can code this as a VLC code, since higher values are increasingly
431
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
432
     * updates vs. the 'fine, exact' updates further down the range, which
433
     * adds one extra dimension to this differential update model. */
434
435
722k
    if (!vp89_rac_get(c)) {
436
215k
        d = vp89_rac_get_uint(c, 4) + 0;
437
507k
    } else if (!vp89_rac_get(c)) {
438
269k
        d = vp89_rac_get_uint(c, 4) + 16;
439
269k
    } else if (!vp89_rac_get(c)) {
440
108k
        d = vp89_rac_get_uint(c, 5) + 32;
441
129k
    } else {
442
129k
        d = vp89_rac_get_uint(c, 7);
443
129k
        if (d >= 65)
444
108k
            d = (d << 1) - 65 + vp89_rac_get(c);
445
129k
        d += 64;
446
129k
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
447
129k
    }
448
449
722k
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
450
722k
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
451
722k
}
452
453
static int read_colorspace_details(AVCodecContext *avctx)
454
17.7k
{
455
17.7k
    static const enum AVColorSpace colorspaces[8] = {
456
17.7k
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
457
17.7k
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
458
17.7k
    };
459
17.7k
    VP9Context *s = avctx->priv_data;
460
17.7k
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
461
462
17.7k
    s->bpp_index = bits;
463
17.7k
    s->s.h.bpp = 8 + bits * 2;
464
17.7k
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
465
17.7k
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
466
17.7k
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
467
473
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
468
473
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
469
473
        };
470
473
        s->ss_h = s->ss_v = 0;
471
473
        avctx->color_range = AVCOL_RANGE_JPEG;
472
473
        s->pix_fmt = pix_fmt_rgb[bits];
473
473
        if (avctx->profile & 1) {
474
51
            if (get_bits1(&s->gb)) {
475
0
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
476
0
                return AVERROR_INVALIDDATA;
477
0
            }
478
422
        } else {
479
422
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
480
422
                   avctx->profile);
481
422
            return AVERROR_INVALIDDATA;
482
422
        }
483
17.2k
    } else {
484
17.2k
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
485
17.2k
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
486
17.2k
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
487
17.2k
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
488
17.2k
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
489
17.2k
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
490
17.2k
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
491
17.2k
        };
492
17.2k
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
493
17.2k
        if (avctx->profile & 1) {
494
15.1k
            s->ss_h = get_bits1(&s->gb);
495
15.1k
            s->ss_v = get_bits1(&s->gb);
496
15.1k
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
497
15.1k
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
498
8
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
499
8
                       avctx->profile);
500
8
                return AVERROR_INVALIDDATA;
501
15.1k
            } else if (get_bits1(&s->gb)) {
502
0
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
503
0
                       avctx->profile);
504
0
                return AVERROR_INVALIDDATA;
505
0
            }
506
15.1k
        } else {
507
2.09k
            s->ss_h = s->ss_v = 1;
508
2.09k
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
509
2.09k
        }
510
17.2k
    }
511
512
17.3k
    return 0;
513
17.7k
}
514
515
static int decode_frame_header(AVCodecContext *avctx,
516
                               const uint8_t *data, int size, int *ref)
517
208k
{
518
208k
    VP9Context *s = avctx->priv_data;
519
208k
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
520
208k
    int last_invisible;
521
208k
    const uint8_t *data2;
522
523
    /* general header */
524
208k
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
525
0
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
526
0
        return ret;
527
0
    }
528
208k
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
529
4.88k
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
530
4.88k
        return AVERROR_INVALIDDATA;
531
4.88k
    }
532
203k
    avctx->profile  = get_bits1(&s->gb);
533
203k
    avctx->profile |= get_bits1(&s->gb) << 1;
534
203k
    if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
535
203k
    if (avctx->profile > 3) {
536
0
        av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
537
0
        return AVERROR_INVALIDDATA;
538
0
    }
539
203k
    s->s.h.profile = avctx->profile;
540
203k
    if (get_bits1(&s->gb)) {
541
11.1k
        *ref = get_bits(&s->gb, 3);
542
11.1k
        return 0;
543
11.1k
    }
544
545
192k
    s->last_keyframe  = s->s.h.keyframe;
546
192k
    s->s.h.keyframe   = !get_bits1(&s->gb);
547
548
192k
    last_invisible   = s->s.h.invisible;
549
192k
    s->s.h.invisible = !get_bits1(&s->gb);
550
192k
    s->s.h.errorres  = get_bits1(&s->gb);
551
192k
    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
552
553
192k
    if (s->s.h.keyframe) {
554
14.2k
        if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
555
0
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
556
0
            return AVERROR_INVALIDDATA;
557
0
        }
558
14.2k
        if ((ret = read_colorspace_details(avctx)) < 0)
559
8
            return ret;
560
        // for profile 1, here follows the subsampling bits
561
14.2k
        s->s.h.refreshrefmask = 0xff;
562
14.2k
        w = get_bits(&s->gb, 16) + 1;
563
14.2k
        h = get_bits(&s->gb, 16) + 1;
564
14.2k
        if (get_bits1(&s->gb)) // display size
565
224
            skip_bits(&s->gb, 32);
566
178k
    } else {
567
178k
        s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
568
178k
        s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
569
178k
        if (s->s.h.intraonly) {
570
6.35k
            if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
571
0
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
572
0
                return AVERROR_INVALIDDATA;
573
0
            }
574
6.35k
            if (avctx->profile >= 1) {
575
3.47k
                if ((ret = read_colorspace_details(avctx)) < 0)
576
422
                    return ret;
577
3.47k
            } else {
578
2.88k
                s->ss_h = s->ss_v = 1;
579
2.88k
                s->s.h.bpp = 8;
580
2.88k
                s->bpp_index = 0;
581
2.88k
                s->bytesperpixel = 1;
582
2.88k
                s->pix_fmt = AV_PIX_FMT_YUV420P;
583
2.88k
                avctx->colorspace = AVCOL_SPC_BT470BG;
584
2.88k
                avctx->color_range = AVCOL_RANGE_MPEG;
585
2.88k
            }
586
5.93k
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
587
5.93k
            w = get_bits(&s->gb, 16) + 1;
588
5.93k
            h = get_bits(&s->gb, 16) + 1;
589
5.93k
            if (get_bits1(&s->gb)) // display size
590
1.16k
                skip_bits(&s->gb, 32);
591
171k
        } else {
592
171k
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
593
171k
            s->s.h.refidx[0]      = get_bits(&s->gb, 3);
594
171k
            s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
595
171k
            s->s.h.refidx[1]      = get_bits(&s->gb, 3);
596
171k
            s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
597
171k
            s->s.h.refidx[2]      = get_bits(&s->gb, 3);
598
171k
            s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
599
171k
            if (!s->s.refs[s->s.h.refidx[0]].f ||
600
171k
                !s->s.refs[s->s.h.refidx[1]].f ||
601
171k
                !s->s.refs[s->s.h.refidx[2]].f) {
602
5.53k
                av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
603
5.53k
                return AVERROR_INVALIDDATA;
604
5.53k
            }
605
166k
            if (get_bits1(&s->gb)) {
606
126k
                w = s->s.refs[s->s.h.refidx[0]].f->width;
607
126k
                h = s->s.refs[s->s.h.refidx[0]].f->height;
608
126k
            } else if (get_bits1(&s->gb)) {
609
16.1k
                w = s->s.refs[s->s.h.refidx[1]].f->width;
610
16.1k
                h = s->s.refs[s->s.h.refidx[1]].f->height;
611
23.4k
            } else if (get_bits1(&s->gb)) {
612
5.09k
                w = s->s.refs[s->s.h.refidx[2]].f->width;
613
5.09k
                h = s->s.refs[s->s.h.refidx[2]].f->height;
614
18.3k
            } else {
615
18.3k
                w = get_bits(&s->gb, 16) + 1;
616
18.3k
                h = get_bits(&s->gb, 16) + 1;
617
18.3k
            }
618
            // Note that in this code, "CUR_FRAME" is actually before we
619
            // have formally allocated a frame, and thus actually represents
620
            // the _last_ frame
621
166k
            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f &&
622
166k
                                         s->s.frames[CUR_FRAME].tf.f->width == w &&
623
166k
                                       s->s.frames[CUR_FRAME].tf.f->height == h;
624
166k
            if (get_bits1(&s->gb)) // display size
625
155k
                skip_bits(&s->gb, 32);
626
166k
            s->s.h.highprecisionmvs = get_bits1(&s->gb);
627
166k
            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
628
166k
                                                  get_bits(&s->gb, 2);
629
166k
            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
630
166k
                                  s->s.h.signbias[0] != s->s.h.signbias[2];
631
166k
            if (s->s.h.allowcompinter) {
632
164k
                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
633
16.1k
                    s->s.h.fixcompref    = 2;
634
16.1k
                    s->s.h.varcompref[0] = 0;
635
16.1k
                    s->s.h.varcompref[1] = 1;
636
147k
                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
637
18.0k
                    s->s.h.fixcompref    = 1;
638
18.0k
                    s->s.h.varcompref[0] = 0;
639
18.0k
                    s->s.h.varcompref[1] = 2;
640
129k
                } else {
641
129k
                    s->s.h.fixcompref    = 0;
642
129k
                    s->s.h.varcompref[0] = 1;
643
129k
                    s->s.h.varcompref[1] = 2;
644
129k
                }
645
164k
            }
646
166k
        }
647
178k
    }
648
186k
    s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
649
186k
    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
650
186k
    s->s.h.framectxid   = c = get_bits(&s->gb, 2);
651
186k
    if (s->s.h.keyframe || s->s.h.intraonly)
652
20.1k
        s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
653
654
    /* loopfilter header data */
655
186k
    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
656
        // reset loopfilter defaults
657
20.2k
        s->s.h.lf_delta.ref[0] = 1;
658
20.2k
        s->s.h.lf_delta.ref[1] = 0;
659
20.2k
        s->s.h.lf_delta.ref[2] = -1;
660
20.2k
        s->s.h.lf_delta.ref[3] = -1;
661
20.2k
        s->s.h.lf_delta.mode[0] = 0;
662
20.2k
        s->s.h.lf_delta.mode[1] = 0;
663
20.2k
        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
664
20.2k
    }
665
186k
    s->s.h.filter.level = get_bits(&s->gb, 6);
666
186k
    sharp = get_bits(&s->gb, 3);
667
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
668
    // the old cache values since they are still valid
669
186k
    if (s->s.h.filter.sharpness != sharp) {
670
1.73M
        for (i = 1; i <= 63; i++) {
671
1.70M
            int limit = i;
672
673
1.70M
            if (sharp > 0) {
674
1.20M
                limit >>= (sharp + 3) >> 2;
675
1.20M
                limit = FFMIN(limit, 9 - sharp);
676
1.20M
            }
677
1.70M
            limit = FFMAX(limit, 1);
678
679
1.70M
            s->filter_lut.lim_lut[i] = limit;
680
1.70M
            s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
681
1.70M
        }
682
27.0k
    }
683
186k
    s->s.h.filter.sharpness = sharp;
684
186k
    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
685
42.0k
        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
686
117k
            for (i = 0; i < 4; i++)
687
94.0k
                if (get_bits1(&s->gb))
688
73.9k
                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
689
70.5k
            for (i = 0; i < 2; i++)
690
47.0k
                if (get_bits1(&s->gb))
691
27.6k
                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
692
23.5k
        }
693
42.0k
    }
694
695
    /* quantization header data */
696
186k
    s->s.h.yac_qi      = get_bits(&s->gb, 8);
697
186k
    s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
698
186k
    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
699
186k
    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
700
186k
    s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
701
186k
                       s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
702
186k
#if FF_API_CODEC_PROPS
703
186k
FF_DISABLE_DEPRECATION_WARNINGS
704
186k
    if (s->s.h.lossless)
705
10.8k
        avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
706
186k
FF_ENABLE_DEPRECATION_WARNINGS
707
186k
#endif
708
709
    /* segmentation header info */
710
186k
    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
711
156k
        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
712
1.20M
            for (i = 0; i < 7; i++)
713
1.05M
                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
714
751k
                                 get_bits(&s->gb, 8) : 255;
715
150k
            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
716
102k
                for (i = 0; i < 3; i++)
717
76.6k
                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
718
47.7k
                                         get_bits(&s->gb, 8) : 255;
719
150k
        }
720
721
156k
        if (get_bits1(&s->gb)) {
722
20.5k
            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
723
184k
            for (i = 0; i < 8; i++) {
724
164k
                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
725
111k
                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
726
164k
                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
727
84.6k
                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
728
164k
                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
729
66.7k
                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
730
164k
                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
731
164k
            }
732
20.5k
        }
733
156k
    } else {
734
        // Reset fields under segmentation switch if segmentation is disabled.
735
        // This is necessary because some hwaccels don't ignore these fields
736
        // if segmentation is disabled.
737
30.0k
        s->s.h.segmentation.temporal = 0;
738
30.0k
        s->s.h.segmentation.update_map = 0;
739
30.0k
    }
740
741
    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
742
1.46M
    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
743
1.28M
        int qyac, qydc, quvac, quvdc, lflvl, sh;
744
745
1.28M
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
746
454k
            if (s->s.h.segmentation.absolute_vals)
747
400k
                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
748
54.5k
            else
749
54.5k
                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
750
825k
        } else {
751
825k
            qyac  = s->s.h.yac_qi;
752
825k
        }
753
1.28M
        qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
754
1.28M
        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
755
1.28M
        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
756
1.28M
        qyac  = av_clip_uintp2(qyac, 8);
757
758
1.28M
        s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
759
1.28M
        s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
760
1.28M
        s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
761
1.28M
        s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
762
763
1.28M
        sh = s->s.h.filter.level >= 32;
764
1.28M
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
765
381k
            if (s->s.h.segmentation.absolute_vals)
766
320k
                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
767
60.7k
            else
768
60.7k
                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
769
899k
        } else {
770
899k
            lflvl  = s->s.h.filter.level;
771
899k
        }
772
1.28M
        if (s->s.h.lf_delta.enabled) {
773
281k
            s->s.h.segmentation.feat[i].lflvl[0][0] =
774
281k
            s->s.h.segmentation.feat[i].lflvl[0][1] =
775
281k
                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
776
1.12M
            for (j = 1; j < 4; j++) {
777
845k
                s->s.h.segmentation.feat[i].lflvl[j][0] =
778
845k
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
779
845k
                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
780
845k
                s->s.h.segmentation.feat[i].lflvl[j][1] =
781
845k
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
782
845k
                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
783
845k
            }
784
998k
        } else {
785
998k
            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
786
998k
                   sizeof(s->s.h.segmentation.feat[i].lflvl));
787
998k
        }
788
1.28M
    }
789
790
    /* tiling info */
791
186k
    if ((ret = update_size(avctx, w, h)) < 0) {
792
438
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
793
438
               w, h, s->pix_fmt);
794
438
        return ret;
795
438
    }
796
185k
    for (s->s.h.tiling.log2_tile_cols = 0;
797
186k
         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
798
185k
         s->s.h.tiling.log2_tile_cols++) ;
799
364k
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
800
185k
    max = FFMAX(0, max - 1);
801
186k
    while (max > s->s.h.tiling.log2_tile_cols) {
802
72.7k
        if (get_bits1(&s->gb))
803
671
            s->s.h.tiling.log2_tile_cols++;
804
72.1k
        else
805
72.1k
            break;
806
72.7k
    }
807
185k
    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
808
185k
    s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
809
185k
    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
810
9.94k
        int n_range_coders;
811
9.94k
        VPXRangeCoder *rc;
812
813
9.94k
        if (s->td) {
814
2.26k
            for (i = 0; i < s->active_tile_cols; i++)
815
1.13k
                vp9_tile_data_free(&s->td[i]);
816
1.13k
            av_freep(&s->td);
817
1.13k
        }
818
819
9.94k
        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
820
9.94k
        s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
821
9.94k
                              s->s.h.tiling.tile_cols : 1;
822
9.94k
        vp9_alloc_entries(avctx, s->sb_rows);
823
9.94k
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
824
0
            n_range_coders = 4; // max_tile_rows
825
9.94k
        } else {
826
9.94k
            n_range_coders = s->s.h.tiling.tile_cols;
827
9.94k
        }
828
9.94k
        s->td = av_calloc(s->active_tile_cols, sizeof(VP9TileData) +
829
9.94k
                                 n_range_coders * sizeof(VPXRangeCoder));
830
9.94k
        if (!s->td)
831
0
            return AVERROR(ENOMEM);
832
9.94k
        rc = (VPXRangeCoder *) &s->td[s->active_tile_cols];
833
19.8k
        for (i = 0; i < s->active_tile_cols; i++) {
834
9.94k
            s->td[i].s = s;
835
9.94k
            s->td[i].c_b = rc;
836
9.94k
            rc += n_range_coders;
837
9.94k
        }
838
9.94k
    }
839
840
    /* check reference frames */
841
185k
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
842
166k
        int valid_ref_frame = 0;
843
661k
        for (i = 0; i < 3; i++) {
844
496k
            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
845
496k
            int refw = ref->width, refh = ref->height;
846
847
496k
            if (ref->format != avctx->pix_fmt) {
848
753
                av_log(avctx, AV_LOG_ERROR,
849
753
                       "Ref pixfmt (%s) did not match current frame (%s)",
850
753
                       av_get_pix_fmt_name(ref->format),
851
753
                       av_get_pix_fmt_name(avctx->pix_fmt));
852
753
                return AVERROR_INVALIDDATA;
853
495k
            } else if (refw == w && refh == h) {
854
418k
                s->mvscale[i][0] = s->mvscale[i][1] = 0;
855
418k
            } else {
856
                /* Check to make sure at least one of frames that */
857
                /* this frame references has valid dimensions     */
858
77.5k
                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
859
17.3k
                    av_log(avctx, AV_LOG_WARNING,
860
17.3k
                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
861
17.3k
                           refw, refh, w, h);
862
17.3k
                    s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
863
17.3k
                    continue;
864
17.3k
                }
865
60.1k
                s->mvscale[i][0] = (refw << 14) / w;
866
60.1k
                s->mvscale[i][1] = (refh << 14) / h;
867
60.1k
                s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
868
60.1k
                s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
869
60.1k
            }
870
478k
            valid_ref_frame++;
871
478k
        }
872
165k
        if (!valid_ref_frame) {
873
5.01k
            av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
874
5.01k
            return AVERROR_INVALIDDATA;
875
5.01k
        }
876
165k
    }
877
878
180k
    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
879
19.6k
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
880
19.6k
                           s->prob_ctx[3].p = ff_vp9_default_probs;
881
19.6k
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
882
19.6k
               sizeof(ff_vp9_default_coef_probs));
883
19.6k
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
884
19.6k
               sizeof(ff_vp9_default_coef_probs));
885
19.6k
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
886
19.6k
               sizeof(ff_vp9_default_coef_probs));
887
19.6k
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
888
19.6k
               sizeof(ff_vp9_default_coef_probs));
889
160k
    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
890
13
        s->prob_ctx[c].p = ff_vp9_default_probs;
891
13
        memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
892
13
               sizeof(ff_vp9_default_coef_probs));
893
13
    }
894
895
    // next 16 bits is size of the rest of the header (arith-coded)
896
180k
    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
897
180k
    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
898
899
180k
    data2 = align_get_bits(&s->gb);
900
180k
    if (size2 > size - (data2 - data)) {
901
5.09k
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
902
5.09k
        return AVERROR_INVALIDDATA;
903
5.09k
    }
904
175k
    ret = ff_vpx_init_range_decoder(&s->c, data2, size2);
905
175k
    if (ret < 0)
906
2.63k
        return ret;
907
908
172k
    if (vpx_rac_get_prob_branchy(&s->c, 128)) { // marker bit
909
1.19k
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
910
1.19k
        return AVERROR_INVALIDDATA;
911
1.19k
    }
912
913
342k
    for (i = 0; i < s->active_tile_cols; i++) {
914
171k
        if (s->s.h.keyframe || s->s.h.intraonly) {
915
18.5k
            memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
916
18.5k
            memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
917
152k
        } else {
918
152k
            memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
919
152k
        }
920
171k
        s->td[i].nb_block_structure = 0;
921
171k
    }
922
923
    /* FIXME is it faster to not copy here, but do it down in the fw updates
924
     * as explicit copies if the fw update is missing (and skip the copy upon
925
     * fw update)? */
926
171k
    s->prob.p = s->prob_ctx[c].p;
927
928
    // txfm updates
929
171k
    if (s->s.h.lossless) {
930
10.1k
        s->s.h.txfmmode = TX_4X4;
931
161k
    } else {
932
161k
        s->s.h.txfmmode = vp89_rac_get_uint(&s->c, 2);
933
161k
        if (s->s.h.txfmmode == 3)
934
26.8k
            s->s.h.txfmmode += vp89_rac_get(&s->c);
935
936
161k
        if (s->s.h.txfmmode == TX_SWITCHABLE) {
937
28.8k
            for (i = 0; i < 2; i++)
938
19.2k
                if (vpx_rac_get_prob_branchy(&s->c, 252))
939
101
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
940
28.8k
            for (i = 0; i < 2; i++)
941
57.7k
                for (j = 0; j < 2; j++)
942
38.4k
                    if (vpx_rac_get_prob_branchy(&s->c, 252))
943
158
                        s->prob.p.tx16p[i][j] =
944
158
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
945
28.8k
            for (i = 0; i < 2; i++)
946
76.9k
                for (j = 0; j < 3; j++)
947
57.7k
                    if (vpx_rac_get_prob_branchy(&s->c, 252))
948
71
                        s->prob.p.tx32p[i][j] =
949
71
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
950
9.62k
        }
951
161k
    }
952
953
    // coef updates
954
271k
    for (i = 0; i < 4; i++) {
955
261k
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
956
261k
        if (vp89_rac_get(&s->c)) {
957
126k
            for (j = 0; j < 2; j++)
958
252k
                for (k = 0; k < 2; k++)
959
1.17M
                    for (l = 0; l < 6; l++)
960
6.56M
                        for (m = 0; m < 6; m++) {
961
5.72M
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
962
5.72M
                            uint8_t *r = ref[j][k][l][m];
963
5.72M
                            if (m >= 3 && l == 0) // dc only has 3 pt
964
168k
                                break;
965
22.2M
                            for (n = 0; n < 3; n++) {
966
16.6M
                                if (vpx_rac_get_prob_branchy(&s->c, 252))
967
269k
                                    p[n] = update_prob(&s->c, r[n]);
968
16.3M
                                else
969
16.3M
                                    p[n] = r[n];
970
16.6M
                            }
971
5.55M
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
972
5.55M
                        }
973
219k
        } else {
974
659k
            for (j = 0; j < 2; j++)
975
1.31M
                for (k = 0; k < 2; k++)
976
6.15M
                    for (l = 0; l < 6; l++)
977
35.1M
                        for (m = 0; m < 6; m++) {
978
30.7M
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
979
30.7M
                            uint8_t *r = ref[j][k][l][m];
980
30.7M
                            if (m > 3 && l == 0) // dc only has 3 pt
981
879k
                                break;
982
29.8M
                            memcpy(p, r, 3);
983
29.8M
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
984
29.8M
                        }
985
219k
        }
986
261k
        if (s->s.h.txfmmode == i)
987
161k
            break;
988
261k
    }
989
990
    // mode updates
991
685k
    for (i = 0; i < 3; i++)
992
513k
        if (vpx_rac_get_prob_branchy(&s->c, 252))
993
1.60k
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
994
171k
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
995
1.22M
        for (i = 0; i < 7; i++)
996
4.27M
            for (j = 0; j < 3; j++)
997
3.20M
                if (vpx_rac_get_prob_branchy(&s->c, 252))
998
88.3k
                    s->prob.p.mv_mode[i][j] =
999
88.3k
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);
1000
1001
152k
        if (s->s.h.filtermode == FILTER_SWITCHABLE)
1002
171k
            for (i = 0; i < 4; i++)
1003
412k
                for (j = 0; j < 2; j++)
1004
275k
                    if (vpx_rac_get_prob_branchy(&s->c, 252))
1005
12.1k
                        s->prob.p.filter[i][j] =
1006
12.1k
                            update_prob(&s->c, s->prob.p.filter[i][j]);
1007
1008
763k
        for (i = 0; i < 4; i++)
1009
610k
            if (vpx_rac_get_prob_branchy(&s->c, 252))
1010
37.2k
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
1011
1012
152k
        if (s->s.h.allowcompinter) {
1013
152k
            s->s.h.comppredmode = vp89_rac_get(&s->c);
1014
152k
            if (s->s.h.comppredmode)
1015
131k
                s->s.h.comppredmode += vp89_rac_get(&s->c);
1016
152k
            if (s->s.h.comppredmode == PRED_SWITCHABLE)
1017
261k
                for (i = 0; i < 5; i++)
1018
218k
                    if (vpx_rac_get_prob_branchy(&s->c, 252))
1019
68
                        s->prob.p.comp[i] =
1020
68
                            update_prob(&s->c, s->prob.p.comp[i]);
1021
152k
        } else {
1022
394
            s->s.h.comppredmode = PRED_SINGLEREF;
1023
394
        }
1024
1025
152k
        if (s->s.h.comppredmode != PRED_COMPREF) {
1026
391k
            for (i = 0; i < 5; i++) {
1027
325k
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1028
613
                    s->prob.p.single_ref[i][0] =
1029
613
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
1030
325k
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1031
4.74k
                    s->prob.p.single_ref[i][1] =
1032
4.74k
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
1033
325k
            }
1034
65.1k
        }
1035
1036
152k
        if (s->s.h.comppredmode != PRED_SINGLEREF) {
1037
786k
            for (i = 0; i < 5; i++)
1038
655k
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1039
76.8k
                    s->prob.p.comp_ref[i] =
1040
76.8k
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
1041
131k
        }
1042
1043
763k
        for (i = 0; i < 4; i++)
1044
6.10M
            for (j = 0; j < 9; j++)
1045
5.49M
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1046
20.7k
                    s->prob.p.y_mode[i][j] =
1047
20.7k
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);
1048
1049
763k
        for (i = 0; i < 4; i++)
1050
3.05M
            for (j = 0; j < 4; j++)
1051
9.77M
                for (k = 0; k < 3; k++)
1052
7.32M
                    if (vpx_rac_get_prob_branchy(&s->c, 252))
1053
210k
                        s->prob.p.partition[3 - i][j][k] =
1054
210k
                            update_prob(&s->c,
1055
210k
                                        s->prob.p.partition[3 - i][j][k]);
1056
1057
        // mv fields don't use the update_prob subexp model for some reason
1058
610k
        for (i = 0; i < 3; i++)
1059
458k
            if (vpx_rac_get_prob_branchy(&s->c, 252))
1060
1.26k
                s->prob.p.mv_joint[i] = (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1061
1062
458k
        for (i = 0; i < 2; i++) {
1063
305k
            if (vpx_rac_get_prob_branchy(&s->c, 252))
1064
4.49k
                s->prob.p.mv_comp[i].sign =
1065
4.49k
                    (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1066
1067
3.35M
            for (j = 0; j < 10; j++)
1068
3.05M
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1069
46.9k
                    s->prob.p.mv_comp[i].classes[j] =
1070
46.9k
                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1071
1072
305k
            if (vpx_rac_get_prob_branchy(&s->c, 252))
1073
128
                s->prob.p.mv_comp[i].class0 =
1074
128
                    (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1075
1076
3.35M
            for (j = 0; j < 10; j++)
1077
3.05M
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1078
4.72k
                    s->prob.p.mv_comp[i].bits[j] =
1079
4.72k
                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1080
305k
        }
1081
1082
458k
        for (i = 0; i < 2; i++) {
1083
916k
            for (j = 0; j < 2; j++)
1084
2.44M
                for (k = 0; k < 3; k++)
1085
1.83M
                    if (vpx_rac_get_prob_branchy(&s->c, 252))
1086
25.8k
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
1087
25.8k
                            (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1088
1089
1.22M
            for (j = 0; j < 3; j++)
1090
916k
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1091
1.23k
                    s->prob.p.mv_comp[i].fp[j] =
1092
1.23k
                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1093
305k
        }
1094
1095
152k
        if (s->s.h.highprecisionmvs) {
1096
376k
            for (i = 0; i < 2; i++) {
1097
250k
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1098
84
                    s->prob.p.mv_comp[i].class0_hp =
1099
84
                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1100
1101
250k
                if (vpx_rac_get_prob_branchy(&s->c, 252))
1102
175
                    s->prob.p.mv_comp[i].hp =
1103
175
                        (vp89_rac_get_uint(&s->c, 7) << 1) | 1;
1104
250k
            }
1105
125k
        }
1106
152k
    }
1107
1108
171k
    return (data2 - data) + size2;
1109
172k
}
1110
1111
static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1112
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1113
24.7M
{
1114
24.7M
    const VP9Context *s = td->s;
1115
24.7M
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1116
24.7M
            (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1117
24.7M
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1118
24.7M
                                                     s->prob.p.partition[bl][c];
1119
24.7M
    enum BlockPartition bp;
1120
24.7M
    ptrdiff_t hbs = 4 >> bl;
1121
24.7M
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1122
24.7M
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1123
24.7M
    int bytesperpixel = s->bytesperpixel;
1124
1125
24.7M
    if (bl == BL_8X8) {
1126
12.0M
        bp = vp89_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1127
12.0M
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1128
12.6M
    } else if (col + hbs < s->cols) { // FIXME why not <=?
1129
8.87M
        if (row + hbs < s->rows) { // FIXME why not <=?
1130
6.76M
            bp = vp89_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1131
6.76M
            switch (bp) {
1132
2.27M
            case PARTITION_NONE:
1133
2.27M
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1134
2.27M
                break;
1135
467k
            case PARTITION_H:
1136
467k
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1137
467k
                yoff  += hbs * 8 * y_stride;
1138
467k
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
1139
467k
                ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
1140
467k
                break;
1141
569k
            case PARTITION_V:
1142
569k
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1143
569k
                yoff  += hbs * 8 * bytesperpixel;
1144
569k
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1145
569k
                ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1146
569k
                break;
1147
3.45M
            case PARTITION_SPLIT:
1148
3.45M
                decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1149
3.45M
                decode_sb(td, row, col + hbs, lflvl,
1150
3.45M
                          yoff + 8 * hbs * bytesperpixel,
1151
3.45M
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1152
3.45M
                yoff  += hbs * 8 * y_stride;
1153
3.45M
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
1154
3.45M
                decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1155
3.45M
                decode_sb(td, row + hbs, col + hbs, lflvl,
1156
3.45M
                          yoff + 8 * hbs * bytesperpixel,
1157
3.45M
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1158
3.45M
                break;
1159
0
            default:
1160
0
                av_unreachable("ff_vp9_partition_tree only has "
1161
6.76M
                               "the four PARTITION_* terminal codes");
1162
6.76M
            }
1163
6.76M
        } else if (vpx_rac_get_prob_branchy(td->c, p[1])) {
1164
1.32M
            bp = PARTITION_SPLIT;
1165
1.32M
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1166
1.32M
            decode_sb(td, row, col + hbs, lflvl,
1167
1.32M
                      yoff + 8 * hbs * bytesperpixel,
1168
1.32M
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1169
1.32M
        } else {
1170
788k
            bp = PARTITION_H;
1171
788k
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1172
788k
        }
1173
8.87M
    } else if (row + hbs < s->rows) { // FIXME why not <=?
1174
3.55M
        if (vpx_rac_get_prob_branchy(td->c, p[2])) {
1175
2.44M
            bp = PARTITION_SPLIT;
1176
2.44M
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1177
2.44M
            yoff  += hbs * 8 * y_stride;
1178
2.44M
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
1179
2.44M
            decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1180
2.44M
        } else {
1181
1.10M
            bp = PARTITION_V;
1182
1.10M
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1183
1.10M
        }
1184
3.55M
    } else {
1185
245k
        bp = PARTITION_SPLIT;
1186
245k
        decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1187
245k
    }
1188
24.7M
    td->counts.partition[bl][c][bp]++;
1189
24.7M
}
1190
1191
static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1192
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1193
0
{
1194
0
    const VP9Context *s = td->s;
1195
0
    VP9Block *b = td->b;
1196
0
    ptrdiff_t hbs = 4 >> bl;
1197
0
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1198
0
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1199
0
    int bytesperpixel = s->bytesperpixel;
1200
1201
0
    if (bl == BL_8X8) {
1202
0
        av_assert2(b->bl == BL_8X8);
1203
0
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1204
0
    } else if (td->b->bl == bl) {
1205
0
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1206
0
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
1207
0
            yoff  += hbs * 8 * y_stride;
1208
0
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
1209
0
            ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1210
0
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1211
0
            yoff  += hbs * 8 * bytesperpixel;
1212
0
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1213
0
            ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
1214
0
        }
1215
0
    } else {
1216
0
        decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1217
0
        if (col + hbs < s->cols) { // FIXME why not <=?
1218
0
            if (row + hbs < s->rows) {
1219
0
                decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1220
0
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1221
0
                yoff  += hbs * 8 * y_stride;
1222
0
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
1223
0
                decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1224
0
                decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1225
0
                              yoff + 8 * hbs * bytesperpixel,
1226
0
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1227
0
            } else {
1228
0
                yoff  += hbs * 8 * bytesperpixel;
1229
0
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1230
0
                decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1231
0
            }
1232
0
        } else if (row + hbs < s->rows) {
1233
0
            yoff  += hbs * 8 * y_stride;
1234
0
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
1235
0
            decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1236
0
        }
1237
0
    }
1238
0
}
1239
1240
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
1241
1.40M
{
1242
1.40M
    int sb_start = ( idx      * n) >> log2_n;
1243
1.40M
    int sb_end   = ((idx + 1) * n) >> log2_n;
1244
1.40M
    *start = FFMIN(sb_start, n) << 3;
1245
1.40M
    *end   = FFMIN(sb_end,   n) << 3;
1246
1.40M
}
1247
1248
static void free_buffers(VP9Context *s)
1249
9.77k
{
1250
9.77k
    int i;
1251
1252
9.77k
    av_freep(&s->intra_pred_data[0]);
1253
18.5k
    for (i = 0; i < s->active_tile_cols; i++)
1254
8.81k
        vp9_tile_data_free(&s->td[i]);
1255
9.77k
}
1256
1257
static av_cold int vp9_decode_free(AVCodecContext *avctx)
1258
9.77k
{
1259
9.77k
    VP9Context *s = avctx->priv_data;
1260
9.77k
    int i;
1261
1262
39.1k
    for (int i = 0; i < 3; i++)
1263
29.3k
        vp9_frame_unref(&s->s.frames[i]);
1264
9.77k
    av_refstruct_pool_uninit(&s->frame_extradata_pool);
1265
88.0k
    for (i = 0; i < 8; i++) {
1266
78.2k
        ff_progress_frame_unref(&s->s.refs[i]);
1267
78.2k
        ff_progress_frame_unref(&s->next_refs[i]);
1268
78.2k
        vp9_frame_unref(&s->s.ref_frames[i]);
1269
78.2k
    }
1270
1271
9.77k
    free_buffers(s);
1272
9.77k
#if HAVE_THREADS
1273
9.77k
    av_freep(&s->entries);
1274
9.77k
    ff_pthread_free(s, vp9_context_offsets);
1275
9.77k
#endif
1276
1277
9.77k
    av_refstruct_unref(&s->header_ref);
1278
9.77k
    ff_cbs_fragment_free(&s->current_frag);
1279
9.77k
    ff_cbs_close(&s->cbc);
1280
1281
9.77k
    av_freep(&s->td);
1282
9.77k
    return 0;
1283
9.77k
}
1284
1285
static int decode_tiles(AVCodecContext *avctx,
1286
                        const uint8_t *data, int size)
1287
170k
{
1288
170k
    VP9Context *s = avctx->priv_data;
1289
170k
    VP9TileData *td = &s->td[0];
1290
170k
    int row, col, tile_row, tile_col, ret;
1291
170k
    int bytesperpixel;
1292
170k
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1293
170k
    AVFrame *f;
1294
170k
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1295
1296
170k
    f = s->s.frames[CUR_FRAME].tf.f;
1297
170k
    ls_y = f->linesize[0];
1298
170k
    ls_uv =f->linesize[1];
1299
170k
    bytesperpixel = s->bytesperpixel;
1300
1301
170k
    yoff = uvoff = 0;
1302
307k
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1303
170k
        set_tile_offset(&tile_row_start, &tile_row_end,
1304
170k
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1305
1306
334k
        for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1307
170k
            int64_t tile_size;
1308
1309
170k
            if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1310
170k
                tile_row == s->s.h.tiling.tile_rows - 1) {
1311
170k
                tile_size = size;
1312
170k
            } else {
1313
621
                tile_size = AV_RB32(data);
1314
621
                data += 4;
1315
621
                size -= 4;
1316
621
            }
1317
170k
            if (tile_size > size)
1318
514
                return AVERROR_INVALIDDATA;
1319
170k
            ret = ff_vpx_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1320
170k
            if (ret < 0)
1321
3.55k
                return ret;
1322
166k
            if (vpx_rac_get_prob_branchy(&td->c_b[tile_col], 128)) // marker bit
1323
3.11k
                return AVERROR_INVALIDDATA;
1324
163k
            data += tile_size;
1325
163k
            size -= tile_size;
1326
163k
        }
1327
1328
1.37M
        for (row = tile_row_start; row < tile_row_end;
1329
1.23M
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1330
1.23M
            VP9Filter *lflvl_ptr = s->lflvl;
1331
1.23M
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1332
1333
2.44M
            for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1334
1.23M
                set_tile_offset(&tile_col_start, &tile_col_end,
1335
1.23M
                                tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1336
1.23M
                td->tile_col_start = tile_col_start;
1337
1.23M
                if (s->pass != 2) {
1338
1.23M
                    memset(td->left_partition_ctx, 0, 8);
1339
1.23M
                    memset(td->left_skip_ctx, 0, 8);
1340
1.23M
                    if (s->s.h.keyframe || s->s.h.intraonly) {
1341
271k
                        memset(td->left_mode_ctx, DC_PRED, 16);
1342
965k
                    } else {
1343
965k
                        memset(td->left_mode_ctx, NEARESTMV, 8);
1344
965k
                    }
1345
1.23M
                    memset(td->left_y_nnz_ctx, 0, 16);
1346
1.23M
                    memset(td->left_uv_nnz_ctx, 0, 32);
1347
1.23M
                    memset(td->left_segpred_ctx, 0, 8);
1348
1349
1.23M
                    td->c = &td->c_b[tile_col];
1350
1.23M
                }
1351
1352
1.23M
                for (col = tile_col_start;
1353
4.39M
                     col < tile_col_end;
1354
3.15M
                     col += 8, yoff2 += 64 * bytesperpixel,
1355
3.18M
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1356
                    // FIXME integrate with lf code (i.e. zero after each
1357
                    // use, similar to invtxfm coefficients, or similar)
1358
3.18M
                    if (s->pass != 1) {
1359
3.18M
                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1360
3.18M
                    }
1361
1362
3.18M
                    if (s->pass == 2) {
1363
0
                        decode_sb_mem(td, row, col, lflvl_ptr,
1364
0
                                      yoff2, uvoff2, BL_64X64);
1365
3.18M
                    } else {
1366
3.18M
                        if (vpx_rac_is_end(td->c)) {
1367
27.0k
                            return AVERROR_INVALIDDATA;
1368
27.0k
                        }
1369
3.15M
                        decode_sb(td, row, col, lflvl_ptr,
1370
3.15M
                                  yoff2, uvoff2, BL_64X64);
1371
3.15M
                    }
1372
3.18M
                }
1373
1.23M
            }
1374
1375
1.20M
            if (s->pass == 1)
1376
0
                continue;
1377
1378
            // backup pre-loopfilter reconstruction data for intra
1379
            // prediction of next row of sb64s
1380
1.20M
            if (row + 8 < s->rows) {
1381
1.07M
                memcpy(s->intra_pred_data[0],
1382
1.07M
                       f->data[0] + yoff + 63 * ls_y,
1383
1.07M
                       8 * s->cols * bytesperpixel);
1384
1.07M
                memcpy(s->intra_pred_data[1],
1385
1.07M
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1386
1.07M
                       8 * s->cols * bytesperpixel >> s->ss_h);
1387
1.07M
                memcpy(s->intra_pred_data[2],
1388
1.07M
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1389
1.07M
                       8 * s->cols * bytesperpixel >> s->ss_h);
1390
1.07M
            }
1391
1392
            // loopfilter one row
1393
1.20M
            if (s->s.h.filter.level) {
1394
1.16M
                yoff2 = yoff;
1395
1.16M
                uvoff2 = uvoff;
1396
1.16M
                lflvl_ptr = s->lflvl;
1397
3.95M
                for (col = 0; col < s->cols;
1398
2.78M
                     col += 8, yoff2 += 64 * bytesperpixel,
1399
2.78M
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1400
2.78M
                    ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1401
2.78M
                                         yoff2, uvoff2);
1402
2.78M
                }
1403
1.16M
            }
1404
1405
            // FIXME maybe we can make this more finegrained by running the
1406
            // loopfilter per-block instead of after each sbrow
1407
            // In fact that would also make intra pred left preparation easier?
1408
1.20M
            ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, row >> 3);
1409
1.20M
        }
1410
163k
    }
1411
136k
    return 0;
1412
170k
}
1413
1414
#if HAVE_THREADS
1415
static av_always_inline
1416
int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1417
                              int threadnr)
1418
0
{
1419
0
    VP9Context *s = avctx->priv_data;
1420
0
    VP9TileData *td = &s->td[jobnr];
1421
0
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1422
0
    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1423
0
    unsigned tile_cols_len;
1424
0
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1425
0
    VP9Filter *lflvl_ptr_base;
1426
0
    AVFrame *f;
1427
1428
0
    f = s->s.frames[CUR_FRAME].tf.f;
1429
0
    ls_y = f->linesize[0];
1430
0
    ls_uv =f->linesize[1];
1431
1432
0
    set_tile_offset(&tile_col_start, &tile_col_end,
1433
0
                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1434
0
    td->tile_col_start  = tile_col_start;
1435
0
    uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1436
0
    yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1437
0
    lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1438
1439
0
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1440
0
        set_tile_offset(&tile_row_start, &tile_row_end,
1441
0
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1442
1443
0
        td->c = &td->c_b[tile_row];
1444
0
        for (row = tile_row_start; row < tile_row_end;
1445
0
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1446
0
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1447
0
            VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
1448
1449
0
            memset(td->left_partition_ctx, 0, 8);
1450
0
            memset(td->left_skip_ctx, 0, 8);
1451
0
            if (s->s.h.keyframe || s->s.h.intraonly) {
1452
0
                memset(td->left_mode_ctx, DC_PRED, 16);
1453
0
            } else {
1454
0
                memset(td->left_mode_ctx, NEARESTMV, 8);
1455
0
            }
1456
0
            memset(td->left_y_nnz_ctx, 0, 16);
1457
0
            memset(td->left_uv_nnz_ctx, 0, 32);
1458
0
            memset(td->left_segpred_ctx, 0, 8);
1459
1460
0
            for (col = tile_col_start;
1461
0
                 col < tile_col_end;
1462
0
                 col += 8, yoff2 += 64 * bytesperpixel,
1463
0
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1464
                // FIXME integrate with lf code (i.e. zero after each
1465
                // use, similar to invtxfm coefficients, or similar)
1466
0
                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1467
0
                decode_sb(td, row, col, lflvl_ptr,
1468
0
                            yoff2, uvoff2, BL_64X64);
1469
0
            }
1470
1471
            // backup pre-loopfilter reconstruction data for intra
1472
            // prediction of next row of sb64s
1473
0
            tile_cols_len = tile_col_end - tile_col_start;
1474
0
            if (row + 8 < s->rows) {
1475
0
                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1476
0
                       f->data[0] + yoff + 63 * ls_y,
1477
0
                       8 * tile_cols_len * bytesperpixel);
1478
0
                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1479
0
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1480
0
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
1481
0
                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1482
0
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1483
0
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
1484
0
            }
1485
1486
0
            vp9_report_tile_progress(s, row >> 3, 1);
1487
0
        }
1488
0
    }
1489
0
    return 0;
1490
0
}
1491
1492
static av_always_inline
1493
int loopfilter_proc(AVCodecContext *avctx)
1494
0
{
1495
0
    VP9Context *s = avctx->priv_data;
1496
0
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1497
0
    VP9Filter *lflvl_ptr;
1498
0
    int bytesperpixel = s->bytesperpixel, col, i;
1499
0
    AVFrame *f;
1500
1501
0
    f = s->s.frames[CUR_FRAME].tf.f;
1502
0
    ls_y = f->linesize[0];
1503
0
    ls_uv =f->linesize[1];
1504
1505
0
    for (i = 0; i < s->sb_rows; i++) {
1506
0
        vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1507
1508
0
        if (s->s.h.filter.level) {
1509
0
            yoff = (ls_y * 64)*i;
1510
0
            uvoff =  (ls_uv * 64 >> s->ss_v)*i;
1511
0
            lflvl_ptr = s->lflvl+s->sb_cols*i;
1512
0
            for (col = 0; col < s->cols;
1513
0
                 col += 8, yoff += 64 * bytesperpixel,
1514
0
                 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1515
0
                ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1516
0
                                     yoff, uvoff);
1517
0
            }
1518
0
        }
1519
0
    }
1520
0
    return 0;
1521
0
}
1522
#endif
1523
1524
static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame)
1525
46.5k
{
1526
46.5k
    AVVideoEncParams *par;
1527
46.5k
    unsigned int tile, nb_blocks = 0;
1528
1529
46.5k
    if (s->s.h.segmentation.enabled) {
1530
82.2k
        for (tile = 0; tile < s->active_tile_cols; tile++)
1531
41.1k
            nb_blocks += s->td[tile].nb_block_structure;
1532
41.1k
    }
1533
1534
46.5k
    par = av_video_enc_params_create_side_data(frame->tf.f,
1535
46.5k
        AV_VIDEO_ENC_PARAMS_VP9, nb_blocks);
1536
46.5k
    if (!par)
1537
0
        return AVERROR(ENOMEM);
1538
1539
46.5k
    par->qp             = s->s.h.yac_qi;
1540
46.5k
    par->delta_qp[0][0] = s->s.h.ydc_qdelta;
1541
46.5k
    par->delta_qp[1][0] = s->s.h.uvdc_qdelta;
1542
46.5k
    par->delta_qp[2][0] = s->s.h.uvdc_qdelta;
1543
46.5k
    par->delta_qp[1][1] = s->s.h.uvac_qdelta;
1544
46.5k
    par->delta_qp[2][1] = s->s.h.uvac_qdelta;
1545
1546
46.5k
    if (nb_blocks) {
1547
41.1k
        unsigned int block = 0;
1548
41.1k
        unsigned int tile, block_tile;
1549
1550
82.2k
        for (tile = 0; tile < s->active_tile_cols; tile++) {
1551
41.1k
            VP9TileData *td = &s->td[tile];
1552
1553
3.01M
            for (block_tile = 0; block_tile < td->nb_block_structure; block_tile++) {
1554
2.97M
                AVVideoBlockParams *b = av_video_enc_params_block(par, block++);
1555
2.97M
                unsigned int      row = td->block_structure[block_tile].row;
1556
2.97M
                unsigned int      col = td->block_structure[block_tile].col;
1557
2.97M
                uint8_t        seg_id = frame->segmentation_map[row * 8 * s->sb_cols + col];
1558
1559
2.97M
                b->src_x = col * 8;
1560
2.97M
                b->src_y = row * 8;
1561
2.97M
                b->w     = 1 << (3 + td->block_structure[block_tile].block_size_idx_x);
1562
2.97M
                b->h     = 1 << (3 + td->block_structure[block_tile].block_size_idx_y);
1563
1564
2.97M
                if (s->s.h.segmentation.feat[seg_id].q_enabled) {
1565
1.52M
                    b->delta_qp = s->s.h.segmentation.feat[seg_id].q_val;
1566
1.52M
                    if (s->s.h.segmentation.absolute_vals)
1567
1.35M
                        b->delta_qp -= par->qp;
1568
1.52M
                }
1569
2.97M
            }
1570
41.1k
        }
1571
41.1k
    }
1572
1573
46.5k
    return 0;
1574
46.5k
}
1575
1576
static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
1577
                            int *got_frame, AVPacket *pkt)
1578
363k
{
1579
363k
    const uint8_t *data = pkt->data;
1580
363k
    int size = pkt->size;
1581
363k
    VP9Context *s = avctx->priv_data;
1582
363k
    int ret, i, j, ref;
1583
363k
    CodedBitstreamUnit *unit;
1584
363k
    VP9RawFrame *rf;
1585
1586
363k
    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1587
363k
                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1588
363k
    const VP9Frame *src;
1589
363k
    AVFrame *f;
1590
1591
363k
    ret = ff_cbs_read_packet(s->cbc, &s->current_frag, pkt);
1592
363k
    if (ret < 0) {
1593
154k
        ff_cbs_fragment_reset(&s->current_frag);
1594
154k
        av_log(avctx, AV_LOG_ERROR, "Failed to read frame header.\n");
1595
154k
        return ret;
1596
154k
    }
1597
1598
208k
    unit = &s->current_frag.units[0];
1599
208k
    rf = unit->content;
1600
1601
208k
    av_refstruct_replace(&s->header_ref, unit->content_ref);
1602
208k
    s->frame_header = &rf->header;
1603
1604
208k
    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
1605
25.9k
        return ret;
1606
182k
    } else if (ret == 0) {
1607
11.1k
        if (!s->s.refs[ref].f) {
1608
8.64k
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1609
8.64k
            return AVERROR_INVALIDDATA;
1610
8.64k
        }
1611
22.5k
        for (int i = 0; i < 8; i++)
1612
20.0k
            ff_progress_frame_replace(&s->next_refs[i], &s->s.refs[i]);
1613
2.50k
        ff_thread_finish_setup(avctx);
1614
2.50k
        ff_progress_frame_await(&s->s.refs[ref], INT_MAX);
1615
2.50k
        ff_cbs_fragment_reset(&s->current_frag);
1616
1617
2.50k
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1618
0
            return ret;
1619
2.50k
        frame->pts     = pkt->pts;
1620
2.50k
        frame->pkt_dts = pkt->dts;
1621
2.50k
        *got_frame = 1;
1622
2.50k
        return pkt->size;
1623
2.50k
    }
1624
171k
    data += ret;
1625
171k
    size -= ret;
1626
1627
171k
    src = !s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres ?
1628
152k
              &s->s.frames[CUR_FRAME] : &s->s.frames[BLANK_FRAME];
1629
171k
    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly)
1630
160k
        vp9_frame_replace(&s->s.frames[REF_FRAME_SEGMAP], src);
1631
171k
    vp9_frame_replace(&s->s.frames[REF_FRAME_MVPAIR], src);
1632
171k
    vp9_frame_unref(&s->s.frames[CUR_FRAME]);
1633
171k
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1634
448
        return ret;
1635
1636
170k
    s->s.frames[CUR_FRAME].header_ref = av_refstruct_ref(s->header_ref);
1637
170k
    s->s.frames[CUR_FRAME].frame_header = s->frame_header;
1638
1639
170k
    f = s->s.frames[CUR_FRAME].tf.f;
1640
170k
    if (s->s.h.keyframe)
1641
13.4k
        f->flags |= AV_FRAME_FLAG_KEY;
1642
157k
    else
1643
157k
        f->flags &= ~AV_FRAME_FLAG_KEY;
1644
170k
    if (s->s.h.lossless)
1645
10.0k
        f->flags |= AV_FRAME_FLAG_LOSSLESS;
1646
160k
    else
1647
160k
        f->flags &= ~AV_FRAME_FLAG_LOSSLESS;
1648
170k
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1649
1650
    // Non-existent frames have the implicit dimension 0x0 != CUR_FRAME
1651
170k
    if (!s->s.frames[REF_FRAME_MVPAIR].tf.f ||
1652
170k
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
1653
152k
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1654
22.9k
        vp9_frame_unref(&s->s.frames[REF_FRAME_SEGMAP]);
1655
22.9k
    }
1656
1657
    // ref frame setup
1658
1.53M
    for (i = 0; i < 8; i++) {
1659
1.36M
        ff_progress_frame_replace(&s->next_refs[i],
1660
1.36M
                                   s->s.h.refreshrefmask & (1 << i) ?
1661
699k
                                       &s->s.frames[CUR_FRAME].tf : &s->s.refs[i]);
1662
1.36M
    }
1663
1664
170k
    if (avctx->hwaccel) {
1665
0
        const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
1666
0
        ret = hwaccel->start_frame(avctx, pkt->buf, pkt->data, pkt->size);
1667
0
        if (ret < 0)
1668
0
            return ret;
1669
0
        ret = hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1670
0
        if (ret < 0)
1671
0
            return ret;
1672
0
        ret = hwaccel->end_frame(avctx);
1673
0
        if (ret < 0)
1674
0
            return ret;
1675
1676
0
        for (i = 0; i < 8; i++) {
1677
0
            vp9_frame_replace(&s->s.ref_frames[i],
1678
0
                              s->s.h.refreshrefmask & (1 << i) ?
1679
0
                                  &s->s.frames[CUR_FRAME] : &s->s.ref_frames[i]);
1680
0
        }
1681
1682
0
        goto finish;
1683
0
    }
1684
1685
    // main tile decode loop
1686
170k
    memset(s->above_partition_ctx, 0, s->cols);
1687
170k
    memset(s->above_skip_ctx, 0, s->cols);
1688
170k
    if (s->s.h.keyframe || s->s.h.intraonly) {
1689
18.5k
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1690
152k
    } else {
1691
152k
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
1692
152k
    }
1693
170k
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1694
170k
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1695
170k
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1696
170k
    memset(s->above_segpred_ctx, 0, s->cols);
1697
170k
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1698
170k
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1699
170k
    if ((ret = update_block_buffers(avctx)) < 0) {
1700
0
        av_log(avctx, AV_LOG_ERROR,
1701
0
               "Failed to allocate block buffers\n");
1702
0
        return ret;
1703
0
    }
1704
170k
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
1705
14.2k
        int j, k, l, m;
1706
1707
54.5k
        for (i = 0; i < 4; i++) {
1708
162k
            for (j = 0; j < 2; j++)
1709
325k
                for (k = 0; k < 2; k++)
1710
1.51M
                    for (l = 0; l < 6; l++)
1711
9.11M
                        for (m = 0; m < 6; m++)
1712
7.81M
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1713
7.81M
                                   s->prob.coef[i][j][k][l][m], 3);
1714
54.2k
            if (s->s.h.txfmmode == i)
1715
13.9k
                break;
1716
54.2k
        }
1717
14.2k
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1718
14.2k
        ff_thread_finish_setup(avctx);
1719
156k
    } else if (!s->s.h.refreshctx) {
1720
37.1k
        ff_thread_finish_setup(avctx);
1721
37.1k
    }
1722
1723
170k
#if HAVE_THREADS
1724
170k
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
1725
0
        for (i = 0; i < s->sb_rows; i++)
1726
0
            atomic_init(&s->entries[i], 0);
1727
0
    }
1728
170k
#endif
1729
1730
170k
    do {
1731
341k
        for (i = 0; i < s->active_tile_cols; i++) {
1732
170k
            s->td[i].b = s->td[i].b_base;
1733
170k
            s->td[i].block = s->td[i].block_base;
1734
170k
            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1735
170k
            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1736
170k
            s->td[i].eob = s->td[i].eob_base;
1737
170k
            s->td[i].uveob[0] = s->td[i].uveob_base[0];
1738
170k
            s->td[i].uveob[1] = s->td[i].uveob_base[1];
1739
170k
            s->td[i].error_info = 0;
1740
170k
        }
1741
1742
170k
#if HAVE_THREADS
1743
170k
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
1744
0
            int tile_row, tile_col;
1745
1746
0
            av_assert1(!s->pass);
1747
1748
0
            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1749
0
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1750
0
                    int64_t tile_size;
1751
1752
0
                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1753
0
                        tile_row == s->s.h.tiling.tile_rows - 1) {
1754
0
                        tile_size = size;
1755
0
                    } else {
1756
0
                        tile_size = AV_RB32(data);
1757
0
                        data += 4;
1758
0
                        size -= 4;
1759
0
                    }
1760
0
                    if (tile_size > size)
1761
0
                        return AVERROR_INVALIDDATA;
1762
0
                    ret = ff_vpx_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1763
0
                    if (ret < 0)
1764
0
                        return ret;
1765
0
                    if (vpx_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1766
0
                        return AVERROR_INVALIDDATA;
1767
0
                    data += tile_size;
1768
0
                    size -= tile_size;
1769
0
                }
1770
0
            }
1771
1772
0
            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
1773
0
        } else
1774
170k
#endif
1775
170k
        {
1776
170k
            ret = decode_tiles(avctx, data, size);
1777
170k
            if (ret < 0)
1778
34.2k
                goto fail;
1779
170k
        }
1780
1781
        // Sum all counts fields into td[0].counts for tile threading
1782
136k
        if (avctx->active_thread_type == FF_THREAD_SLICE)
1783
0
            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1784
0
                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1785
0
                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
1786
1787
136k
        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1788
102k
            ff_vp9_adapt_probs(s);
1789
102k
            ff_thread_finish_setup(avctx);
1790
102k
        }
1791
136k
    } while (s->pass++ == 1);
1792
1793
136k
    if (s->td->error_info < 0) {
1794
1.16k
        av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
1795
1.16k
        s->td->error_info = 0;
1796
1.16k
        ret = AVERROR_INVALIDDATA;
1797
1.16k
        goto fail;
1798
1.16k
    }
1799
135k
    if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
1800
46.5k
        ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
1801
46.5k
        if (ret < 0)
1802
0
            goto fail;
1803
46.5k
    }
1804
1805
135k
finish:
1806
135k
    ff_cbs_fragment_reset(&s->current_frag);
1807
1808
135k
    ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX);
1809
    // ref frame setup
1810
1.21M
    for (int i = 0; i < 8; i++)
1811
1.08M
        ff_progress_frame_replace(&s->s.refs[i], &s->next_refs[i]);
1812
1813
135k
    if (!s->s.h.invisible) {
1814
129k
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1815
0
            return ret;
1816
129k
        *got_frame = 1;
1817
129k
    }
1818
1819
135k
    return pkt->size;
1820
35.3k
fail:
1821
35.3k
    ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX);
1822
35.3k
    return ret;
1823
135k
}
1824
1825
static void vp9_decode_flush(AVCodecContext *avctx)
1826
37.7k
{
1827
37.7k
    VP9Context *s = avctx->priv_data;
1828
37.7k
    int i;
1829
1830
151k
    for (i = 0; i < 3; i++)
1831
113k
        vp9_frame_unref(&s->s.frames[i]);
1832
1833
339k
    for (i = 0; i < 8; i++) {
1834
302k
        ff_progress_frame_unref(&s->s.refs[i]);
1835
302k
        vp9_frame_unref(&s->s.ref_frames[i]);
1836
302k
    }
1837
1838
37.7k
    ff_cbs_fragment_reset(&s->current_frag);
1839
37.7k
    ff_cbs_flush(s->cbc);
1840
1841
37.7k
    if (FF_HW_HAS_CB(avctx, flush))
1842
0
        FF_HW_SIMPLE_CALL(avctx, flush);
1843
37.7k
}
1844
1845
static av_cold int vp9_decode_init(AVCodecContext *avctx)
1846
9.77k
{
1847
9.77k
    VP9Context *s = avctx->priv_data;
1848
9.77k
    int ret;
1849
1850
9.77k
    s->last_bpp = 0;
1851
9.77k
    s->s.h.filter.sharpness = -1;
1852
1853
9.77k
    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_VP9, avctx);
1854
9.77k
    if (ret < 0)
1855
0
        return ret;
1856
1857
9.77k
#if HAVE_THREADS
1858
9.77k
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
1859
0
        ret = ff_pthread_init(s, vp9_context_offsets);
1860
0
        if (ret < 0)
1861
0
            return ret;
1862
0
    }
1863
9.77k
#endif
1864
1865
9.77k
    return 0;
1866
9.77k
}
1867
1868
#if HAVE_THREADS
1869
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1870
0
{
1871
0
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1872
1873
0
    for (int i = 0; i < 3; i++)
1874
0
        vp9_frame_replace(&s->s.frames[i], &ssrc->s.frames[i]);
1875
0
    for (int i = 0; i < 8; i++)
1876
0
        ff_progress_frame_replace(&s->s.refs[i], &ssrc->next_refs[i]);
1877
0
    av_refstruct_replace(&s->frame_extradata_pool, ssrc->frame_extradata_pool);
1878
0
    s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1879
1880
0
    av_refstruct_replace(&s->header_ref, ssrc->header_ref);
1881
0
    for (int i = 0; i < 8; i++)
1882
0
        vp9_frame_replace(&s->s.ref_frames[i], &ssrc->s.ref_frames[i]);
1883
1884
0
    s->frame_header = ssrc->frame_header;
1885
0
    memcpy(s->cbc->priv_data, ssrc->cbc->priv_data, sizeof(CodedBitstreamVP9Context));
1886
1887
0
    s->s.h.invisible = ssrc->s.h.invisible;
1888
0
    s->s.h.keyframe = ssrc->s.h.keyframe;
1889
0
    s->s.h.intraonly = ssrc->s.h.intraonly;
1890
0
    s->ss_v = ssrc->ss_v;
1891
0
    s->ss_h = ssrc->ss_h;
1892
0
    s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1893
0
    s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1894
0
    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1895
0
    s->bytesperpixel = ssrc->bytesperpixel;
1896
0
    s->gf_fmt = ssrc->gf_fmt;
1897
0
    s->w = ssrc->w;
1898
0
    s->h = ssrc->h;
1899
0
    s->s.h.bpp = ssrc->s.h.bpp;
1900
0
    s->bpp_index = ssrc->bpp_index;
1901
0
    s->pix_fmt = ssrc->pix_fmt;
1902
0
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1903
0
    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1904
0
    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1905
0
           sizeof(s->s.h.segmentation.feat));
1906
1907
0
    return 0;
1908
0
}
1909
#endif
1910
1911
const FFCodec ff_vp9_decoder = {
1912
    .p.name                = "vp9",
1913
    CODEC_LONG_NAME("Google VP9"),
1914
    .p.type                = AVMEDIA_TYPE_VIDEO,
1915
    .p.id                  = AV_CODEC_ID_VP9,
1916
    .priv_data_size        = sizeof(VP9Context),
1917
    .init                  = vp9_decode_init,
1918
    .close                 = vp9_decode_free,
1919
    FF_CODEC_DECODE_CB(vp9_decode_frame),
1920
    .p.capabilities        = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1921
    .caps_internal         = FF_CODEC_CAP_INIT_CLEANUP |
1922
                             FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
1923
                             FF_CODEC_CAP_USES_PROGRESSFRAMES,
1924
    .flush                 = vp9_decode_flush,
1925
    UPDATE_THREAD_CONTEXT(vp9_decode_update_thread_context),
1926
    .p.profiles            = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1927
    .bsfs                  = "vp9_superframe_split",
1928
    .hw_configs            = (const AVCodecHWConfigInternal *const []) {
1929
#if CONFIG_VP9_DXVA2_HWACCEL
1930
                               HWACCEL_DXVA2(vp9),
1931
#endif
1932
#if CONFIG_VP9_D3D11VA_HWACCEL
1933
                               HWACCEL_D3D11VA(vp9),
1934
#endif
1935
#if CONFIG_VP9_D3D11VA2_HWACCEL
1936
                               HWACCEL_D3D11VA2(vp9),
1937
#endif
1938
#if CONFIG_VP9_D3D12VA_HWACCEL
1939
                               HWACCEL_D3D12VA(vp9),
1940
#endif
1941
#if CONFIG_VP9_NVDEC_HWACCEL
1942
                               HWACCEL_NVDEC(vp9),
1943
#endif
1944
#if CONFIG_VP9_VAAPI_HWACCEL
1945
                               HWACCEL_VAAPI(vp9),
1946
#endif
1947
#if CONFIG_VP9_VDPAU_HWACCEL
1948
                               HWACCEL_VDPAU(vp9),
1949
#endif
1950
#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
1951
                               HWACCEL_VIDEOTOOLBOX(vp9),
1952
#endif
1953
#if CONFIG_VP9_VULKAN_HWACCEL
1954
                               HWACCEL_VULKAN(vp9),
1955
#endif
1956
                               NULL
1957
                           },
1958
};