Coverage Report

Created: 2025-08-28 07:12

/src/ffmpeg/libavcodec/vp8.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * VP7/VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Fiona Glaser
7
 * Copyright (C) 2012 Daniel Kang
8
 * Copyright (C) 2014 Peter Ross
9
 *
10
 * This file is part of FFmpeg.
11
 *
12
 * FFmpeg is free software; you can redistribute it and/or
13
 * modify it under the terms of the GNU Lesser General Public
14
 * License as published by the Free Software Foundation; either
15
 * version 2.1 of the License, or (at your option) any later version.
16
 *
17
 * FFmpeg is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
 * Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public
23
 * License along with FFmpeg; if not, write to the Free Software
24
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25
 */
26
27
#include "config_components.h"
28
29
#include "libavutil/mem.h"
30
#include "libavutil/mem_internal.h"
31
32
#include "avcodec.h"
33
#include "codec_internal.h"
34
#include "decode.h"
35
#include "hwaccel_internal.h"
36
#include "hwconfig.h"
37
#include "mathops.h"
38
#include "progressframe.h"
39
#include "libavutil/refstruct.h"
40
#include "thread.h"
41
#include "vp8.h"
42
#include "vp89_rac.h"
43
#include "vp8data.h"
44
#include "vpx_rac.h"
45
46
#if ARCH_ARM
47
#   include "arm/vp8.h"
48
#endif
49
50
// fixme: add 1 bit to all the calls to this?
51
static int vp8_rac_get_sint(VPXRangeCoder *c, int bits)
52
886k
{
53
886k
    int v;
54
55
886k
    if (!vp89_rac_get(c))
56
543k
        return 0;
57
58
342k
    v = vp89_rac_get_uint(c, bits);
59
60
342k
    if (vp89_rac_get(c))
61
136k
        v = -v;
62
63
342k
    return v;
64
886k
}
65
66
static int vp8_rac_get_nn(VPXRangeCoder *c)
67
2.04M
{
68
2.04M
    int v = vp89_rac_get_uint(c, 7) << 1;
69
2.04M
    return v + !v;
70
2.04M
}
71
72
// DCTextra
73
static int vp8_rac_get_coeff(VPXRangeCoder *c, const uint8_t *prob)
74
39.7M
{
75
39.7M
    int v = 0;
76
77
436M
    do {
78
436M
        v = (v<<1) + vpx_rac_get_prob(c, *prob++);
79
436M
    } while (*prob);
80
81
39.7M
    return v;
82
39.7M
}
83
84
static void free_buffers(VP8Context *s)
85
40.6k
{
86
40.6k
    int i;
87
40.6k
    if (s->thread_data)
88
231k
        for (i = 0; i < MAX_THREADS; i++) {
89
206k
#if HAVE_THREADS
90
206k
            pthread_cond_destroy(&s->thread_data[i].cond);
91
206k
            pthread_mutex_destroy(&s->thread_data[i].lock);
92
206k
#endif
93
206k
            av_freep(&s->thread_data[i].filter_strength);
94
206k
        }
95
40.6k
    av_freep(&s->thread_data);
96
40.6k
    av_freep(&s->macroblocks_base);
97
40.6k
    av_freep(&s->intra4x4_pred_mode_top);
98
40.6k
    av_freep(&s->top_nnz);
99
40.6k
    av_freep(&s->top_border);
100
101
40.6k
    s->macroblocks = NULL;
102
40.6k
}
103
104
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
105
183k
{
106
183k
    int ret = ff_progress_frame_get_buffer(s->avctx, &f->tf,
107
183k
                                           ref ? AV_GET_BUFFER_FLAG_REF : 0);
108
183k
    if (ret < 0)
109
987
        return ret;
110
182k
    f->seg_map = av_refstruct_allocz(s->mb_width * s->mb_height);
111
182k
    if (!f->seg_map) {
112
0
        ret = AVERROR(ENOMEM);
113
0
        goto fail;
114
0
    }
115
182k
    ret = ff_hwaccel_frame_priv_alloc(s->avctx, &f->hwaccel_picture_private);
116
182k
    if (ret < 0)
117
0
        goto fail;
118
119
182k
    return 0;
120
121
0
fail:
122
0
    av_refstruct_unref(&f->seg_map);
123
0
    ff_progress_frame_unref(&f->tf);
124
0
    return ret;
125
182k
}
126
127
static void vp8_release_frame(VP8Frame *f)
128
530k
{
129
530k
    av_refstruct_unref(&f->seg_map);
130
530k
    av_refstruct_unref(&f->hwaccel_picture_private);
131
530k
    ff_progress_frame_unref(&f->tf);
132
530k
}
133
134
static av_cold void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
135
76.3k
{
136
76.3k
    VP8Context *s = avctx->priv_data;
137
76.3k
    int i;
138
139
458k
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
140
381k
        vp8_release_frame(&s->frames[i]);
141
76.3k
    memset(s->framep, 0, sizeof(s->framep));
142
143
76.3k
    if (free_mem)
144
40.6k
        free_buffers(s);
145
146
76.3k
    if (FF_HW_HAS_CB(avctx, flush))
147
0
        FF_HW_SIMPLE_CALL(avctx, flush);
148
76.3k
}
149
150
static av_cold void vp8_decode_flush(AVCodecContext *avctx)
151
35.7k
{
152
35.7k
    vp8_decode_flush_impl(avctx, 0);
153
35.7k
}
154
155
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
156
183k
{
157
183k
    VP8Frame *frame = NULL;
158
183k
    int i;
159
160
    // find a free buffer
161
322k
    for (i = 0; i < 5; i++)
162
322k
        if (&s->frames[i] != s->framep[VP8_FRAME_CURRENT]  &&
163
322k
            &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] &&
164
322k
            &s->frames[i] != s->framep[VP8_FRAME_GOLDEN]   &&
165
322k
            &s->frames[i] != s->framep[VP8_FRAME_ALTREF]) {
166
183k
            frame = &s->frames[i];
167
183k
            break;
168
183k
        }
169
183k
    if (i == 5) {
170
0
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
171
0
        abort();
172
0
    }
173
183k
    if (frame->tf.f)
174
55.1k
        vp8_release_frame(frame);
175
176
183k
    return frame;
177
183k
}
178
179
static enum AVPixelFormat get_pixel_format(VP8Context *s)
180
4.56k
{
181
4.56k
    enum AVPixelFormat pix_fmts[] = {
182
#if CONFIG_VP8_VAAPI_HWACCEL
183
        AV_PIX_FMT_VAAPI,
184
#endif
185
#if CONFIG_VP8_NVDEC_HWACCEL
186
        AV_PIX_FMT_CUDA,
187
#endif
188
4.56k
        AV_PIX_FMT_YUV420P,
189
4.56k
        AV_PIX_FMT_NONE,
190
4.56k
    };
191
192
4.56k
    return ff_get_format(s->avctx, pix_fmts);
193
4.56k
}
194
195
static av_always_inline
196
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
197
30.2k
{
198
30.2k
    AVCodecContext *avctx = s->avctx;
199
30.2k
    int i, ret, dim_reset = 0;
200
201
30.2k
    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
202
30.2k
        height != s->avctx->height) {
203
27.7k
        vp8_decode_flush_impl(s->avctx, 1);
204
205
27.7k
        ret = ff_set_dimensions(s->avctx, width, height);
206
27.7k
        if (ret < 0)
207
4.47k
            return ret;
208
209
23.2k
        dim_reset = (s->macroblocks_base != NULL);
210
23.2k
    }
211
212
25.7k
    if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
213
25.7k
         !s->actually_webp && !is_vp7) {
214
4.56k
        s->pix_fmt = get_pixel_format(s);
215
4.56k
        if (s->pix_fmt < 0)
216
0
            return AVERROR(EINVAL);
217
4.56k
        avctx->pix_fmt = s->pix_fmt;
218
4.56k
    }
219
220
25.7k
    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
221
25.7k
    s->mb_height = (s->avctx->coded_height + 15) / 16;
222
223
25.7k
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
224
15.3k
                   avctx->thread_count > 1;
225
25.7k
    if (!s->mb_layout) { // Frame threading and one thread
226
15.3k
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
227
15.3k
                                               sizeof(*s->macroblocks));
228
15.3k
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
229
15.3k
    } else // Sliced threading
230
10.4k
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
231
10.4k
                                         sizeof(*s->macroblocks));
232
25.7k
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
233
25.7k
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
234
25.7k
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
235
236
25.7k
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
237
25.7k
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
238
0
        free_buffers(s);
239
0
        return AVERROR(ENOMEM);
240
0
    }
241
242
231k
    for (i = 0; i < MAX_THREADS; i++) {
243
206k
        s->thread_data[i].filter_strength =
244
206k
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
245
206k
        if (!s->thread_data[i].filter_strength) {
246
0
            free_buffers(s);
247
0
            return AVERROR(ENOMEM);
248
0
        }
249
206k
#if HAVE_THREADS
250
206k
        ret = pthread_mutex_init(&s->thread_data[i].lock, NULL);
251
206k
        if (ret) {
252
0
            free_buffers(s);
253
0
            return AVERROR(ret);
254
0
        }
255
206k
        ret = pthread_cond_init(&s->thread_data[i].cond, NULL);
256
206k
        if (ret) {
257
0
            free_buffers(s);
258
0
            return AVERROR(ret);
259
0
        }
260
206k
#endif
261
206k
    }
262
263
25.7k
    s->macroblocks = s->macroblocks_base + 1;
264
265
25.7k
    return 0;
266
25.7k
}
267
268
static int vp7_update_dimensions(VP8Context *s, int width, int height)
269
11.8k
{
270
11.8k
    return update_dimensions(s, width, height, IS_VP7);
271
11.8k
}
272
273
static int vp8_update_dimensions(VP8Context *s, int width, int height)
274
18.3k
{
275
18.3k
    return update_dimensions(s, width, height, IS_VP8);
276
18.3k
}
277
278
279
static void parse_segment_info(VP8Context *s)
280
64.6k
{
281
64.6k
    VPXRangeCoder *c = &s->c;
282
64.6k
    int i;
283
284
64.6k
    s->segmentation.update_map = vp89_rac_get(c);
285
64.6k
    s->segmentation.update_feature_data = vp89_rac_get(c);
286
287
64.6k
    if (s->segmentation.update_feature_data) {
288
53.0k
        s->segmentation.absolute_vals = vp89_rac_get(c);
289
290
265k
        for (i = 0; i < 4; i++)
291
212k
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
292
293
265k
        for (i = 0; i < 4; i++)
294
212k
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
295
53.0k
    }
296
64.6k
    if (s->segmentation.update_map)
297
194k
        for (i = 0; i < 3; i++)
298
146k
            s->prob->segmentid[i] = vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255;
299
64.6k
}
300
301
static void update_lf_deltas(VP8Context *s)
302
13.1k
{
303
13.1k
    VPXRangeCoder *c = &s->c;
304
13.1k
    int i;
305
306
65.7k
    for (i = 0; i < 4; i++) {
307
52.5k
        if (vp89_rac_get(c)) {
308
26.6k
            s->lf_delta.ref[i] = vp89_rac_get_uint(c, 6);
309
310
26.6k
            if (vp89_rac_get(c))
311
17.4k
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
312
26.6k
        }
313
52.5k
    }
314
315
65.7k
    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
316
52.5k
        if (vp89_rac_get(c)) {
317
27.8k
            s->lf_delta.mode[i] = vp89_rac_get_uint(c, 6);
318
319
27.8k
            if (vp89_rac_get(c))
320
21.9k
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
321
27.8k
        }
322
52.5k
    }
323
13.1k
}
324
325
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
326
110k
{
327
110k
    const uint8_t *sizes = buf;
328
110k
    int i;
329
110k
    int ret;
330
331
110k
    s->num_coeff_partitions = 1 << vp89_rac_get_uint(&s->c, 2);
332
333
110k
    buf      += 3 * (s->num_coeff_partitions - 1);
334
110k
    buf_size -= 3 * (s->num_coeff_partitions - 1);
335
110k
    if (buf_size < 0)
336
5.20k
        return -1;
337
338
107k
    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
339
4.07k
        int size = AV_RL24(sizes + 3 * i);
340
4.07k
        if (buf_size - size < 0)
341
1.13k
            return -1;
342
2.93k
        s->coeff_partition_size[i] = size;
343
344
2.93k
        ret = ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, size);
345
2.93k
        if (ret < 0)
346
385
            return ret;
347
2.55k
        buf      += size;
348
2.55k
        buf_size -= size;
349
2.55k
    }
350
351
103k
    s->coeff_partition_size[i] = buf_size;
352
353
103k
    return ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
354
105k
}
355
356
static void vp7_get_quants(VP8Context *s)
357
94.4k
{
358
94.4k
    VPXRangeCoder *c = &s->c;
359
360
94.4k
    int yac_qi  = vp89_rac_get_uint(c, 7);
361
94.4k
    int ydc_qi  = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
362
94.4k
    int y2dc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
363
94.4k
    int y2ac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
364
94.4k
    int uvdc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
365
94.4k
    int uvac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
366
367
94.4k
    s->qmat[0].luma_qmul[0]    =       vp7_ydc_qlookup[ydc_qi];
368
94.4k
    s->qmat[0].luma_qmul[1]    =       vp7_yac_qlookup[yac_qi];
369
94.4k
    s->qmat[0].luma_dc_qmul[0] =       vp7_y2dc_qlookup[y2dc_qi];
370
94.4k
    s->qmat[0].luma_dc_qmul[1] =       vp7_y2ac_qlookup[y2ac_qi];
371
94.4k
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
372
94.4k
    s->qmat[0].chroma_qmul[1]  =       vp7_yac_qlookup[uvac_qi];
373
94.4k
}
374
375
static void vp8_get_quants(VP8Context *s)
376
92.4k
{
377
92.4k
    VPXRangeCoder *c = &s->c;
378
92.4k
    int i, base_qi;
379
380
92.4k
    s->quant.yac_qi     = vp89_rac_get_uint(c, 7);
381
92.4k
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
382
92.4k
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
383
92.4k
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
384
92.4k
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
385
92.4k
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
386
387
462k
    for (i = 0; i < 4; i++) {
388
369k
        if (s->segmentation.enabled) {
389
213k
            base_qi = s->segmentation.base_quant[i];
390
213k
            if (!s->segmentation.absolute_vals)
391
46.3k
                base_qi += s->quant.yac_qi;
392
213k
        } else
393
155k
            base_qi = s->quant.yac_qi;
394
395
369k
        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
396
369k
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
397
369k
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
398
        /* 101581>>16 is equivalent to 155/100 */
399
369k
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
400
369k
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
401
369k
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
402
403
369k
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
404
369k
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
405
369k
    }
406
92.4k
}
407
408
/**
409
 * Determine which buffers golden and altref should be updated with after this frame.
410
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
411
 *
412
 * Intra frames update all 3 references
413
 * Inter frames update VP8_FRAME_PREVIOUS if the update_last flag is set
414
 * If the update (golden|altref) flag is set, it's updated with the current frame
415
 *      if update_last is set, and VP8_FRAME_PREVIOUS otherwise.
416
 * If the flag is not set, the number read means:
417
 *      0: no update
418
 *      1: VP8_FRAME_PREVIOUS
419
 *      2: update golden with altref, or update altref with golden
420
 */
421
static VP8FrameType ref_to_update(VP8Context *s, int update, VP8FrameType ref)
422
134k
{
423
134k
    VPXRangeCoder *c = &s->c;
424
425
134k
    if (update)
426
63.8k
        return VP8_FRAME_CURRENT;
427
428
70.8k
    switch (vp89_rac_get_uint(c, 2)) {
429
5.16k
    case 1:
430
5.16k
        return VP8_FRAME_PREVIOUS;
431
40.2k
    case 2:
432
40.2k
        return (ref == VP8_FRAME_GOLDEN) ? VP8_FRAME_ALTREF : VP8_FRAME_GOLDEN;
433
70.8k
    }
434
25.5k
    return VP8_FRAME_NONE;
435
70.8k
}
436
437
static void vp78_reset_probability_tables(VP8Context *s)
438
47.6k
{
439
47.6k
    int i, j;
440
238k
    for (i = 0; i < 4; i++)
441
3.24M
        for (j = 0; j < 16; j++)
442
3.05M
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
443
3.05M
                   sizeof(s->prob->token[i][j]));
444
47.6k
}
445
446
static void vp78_update_probability_tables(VP8Context *s)
447
186k
{
448
186k
    VPXRangeCoder *c = &s->c;
449
186k
    int i, j, k, l, m;
450
451
934k
    for (i = 0; i < 4; i++)
452
6.72M
        for (j = 0; j < 8; j++)
453
23.9M
            for (k = 0; k < 3; k++)
454
215M
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
455
197M
                    if (vpx_rac_get_prob_branchy(c, ff_vp8_token_update_probs[i][j][k][l])) {
456
63.0M
                        int prob = vp89_rac_get_uint(c, 8);
457
189M
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
458
126M
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
459
63.0M
                    }
460
186k
}
461
462
82.8k
#define VP7_MVC_SIZE 17
463
67.3k
#define VP8_MVC_SIZE 19
464
465
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
466
                                                            int mvc_size)
467
150k
{
468
150k
    VPXRangeCoder *c = &s->c;
469
150k
    int i, j;
470
471
150k
    if (vp89_rac_get(c))
472
335k
        for (i = 0; i < 4; i++)
473
268k
            s->prob->pred16x16[i] = vp89_rac_get_uint(c, 8);
474
150k
    if (vp89_rac_get(c))
475
272k
        for (i = 0; i < 3; i++)
476
204k
            s->prob->pred8x8c[i]  = vp89_rac_get_uint(c, 8);
477
478
    // 17.2 MV probability update
479
450k
    for (i = 0; i < 2; i++)
480
5.67M
        for (j = 0; j < mvc_size; j++)
481
5.37M
            if (vpx_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
482
2.04M
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
483
150k
}
484
485
static void update_refs(VP8Context *s)
486
67.3k
{
487
67.3k
    VPXRangeCoder *c = &s->c;
488
489
67.3k
    int update_golden = vp89_rac_get(c);
490
67.3k
    int update_altref = vp89_rac_get(c);
491
492
67.3k
    s->update_golden = ref_to_update(s, update_golden, VP8_FRAME_GOLDEN);
493
67.3k
    s->update_altref = ref_to_update(s, update_altref, VP8_FRAME_ALTREF);
494
67.3k
}
495
496
static void copy_chroma(AVFrame *dst, const AVFrame *src, int width, int height)
497
55.9k
{
498
55.9k
    int i, j;
499
500
167k
    for (j = 1; j < 3; j++) {
501
3.40M
        for (i = 0; i < height / 2; i++)
502
3.28M
            memcpy(dst->data[j] + i * dst->linesize[j],
503
3.28M
                   src->data[j] + i * src->linesize[j], width / 2);
504
111k
    }
505
55.9k
}
506
507
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
508
                 const uint8_t *src, ptrdiff_t src_linesize,
509
                 int width, int height,
510
                 int alpha, int beta)
511
57.7k
{
512
57.7k
    int i, j;
513
3.53M
    for (j = 0; j < height; j++) {
514
3.47M
        const uint8_t *src2 = src + j * src_linesize;
515
3.47M
        uint8_t *dst2 = dst + j * dst_linesize;
516
278M
        for (i = 0; i < width; i++) {
517
274M
            uint8_t y = src2[i];
518
274M
            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
519
274M
        }
520
3.47M
    }
521
57.7k
}
522
523
static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
524
94.4k
{
525
94.4k
    int ret;
526
527
94.4k
    if (!s->keyframe && (alpha || beta)) {
528
61.6k
        int width  = s->mb_width * 16;
529
61.6k
        int height = s->mb_height * 16;
530
61.6k
        const AVFrame *src;
531
61.6k
        AVFrame *dst;
532
533
61.6k
        if (!s->framep[VP8_FRAME_PREVIOUS] ||
534
61.6k
            !s->framep[VP8_FRAME_GOLDEN]) {
535
3.85k
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
536
3.85k
            return AVERROR_INVALIDDATA;
537
3.85k
        }
538
539
57.7k
        src =
540
57.7k
        dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f;
541
542
        /* preserve the golden frame, write a new previous frame */
543
57.7k
        if (s->framep[VP8_FRAME_GOLDEN] == s->framep[VP8_FRAME_PREVIOUS]) {
544
55.9k
            VP8Frame *prev_frame = vp8_find_free_buffer(s);
545
546
55.9k
            ret = vp8_alloc_frame(s, prev_frame, 1);
547
55.9k
            if (ret < 0)
548
0
                return ret;
549
55.9k
            s->framep[VP8_FRAME_PREVIOUS] = prev_frame;
550
551
55.9k
            dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f;
552
553
55.9k
            copy_chroma(dst, src, width, height);
554
55.9k
        }
555
556
57.7k
        fade(dst->data[0], dst->linesize[0],
557
57.7k
             src->data[0], src->linesize[0],
558
57.7k
             width, height, alpha, beta);
559
57.7k
    }
560
561
90.5k
    return 0;
562
94.4k
}
563
564
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
565
114k
{
566
114k
    VPXRangeCoder *c = &s->c;
567
114k
    int part1_size, hscale, vscale, i, j, ret;
568
114k
    int width  = s->avctx->width;
569
114k
    int height = s->avctx->height;
570
114k
    int alpha = 0;
571
114k
    int beta  = 0;
572
114k
    int fade_present = 1;
573
574
114k
    if (buf_size < 4) {
575
8.05k
        return AVERROR_INVALIDDATA;
576
8.05k
    }
577
578
106k
    s->profile = (buf[0] >> 1) & 7;
579
106k
    if (s->profile > 1) {
580
1.50k
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
581
1.50k
        return AVERROR_INVALIDDATA;
582
1.50k
    }
583
584
104k
    s->keyframe  = !(buf[0] & 1);
585
104k
    s->invisible = 0;
586
104k
    part1_size   = AV_RL24(buf) >> 4;
587
588
104k
    if (buf_size < 4 - s->profile + part1_size) {
589
1.99k
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
590
1.99k
        return AVERROR_INVALIDDATA;
591
1.99k
    }
592
593
102k
    buf      += 4 - s->profile;
594
102k
    buf_size -= 4 - s->profile;
595
596
102k
    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
597
598
102k
    ret = ff_vpx_init_range_decoder(c, buf, part1_size);
599
102k
    if (ret < 0)
600
295
        return ret;
601
102k
    buf      += part1_size;
602
102k
    buf_size -= part1_size;
603
604
    /* A. Dimension information (keyframes only) */
605
102k
    if (s->keyframe) {
606
15.1k
        width  = vp89_rac_get_uint(c, 12);
607
15.1k
        height = vp89_rac_get_uint(c, 12);
608
15.1k
        hscale = vp89_rac_get_uint(c, 2);
609
15.1k
        vscale = vp89_rac_get_uint(c, 2);
610
15.1k
        if (hscale || vscale)
611
8.49k
            avpriv_request_sample(s->avctx, "Upscaling");
612
613
15.1k
        s->update_golden = s->update_altref = VP8_FRAME_CURRENT;
614
15.1k
        vp78_reset_probability_tables(s);
615
15.1k
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
616
15.1k
               sizeof(s->prob->pred16x16));
617
15.1k
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
618
15.1k
               sizeof(s->prob->pred8x8c));
619
45.3k
        for (i = 0; i < 2; i++)
620
30.2k
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
621
30.2k
                   sizeof(vp7_mv_default_prob[i]));
622
15.1k
        memset(&s->segmentation, 0, sizeof(s->segmentation));
623
15.1k
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
624
15.1k
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
625
15.1k
    }
626
627
102k
    if (s->keyframe || s->profile > 0)
628
89.9k
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
629
630
    /* B. Decoding information for all four macroblock-level features */
631
512k
    for (i = 0; i < 4; i++) {
632
410k
        s->feature_enabled[i] = vp89_rac_get(c);
633
410k
        if (s->feature_enabled[i]) {
634
271k
             s->feature_present_prob[i] = vp89_rac_get_uint(c, 8);
635
636
1.08M
             for (j = 0; j < 3; j++)
637
814k
                 s->feature_index_prob[i][j] =
638
814k
                     vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255;
639
640
271k
             if (vp7_feature_value_size[s->profile][i])
641
1.00M
                 for (j = 0; j < 4; j++)
642
806k
                     s->feature_value[i][j] =
643
806k
                        vp89_rac_get(c) ? vp89_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
644
271k
        }
645
410k
    }
646
647
102k
    s->segmentation.enabled    = 0;
648
102k
    s->segmentation.update_map = 0;
649
102k
    s->lf_delta.enabled        = 0;
650
651
102k
    s->num_coeff_partitions = 1;
652
102k
    ret = ff_vpx_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
653
102k
    if (ret < 0)
654
6.63k
        return ret;
655
656
95.9k
    if (!s->macroblocks_base || /* first frame */
657
95.9k
        width != s->avctx->width || height != s->avctx->height ||
658
95.9k
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
659
11.8k
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
660
1.45k
            return ret;
661
11.8k
    }
662
663
    /* C. Dequantization indices */
664
94.4k
    vp7_get_quants(s);
665
666
    /* D. Golden frame update flag (a Flag) for interframes only */
667
94.4k
    if (!s->keyframe) {
668
82.8k
        s->update_golden = vp89_rac_get(c) ? VP8_FRAME_CURRENT : VP8_FRAME_NONE;
669
82.8k
        s->sign_bias[VP8_FRAME_GOLDEN] = 0;
670
82.8k
    }
671
672
94.4k
    s->update_last          = 1;
673
94.4k
    s->update_probabilities = 1;
674
675
94.4k
    if (s->profile > 0) {
676
78.7k
        s->update_probabilities = vp89_rac_get(c);
677
78.7k
        if (!s->update_probabilities)
678
17.1k
            s->prob[1] = s->prob[0];
679
680
78.7k
        if (!s->keyframe)
681
70.5k
            fade_present = vp89_rac_get(c);
682
78.7k
    }
683
684
94.4k
    if (vpx_rac_is_end(c))
685
0
        return AVERROR_INVALIDDATA;
686
    /* E. Fading information for previous frame */
687
94.4k
    if (fade_present && vp89_rac_get(c)) {
688
62.6k
        alpha = (int8_t) vp89_rac_get_uint(c, 8);
689
62.6k
        beta  = (int8_t) vp89_rac_get_uint(c, 8);
690
62.6k
    }
691
692
    /* F. Loop filter type */
693
94.4k
    if (!s->profile)
694
15.6k
        s->filter.simple = vp89_rac_get(c);
695
696
    /* G. DCT coefficient ordering specification */
697
94.4k
    if (vp89_rac_get(c))
698
1.03M
        for (i = 1; i < 16; i++)
699
969k
            s->prob[0].scan[i] = ff_zigzag_scan[vp89_rac_get_uint(c, 4)];
700
701
    /* H. Loop filter levels  */
702
94.4k
    if (s->profile > 0)
703
78.7k
        s->filter.simple = vp89_rac_get(c);
704
94.4k
    s->filter.level     = vp89_rac_get_uint(c, 6);
705
94.4k
    s->filter.sharpness = vp89_rac_get_uint(c, 3);
706
707
    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
708
94.4k
    vp78_update_probability_tables(s);
709
710
94.4k
    s->mbskip_enabled = 0;
711
712
    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
713
94.4k
    if (!s->keyframe) {
714
82.8k
        s->prob->intra  = vp89_rac_get_uint(c, 8);
715
82.8k
        s->prob->last   = vp89_rac_get_uint(c, 8);
716
82.8k
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
717
82.8k
    }
718
719
94.4k
    if (vpx_rac_is_end(c))
720
0
        return AVERROR_INVALIDDATA;
721
722
94.4k
    if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
723
3.85k
        return ret;
724
725
90.5k
    return 0;
726
94.4k
}
727
728
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
729
128k
{
730
128k
    VPXRangeCoder *c = &s->c;
731
128k
    int header_size, hscale, vscale, ret;
732
128k
    int width  = s->avctx->width;
733
128k
    int height = s->avctx->height;
734
735
128k
    if (buf_size < 3) {
736
8.49k
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
737
8.49k
        return AVERROR_INVALIDDATA;
738
8.49k
    }
739
740
119k
    s->keyframe  = !(buf[0] & 1);
741
119k
    s->profile   =  (buf[0]>>1) & 7;
742
119k
    s->invisible = !(buf[0] & 0x10);
743
119k
    header_size  = AV_RL24(buf) >> 5;
744
119k
    buf      += 3;
745
119k
    buf_size -= 3;
746
747
119k
    s->header_partition_size = header_size;
748
749
119k
    if (s->profile > 3)
750
38.6k
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
751
752
119k
    if (!s->profile)
753
26.5k
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
754
26.5k
               sizeof(s->put_pixels_tab));
755
92.9k
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
756
92.9k
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
757
92.9k
               sizeof(s->put_pixels_tab));
758
759
119k
    if (header_size > buf_size - 7 * s->keyframe) {
760
7.24k
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
761
7.24k
        return AVERROR_INVALIDDATA;
762
7.24k
    }
763
764
112k
    if (s->keyframe) {
765
33.4k
        if (AV_RL24(buf) != 0x2a019d) {
766
926
            av_log(s->avctx, AV_LOG_ERROR,
767
926
                   "Invalid start code 0x%x\n", AV_RL24(buf));
768
926
            return AVERROR_INVALIDDATA;
769
926
        }
770
32.5k
        width     = AV_RL16(buf + 3) & 0x3fff;
771
32.5k
        height    = AV_RL16(buf + 5) & 0x3fff;
772
32.5k
        hscale    = buf[4] >> 6;
773
32.5k
        vscale    = buf[6] >> 6;
774
32.5k
        buf      += 7;
775
32.5k
        buf_size -= 7;
776
777
32.5k
        if (hscale || vscale)
778
23.5k
            avpriv_request_sample(s->avctx, "Upscaling");
779
780
32.5k
        s->update_golden = s->update_altref = VP8_FRAME_CURRENT;
781
32.5k
        vp78_reset_probability_tables(s);
782
32.5k
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
783
32.5k
               sizeof(s->prob->pred16x16));
784
32.5k
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
785
32.5k
               sizeof(s->prob->pred8x8c));
786
32.5k
        memcpy(s->prob->mvc, vp8_mv_default_prob,
787
32.5k
               sizeof(s->prob->mvc));
788
32.5k
        memset(&s->segmentation, 0, sizeof(s->segmentation));
789
32.5k
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
790
32.5k
    }
791
792
111k
    ret = ff_vpx_init_range_decoder(c, buf, header_size);
793
111k
    if (ret < 0)
794
1.09k
        return ret;
795
110k
    buf      += header_size;
796
110k
    buf_size -= header_size;
797
798
110k
    if (s->keyframe) {
799
32.0k
        s->colorspace = vp89_rac_get(c);
800
32.0k
        if (s->colorspace)
801
3.11k
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
802
32.0k
        s->fullrange = vp89_rac_get(c);
803
32.0k
    }
804
805
110k
    if ((s->segmentation.enabled = vp89_rac_get(c)))
806
64.6k
        parse_segment_info(s);
807
45.5k
    else
808
45.5k
        s->segmentation.update_map = 0; // FIXME: move this to some init function?
809
810
110k
    s->filter.simple    = vp89_rac_get(c);
811
110k
    s->filter.level     = vp89_rac_get_uint(c, 6);
812
110k
    s->filter.sharpness = vp89_rac_get_uint(c, 3);
813
814
110k
    if ((s->lf_delta.enabled = vp89_rac_get(c))) {
815
18.6k
        s->lf_delta.update = vp89_rac_get(c);
816
18.6k
        if (s->lf_delta.update)
817
13.1k
            update_lf_deltas(s);
818
18.6k
    }
819
820
110k
    if (setup_partitions(s, buf, buf_size)) {
821
14.7k
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
822
14.7k
        return AVERROR_INVALIDDATA;
823
14.7k
    }
824
825
95.4k
    if (!s->macroblocks_base || /* first frame */
826
95.4k
        width != s->avctx->width || height != s->avctx->height ||
827
95.4k
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
828
18.3k
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
829
3.01k
            return ret;
830
831
92.4k
    vp8_get_quants(s);
832
833
92.4k
    if (!s->keyframe) {
834
67.3k
        update_refs(s);
835
67.3k
        s->sign_bias[VP8_FRAME_GOLDEN] = vp89_rac_get(c);
836
67.3k
        s->sign_bias[VP8_FRAME_ALTREF] = vp89_rac_get(c);
837
67.3k
    }
838
839
    // if we aren't saving this frame's probabilities for future frames,
840
    // make a copy of the current probabilities
841
92.4k
    if (!(s->update_probabilities = vp89_rac_get(c)))
842
81.5k
        s->prob[1] = s->prob[0];
843
844
92.4k
    s->update_last = s->keyframe || vp89_rac_get(c);
845
846
92.4k
    vp78_update_probability_tables(s);
847
848
92.4k
    if ((s->mbskip_enabled = vp89_rac_get(c)))
849
5.41k
        s->prob->mbskip = vp89_rac_get_uint(c, 8);
850
851
92.4k
    if (!s->keyframe) {
852
67.3k
        s->prob->intra  = vp89_rac_get_uint(c, 8);
853
67.3k
        s->prob->last   = vp89_rac_get_uint(c, 8);
854
67.3k
        s->prob->golden = vp89_rac_get_uint(c, 8);
855
67.3k
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
856
67.3k
    }
857
858
    // Record the entropy coder state here so that hwaccels can use it.
859
92.4k
    s->c.code_word = vpx_rac_renorm(&s->c);
860
92.4k
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
861
92.4k
    s->coder_state_at_header_end.range     = s->c.high;
862
92.4k
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
863
92.4k
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
864
865
92.4k
    return 0;
866
95.4k
}
867
868
static av_always_inline
869
void clamp_mv(const VP8mvbounds *s, VP8mv *dst, const VP8mv *src)
870
349k
{
871
349k
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
872
349k
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
873
349k
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
874
349k
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
875
349k
}
876
877
/**
878
 * Motion vector coding, 17.1.
879
 */
880
static av_always_inline int read_mv_component(VPXRangeCoder *c, const uint8_t *p, int vp7)
881
2.48M
{
882
2.48M
    int bit, x = 0;
883
884
2.48M
    if (vpx_rac_get_prob_branchy(c, p[0])) {
885
1.76M
        int i;
886
887
7.04M
        for (i = 0; i < 3; i++)
888
5.28M
            x += vpx_rac_get_prob(c, p[9 + i]) << i;
889
9.13M
        for (i = (vp7 ? 7 : 9); i > 3; i--)
890
7.37M
            x += vpx_rac_get_prob(c, p[9 + i]) << i;
891
1.76M
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vpx_rac_get_prob(c, p[12]))
892
1.70M
            x += 8;
893
1.76M
    } else {
894
        // small_mvtree
895
720k
        const uint8_t *ps = p + 2;
896
720k
        bit = vpx_rac_get_prob(c, *ps);
897
720k
        ps += 1 + 3 * bit;
898
720k
        x  += 4 * bit;
899
720k
        bit = vpx_rac_get_prob(c, *ps);
900
720k
        ps += 1 + bit;
901
720k
        x  += 2 * bit;
902
720k
        x  += vpx_rac_get_prob(c, *ps);
903
720k
    }
904
905
2.48M
    return (x && vpx_rac_get_prob(c, p[1])) ? -x : x;
906
2.48M
}
907
908
/**
 * VP7 entry point for read_mv_component().
 *
 * @param c range coder to read from
 * @param p probabilities for the component being read
 *
 * @return the decoded motion vector component
 */
static int vp7_read_mv_component(VPXRangeCoder *c, const uint8_t *p)
{
    int component = read_mv_component(c, p, 1);

    return component;
}
912
913
/**
 * VP8 entry point for read_mv_component().
 *
 * @param c range coder to read from
 * @param p probabilities for the component being read
 *
 * @return the decoded motion vector component
 */
static int vp8_read_mv_component(VPXRangeCoder *c, const uint8_t *p)
{
    int component = read_mv_component(c, p, 0);

    return component;
}
917
918
static av_always_inline
919
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
920
2.03M
{
921
2.03M
    if (is_vp7)
922
893k
        return vp7_submv_prob;
923
924
1.13M
    if (left == top)
925
654k
        return vp8_submv_prob[4 - !!left];
926
483k
    if (!top)
927
156k
        return vp8_submv_prob[2];
928
326k
    return vp8_submv_prob[1 - !!left];
929
483k
}
930
931
/**
932
 * Split motion vector prediction, 16.4.
933
 * @returns the number of motion vectors parsed (2, 4 or 16)
934
 */
935
static av_always_inline
936
int decode_splitmvs(const VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb,
937
                    int layout, int is_vp7)
938
533k
{
939
533k
    int part_idx;
940
533k
    int n, num;
941
533k
    const VP8Macroblock *top_mb;
942
533k
    const VP8Macroblock *left_mb = &mb[-1];
943
533k
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
944
533k
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
945
533k
    const VP8mv *top_mv;
946
533k
    const VP8mv *left_mv = left_mb->bmv;
947
533k
    const VP8mv *cur_mv  = mb->bmv;
948
949
533k
    if (!layout) // layout is inlined, s->mb_layout is not
950
145k
        top_mb = &mb[2];
951
388k
    else
952
388k
        top_mb = &mb[-s->mb_width - 1];
953
533k
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
954
533k
    top_mv       = top_mb->bmv;
955
956
533k
    if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
957
470k
        if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
958
426k
            part_idx = VP8_SPLITMVMODE_16x8 + vpx_rac_get_prob(c, vp8_mbsplit_prob[2]);
959
43.6k
        else
960
43.6k
            part_idx = VP8_SPLITMVMODE_8x8;
961
470k
    } else {
962
62.6k
        part_idx = VP8_SPLITMVMODE_4x4;
963
62.6k
    }
964
965
533k
    num              = vp8_mbsplit_count[part_idx];
966
533k
    mbsplits_cur     = vp8_mbsplits[part_idx],
967
533k
    firstidx         = vp8_mbfirstidx[part_idx];
968
533k
    mb->partitioning = part_idx;
969
970
2.56M
    for (n = 0; n < num; n++) {
971
2.03M
        int k = firstidx[n];
972
2.03M
        uint32_t left, above;
973
2.03M
        const uint8_t *submv_prob;
974
975
2.03M
        if (!(k & 3))
976
799k
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
977
1.23M
        else
978
1.23M
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
979
2.03M
        if (k <= 3)
980
1.15M
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
981
874k
        else
982
874k
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
983
984
2.03M
        submv_prob = get_submv_prob(left, above, is_vp7);
985
986
2.03M
        if (vpx_rac_get_prob_branchy(c, submv_prob[0])) {
987
1.10M
            if (vpx_rac_get_prob_branchy(c, submv_prob[1])) {
988
967k
                if (vpx_rac_get_prob_branchy(c, submv_prob[2])) {
989
956k
                    mb->bmv[n].y = mb->mv.y +
990
956k
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
991
956k
                    mb->bmv[n].x = mb->mv.x +
992
956k
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
993
956k
                } else {
994
10.8k
                    AV_ZERO32(&mb->bmv[n]);
995
10.8k
                }
996
967k
            } else {
997
135k
                AV_WN32A(&mb->bmv[n], above);
998
135k
            }
999
1.10M
        } else {
1000
928k
            AV_WN32A(&mb->bmv[n], left);
1001
928k
        }
1002
2.03M
    }
1003
1004
533k
    return num;
1005
533k
}
1006
1007
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * The target position is computed as a linear index into a grid that is
 * mb_width + 1 columns wide; it is rejected when it lies below boundary or
 * lands in the last (padding) column.
 *
 * @param mb_x     current macroblock x address
 * @param mb_y     current macroblock y address
 * @param mb_width frame width in macroblocks (without the padding column)
 * @param xoffset  horizontal offset to apply, in macroblocks
 * @param yoffset  vertical offset to apply, in macroblocks
 * @param boundary smallest linear index that is accepted
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean); the outputs are only written
 *         when the offset is legal
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    const int stride = mb_width + 1;
    const int pos    = (mb_y + yoffset) * stride + mb_x + xoffset;
    int column;

    if (pos < boundary)
        return 0;

    column = pos % stride;
    if (column == stride - 1)   // landed in the padding column
        return 0;

    *edge_y = pos / stride;
    *edge_x = column;
    return 1;
}
1030
1031
static const VP8mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1032
13.6M
{
1033
13.6M
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
1034
13.6M
}
1035
1036
static av_always_inline
1037
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1038
                    int mb_x, int mb_y, int layout)
1039
1.51M
{
1040
1.51M
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1041
1.51M
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1042
1.51M
    int idx = CNT_ZERO;
1043
1.51M
    VP8mv near_mv[3];
1044
1.51M
    uint8_t cnt[3] = { 0 };
1045
1.51M
    VPXRangeCoder *c = &s->c;
1046
1.51M
    int i;
1047
1048
1.51M
    AV_ZERO32(&near_mv[0]);
1049
1.51M
    AV_ZERO32(&near_mv[1]);
1050
1.51M
    AV_ZERO32(&near_mv[2]);
1051
1052
19.6M
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1053
18.1M
        const VP7MVPred * pred = &vp7_mv_pred[i];
1054
18.1M
        int edge_x, edge_y;
1055
1056
18.1M
        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1057
18.1M
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
1058
13.6M
            const VP8Macroblock *edge = (s->mb_layout == 1)
1059
13.6M
                                      ? s->macroblocks_base + 1 + edge_x +
1060
13.6M
                                        (s->mb_width + 1) * (edge_y + 1)
1061
13.6M
                                      : s->macroblocks + edge_x +
1062
0
                                        (s->mb_height - edge_y - 1) * 2;
1063
13.6M
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
1064
13.6M
            if (mv) {
1065
7.95M
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1066
6.79M
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1067
3.10M
                        idx = CNT_NEAREST;
1068
3.69M
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1069
2.88M
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1070
1.93M
                            continue;
1071
950k
                        idx = CNT_NEAR;
1072
950k
                    } else {
1073
805k
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
1074
805k
                        idx = CNT_NEAR;
1075
805k
                    }
1076
6.79M
                } else {
1077
1.16M
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
1078
1.16M
                    idx = CNT_NEAREST;
1079
1.16M
                }
1080
7.95M
            } else {
1081
5.72M
                idx = CNT_ZERO;
1082
5.72M
            }
1083
13.6M
        } else {
1084
4.44M
            idx = CNT_ZERO;
1085
4.44M
        }
1086
16.1M
        cnt[idx] += vp7_mv_pred[i].score;
1087
16.1M
    }
1088
1089
1.51M
    mb->partitioning = VP8_SPLITMVMODE_NONE;
1090
1091
1.51M
    if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1092
1.03M
        mb->mode = VP8_MVMODE_MV;
1093
1094
1.03M
        if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1095
1096
670k
            if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1097
1098
558k
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1099
558k
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1100
119k
                else
1101
558k
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1102
1103
558k
                if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1104
388k
                    mb->mode = VP8_MVMODE_SPLIT;
1105
388k
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1106
388k
                } else {
1107
170k
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1108
170k
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1109
170k
                    mb->bmv[0] = mb->mv;
1110
170k
                }
1111
558k
            } else {
1112
111k
                mb->mv = near_mv[CNT_NEAR];
1113
111k
                mb->bmv[0] = mb->mv;
1114
111k
            }
1115
670k
        } else {
1116
365k
            mb->mv = near_mv[CNT_NEAREST];
1117
365k
            mb->bmv[0] = mb->mv;
1118
365k
        }
1119
1.03M
    } else {
1120
474k
        mb->mode = VP8_MVMODE_ZERO;
1121
474k
        AV_ZERO32(&mb->mv);
1122
474k
        mb->bmv[0] = mb->mv;
1123
474k
    }
1124
1.51M
}
1125
1126
static av_always_inline
1127
void vp8_decode_mvs(VP8Context *s, const VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1128
                    int mb_x, int mb_y, int layout)
1129
813k
{
1130
813k
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
1131
813k
                                  mb - 1 /* left */,
1132
813k
                                  0      /* top-left */ };
1133
813k
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1134
813k
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1135
813k
    int idx = CNT_ZERO;
1136
813k
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
1137
813k
    const int8_t *sign_bias = s->sign_bias;
1138
813k
    VP8mv near_mv[4];
1139
813k
    uint8_t cnt[4] = { 0 };
1140
813k
    VPXRangeCoder *c = &s->c;
1141
1142
813k
    if (!layout) { // layout is inlined (s->mb_layout is not)
1143
813k
        mb_edge[0] = mb + 2;
1144
813k
        mb_edge[2] = mb + 1;
1145
813k
    } else {
1146
0
        mb_edge[0] = mb - s->mb_width - 1;
1147
0
        mb_edge[2] = mb - s->mb_width - 2;
1148
0
    }
1149
1150
813k
    AV_ZERO32(&near_mv[0]);
1151
813k
    AV_ZERO32(&near_mv[1]);
1152
813k
    AV_ZERO32(&near_mv[2]);
1153
1154
    /* Process MB on top, left and top-left */
1155
813k
#define MV_EDGE_CHECK(n)                                                      \
1156
2.44M
    {                                                                         \
1157
2.44M
        const VP8Macroblock *edge = mb_edge[n];                               \
1158
2.44M
        int edge_ref = edge->ref_frame;                                       \
1159
2.44M
        if (edge_ref != VP8_FRAME_CURRENT) {                                 \
1160
1.97M
            uint32_t mv = AV_RN32A(&edge->mv);                                \
1161
1.97M
            if (mv) {                                                         \
1162
497k
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
1163
                    /* SWAR negate of the values in mv. */                    \
1164
70.0k
                    mv = ~mv;                                                 \
1165
70.0k
                    mv = ((mv & 0x7fff7fff) +                                 \
1166
70.0k
                          0x00010001) ^ (mv & 0x80008000);                    \
1167
70.0k
                }                                                             \
1168
497k
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
1169
497k
                    AV_WN32A(&near_mv[++idx], mv);                            \
1170
497k
                cnt[idx] += 1 + (n != 2);                                     \
1171
497k
            } else                                                            \
1172
1.97M
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
1173
1.97M
        }                                                                     \
1174
2.44M
    }
1175
1176
813k
    MV_EDGE_CHECK(0)
1177
813k
    MV_EDGE_CHECK(1)
1178
813k
    MV_EDGE_CHECK(2)
1179
1180
813k
    mb->partitioning = VP8_SPLITMVMODE_NONE;
1181
813k
    if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1182
349k
        mb->mode = VP8_MVMODE_MV;
1183
1184
        /* If we have three distinct MVs, merge first and last if they're the same */
1185
349k
        if (cnt[CNT_SPLITMV] &&
1186
349k
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1187
16.3k
            cnt[CNT_NEAREST] += 1;
1188
1189
        /* Swap near and nearest if necessary */
1190
349k
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1191
14.3k
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
1192
14.3k
            FFSWAP(VP8mv,   near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1193
14.3k
        }
1194
1195
349k
        if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1196
273k
            if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1197
                /* Choose the best mv out of 0,0 and the nearest mv */
1198
259k
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1199
259k
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
1200
259k
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
1201
259k
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1202
1203
259k
                if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1204
145k
                    mb->mode = VP8_MVMODE_SPLIT;
1205
145k
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1206
145k
                } else {
1207
114k
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
1208
114k
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
1209
114k
                    mb->bmv[0] = mb->mv;
1210
114k
                }
1211
259k
            } else {
1212
14.3k
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1213
14.3k
                mb->bmv[0] = mb->mv;
1214
14.3k
            }
1215
273k
        } else {
1216
75.5k
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1217
75.5k
            mb->bmv[0] = mb->mv;
1218
75.5k
        }
1219
464k
    } else {
1220
464k
        mb->mode = VP8_MVMODE_ZERO;
1221
464k
        AV_ZERO32(&mb->mv);
1222
464k
        mb->bmv[0] = mb->mv;
1223
464k
    }
1224
813k
}
1225
1226
static av_always_inline
1227
void decode_intra4x4_modes(VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb,
1228
                           int mb_x, int keyframe, int layout)
1229
1.86M
{
1230
1.86M
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1231
1232
1.86M
    if (layout) {
1233
1.60M
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
1234
1.60M
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1235
1.60M
    }
1236
1.86M
    if (keyframe) {
1237
1.84M
        int x, y;
1238
1.84M
        uint8_t *top;
1239
1.84M
        uint8_t *const left = s->intra4x4_pred_mode_left;
1240
1.84M
        if (layout)
1241
1.60M
            top = mb->intra4x4_pred_mode_top;
1242
241k
        else
1243
241k
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
1244
9.23M
        for (y = 0; y < 4; y++) {
1245
36.9M
            for (x = 0; x < 4; x++) {
1246
29.5M
                const uint8_t *ctx;
1247
29.5M
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
1248
29.5M
                *intra4x4 = vp89_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1249
29.5M
                left[y]   = top[x] = *intra4x4;
1250
29.5M
                intra4x4++;
1251
29.5M
            }
1252
7.38M
        }
1253
1.84M
    } else {
1254
15.6k
        int i;
1255
266k
        for (i = 0; i < 16; i++)
1256
251k
            intra4x4[i] = vp89_rac_get_tree(c, vp8_pred4x4_tree,
1257
251k
                                            vp8_pred4x4_prob_inter);
1258
15.6k
    }
1259
1.86M
}
1260
1261
static av_always_inline
1262
void decode_mb_mode(VP8Context *s, const VP8mvbounds *mv_bounds,
1263
                    VP8Macroblock *mb, int mb_x, int mb_y,
1264
                    uint8_t *segment, const uint8_t *ref, int layout, int is_vp7)
1265
5.27M
{
1266
5.27M
    VPXRangeCoder *c = &s->c;
1267
5.27M
    static const char * const vp7_feature_name[] = { "q-index",
1268
5.27M
                                                     "lf-delta",
1269
5.27M
                                                     "partial-golden-update",
1270
5.27M
                                                     "blit-pitch" };
1271
5.27M
    if (is_vp7) {
1272
3.60M
        int i;
1273
3.60M
        *segment = 0;
1274
18.0M
        for (i = 0; i < 4; i++) {
1275
14.4M
            if (s->feature_enabled[i]) {
1276
6.05M
                if (vpx_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1277
1.95M
                      int index = vp89_rac_get_tree(c, vp7_feature_index_tree,
1278
1.95M
                                                    s->feature_index_prob[i]);
1279
1.95M
                      av_log(s->avctx, AV_LOG_WARNING,
1280
1.95M
                             "Feature %s present in macroblock (value 0x%x)\n",
1281
1.95M
                             vp7_feature_name[i], s->feature_value[i][index]);
1282
1.95M
                }
1283
6.05M
           }
1284
14.4M
        }
1285
3.60M
    } else if (s->segmentation.update_map) {
1286
82.5k
        int bit  = vpx_rac_get_prob(c, s->prob->segmentid[0]);
1287
82.5k
        *segment = vpx_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1288
1.58M
    } else if (s->segmentation.enabled)
1289
76.3k
        *segment = ref ? *ref : *segment;
1290
5.27M
    mb->segment = *segment;
1291
1292
5.27M
    mb->skip = s->mbskip_enabled ? vpx_rac_get_prob(c, s->prob->mbskip) : 0;
1293
1294
5.27M
    if (s->keyframe) {
1295
2.23M
        mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_intra,
1296
2.23M
                                     vp8_pred16x16_prob_intra);
1297
1298
2.23M
        if (mb->mode == MODE_I4x4) {
1299
1.84M
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1300
1.84M
        } else {
1301
383k
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1302
383k
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1303
383k
            if (s->mb_layout)
1304
383k
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1305
122k
            else
1306
383k
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1307
383k
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
1308
383k
        }
1309
1310
2.23M
        mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree,
1311
2.23M
                                                 vp8_pred8x8c_prob_intra);
1312
2.23M
        mb->ref_frame        = VP8_FRAME_CURRENT;
1313
3.04M
    } else if (vpx_rac_get_prob_branchy(c, s->prob->intra)) {
1314
        // inter MB, 16.2
1315
2.32M
        if (vpx_rac_get_prob_branchy(c, s->prob->last))
1316
1.46M
            mb->ref_frame =
1317
1.46M
                (!is_vp7 && vpx_rac_get_prob(c, s->prob->golden)) ? VP8_FRAME_ALTREF
1318
1.46M
                                                                  : VP8_FRAME_GOLDEN;
1319
858k
        else
1320
858k
            mb->ref_frame = VP8_FRAME_PREVIOUS;
1321
2.32M
        s->ref_count[mb->ref_frame - 1]++;
1322
1323
        // motion vectors, 16.3
1324
2.32M
        if (is_vp7)
1325
1.51M
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1326
813k
        else
1327
813k
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
1328
2.32M
    } else {
1329
        // intra MB, 16.1
1330
716k
        mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_inter,
1331
716k
                                     s->prob->pred16x16);
1332
1333
716k
        if (mb->mode == MODE_I4x4)
1334
15.6k
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1335
1336
716k
        mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree,
1337
716k
                                                 s->prob->pred8x8c);
1338
716k
        mb->ref_frame        = VP8_FRAME_CURRENT;
1339
716k
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
1340
716k
        AV_ZERO32(&mb->bmv[0]);
1341
716k
    }
1342
5.27M
}
1343
1344
/**
1345
 * @param r     arithmetic bitstream reader context
1346
 * @param block destination for block coefficients
1347
 * @param probs probabilities to use when reading trees from the bitstream
1348
 * @param i     initial coeff index, 0 unless a separate DC block is coded
1349
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
1350
 *
1351
 * @return 0 if no coeffs were decoded
1352
 *         otherwise, the index of the last coeff decoded plus one
1353
 */
1354
static av_always_inline
1355
int decode_block_coeffs_internal(VPXRangeCoder *r, int16_t block[16],
1356
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1357
                                 int i, const uint8_t *token_prob, const int16_t qmul[2],
1358
                                 const uint8_t scan[16], int vp7)
1359
4.44M
{
1360
4.44M
    VPXRangeCoder c = *r;
1361
4.44M
    goto skip_eob;
1362
40.3M
    do {
1363
40.3M
        int coeff;
1364
40.8M
restart:
1365
40.8M
        if (!vpx_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
1366
1.80M
            break;
1367
1368
45.3M
skip_eob:
1369
45.3M
        if (!vpx_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1370
2.41M
            if (++i == 16)
1371
51.5k
                break; // invalid input; blocks should end with EOB
1372
2.36M
            token_prob = probs[i][0];
1373
2.36M
            if (vp7)
1374
494k
                goto restart;
1375
1.86M
            goto skip_eob;
1376
2.36M
        }
1377
1378
42.9M
        if (!vpx_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1379
2.51M
            coeff = 1;
1380
2.51M
            token_prob = probs[i + 1][1];
1381
40.3M
        } else {
1382
40.3M
            if (!vpx_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1383
553k
                coeff = vpx_rac_get_prob_branchy(&c, token_prob[4]);
1384
553k
                if (coeff)
1385
162k
                    coeff += vpx_rac_get_prob(&c, token_prob[5]);
1386
553k
                coeff += 2;
1387
39.8M
            } else {
1388
                // DCT_CAT*
1389
39.8M
                if (!vpx_rac_get_prob_branchy(&c, token_prob[6])) {
1390
85.5k
                    if (!vpx_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1391
51.8k
                        coeff = 5 + vpx_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1392
51.8k
                    } else {                                    // DCT_CAT2
1393
33.7k
                        coeff  = 7;
1394
33.7k
                        coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1395
33.7k
                        coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1396
33.7k
                    }
1397
39.7M
                } else {    // DCT_CAT3 and up
1398
39.7M
                    int a   = vpx_rac_get_prob(&c, token_prob[8]);
1399
39.7M
                    int b   = vpx_rac_get_prob(&c, token_prob[9 + a]);
1400
39.7M
                    int cat = (a << 1) + b;
1401
39.7M
                    coeff  = 3 + (8 << cat);
1402
39.7M
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1403
39.7M
                }
1404
39.8M
            }
1405
40.3M
            token_prob = probs[i + 1][2];
1406
40.3M
        }
1407
42.9M
        block[scan[i]] = (vp89_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1408
42.9M
    } while (++i < 16);
1409
1410
4.44M
    *r = c;
1411
4.44M
    return i;
1412
0
}
1413
1414
static av_always_inline
1415
int inter_predict_dc(int16_t block[16], int16_t pred[2])
1416
706k
{
1417
706k
    int16_t dc = block[0];
1418
706k
    int ret = 0;
1419
1420
706k
    if (pred[1] > 3) {
1421
98.2k
        dc += pred[0];
1422
98.2k
        ret = 1;
1423
98.2k
    }
1424
1425
706k
    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1426
599k
        block[0] = pred[0] = dc;
1427
599k
        pred[1] = 0;
1428
599k
    } else {
1429
107k
        if (pred[0] == dc)
1430
96.4k
            pred[1]++;
1431
107k
        block[0] = pred[0] = dc;
1432
107k
    }
1433
1434
706k
    return ret;
1435
706k
}
1436
1437
static int vp7_decode_block_coeffs_internal(VPXRangeCoder *r,
1438
                                            int16_t block[16],
1439
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1440
                                            int i, const uint8_t *token_prob,
1441
                                            const int16_t qmul[2],
1442
                                            const uint8_t scan[16])
1443
1.75M
{
1444
1.75M
    return decode_block_coeffs_internal(r, block, probs, i,
1445
1.75M
                                        token_prob, qmul, scan, IS_VP7);
1446
1.75M
}
1447
1448
#ifndef vp8_decode_block_coeffs_internal
1449
static int vp8_decode_block_coeffs_internal(VPXRangeCoder *r,
1450
                                            int16_t block[16],
1451
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1452
                                            int i, const uint8_t *token_prob,
1453
                                            const int16_t qmul[2])
1454
2.69M
{
1455
2.69M
    return decode_block_coeffs_internal(r, block, probs, i,
1456
2.69M
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
1457
2.69M
}
1458
#endif
1459
1460
/**
1461
 * @param c          arithmetic bitstream reader context
1462
 * @param block      destination for block coefficients
1463
 * @param probs      probabilities to use when reading trees from the bitstream
1464
 * @param i          initial coeff index, 0 unless a separate DC block is coded
1465
 * @param zero_nhood the initial prediction context for number of surrounding
1466
 *                   all-zero blocks (only left/top, so 0-2)
1467
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
1468
 * @param scan       scan pattern (VP7 only)
1469
 *
1470
 * @return 0 if no coeffs were decoded
1471
 *         otherwise, the index of the last coeff decoded plus one
1472
 */
1473
static av_always_inline
1474
int decode_block_coeffs(VPXRangeCoder *c, int16_t block[16],
1475
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1476
                        int i, int zero_nhood, const int16_t qmul[2],
1477
                        const uint8_t scan[16], int vp7)
1478
57.9M
{
1479
57.9M
    const uint8_t *token_prob = probs[i][zero_nhood];
1480
57.9M
    if (!vpx_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
1481
53.5M
        return 0;
1482
4.44M
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1483
1.75M
                                                  token_prob, qmul, scan)
1484
4.44M
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
1485
2.69M
                                                  token_prob, qmul);
1486
57.9M
}
1487
1488
static av_always_inline
1489
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VPXRangeCoder *c,
1490
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1491
                      int is_vp7)
1492
2.34M
{
1493
2.34M
    int i, x, y, luma_start = 0, luma_ctx = 3;
1494
2.34M
    int nnz_pred, nnz, nnz_total = 0;
1495
2.34M
    int segment = mb->segment;
1496
2.34M
    int block_dc = 0;
1497
1498
2.34M
    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1499
1.72M
        nnz_pred = t_nnz[8] + l_nnz[8];
1500
1501
        // decode DC values and do hadamard
1502
1.72M
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1503
1.72M
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
1504
1.72M
                                  ff_zigzag_scan, is_vp7);
1505
1.72M
        l_nnz[8] = t_nnz[8] = !!nnz;
1506
1507
1.72M
        if (is_vp7 && mb->mode > MODE_I4x4) {
1508
706k
            nnz |=  inter_predict_dc(td->block_dc,
1509
706k
                                     s->inter_dc_pred[mb->ref_frame - 1]);
1510
706k
        }
1511
1512
1.72M
        if (nnz) {
1513
281k
            nnz_total += nnz;
1514
281k
            block_dc   = 1;
1515
281k
            if (nnz == 1)
1516
136k
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1517
145k
            else
1518
145k
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1519
281k
        }
1520
1.72M
        luma_start = 1;
1521
1.72M
        luma_ctx   = 0;
1522
1.72M
    }
1523
1524
    // luma blocks
1525
11.7M
    for (y = 0; y < 4; y++)
1526
46.8M
        for (x = 0; x < 4; x++) {
1527
37.5M
            nnz_pred = l_nnz[y] + t_nnz[x];
1528
37.5M
            nnz = decode_block_coeffs(c, td->block[y][x],
1529
37.5M
                                      s->prob->token[luma_ctx],
1530
37.5M
                                      luma_start, nnz_pred,
1531
37.5M
                                      s->qmat[segment].luma_qmul,
1532
37.5M
                                      s->prob[0].scan, is_vp7);
1533
            /* nnz+block_dc may be one more than the actual last index,
1534
             * but we don't care */
1535
37.5M
            td->non_zero_count_cache[y][x] = nnz + block_dc;
1536
37.5M
            t_nnz[x] = l_nnz[y] = !!nnz;
1537
37.5M
            nnz_total += nnz;
1538
37.5M
        }
1539
1540
    // chroma blocks
1541
    // TODO: what to do about dimensions? 2nd dim for luma is x,
1542
    // but for chroma it's (y<<1)|x
1543
7.03M
    for (i = 4; i < 6; i++)
1544
14.0M
        for (y = 0; y < 2; y++)
1545
28.1M
            for (x = 0; x < 2; x++) {
1546
18.7M
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1547
18.7M
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1548
18.7M
                                          s->prob->token[2], 0, nnz_pred,
1549
18.7M
                                          s->qmat[segment].chroma_qmul,
1550
18.7M
                                          s->prob[0].scan, is_vp7);
1551
18.7M
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1552
18.7M
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1553
18.7M
                nnz_total += nnz;
1554
18.7M
            }
1555
1556
    // if there were no coded coeffs despite the macroblock not being marked skip,
1557
    // we MUST not do the inner loop filter and should not do IDCT
1558
    // Since skip isn't used for bitstream prediction, just manually set it.
1559
2.34M
    if (!nnz_total)
1560
1.80M
        mb->skip = 1;
1561
2.34M
}
1562
1563
static av_always_inline
1564
void backup_mb_border(uint8_t *top_border, const uint8_t *src_y,
1565
                      const uint8_t *src_cb, const uint8_t *src_cr,
1566
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1567
1.98M
{
1568
1.98M
    AV_COPY128(top_border, src_y + 15 * linesize);
1569
1.98M
    if (!simple) {
1570
1.36M
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1571
1.36M
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1572
1.36M
    }
1573
1.98M
}
1574
1575
static av_always_inline
1576
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1577
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1578
                    int mb_y, int mb_width, int simple, int xchg)
1579
1.52M
{
1580
1.52M
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
1581
1.52M
    src_y  -= linesize;
1582
1.52M
    src_cb -= uvlinesize;
1583
1.52M
    src_cr -= uvlinesize;
1584
1585
1.52M
#define XCHG(a, b, xchg)                                                      \
1586
10.5M
    do {                                                                      \
1587
10.5M
        if (xchg)                                                             \
1588
10.5M
            AV_SWAP64(b, a);                                                  \
1589
10.5M
        else                                                                  \
1590
10.5M
            AV_COPY64(b, a);                                                  \
1591
10.5M
    } while (0)
1592
1593
1.52M
    XCHG(top_border_m1 + 8, src_y - 8, xchg);
1594
1.52M
    XCHG(top_border, src_y, xchg);
1595
1.52M
    XCHG(top_border + 8, src_y + 8, 1);
1596
1.52M
    if (mb_x < mb_width - 1)
1597
1.27M
        XCHG(top_border + 32, src_y + 16, 1);
1598
1599
    // only copy chroma for normal loop filter
1600
    // or to initialize the top row to 127
1601
1.52M
    if (!simple || !mb_y) {
1602
1.16M
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1603
1.16M
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1604
1.16M
        XCHG(top_border + 16, src_cb, 1);
1605
1.16M
        XCHG(top_border + 24, src_cr, 1);
1606
1.16M
    }
1607
1.52M
}
1608
1609
static av_always_inline
1610
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1611
1.67M
{
1612
1.67M
    if (!mb_x)
1613
296k
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1614
1.38M
    else
1615
1.38M
        return mb_y ? mode : LEFT_DC_PRED8x8;
1616
1.67M
}
1617
1618
static av_always_inline
1619
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1620
169k
{
1621
169k
    if (!mb_x)
1622
33.4k
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1623
136k
    else
1624
136k
        return mb_y ? mode : HOR_PRED8x8;
1625
169k
}
1626
1627
static av_always_inline
1628
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1629
2.05M
{
1630
2.05M
    switch (mode) {
1631
1.67M
    case DC_PRED8x8:
1632
1.67M
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1633
115k
    case VERT_PRED8x8:
1634
115k
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1635
88.7k
    case HOR_PRED8x8:
1636
88.7k
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1637
169k
    case PLANE_PRED8x8: /* TM */
1638
169k
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1639
2.05M
    }
1640
0
    return mode;
1641
2.05M
}
1642
1643
static av_always_inline
1644
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1645
298k
{
1646
298k
    if (!mb_x) {
1647
13.2k
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1648
285k
    } else {
1649
285k
        return mb_y ? mode : HOR_VP8_PRED;
1650
285k
    }
1651
298k
}
1652
1653
static av_always_inline
1654
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1655
                                     int *copy_buf, int vp7)
1656
8.80M
{
1657
8.80M
    switch (mode) {
1658
127k
    case VERT_PRED:
1659
127k
        if (!mb_x && mb_y) {
1660
13.7k
            *copy_buf = 1;
1661
13.7k
            return mode;
1662
13.7k
        }
1663
        /* fall-through */
1664
154k
    case DIAG_DOWN_LEFT_PRED:
1665
221k
    case VERT_LEFT_PRED:
1666
221k
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1667
81.3k
    case HOR_PRED:
1668
81.3k
        if (!mb_y) {
1669
5.61k
            *copy_buf = 1;
1670
5.61k
            return mode;
1671
5.61k
        }
1672
        /* fall-through */
1673
130k
    case HOR_UP_PRED:
1674
130k
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1675
298k
    case TM_VP8_PRED:
1676
298k
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1677
8.01M
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1678
                   * as 16x16/8x8 DC */
1679
8.04M
    case DIAG_DOWN_RIGHT_PRED:
1680
8.09M
    case VERT_RIGHT_PRED:
1681
8.13M
    case HOR_DOWN_PRED:
1682
8.13M
        if (!mb_y || !mb_x)
1683
773k
            *copy_buf = 1;
1684
8.13M
        return mode;
1685
8.80M
    }
1686
0
    return mode;
1687
8.80M
}
1688
1689
static av_always_inline
1690
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
1691
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1692
1.30M
{
1693
1.30M
    int x, y, mode, nnz;
1694
1.30M
    uint32_t tr;
1695
1696
    /* for the first row, we need to run xchg_mb_border to init the top edge
1697
     * to 127 otherwise, skip it if we aren't going to deblock */
1698
1.30M
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1699
762k
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1700
762k
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1701
762k
                       s->filter.simple, 1);
1702
1703
1.30M
    if (mb->mode < MODE_I4x4) {
1704
750k
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1705
750k
        s->hpc.pred16x16[mode](dst[0], s->linesize);
1706
750k
    } else {
1707
550k
        uint8_t *ptr = dst[0];
1708
550k
        const uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1709
550k
        const uint8_t lo = is_vp7 ? 128 : 127;
1710
550k
        const uint8_t hi = is_vp7 ? 128 : 129;
1711
550k
        const uint8_t tr_top[4] = { lo, lo, lo, lo };
1712
1713
        // all blocks on the right edge of the macroblock use bottom edge
1714
        // the top macroblock for their topright edge
1715
550k
        const uint8_t *tr_right = ptr - s->linesize + 16;
1716
1717
        // if we're on the right edge of the frame, said edge is extended
1718
        // from the top macroblock
1719
550k
        if (mb_y && mb_x == s->mb_width - 1) {
1720
45.8k
            tr       = tr_right[-1] * 0x01010101u;
1721
45.8k
            tr_right = (uint8_t *) &tr;
1722
45.8k
        }
1723
1724
550k
        if (mb->skip)
1725
411k
            AV_ZERO128(td->non_zero_count_cache);
1726
1727
2.75M
        for (y = 0; y < 4; y++) {
1728
2.20M
            const uint8_t *topright = ptr + 4 - s->linesize;
1729
11.0M
            for (x = 0; x < 4; x++) {
1730
8.80M
                int copy = 0;
1731
8.80M
                ptrdiff_t linesize = s->linesize;
1732
8.80M
                uint8_t *dst = ptr + 4 * x;
1733
8.80M
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1734
1735
8.80M
                if ((y == 0 || x == 3) && mb_y == 0) {
1736
987k
                    topright = tr_top;
1737
7.81M
                } else if (x == 3)
1738
1.63M
                    topright = tr_right;
1739
1740
8.80M
                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1741
8.80M
                                                        mb_y + y, &copy, is_vp7);
1742
8.80M
                if (copy) {
1743
793k
                    dst      = copy_dst + 12;
1744
793k
                    linesize = 8;
1745
793k
                    if (!(mb_y + y)) {
1746
535k
                        copy_dst[3] = lo;
1747
535k
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1748
535k
                    } else {
1749
257k
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1750
257k
                        if (!(mb_x + x)) {
1751
257k
                            copy_dst[3] = hi;
1752
257k
                        } else {
1753
0
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
1754
0
                        }
1755
257k
                    }
1756
793k
                    if (!(mb_x + x)) {
1757
282k
                        copy_dst[11] =
1758
282k
                        copy_dst[19] =
1759
282k
                        copy_dst[27] =
1760
282k
                        copy_dst[35] = hi;
1761
510k
                    } else {
1762
510k
                        copy_dst[11] = ptr[4 * x                   - 1];
1763
510k
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
1764
510k
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1765
510k
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1766
510k
                    }
1767
793k
                }
1768
8.80M
                s->hpc.pred4x4[mode](dst, topright, linesize);
1769
8.80M
                if (copy) {
1770
793k
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
1771
793k
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
1772
793k
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1773
793k
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1774
793k
                }
1775
1776
8.80M
                nnz = td->non_zero_count_cache[y][x];
1777
8.80M
                if (nnz) {
1778
929k
                    if (nnz == 1)
1779
358k
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1780
358k
                                                  td->block[y][x], s->linesize);
1781
570k
                    else
1782
570k
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1783
570k
                                               td->block[y][x], s->linesize);
1784
929k
                }
1785
8.80M
                topright += 4;
1786
8.80M
            }
1787
1788
2.20M
            ptr      += 4 * s->linesize;
1789
2.20M
            intra4x4 += 4;
1790
2.20M
        }
1791
550k
    }
1792
1793
1.30M
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1794
1.30M
                                            mb_x, mb_y, is_vp7);
1795
1.30M
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1796
1.30M
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1797
1798
1.30M
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1799
762k
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1800
762k
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1801
762k
                       s->filter.simple, 0);
1802
1.30M
}
1803
1804
/*
 * Extra source pixels needed by the MC filters, indexed by the sub-pixel
 * position (0..7) of a motion vector component:
 *   row 0: nr. of extra pixels on the left/top side; also used as the
 *          function pointer index into the mc_func tables
 *   row 1: total nr. of extra pixels required (row 0 + row 2)
 *   row 2: nr. of extra pixels on the right/bottom side
 */
static const uint8_t subpel_idx[3][8] = {
    [0] = { 0, 1, 2, 1, 2, 1, 2, 1 },
    [1] = { 0, 3, 5, 3, 5, 3, 5, 3 },
    [2] = { 0, 2, 3, 2, 3, 2, 3, 2 },
};
1810
1811
/**
1812
 * luma MC function
1813
 *
1814
 * @param s        VP8 decoding context
1815
 * @param dst      target buffer for block data at block position
1816
 * @param ref      reference picture buffer at origin (0, 0)
1817
 * @param mv       motion vector (relative to block position) to get pixel data from
1818
 * @param x_off    horizontal position of block from origin (0, 0)
1819
 * @param y_off    vertical position of block from origin (0, 0)
1820
 * @param block_w  width of block (16, 8 or 4)
1821
 * @param block_h  height of block (always same as block_w)
1822
 * @param width    width of src/dst plane data
1823
 * @param height   height of src/dst plane data
1824
 * @param linesize size of a single line of plane data, including padding
1825
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
1826
 */
1827
static av_always_inline
1828
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1829
                 const ProgressFrame *ref, const VP8mv *mv,
1830
                 int x_off, int y_off, int block_w, int block_h,
1831
                 int width, int height, ptrdiff_t linesize,
1832
                 vp8_mc_func mc_func[3][3])
1833
2.66M
{
1834
2.66M
    const uint8_t *src = ref->f->data[0];
1835
1836
2.66M
    if (AV_RN32A(mv)) {
1837
1.38M
        ptrdiff_t src_linesize = linesize;
1838
1839
1.38M
        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1840
1.38M
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1841
1842
1.38M
        x_off += mv->x >> 2;
1843
1.38M
        y_off += mv->y >> 2;
1844
1845
        // edge emulation
1846
1.38M
        ff_progress_frame_await(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4);
1847
1.38M
        src += y_off * linesize + x_off;
1848
1.38M
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1849
1.38M
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1850
489k
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1851
489k
                                     src - my_idx * linesize - mx_idx,
1852
489k
                                     EDGE_EMU_LINESIZE, linesize,
1853
489k
                                     block_w + subpel_idx[1][mx],
1854
489k
                                     block_h + subpel_idx[1][my],
1855
489k
                                     x_off - mx_idx, y_off - my_idx,
1856
489k
                                     width, height);
1857
489k
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1858
489k
            src_linesize = EDGE_EMU_LINESIZE;
1859
489k
        }
1860
1.38M
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1861
1.38M
    } else {
1862
1.27M
        ff_progress_frame_await(ref, (3 + y_off + block_h) >> 4);
1863
1.27M
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1864
1.27M
                      linesize, block_h, 0, 0);
1865
1.27M
    }
1866
2.66M
}
1867
1868
/**
1869
 * chroma MC function
1870
 *
1871
 * @param s        VP8 decoding context
1872
 * @param dst1     target buffer for block data at block position (U plane)
1873
 * @param dst2     target buffer for block data at block position (V plane)
1874
 * @param ref      reference picture buffer at origin (0, 0)
1875
 * @param mv       motion vector (relative to block position) to get pixel data from
1876
 * @param x_off    horizontal position of block from origin (0, 0)
1877
 * @param y_off    vertical position of block from origin (0, 0)
1878
 * @param block_w  width of block (16, 8 or 4)
1879
 * @param block_h  height of block (always same as block_w)
1880
 * @param width    width of src/dst plane data
1881
 * @param height   height of src/dst plane data
1882
 * @param linesize size of a single line of plane data, including padding
1883
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
1884
 */
1885
static av_always_inline
1886
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1887
                   uint8_t *dst2, const ProgressFrame *ref, const VP8mv *mv,
1888
                   int x_off, int y_off, int block_w, int block_h,
1889
                   int width, int height, ptrdiff_t linesize,
1890
                   vp8_mc_func mc_func[3][3])
1891
1.94M
{
1892
1.94M
    const uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1893
1894
1.94M
    if (AV_RN32A(mv)) {
1895
955k
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1896
955k
        int my = mv->y & 7, my_idx = subpel_idx[0][my];
1897
1898
955k
        x_off += mv->x >> 3;
1899
955k
        y_off += mv->y >> 3;
1900
1901
        // edge emulation
1902
955k
        src1 += y_off * linesize + x_off;
1903
955k
        src2 += y_off * linesize + x_off;
1904
955k
        ff_progress_frame_await(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3);
1905
955k
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1906
955k
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1907
391k
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1908
391k
                                     src1 - my_idx * linesize - mx_idx,
1909
391k
                                     EDGE_EMU_LINESIZE, linesize,
1910
391k
                                     block_w + subpel_idx[1][mx],
1911
391k
                                     block_h + subpel_idx[1][my],
1912
391k
                                     x_off - mx_idx, y_off - my_idx, width, height);
1913
391k
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1914
391k
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1915
1916
391k
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1917
391k
                                     src2 - my_idx * linesize - mx_idx,
1918
391k
                                     EDGE_EMU_LINESIZE, linesize,
1919
391k
                                     block_w + subpel_idx[1][mx],
1920
391k
                                     block_h + subpel_idx[1][my],
1921
391k
                                     x_off - mx_idx, y_off - my_idx, width, height);
1922
391k
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1923
391k
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1924
563k
        } else {
1925
563k
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1926
563k
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1927
563k
        }
1928
992k
    } else {
1929
992k
        ff_progress_frame_await(ref, (3 + y_off + block_h) >> 3);
1930
992k
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1931
992k
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1932
992k
    }
1933
1.94M
}
1934
1935
static av_always_inline
1936
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
1937
                 const ProgressFrame *ref_frame, int x_off, int y_off,
1938
                 int bx_off, int by_off, int block_w, int block_h,
1939
                 int width, int height, const VP8mv *mv)
1940
1.70M
{
1941
1.70M
    VP8mv uvmv = *mv;
1942
1943
    /* Y */
1944
1.70M
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1945
1.70M
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1946
1.70M
                block_w, block_h, width, height, s->linesize,
1947
1.70M
                s->put_pixels_tab[block_w == 8]);
1948
1949
    /* U/V */
1950
1.70M
    if (s->profile == 3) {
1951
        /* this block only applies VP8; it is safe to check
1952
         * only the profile, as VP7 profile <= 1 */
1953
100k
        uvmv.x &= ~7;
1954
100k
        uvmv.y &= ~7;
1955
100k
    }
1956
1.70M
    x_off   >>= 1;
1957
1.70M
    y_off   >>= 1;
1958
1.70M
    bx_off  >>= 1;
1959
1.70M
    by_off  >>= 1;
1960
1.70M
    width   >>= 1;
1961
1.70M
    height  >>= 1;
1962
1.70M
    block_w >>= 1;
1963
1.70M
    block_h >>= 1;
1964
1.70M
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1965
1.70M
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1966
1.70M
                  &uvmv, x_off + bx_off, y_off + by_off,
1967
1.70M
                  block_w, block_h, width, height, s->uvlinesize,
1968
1.70M
                  s->put_pixels_tab[1 + (block_w == 4)]);
1969
1.70M
}
1970
1971
/* Fetch pixels for estimated mv 4 macroblocks ahead.
1972
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1973
static av_always_inline
1974
void prefetch_motion(const VP8Context *s, const VP8Macroblock *mb,
1975
                     int mb_x, int mb_y, int mb_xy, int ref)
1976
8.46M
{
1977
    /* Don't prefetch refs that haven't been used very often this frame. */
1978
8.46M
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1979
3.24M
        int x_off = mb_x << 4, y_off = mb_y << 4;
1980
3.24M
        int mx = (mb->mv.x >> 2) + x_off + 8;
1981
3.24M
        int my = (mb->mv.y >> 2) + y_off;
1982
3.24M
        uint8_t **src = s->framep[ref]->tf.f->data;
1983
3.24M
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1984
        /* For threading, a ff_thread_await_progress here might be useful, but
1985
         * it actually slows down the decoder. Since a bad prefetch doesn't
1986
         * generate bad decoder output, we don't run it here. */
1987
3.24M
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1988
3.24M
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1989
3.24M
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1990
3.24M
    }
1991
8.46M
}
1992
1993
/**
1994
 * Apply motion vectors to prediction buffer, chapter 18.
1995
 */
1996
static av_always_inline
1997
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
1998
                   VP8Macroblock *mb, int mb_x, int mb_y)
1999
1.51M
{
2000
1.51M
    int x_off = mb_x << 4, y_off = mb_y << 4;
2001
1.51M
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
2002
1.51M
    const ProgressFrame *ref = &s->framep[mb->ref_frame]->tf;
2003
1.51M
    const VP8mv *bmv = mb->bmv;
2004
2005
1.51M
    switch (mb->partitioning) {
2006
1.29M
    case VP8_SPLITMVMODE_NONE:
2007
1.29M
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2008
1.29M
                    0, 0, 16, 16, width, height, &mb->mv);
2009
1.29M
        break;
2010
59.9k
    case VP8_SPLITMVMODE_4x4: {
2011
59.9k
        int x, y;
2012
59.9k
        VP8mv uvmv;
2013
2014
        /* Y */
2015
299k
        for (y = 0; y < 4; y++) {
2016
1.19M
            for (x = 0; x < 4; x++) {
2017
958k
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
2018
958k
                            ref, &bmv[4 * y + x],
2019
958k
                            4 * x + x_off, 4 * y + y_off, 4, 4,
2020
958k
                            width, height, s->linesize,
2021
958k
                            s->put_pixels_tab[2]);
2022
958k
            }
2023
239k
        }
2024
2025
        /* U/V */
2026
59.9k
        x_off  >>= 1;
2027
59.9k
        y_off  >>= 1;
2028
59.9k
        width  >>= 1;
2029
59.9k
        height >>= 1;
2030
179k
        for (y = 0; y < 2; y++) {
2031
359k
            for (x = 0; x < 2; x++) {
2032
239k
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
2033
239k
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
2034
239k
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
2035
239k
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2036
239k
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
2037
239k
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
2038
239k
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
2039
239k
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2040
239k
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2041
239k
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2042
239k
                if (s->profile == 3) {
2043
40.2k
                    uvmv.x &= ~7;
2044
40.2k
                    uvmv.y &= ~7;
2045
40.2k
                }
2046
239k
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2047
239k
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2048
239k
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2049
239k
                              width, height, s->uvlinesize,
2050
239k
                              s->put_pixels_tab[2]);
2051
239k
            }
2052
119k
        }
2053
59.9k
        break;
2054
0
    }
2055
33.3k
    case VP8_SPLITMVMODE_16x8:
2056
33.3k
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2057
33.3k
                    0, 0, 16, 8, width, height, &bmv[0]);
2058
33.3k
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2059
33.3k
                    0, 8, 16, 8, width, height, &bmv[1]);
2060
33.3k
        break;
2061
88.2k
    case VP8_SPLITMVMODE_8x16:
2062
88.2k
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2063
88.2k
                    0, 0, 8, 16, width, height, &bmv[0]);
2064
88.2k
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2065
88.2k
                    8, 0, 8, 16, width, height, &bmv[1]);
2066
88.2k
        break;
2067
42.1k
    case VP8_SPLITMVMODE_8x8:
2068
42.1k
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2069
42.1k
                    0, 0, 8, 8, width, height, &bmv[0]);
2070
42.1k
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2071
42.1k
                    8, 0, 8, 8, width, height, &bmv[1]);
2072
42.1k
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2073
42.1k
                    0, 8, 8, 8, width, height, &bmv[2]);
2074
42.1k
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
2075
42.1k
                    8, 8, 8, 8, width, height, &bmv[3]);
2076
42.1k
        break;
2077
1.51M
    }
2078
1.51M
}
2079
2080
static av_always_inline
2081
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
2082
             const VP8Macroblock *mb)
2083
539k
{
2084
539k
    int x, y, ch;
2085
2086
539k
    if (mb->mode != MODE_I4x4) {
2087
400k
        uint8_t *y_dst = dst[0];
2088
2.00M
        for (y = 0; y < 4; y++) {
2089
1.60M
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2090
1.60M
            if (nnz4) {
2091
1.29M
                if (nnz4 & ~0x01010101) {
2092
2.26M
                    for (x = 0; x < 4; x++) {
2093
2.26M
                        if ((uint8_t) nnz4 == 1)
2094
178k
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2095
178k
                                                      td->block[y][x],
2096
178k
                                                      s->linesize);
2097
2.09M
                        else if ((uint8_t) nnz4 > 1)
2098
1.90M
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2099
1.90M
                                                   td->block[y][x],
2100
1.90M
                                                   s->linesize);
2101
2.26M
                        nnz4 >>= 8;
2102
2.26M
                        if (!nnz4)
2103
603k
                            break;
2104
2.26M
                    }
2105
689k
                } else {
2106
689k
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2107
689k
                }
2108
1.29M
            }
2109
1.60M
            y_dst += 4 * s->linesize;
2110
1.60M
        }
2111
400k
    }
2112
2113
1.61M
    for (ch = 0; ch < 2; ch++) {
2114
1.07M
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2115
1.07M
        if (nnz4) {
2116
467k
            uint8_t *ch_dst = dst[1 + ch];
2117
467k
            if (nnz4 & ~0x01010101) {
2118
610k
                for (y = 0; y < 2; y++) {
2119
1.48M
                    for (x = 0; x < 2; x++) {
2120
1.19M
                        if ((uint8_t) nnz4 == 1)
2121
116k
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2122
116k
                                                      td->block[4 + ch][(y << 1) + x],
2123
116k
                                                      s->uvlinesize);
2124
1.07M
                        else if ((uint8_t) nnz4 > 1)
2125
970k
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2126
970k
                                                   td->block[4 + ch][(y << 1) + x],
2127
970k
                                                   s->uvlinesize);
2128
1.19M
                        nnz4 >>= 8;
2129
1.19M
                        if (!nnz4)
2130
314k
                            goto chroma_idct_end;
2131
1.19M
                    }
2132
295k
                    ch_dst += 4 * s->uvlinesize;
2133
295k
                }
2134
314k
            } else {
2135
152k
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2136
152k
            }
2137
467k
        }
2138
1.07M
chroma_idct_end:
2139
1.07M
        ;
2140
1.07M
    }
2141
539k
}
2142
2143
static av_always_inline
2144
void filter_level_for_mb(const VP8Context *s, const VP8Macroblock *mb,
2145
                         VP8FilterStrength *f, int is_vp7)
2146
2.26M
{
2147
2.26M
    int interior_limit, filter_level;
2148
2149
2.26M
    if (s->segmentation.enabled) {
2150
137k
        filter_level = s->segmentation.filter_level[mb->segment];
2151
137k
        if (!s->segmentation.absolute_vals)
2152
77.8k
            filter_level += s->filter.level;
2153
137k
    } else
2154
2.12M
        filter_level = s->filter.level;
2155
2156
2.26M
    if (s->lf_delta.enabled) {
2157
472k
        filter_level += s->lf_delta.ref[mb->ref_frame];
2158
472k
        filter_level += s->lf_delta.mode[mb->mode];
2159
472k
    }
2160
2161
2.26M
    filter_level = av_clip_uintp2(filter_level, 6);
2162
2163
2.26M
    interior_limit = filter_level;
2164
2.26M
    if (s->filter.sharpness) {
2165
1.31M
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
2166
1.31M
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2167
1.31M
    }
2168
2.26M
    interior_limit = FFMAX(interior_limit, 1);
2169
2170
2.26M
    f->filter_level = filter_level;
2171
2.26M
    f->inner_limit = interior_limit;
2172
2.26M
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2173
2.26M
                      mb->mode == VP8_MVMODE_SPLIT;
2174
2.26M
}
2175
2176
static av_always_inline
2177
void filter_mb(const VP8Context *s, uint8_t *const dst[3], const VP8FilterStrength *f,
2178
               int mb_x, int mb_y, int is_vp7)
2179
1.36M
{
2180
1.36M
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2181
1.36M
    int filter_level = f->filter_level;
2182
1.36M
    int inner_limit = f->inner_limit;
2183
1.36M
    int inner_filter = f->inner_filter;
2184
1.36M
    ptrdiff_t linesize   = s->linesize;
2185
1.36M
    ptrdiff_t uvlinesize = s->uvlinesize;
2186
1.36M
    static const uint8_t hev_thresh_lut[2][64] = {
2187
1.36M
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2188
1.36M
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2189
1.36M
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2190
1.36M
          3, 3, 3, 3 },
2191
1.36M
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2192
1.36M
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2193
1.36M
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2194
1.36M
          2, 2, 2, 2 }
2195
1.36M
    };
2196
2197
1.36M
    if (!filter_level)
2198
268k
        return;
2199
2200
1.09M
    if (is_vp7) {
2201
371k
        bedge_lim_y  = filter_level;
2202
371k
        bedge_lim_uv = filter_level * 2;
2203
371k
        mbedge_lim   = filter_level + 2;
2204
727k
    } else {
2205
727k
        bedge_lim_y  =
2206
727k
        bedge_lim_uv = filter_level * 2 + inner_limit;
2207
727k
        mbedge_lim   = bedge_lim_y + 4;
2208
727k
    }
2209
2210
1.09M
    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2211
2212
1.09M
    if (mb_x) {
2213
985k
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2214
985k
                                       mbedge_lim, inner_limit, hev_thresh);
2215
985k
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2216
985k
                                       mbedge_lim, inner_limit, hev_thresh);
2217
985k
    }
2218
2219
1.09M
#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
2220
2.19M
    if (cond && inner_filter) {                                               \
2221
592k
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize,           \
2222
592k
                                             bedge_lim_y, inner_limit,        \
2223
592k
                                             hev_thresh);                     \
2224
592k
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize,           \
2225
592k
                                             bedge_lim_y, inner_limit,        \
2226
592k
                                             hev_thresh);                     \
2227
592k
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
2228
592k
                                             bedge_lim_y, inner_limit,        \
2229
592k
                                             hev_thresh);                     \
2230
592k
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] +  4, dst[2] + 4,         \
2231
592k
                                             uvlinesize,  bedge_lim_uv,       \
2232
592k
                                             inner_limit, hev_thresh);        \
2233
592k
    }
2234
2235
1.09M
    H_LOOP_FILTER_16Y_INNER(!is_vp7)
2236
2237
1.09M
    if (mb_y) {
2238
965k
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2239
965k
                                       mbedge_lim, inner_limit, hev_thresh);
2240
965k
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2241
965k
                                       mbedge_lim, inner_limit, hev_thresh);
2242
965k
    }
2243
2244
1.09M
    if (inner_filter) {
2245
592k
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
2246
592k
                                             linesize, bedge_lim_y,
2247
592k
                                             inner_limit, hev_thresh);
2248
592k
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
2249
592k
                                             linesize, bedge_lim_y,
2250
592k
                                             inner_limit, hev_thresh);
2251
592k
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2252
592k
                                             linesize, bedge_lim_y,
2253
592k
                                             inner_limit, hev_thresh);
2254
592k
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] +  4 * uvlinesize,
2255
592k
                                             dst[2] +  4 * uvlinesize,
2256
592k
                                             uvlinesize, bedge_lim_uv,
2257
592k
                                             inner_limit, hev_thresh);
2258
592k
    }
2259
2260
1.09M
    H_LOOP_FILTER_16Y_INNER(is_vp7)
2261
1.09M
}
2262
2263
static av_always_inline
2264
void filter_mb_simple(const VP8Context *s, uint8_t *dst, const VP8FilterStrength *f,
2265
                      int mb_x, int mb_y)
2266
621k
{
2267
621k
    int mbedge_lim, bedge_lim;
2268
621k
    int filter_level = f->filter_level;
2269
621k
    int inner_limit  = f->inner_limit;
2270
621k
    int inner_filter = f->inner_filter;
2271
621k
    ptrdiff_t linesize = s->linesize;
2272
2273
621k
    if (!filter_level)
2274
4.00k
        return;
2275
2276
617k
    bedge_lim  = 2 * filter_level + inner_limit;
2277
617k
    mbedge_lim = bedge_lim + 4;
2278
2279
617k
    if (mb_x)
2280
534k
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2281
617k
    if (inner_filter) {
2282
515k
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
2283
515k
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
2284
515k
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2285
515k
    }
2286
2287
617k
    if (mb_y)
2288
545k
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2289
617k
    if (inner_filter) {
2290
515k
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
2291
515k
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
2292
515k
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2293
515k
    }
2294
617k
}
2295
2296
2.22M
/* Slack added on both sides of the motion-vector bounds (mv_min = -MARGIN,
 * mv_max = last macroblock position + MARGIN). 16 << 2 == 64, the same
 * amount by which the bounds are shifted per macroblock.
 * NOTE(review): presumably 16 pixels in quarter-pel units -- confirm. */
#define MARGIN (16 << 2)
2297
static av_always_inline
2298
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2299
                            const VP8Frame *prev_frame, int is_vp7)
2300
83.5k
{
2301
83.5k
    VP8Context *s = avctx->priv_data;
2302
83.5k
    int mb_x, mb_y;
2303
2304
83.5k
    s->mv_bounds.mv_min.y = -MARGIN;
2305
83.5k
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2306
460k
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2307
382k
        VP8Macroblock *mb = s->macroblocks_base +
2308
382k
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
2309
382k
        int mb_xy = mb_y * s->mb_width;
2310
2311
382k
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2312
2313
382k
        s->mv_bounds.mv_min.x = -MARGIN;
2314
382k
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2315
2316
3.98M
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2317
3.61M
            if (vpx_rac_is_end(&s->c)) {
2318
4.74k
                return AVERROR_INVALIDDATA;
2319
4.74k
            }
2320
3.60M
            if (mb_y == 0)
2321
3.60M
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2322
3.60M
                         DC_PRED * 0x01010101);
2323
3.60M
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map + mb_xy,
2324
3.60M
                           prev_frame && prev_frame->seg_map ?
2325
1.74M
                           prev_frame->seg_map + mb_xy : NULL, 1, is_vp7);
2326
3.60M
            s->mv_bounds.mv_min.x -= 64;
2327
3.60M
            s->mv_bounds.mv_max.x -= 64;
2328
3.60M
        }
2329
377k
        s->mv_bounds.mv_min.y -= 64;
2330
377k
        s->mv_bounds.mv_max.y -= 64;
2331
377k
    }
2332
78.7k
    return 0;
2333
83.5k
}
2334
2335
/* VP7 instantiation of vp78_decode_mv_mb_modes(): is_vp7 is fixed to IS_VP7. */
static int
vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                       const VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}
2340
2341
/* VP8 instantiation of vp78_decode_mv_mb_modes(): is_vp7 is fixed to IS_VP8. */
static int
vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                       const VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}
2346
2347
#if HAVE_THREADS
/*
 * Positions are packed into one int as (mb_y << 16) | (mb_x & 0xFFFF).
 *
 * check_thread_pos(): if thread `otd` has not yet published a position of at
 * least (mb_y_check, mb_x_check), record the awaited position in
 * td->wait_mb_pos and sleep on otd's condition variable until it has.
 * td->wait_mb_pos is reset to INT_MAX afterwards.
 *
 * Every macro parameter is parenthesized so that arbitrary expressions can be
 * passed without depending on operator precedence at the call site.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);             \
        if (atomic_load(&(otd)->thread_mb_pos) < tmp) {                       \
            pthread_mutex_lock(&(otd)->lock);                                 \
            atomic_store(&(td)->wait_mb_pos, tmp);                            \
            do {                                                              \
                if (atomic_load(&(otd)->thread_mb_pos) >= tmp)                \
                    break;                                                    \
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);                \
            } while (1);                                                      \
            atomic_store(&(td)->wait_mb_pos, INT_MAX);                        \
            pthread_mutex_unlock(&(otd)->lock);                               \
        }                                                                     \
    } while (0)

/*
 * update_pos(): publish (mb_y, mb_x) as td's current position and, when slice
 * threading with more than one job is active, wake up waiters if a
 * neighbouring thread is waiting for a position that has now been reached
 * (or unconditionally if either neighbour pointer is NULL).
 *
 * In addition to its arguments this macro reads the identifiers `avctx`,
 * `num_jobs`, `next_td` and `prev_td` from the enclosing scope; they must be
 * declared under exactly these names wherever the macro is used.
 */
#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);            \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != (td) && pos >= atomic_load(&next_td->wait_mb_pos)) || \
            (prev_td != (td) && pos >= atomic_load(&prev_td->wait_mb_pos));   \
        atomic_store(&(td)->thread_mb_pos, pos);                              \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&(td)->lock);                                  \
            pthread_cond_broadcast(&(td)->cond);                              \
            pthread_mutex_unlock(&(td)->lock);                                \
        }                                                                     \
    } while (0)
#else
/* Without thread support both macros expand to an empty statement. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif
2384
2385
/**
 * Decode and reconstruct one macroblock row, without loop filtering.
 *
 * The row index is taken from the upper 16 bits of the calling thread's
 * td->thread_mb_pos. For each macroblock the function optionally decodes the
 * mode (when s->mb_layout is 0), decodes coefficients, runs intra or inter
 * prediction, adds the residual, computes the filter strength when
 * s->deblock_filter is set, and publishes its progress with update_pos().
 *
 * @param avctx    codec context; its priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring thread data
 * @param threadnr index into s->thread_data for this thread
 * @param is_vp7   non-zero for VP7 behaviour
 * @return 0 on success, AVERROR_INVALIDDATA if s->c or the coefficient
 *         partition of this row reports end of data
 */
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr, int is_vp7)
{
    /* avctx, num_jobs, prev_td and next_td are read by update_pos(). */
    VP8Context *s              = avctx->priv_data;
    VP8ThreadData *td          = &s->thread_data[threadnr];
    VP8ThreadData *prev_td, *next_td;
    int mb_y                   = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_xy                  = mb_y * s->mb_width;
    int num_jobs               = s->num_jobs;
    const int lag              = is_vp7 ? 2 : 1;
    const VP8Frame *prev_frame = s->prev_frame;
    VP8Frame *curframe         = s->curframe;
    VPXRangeCoder *coef_rac    = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3];

    dst[0] = curframe->tf.f->data[0] + 16 * mb_y * s->linesize;
    dst[1] = curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize;
    dst[2] = curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize;

    if (vpx_rac_is_end(&s->c))
         return AVERROR_INVALIDDATA;

    /* the first/last row has no previous/next neighbour: point at ourselves */
    prev_td = mb_y == 0
              ? td : &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    next_td = mb_y == s->mb_height - 1
              ? td : &s->thread_data[(jobnr + 1) % num_jobs];

    if (s->mb_layout == 1) {
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    } else {
        // Make sure the previous frame has read its segmentation map,
        // if we reuse the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_progress_frame_await(&prev_frame->tf, mb_y);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (int mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (vpx_rac_is_end(&s->c))
            return AVERROR_INVALIDDATA;

        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            int wait_x = mb_x + lag;

            /* thread 0 waits on a position offset by mb_width + 3 */
            if (threadnr == 0)
                wait_x += s->mb_width + 3;
            check_thread_pos(td, prev_td, wait_x, mb_y - lag);
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout) {
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y,
                           curframe->seg_map + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map + mb_xy : NULL,
                           0, is_vp7);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_PREVIOUS);

        if (!mb->skip) {
            if (vpx_rac_is_end(coef_rac))
                return AVERROR_INVALIDDATA;
            decode_mb_coeffs(s, td, coef_rac, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
        }

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_GOLDEN);

        /* mb->skip is deliberately read again here instead of being cached.
         * NOTE(review): presumably decode_mb_coeffs() can update it -- confirm. */
        if (mb->skip) {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        } else {
            idct_mb(s, td, dst, mb);
        }

        if (s->deblock_filter) {
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

            /* only the last of several threads saves the border here */
            if (num_jobs != 1 && threadnr == num_jobs - 1) {
                if (s->filter.simple)
                    backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                     NULL, NULL, s->linesize, 0, 1);
                else
                    backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                     dst[1], dst[2], s->linesize, s->uvlinesize, 0);
            }
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_ALTREF);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        /* NOTE(review): inside this loop mb_x < s->mb_width always holds, so
         * the first branch cannot be taken; kept as in the original. */
        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }

    return 0;
}
2517
2518
/**
 * Loop-filter one macroblock row.
 *
 * The row index is taken from the upper 16 bits of td->thread_mb_pos. For
 * each macroblock the function waits for the neighbouring rows where needed,
 * saves the unfiltered border when running as a single job, applies either
 * the simple or the normal filter and publishes its progress as
 * (s->mb_width + 3) + mb_x.
 *
 * @param avctx    codec context; its priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring thread data
 * @param threadnr index into s->thread_data for this thread
 * @param is_vp7   forwarded to filter_mb()
 */
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr, int is_vp7)
{
    /* avctx, num_jobs, prev_td and next_td are read by update_pos(). */
    VP8Context *s     = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    VP8ThreadData *prev_td, *next_td;
    int mb_y          = atomic_load(&td->thread_mb_pos) >> 16;
    int num_jobs      = s->num_jobs;
    AVFrame *pic      = s->curframe->tf.f;
    uint8_t *dst[3];

    dst[0] = pic->data[0] + 16 * mb_y * s->linesize;
    dst[1] = pic->data[1] +  8 * mb_y * s->uvlinesize;
    dst[2] = pic->data[2] +  8 * mb_y * s->uvlinesize;

    /* the first/last row has no previous/next neighbour: point at ourselves */
    prev_td = mb_y == 0
              ? td : &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    next_td = mb_y == s->mb_height - 1
              ? td : &s->thread_data[(jobnr + 1) % num_jobs];

    for (int mb_x = 0; mb_x < s->mb_width; mb_x++) {
        const VP8FilterStrength *f = &td->filter_strength[mb_x];

        if (prev_td != td) {
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        }
        if (next_td != td && next_td != &s->thread_data[0]) {
            check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
        }

        /* with a single job the border is saved here, right before filtering */
        if (s->filter.simple) {
            if (num_jobs == 1)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        } else {
            if (num_jobs == 1)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        }

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}
2570
2571
static av_always_inline
2572
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2573
                              int threadnr, int is_vp7)
2574
121k
{
2575
121k
    const VP8Context *s = avctx->priv_data;
2576
121k
    VP8ThreadData *td = &s->thread_data[jobnr];
2577
121k
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
2578
121k
    VP8Frame *curframe = s->curframe;
2579
121k
    int mb_y, num_jobs = s->num_jobs;
2580
121k
    int ret;
2581
2582
121k
    td->thread_nr = threadnr;
2583
121k
    td->mv_bounds.mv_min.y   = -MARGIN - 64 * threadnr;
2584
121k
    td->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2585
449k
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2586
426k
        atomic_store(&td->thread_mb_pos, mb_y << 16);
2587
426k
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2588
426k
        if (ret < 0) {
2589
98.3k
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2590
98.3k
            return ret;
2591
98.3k
        }
2592
328k
        if (s->deblock_filter)
2593
221k
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2594
328k
        update_pos(td, mb_y, INT_MAX & 0xFFFF);
2595
2596
328k
        td->mv_bounds.mv_min.y -= 64 * num_jobs;
2597
328k
        td->mv_bounds.mv_max.y -= 64 * num_jobs;
2598
2599
328k
        if (avctx->active_thread_type == FF_THREAD_FRAME)
2600
0
            ff_progress_frame_report(&curframe->tf, mb_y);
2601
328k
    }
2602
2603
22.9k
    return 0;
2604
121k
}
2605
2606
/* VP7 job function handed to execute2(); fixes is_vp7 to IS_VP7. */
static int
vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                         int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}
2611
2612
/* VP8 job function handed to execute2(); fixes is_vp7 to IS_VP8. */
static int
vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                         int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}
2617
2618
static av_always_inline
2619
int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
2620
                      const AVPacket *avpkt, int is_vp7)
2621
242k
{
2622
242k
    VP8Context *s = avctx->priv_data;
2623
242k
    int ret, i, referenced, num_jobs;
2624
242k
    enum AVDiscard skip_thresh;
2625
242k
    VP8Frame *av_uninit(curframe), *prev_frame;
2626
2627
242k
    if (is_vp7)
2628
114k
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2629
128k
    else
2630
128k
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2631
2632
242k
    if (ret < 0)
2633
59.3k
        goto err;
2634
2635
183k
    if (!is_vp7 && s->actually_webp) {
2636
        // VP8 in WebP is supposed to be intra-only. Enforce this here
2637
        // to ensure that output is reproducible with frame-threading.
2638
23.5k
        if (!s->keyframe)
2639
5.50k
            return AVERROR_INVALIDDATA;
2640
        // avctx->pix_fmt already set in caller.
2641
159k
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2642
0
        s->pix_fmt = get_pixel_format(s);
2643
0
        if (s->pix_fmt < 0) {
2644
0
            ret = AVERROR(EINVAL);
2645
0
            goto err;
2646
0
        }
2647
0
        avctx->pix_fmt = s->pix_fmt;
2648
0
    }
2649
2650
177k
    prev_frame = s->framep[VP8_FRAME_CURRENT];
2651
2652
177k
    referenced = s->update_last || s->update_golden == VP8_FRAME_CURRENT ||
2653
177k
                 s->update_altref == VP8_FRAME_CURRENT;
2654
2655
177k
    skip_thresh = !referenced ? AVDISCARD_NONREF
2656
177k
                              : !s->keyframe ? AVDISCARD_NONKEY
2657
169k
                                             : AVDISCARD_ALL;
2658
2659
177k
    if (avctx->skip_frame >= skip_thresh) {
2660
37.3k
        s->invisible = 1;
2661
37.3k
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2662
37.3k
        goto skip_decode;
2663
37.3k
    }
2664
140k
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2665
2666
    // release no longer referenced frames
2667
840k
    for (i = 0; i < 5; i++)
2668
700k
        if (s->frames[i].tf.f &&
2669
700k
            &s->frames[i] != prev_frame &&
2670
700k
            &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] &&
2671
700k
            &s->frames[i] != s->framep[VP8_FRAME_GOLDEN]   &&
2672
700k
            &s->frames[i] != s->framep[VP8_FRAME_ALTREF])
2673
93.7k
            vp8_release_frame(&s->frames[i]);
2674
2675
140k
    if (!s->colorspace)
2676
133k
        avctx->colorspace = AVCOL_SPC_BT470BG;
2677
140k
    if (s->fullrange)
2678
14.5k
        avctx->color_range = AVCOL_RANGE_JPEG;
2679
125k
    else
2680
125k
        avctx->color_range = AVCOL_RANGE_MPEG;
2681
2682
    /* Given that arithmetic probabilities are updated every frame, it's quite
2683
     * likely that the values we have on a random interframe are complete
2684
     * junk if we didn't start decode on a keyframe. So just don't display
2685
     * anything rather than junk. */
2686
140k
    if (!s->keyframe && (!s->framep[VP8_FRAME_PREVIOUS] ||
2687
104k
                         !s->framep[VP8_FRAME_GOLDEN]   ||
2688
104k
                         !s->framep[VP8_FRAME_ALTREF])) {
2689
13.0k
        av_log(avctx, AV_LOG_WARNING,
2690
13.0k
               "Discarding interframe without a prior keyframe!\n");
2691
13.0k
        ret = AVERROR_INVALIDDATA;
2692
13.0k
        goto err;
2693
13.0k
    }
2694
2695
127k
    curframe = vp8_find_free_buffer(s);
2696
127k
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2697
987
        goto err;
2698
126k
    s->framep[VP8_FRAME_CURRENT] = curframe;
2699
126k
    if (s->keyframe)
2700
34.1k
        curframe->tf.f->flags |= AV_FRAME_FLAG_KEY;
2701
91.9k
    else
2702
91.9k
        curframe->tf.f->flags &= ~AV_FRAME_FLAG_KEY;
2703
126k
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2704
126k
                                            : AV_PICTURE_TYPE_P;
2705
2706
    // check if golden and altref are swapped
2707
126k
    if (s->update_altref != VP8_FRAME_NONE)
2708
119k
        s->next_framep[VP8_FRAME_ALTREF] = s->framep[s->update_altref];
2709
6.46k
    else
2710
6.46k
        s->next_framep[VP8_FRAME_ALTREF] = s->framep[VP8_FRAME_ALTREF];
2711
2712
126k
    if (s->update_golden != VP8_FRAME_NONE)
2713
106k
        s->next_framep[VP8_FRAME_GOLDEN] = s->framep[s->update_golden];
2714
19.3k
    else
2715
19.3k
        s->next_framep[VP8_FRAME_GOLDEN] = s->framep[VP8_FRAME_GOLDEN];
2716
2717
126k
    if (s->update_last)
2718
112k
        s->next_framep[VP8_FRAME_PREVIOUS] = curframe;
2719
13.9k
    else
2720
13.9k
        s->next_framep[VP8_FRAME_PREVIOUS] = s->framep[VP8_FRAME_PREVIOUS];
2721
2722
126k
    s->next_framep[VP8_FRAME_CURRENT] = curframe;
2723
2724
126k
    if (!is_vp7 && !s->actually_webp)
2725
24.9k
        ff_thread_finish_setup(avctx);
2726
2727
126k
    if (!is_vp7 && avctx->hwaccel) {
2728
0
        const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
2729
0
        ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size);
2730
0
        if (ret < 0)
2731
0
            goto err;
2732
2733
0
        ret = hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2734
0
        if (ret < 0)
2735
0
            goto err;
2736
2737
0
        ret = hwaccel->end_frame(avctx);
2738
0
        if (ret < 0)
2739
0
            goto err;
2740
2741
126k
    } else {
2742
126k
        s->linesize   = curframe->tf.f->linesize[0];
2743
126k
        s->uvlinesize = curframe->tf.f->linesize[1];
2744
2745
126k
        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2746
        /* Zero macroblock structures for top/top-left prediction
2747
         * from outside the frame. */
2748
126k
        if (!s->mb_layout)
2749
42.5k
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2750
42.5k
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
2751
126k
        if (!s->mb_layout && s->keyframe)
2752
24.1k
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2753
2754
126k
        memset(s->ref_count, 0, sizeof(s->ref_count));
2755
2756
126k
        if (s->mb_layout == 1) {
2757
            // Make sure the previous frame has read its segmentation map,
2758
            // if we reuse the same map.
2759
83.5k
            if (prev_frame && s->segmentation.enabled &&
2760
83.5k
                !s->segmentation.update_map)
2761
0
                ff_progress_frame_await(&prev_frame->tf, 1);
2762
83.5k
            if (is_vp7)
2763
83.5k
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2764
0
            else
2765
0
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2766
83.5k
            if (ret < 0)
2767
4.74k
                goto err;
2768
83.5k
        }
2769
2770
121k
        if (avctx->active_thread_type == FF_THREAD_FRAME)
2771
0
            num_jobs = 1;
2772
121k
        else
2773
121k
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2774
121k
        s->num_jobs   = num_jobs;
2775
121k
        s->curframe   = curframe;
2776
121k
        s->prev_frame = prev_frame;
2777
121k
        s->mv_bounds.mv_min.y   = -MARGIN;
2778
121k
        s->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
2779
1.09M
        for (i = 0; i < MAX_THREADS; i++) {
2780
970k
            VP8ThreadData *td = &s->thread_data[i];
2781
970k
            atomic_init(&td->thread_mb_pos, 0);
2782
970k
            atomic_init(&td->wait_mb_pos, INT_MAX);
2783
970k
        }
2784
121k
        if (is_vp7)
2785
78.7k
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2786
78.7k
                            num_jobs);
2787
42.5k
        else
2788
42.5k
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2789
42.5k
                            num_jobs);
2790
121k
    }
2791
2792
121k
    ff_progress_frame_report(&curframe->tf, INT_MAX);
2793
121k
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2794
2795
158k
skip_decode:
2796
    // if future frames don't use the updated probabilities,
2797
    // reset them to the values we saved
2798
158k
    if (!s->update_probabilities)
2799
80.1k
        s->prob[0] = s->prob[1];
2800
2801
158k
    if (!s->invisible) {
2802
104k
        if ((ret = av_frame_ref(rframe, curframe->tf.f)) < 0)
2803
0
            return ret;
2804
104k
        *got_frame = 1;
2805
104k
    }
2806
2807
158k
    return avpkt->size;
2808
78.1k
err:
2809
78.1k
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2810
78.1k
    return ret;
2811
158k
}
2812
2813
/**
 * Close callback: calls vp8_decode_flush_impl() with its second argument
 * set to 1.
 *
 * @return always 0
 */
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 1);
    return 0;
}
2819
2820
/**
 * Initialisation shared by the VP7 and VP8 decoders: links the private
 * context to avctx, sets the default pixel formats, initialises the video
 * and VP7/VP8 DSP function tables and installs the zigzag scan order.
 */
static av_cold void vp78_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    s->pix_fmt     = AV_PIX_FMT_NONE;
    s->avctx       = avctx;

    ff_videodsp_init(&s->vdsp, 8);
    ff_vp78dsp_init(&s->vp8dsp);

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
}
2835
2836
#if CONFIG_VP8_DECODER
2837
/* VP8 row decoder installed in s->decode_mb_row_no_filter (is_vp7 = 0). */
static int
vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                            int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}
2842
2843
/* VP8 row loop filter installed in s->filter_mb_row (is_vp7 = 0). */
static void
vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                  int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}
2848
2849
/**
 * VP8 entry point for decoding one packet; forwards to vp78_decode_frame()
 * with is_vp7 fixed to IS_VP8 and returns its result unchanged.
 */
int
ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                    int *got_frame, AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP8);
}
2854
2855
/**
 * Initialise a VP8 decoder: run the shared VP7/VP8 setup, install the VP8
 * row callbacks and initialise the VP8 prediction and DSP function tables.
 *
 * @return always 0
 */
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    vp78_decode_init(avctx);

    s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
    s->filter_mb_row           = vp8_filter_mb_row;

    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    return 0;
}
2867
2868
#if HAVE_THREADS
2869
/**
 * Make dst refer to the same frame data as src by replacing, in this order,
 * its progress frame, its segmentation map and its hwaccel private data.
 */
static void
vp8_replace_frame(VP8Frame *dst, const VP8Frame *src)
{
    ff_progress_frame_replace(&dst->tf, &src->tf);
    av_refstruct_replace(&dst->seg_map, src->seg_map);
    av_refstruct_replace(&dst->hwaccel_picture_private, src->hwaccel_picture_private);
}
2876
2877
0
/* Map a pointer into s_src->frames[] to the element with the same index in
 * s->frames[]; a NULL pointer stays NULL. Reads the identifiers `s` and
 * `s_src` from the scope in which it is expanded. */
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2878
2879
/**
 * Copy the decoder state a following frame thread needs from src to dst.
 *
 * If dst already has macroblock buffers and the macroblock dimensions
 * differ, those buffers are freed and the dimensions taken from src. The
 * pixel format, probabilities, segmentation, loop-filter deltas and sign
 * biases are copied, every frame slot is re-referenced and the reference
 * pointers are taken from src's next_framep, rebased onto dst's frame array.
 *
 * @return always 0
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    /* `s` and `s_src` are read by REBASE() and must keep these names. */
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    const int dims_differ = s_src->mb_width  != s->mb_width ||
                            s_src->mb_height != s->mb_height;

    if (s->macroblocks_base && dims_differ) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    /* take prob[1] when the source frame does not keep its updates */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (int i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++)
        vp8_replace_frame(&s->frames[i], &s_src->frames[i]);

    for (int i = 0; i < 4; i++)
        s->framep[i] = REBASE(s_src->next_framep[i]);

    return 0;
}
2907
#endif /* HAVE_THREADS */
2908
#endif /* CONFIG_VP8_DECODER */
2909
2910
#if CONFIG_VP7_DECODER
2911
/**
 * VP7 entry point for the shared decode_mb_row_no_filter() routine.
 *
 * Forwards all arguments unchanged and passes 1 as the final argument
 * (presumably the "is VP7" selector - confirm against
 * decode_mb_row_no_filter()).
 *
 * @return whatever decode_mb_row_no_filter() returns
 */
static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    const int ret = decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);

    return ret;
}
2916
2917
/**
 * VP7 entry point for the shared filter_mb_row() routine.
 *
 * Forwards all arguments unchanged and passes 1 as the final argument
 * (presumably the "is VP7" selector - confirm against filter_mb_row()).
 */
static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx,
                  tdata,
                  jobnr,
                  threadnr,
                  1);
}
2922
2923
/**
 * Decode callback registered for the VP7 decoder.
 *
 * Delegates to vp78_decode_frame() with IS_VP7 as the variant argument.
 *
 * @return whatever vp78_decode_frame() returns
 */
static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                            int *got_frame, AVPacket *avpkt)
{
    const int ret = vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP7);

    return ret;
}
2928
2929
/**
 * Init callback registered for the VP7 decoder.
 *
 * Runs the shared vp78_decode_init(), initialises the prediction and DSP
 * function tables for VP7 and installs the VP7 row callbacks in the
 * private context.
 *
 * @return always 0
 */
av_cold static int vp7_decode_init(AVCodecContext *avctx)
{
    VP8Context *ctx = avctx->priv_data;

    /* Shared VP7/VP8 setup comes first. */
    vp78_decode_init(avctx);

    /* Function tables for the VP7 variant. */
    ff_h264_pred_init(&ctx->hpc, AV_CODEC_ID_VP7, 8, 1);
    ff_vp7dsp_init(&ctx->vp8dsp);

    /* Per-row callbacks. */
    ctx->filter_mb_row           = vp7_filter_mb_row;
    ctx->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;

    return 0;
}
2941
2942
const FFCodec ff_vp7_decoder = {
2943
    .p.name                = "vp7",
2944
    CODEC_LONG_NAME("On2 VP7"),
2945
    .p.type                = AVMEDIA_TYPE_VIDEO,
2946
    .p.id                  = AV_CODEC_ID_VP7,
2947
    .priv_data_size        = sizeof(VP8Context),
2948
    .init                  = vp7_decode_init,
2949
    .close                 = ff_vp8_decode_free,
2950
    FF_CODEC_DECODE_CB(vp7_decode_frame),
2951
    .p.capabilities        = AV_CODEC_CAP_DR1,
2952
    .flush                 = vp8_decode_flush,
2953
    .caps_internal         = FF_CODEC_CAP_USES_PROGRESSFRAMES,
2954
};
2955
#endif /* CONFIG_VP7_DECODER */
2956
2957
#if CONFIG_VP8_DECODER
2958
const FFCodec ff_vp8_decoder = {
2959
    .p.name                = "vp8",
2960
    CODEC_LONG_NAME("On2 VP8"),
2961
    .p.type                = AVMEDIA_TYPE_VIDEO,
2962
    .p.id                  = AV_CODEC_ID_VP8,
2963
    .priv_data_size        = sizeof(VP8Context),
2964
    .init                  = ff_vp8_decode_init,
2965
    .close                 = ff_vp8_decode_free,
2966
    FF_CODEC_DECODE_CB(ff_vp8_decode_frame),
2967
    .p.capabilities        = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2968
                             AV_CODEC_CAP_SLICE_THREADS,
2969
    .caps_internal         = FF_CODEC_CAP_USES_PROGRESSFRAMES,
2970
    .flush                 = vp8_decode_flush,
2971
    UPDATE_THREAD_CONTEXT(vp8_decode_update_thread_context),
2972
    .hw_configs            = (const AVCodecHWConfigInternal *const []) {
2973
#if CONFIG_VP8_VAAPI_HWACCEL
2974
                               HWACCEL_VAAPI(vp8),
2975
#endif
2976
#if CONFIG_VP8_NVDEC_HWACCEL
2977
                               HWACCEL_NVDEC(vp8),
2978
#endif
2979
                               NULL
2980
                           },
2981
};
2982
#endif /* CONFIG_VP8_DECODER */