/src/ffmpeg/libavcodec/vp8.c
Line | Count | Source |
1 | | /* |
2 | | * VP7/VP8 compatible video decoder |
3 | | * |
4 | | * Copyright (C) 2010 David Conrad |
5 | | * Copyright (C) 2010 Ronald S. Bultje |
6 | | * Copyright (C) 2010 Fiona Glaser |
7 | | * Copyright (C) 2012 Daniel Kang |
8 | | * Copyright (C) 2014 Peter Ross |
9 | | * |
10 | | * This file is part of FFmpeg. |
11 | | * |
12 | | * FFmpeg is free software; you can redistribute it and/or |
13 | | * modify it under the terms of the GNU Lesser General Public |
14 | | * License as published by the Free Software Foundation; either |
15 | | * version 2.1 of the License, or (at your option) any later version. |
16 | | * |
17 | | * FFmpeg is distributed in the hope that it will be useful, |
18 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 | | * Lesser General Public License for more details. |
21 | | * |
22 | | * You should have received a copy of the GNU Lesser General Public |
23 | | * License along with FFmpeg; if not, write to the Free Software |
24 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
25 | | */ |
26 | | |
27 | | #include "config_components.h" |
28 | | |
29 | | #include "libavutil/mem.h" |
30 | | #include "libavutil/mem_internal.h" |
31 | | |
32 | | #include "avcodec.h" |
33 | | #include "codec_internal.h" |
34 | | #include "decode.h" |
35 | | #include "hwaccel_internal.h" |
36 | | #include "hwconfig.h" |
37 | | #include "mathops.h" |
38 | | #include "progressframe.h" |
39 | | #include "libavutil/refstruct.h" |
40 | | #include "thread.h" |
41 | | #include "vp8.h" |
42 | | #include "vp89_rac.h" |
43 | | #include "vp8data.h" |
44 | | #include "vpx_rac.h" |
45 | | |
46 | | #if ARCH_ARM |
47 | | # include "arm/vp8.h" |
48 | | #endif |
49 | | |
50 | | // fixme: add 1 bit to all the calls to this? |
51 | | static int vp8_rac_get_sint(VPXRangeCoder *c, int bits) |
52 | 886k | { |
53 | 886k | int v; |
54 | | |
55 | 886k | if (!vp89_rac_get(c)) |
56 | 543k | return 0; |
57 | | |
58 | 342k | v = vp89_rac_get_uint(c, bits); |
59 | | |
60 | 342k | if (vp89_rac_get(c)) |
61 | 136k | v = -v; |
62 | | |
63 | 342k | return v; |
64 | 886k | } |
65 | | |
66 | | static int vp8_rac_get_nn(VPXRangeCoder *c) |
67 | 2.04M | { |
68 | 2.04M | int v = vp89_rac_get_uint(c, 7) << 1; |
69 | 2.04M | return v + !v; |
70 | 2.04M | } |
71 | | |
72 | | // DCTextra |
73 | | static int vp8_rac_get_coeff(VPXRangeCoder *c, const uint8_t *prob) |
74 | 39.7M | { |
75 | 39.7M | int v = 0; |
76 | | |
77 | 436M | do { |
78 | 436M | v = (v<<1) + vpx_rac_get_prob(c, *prob++); |
79 | 436M | } while (*prob); |
80 | | |
81 | 39.7M | return v; |
82 | 39.7M | } |
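/* Illustrative summary (not part of vp8.c): how these helpers are used below.
 * vp8_rac_get_sint(c, bits) reads a presence flag, a `bits`-bit magnitude and
 * a sign bit, so the 4-bit quantizer deltas read later take values in
 * [-15, 15]. vp8_rac_get_nn() maps a 7-bit value x to x ? 2 * x : 1, keeping
 * the updated motion-vector probabilities nonzero. vp8_rac_get_coeff() reads
 * one bit per entry of a zero-terminated probability list, assembling the
 * extra DCT magnitude bits MSB-first. */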
83 | | |
84 | | static void free_buffers(VP8Context *s) |
85 | 40.6k | { |
86 | 40.6k | int i; |
87 | 40.6k | if (s->thread_data) |
88 | 231k | for (i = 0; i < MAX_THREADS; i++) { |
89 | 206k | #if HAVE_THREADS |
90 | 206k | pthread_cond_destroy(&s->thread_data[i].cond); |
91 | 206k | pthread_mutex_destroy(&s->thread_data[i].lock); |
92 | 206k | #endif |
93 | 206k | av_freep(&s->thread_data[i].filter_strength); |
94 | 206k | } |
95 | 40.6k | av_freep(&s->thread_data); |
96 | 40.6k | av_freep(&s->macroblocks_base); |
97 | 40.6k | av_freep(&s->intra4x4_pred_mode_top); |
98 | 40.6k | av_freep(&s->top_nnz); |
99 | 40.6k | av_freep(&s->top_border); |
100 | | |
101 | 40.6k | s->macroblocks = NULL; |
102 | 40.6k | } |
103 | | |
104 | | static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) |
105 | 183k | { |
106 | 183k | int ret = ff_progress_frame_get_buffer(s->avctx, &f->tf, |
107 | 183k | ref ? AV_GET_BUFFER_FLAG_REF : 0); |
108 | 183k | if (ret < 0) |
109 | 987 | return ret; |
110 | 182k | f->seg_map = av_refstruct_allocz(s->mb_width * s->mb_height); |
111 | 182k | if (!f->seg_map) { |
112 | 0 | ret = AVERROR(ENOMEM); |
113 | 0 | goto fail; |
114 | 0 | } |
115 | 182k | ret = ff_hwaccel_frame_priv_alloc(s->avctx, &f->hwaccel_picture_private); |
116 | 182k | if (ret < 0) |
117 | 0 | goto fail; |
118 | | |
119 | 182k | return 0; |
120 | | |
121 | 0 | fail: |
122 | 0 | av_refstruct_unref(&f->seg_map); |
123 | 0 | ff_progress_frame_unref(&f->tf); |
124 | 0 | return ret; |
125 | 182k | } |
126 | | |
127 | | static void vp8_release_frame(VP8Frame *f) |
128 | 530k | { |
129 | 530k | av_refstruct_unref(&f->seg_map); |
130 | 530k | av_refstruct_unref(&f->hwaccel_picture_private); |
131 | 530k | ff_progress_frame_unref(&f->tf); |
132 | 530k | } |
133 | | |
134 | | static av_cold void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) |
135 | 76.3k | { |
136 | 76.3k | VP8Context *s = avctx->priv_data; |
137 | 76.3k | int i; |
138 | | |
139 | 458k | for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) |
140 | 381k | vp8_release_frame(&s->frames[i]); |
141 | 76.3k | memset(s->framep, 0, sizeof(s->framep)); |
142 | | |
143 | 76.3k | if (free_mem) |
144 | 40.6k | free_buffers(s); |
145 | | |
146 | 76.3k | if (FF_HW_HAS_CB(avctx, flush)) |
147 | 0 | FF_HW_SIMPLE_CALL(avctx, flush); |
148 | 76.3k | } |
149 | | |
150 | | static av_cold void vp8_decode_flush(AVCodecContext *avctx) |
151 | 35.7k | { |
152 | 35.7k | vp8_decode_flush_impl(avctx, 0); |
153 | 35.7k | } |
154 | | |
155 | | static VP8Frame *vp8_find_free_buffer(VP8Context *s) |
156 | 183k | { |
157 | 183k | VP8Frame *frame = NULL; |
158 | 183k | int i; |
159 | | |
160 | | // find a free buffer |
161 | 322k | for (i = 0; i < 5; i++) |
162 | 322k | if (&s->frames[i] != s->framep[VP8_FRAME_CURRENT] && |
163 | 322k | &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] && |
164 | 322k | &s->frames[i] != s->framep[VP8_FRAME_GOLDEN] && |
165 | 322k | &s->frames[i] != s->framep[VP8_FRAME_ALTREF]) { |
166 | 183k | frame = &s->frames[i]; |
167 | 183k | break; |
168 | 183k | } |
169 | 183k | if (i == 5) { |
170 | 0 | av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n"); |
171 | 0 | abort(); |
172 | 0 | } |
173 | 183k | if (frame->tf.f) |
174 | 55.1k | vp8_release_frame(frame); |
175 | | |
176 | 183k | return frame; |
177 | 183k | } |
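/* Illustrative note (not part of vp8.c): s->frames holds five slots while at
 * most four can be pinned as CURRENT, PREVIOUS, GOLDEN and ALTREF, so the
 * search above always finds a free slot and the abort() path is effectively
 * unreachable. */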
178 | | |
179 | | static enum AVPixelFormat get_pixel_format(VP8Context *s) |
180 | 4.56k | { |
181 | 4.56k | enum AVPixelFormat pix_fmts[] = { |
182 | | #if CONFIG_VP8_VAAPI_HWACCEL |
183 | | AV_PIX_FMT_VAAPI, |
184 | | #endif |
185 | | #if CONFIG_VP8_NVDEC_HWACCEL |
186 | | AV_PIX_FMT_CUDA, |
187 | | #endif |
188 | 4.56k | AV_PIX_FMT_YUV420P, |
189 | 4.56k | AV_PIX_FMT_NONE, |
190 | 4.56k | }; |
191 | | |
192 | 4.56k | return ff_get_format(s->avctx, pix_fmts); |
193 | 4.56k | } |
194 | | |
195 | | static av_always_inline |
196 | | int update_dimensions(VP8Context *s, int width, int height, int is_vp7) |
197 | 30.2k | { |
198 | 30.2k | AVCodecContext *avctx = s->avctx; |
199 | 30.2k | int i, ret, dim_reset = 0; |
200 | | |
201 | 30.2k | if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base || |
202 | 30.2k | height != s->avctx->height) { |
203 | 27.7k | vp8_decode_flush_impl(s->avctx, 1); |
204 | | |
205 | 27.7k | ret = ff_set_dimensions(s->avctx, width, height); |
206 | 27.7k | if (ret < 0) |
207 | 4.47k | return ret; |
208 | | |
209 | 23.2k | dim_reset = (s->macroblocks_base != NULL); |
210 | 23.2k | } |
211 | | |
212 | 25.7k | if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) && |
213 | 25.7k | !s->actually_webp && !is_vp7) { |
214 | 4.56k | s->pix_fmt = get_pixel_format(s); |
215 | 4.56k | if (s->pix_fmt < 0) |
216 | 0 | return AVERROR(EINVAL); |
217 | 4.56k | avctx->pix_fmt = s->pix_fmt; |
218 | 4.56k | } |
219 | | |
220 | 25.7k | s->mb_width = (s->avctx->coded_width + 15) / 16; |
221 | 25.7k | s->mb_height = (s->avctx->coded_height + 15) / 16; |
222 | | |
223 | 25.7k | s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE && |
224 | 15.3k | avctx->thread_count > 1; |
225 | 25.7k | if (!s->mb_layout) { // Frame threading and one thread |
226 | 15.3k | s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) * |
227 | 15.3k | sizeof(*s->macroblocks)); |
228 | 15.3k | s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4); |
229 | 15.3k | } else // Sliced threading |
230 | 10.4k | s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) * |
231 | 10.4k | sizeof(*s->macroblocks)); |
232 | 25.7k | s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz)); |
233 | 25.7k | s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border)); |
234 | 25.7k | s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData)); |
235 | | |
236 | 25.7k | if (!s->macroblocks_base || !s->top_nnz || !s->top_border || |
237 | 25.7k | !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) { |
238 | 0 | free_buffers(s); |
239 | 0 | return AVERROR(ENOMEM); |
240 | 0 | } |
241 | | |
242 | 231k | for (i = 0; i < MAX_THREADS; i++) { |
243 | 206k | s->thread_data[i].filter_strength = |
244 | 206k | av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength)); |
245 | 206k | if (!s->thread_data[i].filter_strength) { |
246 | 0 | free_buffers(s); |
247 | 0 | return AVERROR(ENOMEM); |
248 | 0 | } |
249 | 206k | #if HAVE_THREADS |
250 | 206k | ret = pthread_mutex_init(&s->thread_data[i].lock, NULL); |
251 | 206k | if (ret) { |
252 | 0 | free_buffers(s); |
253 | 0 | return AVERROR(ret); |
254 | 0 | } |
255 | 206k | ret = pthread_cond_init(&s->thread_data[i].cond, NULL); |
256 | 206k | if (ret) { |
257 | 0 | free_buffers(s); |
258 | 0 | return AVERROR(ret); |
259 | 0 | } |
260 | 206k | #endif |
261 | 206k | } |
262 | | |
263 | 25.7k | s->macroblocks = s->macroblocks_base + 1; |
264 | | |
265 | 25.7k | return 0; |
266 | 25.7k | } |
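/* Illustrative sketch (not part of vp8.c, names are made up): the macroblock
 * grid rounds the coded size up to whole 16x16 macroblocks, so a hypothetical
 * 1920x1080 stream yields a 120x68 grid. */
#include <assert.h>

static void mb_grid_example(void)
{
    int width = 1920, height = 1080;    /* assumed sample dimensions */
    int mb_width  = (width  + 15) / 16; /* 1935 / 16 = 120 */
    int mb_height = (height + 15) / 16; /* 1095 / 16 = 68  */
    assert(mb_width == 120 && mb_height == 68);
}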
267 | | |
268 | | static int vp7_update_dimensions(VP8Context *s, int width, int height) |
269 | 11.8k | { |
270 | 11.8k | return update_dimensions(s, width, height, IS_VP7); |
271 | 11.8k | } |
272 | | |
273 | | static int vp8_update_dimensions(VP8Context *s, int width, int height) |
274 | 18.3k | { |
275 | 18.3k | return update_dimensions(s, width, height, IS_VP8); |
276 | 18.3k | } |
277 | | |
278 | | |
279 | | static void parse_segment_info(VP8Context *s) |
280 | 64.6k | { |
281 | 64.6k | VPXRangeCoder *c = &s->c; |
282 | 64.6k | int i; |
283 | | |
284 | 64.6k | s->segmentation.update_map = vp89_rac_get(c); |
285 | 64.6k | s->segmentation.update_feature_data = vp89_rac_get(c); |
286 | | |
287 | 64.6k | if (s->segmentation.update_feature_data) { |
288 | 53.0k | s->segmentation.absolute_vals = vp89_rac_get(c); |
289 | | |
290 | 265k | for (i = 0; i < 4; i++) |
291 | 212k | s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); |
292 | | |
293 | 265k | for (i = 0; i < 4; i++) |
294 | 212k | s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); |
295 | 53.0k | } |
296 | 64.6k | if (s->segmentation.update_map) |
297 | 194k | for (i = 0; i < 3; i++) |
298 | 146k | s->prob->segmentid[i] = vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255; |
299 | 64.6k | } |
300 | | |
301 | | static void update_lf_deltas(VP8Context *s) |
302 | 13.1k | { |
303 | 13.1k | VPXRangeCoder *c = &s->c; |
304 | 13.1k | int i; |
305 | | |
306 | 65.7k | for (i = 0; i < 4; i++) { |
307 | 52.5k | if (vp89_rac_get(c)) { |
308 | 26.6k | s->lf_delta.ref[i] = vp89_rac_get_uint(c, 6); |
309 | | |
310 | 26.6k | if (vp89_rac_get(c)) |
311 | 17.4k | s->lf_delta.ref[i] = -s->lf_delta.ref[i]; |
312 | 26.6k | } |
313 | 52.5k | } |
314 | | |
315 | 65.7k | for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) { |
316 | 52.5k | if (vp89_rac_get(c)) { |
317 | 27.8k | s->lf_delta.mode[i] = vp89_rac_get_uint(c, 6); |
318 | | |
319 | 27.8k | if (vp89_rac_get(c)) |
320 | 21.9k | s->lf_delta.mode[i] = -s->lf_delta.mode[i]; |
321 | 27.8k | } |
322 | 52.5k | } |
323 | 13.1k | } |
324 | | |
325 | | static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) |
326 | 110k | { |
327 | 110k | const uint8_t *sizes = buf; |
328 | 110k | int i; |
329 | 110k | int ret; |
330 | | |
331 | 110k | s->num_coeff_partitions = 1 << vp89_rac_get_uint(&s->c, 2); |
332 | | |
333 | 110k | buf += 3 * (s->num_coeff_partitions - 1); |
334 | 110k | buf_size -= 3 * (s->num_coeff_partitions - 1); |
335 | 110k | if (buf_size < 0) |
336 | 5.20k | return -1; |
337 | | |
338 | 107k | for (i = 0; i < s->num_coeff_partitions - 1; i++) { |
339 | 4.07k | int size = AV_RL24(sizes + 3 * i); |
340 | 4.07k | if (buf_size - size < 0) |
341 | 1.13k | return -1; |
342 | 2.93k | s->coeff_partition_size[i] = size; |
343 | | |
344 | 2.93k | ret = ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, size); |
345 | 2.93k | if (ret < 0) |
346 | 385 | return ret; |
347 | 2.55k | buf += size; |
348 | 2.55k | buf_size -= size; |
349 | 2.55k | } |
350 | | |
351 | 103k | s->coeff_partition_size[i] = buf_size; |
352 | | |
353 | 103k | return ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, buf_size); |
354 | 105k | } |
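/* Illustrative note (not part of vp8.c): the token data is split into 1, 2, 4
 * or 8 coefficient partitions. The sizes of all but the last partition are
 * stored as 24-bit little-endian values at the start of the buffer passed in,
 * before the partition payloads, and the last partition takes whatever space
 * remains; with two partitions, for example, a single 3-byte size field
 * precedes the payloads. */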
355 | | |
356 | | static void vp7_get_quants(VP8Context *s) |
357 | 94.4k | { |
358 | 94.4k | VPXRangeCoder *c = &s->c; |
359 | | |
360 | 94.4k | int yac_qi = vp89_rac_get_uint(c, 7); |
361 | 94.4k | int ydc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi; |
362 | 94.4k | int y2dc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi; |
363 | 94.4k | int y2ac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi; |
364 | 94.4k | int uvdc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi; |
365 | 94.4k | int uvac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi; |
366 | | |
367 | 94.4k | s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi]; |
368 | 94.4k | s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi]; |
369 | 94.4k | s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi]; |
370 | 94.4k | s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi]; |
371 | 94.4k | s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132); |
372 | 94.4k | s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi]; |
373 | 94.4k | } |
374 | | |
375 | | static void vp8_get_quants(VP8Context *s) |
376 | 92.4k | { |
377 | 92.4k | VPXRangeCoder *c = &s->c; |
378 | 92.4k | int i, base_qi; |
379 | | |
380 | 92.4k | s->quant.yac_qi = vp89_rac_get_uint(c, 7); |
381 | 92.4k | s->quant.ydc_delta = vp8_rac_get_sint(c, 4); |
382 | 92.4k | s->quant.y2dc_delta = vp8_rac_get_sint(c, 4); |
383 | 92.4k | s->quant.y2ac_delta = vp8_rac_get_sint(c, 4); |
384 | 92.4k | s->quant.uvdc_delta = vp8_rac_get_sint(c, 4); |
385 | 92.4k | s->quant.uvac_delta = vp8_rac_get_sint(c, 4); |
386 | | |
387 | 462k | for (i = 0; i < 4; i++) { |
388 | 369k | if (s->segmentation.enabled) { |
389 | 213k | base_qi = s->segmentation.base_quant[i]; |
390 | 213k | if (!s->segmentation.absolute_vals) |
391 | 46.3k | base_qi += s->quant.yac_qi; |
392 | 213k | } else |
393 | 155k | base_qi = s->quant.yac_qi; |
394 | | |
395 | 369k | s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)]; |
396 | 369k | s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)]; |
397 | 369k | s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2; |
398 | | /* 101581>>16 is equivalent to 155/100 */ |
399 | 369k | s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16; |
400 | 369k | s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)]; |
401 | 369k | s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)]; |
402 | | |
403 | 369k | s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); |
404 | 369k | s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); |
405 | 369k | } |
406 | 92.4k | } |
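/* Worked check (not part of vp8.c) of the 155/100 comment above:
 * 155 / 100 * 65536 is about 101580.8, which rounds to 101581, so the
 * multiply-and-shift is a fixed-point scale by 1.55. */
#include <assert.h>

static void y2ac_scale_example(void)
{
    int ac = 100;                       /* hypothetical vp8_ac_qlookup[] value */
    assert((ac * 101581 >> 16) == 155); /* same result as ac * 155 / 100 */
}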
407 | | |
408 | | /** |
409 | | * Determine which buffers golden and altref should be updated with after this frame. |
410 | | * The spec isn't clear here, so I'm going by my understanding of what libvpx does |
411 | | * |
412 | | * Intra frames update all 3 references |
413 | | * Inter frames update VP8_FRAME_PREVIOUS if the update_last flag is set |
414 | | * If the update (golden|altref) flag is set, it's updated with the current frame |
415 | | * if update_last is set, and VP8_FRAME_PREVIOUS otherwise. |
416 | | * If the flag is not set, the number read means: |
417 | | * 0: no update |
418 | | * 1: VP8_FRAME_PREVIOUS |
419 | | * 2: update golden with altref, or update altref with golden |
420 | | */ |
421 | | static VP8FrameType ref_to_update(VP8Context *s, int update, VP8FrameType ref) |
422 | 134k | { |
423 | 134k | VPXRangeCoder *c = &s->c; |
424 | | |
425 | 134k | if (update) |
426 | 63.8k | return VP8_FRAME_CURRENT; |
427 | | |
428 | 70.8k | switch (vp89_rac_get_uint(c, 2)) { |
429 | 5.16k | case 1: |
430 | 5.16k | return VP8_FRAME_PREVIOUS; |
431 | 40.2k | case 2: |
432 | 40.2k | return (ref == VP8_FRAME_GOLDEN) ? VP8_FRAME_ALTREF : VP8_FRAME_GOLDEN; |
433 | 70.8k | } |
434 | 25.5k | return VP8_FRAME_NONE; |
435 | 70.8k | } |
436 | | |
437 | | static void vp78_reset_probability_tables(VP8Context *s) |
438 | 47.6k | { |
439 | 47.6k | int i, j; |
440 | 238k | for (i = 0; i < 4; i++) |
441 | 3.24M | for (j = 0; j < 16; j++) |
442 | 3.05M | memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], |
443 | 3.05M | sizeof(s->prob->token[i][j])); |
444 | 47.6k | } |
445 | | |
446 | | static void vp78_update_probability_tables(VP8Context *s) |
447 | 186k | { |
448 | 186k | VPXRangeCoder *c = &s->c; |
449 | 186k | int i, j, k, l, m; |
450 | | |
451 | 934k | for (i = 0; i < 4; i++) |
452 | 6.72M | for (j = 0; j < 8; j++) |
453 | 23.9M | for (k = 0; k < 3; k++) |
454 | 215M | for (l = 0; l < NUM_DCT_TOKENS-1; l++) |
455 | 197M | if (vpx_rac_get_prob_branchy(c, ff_vp8_token_update_probs[i][j][k][l])) { |
456 | 63.0M | int prob = vp89_rac_get_uint(c, 8); |
457 | 189M | for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) |
458 | 126M | s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob; |
459 | 63.0M | } |
460 | 186k | } |
461 | | |
462 | 82.8k | #define VP7_MVC_SIZE 17 |
463 | 67.3k | #define VP8_MVC_SIZE 19 |
464 | | |
465 | | static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s, |
466 | | int mvc_size) |
467 | 150k | { |
468 | 150k | VPXRangeCoder *c = &s->c; |
469 | 150k | int i, j; |
470 | | |
471 | 150k | if (vp89_rac_get(c)) |
472 | 335k | for (i = 0; i < 4; i++) |
473 | 268k | s->prob->pred16x16[i] = vp89_rac_get_uint(c, 8); |
474 | 150k | if (vp89_rac_get(c)) |
475 | 272k | for (i = 0; i < 3; i++) |
476 | 204k | s->prob->pred8x8c[i] = vp89_rac_get_uint(c, 8); |
477 | | |
478 | | // 17.2 MV probability update |
479 | 450k | for (i = 0; i < 2; i++) |
480 | 5.67M | for (j = 0; j < mvc_size; j++) |
481 | 5.37M | if (vpx_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) |
482 | 2.04M | s->prob->mvc[i][j] = vp8_rac_get_nn(c); |
483 | 150k | } |
484 | | |
485 | | static void update_refs(VP8Context *s) |
486 | 67.3k | { |
487 | 67.3k | VPXRangeCoder *c = &s->c; |
488 | | |
489 | 67.3k | int update_golden = vp89_rac_get(c); |
490 | 67.3k | int update_altref = vp89_rac_get(c); |
491 | | |
492 | 67.3k | s->update_golden = ref_to_update(s, update_golden, VP8_FRAME_GOLDEN); |
493 | 67.3k | s->update_altref = ref_to_update(s, update_altref, VP8_FRAME_ALTREF); |
494 | 67.3k | } |
495 | | |
496 | | static void copy_chroma(AVFrame *dst, const AVFrame *src, int width, int height) |
497 | 55.9k | { |
498 | 55.9k | int i, j; |
499 | | |
500 | 167k | for (j = 1; j < 3; j++) { |
501 | 3.40M | for (i = 0; i < height / 2; i++) |
502 | 3.28M | memcpy(dst->data[j] + i * dst->linesize[j], |
503 | 3.28M | src->data[j] + i * src->linesize[j], width / 2); |
504 | 111k | } |
505 | 55.9k | } |
506 | | |
507 | | static void fade(uint8_t *dst, ptrdiff_t dst_linesize, |
508 | | const uint8_t *src, ptrdiff_t src_linesize, |
509 | | int width, int height, |
510 | | int alpha, int beta) |
511 | 57.7k | { |
512 | 57.7k | int i, j; |
513 | 3.53M | for (j = 0; j < height; j++) { |
514 | 3.47M | const uint8_t *src2 = src + j * src_linesize; |
515 | 3.47M | uint8_t *dst2 = dst + j * dst_linesize; |
516 | 278M | for (i = 0; i < width; i++) { |
517 | 274M | uint8_t y = src2[i]; |
518 | 274M | dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha); |
519 | 274M | } |
520 | 3.47M | } |
521 | 57.7k | } |
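/* Worked example (hypothetical values, not part of vp8.c): with alpha = -16
 * and beta = -64, a luma sample of 128 becomes
 * 128 + ((128 * -64) >> 8) + (-16) = 128 - 32 - 16 = 80 (assuming the
 * arithmetic right shift FFmpeg relies on), then is clipped to [0, 255]. */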
522 | | |
523 | | static int vp7_fade_frame(VP8Context *s, int alpha, int beta) |
524 | 94.4k | { |
525 | 94.4k | int ret; |
526 | | |
527 | 94.4k | if (!s->keyframe && (alpha || beta)) { |
528 | 61.6k | int width = s->mb_width * 16; |
529 | 61.6k | int height = s->mb_height * 16; |
530 | 61.6k | const AVFrame *src; |
531 | 61.6k | AVFrame *dst; |
532 | | |
533 | 61.6k | if (!s->framep[VP8_FRAME_PREVIOUS] || |
534 | 61.6k | !s->framep[VP8_FRAME_GOLDEN]) { |
535 | 3.85k | av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); |
536 | 3.85k | return AVERROR_INVALIDDATA; |
537 | 3.85k | } |
538 | | |
539 | 57.7k | src = |
540 | 57.7k | dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f; |
541 | | |
542 | | /* preserve the golden frame, write a new previous frame */ |
543 | 57.7k | if (s->framep[VP8_FRAME_GOLDEN] == s->framep[VP8_FRAME_PREVIOUS]) { |
544 | 55.9k | VP8Frame *prev_frame = vp8_find_free_buffer(s); |
545 | | |
546 | 55.9k | ret = vp8_alloc_frame(s, prev_frame, 1); |
547 | 55.9k | if (ret < 0) |
548 | 0 | return ret; |
549 | 55.9k | s->framep[VP8_FRAME_PREVIOUS] = prev_frame; |
550 | | |
551 | 55.9k | dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f; |
552 | | |
553 | 55.9k | copy_chroma(dst, src, width, height); |
554 | 55.9k | } |
555 | | |
556 | 57.7k | fade(dst->data[0], dst->linesize[0], |
557 | 57.7k | src->data[0], src->linesize[0], |
558 | 57.7k | width, height, alpha, beta); |
559 | 57.7k | } |
560 | | |
561 | 90.5k | return 0; |
562 | 94.4k | } |
563 | | |
564 | | static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) |
565 | 114k | { |
566 | 114k | VPXRangeCoder *c = &s->c; |
567 | 114k | int part1_size, hscale, vscale, i, j, ret; |
568 | 114k | int width = s->avctx->width; |
569 | 114k | int height = s->avctx->height; |
570 | 114k | int alpha = 0; |
571 | 114k | int beta = 0; |
572 | 114k | int fade_present = 1; |
573 | | |
574 | 114k | if (buf_size < 4) { |
575 | 8.05k | return AVERROR_INVALIDDATA; |
576 | 8.05k | } |
577 | | |
578 | 106k | s->profile = (buf[0] >> 1) & 7; |
579 | 106k | if (s->profile > 1) { |
580 | 1.50k | avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile); |
581 | 1.50k | return AVERROR_INVALIDDATA; |
582 | 1.50k | } |
583 | | |
584 | 104k | s->keyframe = !(buf[0] & 1); |
585 | 104k | s->invisible = 0; |
586 | 104k | part1_size = AV_RL24(buf) >> 4; |
587 | | |
588 | 104k | if (buf_size < 4 - s->profile + part1_size) { |
589 | 1.99k | av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size); |
590 | 1.99k | return AVERROR_INVALIDDATA; |
591 | 1.99k | } |
592 | | |
593 | 102k | buf += 4 - s->profile; |
594 | 102k | buf_size -= 4 - s->profile; |
595 | | |
596 | 102k | memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); |
597 | | |
598 | 102k | ret = ff_vpx_init_range_decoder(c, buf, part1_size); |
599 | 102k | if (ret < 0) |
600 | 295 | return ret; |
601 | 102k | buf += part1_size; |
602 | 102k | buf_size -= part1_size; |
603 | | |
604 | | /* A. Dimension information (keyframes only) */ |
605 | 102k | if (s->keyframe) { |
606 | 15.1k | width = vp89_rac_get_uint(c, 12); |
607 | 15.1k | height = vp89_rac_get_uint(c, 12); |
608 | 15.1k | hscale = vp89_rac_get_uint(c, 2); |
609 | 15.1k | vscale = vp89_rac_get_uint(c, 2); |
610 | 15.1k | if (hscale || vscale) |
611 | 8.49k | avpriv_request_sample(s->avctx, "Upscaling"); |
612 | | |
613 | 15.1k | s->update_golden = s->update_altref = VP8_FRAME_CURRENT; |
614 | 15.1k | vp78_reset_probability_tables(s); |
615 | 15.1k | memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, |
616 | 15.1k | sizeof(s->prob->pred16x16)); |
617 | 15.1k | memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, |
618 | 15.1k | sizeof(s->prob->pred8x8c)); |
619 | 45.3k | for (i = 0; i < 2; i++) |
620 | 30.2k | memcpy(s->prob->mvc[i], vp7_mv_default_prob[i], |
621 | 30.2k | sizeof(vp7_mv_default_prob[i])); |
622 | 15.1k | memset(&s->segmentation, 0, sizeof(s->segmentation)); |
623 | 15.1k | memset(&s->lf_delta, 0, sizeof(s->lf_delta)); |
624 | 15.1k | memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); |
625 | 15.1k | } |
626 | | |
627 | 102k | if (s->keyframe || s->profile > 0) |
628 | 89.9k | memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred)); |
629 | | |
630 | | /* B. Decoding information for all four macroblock-level features */ |
631 | 512k | for (i = 0; i < 4; i++) { |
632 | 410k | s->feature_enabled[i] = vp89_rac_get(c); |
633 | 410k | if (s->feature_enabled[i]) { |
634 | 271k | s->feature_present_prob[i] = vp89_rac_get_uint(c, 8); |
635 | | |
636 | 1.08M | for (j = 0; j < 3; j++) |
637 | 814k | s->feature_index_prob[i][j] = |
638 | 814k | vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255; |
639 | | |
640 | 271k | if (vp7_feature_value_size[s->profile][i]) |
641 | 1.00M | for (j = 0; j < 4; j++) |
642 | 806k | s->feature_value[i][j] = |
643 | 806k | vp89_rac_get(c) ? vp89_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0; |
644 | 271k | } |
645 | 410k | } |
646 | | |
647 | 102k | s->segmentation.enabled = 0; |
648 | 102k | s->segmentation.update_map = 0; |
649 | 102k | s->lf_delta.enabled = 0; |
650 | | |
651 | 102k | s->num_coeff_partitions = 1; |
652 | 102k | ret = ff_vpx_init_range_decoder(&s->coeff_partition[0], buf, buf_size); |
653 | 102k | if (ret < 0) |
654 | 6.63k | return ret; |
655 | | |
656 | 95.9k | if (!s->macroblocks_base || /* first frame */ |
657 | 95.9k | width != s->avctx->width || height != s->avctx->height || |
658 | 95.9k | (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) { |
659 | 11.8k | if ((ret = vp7_update_dimensions(s, width, height)) < 0) |
660 | 1.45k | return ret; |
661 | 11.8k | } |
662 | | |
663 | | /* C. Dequantization indices */ |
664 | 94.4k | vp7_get_quants(s); |
665 | | |
666 | | /* D. Golden frame update flag (a Flag) for interframes only */ |
667 | 94.4k | if (!s->keyframe) { |
668 | 82.8k | s->update_golden = vp89_rac_get(c) ? VP8_FRAME_CURRENT : VP8_FRAME_NONE; |
669 | 82.8k | s->sign_bias[VP8_FRAME_GOLDEN] = 0; |
670 | 82.8k | } |
671 | | |
672 | 94.4k | s->update_last = 1; |
673 | 94.4k | s->update_probabilities = 1; |
674 | | |
675 | 94.4k | if (s->profile > 0) { |
676 | 78.7k | s->update_probabilities = vp89_rac_get(c); |
677 | 78.7k | if (!s->update_probabilities) |
678 | 17.1k | s->prob[1] = s->prob[0]; |
679 | | |
680 | 78.7k | if (!s->keyframe) |
681 | 70.5k | fade_present = vp89_rac_get(c); |
682 | 78.7k | } |
683 | | |
684 | 94.4k | if (vpx_rac_is_end(c)) |
685 | 0 | return AVERROR_INVALIDDATA; |
686 | | /* E. Fading information for previous frame */ |
687 | 94.4k | if (fade_present && vp89_rac_get(c)) { |
688 | 62.6k | alpha = (int8_t) vp89_rac_get_uint(c, 8); |
689 | 62.6k | beta = (int8_t) vp89_rac_get_uint(c, 8); |
690 | 62.6k | } |
691 | | |
692 | | /* F. Loop filter type */ |
693 | 94.4k | if (!s->profile) |
694 | 15.6k | s->filter.simple = vp89_rac_get(c); |
695 | | |
696 | | /* G. DCT coefficient ordering specification */ |
697 | 94.4k | if (vp89_rac_get(c)) |
698 | 1.03M | for (i = 1; i < 16; i++) |
699 | 969k | s->prob[0].scan[i] = ff_zigzag_scan[vp89_rac_get_uint(c, 4)]; |
700 | | |
701 | | /* H. Loop filter levels */ |
702 | 94.4k | if (s->profile > 0) |
703 | 78.7k | s->filter.simple = vp89_rac_get(c); |
704 | 94.4k | s->filter.level = vp89_rac_get_uint(c, 6); |
705 | 94.4k | s->filter.sharpness = vp89_rac_get_uint(c, 3); |
706 | | |
707 | | /* I. DCT coefficient probability update; 13.3 Token Probability Updates */ |
708 | 94.4k | vp78_update_probability_tables(s); |
709 | | |
710 | 94.4k | s->mbskip_enabled = 0; |
711 | | |
712 | | /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */ |
713 | 94.4k | if (!s->keyframe) { |
714 | 82.8k | s->prob->intra = vp89_rac_get_uint(c, 8); |
715 | 82.8k | s->prob->last = vp89_rac_get_uint(c, 8); |
716 | 82.8k | vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE); |
717 | 82.8k | } |
718 | | |
719 | 94.4k | if (vpx_rac_is_end(c)) |
720 | 0 | return AVERROR_INVALIDDATA; |
721 | | |
722 | 94.4k | if ((ret = vp7_fade_frame(s, alpha, beta)) < 0) |
723 | 3.85k | return ret; |
724 | | |
725 | 90.5k | return 0; |
726 | 94.4k | } |
727 | | |
728 | | static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) |
729 | 128k | { |
730 | 128k | VPXRangeCoder *c = &s->c; |
731 | 128k | int header_size, hscale, vscale, ret; |
732 | 128k | int width = s->avctx->width; |
733 | 128k | int height = s->avctx->height; |
734 | | |
735 | 128k | if (buf_size < 3) { |
736 | 8.49k | av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size); |
737 | 8.49k | return AVERROR_INVALIDDATA; |
738 | 8.49k | } |
739 | | |
740 | 119k | s->keyframe = !(buf[0] & 1); |
741 | 119k | s->profile = (buf[0]>>1) & 7; |
742 | 119k | s->invisible = !(buf[0] & 0x10); |
743 | 119k | header_size = AV_RL24(buf) >> 5; |
744 | 119k | buf += 3; |
745 | 119k | buf_size -= 3; |
746 | | |
747 | 119k | s->header_partition_size = header_size; |
748 | | |
749 | 119k | if (s->profile > 3) |
750 | 38.6k | av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); |
751 | | |
752 | 119k | if (!s->profile) |
753 | 26.5k | memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, |
754 | 26.5k | sizeof(s->put_pixels_tab)); |
755 | 92.9k | else // profile 1-3 use bilinear, 4+ aren't defined so whatever |
756 | 92.9k | memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, |
757 | 92.9k | sizeof(s->put_pixels_tab)); |
758 | | |
759 | 119k | if (header_size > buf_size - 7 * s->keyframe) { |
760 | 7.24k | av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); |
761 | 7.24k | return AVERROR_INVALIDDATA; |
762 | 7.24k | } |
763 | | |
764 | 112k | if (s->keyframe) { |
765 | 33.4k | if (AV_RL24(buf) != 0x2a019d) { |
766 | 926 | av_log(s->avctx, AV_LOG_ERROR, |
767 | 926 | "Invalid start code 0x%x\n", AV_RL24(buf)); |
768 | 926 | return AVERROR_INVALIDDATA; |
769 | 926 | } |
770 | 32.5k | width = AV_RL16(buf + 3) & 0x3fff; |
771 | 32.5k | height = AV_RL16(buf + 5) & 0x3fff; |
772 | 32.5k | hscale = buf[4] >> 6; |
773 | 32.5k | vscale = buf[6] >> 6; |
774 | 32.5k | buf += 7; |
775 | 32.5k | buf_size -= 7; |
776 | | |
777 | 32.5k | if (hscale || vscale) |
778 | 23.5k | avpriv_request_sample(s->avctx, "Upscaling"); |
779 | | |
780 | 32.5k | s->update_golden = s->update_altref = VP8_FRAME_CURRENT; |
781 | 32.5k | vp78_reset_probability_tables(s); |
782 | 32.5k | memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, |
783 | 32.5k | sizeof(s->prob->pred16x16)); |
784 | 32.5k | memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, |
785 | 32.5k | sizeof(s->prob->pred8x8c)); |
786 | 32.5k | memcpy(s->prob->mvc, vp8_mv_default_prob, |
787 | 32.5k | sizeof(s->prob->mvc)); |
788 | 32.5k | memset(&s->segmentation, 0, sizeof(s->segmentation)); |
789 | 32.5k | memset(&s->lf_delta, 0, sizeof(s->lf_delta)); |
790 | 32.5k | } |
791 | | |
792 | 111k | ret = ff_vpx_init_range_decoder(c, buf, header_size); |
793 | 111k | if (ret < 0) |
794 | 1.09k | return ret; |
795 | 110k | buf += header_size; |
796 | 110k | buf_size -= header_size; |
797 | | |
798 | 110k | if (s->keyframe) { |
799 | 32.0k | s->colorspace = vp89_rac_get(c); |
800 | 32.0k | if (s->colorspace) |
801 | 3.11k | av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); |
802 | 32.0k | s->fullrange = vp89_rac_get(c); |
803 | 32.0k | } |
804 | | |
805 | 110k | if ((s->segmentation.enabled = vp89_rac_get(c))) |
806 | 64.6k | parse_segment_info(s); |
807 | 45.5k | else |
808 | 45.5k | s->segmentation.update_map = 0; // FIXME: move this to some init function? |
809 | | |
810 | 110k | s->filter.simple = vp89_rac_get(c); |
811 | 110k | s->filter.level = vp89_rac_get_uint(c, 6); |
812 | 110k | s->filter.sharpness = vp89_rac_get_uint(c, 3); |
813 | | |
814 | 110k | if ((s->lf_delta.enabled = vp89_rac_get(c))) { |
815 | 18.6k | s->lf_delta.update = vp89_rac_get(c); |
816 | 18.6k | if (s->lf_delta.update) |
817 | 13.1k | update_lf_deltas(s); |
818 | 18.6k | } |
819 | | |
820 | 110k | if (setup_partitions(s, buf, buf_size)) { |
821 | 14.7k | av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); |
822 | 14.7k | return AVERROR_INVALIDDATA; |
823 | 14.7k | } |
824 | | |
825 | 95.4k | if (!s->macroblocks_base || /* first frame */ |
826 | 95.4k | width != s->avctx->width || height != s->avctx->height || |
827 | 95.4k | (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) |
828 | 18.3k | if ((ret = vp8_update_dimensions(s, width, height)) < 0) |
829 | 3.01k | return ret; |
830 | | |
831 | 92.4k | vp8_get_quants(s); |
832 | | |
833 | 92.4k | if (!s->keyframe) { |
834 | 67.3k | update_refs(s); |
835 | 67.3k | s->sign_bias[VP8_FRAME_GOLDEN] = vp89_rac_get(c); |
836 | 67.3k | s->sign_bias[VP8_FRAME_ALTREF] = vp89_rac_get(c); |
837 | 67.3k | } |
838 | | |
839 | | // if we aren't saving this frame's probabilities for future frames, |
840 | | // make a copy of the current probabilities |
841 | 92.4k | if (!(s->update_probabilities = vp89_rac_get(c))) |
842 | 81.5k | s->prob[1] = s->prob[0]; |
843 | | |
844 | 92.4k | s->update_last = s->keyframe || vp89_rac_get(c); |
845 | | |
846 | 92.4k | vp78_update_probability_tables(s); |
847 | | |
848 | 92.4k | if ((s->mbskip_enabled = vp89_rac_get(c))) |
849 | 5.41k | s->prob->mbskip = vp89_rac_get_uint(c, 8); |
850 | | |
851 | 92.4k | if (!s->keyframe) { |
852 | 67.3k | s->prob->intra = vp89_rac_get_uint(c, 8); |
853 | 67.3k | s->prob->last = vp89_rac_get_uint(c, 8); |
854 | 67.3k | s->prob->golden = vp89_rac_get_uint(c, 8); |
855 | 67.3k | vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE); |
856 | 67.3k | } |
857 | | |
858 | | // Record the entropy coder state here so that hwaccels can use it. |
859 | 92.4k | s->c.code_word = vpx_rac_renorm(&s->c); |
860 | 92.4k | s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8); |
861 | 92.4k | s->coder_state_at_header_end.range = s->c.high; |
862 | 92.4k | s->coder_state_at_header_end.value = s->c.code_word >> 16; |
863 | 92.4k | s->coder_state_at_header_end.bit_count = -s->c.bits % 8; |
864 | | |
865 | 92.4k | return 0; |
866 | 95.4k | } |
867 | | |
868 | | static av_always_inline |
869 | | void clamp_mv(const VP8mvbounds *s, VP8mv *dst, const VP8mv *src) |
870 | 349k | { |
871 | 349k | dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX), |
872 | 349k | av_clip(s->mv_max.x, INT16_MIN, INT16_MAX)); |
873 | 349k | dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX), |
874 | 349k | av_clip(s->mv_max.y, INT16_MIN, INT16_MAX)); |
875 | 349k | } |
876 | | |
877 | | /** |
878 | | * Motion vector coding, 17.1. |
879 | | */ |
880 | | static av_always_inline int read_mv_component(VPXRangeCoder *c, const uint8_t *p, int vp7) |
881 | 2.48M | { |
882 | 2.48M | int bit, x = 0; |
883 | | |
884 | 2.48M | if (vpx_rac_get_prob_branchy(c, p[0])) { |
885 | 1.76M | int i; |
886 | | |
887 | 7.04M | for (i = 0; i < 3; i++) |
888 | 5.28M | x += vpx_rac_get_prob(c, p[9 + i]) << i; |
889 | 9.13M | for (i = (vp7 ? 7 : 9); i > 3; i--) |
890 | 7.37M | x += vpx_rac_get_prob(c, p[9 + i]) << i; |
891 | 1.76M | if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vpx_rac_get_prob(c, p[12])) |
892 | 1.70M | x += 8; |
893 | 1.76M | } else { |
894 | | // small_mvtree |
895 | 720k | const uint8_t *ps = p + 2; |
896 | 720k | bit = vpx_rac_get_prob(c, *ps); |
897 | 720k | ps += 1 + 3 * bit; |
898 | 720k | x += 4 * bit; |
899 | 720k | bit = vpx_rac_get_prob(c, *ps); |
900 | 720k | ps += 1 + bit; |
901 | 720k | x += 2 * bit; |
902 | 720k | x += vpx_rac_get_prob(c, *ps); |
903 | 720k | } |
904 | | |
905 | 2.48M | return (x && vpx_rac_get_prob(c, p[1])) ? -x : x; |
906 | 2.48M | } |
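/* Illustrative note (not part of vp8.c): in the long-vector branch above,
 * bits 0-2 and the high bits are read explicitly, while bit 3 is only read
 * when some higher bit is set; otherwise it is forced to 1, since magnitudes
 * below 8 are always coded through the small_mvtree branch instead. */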
907 | | |
908 | | static int vp7_read_mv_component(VPXRangeCoder *c, const uint8_t *p) |
909 | 340k | { |
910 | 340k | return read_mv_component(c, p, 1); |
911 | 340k | } |
912 | | |
913 | | static int vp8_read_mv_component(VPXRangeCoder *c, const uint8_t *p) |
914 | 228k | { |
915 | 228k | return read_mv_component(c, p, 0); |
916 | 228k | } |
917 | | |
918 | | static av_always_inline |
919 | | const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7) |
920 | 2.03M | { |
921 | 2.03M | if (is_vp7) |
922 | 893k | return vp7_submv_prob; |
923 | | |
924 | 1.13M | if (left == top) |
925 | 654k | return vp8_submv_prob[4 - !!left]; |
926 | 483k | if (!top) |
927 | 156k | return vp8_submv_prob[2]; |
928 | 326k | return vp8_submv_prob[1 - !!left]; |
929 | 483k | } |
930 | | |
931 | | /** |
932 | | * Split motion vector prediction, 16.4. |
933 | | * @returns the number of motion vectors parsed (2, 4 or 16) |
934 | | */ |
935 | | static av_always_inline |
936 | | int decode_splitmvs(const VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb, |
937 | | int layout, int is_vp7) |
938 | 533k | { |
939 | 533k | int part_idx; |
940 | 533k | int n, num; |
941 | 533k | const VP8Macroblock *top_mb; |
942 | 533k | const VP8Macroblock *left_mb = &mb[-1]; |
943 | 533k | const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning]; |
944 | 533k | const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx; |
945 | 533k | const VP8mv *top_mv; |
946 | 533k | const VP8mv *left_mv = left_mb->bmv; |
947 | 533k | const VP8mv *cur_mv = mb->bmv; |
948 | | |
949 | 533k | if (!layout) // layout is inlined, s->mb_layout is not |
950 | 145k | top_mb = &mb[2]; |
951 | 388k | else |
952 | 388k | top_mb = &mb[-s->mb_width - 1]; |
953 | 533k | mbsplits_top = vp8_mbsplits[top_mb->partitioning]; |
954 | 533k | top_mv = top_mb->bmv; |
955 | | |
956 | 533k | if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { |
957 | 470k | if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) |
958 | 426k | part_idx = VP8_SPLITMVMODE_16x8 + vpx_rac_get_prob(c, vp8_mbsplit_prob[2]); |
959 | 43.6k | else |
960 | 43.6k | part_idx = VP8_SPLITMVMODE_8x8; |
961 | 470k | } else { |
962 | 62.6k | part_idx = VP8_SPLITMVMODE_4x4; |
963 | 62.6k | } |
964 | | |
965 | 533k | num = vp8_mbsplit_count[part_idx]; |
966 | 533k | mbsplits_cur = vp8_mbsplits[part_idx], |
967 | 533k | firstidx = vp8_mbfirstidx[part_idx]; |
968 | 533k | mb->partitioning = part_idx; |
969 | | |
970 | 2.56M | for (n = 0; n < num; n++) { |
971 | 2.03M | int k = firstidx[n]; |
972 | 2.03M | uint32_t left, above; |
973 | 2.03M | const uint8_t *submv_prob; |
974 | | |
975 | 2.03M | if (!(k & 3)) |
976 | 799k | left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); |
977 | 1.23M | else |
978 | 1.23M | left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); |
979 | 2.03M | if (k <= 3) |
980 | 1.15M | above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); |
981 | 874k | else |
982 | 874k | above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); |
983 | | |
984 | 2.03M | submv_prob = get_submv_prob(left, above, is_vp7); |
985 | | |
986 | 2.03M | if (vpx_rac_get_prob_branchy(c, submv_prob[0])) { |
987 | 1.10M | if (vpx_rac_get_prob_branchy(c, submv_prob[1])) { |
988 | 967k | if (vpx_rac_get_prob_branchy(c, submv_prob[2])) { |
989 | 956k | mb->bmv[n].y = mb->mv.y + |
990 | 956k | read_mv_component(c, s->prob->mvc[0], is_vp7); |
991 | 956k | mb->bmv[n].x = mb->mv.x + |
992 | 956k | read_mv_component(c, s->prob->mvc[1], is_vp7); |
993 | 956k | } else { |
994 | 10.8k | AV_ZERO32(&mb->bmv[n]); |
995 | 10.8k | } |
996 | 967k | } else { |
997 | 135k | AV_WN32A(&mb->bmv[n], above); |
998 | 135k | } |
999 | 1.10M | } else { |
1000 | 928k | AV_WN32A(&mb->bmv[n], left); |
1001 | 928k | } |
1002 | 2.03M | } |
1003 | | |
1004 | 533k | return num; |
1005 | 533k | } |
1006 | | |
1007 | | /** |
1008 | | * The vp7 reference decoder uses a padding macroblock column (added to right |
1009 | | * edge of the frame) to guard against illegal macroblock offsets. The |
1010 | | * algorithm has bugs that permit offsets to straddle the padding column. |
1011 | | * This function replicates those bugs. |
1012 | | * |
1013 | | * @param[out] edge_x macroblock x address |
1014 | | * @param[out] edge_y macroblock y address |
1015 | | * |
1016 | | * @return macroblock offset legal (boolean) |
1017 | | */ |
1018 | | static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width, |
1019 | | int xoffset, int yoffset, int boundary, |
1020 | | int *edge_x, int *edge_y) |
1021 | 18.1M | { |
1022 | 18.1M | int vwidth = mb_width + 1; |
1023 | 18.1M | int new = (mb_y + yoffset) * vwidth + mb_x + xoffset; |
1024 | 18.1M | if (new < boundary || new % vwidth == vwidth - 1) |
1025 | 4.44M | return 0; |
1026 | 13.6M | *edge_y = new / vwidth; |
1027 | 13.6M | *edge_x = new % vwidth; |
1028 | 13.6M | return 1; |
1029 | 18.1M | } |
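/* Worked example (hypothetical numbers, not part of vp8.c): with mb_width = 4
 * the virtual row width is vwidth = 5, the rightmost column being the padding
 * column. For mb_x = 0, mb_y = 1 and an offset of (-1, 0),
 * new = 1 * 5 + 0 - 1 = 4; since 4 % 5 == vwidth - 1, the candidate falls
 * into the padding column and the offset is rejected. */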
1030 | | |
1031 | | static const VP8mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock) |
1032 | 13.6M | { |
1033 | 13.6M | return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0]; |
1034 | 13.6M | } |
1035 | | |
1036 | | static av_always_inline |
1037 | | void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb, |
1038 | | int mb_x, int mb_y, int layout) |
1039 | 1.51M | { |
1040 | 1.51M | enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR }; |
1041 | 1.51M | enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; |
1042 | 1.51M | int idx = CNT_ZERO; |
1043 | 1.51M | VP8mv near_mv[3]; |
1044 | 1.51M | uint8_t cnt[3] = { 0 }; |
1045 | 1.51M | VPXRangeCoder *c = &s->c; |
1046 | 1.51M | int i; |
1047 | | |
1048 | 1.51M | AV_ZERO32(&near_mv[0]); |
1049 | 1.51M | AV_ZERO32(&near_mv[1]); |
1050 | 1.51M | AV_ZERO32(&near_mv[2]); |
1051 | | |
1052 | 19.6M | for (i = 0; i < VP7_MV_PRED_COUNT; i++) { |
1053 | 18.1M | const VP7MVPred * pred = &vp7_mv_pred[i]; |
1054 | 18.1M | int edge_x, edge_y; |
1055 | | |
1056 | 18.1M | if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset, |
1057 | 18.1M | pred->yoffset, !s->profile, &edge_x, &edge_y)) { |
1058 | 13.6M | const VP8Macroblock *edge = (s->mb_layout == 1) |
1059 | 13.6M | ? s->macroblocks_base + 1 + edge_x + |
1060 | 13.6M | (s->mb_width + 1) * (edge_y + 1) |
1061 | 13.6M | : s->macroblocks + edge_x + |
1062 | 0 | (s->mb_height - edge_y - 1) * 2; |
1063 | 13.6M | uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock)); |
1064 | 13.6M | if (mv) { |
1065 | 7.95M | if (AV_RN32A(&near_mv[CNT_NEAREST])) { |
1066 | 6.79M | if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) { |
1067 | 3.10M | idx = CNT_NEAREST; |
1068 | 3.69M | } else if (AV_RN32A(&near_mv[CNT_NEAR])) { |
1069 | 2.88M | if (mv != AV_RN32A(&near_mv[CNT_NEAR])) |
1070 | 1.93M | continue; |
1071 | 950k | idx = CNT_NEAR; |
1072 | 950k | } else { |
1073 | 805k | AV_WN32A(&near_mv[CNT_NEAR], mv); |
1074 | 805k | idx = CNT_NEAR; |
1075 | 805k | } |
1076 | 6.79M | } else { |
1077 | 1.16M | AV_WN32A(&near_mv[CNT_NEAREST], mv); |
1078 | 1.16M | idx = CNT_NEAREST; |
1079 | 1.16M | } |
1080 | 7.95M | } else { |
1081 | 5.72M | idx = CNT_ZERO; |
1082 | 5.72M | } |
1083 | 13.6M | } else { |
1084 | 4.44M | idx = CNT_ZERO; |
1085 | 4.44M | } |
1086 | 16.1M | cnt[idx] += vp7_mv_pred[i].score; |
1087 | 16.1M | } |
1088 | | |
1089 | 1.51M | mb->partitioning = VP8_SPLITMVMODE_NONE; |
1090 | | |
1091 | 1.51M | if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) { |
1092 | 1.03M | mb->mode = VP8_MVMODE_MV; |
1093 | | |
1094 | 1.03M | if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) { |
1095 | | |
1096 | 670k | if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) { |
1097 | | |
1098 | 558k | if (cnt[CNT_NEAREST] > cnt[CNT_NEAR]) |
1099 | 558k | AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST])); |
1100 | 119k | else |
1101 | 558k | AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR])); |
1102 | | |
1103 | 558k | if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) { |
1104 | 388k | mb->mode = VP8_MVMODE_SPLIT; |
1105 | 388k | mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1]; |
1106 | 388k | } else { |
1107 | 170k | mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]); |
1108 | 170k | mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]); |
1109 | 170k | mb->bmv[0] = mb->mv; |
1110 | 170k | } |
1111 | 558k | } else { |
1112 | 111k | mb->mv = near_mv[CNT_NEAR]; |
1113 | 111k | mb->bmv[0] = mb->mv; |
1114 | 111k | } |
1115 | 670k | } else { |
1116 | 365k | mb->mv = near_mv[CNT_NEAREST]; |
1117 | 365k | mb->bmv[0] = mb->mv; |
1118 | 365k | } |
1119 | 1.03M | } else { |
1120 | 474k | mb->mode = VP8_MVMODE_ZERO; |
1121 | 474k | AV_ZERO32(&mb->mv); |
1122 | 474k | mb->bmv[0] = mb->mv; |
1123 | 474k | } |
1124 | 1.51M | } |
1125 | | |
1126 | | static av_always_inline |
1127 | | void vp8_decode_mvs(VP8Context *s, const VP8mvbounds *mv_bounds, VP8Macroblock *mb, |
1128 | | int mb_x, int mb_y, int layout) |
1129 | 813k | { |
1130 | 813k | VP8Macroblock *mb_edge[3] = { 0 /* top */, |
1131 | 813k | mb - 1 /* left */, |
1132 | 813k | 0 /* top-left */ }; |
1133 | 813k | enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; |
1134 | 813k | enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; |
1135 | 813k | int idx = CNT_ZERO; |
1136 | 813k | int cur_sign_bias = s->sign_bias[mb->ref_frame]; |
1137 | 813k | const int8_t *sign_bias = s->sign_bias; |
1138 | 813k | VP8mv near_mv[4]; |
1139 | 813k | uint8_t cnt[4] = { 0 }; |
1140 | 813k | VPXRangeCoder *c = &s->c; |
1141 | | |
1142 | 813k | if (!layout) { // layout is inlined (s->mb_layout is not) |
1143 | 813k | mb_edge[0] = mb + 2; |
1144 | 813k | mb_edge[2] = mb + 1; |
1145 | 813k | } else { |
1146 | 0 | mb_edge[0] = mb - s->mb_width - 1; |
1147 | 0 | mb_edge[2] = mb - s->mb_width - 2; |
1148 | 0 | } |
1149 | | |
1150 | 813k | AV_ZERO32(&near_mv[0]); |
1151 | 813k | AV_ZERO32(&near_mv[1]); |
1152 | 813k | AV_ZERO32(&near_mv[2]); |
1153 | | |
1154 | | /* Process MB on top, left and top-left */ |
1155 | 813k | #define MV_EDGE_CHECK(n) \ |
1156 | 2.44M | { \ |
1157 | 2.44M | const VP8Macroblock *edge = mb_edge[n]; \ |
1158 | 2.44M | int edge_ref = edge->ref_frame; \ |
1159 | 2.44M | if (edge_ref != VP8_FRAME_CURRENT) { \ |
1160 | 1.97M | uint32_t mv = AV_RN32A(&edge->mv); \ |
1161 | 1.97M | if (mv) { \ |
1162 | 497k | if (cur_sign_bias != sign_bias[edge_ref]) { \ |
1163 | | /* SWAR negate of the values in mv. */ \ |
1164 | 70.0k | mv = ~mv; \ |
1165 | 70.0k | mv = ((mv & 0x7fff7fff) + \ |
1166 | 70.0k | 0x00010001) ^ (mv & 0x80008000); \ |
1167 | 70.0k | } \ |
1168 | 497k | if (!n || mv != AV_RN32A(&near_mv[idx])) \ |
1169 | 497k | AV_WN32A(&near_mv[++idx], mv); \ |
1170 | 497k | cnt[idx] += 1 + (n != 2); \ |
1171 | 497k | } else \ |
1172 | 1.97M | cnt[CNT_ZERO] += 1 + (n != 2); \ |
1173 | 1.97M | } \ |
1174 | 2.44M | } |
1175 | | |
1176 | 813k | MV_EDGE_CHECK(0) |
1177 | 813k | MV_EDGE_CHECK(1) |
1178 | 813k | MV_EDGE_CHECK(2) |
1179 | | |
1180 | 813k | mb->partitioning = VP8_SPLITMVMODE_NONE; |
1181 | 813k | if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) { |
1182 | 349k | mb->mode = VP8_MVMODE_MV; |
1183 | | |
1184 | | /* If we have three distinct MVs, merge first and last if they're the same */ |
1185 | 349k | if (cnt[CNT_SPLITMV] && |
1186 | 349k | AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT])) |
1187 | 16.3k | cnt[CNT_NEAREST] += 1; |
1188 | | |
1189 | | /* Swap near and nearest if necessary */ |
1190 | 349k | if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { |
1191 | 14.3k | FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); |
1192 | 14.3k | FFSWAP(VP8mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); |
1193 | 14.3k | } |
1194 | | |
1195 | 349k | if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { |
1196 | 273k | if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { |
1197 | | /* Choose the best mv out of 0,0 and the nearest mv */ |
1198 | 259k | clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); |
1199 | 259k | cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + |
1200 | 259k | (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + |
1201 | 259k | (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); |
1202 | | |
1203 | 259k | if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { |
1204 | 145k | mb->mode = VP8_MVMODE_SPLIT; |
1205 | 145k | mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1]; |
1206 | 145k | } else { |
1207 | 114k | mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]); |
1208 | 114k | mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]); |
1209 | 114k | mb->bmv[0] = mb->mv; |
1210 | 114k | } |
1211 | 259k | } else { |
1212 | 14.3k | clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]); |
1213 | 14.3k | mb->bmv[0] = mb->mv; |
1214 | 14.3k | } |
1215 | 273k | } else { |
1216 | 75.5k | clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]); |
1217 | 75.5k | mb->bmv[0] = mb->mv; |
1218 | 75.5k | } |
1219 | 464k | } else { |
1220 | 464k | mb->mode = VP8_MVMODE_ZERO; |
1221 | 464k | AV_ZERO32(&mb->mv); |
1222 | 464k | mb->bmv[0] = mb->mv; |
1223 | 464k | } |
1224 | 813k | } |
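/* Worked example (not part of vp8.c) for the SWAR negate in MV_EDGE_CHECK
 * above: for a packed 16-bit lane holding 0x0001 (+1), ~0x0001 = 0xFFFE,
 * (0xFFFE & 0x7FFF) + 1 = 0x7FFF, and 0x7FFF ^ 0x8000 = 0xFFFF, i.e. -1;
 * each lane is negated without carries spilling into its neighbour. */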
1225 | | |
1226 | | static av_always_inline |
1227 | | void decode_intra4x4_modes(VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb, |
1228 | | int mb_x, int keyframe, int layout) |
1229 | 1.86M | { |
1230 | 1.86M | uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb; |
1231 | | |
1232 | 1.86M | if (layout) { |
1233 | 1.60M | VP8Macroblock *mb_top = mb - s->mb_width - 1; |
1234 | 1.60M | memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4); |
1235 | 1.60M | } |
1236 | 1.86M | if (keyframe) { |
1237 | 1.84M | int x, y; |
1238 | 1.84M | uint8_t *top; |
1239 | 1.84M | uint8_t *const left = s->intra4x4_pred_mode_left; |
1240 | 1.84M | if (layout) |
1241 | 1.60M | top = mb->intra4x4_pred_mode_top; |
1242 | 241k | else |
1243 | 241k | top = s->intra4x4_pred_mode_top + 4 * mb_x; |
1244 | 9.23M | for (y = 0; y < 4; y++) { |
1245 | 36.9M | for (x = 0; x < 4; x++) { |
1246 | 29.5M | const uint8_t *ctx; |
1247 | 29.5M | ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; |
1248 | 29.5M | *intra4x4 = vp89_rac_get_tree(c, vp8_pred4x4_tree, ctx); |
1249 | 29.5M | left[y] = top[x] = *intra4x4; |
1250 | 29.5M | intra4x4++; |
1251 | 29.5M | } |
1252 | 7.38M | } |
1253 | 1.84M | } else { |
1254 | 15.6k | int i; |
1255 | 266k | for (i = 0; i < 16; i++) |
1256 | 251k | intra4x4[i] = vp89_rac_get_tree(c, vp8_pred4x4_tree, |
1257 | 251k | vp8_pred4x4_prob_inter); |
1258 | 15.6k | } |
1259 | 1.86M | } |
1260 | | |
1261 | | static av_always_inline |
1262 | | void decode_mb_mode(VP8Context *s, const VP8mvbounds *mv_bounds, |
1263 | | VP8Macroblock *mb, int mb_x, int mb_y, |
1264 | | uint8_t *segment, const uint8_t *ref, int layout, int is_vp7) |
1265 | 5.27M | { |
1266 | 5.27M | VPXRangeCoder *c = &s->c; |
1267 | 5.27M | static const char * const vp7_feature_name[] = { "q-index", |
1268 | 5.27M | "lf-delta", |
1269 | 5.27M | "partial-golden-update", |
1270 | 5.27M | "blit-pitch" }; |
1271 | 5.27M | if (is_vp7) { |
1272 | 3.60M | int i; |
1273 | 3.60M | *segment = 0; |
1274 | 18.0M | for (i = 0; i < 4; i++) { |
1275 | 14.4M | if (s->feature_enabled[i]) { |
1276 | 6.05M | if (vpx_rac_get_prob_branchy(c, s->feature_present_prob[i])) { |
1277 | 1.95M | int index = vp89_rac_get_tree(c, vp7_feature_index_tree, |
1278 | 1.95M | s->feature_index_prob[i]); |
1279 | 1.95M | av_log(s->avctx, AV_LOG_WARNING, |
1280 | 1.95M | "Feature %s present in macroblock (value 0x%x)\n", |
1281 | 1.95M | vp7_feature_name[i], s->feature_value[i][index]); |
1282 | 1.95M | } |
1283 | 6.05M | } |
1284 | 14.4M | } |
1285 | 3.60M | } else if (s->segmentation.update_map) { |
1286 | 82.5k | int bit = vpx_rac_get_prob(c, s->prob->segmentid[0]); |
1287 | 82.5k | *segment = vpx_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit; |
1288 | 1.58M | } else if (s->segmentation.enabled) |
1289 | 76.3k | *segment = ref ? *ref : *segment; |
1290 | 5.27M | mb->segment = *segment; |
1291 | | |
1292 | 5.27M | mb->skip = s->mbskip_enabled ? vpx_rac_get_prob(c, s->prob->mbskip) : 0; |
1293 | | |
1294 | 5.27M | if (s->keyframe) { |
1295 | 2.23M | mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_intra, |
1296 | 2.23M | vp8_pred16x16_prob_intra); |
1297 | | |
1298 | 2.23M | if (mb->mode == MODE_I4x4) { |
1299 | 1.84M | decode_intra4x4_modes(s, c, mb, mb_x, 1, layout); |
1300 | 1.84M | } else { |
1301 | 383k | const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode |
1302 | 383k | : vp8_pred4x4_mode)[mb->mode] * 0x01010101u; |
1303 | 383k | if (s->mb_layout) |
1304 | 383k | AV_WN32A(mb->intra4x4_pred_mode_top, modes); |
1305 | 122k | else |
1306 | 383k | AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); |
1307 | 383k | AV_WN32A(s->intra4x4_pred_mode_left, modes); |
1308 | 383k | } |
1309 | | |
1310 | 2.23M | mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree, |
1311 | 2.23M | vp8_pred8x8c_prob_intra); |
1312 | 2.23M | mb->ref_frame = VP8_FRAME_CURRENT; |
1313 | 3.04M | } else if (vpx_rac_get_prob_branchy(c, s->prob->intra)) { |
1314 | | // inter MB, 16.2 |
1315 | 2.32M | if (vpx_rac_get_prob_branchy(c, s->prob->last)) |
1316 | 1.46M | mb->ref_frame = |
1317 | 1.46M | (!is_vp7 && vpx_rac_get_prob(c, s->prob->golden)) ? VP8_FRAME_ALTREF |
1318 | 1.46M | : VP8_FRAME_GOLDEN; |
1319 | 858k | else |
1320 | 858k | mb->ref_frame = VP8_FRAME_PREVIOUS; |
1321 | 2.32M | s->ref_count[mb->ref_frame - 1]++; |
1322 | | |
1323 | | // motion vectors, 16.3 |
1324 | 2.32M | if (is_vp7) |
1325 | 1.51M | vp7_decode_mvs(s, mb, mb_x, mb_y, layout); |
1326 | 813k | else |
1327 | 813k | vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout); |
1328 | 2.32M | } else { |
1329 | | // intra MB, 16.1 |
1330 | 716k | mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_inter, |
1331 | 716k | s->prob->pred16x16); |
1332 | | |
1333 | 716k | if (mb->mode == MODE_I4x4) |
1334 | 15.6k | decode_intra4x4_modes(s, c, mb, mb_x, 0, layout); |
1335 | | |
1336 | 716k | mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree, |
1337 | 716k | s->prob->pred8x8c); |
1338 | 716k | mb->ref_frame = VP8_FRAME_CURRENT; |
1339 | 716k | mb->partitioning = VP8_SPLITMVMODE_NONE; |
1340 | 716k | AV_ZERO32(&mb->bmv[0]); |
1341 | 716k | } |
1342 | 5.27M | } |
1343 | | |
1344 | | /** |
1345 | | * @param r arithmetic bitstream reader context |
1346 | | * @param block destination for block coefficients |
1347 | | * @param probs probabilities to use when reading trees from the bitstream |
1348 | | * @param i initial coeff index, 0 unless a separate DC block is coded |
1349 | | * @param qmul array holding the dc/ac dequant factor at position 0/1 |
1350 | | * |
1351 | | * @return 0 if no coeffs were decoded |
1352 | | * otherwise, the index of the last coeff decoded plus one |
1353 | | */ |
1354 | | static av_always_inline |
1355 | | int decode_block_coeffs_internal(VPXRangeCoder *r, int16_t block[16], |
1356 | | uint8_t probs[16][3][NUM_DCT_TOKENS - 1], |
1357 | | int i, const uint8_t *token_prob, const int16_t qmul[2], |
1358 | | const uint8_t scan[16], int vp7) |
1359 | 4.44M | { |
1360 | 4.44M | VPXRangeCoder c = *r; |
1361 | 4.44M | goto skip_eob; |
1362 | 40.3M | do { |
1363 | 40.3M | int coeff; |
1364 | 40.8M | restart: |
1365 | 40.8M | if (!vpx_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB |
1366 | 1.80M | break; |
1367 | | |
1368 | 45.3M | skip_eob: |
1369 | 45.3M | if (!vpx_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0 |
1370 | 2.41M | if (++i == 16) |
1371 | 51.5k | break; // invalid input; blocks should end with EOB |
1372 | 2.36M | token_prob = probs[i][0]; |
1373 | 2.36M | if (vp7) |
1374 | 494k | goto restart; |
1375 | 1.86M | goto skip_eob; |
1376 | 2.36M | } |
1377 | | |
1378 | 42.9M | if (!vpx_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1 |
1379 | 2.51M | coeff = 1; |
1380 | 2.51M | token_prob = probs[i + 1][1]; |
1381 | 40.3M | } else { |
1382 | 40.3M | if (!vpx_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4 |
1383 | 553k | coeff = vpx_rac_get_prob_branchy(&c, token_prob[4]); |
1384 | 553k | if (coeff) |
1385 | 162k | coeff += vpx_rac_get_prob(&c, token_prob[5]); |
1386 | 553k | coeff += 2; |
1387 | 39.8M | } else { |
1388 | | // DCT_CAT* |
1389 | 39.8M | if (!vpx_rac_get_prob_branchy(&c, token_prob[6])) { |
1390 | 85.5k | if (!vpx_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1 |
1391 | 51.8k | coeff = 5 + vpx_rac_get_prob(&c, vp8_dct_cat1_prob[0]); |
1392 | 51.8k | } else { // DCT_CAT2 |
1393 | 33.7k | coeff = 7; |
1394 | 33.7k | coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1; |
1395 | 33.7k | coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[1]); |
1396 | 33.7k | } |
1397 | 39.7M | } else { // DCT_CAT3 and up |
1398 | 39.7M | int a = vpx_rac_get_prob(&c, token_prob[8]); |
1399 | 39.7M | int b = vpx_rac_get_prob(&c, token_prob[9 + a]); |
1400 | 39.7M | int cat = (a << 1) + b; |
1401 | 39.7M | coeff = 3 + (8 << cat); |
1402 | 39.7M | coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]); |
1403 | 39.7M | } |
1404 | 39.8M | } |
1405 | 40.3M | token_prob = probs[i + 1][2]; |
1406 | 40.3M | } |
1407 | 42.9M | block[scan[i]] = (vp89_rac_get(&c) ? -coeff : coeff) * qmul[!!i]; |
1408 | 42.9M | } while (++i < 16); |
1409 | | |
1410 | 4.44M | *r = c; |
1411 | 4.44M | return i; |
1412 | 0 | } |
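 | | /* Note on the DCT_CAT branches above: coeff = 3 + (8 << cat) yields the |
 | |  * category base values 11 (DCT_CAT3), 19 (DCT_CAT4), 35 (DCT_CAT5) and |
 | |  * 67 (DCT_CAT6); vp8_rac_get_coeff() then appends the category's extra |
 | |  * bits. The initial "goto skip_eob" is valid because the caller, |
 | |  * decode_block_coeffs(), has already tested token_prob[0] (DCT_EOB) for |
 | |  * the first coefficient before dispatching here. */ |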
1413 | | |
1414 | | static av_always_inline |
1415 | | int inter_predict_dc(int16_t block[16], int16_t pred[2]) |
1416 | 706k | { |
1417 | 706k | int16_t dc = block[0]; |
1418 | 706k | int ret = 0; |
1419 | | |
1420 | 706k | if (pred[1] > 3) { |
1421 | 98.2k | dc += pred[0]; |
1422 | 98.2k | ret = 1; |
1423 | 98.2k | } |
1424 | | |
1425 | 706k | if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) { |
1426 | 599k | block[0] = pred[0] = dc; |
1427 | 599k | pred[1] = 0; |
1428 | 599k | } else { |
1429 | 107k | if (pred[0] == dc) |
1430 | 96.4k | pred[1]++; |
1431 | 107k | block[0] = pred[0] = dc; |
1432 | 107k | } |
1433 | | |
1434 | 706k | return ret; |
1435 | 706k | } |
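 | | /* The condition above is a branchless reset check: it is true when |
 | |  * pred[0] is zero, dc is zero, or the two differ in sign (the arithmetic |
 | |  * shift of pred[0] ^ dc). In that case the repeat counter pred[1] is |
 | |  * cleared; otherwise pred[1] is incremented only when the same DC value |
 | |  * repeats, and the actual prediction (dc += pred[0] at the top) is |
 | |  * applied once pred[1] exceeds 3. */ |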
1436 | | |
1437 | | static int vp7_decode_block_coeffs_internal(VPXRangeCoder *r, |
1438 | | int16_t block[16], |
1439 | | uint8_t probs[16][3][NUM_DCT_TOKENS - 1], |
1440 | | int i, const uint8_t *token_prob, |
1441 | | const int16_t qmul[2], |
1442 | | const uint8_t scan[16]) |
1443 | 1.75M | { |
1444 | 1.75M | return decode_block_coeffs_internal(r, block, probs, i, |
1445 | 1.75M | token_prob, qmul, scan, IS_VP7); |
1446 | 1.75M | } |
1447 | | |
1448 | | #ifndef vp8_decode_block_coeffs_internal |
1449 | | static int vp8_decode_block_coeffs_internal(VPXRangeCoder *r, |
1450 | | int16_t block[16], |
1451 | | uint8_t probs[16][3][NUM_DCT_TOKENS - 1], |
1452 | | int i, const uint8_t *token_prob, |
1453 | | const int16_t qmul[2]) |
1454 | 2.69M | { |
1455 | 2.69M | return decode_block_coeffs_internal(r, block, probs, i, |
1456 | 2.69M | token_prob, qmul, ff_zigzag_scan, IS_VP8); |
1457 | 2.69M | } |
1458 | | #endif |
1459 | | |
1460 | | /** |
1461 | | * @param c arithmetic bitstream reader context |
1462 | | * @param block destination for block coefficients |
1463 | | * @param probs probabilities to use when reading trees from the bitstream |
1464 | | * @param i initial coeff index, 0 unless a separate DC block is coded |
1465 | | * @param zero_nhood the initial prediction context for number of surrounding |
1466 | | * all-zero blocks (only left/top, so 0-2) |
1467 | | * @param qmul array holding the dc/ac dequant factor at position 0/1 |
1468 | | * @param scan scan pattern (VP7 only) |
1469 | | * |
1470 | | * @return 0 if no coeffs were decoded |
1471 | | * otherwise, the index of the last coeff decoded plus one |
1472 | | */ |
1473 | | static av_always_inline |
1474 | | int decode_block_coeffs(VPXRangeCoder *c, int16_t block[16], |
1475 | | uint8_t probs[16][3][NUM_DCT_TOKENS - 1], |
1476 | | int i, int zero_nhood, const int16_t qmul[2], |
1477 | | const uint8_t scan[16], int vp7) |
1478 | 57.9M | { |
1479 | 57.9M | const uint8_t *token_prob = probs[i][zero_nhood]; |
1480 | 57.9M | if (!vpx_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB |
1481 | 53.5M | return 0; |
1482 | 4.44M | return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i, |
1483 | 1.75M | token_prob, qmul, scan) |
1484 | 4.44M | : vp8_decode_block_coeffs_internal(c, block, probs, i, |
1485 | 2.69M | token_prob, qmul); |
1486 | 57.9M | } |
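 | | /* As the counts show, the overwhelming majority of calls hit DCT_EOB on |
 | |  * the first token and return 0 here, before the _internal helpers copy |
 | |  * the VPXRangeCoder state into a local. */ |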
1487 | | |
1488 | | static av_always_inline |
1489 | | void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VPXRangeCoder *c, |
1490 | | VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9], |
1491 | | int is_vp7) |
1492 | 2.34M | { |
1493 | 2.34M | int i, x, y, luma_start = 0, luma_ctx = 3; |
1494 | 2.34M | int nnz_pred, nnz, nnz_total = 0; |
1495 | 2.34M | int segment = mb->segment; |
1496 | 2.34M | int block_dc = 0; |
1497 | | |
1498 | 2.34M | if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) { |
1499 | 1.72M | nnz_pred = t_nnz[8] + l_nnz[8]; |
1500 | | |
1501 | | // decode DC values and do hadamard |
1502 | 1.72M | nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, |
1503 | 1.72M | nnz_pred, s->qmat[segment].luma_dc_qmul, |
1504 | 1.72M | ff_zigzag_scan, is_vp7); |
1505 | 1.72M | l_nnz[8] = t_nnz[8] = !!nnz; |
1506 | | |
1507 | 1.72M | if (is_vp7 && mb->mode > MODE_I4x4) { |
1508 | 706k | nnz |= inter_predict_dc(td->block_dc, |
1509 | 706k | s->inter_dc_pred[mb->ref_frame - 1]); |
1510 | 706k | } |
1511 | | |
1512 | 1.72M | if (nnz) { |
1513 | 281k | nnz_total += nnz; |
1514 | 281k | block_dc = 1; |
1515 | 281k | if (nnz == 1) |
1516 | 136k | s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc); |
1517 | 145k | else |
1518 | 145k | s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc); |
1519 | 281k | } |
1520 | 1.72M | luma_start = 1; |
1521 | 1.72M | luma_ctx = 0; |
1522 | 1.72M | } |
1523 | | |
1524 | | // luma blocks |
1525 | 11.7M | for (y = 0; y < 4; y++) |
1526 | 46.8M | for (x = 0; x < 4; x++) { |
1527 | 37.5M | nnz_pred = l_nnz[y] + t_nnz[x]; |
1528 | 37.5M | nnz = decode_block_coeffs(c, td->block[y][x], |
1529 | 37.5M | s->prob->token[luma_ctx], |
1530 | 37.5M | luma_start, nnz_pred, |
1531 | 37.5M | s->qmat[segment].luma_qmul, |
1532 | 37.5M | s->prob[0].scan, is_vp7); |
1533 | | /* nnz+block_dc may be one more than the actual last index, |
1534 | | * but we don't care */ |
1535 | 37.5M | td->non_zero_count_cache[y][x] = nnz + block_dc; |
1536 | 37.5M | t_nnz[x] = l_nnz[y] = !!nnz; |
1537 | 37.5M | nnz_total += nnz; |
1538 | 37.5M | } |
1539 | | |
1540 | | // chroma blocks |
1541 | | // TODO: what to do about dimensions? 2nd dim for luma is x, |
1542 | | // but for chroma it's (y<<1)|x |
1543 | 7.03M | for (i = 4; i < 6; i++) |
1544 | 14.0M | for (y = 0; y < 2; y++) |
1545 | 28.1M | for (x = 0; x < 2; x++) { |
1546 | 18.7M | nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x]; |
1547 | 18.7M | nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x], |
1548 | 18.7M | s->prob->token[2], 0, nnz_pred, |
1549 | 18.7M | s->qmat[segment].chroma_qmul, |
1550 | 18.7M | s->prob[0].scan, is_vp7); |
1551 | 18.7M | td->non_zero_count_cache[i][(y << 1) + x] = nnz; |
1552 | 18.7M | t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz; |
1553 | 18.7M | nnz_total += nnz; |
1554 | 18.7M | } |
1555 | | |
1556 | | // if there were no coded coeffs despite the macroblock not being marked skip, |
1557 | | // we MUST not do the inner loop filter and should not do IDCT. |
1558 | | // Since skip isn't used for bitstream prediction, just manually set it. |
1559 | 2.34M | if (!nnz_total) |
1560 | 1.80M | mb->skip = 1; |
1561 | 2.34M | } |
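 | | /* When a separate DC (WHT) block is coded above, the 16 luma blocks are |
 | |  * decoded starting at coefficient 1 (luma_start = 1) and with token |
 | |  * context 0 instead of 3, since their DC values live in td->block_dc. */ |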
1562 | | |
1563 | | static av_always_inline |
1564 | | void backup_mb_border(uint8_t *top_border, const uint8_t *src_y, |
1565 | | const uint8_t *src_cb, const uint8_t *src_cr, |
1566 | | ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple) |
1567 | 1.98M | { |
1568 | 1.98M | AV_COPY128(top_border, src_y + 15 * linesize); |
1569 | 1.98M | if (!simple) { |
1570 | 1.36M | AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); |
1571 | 1.36M | AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); |
1572 | 1.36M | } |
1573 | 1.98M | } |
1574 | | |
1575 | | static av_always_inline |
1576 | | void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, |
1577 | | uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x, |
1578 | | int mb_y, int mb_width, int simple, int xchg) |
1579 | 1.52M | { |
1580 | 1.52M | uint8_t *top_border_m1 = top_border - 32; // for TL prediction |
1581 | 1.52M | src_y -= linesize; |
1582 | 1.52M | src_cb -= uvlinesize; |
1583 | 1.52M | src_cr -= uvlinesize; |
1584 | | |
1585 | 1.52M | #define XCHG(a, b, xchg) \ |
1586 | 10.5M | do { \ |
1587 | 10.5M | if (xchg) \ |
1588 | 10.5M | AV_SWAP64(b, a); \ |
1589 | 10.5M | else \ |
1590 | 10.5M | AV_COPY64(b, a); \ |
1591 | 10.5M | } while (0) |
1592 | | |
1593 | 1.52M | XCHG(top_border_m1 + 8, src_y - 8, xchg); |
1594 | 1.52M | XCHG(top_border, src_y, xchg); |
1595 | 1.52M | XCHG(top_border + 8, src_y + 8, 1); |
1596 | 1.52M | if (mb_x < mb_width - 1) |
1597 | 1.27M | XCHG(top_border + 32, src_y + 16, 1); |
1598 | | |
1599 | | // only copy chroma for normal loop filter |
1600 | | // or to initialize the top row to 127 |
1601 | 1.52M | if (!simple || !mb_y) { |
1602 | 1.16M | XCHG(top_border_m1 + 16, src_cb - 8, xchg); |
1603 | 1.16M | XCHG(top_border_m1 + 24, src_cr - 8, xchg); |
1604 | 1.16M | XCHG(top_border + 16, src_cb, 1); |
1605 | 1.16M | XCHG(top_border + 24, src_cr, 1); |
1606 | 1.16M | } |
1607 | 1.52M | } |
1608 | | |
1609 | | static av_always_inline |
1610 | | int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) |
1611 | 1.67M | { |
1612 | 1.67M | if (!mb_x) |
1613 | 296k | return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; |
1614 | 1.38M | else |
1615 | 1.38M | return mb_y ? mode : LEFT_DC_PRED8x8; |
1616 | 1.67M | } |
1617 | | |
1618 | | static av_always_inline |
1619 | | int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7) |
1620 | 169k | { |
1621 | 169k | if (!mb_x) |
1622 | 33.4k | return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8); |
1623 | 136k | else |
1624 | 136k | return mb_y ? mode : HOR_PRED8x8; |
1625 | 169k | } |
1626 | | |
1627 | | static av_always_inline |
1628 | | int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7) |
1629 | 2.05M | { |
1630 | 2.05M | switch (mode) { |
1631 | 1.67M | case DC_PRED8x8: |
1632 | 1.67M | return check_dc_pred8x8_mode(mode, mb_x, mb_y); |
1633 | 115k | case VERT_PRED8x8: |
1634 | 115k | return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode; |
1635 | 88.7k | case HOR_PRED8x8: |
1636 | 88.7k | return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode; |
1637 | 169k | case PLANE_PRED8x8: /* TM */ |
1638 | 169k | return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7); |
1639 | 2.05M | } |
1640 | 0 | return mode; |
1641 | 2.05M | } |
1642 | | |
1643 | | static av_always_inline |
1644 | | int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7) |
1645 | 298k | { |
1646 | 298k | if (!mb_x) { |
1647 | 13.2k | return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED); |
1648 | 285k | } else { |
1649 | 285k | return mb_y ? mode : HOR_VP8_PRED; |
1650 | 285k | } |
1651 | 298k | } |
1652 | | |
1653 | | static av_always_inline |
1654 | | int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, |
1655 | | int *copy_buf, int vp7) |
1656 | 8.80M | { |
1657 | 8.80M | switch (mode) { |
1658 | 127k | case VERT_PRED: |
1659 | 127k | if (!mb_x && mb_y) { |
1660 | 13.7k | *copy_buf = 1; |
1661 | 13.7k | return mode; |
1662 | 13.7k | } |
1663 | | /* fall-through */ |
1664 | 154k | case DIAG_DOWN_LEFT_PRED: |
1665 | 221k | case VERT_LEFT_PRED: |
1666 | 221k | return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode; |
1667 | 81.3k | case HOR_PRED: |
1668 | 81.3k | if (!mb_y) { |
1669 | 5.61k | *copy_buf = 1; |
1670 | 5.61k | return mode; |
1671 | 5.61k | } |
1672 | | /* fall-through */ |
1673 | 130k | case HOR_UP_PRED: |
1674 | 130k | return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode; |
1675 | 298k | case TM_VP8_PRED: |
1676 | 298k | return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7); |
1677 | 8.01M | case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions |
1678 | | * as 16x16/8x8 DC */ |
1679 | 8.04M | case DIAG_DOWN_RIGHT_PRED: |
1680 | 8.09M | case VERT_RIGHT_PRED: |
1681 | 8.13M | case HOR_DOWN_PRED: |
1682 | 8.13M | if (!mb_y || !mb_x) |
1683 | 773k | *copy_buf = 1; |
1684 | 8.13M | return mode; |
1685 | 8.80M | } |
1686 | 0 | return mode; |
1687 | 8.80M | } |
1688 | | |
1689 | | static av_always_inline |
1690 | | void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3], |
1691 | | VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7) |
1692 | 1.30M | { |
1693 | 1.30M | int x, y, mode, nnz; |
1694 | 1.30M | uint32_t tr; |
1695 | | |
1696 | | /* for the first row, we need to run xchg_mb_border to init the top edge |
1697 | | * to 127; otherwise, skip it if we aren't going to deblock */ |
1698 | 1.30M | if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) |
1699 | 762k | xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], |
1700 | 762k | s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
1701 | 762k | s->filter.simple, 1); |
1702 | | |
1703 | 1.30M | if (mb->mode < MODE_I4x4) { |
1704 | 750k | mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7); |
1705 | 750k | s->hpc.pred16x16[mode](dst[0], s->linesize); |
1706 | 750k | } else { |
1707 | 550k | uint8_t *ptr = dst[0]; |
1708 | 550k | const uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb; |
1709 | 550k | const uint8_t lo = is_vp7 ? 128 : 127; |
1710 | 550k | const uint8_t hi = is_vp7 ? 128 : 129; |
1711 | 550k | const uint8_t tr_top[4] = { lo, lo, lo, lo }; |
1712 | | |
1713 | | // all blocks on the right edge of the macroblock use the bottom edge of |
1714 | | // the top macroblock for their topright edge |
1715 | 550k | const uint8_t *tr_right = ptr - s->linesize + 16; |
1716 | | |
1717 | | // if we're on the right edge of the frame, said edge is extended |
1718 | | // from the top macroblock |
1719 | 550k | if (mb_y && mb_x == s->mb_width - 1) { |
1720 | 45.8k | tr = tr_right[-1] * 0x01010101u; |
1721 | 45.8k | tr_right = (uint8_t *) &tr; |
1722 | 45.8k | } |
1723 | | |
1724 | 550k | if (mb->skip) |
1725 | 411k | AV_ZERO128(td->non_zero_count_cache); |
1726 | | |
1727 | 2.75M | for (y = 0; y < 4; y++) { |
1728 | 2.20M | const uint8_t *topright = ptr + 4 - s->linesize; |
1729 | 11.0M | for (x = 0; x < 4; x++) { |
1730 | 8.80M | int copy = 0; |
1731 | 8.80M | ptrdiff_t linesize = s->linesize; |
1732 | 8.80M | uint8_t *dst = ptr + 4 * x; |
1733 | 8.80M | LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]); |
1734 | | |
1735 | 8.80M | if ((y == 0 || x == 3) && mb_y == 0) { |
1736 | 987k | topright = tr_top; |
1737 | 7.81M | } else if (x == 3) |
1738 | 1.63M | topright = tr_right; |
1739 | | |
1740 | 8.80M | mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, |
1741 | 8.80M | mb_y + y, ©, is_vp7); |
1742 | 8.80M | if (copy) { |
1743 | 793k | dst = copy_dst + 12; |
1744 | 793k | linesize = 8; |
1745 | 793k | if (!(mb_y + y)) { |
1746 | 535k | copy_dst[3] = lo; |
1747 | 535k | AV_WN32A(copy_dst + 4, lo * 0x01010101U); |
1748 | 535k | } else { |
1749 | 257k | AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize); |
1750 | 257k | if (!(mb_x + x)) { |
1751 | 257k | copy_dst[3] = hi; |
1752 | 257k | } else { |
1753 | 0 | copy_dst[3] = ptr[4 * x - s->linesize - 1]; |
1754 | 0 | } |
1755 | 257k | } |
1756 | 793k | if (!(mb_x + x)) { |
1757 | 282k | copy_dst[11] = |
1758 | 282k | copy_dst[19] = |
1759 | 282k | copy_dst[27] = |
1760 | 282k | copy_dst[35] = hi; |
1761 | 510k | } else { |
1762 | 510k | copy_dst[11] = ptr[4 * x - 1]; |
1763 | 510k | copy_dst[19] = ptr[4 * x + s->linesize - 1]; |
1764 | 510k | copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1]; |
1765 | 510k | copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1]; |
1766 | 510k | } |
1767 | 793k | } |
1768 | 8.80M | s->hpc.pred4x4[mode](dst, topright, linesize); |
1769 | 8.80M | if (copy) { |
1770 | 793k | AV_COPY32(ptr + 4 * x, copy_dst + 12); |
1771 | 793k | AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20); |
1772 | 793k | AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28); |
1773 | 793k | AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36); |
1774 | 793k | } |
1775 | | |
1776 | 8.80M | nnz = td->non_zero_count_cache[y][x]; |
1777 | 8.80M | if (nnz) { |
1778 | 929k | if (nnz == 1) |
1779 | 358k | s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x, |
1780 | 358k | td->block[y][x], s->linesize); |
1781 | 570k | else |
1782 | 570k | s->vp8dsp.vp8_idct_add(ptr + 4 * x, |
1783 | 570k | td->block[y][x], s->linesize); |
1784 | 929k | } |
1785 | 8.80M | topright += 4; |
1786 | 8.80M | } |
1787 | | |
1788 | 2.20M | ptr += 4 * s->linesize; |
1789 | 2.20M | intra4x4 += 4; |
1790 | 2.20M | } |
1791 | 550k | } |
1792 | | |
1793 | 1.30M | mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, |
1794 | 1.30M | mb_x, mb_y, is_vp7); |
1795 | 1.30M | s->hpc.pred8x8[mode](dst[1], s->uvlinesize); |
1796 | 1.30M | s->hpc.pred8x8[mode](dst[2], s->uvlinesize); |
1797 | | |
1798 | 1.30M | if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) |
1799 | 762k | xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], |
1800 | 762k | s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
1801 | 762k | s->filter.simple, 0); |
1802 | 1.30M | } |
1803 | | |
1804 | | static const uint8_t subpel_idx[3][8] = { |
1805 | | { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels, |
1806 | | // also function pointer index |
1807 | | { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required |
1808 | | { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels |
1809 | | }; |
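 | | /* subpel_idx[0][x] doubles as the mc_func index: 0 selects the plain |
 | |  * copy, 1 a filter needing 1 extra pixel left and 2 right (3 in total), |
 | |  * and 2 a filter needing 2 extra pixels left and 3 right (5 in total); |
 | |  * row [1] is always the sum of rows [0] and [2]. */ |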
1810 | | |
1811 | | /** |
1812 | | * luma MC function |
1813 | | * |
1814 | | * @param s VP8 decoding context |
1815 | | * @param dst target buffer for block data at block position |
1816 | | * @param ref reference picture buffer at origin (0, 0) |
1817 | | * @param mv motion vector (relative to block position) to get pixel data from |
1818 | | * @param x_off horizontal position of block from origin (0, 0) |
1819 | | * @param y_off vertical position of block from origin (0, 0) |
1820 | | * @param block_w width of block (16, 8 or 4) |
1821 | | * @param block_h height of block (16, 8 or 4; may differ from block_w) |
1822 | | * @param width width of src/dst plane data |
1823 | | * @param height height of src/dst plane data |
1824 | | * @param linesize size of a single line of plane data, including padding |
1825 | | * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
1826 | | */ |
1827 | | static av_always_inline |
1828 | | void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, |
1829 | | const ProgressFrame *ref, const VP8mv *mv, |
1830 | | int x_off, int y_off, int block_w, int block_h, |
1831 | | int width, int height, ptrdiff_t linesize, |
1832 | | vp8_mc_func mc_func[3][3]) |
1833 | 2.66M | { |
1834 | 2.66M | const uint8_t *src = ref->f->data[0]; |
1835 | | |
1836 | 2.66M | if (AV_RN32A(mv)) { |
1837 | 1.38M | ptrdiff_t src_linesize = linesize; |
1838 | | |
1839 | 1.38M | int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx]; |
1840 | 1.38M | int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my]; |
1841 | | |
1842 | 1.38M | x_off += mv->x >> 2; |
1843 | 1.38M | y_off += mv->y >> 2; |
1844 | | |
1845 | | // edge emulation |
1846 | 1.38M | ff_progress_frame_await(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4); |
1847 | 1.38M | src += y_off * linesize + x_off; |
1848 | 1.38M | if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || |
1849 | 1.38M | y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { |
1850 | 489k | s->vdsp.emulated_edge_mc(td->edge_emu_buffer, |
1851 | 489k | src - my_idx * linesize - mx_idx, |
1852 | 489k | EDGE_EMU_LINESIZE, linesize, |
1853 | 489k | block_w + subpel_idx[1][mx], |
1854 | 489k | block_h + subpel_idx[1][my], |
1855 | 489k | x_off - mx_idx, y_off - my_idx, |
1856 | 489k | width, height); |
1857 | 489k | src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; |
1858 | 489k | src_linesize = EDGE_EMU_LINESIZE; |
1859 | 489k | } |
1860 | 1.38M | mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my); |
1861 | 1.38M | } else { |
1862 | 1.27M | ff_progress_frame_await(ref, (3 + y_off + block_h) >> 4); |
1863 | 1.27M | mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, |
1864 | 1.27M | linesize, block_h, 0, 0); |
1865 | 1.27M | } |
1866 | 2.66M | } |
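 | | /* The ff_progress_frame_await() calls above block until the reference |
 | |  * frame has decoded (and loop-filtered) every row the MC filter can |
 | |  * read: the last target row plus the bottom margin from subpel_idx[2], |
 | |  * converted by the >> 4 into the macroblock-row units used for progress |
 | |  * reporting (>> 3 in the chroma variant below). */ |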
1867 | | |
1868 | | /** |
1869 | | * chroma MC function |
1870 | | * |
1871 | | * @param s VP8 decoding context |
1872 | | * @param dst1 target buffer for block data at block position (U plane) |
1873 | | * @param dst2 target buffer for block data at block position (V plane) |
1874 | | * @param ref reference picture buffer at origin (0, 0) |
1875 | | * @param mv motion vector (relative to block position) to get pixel data from |
1876 | | * @param x_off horizontal position of block from origin (0, 0) |
1877 | | * @param y_off vertical position of block from origin (0, 0) |
1878 | | * @param block_w width of block (8 or 4) |
1879 | | * @param block_h height of block (8 or 4; may differ from block_w) |
1880 | | * @param width width of src/dst plane data |
1881 | | * @param height height of src/dst plane data |
1882 | | * @param linesize size of a single line of plane data, including padding |
1883 | | * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
1884 | | */ |
1885 | | static av_always_inline |
1886 | | void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, |
1887 | | uint8_t *dst2, const ProgressFrame *ref, const VP8mv *mv, |
1888 | | int x_off, int y_off, int block_w, int block_h, |
1889 | | int width, int height, ptrdiff_t linesize, |
1890 | | vp8_mc_func mc_func[3][3]) |
1891 | 1.94M | { |
1892 | 1.94M | const uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2]; |
1893 | | |
1894 | 1.94M | if (AV_RN32A(mv)) { |
1895 | 955k | int mx = mv->x & 7, mx_idx = subpel_idx[0][mx]; |
1896 | 955k | int my = mv->y & 7, my_idx = subpel_idx[0][my]; |
1897 | | |
1898 | 955k | x_off += mv->x >> 3; |
1899 | 955k | y_off += mv->y >> 3; |
1900 | | |
1901 | | // edge emulation |
1902 | 955k | src1 += y_off * linesize + x_off; |
1903 | 955k | src2 += y_off * linesize + x_off; |
1904 | 955k | ff_progress_frame_await(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3); |
1905 | 955k | if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || |
1906 | 955k | y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { |
1907 | 391k | s->vdsp.emulated_edge_mc(td->edge_emu_buffer, |
1908 | 391k | src1 - my_idx * linesize - mx_idx, |
1909 | 391k | EDGE_EMU_LINESIZE, linesize, |
1910 | 391k | block_w + subpel_idx[1][mx], |
1911 | 391k | block_h + subpel_idx[1][my], |
1912 | 391k | x_off - mx_idx, y_off - my_idx, width, height); |
1913 | 391k | src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; |
1914 | 391k | mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my); |
1915 | | |
1916 | 391k | s->vdsp.emulated_edge_mc(td->edge_emu_buffer, |
1917 | 391k | src2 - my_idx * linesize - mx_idx, |
1918 | 391k | EDGE_EMU_LINESIZE, linesize, |
1919 | 391k | block_w + subpel_idx[1][mx], |
1920 | 391k | block_h + subpel_idx[1][my], |
1921 | 391k | x_off - mx_idx, y_off - my_idx, width, height); |
1922 | 391k | src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; |
1923 | 391k | mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my); |
1924 | 563k | } else { |
1925 | 563k | mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); |
1926 | 563k | mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); |
1927 | 563k | } |
1928 | 992k | } else { |
1929 | 992k | ff_progress_frame_await(ref, (3 + y_off + block_h) >> 3); |
1930 | 992k | mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); |
1931 | 992k | mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); |
1932 | 992k | } |
1933 | 1.94M | } |
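 | | /* The chroma MV keeps the quarter-pel luma value, which at half |
 | |  * resolution amounts to eighth-pel: hence the fraction mv & 7 and the |
 | |  * integer offset mv >> 3 here, while the luma path above scales its |
 | |  * quarter-pel fraction to eighth-pel with (mv * 2) & 7 and uses |
 | |  * mv >> 2 for the integer part. */ |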
1934 | | |
1935 | | static av_always_inline |
1936 | | void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3], |
1937 | | const ProgressFrame *ref_frame, int x_off, int y_off, |
1938 | | int bx_off, int by_off, int block_w, int block_h, |
1939 | | int width, int height, const VP8mv *mv) |
1940 | 1.70M | { |
1941 | 1.70M | VP8mv uvmv = *mv; |
1942 | | |
1943 | | /* Y */ |
1944 | 1.70M | vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off, |
1945 | 1.70M | ref_frame, mv, x_off + bx_off, y_off + by_off, |
1946 | 1.70M | block_w, block_h, width, height, s->linesize, |
1947 | 1.70M | s->put_pixels_tab[block_w == 8]); |
1948 | | |
1949 | | /* U/V */ |
1950 | 1.70M | if (s->profile == 3) { |
1951 | | /* this block only applies to VP8; it is safe to check |
1952 | | * only the profile, as VP7 profile <= 1 */ |
1953 | 100k | uvmv.x &= ~7; |
1954 | 100k | uvmv.y &= ~7; |
1955 | 100k | } |
1956 | 1.70M | x_off >>= 1; |
1957 | 1.70M | y_off >>= 1; |
1958 | 1.70M | bx_off >>= 1; |
1959 | 1.70M | by_off >>= 1; |
1960 | 1.70M | width >>= 1; |
1961 | 1.70M | height >>= 1; |
1962 | 1.70M | block_w >>= 1; |
1963 | 1.70M | block_h >>= 1; |
1964 | 1.70M | vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off, |
1965 | 1.70M | dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, |
1966 | 1.70M | &uvmv, x_off + bx_off, y_off + by_off, |
1967 | 1.70M | block_w, block_h, width, height, s->uvlinesize, |
1968 | 1.70M | s->put_pixels_tab[1 + (block_w == 4)]); |
1969 | 1.70M | } |
1970 | | |
1971 | | /* Fetch pixels for estimated mv 4 macroblocks ahead. |
1972 | | * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ |
1973 | | static av_always_inline |
1974 | | void prefetch_motion(const VP8Context *s, const VP8Macroblock *mb, |
1975 | | int mb_x, int mb_y, int mb_xy, int ref) |
1976 | 8.46M | { |
1977 | | /* Don't prefetch refs that haven't been used very often this frame. */ |
1978 | 8.46M | if (s->ref_count[ref - 1] > (mb_xy >> 5)) { |
1979 | 3.24M | int x_off = mb_x << 4, y_off = mb_y << 4; |
1980 | 3.24M | int mx = (mb->mv.x >> 2) + x_off + 8; |
1981 | 3.24M | int my = (mb->mv.y >> 2) + y_off; |
1982 | 3.24M | uint8_t **src = s->framep[ref]->tf.f->data; |
1983 | 3.24M | int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64; |
1984 | | /* For threading, a ff_thread_await_progress here might be useful, but |
1985 | | * it actually slows down the decoder. Since a bad prefetch doesn't |
1986 | | * generate bad decoder output, we don't run it here. */ |
1987 | 3.24M | s->vdsp.prefetch(src[0] + off, s->linesize, 4); |
1988 | 3.24M | off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64; |
1989 | 3.24M | s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); |
1990 | 3.24M | } |
1991 | 8.46M | } |
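 | | /* The ref_count heuristic above only prefetches a reference once it has |
 | |  * been selected more than mb_xy / 32 times in the current frame, so |
 | |  * rarely used golden/altref frames do not pollute the cache. */ |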
1992 | | |
1993 | | /** |
1994 | | * Apply motion vectors to prediction buffer, chapter 18. |
1995 | | */ |
1996 | | static av_always_inline |
1997 | | void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3], |
1998 | | VP8Macroblock *mb, int mb_x, int mb_y) |
1999 | 1.51M | { |
2000 | 1.51M | int x_off = mb_x << 4, y_off = mb_y << 4; |
2001 | 1.51M | int width = 16 * s->mb_width, height = 16 * s->mb_height; |
2002 | 1.51M | const ProgressFrame *ref = &s->framep[mb->ref_frame]->tf; |
2003 | 1.51M | const VP8mv *bmv = mb->bmv; |
2004 | | |
2005 | 1.51M | switch (mb->partitioning) { |
2006 | 1.29M | case VP8_SPLITMVMODE_NONE: |
2007 | 1.29M | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2008 | 1.29M | 0, 0, 16, 16, width, height, &mb->mv); |
2009 | 1.29M | break; |
2010 | 59.9k | case VP8_SPLITMVMODE_4x4: { |
2011 | 59.9k | int x, y; |
2012 | 59.9k | VP8mv uvmv; |
2013 | | |
2014 | | /* Y */ |
2015 | 299k | for (y = 0; y < 4; y++) { |
2016 | 1.19M | for (x = 0; x < 4; x++) { |
2017 | 958k | vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4, |
2018 | 958k | ref, &bmv[4 * y + x], |
2019 | 958k | 4 * x + x_off, 4 * y + y_off, 4, 4, |
2020 | 958k | width, height, s->linesize, |
2021 | 958k | s->put_pixels_tab[2]); |
2022 | 958k | } |
2023 | 239k | } |
2024 | | |
2025 | | /* U/V */ |
2026 | 59.9k | x_off >>= 1; |
2027 | 59.9k | y_off >>= 1; |
2028 | 59.9k | width >>= 1; |
2029 | 59.9k | height >>= 1; |
2030 | 179k | for (y = 0; y < 2; y++) { |
2031 | 359k | for (x = 0; x < 2; x++) { |
2032 | 239k | uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x + |
2033 | 239k | mb->bmv[2 * y * 4 + 2 * x + 1].x + |
2034 | 239k | mb->bmv[(2 * y + 1) * 4 + 2 * x ].x + |
2035 | 239k | mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x; |
2036 | 239k | uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y + |
2037 | 239k | mb->bmv[2 * y * 4 + 2 * x + 1].y + |
2038 | 239k | mb->bmv[(2 * y + 1) * 4 + 2 * x ].y + |
2039 | 239k | mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y; |
2040 | 239k | uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2; |
2041 | 239k | uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2; |
2042 | 239k | if (s->profile == 3) { |
2043 | 40.2k | uvmv.x &= ~7; |
2044 | 40.2k | uvmv.y &= ~7; |
2045 | 40.2k | } |
2046 | 239k | vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4, |
2047 | 239k | dst[2] + 4 * y * s->uvlinesize + x * 4, ref, |
2048 | 239k | &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4, |
2049 | 239k | width, height, s->uvlinesize, |
2050 | 239k | s->put_pixels_tab[2]); |
2051 | 239k | } |
2052 | 119k | } |
2053 | 59.9k | break; |
2054 | 0 | } |
2055 | 33.3k | case VP8_SPLITMVMODE_16x8: |
2056 | 33.3k | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2057 | 33.3k | 0, 0, 16, 8, width, height, &bmv[0]); |
2058 | 33.3k | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2059 | 33.3k | 0, 8, 16, 8, width, height, &bmv[1]); |
2060 | 33.3k | break; |
2061 | 88.2k | case VP8_SPLITMVMODE_8x16: |
2062 | 88.2k | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2063 | 88.2k | 0, 0, 8, 16, width, height, &bmv[0]); |
2064 | 88.2k | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2065 | 88.2k | 8, 0, 8, 16, width, height, &bmv[1]); |
2066 | 88.2k | break; |
2067 | 42.1k | case VP8_SPLITMVMODE_8x8: |
2068 | 42.1k | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2069 | 42.1k | 0, 0, 8, 8, width, height, &bmv[0]); |
2070 | 42.1k | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2071 | 42.1k | 8, 0, 8, 8, width, height, &bmv[1]); |
2072 | 42.1k | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2073 | 42.1k | 0, 8, 8, 8, width, height, &bmv[2]); |
2074 | 42.1k | vp8_mc_part(s, td, dst, ref, x_off, y_off, |
2075 | 42.1k | 8, 8, 8, 8, width, height, &bmv[3]); |
2076 | 42.1k | break; |
2077 | 1.51M | } |
2078 | 1.51M | } |
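 | | /* For VP8_SPLITMVMODE_4x4 above, each 4x4 chroma block uses one quarter |
 | |  * of the summed co-located luma MVs; the 2 + FF_SIGNBIT() bias makes the |
 | |  * >> 2 round in a sign-aware way. Profile 3 then clears the fractional |
 | |  * bits, i.e. full-pel chroma motion compensation. */ |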
2079 | | |
2080 | | static av_always_inline |
2081 | | void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3], |
2082 | | const VP8Macroblock *mb) |
2083 | 539k | { |
2084 | 539k | int x, y, ch; |
2085 | | |
2086 | 539k | if (mb->mode != MODE_I4x4) { |
2087 | 400k | uint8_t *y_dst = dst[0]; |
2088 | 2.00M | for (y = 0; y < 4; y++) { |
2089 | 1.60M | uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]); |
2090 | 1.60M | if (nnz4) { |
2091 | 1.29M | if (nnz4 & ~0x01010101) { |
2092 | 2.26M | for (x = 0; x < 4; x++) { |
2093 | 2.26M | if ((uint8_t) nnz4 == 1) |
2094 | 178k | s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x, |
2095 | 178k | td->block[y][x], |
2096 | 178k | s->linesize); |
2097 | 2.09M | else if ((uint8_t) nnz4 > 1) |
2098 | 1.90M | s->vp8dsp.vp8_idct_add(y_dst + 4 * x, |
2099 | 1.90M | td->block[y][x], |
2100 | 1.90M | s->linesize); |
2101 | 2.26M | nnz4 >>= 8; |
2102 | 2.26M | if (!nnz4) |
2103 | 603k | break; |
2104 | 2.26M | } |
2105 | 689k | } else { |
2106 | 689k | s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize); |
2107 | 689k | } |
2108 | 1.29M | } |
2109 | 1.60M | y_dst += 4 * s->linesize; |
2110 | 1.60M | } |
2111 | 400k | } |
2112 | | |
2113 | 1.61M | for (ch = 0; ch < 2; ch++) { |
2114 | 1.07M | uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]); |
2115 | 1.07M | if (nnz4) { |
2116 | 467k | uint8_t *ch_dst = dst[1 + ch]; |
2117 | 467k | if (nnz4 & ~0x01010101) { |
2118 | 610k | for (y = 0; y < 2; y++) { |
2119 | 1.48M | for (x = 0; x < 2; x++) { |
2120 | 1.19M | if ((uint8_t) nnz4 == 1) |
2121 | 116k | s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x, |
2122 | 116k | td->block[4 + ch][(y << 1) + x], |
2123 | 116k | s->uvlinesize); |
2124 | 1.07M | else if ((uint8_t) nnz4 > 1) |
2125 | 970k | s->vp8dsp.vp8_idct_add(ch_dst + 4 * x, |
2126 | 970k | td->block[4 + ch][(y << 1) + x], |
2127 | 970k | s->uvlinesize); |
2128 | 1.19M | nnz4 >>= 8; |
2129 | 1.19M | if (!nnz4) |
2130 | 314k | goto chroma_idct_end; |
2131 | 1.19M | } |
2132 | 295k | ch_dst += 4 * s->uvlinesize; |
2133 | 295k | } |
2134 | 314k | } else { |
2135 | 152k | s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize); |
2136 | 152k | } |
2137 | 467k | } |
2138 | 1.07M | chroma_idct_end: |
2139 | 1.07M | ; |
2140 | 1.07M | } |
2141 | 539k | } |
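 | | /* nnz4 packs the four per-block coefficient counts of one row into a |
 | |  * 32-bit word: "nnz4 & ~0x01010101" is nonzero iff any block needs a |
 | |  * full IDCT (count > 1); otherwise every coded block is DC-only and the |
 | |  * vp8_idct_dc_add4y / vp8_idct_dc_add4uv fast paths handle all four at |
 | |  * once. */ |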
2142 | | |
2143 | | static av_always_inline |
2144 | | void filter_level_for_mb(const VP8Context *s, const VP8Macroblock *mb, |
2145 | | VP8FilterStrength *f, int is_vp7) |
2146 | 2.26M | { |
2147 | 2.26M | int interior_limit, filter_level; |
2148 | | |
2149 | 2.26M | if (s->segmentation.enabled) { |
2150 | 137k | filter_level = s->segmentation.filter_level[mb->segment]; |
2151 | 137k | if (!s->segmentation.absolute_vals) |
2152 | 77.8k | filter_level += s->filter.level; |
2153 | 137k | } else |
2154 | 2.12M | filter_level = s->filter.level; |
2155 | | |
2156 | 2.26M | if (s->lf_delta.enabled) { |
2157 | 472k | filter_level += s->lf_delta.ref[mb->ref_frame]; |
2158 | 472k | filter_level += s->lf_delta.mode[mb->mode]; |
2159 | 472k | } |
2160 | | |
2161 | 2.26M | filter_level = av_clip_uintp2(filter_level, 6); |
2162 | | |
2163 | 2.26M | interior_limit = filter_level; |
2164 | 2.26M | if (s->filter.sharpness) { |
2165 | 1.31M | interior_limit >>= (s->filter.sharpness + 3) >> 2; |
2166 | 1.31M | interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); |
2167 | 1.31M | } |
2168 | 2.26M | interior_limit = FFMAX(interior_limit, 1); |
2169 | | |
2170 | 2.26M | f->filter_level = filter_level; |
2171 | 2.26M | f->inner_limit = interior_limit; |
2172 | 2.26M | f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 || |
2173 | 2.26M | mb->mode == VP8_MVMODE_SPLIT; |
2174 | 2.26M | } |
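 | | /* av_clip_uintp2(filter_level, 6) clamps the per-mb level to 0..63, |
 | |  * matching the 64-entry hev_thresh_lut[][] used in filter_mb() below. */ |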
2175 | | |
2176 | | static av_always_inline |
2177 | | void filter_mb(const VP8Context *s, uint8_t *const dst[3], const VP8FilterStrength *f, |
2178 | | int mb_x, int mb_y, int is_vp7) |
2179 | 1.36M | { |
2180 | 1.36M | int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh; |
2181 | 1.36M | int filter_level = f->filter_level; |
2182 | 1.36M | int inner_limit = f->inner_limit; |
2183 | 1.36M | int inner_filter = f->inner_filter; |
2184 | 1.36M | ptrdiff_t linesize = s->linesize; |
2185 | 1.36M | ptrdiff_t uvlinesize = s->uvlinesize; |
2186 | 1.36M | static const uint8_t hev_thresh_lut[2][64] = { |
2187 | 1.36M | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, |
2188 | 1.36M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
2189 | 1.36M | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
2190 | 1.36M | 3, 3, 3, 3 }, |
2191 | 1.36M | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, |
2192 | 1.36M | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2193 | 1.36M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
2194 | 1.36M | 2, 2, 2, 2 } |
2195 | 1.36M | }; |
2196 | | |
2197 | 1.36M | if (!filter_level) |
2198 | 268k | return; |
2199 | | |
2200 | 1.09M | if (is_vp7) { |
2201 | 371k | bedge_lim_y = filter_level; |
2202 | 371k | bedge_lim_uv = filter_level * 2; |
2203 | 371k | mbedge_lim = filter_level + 2; |
2204 | 727k | } else { |
2205 | 727k | bedge_lim_y = |
2206 | 727k | bedge_lim_uv = filter_level * 2 + inner_limit; |
2207 | 727k | mbedge_lim = bedge_lim_y + 4; |
2208 | 727k | } |
2209 | | |
2210 | 1.09M | hev_thresh = hev_thresh_lut[s->keyframe][filter_level]; |
2211 | | |
2212 | 1.09M | if (mb_x) { |
2213 | 985k | s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, |
2214 | 985k | mbedge_lim, inner_limit, hev_thresh); |
2215 | 985k | s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, |
2216 | 985k | mbedge_lim, inner_limit, hev_thresh); |
2217 | 985k | } |
2218 | | |
2219 | 1.09M | #define H_LOOP_FILTER_16Y_INNER(cond) \ |
2220 | 2.19M | if (cond && inner_filter) { \ |
2221 | 592k | s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \ |
2222 | 592k | bedge_lim_y, inner_limit, \ |
2223 | 592k | hev_thresh); \ |
2224 | 592k | s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \ |
2225 | 592k | bedge_lim_y, inner_limit, \ |
2226 | 592k | hev_thresh); \ |
2227 | 592k | s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \ |
2228 | 592k | bedge_lim_y, inner_limit, \ |
2229 | 592k | hev_thresh); \ |
2230 | 592k | s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \ |
2231 | 592k | uvlinesize, bedge_lim_uv, \ |
2232 | 592k | inner_limit, hev_thresh); \ |
2233 | 592k | } |
2234 | | |
2235 | 1.09M | H_LOOP_FILTER_16Y_INNER(!is_vp7) |
2236 | | |
2237 | 1.09M | if (mb_y) { |
2238 | 965k | s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, |
2239 | 965k | mbedge_lim, inner_limit, hev_thresh); |
2240 | 965k | s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, |
2241 | 965k | mbedge_lim, inner_limit, hev_thresh); |
2242 | 965k | } |
2243 | | |
2244 | 1.09M | if (inner_filter) { |
2245 | 592k | s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize, |
2246 | 592k | linesize, bedge_lim_y, |
2247 | 592k | inner_limit, hev_thresh); |
2248 | 592k | s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize, |
2249 | 592k | linesize, bedge_lim_y, |
2250 | 592k | inner_limit, hev_thresh); |
2251 | 592k | s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize, |
2252 | 592k | linesize, bedge_lim_y, |
2253 | 592k | inner_limit, hev_thresh); |
2254 | 592k | s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, |
2255 | 592k | dst[2] + 4 * uvlinesize, |
2256 | 592k | uvlinesize, bedge_lim_uv, |
2257 | 592k | inner_limit, hev_thresh); |
2258 | 592k | } |
2259 | | |
2260 | 1.09M | H_LOOP_FILTER_16Y_INNER(is_vp7) |
2261 | 1.09M | } |
2262 | | |
2263 | | static av_always_inline |
2264 | | void filter_mb_simple(const VP8Context *s, uint8_t *dst, const VP8FilterStrength *f, |
2265 | | int mb_x, int mb_y) |
2266 | 621k | { |
2267 | 621k | int mbedge_lim, bedge_lim; |
2268 | 621k | int filter_level = f->filter_level; |
2269 | 621k | int inner_limit = f->inner_limit; |
2270 | 621k | int inner_filter = f->inner_filter; |
2271 | 621k | ptrdiff_t linesize = s->linesize; |
2272 | | |
2273 | 621k | if (!filter_level) |
2274 | 4.00k | return; |
2275 | | |
2276 | 617k | bedge_lim = 2 * filter_level + inner_limit; |
2277 | 617k | mbedge_lim = bedge_lim + 4; |
2278 | | |
2279 | 617k | if (mb_x) |
2280 | 534k | s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); |
2281 | 617k | if (inner_filter) { |
2282 | 515k | s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim); |
2283 | 515k | s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim); |
2284 | 515k | s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim); |
2285 | 515k | } |
2286 | | |
2287 | 617k | if (mb_y) |
2288 | 545k | s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); |
2289 | 617k | if (inner_filter) { |
2290 | 515k | s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim); |
2291 | 515k | s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim); |
2292 | 515k | s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim); |
2293 | 515k | } |
2294 | 617k | } |
2295 | | |
2296 | 2.22M | #define MARGIN (16 << 2) |
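 | | /* MARGIN is 64 = one 16x16 macroblock in quarter-pel units; the mv_min / |
 | |  * mv_max bounds below (mb index << 6) use the same units (e.g. mb row 0 |
 | |  * allows MVs down to -64, i.e. -16 pixels) and are shifted by 64 per |
 | |  * macroblock as the loops advance. */ |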
2297 | | static av_always_inline |
2298 | | int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe, |
2299 | | const VP8Frame *prev_frame, int is_vp7) |
2300 | 83.5k | { |
2301 | 83.5k | VP8Context *s = avctx->priv_data; |
2302 | 83.5k | int mb_x, mb_y; |
2303 | | |
2304 | 83.5k | s->mv_bounds.mv_min.y = -MARGIN; |
2305 | 83.5k | s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; |
2306 | 460k | for (mb_y = 0; mb_y < s->mb_height; mb_y++) { |
2307 | 382k | VP8Macroblock *mb = s->macroblocks_base + |
2308 | 382k | ((s->mb_width + 1) * (mb_y + 1) + 1); |
2309 | 382k | int mb_xy = mb_y * s->mb_width; |
2310 | | |
2311 | 382k | AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); |
2312 | | |
2313 | 382k | s->mv_bounds.mv_min.x = -MARGIN; |
2314 | 382k | s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; |
2315 | | |
2316 | 3.98M | for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { |
2317 | 3.61M | if (vpx_rac_is_end(&s->c)) { |
2318 | 4.74k | return AVERROR_INVALIDDATA; |
2319 | 4.74k | } |
2320 | 3.60M | if (mb_y == 0) |
2321 | 3.60M | AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, |
2322 | 3.60M | DC_PRED * 0x01010101); |
2323 | 3.60M | decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map + mb_xy, |
2324 | 3.60M | prev_frame && prev_frame->seg_map ? |
2325 | 1.74M | prev_frame->seg_map + mb_xy : NULL, 1, is_vp7); |
2326 | 3.60M | s->mv_bounds.mv_min.x -= 64; |
2327 | 3.60M | s->mv_bounds.mv_max.x -= 64; |
2328 | 3.60M | } |
2329 | 377k | s->mv_bounds.mv_min.y -= 64; |
2330 | 377k | s->mv_bounds.mv_max.y -= 64; |
2331 | 377k | } |
2332 | 78.7k | return 0; |
2333 | 83.5k | } |
2334 | | |
2335 | | static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, |
2336 | | const VP8Frame *prev_frame) |
2337 | 83.5k | { |
2338 | 83.5k | return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7); |
2339 | 83.5k | } |
2340 | | |
2341 | | static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, |
2342 | | const VP8Frame *prev_frame) |
2343 | 0 | { |
2344 | 0 | return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8); |
2345 | 0 | } |
2346 | | |
2347 | | #if HAVE_THREADS |
2348 | | #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \ |
2349 | 0 | do { \ |
2350 | 0 | int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \ |
2351 | 0 | if (atomic_load(&otd->thread_mb_pos) < tmp) { \ |
2352 | 0 | pthread_mutex_lock(&otd->lock); \ |
2353 | 0 | atomic_store(&td->wait_mb_pos, tmp); \ |
2354 | 0 | do { \ |
2355 | 0 | if (atomic_load(&otd->thread_mb_pos) >= tmp) \ |
2356 | 0 | break; \ |
2357 | 0 | pthread_cond_wait(&otd->cond, &otd->lock); \ |
2358 | 0 | } while (1); \ |
2359 | 0 | atomic_store(&td->wait_mb_pos, INT_MAX); \ |
2360 | 0 | pthread_mutex_unlock(&otd->lock); \ |
2361 | 0 | } \ |
2362 | 0 | } while (0) |
2363 | | |
2364 | | #define update_pos(td, mb_y, mb_x) \ |
2365 | 5.23M | do { \ |
2366 | 5.23M | int pos = (mb_y << 16) | (mb_x & 0xFFFF); \ |
2367 | 5.23M | int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \ |
2368 | 5.23M | (num_jobs > 1); \ |
2369 | 5.23M | int is_null = !next_td || !prev_td; \ |
2370 | 5.23M | int pos_check = (is_null) ? 1 : \ |
2371 | 5.23M | (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \ |
2372 | 4.80M | (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \ |
2373 | 5.23M | atomic_store(&td->thread_mb_pos, pos); \ |
2374 | 5.23M | if (sliced_threading && pos_check) { \ |
2375 | 0 | pthread_mutex_lock(&td->lock); \ |
2376 | 0 | pthread_cond_broadcast(&td->cond); \ |
2377 | 0 | pthread_mutex_unlock(&td->lock); \ |
2378 | 0 | } \ |
2379 | 5.23M | } while (0) |
2380 | | #else |
2381 | | #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0) |
2382 | | #define update_pos(td, mb_y, mb_x) while(0) |
2383 | | #endif |
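 | | /* Slice-threading progress is tracked as one atomic int per thread, |
 | |  * packed as (mb_y << 16) | mb_x, so check_thread_pos() / update_pos() |
 | |  * can compare (row, column) positions with a single load/store. */ |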
2384 | | |
2385 | | static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, |
2386 | | int jobnr, int threadnr, int is_vp7) |
2387 | 426k | { |
2388 | 426k | VP8Context *s = avctx->priv_data; |
2389 | 426k | VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr]; |
2390 | 426k | int mb_y = atomic_load(&td->thread_mb_pos) >> 16; |
2391 | 426k | int mb_x, mb_xy = mb_y * s->mb_width; |
2392 | 426k | int num_jobs = s->num_jobs; |
2393 | 426k | const VP8Frame *prev_frame = s->prev_frame; |
2394 | 426k | VP8Frame *curframe = s->curframe; |
2395 | 426k | VPXRangeCoder *coeff_c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)]; |
2396 | | |
2397 | 426k | VP8Macroblock *mb; |
2398 | 426k | uint8_t *dst[3] = { |
2399 | 426k | curframe->tf.f->data[0] + 16 * mb_y * s->linesize, |
2400 | 426k | curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize, |
2401 | 426k | curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize |
2402 | 426k | }; |
2403 | | |
2404 | 426k | if (vpx_rac_is_end(&s->c)) |
2405 | 24.5k | return AVERROR_INVALIDDATA; |
2406 | | |
2407 | 402k | if (mb_y == 0) |
2408 | 105k | prev_td = td; |
2409 | 296k | else |
2410 | 296k | prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; |
2411 | 402k | if (mb_y == s->mb_height - 1) |
2412 | 35.2k | next_td = td; |
2413 | 366k | else |
2414 | 366k | next_td = &s->thread_data[(jobnr + 1) % num_jobs]; |
2415 | 402k | if (s->mb_layout == 1) |
2416 | 135k | mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); |
2417 | 267k | else { |
2418 | | // Make sure the previous frame has read its segmentation map, |
2419 | | // if we reuse the same map. |
2420 | 267k | if (prev_frame && s->segmentation.enabled && |
2421 | 267k | !s->segmentation.update_map) |
2422 | 9.86k | ff_progress_frame_await(&prev_frame->tf, mb_y); |
2423 | 267k | mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; |
2424 | 267k | memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock |
2425 | 267k | AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); |
2426 | 267k | } |
2427 | | |
2428 | 402k | if (!is_vp7 || mb_y == 0) |
2429 | 330k | memset(td->left_nnz, 0, sizeof(td->left_nnz)); |
2430 | | |
2431 | 402k | td->mv_bounds.mv_min.x = -MARGIN; |
2432 | 402k | td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; |
2433 | | |
2434 | 3.22M | for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { |
2435 | 2.89M | if (vpx_rac_is_end(&s->c)) |
2436 | 68.4k | return AVERROR_INVALIDDATA; |
2437 | | // Wait for previous thread to read mb_x+2, and reach mb_y-1. |
2438 | 2.82M | if (prev_td != td) { |
2439 | 0 | if (threadnr != 0) { |
2440 | 0 | check_thread_pos(td, prev_td, |
2441 | 0 | mb_x + (is_vp7 ? 2 : 1), |
2442 | 0 | mb_y - (is_vp7 ? 2 : 1)); |
2443 | 0 | } else { |
2444 | 0 | check_thread_pos(td, prev_td, |
2445 | 0 | mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3, |
2446 | 0 | mb_y - (is_vp7 ? 2 : 1)); |
2447 | 0 | } |
2448 | 0 | } |
2449 | | |
2450 | 0 | s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64, |
2451 | 2.82M | s->linesize, 4); |
2452 | 2.82M | s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64, |
2453 | 2.82M | dst[2] - dst[1], 2); |
2454 | | |
2455 | 2.82M | if (!s->mb_layout) |
2456 | 1.66M | decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map + mb_xy, |
2457 | 1.66M | prev_frame && prev_frame->seg_map ? |
2458 | 1.42M | prev_frame->seg_map + mb_xy : NULL, 0, is_vp7); |
2459 | | |
2460 | 2.82M | prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_PREVIOUS); |
2461 | | |
2462 | 2.82M | if (!mb->skip) { |
2463 | 2.34M | if (vpx_rac_is_end(coeff_c)) |
2464 | 5.36k | return AVERROR_INVALIDDATA; |
2465 | 2.34M | decode_mb_coeffs(s, td, coeff_c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7); |
2466 | 2.34M | } |
2467 | | |
2468 | 2.82M | if (mb->mode <= MODE_I4x4) |
2469 | 1.30M | intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7); |
2470 | 1.51M | else |
2471 | 1.51M | inter_predict(s, td, dst, mb, mb_x, mb_y); |
2472 | | |
2473 | 2.82M | prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_GOLDEN); |
2474 | | |
2475 | 2.82M | if (!mb->skip) { |
2476 | 539k | idct_mb(s, td, dst, mb); |
2477 | 2.28M | } else { |
2478 | 2.28M | AV_ZERO64(td->left_nnz); |
2479 | 2.28M | AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned |
2480 | | |
2481 | | /* Reset the DC block predictors that would exist |
2482 | | * if the mb had coefficients */ |
2483 | 2.28M | if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { |
2484 | 1.71M | td->left_nnz[8] = 0; |
2485 | 1.71M | s->top_nnz[mb_x][8] = 0; |
2486 | 1.71M | } |
2487 | 2.28M | } |
2488 | | |
2489 | 2.82M | if (s->deblock_filter) |
2490 | 2.26M | filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7); |
2491 | | |
2492 | 2.82M | if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) { |
2493 | 0 | if (s->filter.simple) |
2494 | 0 | backup_mb_border(s->top_border[mb_x + 1], dst[0], |
2495 | 0 | NULL, NULL, s->linesize, 0, 1); |
2496 | 0 | else |
2497 | 0 | backup_mb_border(s->top_border[mb_x + 1], dst[0], |
2498 | 0 | dst[1], dst[2], s->linesize, s->uvlinesize, 0); |
2499 | 0 | } |
2500 | | |
2501 | 2.82M | prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_ALTREF); |
2502 | | |
2503 | 2.82M | dst[0] += 16; |
2504 | 2.82M | dst[1] += 8; |
2505 | 2.82M | dst[2] += 8; |
2506 | 2.82M | td->mv_bounds.mv_min.x -= 64; |
2507 | 2.82M | td->mv_bounds.mv_max.x -= 64; |
2508 | | |
2509 | 2.82M | if (mb_x == s->mb_width + 1) { |
2510 | 0 | update_pos(td, mb_y, s->mb_width + 3); |
2511 | 2.82M | } else { |
2512 | 2.82M | update_pos(td, mb_y, mb_x); |
2513 | 2.82M | } |
2514 | 2.82M | } |
2515 | 328k | return 0; |
2516 | 402k | } |
2517 | | |
2518 | | static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata, |
2519 | | int jobnr, int threadnr, int is_vp7) |
2520 | 221k | { |
2521 | 221k | VP8Context *s = avctx->priv_data; |
2522 | 221k | VP8ThreadData *td = &s->thread_data[threadnr]; |
2523 | 221k | int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs; |
2524 | 221k | AVFrame *curframe = s->curframe->tf.f; |
2525 | 221k | VP8ThreadData *prev_td, *next_td; |
2526 | 221k | uint8_t *dst[3] = { |
2527 | 221k | curframe->data[0] + 16 * mb_y * s->linesize, |
2528 | 221k | curframe->data[1] + 8 * mb_y * s->uvlinesize, |
2529 | 221k | curframe->data[2] + 8 * mb_y * s->uvlinesize |
2530 | 221k | }; |
2531 | | |
2532 | 221k | if (mb_y == 0) |
2533 | 28.3k | prev_td = td; |
2534 | 193k | else |
2535 | 193k | prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; |
2536 | 221k | if (mb_y == s->mb_height - 1) |
2537 | 14.4k | next_td = td; |
2538 | 207k | else |
2539 | 207k | next_td = &s->thread_data[(jobnr + 1) % num_jobs]; |
2540 | | |
2541 | 2.21M | for (mb_x = 0; mb_x < s->mb_width; mb_x++) { |
2542 | 1.98M | const VP8FilterStrength *f = &td->filter_strength[mb_x]; |
2543 | 1.98M | if (prev_td != td) |
2544 | 0 | check_thread_pos(td, prev_td, |
2545 | 1.98M | (mb_x + 1) + (s->mb_width + 3), mb_y - 1); |
2546 | 1.98M | if (next_td != td) |
2547 | 0 | if (next_td != &s->thread_data[0]) |
2548 | 0 | check_thread_pos(td, next_td, mb_x + 1, mb_y + 1); |
2549 | | |
2550 | 1.98M | if (num_jobs == 1) { |
2551 | 1.98M | if (s->filter.simple) |
2552 | 621k | backup_mb_border(s->top_border[mb_x + 1], dst[0], |
2553 | 621k | NULL, NULL, s->linesize, 0, 1); |
2554 | 1.36M | else |
2555 | 1.36M | backup_mb_border(s->top_border[mb_x + 1], dst[0], |
2556 | 1.36M | dst[1], dst[2], s->linesize, s->uvlinesize, 0); |
2557 | 1.98M | } |
2558 | | |
2559 | 1.98M | if (s->filter.simple) |
2560 | 621k | filter_mb_simple(s, dst[0], f, mb_x, mb_y); |
2561 | 1.36M | else |
2562 | 1.36M | filter_mb(s, dst, f, mb_x, mb_y, is_vp7); |
2563 | 1.98M | dst[0] += 16; |
2564 | 1.98M | dst[1] += 8; |
2565 | 1.98M | dst[2] += 8; |
2566 | | |
2567 | 1.98M | update_pos(td, mb_y, (s->mb_width + 3) + mb_x); |
2568 | 1.98M | } |
2569 | 221k | } |
2570 | | |
2571 | | static av_always_inline |
2572 | | int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr, |
2573 | | int threadnr, int is_vp7) |
2574 | 121k | { |
2575 | 121k | const VP8Context *s = avctx->priv_data; |
2576 | 121k | VP8ThreadData *td = &s->thread_data[jobnr]; |
2577 | 121k | VP8ThreadData *next_td = NULL, *prev_td = NULL; |
2578 | 121k | VP8Frame *curframe = s->curframe; |
2579 | 121k | int mb_y, num_jobs = s->num_jobs; |
2580 | 121k | int ret; |
2581 | | |
2582 | 121k | td->thread_nr = threadnr; |
2583 | 121k | td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr; |
2584 | 121k | td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr; |
2585 | 449k | for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { |
2586 | 426k | atomic_store(&td->thread_mb_pos, mb_y << 16); |
2587 | 426k | ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); |
2588 | 426k | if (ret < 0) { |
2589 | 98.3k | update_pos(td, s->mb_height, INT_MAX & 0xFFFF); |
2590 | 98.3k | return ret; |
2591 | 98.3k | } |
2592 | 328k | if (s->deblock_filter) |
2593 | 221k | s->filter_mb_row(avctx, tdata, jobnr, threadnr); |
2594 | 328k | update_pos(td, mb_y, INT_MAX & 0xFFFF); |
2595 | | |
2596 | 328k | td->mv_bounds.mv_min.y -= 64 * num_jobs; |
2597 | 328k | td->mv_bounds.mv_max.y -= 64 * num_jobs; |
2598 | | |
2599 | 328k | if (avctx->active_thread_type == FF_THREAD_FRAME) |
2600 | 0 | ff_progress_frame_report(&curframe->tf, mb_y); |
2601 | 328k | } |
2602 | | |
2603 | 22.9k | return 0; |
2604 | 121k | } |
2605 | | |
2606 | | static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, |
2607 | | int jobnr, int threadnr) |
2608 | 78.7k | { |
2609 | 78.7k | return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7); |
2610 | 78.7k | } |
2611 | | |
2612 | | static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, |
2613 | | int jobnr, int threadnr) |
2614 | 42.5k | { |
2615 | 42.5k | return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8); |
2616 | 42.5k | } |
2617 | | |
2618 | | static av_always_inline |
2619 | | int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame, |
2620 | | const AVPacket *avpkt, int is_vp7) |
2621 | 242k | { |
2622 | 242k | VP8Context *s = avctx->priv_data; |
2623 | 242k | int ret, i, referenced, num_jobs; |
2624 | 242k | enum AVDiscard skip_thresh; |
2625 | 242k | VP8Frame *av_uninit(curframe), *prev_frame; |
2626 | | |
2627 | 242k | if (is_vp7) |
2628 | 114k | ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size); |
2629 | 128k | else |
2630 | 128k | ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size); |
2631 | | |
2632 | 242k | if (ret < 0) |
2633 | 59.3k | goto err; |
2634 | | |
2635 | 183k | if (!is_vp7 && s->actually_webp) { |
2636 | | // VP8 in WebP is supposed to be intra-only. Enforce this here |
2637 | | // to ensure that output is reproducible with frame-threading. |
2638 | 23.5k | if (!s->keyframe) |
2639 | 5.50k | return AVERROR_INVALIDDATA; |
2640 | | // avctx->pix_fmt already set in caller. |
2641 | 159k | } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) { |
2642 | 0 | s->pix_fmt = get_pixel_format(s); |
2643 | 0 | if (s->pix_fmt < 0) { |
2644 | 0 | ret = AVERROR(EINVAL); |
2645 | 0 | goto err; |
2646 | 0 | } |
2647 | 0 | avctx->pix_fmt = s->pix_fmt; |
2648 | 0 | } |
2649 | | |
2650 | 177k | prev_frame = s->framep[VP8_FRAME_CURRENT]; |
2651 | | |
2652 | 177k | referenced = s->update_last || s->update_golden == VP8_FRAME_CURRENT || |
2653 | 177k | s->update_altref == VP8_FRAME_CURRENT; |
2654 | | |
2655 | 177k | skip_thresh = !referenced ? AVDISCARD_NONREF |
2656 | 177k | : !s->keyframe ? AVDISCARD_NONKEY |
2657 | 169k | : AVDISCARD_ALL; |
2658 | | |
2659 | 177k | if (avctx->skip_frame >= skip_thresh) { |
2660 | 37.3k | s->invisible = 1; |
2661 | 37.3k | memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); |
2662 | 37.3k | goto skip_decode; |
2663 | 37.3k | } |
2664 | 140k | s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; |
2665 | | |
2666 | | // release no longer referenced frames |
2667 | 840k | for (i = 0; i < 5; i++) |
2668 | 700k | if (s->frames[i].tf.f && |
2669 | 700k | &s->frames[i] != prev_frame && |
2670 | 700k | &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] && |
2671 | 700k | &s->frames[i] != s->framep[VP8_FRAME_GOLDEN] && |
2672 | 700k | &s->frames[i] != s->framep[VP8_FRAME_ALTREF]) |
2673 | 93.7k | vp8_release_frame(&s->frames[i]); |
2674 | | |
2675 | 140k | if (!s->colorspace) |
2676 | 133k | avctx->colorspace = AVCOL_SPC_BT470BG; |
2677 | 140k | if (s->fullrange) |
2678 | 14.5k | avctx->color_range = AVCOL_RANGE_JPEG; |
2679 | 125k | else |
2680 | 125k | avctx->color_range = AVCOL_RANGE_MPEG; |
2681 | | |
2682 | | /* Given that arithmetic probabilities are updated every frame, it's quite |
2683 | | * likely that the values we have on a random interframe are complete |
2684 | | * junk if we didn't start decoding on a keyframe. So just don't display |
2685 | | * anything rather than junk. */ |
2686 | 140k | if (!s->keyframe && (!s->framep[VP8_FRAME_PREVIOUS] || |
2687 | 104k | !s->framep[VP8_FRAME_GOLDEN] || |
2688 | 104k | !s->framep[VP8_FRAME_ALTREF])) { |
2689 | 13.0k | av_log(avctx, AV_LOG_WARNING, |
2690 | 13.0k | "Discarding interframe without a prior keyframe!\n"); |
2691 | 13.0k | ret = AVERROR_INVALIDDATA; |
2692 | 13.0k | goto err; |
2693 | 13.0k | } |
2694 | | |
2695 | 127k | curframe = vp8_find_free_buffer(s); |
2696 | 127k | if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0) |
2697 | 987 | goto err; |
2698 | 126k | s->framep[VP8_FRAME_CURRENT] = curframe; |
2699 | 126k | if (s->keyframe) |
2700 | 34.1k | curframe->tf.f->flags |= AV_FRAME_FLAG_KEY; |
2701 | 91.9k | else |
2702 | 91.9k | curframe->tf.f->flags &= ~AV_FRAME_FLAG_KEY; |
2703 | 126k | curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I |
2704 | 126k | : AV_PICTURE_TYPE_P; |
2705 | | |
2706 | | // update the golden and altref reference slots (they may be swapped or refreshed)
2707 | 126k | if (s->update_altref != VP8_FRAME_NONE) |
2708 | 119k | s->next_framep[VP8_FRAME_ALTREF] = s->framep[s->update_altref]; |
2709 | 6.46k | else |
2710 | 6.46k | s->next_framep[VP8_FRAME_ALTREF] = s->framep[VP8_FRAME_ALTREF]; |
2711 | | |
2712 | 126k | if (s->update_golden != VP8_FRAME_NONE) |
2713 | 106k | s->next_framep[VP8_FRAME_GOLDEN] = s->framep[s->update_golden]; |
2714 | 19.3k | else |
2715 | 19.3k | s->next_framep[VP8_FRAME_GOLDEN] = s->framep[VP8_FRAME_GOLDEN]; |
2716 | | |
2717 | 126k | if (s->update_last) |
2718 | 112k | s->next_framep[VP8_FRAME_PREVIOUS] = curframe; |
2719 | 13.9k | else |
2720 | 13.9k | s->next_framep[VP8_FRAME_PREVIOUS] = s->framep[VP8_FRAME_PREVIOUS]; |
2721 | | |
2722 | 126k | s->next_framep[VP8_FRAME_CURRENT] = curframe; |
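/* [Editor's note: illustrative sketch, not part of vp8.c] The golden and
 * altref updates above share one pattern: when the header requested an
 * update, the new reference is taken from the slot named by update_golden /
 * update_altref (VP8_FRAME_CURRENT means "the frame being decoded", which
 * framep[VP8_FRAME_CURRENT] already points at; any other slot index copies,
 * i.e. swaps, an existing reference); otherwise the old reference is kept.
 * update_last is a plain flag choosing between curframe and the old previous
 * frame. Factored out, with an invented helper name:
 */
static VP8Frame *next_reference(VP8Frame *const framep[4], int update, int slot)
{
    return update != VP8_FRAME_NONE ? framep[update] : framep[slot];
}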
2723 | | |
2724 | 126k | if (!is_vp7 && !s->actually_webp) |
2725 | 24.9k | ff_thread_finish_setup(avctx); |
2726 | | |
2727 | 126k | if (!is_vp7 && avctx->hwaccel) { |
2728 | 0 | const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel); |
2729 | 0 | ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size); |
2730 | 0 | if (ret < 0) |
2731 | 0 | goto err; |
2732 | | |
2733 | 0 | ret = hwaccel->decode_slice(avctx, avpkt->data, avpkt->size); |
2734 | 0 | if (ret < 0) |
2735 | 0 | goto err; |
2736 | | |
2737 | 0 | ret = hwaccel->end_frame(avctx); |
2738 | 0 | if (ret < 0) |
2739 | 0 | goto err; |
2740 | |
2741 | 126k | } else { |
2742 | 126k | s->linesize = curframe->tf.f->linesize[0]; |
2743 | 126k | s->uvlinesize = curframe->tf.f->linesize[1]; |
2744 | | |
2745 | 126k | memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz)); |
2746 | | /* Zero macroblock structures for top/top-left prediction |
2747 | | * from outside the frame. */ |
2748 | 126k | if (!s->mb_layout) |
2749 | 42.5k | memset(s->macroblocks + s->mb_height * 2 - 1, 0, |
2750 | 42.5k | (s->mb_width + 1) * sizeof(*s->macroblocks)); |
2751 | 126k | if (!s->mb_layout && s->keyframe) |
2752 | 24.1k | memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4); |
2753 | | |
2754 | 126k | memset(s->ref_count, 0, sizeof(s->ref_count)); |
2755 | | |
2756 | 126k | if (s->mb_layout == 1) { |
2757 | | // Make sure the previous frame has read its segmentation map, |
2758 | | // if we reuse the same map. |
2759 | 83.5k | if (prev_frame && s->segmentation.enabled && |
2760 | 83.5k | !s->segmentation.update_map) |
2761 | 0 | ff_progress_frame_await(&prev_frame->tf, 1); |
2762 | 83.5k | if (is_vp7) |
2763 | 83.5k | ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame); |
2764 | 0 | else |
2765 | 0 | ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame); |
2766 | 83.5k | if (ret < 0) |
2767 | 4.74k | goto err; |
2768 | 83.5k | } |
2769 | | |
2770 | 121k | if (avctx->active_thread_type == FF_THREAD_FRAME) |
2771 | 0 | num_jobs = 1; |
2772 | 121k | else |
2773 | 121k | num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count); |
2774 | 121k | s->num_jobs = num_jobs; |
2775 | 121k | s->curframe = curframe; |
2776 | 121k | s->prev_frame = prev_frame; |
2777 | 121k | s->mv_bounds.mv_min.y = -MARGIN; |
2778 | 121k | s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; |
2779 | 1.09M | for (i = 0; i < MAX_THREADS; i++) { |
2780 | 970k | VP8ThreadData *td = &s->thread_data[i]; |
2781 | 970k | atomic_init(&td->thread_mb_pos, 0); |
2782 | 970k | atomic_init(&td->wait_mb_pos, INT_MAX); |
2783 | 970k | } |
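/* [Editor's note: illustrative sketch, not part of vp8.c] thread_mb_pos and
 * wait_mb_pos are used by the row-sliced decoding loops (outside this
 * excerpt) so that each thread can publish how far it has decoded and record
 * the position it is blocked on; INT_MAX at init means "waiting on nothing".
 * A minimal version of such a publish/poll scheme with C11 atomics, all
 * names invented, might look like this:
 */
#include <limits.h>
#include <stdatomic.h>

typedef struct RowProgress {
    atomic_int done_pos;  /* last macroblock position this thread finished */
    atomic_int wait_pos;  /* position this thread is currently blocked on  */
} RowProgress;

static void row_progress_init(RowProgress *p)
{
    atomic_init(&p->done_pos, 0);        /* nothing decoded yet     */
    atomic_init(&p->wait_pos, INT_MAX);  /* not waiting on anything */
}

static int neighbour_has_reached(RowProgress *neighbour, int needed_pos)
{
    return atomic_load(&neighbour->done_pos) >= needed_pos;
}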
2784 | 121k | if (is_vp7) |
2785 | 78.7k | avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL, |
2786 | 78.7k | num_jobs); |
2787 | 42.5k | else |
2788 | 42.5k | avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, |
2789 | 42.5k | num_jobs); |
2790 | 121k | } |
2791 | | |
2792 | 121k | ff_progress_frame_report(&curframe->tf, INT_MAX); |
2793 | 121k | memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); |
2794 | | |
2795 | 158k | skip_decode: |
2796 | | // if future frames don't use the updated probabilities, |
2797 | | // reset them to the values we saved |
2798 | 158k | if (!s->update_probabilities) |
2799 | 80.1k | s->prob[0] = s->prob[1]; |
2800 | | |
2801 | 158k | if (!s->invisible) { |
2802 | 104k | if ((ret = av_frame_ref(rframe, curframe->tf.f)) < 0) |
2803 | 0 | return ret; |
2804 | 104k | *got_frame = 1; |
2805 | 104k | } |
2806 | | |
2807 | 158k | return avpkt->size; |
2808 | 78.1k | err: |
2809 | 78.1k | memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); |
2810 | 78.1k | return ret; |
2811 | 158k | } |
2812 | | |
2813 | | av_cold int ff_vp8_decode_free(AVCodecContext *avctx) |
2814 | 12.9k | { |
2815 | 12.9k | vp8_decode_flush_impl(avctx, 1); |
2816 | | |
2817 | 12.9k | return 0; |
2818 | 12.9k | } |
2819 | | |
2820 | | static av_cold void vp78_decode_init(AVCodecContext *avctx) |
2821 | 12.9k | { |
2822 | 12.9k | VP8Context *s = avctx->priv_data; |
2823 | | |
2824 | 12.9k | s->avctx = avctx; |
2825 | 12.9k | s->pix_fmt = AV_PIX_FMT_NONE; |
2826 | 12.9k | avctx->pix_fmt = AV_PIX_FMT_YUV420P; |
2827 | | |
2828 | 12.9k | ff_videodsp_init(&s->vdsp, 8); |
2829 | | |
2830 | 12.9k | ff_vp78dsp_init(&s->vp8dsp); |
2831 | | |
2832 | | /* does not change for VP8 */ |
2833 | 12.9k | memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); |
2834 | 12.9k | } |
2835 | | |
2836 | | #if CONFIG_VP8_DECODER |
2837 | | static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, |
2838 | | int jobnr, int threadnr) |
2839 | 274k | { |
2840 | 274k | return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0); |
2841 | 274k | } |
2842 | | |
2843 | | static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata, |
2844 | | int jobnr, int threadnr) |
2845 | 165k | { |
2846 | 165k | filter_mb_row(avctx, tdata, jobnr, threadnr, 0); |
2847 | 165k | } |
2848 | | |
2849 | | int ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame, |
2850 | | int *got_frame, AVPacket *avpkt) |
2851 | 128k | { |
2852 | 128k | return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP8); |
2853 | 128k | } |
2854 | | |
2855 | | av_cold int ff_vp8_decode_init(AVCodecContext *avctx) |
2856 | 6.64k | { |
2857 | 6.64k | VP8Context *s = avctx->priv_data; |
2858 | | |
2859 | 6.64k | vp78_decode_init(avctx); |
2860 | 6.64k | ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1); |
2861 | 6.64k | ff_vp8dsp_init(&s->vp8dsp); |
2862 | 6.64k | s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter; |
2863 | 6.64k | s->filter_mb_row = vp8_filter_mb_row; |
2864 | | |
2865 | 6.64k | return 0; |
2866 | 6.64k | } |
2867 | | |
2868 | | #if HAVE_THREADS |
2869 | | static void vp8_replace_frame(VP8Frame *dst, const VP8Frame *src) |
2870 | 0 | { |
2871 | 0 | ff_progress_frame_replace(&dst->tf, &src->tf); |
2872 | 0 | av_refstruct_replace(&dst->seg_map, src->seg_map); |
2873 | 0 | av_refstruct_replace(&dst->hwaccel_picture_private, |
2874 | 0 | src->hwaccel_picture_private); |
2875 | 0 | } |
2876 | | |
2877 | 0 | #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL) |
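/* [Editor's note: illustrative sketch, not part of vp8.c] REBASE translates a
 * frame pointer that is valid inside the source thread context's frames[]
 * array into the pointer at the same index inside the destination context's
 * array. As a function instead of a macro (name invented):
 */
static VP8Frame *rebase(VP8Frame *dst_frames, const VP8Frame *src_frames,
                        const VP8Frame *pic)
{
    return pic ? dst_frames + (pic - src_frames) : NULL;
}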
2878 | | |
2879 | | static int vp8_decode_update_thread_context(AVCodecContext *dst, |
2880 | | const AVCodecContext *src) |
2881 | 0 | { |
2882 | 0 | VP8Context *s = dst->priv_data, *s_src = src->priv_data; |
2883 | |
2884 | 0 | if (s->macroblocks_base && |
2885 | 0 | (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) { |
2886 | 0 | free_buffers(s); |
2887 | 0 | s->mb_width = s_src->mb_width; |
2888 | 0 | s->mb_height = s_src->mb_height; |
2889 | 0 | } |
2890 | |
2891 | 0 | s->pix_fmt = s_src->pix_fmt; |
2892 | 0 | s->prob[0] = s_src->prob[!s_src->update_probabilities]; |
2893 | 0 | s->segmentation = s_src->segmentation; |
2894 | 0 | s->lf_delta = s_src->lf_delta; |
2895 | 0 | memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias)); |
2896 | |
2897 | 0 | for (int i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) |
2898 | 0 | vp8_replace_frame(&s->frames[i], &s_src->frames[i]); |
2899 | |
2900 | 0 | s->framep[0] = REBASE(s_src->next_framep[0]); |
2901 | 0 | s->framep[1] = REBASE(s_src->next_framep[1]); |
2902 | 0 | s->framep[2] = REBASE(s_src->next_framep[2]); |
2903 | 0 | s->framep[3] = REBASE(s_src->next_framep[3]); |
2904 | |
2905 | 0 | return 0; |
2906 | 0 | } |
2907 | | #endif /* HAVE_THREADS */ |
2908 | | #endif /* CONFIG_VP8_DECODER */ |
2909 | | |
2910 | | #if CONFIG_VP7_DECODER |
2911 | | static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, |
2912 | | int jobnr, int threadnr) |
2913 | 152k | { |
2914 | 152k | return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1); |
2915 | 152k | } |
2916 | | |
2917 | | static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata, |
2918 | | int jobnr, int threadnr) |
2919 | 55.5k | { |
2920 | 55.5k | filter_mb_row(avctx, tdata, jobnr, threadnr, 1); |
2921 | 55.5k | } |
2922 | | |
2923 | | static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame, |
2924 | | int *got_frame, AVPacket *avpkt) |
2925 | 114k | { |
2926 | 114k | return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP7); |
2927 | 114k | } |
2928 | | |
2929 | | static av_cold int vp7_decode_init(AVCodecContext *avctx)
2930 | 6.28k | { |
2931 | 6.28k | VP8Context *s = avctx->priv_data; |
2932 | | |
2933 | 6.28k | vp78_decode_init(avctx); |
2934 | 6.28k | ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1); |
2935 | 6.28k | ff_vp7dsp_init(&s->vp8dsp); |
2936 | 6.28k | s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter; |
2937 | 6.28k | s->filter_mb_row = vp7_filter_mb_row; |
2938 | | |
2939 | 6.28k | return 0; |
2940 | 6.28k | } |
2941 | | |
2942 | | const FFCodec ff_vp7_decoder = { |
2943 | | .p.name = "vp7", |
2944 | | CODEC_LONG_NAME("On2 VP7"), |
2945 | | .p.type = AVMEDIA_TYPE_VIDEO, |
2946 | | .p.id = AV_CODEC_ID_VP7, |
2947 | | .priv_data_size = sizeof(VP8Context), |
2948 | | .init = vp7_decode_init, |
2949 | | .close = ff_vp8_decode_free, |
2950 | | FF_CODEC_DECODE_CB(vp7_decode_frame), |
2951 | | .p.capabilities = AV_CODEC_CAP_DR1, |
2952 | | .flush = vp8_decode_flush, |
2953 | | .caps_internal = FF_CODEC_CAP_USES_PROGRESSFRAMES, |
2954 | | }; |
2955 | | #endif /* CONFIG_VP7_DECODER */ |
2956 | | |
2957 | | #if CONFIG_VP8_DECODER |
2958 | | const FFCodec ff_vp8_decoder = { |
2959 | | .p.name = "vp8", |
2960 | | CODEC_LONG_NAME("On2 VP8"), |
2961 | | .p.type = AVMEDIA_TYPE_VIDEO, |
2962 | | .p.id = AV_CODEC_ID_VP8, |
2963 | | .priv_data_size = sizeof(VP8Context), |
2964 | | .init = ff_vp8_decode_init, |
2965 | | .close = ff_vp8_decode_free, |
2966 | | FF_CODEC_DECODE_CB(ff_vp8_decode_frame), |
2967 | | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | |
2968 | | AV_CODEC_CAP_SLICE_THREADS, |
2969 | | .caps_internal = FF_CODEC_CAP_USES_PROGRESSFRAMES, |
2970 | | .flush = vp8_decode_flush, |
2971 | | UPDATE_THREAD_CONTEXT(vp8_decode_update_thread_context), |
2972 | | .hw_configs = (const AVCodecHWConfigInternal *const []) { |
2973 | | #if CONFIG_VP8_VAAPI_HWACCEL |
2974 | | HWACCEL_VAAPI(vp8), |
2975 | | #endif |
2976 | | #if CONFIG_VP8_NVDEC_HWACCEL |
2977 | | HWACCEL_NVDEC(vp8), |
2978 | | #endif |
2979 | | NULL |
2980 | | }, |
2981 | | }; |
2982 | | #endif /* CONFIG_VP8_DECODER */ |