/src/ffmpeg/libavcodec/vp9mvs.c
Line | Count | Source |
1 | | /* |
2 | | * VP9 compatible video decoder |
3 | | * |
4 | | * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> |
5 | | * Copyright (C) 2013 Clément Bœsch <u pkh me> |
6 | | * |
7 | | * This file is part of FFmpeg. |
8 | | * |
9 | | * FFmpeg is free software; you can redistribute it and/or |
10 | | * modify it under the terms of the GNU Lesser General Public |
11 | | * License as published by the Free Software Foundation; either |
12 | | * version 2.1 of the License, or (at your option) any later version. |
13 | | * |
14 | | * FFmpeg is distributed in the hope that it will be useful, |
15 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | | * Lesser General Public License for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU Lesser General Public |
20 | | * License along with FFmpeg; if not, write to the Free Software |
21 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | | */ |
23 | | |
24 | | #include "progressframe.h" |
25 | | #include "vp89_rac.h" |
26 | | #include "vp9data.h" |
27 | | #include "vp9dec.h" |
28 | | #include "vpx_rac.h" |
29 | | |
30 | | static av_always_inline void clamp_mv(VP9mv *dst, const VP9mv *src, |
31 | | VP9TileData *td) |
32 | 20.9M | { |
33 | 20.9M | dst->x = av_clip(src->x, td->min_mv.x, td->max_mv.x); |
34 | 20.9M | dst->y = av_clip(src->y, td->min_mv.y, td->max_mv.y); |
35 | 20.9M | } |
36 | | |
37 | | static void find_ref_mvs(VP9TileData *td, |
38 | | VP9mv *pmv, int ref, int z, int idx, int sb) |
39 | 26.1M | { |
40 | 26.1M | static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = { |
41 | 26.1M | [BS_64x64] = { { 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 }, |
42 | 26.1M | { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 } }, |
43 | 26.1M | [BS_64x32] = { { 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 }, |
44 | 26.1M | { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 } }, |
45 | 26.1M | [BS_32x64] = { { -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 }, |
46 | 26.1M | { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 } }, |
47 | 26.1M | [BS_32x32] = { { 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 }, |
48 | 26.1M | { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 } }, |
49 | 26.1M | [BS_32x16] = { { 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 }, |
50 | 26.1M | { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 } }, |
51 | 26.1M | [BS_16x32] = { { -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 }, |
52 | 26.1M | { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 } }, |
53 | 26.1M | [BS_16x16] = { { 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 }, |
54 | 26.1M | { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 } }, |
55 | 26.1M | [BS_16x8] = { { 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 }, |
56 | 26.1M | { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 } }, |
57 | 26.1M | [BS_8x16] = { { -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 }, |
58 | 26.1M | { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 } }, |
59 | 26.1M | [BS_8x8] = { { 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 }, |
60 | 26.1M | { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } }, |
61 | 26.1M | [BS_8x4] = { { 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 }, |
62 | 26.1M | { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } }, |
63 | 26.1M | [BS_4x8] = { { 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 }, |
64 | 26.1M | { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } }, |
65 | 26.1M | [BS_4x4] = { { 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 }, |
66 | 26.1M | { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } }, |
67 | 26.1M | }; |
68 | 26.1M | const VP9Context *s = td->s; |
69 | 26.1M | VP9Block *b = td->b; |
70 | 26.1M | int row = td->row, col = td->col, row7 = td->row7; |
71 | 26.1M | const int8_t (*p)[2] = mv_ref_blk_off[b->bs]; |
72 | 66.7M | #define INVALID_MV 0x80008000U |
73 | 26.1M | uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV; |
74 | 26.1M | int i; |
75 | | |
76 | 26.1M | #define RETURN_DIRECT_MV(mv) \ |
77 | 26.1M | do { \ |
78 | 8.76M | uint32_t m = AV_RN32A(&mv); \ |
79 | 8.76M | if (!idx) { \ |
80 | 5.91M | AV_WN32A(pmv, m); \ |
81 | 5.91M | return; \ |
82 | 5.91M | } else if (mem == INVALID_MV) { \ |
83 | 2.16M | mem = m; \ |
84 | 2.16M | } else if (m != mem) { \ |
85 | 321k | AV_WN32A(pmv, m); \ |
86 | 321k | return; \ |
87 | 321k | } \ |
88 | 8.76M | } while (0) |
89 | | |
90 | 26.1M | if (sb >= 0) { |
91 | 12.7M | if (sb == 2 || sb == 1) { |
92 | 6.38M | RETURN_DIRECT_MV(b->mv[0][z]); |
93 | 6.38M | } else if (sb == 3) { |
94 | 1.69M | RETURN_DIRECT_MV(b->mv[2][z]); |
95 | 477k | RETURN_DIRECT_MV(b->mv[1][z]); |
96 | 214k | RETURN_DIRECT_MV(b->mv[0][z]); |
97 | 214k | } |
98 | | |
99 | 6.50M | #define RETURN_MV(mv) \ |
100 | 27.5M | do { \ |
101 | 27.5M | if (sb > 0) { \ |
102 | 5.46M | VP9mv tmp; \ |
103 | 5.46M | uint32_t m; \ |
104 | 5.46M | av_assert2(idx == 1); \ |
105 | 5.46M | av_assert2(mem != INVALID_MV); \ |
106 | 5.46M | if (mem_sub8x8 == INVALID_MV) { \ |
107 | 1.81M | clamp_mv(&tmp, &mv, td); \ |
108 | 1.81M | m = AV_RN32A(&tmp); \ |
109 | 1.81M | if (m != mem) { \ |
110 | 823k | AV_WN32A(pmv, m); \ |
111 | 823k | return; \ |
112 | 823k | } \ |
113 | 1.81M | mem_sub8x8 = AV_RN32A(&mv); \ |
114 | 3.65M | } else if (mem_sub8x8 != AV_RN32A(&mv)) { \ |
115 | 475k | clamp_mv(&tmp, &mv, td); \ |
116 | 475k | m = AV_RN32A(&tmp); \ |
117 | 475k | if (m != mem) { \ |
118 | 468k | AV_WN32A(pmv, m); \ |
119 | 468k | } else { \ |
120 | | /* BUG I'm pretty sure this isn't the intention */ \ |
121 | 7.25k | AV_WN32A(pmv, 0); \ |
122 | 7.25k | } \ |
123 | 475k | return; \ |
124 | 475k | } \ |
125 | 22.0M | } else { \ |
126 | 22.0M | uint32_t m = AV_RN32A(&mv); \ |
127 | 22.0M | if (!idx) { \ |
128 | 15.9M | clamp_mv(pmv, &mv, td); \ |
129 | 15.9M | return; \ |
130 | 15.9M | } else if (mem == INVALID_MV) { \ |
131 | 1.72M | mem = m; \ |
132 | 4.41M | } else if (m != mem) { \ |
133 | 1.26M | clamp_mv(pmv, &mv, td); \ |
134 | 1.26M | return; \ |
135 | 1.26M | } \ |
136 | 22.0M | } \ |
137 | 27.5M | } while (0) |
138 | | |
139 | 6.50M | if (row > 0) { |
140 | 5.31M | VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col]; |
141 | 5.31M | if (mv->ref[0] == ref) |
142 | 2.49M | RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]); |
143 | 2.81M | else if (mv->ref[1] == ref) |
144 | 1.37M | RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]); |
145 | 5.31M | } |
146 | 3.84M | if (col > td->tile_col_start) { |
147 | 3.36M | VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1]; |
148 | 3.36M | if (mv->ref[0] == ref) |
149 | 1.48M | RETURN_MV(td->left_mv_ctx[2 * row7 + (sb >> 1)][0]); |
150 | 1.87M | else if (mv->ref[1] == ref) |
151 | 788k | RETURN_MV(td->left_mv_ctx[2 * row7 + (sb >> 1)][1]); |
152 | 3.36M | } |
153 | 2.55M | i = 2; |
154 | 13.4M | } else { |
155 | 13.4M | i = 0; |
156 | 13.4M | } |
157 | | |
158 | | // previously coded MVs in this neighborhood, using same reference frame |
159 | 53.3M | for (; i < 8; i++) { |
160 | 49.2M | int c = p[i][0] + col, r = p[i][1] + row; |
161 | | |
162 | 49.2M | if (c >= td->tile_col_start && c < s->cols && |
163 | 37.7M | r >= 0 && r < s->rows) { |
164 | 29.3M | VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c]; |
165 | | |
166 | 29.3M | if (mv->ref[0] == ref) |
167 | 9.50M | RETURN_MV(mv->mv[0]); |
168 | 19.8M | else if (mv->ref[1] == ref) |
169 | 6.33M | RETURN_MV(mv->mv[1]); |
170 | 29.3M | } |
171 | 49.2M | } |
172 | | |
173 | | // MV at this position in previous frame, using same reference frame |
174 | 4.04M | if (s->s.h.use_last_frame_mvs) { |
175 | 3.66M | VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col]; |
176 | | |
177 | 3.66M | if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass) |
178 | 3.66M | ff_progress_frame_await(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3); |
179 | 3.66M | if (mv->ref[0] == ref) |
180 | 1.13M | RETURN_MV(mv->mv[0]); |
181 | 2.52M | else if (mv->ref[1] == ref) |
182 | 722k | RETURN_MV(mv->mv[1]); |
183 | 3.66M | } |
184 | | |
185 | 2.79M | #define RETURN_SCALE_MV(mv, scale) \ |
186 | 3.68M | do { \ |
187 | 3.68M | if (scale) { \ |
188 | 3.22M | VP9mv mv_temp = { -mv.x, -mv.y }; \ |
189 | 3.22M | RETURN_MV(mv_temp); \ |
190 | 3.22M | } else { \ |
191 | 463k | RETURN_MV(mv); \ |
192 | 463k | } \ |
193 | 3.68M | } while (0) |
194 | | |
195 | | // previously coded MVs in this neighborhood, using different reference frame |
196 | 16.7M | for (i = 0; i < 8; i++) { |
197 | 15.1M | int c = p[i][0] + col, r = p[i][1] + row; |
198 | | |
199 | 15.1M | if (c >= td->tile_col_start && c < s->cols && r >= 0 && r < s->rows) { |
200 | 9.44M | VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c]; |
201 | | |
202 | 9.44M | if (mv->ref[0] != ref && mv->ref[0] >= 0) |
203 | 3.01M | RETURN_SCALE_MV(mv->mv[0], |
204 | 9.44M | s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]); |
205 | 8.45M | if (mv->ref[1] != ref && mv->ref[1] >= 0 && |
206 | | // BUG - libvpx has this condition regardless of whether |
207 | | // we used the first ref MV and pre-scaling |
208 | 2.47M | AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) { |
209 | 216k | RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]); |
210 | 216k | } |
211 | 8.45M | } |
212 | 15.1M | } |
213 | | |
214 | | // MV at this position in previous frame, using different reference frame |
215 | 1.60M | if (s->s.h.use_last_frame_mvs) { |
216 | 1.44M | VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col]; |
217 | | |
218 | | // no need to await_progress, because we already did that above |
219 | 1.44M | if (mv->ref[0] != ref && mv->ref[0] >= 0) |
220 | 428k | RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]); |
221 | 1.29M | if (mv->ref[1] != ref && mv->ref[1] >= 0 && |
222 | | // BUG - libvpx has this condition regardless of whether |
223 | | // we used the first ref MV and pre-scaling |
224 | 326k | AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) { |
225 | 21.5k | RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]); |
226 | 21.5k | } |
227 | 1.29M | } |
228 | | |
229 | 1.43M | AV_ZERO32(pmv); |
230 | 1.43M | clamp_mv(pmv, pmv, td); |
231 | 1.43M | #undef INVALID_MV |
232 | 1.43M | #undef RETURN_MV |
233 | 1.43M | #undef RETURN_SCALE_MV |
234 | 1.43M | } |
235 | | |
236 | | static av_always_inline int read_mv_component(VP9TileData *td, int idx, int hp) |
237 | 14.7M | { |
238 | 14.7M | const VP9Context *s = td->s; |
239 | 14.7M | int bit, sign = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].sign); |
240 | 14.7M | int n, c = vp89_rac_get_tree(td->c, ff_vp9_mv_class_tree, |
241 | 14.7M | s->prob.p.mv_comp[idx].classes); |
242 | | |
243 | 14.7M | td->counts.mv_comp[idx].sign[sign]++; |
244 | 14.7M | td->counts.mv_comp[idx].classes[c]++; |
245 | 14.7M | if (c) { |
246 | 6.68M | int m; |
247 | | |
248 | 47.3M | for (n = 0, m = 0; m < c; m++) { |
249 | 40.6M | bit = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].bits[m]); |
250 | 40.6M | n |= bit << m; |
251 | 40.6M | td->counts.mv_comp[idx].bits[m][bit]++; |
252 | 40.6M | } |
253 | 6.68M | n <<= 3; |
254 | 6.68M | bit = vp89_rac_get_tree(td->c, ff_vp9_mv_fp_tree, |
255 | 6.68M | s->prob.p.mv_comp[idx].fp); |
256 | 6.68M | n |= bit << 1; |
257 | 6.68M | td->counts.mv_comp[idx].fp[bit]++; |
258 | 6.68M | if (hp) { |
259 | 2.74M | bit = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].hp); |
260 | 2.74M | td->counts.mv_comp[idx].hp[bit]++; |
261 | 2.74M | n |= bit; |
262 | 3.93M | } else { |
263 | 3.93M | n |= 1; |
264 | | // bug in libvpx - we count for bw entropy purposes even if the |
265 | | // bit wasn't coded |
266 | 3.93M | td->counts.mv_comp[idx].hp[1]++; |
267 | 3.93M | } |
268 | 6.68M | n += 8 << c; |
269 | 8.11M | } else { |
270 | 8.11M | n = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].class0); |
271 | 8.11M | td->counts.mv_comp[idx].class0[n]++; |
272 | 8.11M | bit = vp89_rac_get_tree(td->c, ff_vp9_mv_fp_tree, |
273 | 8.11M | s->prob.p.mv_comp[idx].class0_fp[n]); |
274 | 8.11M | td->counts.mv_comp[idx].class0_fp[n][bit]++; |
275 | 8.11M | n = (n << 3) | (bit << 1); |
276 | 8.11M | if (hp) { |
277 | 4.11M | bit = vpx_rac_get_prob(td->c, s->prob.p.mv_comp[idx].class0_hp); |
278 | 4.11M | td->counts.mv_comp[idx].class0_hp[bit]++; |
279 | 4.11M | n |= bit; |
280 | 4.11M | } else { |
281 | 3.99M | n |= 1; |
282 | | // bug in libvpx - we count for bw entropy purposes even if the |
283 | | // bit wasn't coded |
284 | 3.99M | td->counts.mv_comp[idx].class0_hp[1]++; |
285 | 3.99M | } |
286 | 8.11M | } |
287 | | |
288 | 14.7M | return sign ? -(n + 1) : (n + 1); |
289 | 14.7M | } |
290 | | |
291 | | void ff_vp9_fill_mv(VP9TileData *td, VP9mv *mv, int mode, int sb) |
292 | 26.2M | { |
293 | 26.2M | const VP9Context *s = td->s; |
294 | 26.2M | VP9Block *b = td->b; |
295 | | |
296 | 26.2M | if (mode == ZEROMV) { |
297 | 10.6M | AV_ZERO32(&mv[0]); |
298 | 10.6M | AV_ZERO32(&mv[1]); |
299 | 15.5M | } else { |
300 | 15.5M | int hp; |
301 | | |
302 | | // FIXME cache this value and reuse for other subblocks |
303 | 15.5M | find_ref_mvs(td, &mv[0], b->ref[0], 0, mode == NEARMV, |
304 | 15.5M | mode == NEWMV ? -1 : sb); |
305 | | // FIXME maybe move this code into find_ref_mvs() |
306 | 15.5M | if ((mode == NEWMV || sb == -1) && |
307 | 7.93M | !(hp = s->s.h.highprecisionmvs && |
308 | 4.79M | abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) { |
309 | 4.32M | if (mv[0].y & 1) { |
310 | 21.2k | if (mv[0].y < 0) |
311 | 11.5k | mv[0].y++; |
312 | 9.69k | else |
313 | 9.69k | mv[0].y--; |
314 | 21.2k | } |
315 | 4.32M | if (mv[0].x & 1) { |
316 | 24.8k | if (mv[0].x < 0) |
317 | 14.4k | mv[0].x++; |
318 | 10.3k | else |
319 | 10.3k | mv[0].x--; |
320 | 24.8k | } |
321 | 4.32M | } |
322 | 15.5M | if (mode == NEWMV) { |
323 | 6.34M | enum MVJoint j = vp89_rac_get_tree(td->c, ff_vp9_mv_joint_tree, |
324 | 6.34M | s->prob.p.mv_joint); |
325 | | |
326 | 6.34M | td->counts.mv_joint[j]++; |
327 | 6.34M | if (j >= MV_JOINT_V) |
328 | 4.43M | mv[0].y += read_mv_component(td, 0, hp); |
329 | 6.34M | if (j & 1) |
330 | 4.32M | mv[0].x += read_mv_component(td, 1, hp); |
331 | 6.34M | } |
332 | | |
333 | 15.5M | if (b->comp) { |
334 | | // FIXME cache this value and reuse for other subblocks |
335 | 10.5M | find_ref_mvs(td, &mv[1], b->ref[1], 1, mode == NEARMV, |
336 | 10.5M | mode == NEWMV ? -1 : sb); |
337 | 10.5M | if ((mode == NEWMV || sb == -1) && |
338 | 5.49M | !(hp = s->s.h.highprecisionmvs && |
339 | 3.28M | abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) { |
340 | 3.28M | if (mv[1].y & 1) { |
341 | 12.6k | if (mv[1].y < 0) |
342 | 6.24k | mv[1].y++; |
343 | 6.43k | else |
344 | 6.43k | mv[1].y--; |
345 | 12.6k | } |
346 | 3.28M | if (mv[1].x & 1) { |
347 | 14.7k | if (mv[1].x < 0) |
348 | 9.23k | mv[1].x++; |
349 | 5.51k | else |
350 | 5.51k | mv[1].x--; |
351 | 14.7k | } |
352 | 3.28M | } |
353 | 10.5M | if (mode == NEWMV) { |
354 | 4.35M | enum MVJoint j = vp89_rac_get_tree(td->c, ff_vp9_mv_joint_tree, |
355 | 4.35M | s->prob.p.mv_joint); |
356 | | |
357 | 4.35M | td->counts.mv_joint[j]++; |
358 | 4.35M | if (j >= MV_JOINT_V) |
359 | 3.05M | mv[1].y += read_mv_component(td, 0, hp); |
360 | 4.35M | if (j & 1) |
361 | 2.98M | mv[1].x += read_mv_component(td, 1, hp); |
362 | 4.35M | } |
363 | 10.5M | } |
364 | 15.5M | } |
365 | 26.2M | } |