/src/ffmpeg/libavcodec/h264_cavlc.c
Line | Count | Source |
1 | | /* |
2 | | * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding |
3 | | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | /** |
23 | | * @file |
24 | | * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding. |
25 | | * @author Michael Niedermayer <michaelni@gmx.at> |
26 | | */ |
27 | | |
28 | 16.7M | #define CABAC(h) 0 |
29 | | #define UNCHECKED_BITSTREAM_READER 1 |
30 | | |
31 | | #include "h264dec.h" |
32 | | #include "h264_mvpred.h" |
33 | | #include "h264data.h" |
34 | | #include "golomb.h" |
35 | | #include "mpegutils.h" |
36 | | #include "libavutil/avassert.h" |
37 | | |
38 | | |
39 | | static const uint8_t golomb_to_inter_cbp_gray[16]={ /* A permutation of 0..15. NOTE(review): not referenced in the visible part of this file; presumably maps a decoded Golomb code number to the 4-bit luma CBP of inter macroblocks -- confirm at the use site. */ |
40 | | 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9, |
41 | | }; |
42 | | |
43 | | static const uint8_t golomb_to_intra4x4_cbp_gray[16]={ /* A permutation of 0..15. NOTE(review): presumably the intra 4x4 counterpart of the table above -- confirm at the use site. */ |
44 | | 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9, |
45 | | }; |
46 | | |
47 | | static const uint8_t chroma_dc_coeff_token_len[4*5]={ /* Code lengths of the chroma DC coeff_token VLC that decode_residual() uses when max_coeff == 4. decode_residual() splits the decoded value into total_coeff (value >> 2) and trailing_ones (value & 3), so -- presuming the VLC returns the entry index -- each row is one total_coeff (0..4) and each column one trailing_ones count. Zero lengths appear only where the column exceeds the row. */ |
48 | | 2, 0, 0, 0, |
49 | | 6, 1, 0, 0, |
50 | | 6, 6, 3, 0, |
51 | | 6, 7, 7, 6, |
52 | | 6, 8, 8, 7, |
53 | | }; |
54 | | |
55 | | static const uint8_t chroma_dc_coeff_token_bits[4*5]={ /* Code words paired entry for entry with chroma_dc_coeff_token_len; both are passed together to VLC_INIT_STATIC_TABLE(). */ |
56 | | 1, 0, 0, 0, |
57 | | 7, 1, 0, 0, |
58 | | 4, 6, 1, 0, |
59 | | 3, 3, 2, 5, |
60 | | 2, 3, 2, 0, |
61 | | }; |
62 | | |
63 | | static const uint8_t chroma422_dc_coeff_token_len[4*9]={ /* Code lengths of the coeff_token VLC that decode_residual() uses when max_coeff <= 8 but not 4. Same layout as chroma_dc_coeff_token_len, with nine rows (total_coeff 0..8, presuming the VLC returns the entry index) of four trailing_ones columns. */ |
64 | | 1, 0, 0, 0, |
65 | | 7, 2, 0, 0, |
66 | | 7, 7, 3, 0, |
67 | | 9, 7, 7, 5, |
68 | | 9, 9, 7, 6, |
69 | | 10, 10, 9, 7, |
70 | | 11, 11, 10, 7, |
71 | | 12, 12, 11, 10, |
72 | | 13, 12, 12, 11, |
73 | | }; |
74 | | |
75 | | static const uint8_t chroma422_dc_coeff_token_bits[4*9]={ /* Code words paired entry for entry with chroma422_dc_coeff_token_len. */ |
76 | | 1, 0, 0, 0, |
77 | | 15, 1, 0, 0, |
78 | | 14, 13, 1, 0, |
79 | | 7, 12, 11, 1, |
80 | | 6, 5, 10, 1, |
81 | | 7, 6, 4, 9, |
82 | | 7, 6, 5, 8, |
83 | | 7, 6, 5, 4, |
84 | | 7, 5, 4, 4, |
85 | | }; |
86 | | |
87 | | static const uint8_t coeff_token_len[4][4*17]={ /* Code lengths of the four coeff_token VLCs for blocks with more than 8 coefficients. ff_h264_decode_init_vlc() builds one VLC per outer index and maps each predicted non-zero count 0..16 onto one of them. Within a table the entries come in groups of four: decode_residual() reads total_coeff as value >> 2 and trailing_ones as value & 3 (presuming the VLC returns the entry index). */ |
88 | | { |
89 | | 1, 0, 0, 0, |
90 | | 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6, |
91 | | 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10, |
92 | | 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14, |
93 | | 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16, |
94 | | }, |
95 | | { |
96 | | 2, 0, 0, 0, |
97 | | 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4, |
98 | | 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7, |
99 | | 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12, |
100 | | 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14, |
101 | | }, |
102 | | { |
103 | | 4, 0, 0, 0, |
104 | | 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4, |
105 | | 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5, |
106 | | 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8, |
107 | | 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10, |
108 | | }, |
109 | | { |
110 | | 6, 0, 0, 0, |
111 | | 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6, |
112 | | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
113 | | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
114 | | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
115 | | } |
116 | | }; |
117 | | |
118 | | static const uint8_t coeff_token_bits[4][4*17]={ /* Code words paired entry for entry with coeff_token_len. */ |
119 | | { |
120 | | 1, 0, 0, 0, |
121 | | 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3, |
122 | | 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4, |
123 | | 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8, |
124 | | 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8, |
125 | | }, |
126 | | { |
127 | | 3, 0, 0, 0, |
128 | | 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4, |
129 | | 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4, |
130 | | 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12, |
131 | | 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4, |
132 | | }, |
133 | | { |
134 | | 15, 0, 0, 0, |
135 | | 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11, |
136 | | 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13, |
137 | | 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8, |
138 | | 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2, |
139 | | }, |
140 | | { |
141 | | 3, 0, 0, 0, |
142 | | 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15, |
143 | | 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31, |
144 | | 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47, |
145 | | 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63, |
146 | | } |
147 | | }; |
148 | | |
149 | | static const uint8_t total_zeros_len[16][16]= { /* Code lengths of the total_zeros VLCs. Row i becomes total_zeros_vlc[i + 1] in ff_h264_decode_init_vlc(), and decode_residual() picks the VLC by total_coeff to read zeros_left. Only 15 rows are initialised here and only rows 0..14 are read by the init loop; the 16th row stays zero. */ |
150 | | {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9}, |
151 | | {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6}, |
152 | | {4,3,3,3,4,4,3,3,4,5,5,6,5,6}, |
153 | | {5,3,4,4,3,3,3,4,3,4,5,5,5}, |
154 | | {4,4,4,3,3,3,3,3,4,5,4,5}, |
155 | | {6,5,3,3,3,3,3,3,4,3,6}, |
156 | | {6,5,3,3,3,2,3,4,3,6}, |
157 | | {6,4,5,3,2,2,3,3,6}, |
158 | | {6,6,4,2,2,3,2,5}, |
159 | | {5,5,3,2,2,2,4}, |
160 | | {4,4,3,3,1,3}, |
161 | | {4,4,2,1,3}, |
162 | | {3,3,1,2}, |
163 | | {2,2,1}, |
164 | | {1,1}, |
165 | | }; |
166 | | |
167 | | static const uint8_t total_zeros_bits[16][16]= { /* Code words paired entry for entry with total_zeros_len. */ |
168 | | {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1}, |
169 | | {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0}, |
170 | | {5,7,6,5,4,3,4,3,2,3,2,1,1,0}, |
171 | | {3,7,5,4,6,5,4,3,3,2,2,1,0}, |
172 | | {5,4,3,7,6,5,4,3,2,1,1,0}, |
173 | | {1,1,7,6,5,4,3,2,1,1,0}, |
174 | | {1,1,5,4,3,3,2,1,1,0}, |
175 | | {1,1,1,3,3,2,2,1,0}, |
176 | | {1,0,1,3,2,1,1,1}, |
177 | | {1,0,1,3,2,1,1}, |
178 | | {0,1,1,2,1,3}, |
179 | | {0,1,1,1,1}, |
180 | | {0,1,1,1}, |
181 | | {0,1,1}, |
182 | | {0,1}, |
183 | | }; |
184 | | |
185 | | static const uint8_t chroma_dc_total_zeros_len[3][4]= { /* Code lengths of the total_zeros VLCs used when max_coeff == 4. Row i becomes chroma_dc_total_zeros_vlc[i + 1], which decode_residual() selects by total_coeff. */ |
186 | | { 1, 2, 3, 3,}, |
187 | | { 1, 2, 2, 0,}, |
188 | | { 1, 1, 0, 0,}, |
189 | | }; |
190 | | |
191 | | static const uint8_t chroma_dc_total_zeros_bits[3][4]= { /* Code words paired entry for entry with chroma_dc_total_zeros_len. */ |
192 | | { 1, 1, 1, 0,}, |
193 | | { 1, 1, 0, 0,}, |
194 | | { 1, 0, 0, 0,}, |
195 | | }; |
196 | | |
197 | | static const uint8_t chroma422_dc_total_zeros_len[7][8]= { /* Code lengths of the total_zeros VLCs used when max_coeff <= 8 but not 4. Row i becomes chroma422_dc_total_zeros_vlc[i + 1], which decode_residual() selects by total_coeff. */ |
198 | | { 1, 3, 3, 4, 4, 4, 5, 5 }, |
199 | | { 3, 2, 3, 3, 3, 3, 3 }, |
200 | | { 3, 3, 2, 2, 3, 3 }, |
201 | | { 3, 2, 2, 2, 3 }, |
202 | | { 2, 2, 2, 2 }, |
203 | | { 2, 2, 1 }, |
204 | | { 1, 1 }, |
205 | | }; |
206 | | |
207 | | static const uint8_t chroma422_dc_total_zeros_bits[7][8]= { /* Code words paired entry for entry with chroma422_dc_total_zeros_len. */ |
208 | | { 1, 2, 3, 2, 3, 1, 1, 0 }, |
209 | | { 0, 1, 1, 4, 5, 6, 7 }, |
210 | | { 0, 1, 1, 2, 6, 7 }, |
211 | | { 6, 0, 1, 2, 7 }, |
212 | | { 0, 1, 2, 3 }, |
213 | | { 0, 1, 1 }, |
214 | | { 0, 1 }, |
215 | | }; |
216 | | |
217 | | static const uint8_t run_len[7][16]={ /* Code lengths of the run_before VLCs. Rows 0..5 become run_vlc[1..6] (7 entries read per row), which decode_residual() selects by zeros_left when it is below 7. Row 6 (16 entries read) feeds the table used for zeros_left >= 7. */ |
218 | | {1,1}, |
219 | | {1,2,2}, |
220 | | {2,2,2,2}, |
221 | | {2,2,2,3,3}, |
222 | | {2,2,3,3,3,3}, |
223 | | {2,3,3,3,3,3,3}, |
224 | | {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11}, |
225 | | }; |
226 | | |
227 | | static const uint8_t run_bits[7][16]={ /* Code words paired entry for entry with run_len. */ |
228 | | {1,0}, |
229 | | {1,1,0}, |
230 | | {3,2,1,0}, |
231 | | {3,2,1,1,0}, |
232 | | {3,2,3,2,1,0}, |
233 | | {3,0,1,3,2,5,4}, |
234 | | {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1}, |
235 | | }; |
236 | | |
237 | 3.34M | #define LEVEL_TAB_BITS 8 /* number of bitstream bits peeked to index cavlc_level_tab */ |
238 | | static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2]; /* [suffix_length][peeked bits][0] is a decoded level, or 100 + prefix as an escape; [..][1] is the number of bits to consume. Filled by init_cavlc_level_tab(). */ |
239 | | |
240 | 435k | #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 |
241 | 198k | #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13 |
242 | 7.07M | #define COEFF_TOKEN_VLC_BITS 8 |
243 | 3.14M | #define TOTAL_ZEROS_VLC_BITS 9 |
244 | 300k | #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 |
245 | 71.8k | #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5 |
246 | 2.31M | #define RUN_VLC_BITS 3 |
247 | 640k | #define RUN7_VLC_BITS 6 |
248 | | |
249 | | /// 17 pointers to only four different VLCs, indexed by the predicted non-zero count | |
250 | | static const VLCElem *coeff_token_vlc[17]; |
251 | | |
252 | | static VLCElem chroma_dc_coeff_token_vlc_table[256]; |
253 | | |
254 | | static VLCElem chroma422_dc_coeff_token_vlc_table[1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS]; |
255 | | |
256 | | static const VLCElem *total_zeros_vlc[15+1]; /* entries 1..15 are set by ff_h264_decode_init_vlc(); entry 0 is never assigned */ |
257 | | |
258 | | static const VLCElem *chroma_dc_total_zeros_vlc[3+1]; /* entries 1..3 are set; entry 0 is never assigned */ |
259 | | |
260 | | static const VLCElem *chroma422_dc_total_zeros_vlc[7+1]; /* entries 1..7 are set; entry 0 is never assigned */ |
261 | | |
262 | | static const VLCElem *run_vlc[6+1]; /* entries 1..6 are set; entry 0 is never assigned */ |
263 | | |
264 | | // The other pointers to VLCElem point into this array. Its size is the sum of the per-table budgets; ff_h264_decode_init_vlc() asserts that nothing is left over. | |
265 | | static VLCElem run7_vlc_table[96 + (6 << RUN_VLC_BITS) |
266 | | + (15 << TOTAL_ZEROS_VLC_BITS) |
267 | | + (3 << CHROMA_DC_TOTAL_ZEROS_VLC_BITS) |
268 | | + (7 << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS) |
269 | | + (520 + 332 + 280 + 256) /* coeff token */]; |
270 | | |
271 | | /** |
272 | | * Get the predicted number of non-zero coefficients, derived from the cached counts of the left and top neighbours of block n. |
273 | | * @param n block index (translated through scan8[] to address non_zero_count_cache) |
274 | | * @return the prediction, limited to 0..31 by the final mask */ |
275 | | static inline int pred_non_zero_count(const H264Context *h, const H264SliceContext *sl, int n) |
276 | 7.07M | { |
277 | 7.07M | const int index8= scan8[n]; |
278 | 7.07M | const int left = sl->non_zero_count_cache[index8 - 1]; /* cache entry immediately before this block */ |
279 | 7.07M | const int top = sl->non_zero_count_cache[index8 - 8]; /* cache entry eight positions earlier */ |
280 | 7.07M | int i= left + top; |
281 | | |
282 | 7.07M | if(i<64) i= (i+1)>>1; /* sums below 64 are averaged, rounding up. NOTE(review): larger sums presumably mean a neighbour is flagged unavailable and go straight to the mask -- confirm */ |
283 | | |
284 | 7.07M | ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31); |
285 | | |
286 | 7.07M | return i&31; |
287 | 7.07M | } |
288 | | |
289 | 2 | static av_cold void init_cavlc_level_tab(void){ /* Fill cavlc_level_tab: for every suffix_length 0..6 and every possible LEVEL_TAB_BITS-bit lookahead value i, store either the fully decoded signed level or an escape (100 + prefix), together with the number of bits to consume. */ |
290 | 2 | int suffix_length; |
291 | 2 | unsigned int i; |
292 | | |
293 | 16 | for(suffix_length=0; suffix_length<7; suffix_length++){ |
294 | 3.59k | for(i=0; i<(1<<LEVEL_TAB_BITS); i++){ |
295 | 3.58k | int prefix= LEVEL_TAB_BITS - av_log2(2*i); /* zero bits ahead of the first 1 bit in the window. NOTE(review): for i == 0 this relies on av_log2(0) returning 0 -- confirm */ |
296 | | |
297 | 3.58k | if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){ /* prefix, marker bit and suffix all fit in the window */ |
298 | 3.33k | int level_code = (prefix << suffix_length) + |
299 | 3.33k | (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length); /* marker bit plus suffix bits, with the marker then subtracted */ |
300 | 3.33k | int mask = -(level_code&1); /* all ones when level_code is odd */ |
301 | 3.33k | level_code = (((2 + level_code) >> 1) ^ mask) - mask; /* magnitude (level_code + 2) >> 1, negated when level_code was odd */ |
302 | 3.33k | cavlc_level_tab[suffix_length][i][0]= level_code; |
303 | 3.33k | cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length; |
304 | 3.33k | }else if(prefix + 1 <= LEVEL_TAB_BITS){ /* prefix is complete but the suffix is not: escape carrying the prefix */ |
305 | 240 | cavlc_level_tab[suffix_length][i][0]= prefix+100; |
306 | 240 | cavlc_level_tab[suffix_length][i][1]= prefix + 1; |
307 | 240 | }else{ /* prefix does not end inside the window: decode_residual() continues counting with get_level_prefix() */ |
308 | 14 | cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100; |
309 | 14 | cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS; |
310 | 14 | } |
311 | 3.58k | } |
312 | 14 | } |
313 | 2 | } |
314 | | |
315 | | av_cold void ff_h264_decode_init_vlc(void) /* Build every CAVLC lookup table used in this file: the two chroma DC coeff_token tables (own storage), the tables carved out of run7_vlc_table through `state`, and finally cavlc_level_tab. */ |
316 | 2 | { |
317 | 2 | const VLCElem *coeff_token_vlc_original[4]; |
318 | 2 | VLCInitState state = VLC_INIT_STATE(run7_vlc_table); /* presumably hands out consecutive slices of run7_vlc_table to the ff_vlc_init_tables() calls below -- confirm against vlc.h */ |
319 | | |
320 | 2 | VLC_INIT_STATIC_TABLE(chroma_dc_coeff_token_vlc_table, |
321 | 2 | CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4 * 5, |
322 | 2 | &chroma_dc_coeff_token_len [0], 1, 1, |
323 | 2 | &chroma_dc_coeff_token_bits[0], 1, 1, 0); |
324 | | |
325 | 2 | VLC_INIT_STATIC_TABLE(chroma422_dc_coeff_token_vlc_table, |
326 | 2 | CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4 * 9, |
327 | 2 | &chroma422_dc_coeff_token_len [0], 1, 1, |
328 | 2 | &chroma422_dc_coeff_token_bits[0], 1, 1, 0); |
329 | | |
330 | 2 | ff_vlc_init_tables(&state, RUN7_VLC_BITS, 16, /* return value unused: decode_residual() reads this table through run7_vlc_table itself, so it is presumably required to be the first one carved from the buffer */ |
331 | 2 | &run_len [6][0], 1, 1, |
332 | 2 | &run_bits[6][0], 1, 1, 0); |
333 | | |
334 | 14 | for (int i = 0; i < 6; i++) { /* run_len/run_bits rows 0..5 become run_vlc[1..6] */ |
335 | 12 | run_vlc[i + 1] = ff_vlc_init_tables(&state, RUN_VLC_BITS, 7, |
336 | 12 | &run_len [i][0], 1, 1, |
337 | 12 | &run_bits[i][0], 1, 1, 0); |
338 | 12 | } |
339 | | |
340 | 10 | for (int i = 0; i < 4; i++) { |
341 | 8 | coeff_token_vlc_original[i] = |
342 | 8 | ff_vlc_init_tables(&state, COEFF_TOKEN_VLC_BITS, 4*17, |
343 | 8 | &coeff_token_len [i][0], 1, 1, |
344 | 8 | &coeff_token_bits[i][0], 1, 1, 0); |
345 | 8 | } |
346 | 36 | for (int i = 0; i < FF_ARRAY_ELEMS(coeff_token_vlc); i++) { /* map each predicted non-zero count 0..16 to one of the four tables */ |
347 | 34 | static const uint8_t coeff_token_table_index[17] = { |
348 | 34 | 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 |
349 | 34 | }; |
350 | 34 | coeff_token_vlc[i] = coeff_token_vlc_original[coeff_token_table_index[i]]; |
351 | 34 | } |
352 | | |
353 | 8 | for (int i = 0; i < 3; i++) { |
354 | 6 | chroma_dc_total_zeros_vlc[i + 1] = |
355 | 6 | ff_vlc_init_tables(&state, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, |
356 | 6 | &chroma_dc_total_zeros_len [i][0], 1, 1, |
357 | 6 | &chroma_dc_total_zeros_bits[i][0], 1, 1, 0); |
358 | 6 | } |
359 | | |
360 | 16 | for (int i = 0; i < 7; i++) { |
361 | 14 | chroma422_dc_total_zeros_vlc[i + 1] = |
362 | 14 | ff_vlc_init_tables(&state, CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8, |
363 | 14 | &chroma422_dc_total_zeros_len [i][0], 1, 1, |
364 | 14 | &chroma422_dc_total_zeros_bits[i][0], 1, 1, 0); |
365 | 14 | } |
366 | | |
367 | 32 | for (int i = 0; i < 15; i++) { |
368 | 30 | total_zeros_vlc[i + 1] = |
369 | 30 | ff_vlc_init_tables(&state, TOTAL_ZEROS_VLC_BITS, 16, |
370 | 30 | &total_zeros_len [i][0], 1, 1, |
371 | 30 | &total_zeros_bits[i][0], 1, 1, 0); |
372 | 30 | } |
373 | | /* |
374 | | * This is a one time safety check to make sure that |
375 | | * the vlc table sizes were initialized correctly: |
376 | | * the shared buffer must be used up exactly. */ |
377 | 2 | av_assert1(state.size == 0); |
378 | | |
379 | 2 | init_cavlc_level_tab(); |
380 | 2 | } |
381 | | |
382 | 185k | static inline int get_level_prefix(GetBitContext *gb){ /* Consume a run of zero bits plus the 1 bit that ends it at the current read position, and return the number of zero bits. NOTE(review): assumes GET_CACHE() yields a 32-bit window with the next bit on top -- confirm against get_bits.h */ |
383 | 185k | unsigned int buf; |
384 | 185k | int log; |
385 | | |
386 | 185k | OPEN_READER(re, gb); |
387 | 185k | UPDATE_CACHE(re, gb); |
388 | 185k | buf=GET_CACHE(re, gb); |
389 | | |
390 | 185k | log= 32 - av_log2(buf); /* 1-based position of the first set bit, counted from the top of the window */ |
391 | | |
392 | 185k | LAST_SKIP_BITS(re, gb, log); /* skip the zeros and the terminating 1 bit */ |
393 | 185k | CLOSE_READER(re, gb); |
394 | | |
395 | 185k | return log-1; |
396 | 185k | } |
397 | | |
398 | | /** |
399 | | * Decode a residual block: coeff_token, trailing-one signs, remaining levels, total_zeros and run_before, then store the levels into block along scantable. |
400 | | * @param n block index; indices >= LUMA_DC_BLOCK_INDEX are stored without dequantisation (qmul is not read for them) |
401 | | * @param scantable scantable |
402 | | * @param max_coeff number of coefficients in the block; 4 and other values <= 8 select the two chroma DC table sets |
403 | | * @return 0 on success, AVERROR_INVALIDDATA (<0) if the bitstream is corrupt |
404 | | */ |
405 | | static int decode_residual(const H264Context *h, H264SliceContext *sl, |
406 | | GetBitContext *gb, int16_t *block, int n, |
407 | | const uint8_t *scantable, const uint32_t *qmul, |
408 | | int max_coeff) |
409 | 7.70M | { |
410 | 7.70M | int level[16]; |
411 | 7.70M | int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before; |
412 | | |
413 | | //FIXME put trailing_ones into the context |
414 | | |
415 | 7.70M | if(max_coeff <= 8){ |
416 | 633k | if (max_coeff == 4) |
417 | 435k | coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc_table, |
418 | 435k | CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); |
419 | 198k | else |
420 | 198k | coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc_table, |
421 | 198k | CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1); |
422 | 7.07M | }else{ |
423 | 7.07M | total_coeff = pred_non_zero_count(h, sl, n >= LUMA_DC_BLOCK_INDEX ? /* total_coeff temporarily holds the predicted count that selects the coeff_token VLC */ |
424 | 6.56M | (n - LUMA_DC_BLOCK_INDEX) * 16 : n); |
425 | 7.07M | coeff_token = get_vlc2(gb, coeff_token_vlc[total_coeff], |
426 | 7.07M | COEFF_TOKEN_VLC_BITS, 2); |
427 | 7.07M | } |
428 | 7.70M | total_coeff = coeff_token >> 2; /* coeff_token packs total_coeff above the two trailing_ones bits */ |
429 | 7.70M | sl->non_zero_count_cache[scan8[n]] = total_coeff; |
430 | | |
431 | | //FIXME set last_non_zero? |
432 | | |
433 | 7.70M | if(total_coeff==0) |
434 | 4.04M | return 0; |
435 | 3.66M | if(total_coeff > (unsigned)max_coeff) { |
436 | 78.6k | av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff); |
437 | 78.6k | return AVERROR_INVALIDDATA; |
438 | 78.6k | } |
439 | | |
440 | 3.58M | trailing_ones= coeff_token&3; |
441 | 3.58M | ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff); |
442 | 3.58M | av_assert2(total_coeff<=16); |
443 | | |
444 | 3.58M | i = show_bits(gb, 3); /* peek three sign bits but consume only trailing_ones of them */ |
445 | 3.58M | skip_bits(gb, trailing_ones); |
446 | 3.58M | level[0] = 1-((i&4)>>1); /* sign bit 0 gives +1, sign bit 1 gives -1 */ |
447 | 3.58M | level[1] = 1-((i&2) ); |
448 | 3.58M | level[2] = 1-((i&1)<<1); |
449 | | |
450 | 3.58M | if(trailing_ones<total_coeff) { |
451 | 1.06M | int mask, prefix; |
452 | 1.06M | int suffix_length = total_coeff > 10 & trailing_ones < 3; |
453 | 1.06M | int bitsi= show_bits(gb, LEVEL_TAB_BITS); |
454 | 1.06M | int level_code= cavlc_level_tab[suffix_length][bitsi][0]; |
455 | | |
456 | 1.06M | skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]); |
457 | 1.06M | if(level_code >= 100){ /* table escape: the entry is 100 + prefix and the level is finished by hand */ |
458 | 92.5k | prefix= level_code - 100; |
459 | 92.5k | if(prefix == LEVEL_TAB_BITS) |
460 | 90.4k | prefix += get_level_prefix(gb); |
461 | | |
462 | | //first coefficient has suffix_length equal to 0 or 1 |
463 | 92.5k | if(prefix<14){ //FIXME try to build a large unified VLC table for all this |
464 | 30.2k | if(suffix_length) |
465 | 3.74k | level_code= (prefix<<1) + get_bits1(gb); //part |
466 | 26.4k | else |
467 | 26.4k | level_code= prefix; //part |
468 | 62.2k | }else if(prefix==14){ |
469 | 4.00k | if(suffix_length) |
470 | 434 | level_code= (prefix<<1) + get_bits1(gb); //part |
471 | 3.57k | else |
472 | 3.57k | level_code= prefix + get_bits(gb, 4); //part |
473 | 58.2k | }else{ |
474 | 58.2k | level_code= 30; |
475 | 58.2k | if(prefix>=16){ |
476 | 55.6k | if(prefix > 25+3){ |
477 | 23.3k | av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n"); |
478 | 23.3k | return AVERROR_INVALIDDATA; |
479 | 23.3k | } |
480 | 32.2k | level_code += (1<<(prefix-3))-4096; |
481 | 32.2k | } |
482 | 34.9k | level_code += get_bits(gb, prefix-3); //part |
483 | 34.9k | } |
484 | | |
485 | 69.1k | if(trailing_ones < 3) level_code += 2; |
486 | | |
487 | 69.1k | suffix_length = 2; |
488 | 69.1k | mask= -(level_code&1); |
489 | 69.1k | level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask; /* magnitude (level_code + 2) >> 1, negated when level_code is odd */ |
490 | 976k | }else{ |
491 | 976k | level_code += ((level_code>>31)|1) & -(trailing_ones < 3); /* move the signed level one step away from zero when fewer than 3 trailing ones were coded */ |
492 | | |
493 | 976k | suffix_length = 1 + (level_code + 3U > 6U); /* 2 when level_code lies outside -3..3, else 1 */ |
494 | 976k | level[trailing_ones]= level_code; |
495 | 976k | } |
496 | | |
497 | | //remaining coefficients have suffix_length > 0 |
498 | 3.06M | for(i=trailing_ones+1;i<total_coeff;i++) { |
499 | 2.03M | static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX }; |
500 | 2.03M | int bitsi= show_bits(gb, LEVEL_TAB_BITS); |
501 | 2.03M | level_code= cavlc_level_tab[suffix_length][bitsi][0]; |
502 | | |
503 | 2.03M | skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]); |
504 | 2.03M | if(level_code >= 100){ |
505 | 140k | prefix= level_code - 100; |
506 | 140k | if(prefix == LEVEL_TAB_BITS){ |
507 | 95.0k | prefix += get_level_prefix(gb); |
508 | 95.0k | } |
509 | 140k | if(prefix<15){ |
510 | 101k | level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length); |
511 | 101k | }else{ |
512 | 39.0k | level_code = 15<<suffix_length; |
513 | 39.0k | if (prefix>=16) { |
514 | 31.4k | if(prefix > 25+3){ |
515 | 15.6k | av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n"); |
516 | 15.6k | return AVERROR_INVALIDDATA; |
517 | 15.6k | } |
518 | 15.7k | level_code += (1<<(prefix-3))-4096; |
519 | 15.7k | } |
520 | 23.4k | level_code += get_bits(gb, prefix-3); |
521 | 23.4k | } |
522 | 124k | mask= -(level_code&1); |
523 | 124k | level_code= (((2+level_code)>>1) ^ mask) - mask; |
524 | 124k | } |
525 | 2.02M | level[i]= level_code; |
526 | 2.02M | suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length]; /* unsigned compare: grows suffix_length when the magnitude of level_code exceeds suffix_limit[suffix_length] */ |
527 | 2.02M | } |
528 | 1.04M | } |
529 | | |
530 | 3.54M | if(total_coeff == max_coeff) /* a full block leaves no room for zeros, so total_zeros is not read */ |
531 | 26.2k | zeros_left=0; |
532 | 3.51M | else{ |
533 | 3.51M | if (max_coeff <= 8) { |
534 | 371k | if (max_coeff == 4) |
535 | 300k | zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff], |
536 | 300k | CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); |
537 | 71.8k | else |
538 | 71.8k | zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff], |
539 | 71.8k | CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1); |
540 | 3.14M | } else { |
541 | 3.14M | zeros_left = get_vlc2(gb, total_zeros_vlc[total_coeff], |
542 | 3.14M | TOTAL_ZEROS_VLC_BITS, 1); |
543 | 3.14M | } |
544 | 3.51M | } |
545 | | |
546 | 3.54M | #define STORE_BLOCK(type) /* walk scantable backwards from the last coefficient; blocks with n >= LUMA_DC_BLOCK_INDEX store raw levels, the others store (level * qmul + 32) >> 6 */ \ |
547 | 3.54M | scantable += zeros_left + total_coeff - 1; \ |
548 | 3.54M | if(n >= LUMA_DC_BLOCK_INDEX){ \ |
549 | 474k | ((type*)block)[*scantable] = level[0]; \ |
550 | 613k | for(i=1;i<total_coeff && zeros_left > 0;i++) { \ |
551 | 138k | if(zeros_left < 7) \ |
552 | 138k | run_before = get_vlc2(gb, run_vlc[zeros_left], RUN_VLC_BITS, 1); \ |
553 | 138k | else \ |
554 | 138k | run_before = get_vlc2(gb, run7_vlc_table, RUN7_VLC_BITS, 2); \ |
555 | 138k | zeros_left -= run_before; \ |
556 | 138k | scantable -= 1 + run_before; \ |
557 | 138k | ((type*)block)[*scantable]= level[i]; \ |
558 | 138k | } \ |
559 | 631k | for(;i<total_coeff;i++) { \ |
560 | 157k | scantable--; \ |
561 | 157k | ((type*)block)[*scantable]= level[i]; \ |
562 | 157k | } \ |
563 | 3.07M | }else{ \ |
564 | 3.07M | ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \ |
565 | 5.88M | for(i=1;i<total_coeff && zeros_left > 0;i++) { \ |
566 | 2.81M | if(zeros_left < 7) \ |
567 | 2.81M | run_before = get_vlc2(gb, run_vlc[zeros_left], RUN_VLC_BITS, 1); \ |
568 | 2.81M | else \ |
569 | 2.81M | run_before = get_vlc2(gb, run7_vlc_table, RUN7_VLC_BITS, 2); \ |
570 | 2.81M | zeros_left -= run_before; \ |
571 | 2.81M | scantable -= 1 + run_before; \ |
572 | 2.81M | ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \ |
573 | 2.81M | } \ |
574 | 4.76M | for(;i<total_coeff;i++) { \ |
575 | 1.69M | scantable--; \ |
576 | 1.69M | ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \ |
577 | 1.69M | } \ |
578 | 3.07M | } |
579 | | |
580 | 3.54M | if (h->pixel_shift) { /* block is addressed as int32_t when pixel_shift is set, as int16_t otherwise */ |
581 | 1.95M | STORE_BLOCK(int32_t) |
582 | 1.95M | } else { |
583 | 1.58M | STORE_BLOCK(int16_t) |
584 | 1.58M | } |
585 | | |
586 | 3.54M | if(zeros_left<0){ /* the decoded run_before values added up to more than total_zeros */ |
587 | 26.7k | av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y); |
588 | 26.7k | return AVERROR_INVALIDDATA; |
589 | 26.7k | } |
590 | | |
591 | 3.51M | return 0; |
592 | 3.54M | } |
593 | | |
594 | | static av_always_inline /* Decode all residual blocks of plane p of the current macroblock. Returns -1 if any decode_residual() call fails, otherwise a bitmask built from the blocks that were coded (0xf or 0 for intra 16x16 macroblocks). */ |
595 | | int decode_luma_residual(const H264Context *h, H264SliceContext *sl, |
596 | | GetBitContext *gb, const uint8_t *scan, |
597 | | const uint8_t *scan8x8, int pixel_shift, |
598 | | int mb_type, int cbp, int p) |
599 | 1.33M | { |
600 | 1.33M | int i4x4, i8x8; |
601 | 1.33M | int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1]; /* plane 0 uses the slice qscale, planes 1 and 2 use chroma_qp[0] and chroma_qp[1] */ |
602 | 1.33M | if(IS_INTRA16x16(mb_type)){ |
603 | 508k | AV_ZERO128(sl->mb_luma_dc[p]+0); /* clear the DC buffer of this plane before decoding into it */ |
604 | 508k | AV_ZERO128(sl->mb_luma_dc[p]+8); |
605 | 508k | AV_ZERO128(sl->mb_luma_dc[p]+16); |
606 | 508k | AV_ZERO128(sl->mb_luma_dc[p]+24); |
607 | 508k | if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) { /* DC block: qmul is NULL because decode_residual() stores these levels without dequantisation */ |
608 | 7.13k | return -1; //FIXME continue if partitioned and other return -1 too |
609 | 7.13k | } |
610 | | |
611 | 501k | av_assert2((cbp&15) == 0 || (cbp&15) == 15); |
612 | | |
613 | 501k | if(cbp&15){ |
614 | 274k | for(i8x8=0; i8x8<4; i8x8++){ |
615 | 1.08M | for(i4x4=0; i4x4<4; i4x4++){ |
616 | 871k | const int index= i4x4 + 4*i8x8 + p*16; |
617 | 871k | if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), /* 15 coefficients, with the scan advanced past its first entry */ |
618 | 871k | index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){ |
619 | 14.7k | return -1; |
620 | 14.7k | } |
621 | 871k | } |
622 | 225k | } |
623 | 48.3k | return 0xf; |
624 | 438k | }else{ |
625 | 438k | fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1); /* nothing else is coded: zero the non-zero counts of this plane */ |
626 | 438k | return 0; |
627 | 438k | } |
628 | 825k | }else{ |
629 | 825k | int cqm = (IS_INTRA( mb_type ) ? 0:3)+p; /* dequant matrix index: 0..2 for intra macroblocks, 3..5 otherwise */ |
630 | | /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */ |
631 | 825k | int new_cbp = 0; |
632 | 3.89M | for(i8x8=0; i8x8<4; i8x8++){ |
633 | 3.15M | if(cbp & (1<<i8x8)){ |
634 | 1.13M | if(IS_8x8DCT(mb_type)){ |
635 | 419k | int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift]; |
636 | 419k | uint8_t *nnz; |
637 | 2.01M | for(i4x4=0; i4x4<4; i4x4++){ /* four calls of 16 coefficients each write into the same 8x8 buffer through different parts of scan8x8 */ |
638 | 1.63M | const int index= i4x4 + 4*i8x8 + p*16; |
639 | 1.63M | if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4, |
640 | 1.63M | h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 ) |
641 | 31.8k | return -1; |
642 | 1.63M | } |
643 | 387k | nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]]; |
644 | 387k | nnz[0] += nnz[1] + nnz[8] + nnz[9]; /* fold the four per-call counts into the first cache entry */ |
645 | 387k | new_cbp |= !!nnz[0] << i8x8; |
646 | 716k | }else{ |
647 | 3.44M | for(i4x4=0; i4x4<4; i4x4++){ |
648 | 2.78M | const int index= i4x4 + 4*i8x8 + p*16; |
649 | 2.78M | if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index, |
650 | 2.78M | scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){ |
651 | 57.9k | return -1; |
652 | 57.9k | } |
653 | 2.72M | new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8; /* NOTE(review): this ORs the raw count shifted by i8x8, not a 0/1 flag, so bits above i8x8 can be set -- confirm callers tolerate this */ |
654 | 2.72M | } |
655 | 716k | } |
656 | 2.02M | }else{ |
657 | 2.02M | uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]]; |
658 | 2.02M | nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; /* 8x8 block not coded: clear its four cache entries */ |
659 | 2.02M | } |
660 | 3.15M | } |
661 | 735k | return new_cbp; |
662 | 825k | } |
663 | 1.33M | } |
664 | | |
665 | | int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl) |
666 | 6.88M | { |
667 | 6.88M | int mb_xy; |
668 | 6.88M | int partition_count; |
669 | 6.88M | unsigned int mb_type, cbp; |
670 | 6.88M | int dct8x8_allowed = h->ps.pps->transform_8x8_mode; |
671 | 6.88M | const int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2; |
672 | 6.88M | const int pixel_shift = h->pixel_shift; |
673 | | |
674 | 6.88M | mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride; |
675 | | |
676 | 6.88M | ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->poc.frame_num, sl->mb_x, sl->mb_y); |
677 | 6.88M | cbp = 0; /* avoid warning. FIXME: find a solution without slowing |
678 | | down the code */ |
679 | 6.88M | if (sl->slice_type_nos != AV_PICTURE_TYPE_I) { |
680 | 6.29M | if (sl->mb_skip_run == -1) { |
681 | 1.80M | unsigned mb_skip_run = get_ue_golomb_long(&sl->gb); |
682 | 1.80M | if (mb_skip_run > h->mb_num) { |
683 | 17.1k | av_log(h->avctx, AV_LOG_ERROR, "mb_skip_run %d is invalid\n", mb_skip_run); |
684 | 17.1k | return AVERROR_INVALIDDATA; |
685 | 17.1k | } |
686 | 1.79M | sl->mb_skip_run = mb_skip_run; |
687 | 1.79M | } |
688 | | |
689 | 6.27M | if (sl->mb_skip_run--) { |
690 | 4.51M | if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) { |
691 | 1.33M | if (sl->mb_skip_run == 0) |
692 | 168k | sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb); |
693 | 1.33M | } |
694 | 4.51M | decode_mb_skip(h, sl); |
695 | 4.51M | return 0; |
696 | 4.51M | } |
697 | 6.27M | } |
698 | 2.35M | if (FRAME_MBAFF(h)) { |
699 | 934k | if ((sl->mb_y & 1) == 0) |
700 | 512k | sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb); |
701 | 934k | } |
702 | | |
703 | 2.35M | sl->prev_mb_skipped = 0; |
704 | | |
705 | 2.35M | mb_type= get_ue_golomb(&sl->gb); |
706 | 2.35M | if (sl->slice_type_nos == AV_PICTURE_TYPE_B) { |
707 | 821k | if(mb_type < 23){ |
708 | 814k | partition_count = ff_h264_b_mb_type_info[mb_type].partition_count; |
709 | 814k | mb_type = ff_h264_b_mb_type_info[mb_type].type; |
710 | 814k | }else{ |
711 | 6.71k | mb_type -= 23; |
712 | 6.71k | goto decode_intra_mb; |
713 | 6.71k | } |
714 | 1.53M | } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) { |
715 | 934k | if(mb_type < 5){ |
716 | 748k | partition_count = ff_h264_p_mb_type_info[mb_type].partition_count; |
717 | 748k | mb_type = ff_h264_p_mb_type_info[mb_type].type; |
718 | 748k | }else{ |
719 | 186k | mb_type -= 5; |
720 | 186k | goto decode_intra_mb; |
721 | 186k | } |
722 | 934k | }else{ |
723 | 595k | av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I); |
724 | 595k | if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type) |
725 | 35.5k | mb_type--; |
726 | 789k | decode_intra_mb: |
727 | 789k | if(mb_type > 25){ |
728 | 97.0k | av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y); |
729 | 97.0k | return -1; |
730 | 97.0k | } |
731 | 691k | partition_count=0; |
732 | 691k | cbp = ff_h264_i_mb_type_info[mb_type].cbp; |
733 | 691k | sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode; |
734 | 691k | mb_type = ff_h264_i_mb_type_info[mb_type].type; |
735 | 691k | } |
736 | | |
737 | 2.25M | if (MB_FIELD(sl)) |
738 | 875k | mb_type |= MB_TYPE_INTERLACED; |
739 | | |
740 | 2.25M | h->slice_table[mb_xy] = sl->slice_num; |
741 | | |
742 | 2.25M | if(IS_INTRA_PCM(mb_type)){ |
743 | 3.51k | const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] * |
744 | 3.51k | h->ps.sps->bit_depth_luma; |
745 | | |
746 | | // We assume these blocks are very rare so we do not optimize it. |
747 | 3.51k | sl->intra_pcm_ptr = align_get_bits(&sl->gb); |
748 | 3.51k | if (get_bits_left(&sl->gb) < mb_size) { |
749 | 1.52k | av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n"); |
750 | 1.52k | return AVERROR_INVALIDDATA; |
751 | 1.52k | } |
752 | 1.98k | skip_bits_long(&sl->gb, mb_size); |
753 | | |
754 | | // In deblocking, the quantizer is 0 |
755 | 1.98k | h->cur_pic.qscale_table[mb_xy] = 0; |
756 | | // All coeffs are present |
757 | 1.98k | memset(h->non_zero_count[mb_xy], 16, 48); |
758 | | |
759 | 1.98k | h->cur_pic.mb_type[mb_xy] = mb_type; |
760 | 1.98k | return 0; |
761 | 3.51k | } |
762 | | |
763 | 2.25M | fill_decode_neighbors(h, sl, mb_type); |
764 | 2.25M | fill_decode_caches(h, sl, mb_type); |
765 | | |
766 | | //mb_pred |
767 | 2.25M | if(IS_INTRA(mb_type)){ |
768 | 688k | int pred_mode; |
769 | | // init_top_left_availability(h); |
770 | 688k | if(IS_INTRA4x4(mb_type)){ |
771 | 200k | int i; |
772 | 200k | int di = 1; |
773 | 200k | if(dct8x8_allowed && get_bits1(&sl->gb)){ |
774 | 71.9k | mb_type |= MB_TYPE_8x8DCT; |
775 | 71.9k | di = 4; |
776 | 71.9k | } |
777 | | |
778 | | // fill_intra4x4_pred_table(h); |
779 | 2.54M | for(i=0; i<16; i+=di){ |
780 | 2.34M | int mode = pred_intra_mode(h, sl, i); |
781 | | |
782 | 2.34M | if(!get_bits1(&sl->gb)){ |
783 | 907k | const int rem_mode= get_bits(&sl->gb, 3); |
784 | 907k | mode = rem_mode + (rem_mode >= mode); |
785 | 907k | } |
786 | | |
787 | 2.34M | if(di==4) |
788 | 287k | fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1); |
789 | 2.05M | else |
790 | 2.05M | sl->intra4x4_pred_mode_cache[scan8[i]] = mode; |
791 | 2.34M | } |
792 | 200k | write_back_intra_pred_mode(h, sl); |
793 | 200k | if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx, |
794 | 200k | sl->top_samples_available, sl->left_samples_available) < 0) |
795 | 47.7k | return -1; |
796 | 487k | }else{ |
797 | 487k | sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available, |
798 | 487k | sl->left_samples_available, sl->intra16x16_pred_mode, 0); |
799 | 487k | if (sl->intra16x16_pred_mode < 0) |
800 | 79.0k | return -1; |
801 | 487k | } |
802 | 561k | if(decode_chroma){ |
803 | 420k | pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available, |
804 | 420k | sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1); |
805 | 420k | if(pred_mode < 0) |
806 | 24.2k | return -1; |
807 | 396k | sl->chroma_pred_mode = pred_mode; |
808 | 396k | } else { |
809 | 140k | sl->chroma_pred_mode = DC_128_PRED8x8; |
810 | 140k | } |
811 | 1.56M | }else if(partition_count==4){ |
812 | 135k | int i, j, sub_partition_count[4], list, ref[2][4]; |
813 | | |
814 | 135k | if (sl->slice_type_nos == AV_PICTURE_TYPE_B) { |
815 | 152k | for(i=0; i<4; i++){ |
816 | 122k | sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb); |
817 | 122k | if(sl->sub_mb_type[i] >=13){ |
818 | 356 | av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y); |
819 | 356 | return -1; |
820 | 356 | } |
821 | 122k | sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count; |
822 | 122k | sl->sub_mb_type[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type; |
823 | 122k | } |
824 | 30.4k | if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) { |
825 | 16.7k | ff_h264_pred_direct_motion(h, sl, &mb_type); |
826 | 16.7k | sl->ref_cache[0][scan8[4]] = |
827 | 16.7k | sl->ref_cache[1][scan8[4]] = |
828 | 16.7k | sl->ref_cache[0][scan8[12]] = |
829 | 16.7k | sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE; |
830 | 16.7k | } |
831 | 104k | }else{ |
832 | 104k | av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ? |
833 | 491k | for(i=0; i<4; i++){ |
834 | 400k | sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb); |
835 | 400k | if(sl->sub_mb_type[i] >=4){ |
836 | 12.9k | av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y); |
837 | 12.9k | return -1; |
838 | 12.9k | } |
839 | 387k | sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count; |
840 | 387k | sl->sub_mb_type[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type; |
841 | 387k | } |
842 | 104k | } |
843 | | |
844 | 271k | for (list = 0; list < sl->list_count; list++) { |
845 | 152k | int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl); |
846 | 757k | for(i=0; i<4; i++){ |
847 | 608k | if(IS_DIRECT(sl->sub_mb_type[i])) continue; |
848 | 553k | if(IS_DIR(sl->sub_mb_type[i], 0, list)){ |
849 | 466k | unsigned int tmp; |
850 | 466k | if(ref_count == 1){ |
851 | 134k | tmp= 0; |
852 | 331k | }else if(ref_count == 2){ |
853 | 155k | tmp= get_bits1(&sl->gb)^1; |
854 | 176k | }else{ |
855 | 176k | tmp= get_ue_golomb_31(&sl->gb); |
856 | 176k | if(tmp>=ref_count){ |
857 | 3.56k | av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp); |
858 | 3.56k | return -1; |
859 | 3.56k | } |
860 | 176k | } |
861 | 462k | ref[list][i]= tmp; |
862 | 462k | }else{ |
863 | | //FIXME |
864 | 87.3k | ref[list][i] = -1; |
865 | 87.3k | } |
866 | 553k | } |
867 | 152k | } |
868 | | |
869 | 118k | if(dct8x8_allowed) |
870 | 47.2k | dct8x8_allowed = get_dct8x8_allowed(h, sl); |
871 | | |
872 | 267k | for (list = 0; list < sl->list_count; list++) { |
873 | 745k | for(i=0; i<4; i++){ |
874 | 596k | if(IS_DIRECT(sl->sub_mb_type[i])) { |
875 | 54.3k | sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ]; |
876 | 54.3k | continue; |
877 | 54.3k | } |
878 | 542k | sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]= |
879 | 542k | sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; |
880 | | |
881 | 542k | if(IS_DIR(sl->sub_mb_type[i], 0, list)){ |
882 | 455k | const int sub_mb_type= sl->sub_mb_type[i]; |
883 | 455k | const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1; |
884 | 1.39M | for(j=0; j<sub_partition_count[i]; j++){ |
885 | 935k | int mx, my; |
886 | 935k | const int index= 4*i + block_width*j; |
887 | 935k | int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ]; |
888 | 935k | pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my); |
889 | 935k | mx += (unsigned)get_se_golomb(&sl->gb); |
890 | 935k | my += (unsigned)get_se_golomb(&sl->gb); |
891 | 935k | ff_tlog(h->avctx, "final mv:%d %d\n", mx, my); |
892 | | |
893 | 935k | if(IS_SUB_8X8(sub_mb_type)){ |
894 | 179k | mv_cache[ 1 ][0]= |
895 | 179k | mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; |
896 | 179k | mv_cache[ 1 ][1]= |
897 | 179k | mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; |
898 | 755k | }else if(IS_SUB_8X4(sub_mb_type)){ |
899 | 238k | mv_cache[ 1 ][0]= mx; |
900 | 238k | mv_cache[ 1 ][1]= my; |
901 | 517k | }else if(IS_SUB_4X8(sub_mb_type)){ |
902 | 106k | mv_cache[ 8 ][0]= mx; |
903 | 106k | mv_cache[ 8 ][1]= my; |
904 | 106k | } |
905 | 935k | mv_cache[ 0 ][0]= mx; |
906 | 935k | mv_cache[ 0 ][1]= my; |
907 | 935k | } |
908 | 455k | }else{ |
909 | 87.0k | uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0]; |
910 | 87.0k | p[0] = p[1]= |
911 | 87.0k | p[8] = p[9]= 0; |
912 | 87.0k | } |
913 | 542k | } |
914 | 149k | } |
915 | 1.42M | }else if(IS_DIRECT(mb_type)){ |
916 | 400k | ff_h264_pred_direct_motion(h, sl, &mb_type); |
917 | 400k | dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag; |
918 | 1.02M | }else{ |
919 | 1.02M | int list, mx, my, i; |
920 | | //FIXME we should set ref_idx_l? to 0 if we use that later ... |
921 | 1.02M | if(IS_16X16(mb_type)){ |
922 | 1.65M | for (list = 0; list < sl->list_count; list++) { |
923 | 947k | unsigned int val; |
924 | 947k | if(IS_DIR(mb_type, 0, list)){ |
925 | 785k | unsigned rc = sl->ref_count[list] << MB_MBAFF(sl); |
926 | 785k | if (rc == 1) { |
927 | 121k | val= 0; |
928 | 663k | } else if (rc == 2) { |
929 | 298k | val= get_bits1(&sl->gb)^1; |
930 | 365k | }else{ |
931 | 365k | val= get_ue_golomb_31(&sl->gb); |
932 | 365k | if (val >= rc) { |
933 | 12.3k | av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val); |
934 | 12.3k | return -1; |
935 | 12.3k | } |
936 | 365k | } |
937 | 773k | fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1); |
938 | 773k | } |
939 | 947k | } |
940 | 1.64M | for (list = 0; list < sl->list_count; list++) { |
941 | 933k | if(IS_DIR(mb_type, 0, list)){ |
942 | 773k | pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my); |
943 | 773k | mx += (unsigned)get_se_golomb(&sl->gb); |
944 | 773k | my += (unsigned)get_se_golomb(&sl->gb); |
945 | 773k | ff_tlog(h->avctx, "final mv:%d %d\n", mx, my); |
946 | | |
947 | 773k | fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); |
948 | 773k | } |
949 | 933k | } |
950 | 712k | } |
951 | 302k | else if(IS_16X8(mb_type)){ |
952 | 367k | for (list = 0; list < sl->list_count; list++) { |
953 | 665k | for(i=0; i<2; i++){ |
954 | 446k | unsigned int val; |
955 | 446k | if(IS_DIR(mb_type, i, list)){ |
956 | 330k | unsigned rc = sl->ref_count[list] << MB_MBAFF(sl); |
957 | 330k | if (rc == 1) { |
958 | 36.7k | val= 0; |
959 | 293k | } else if (rc == 2) { |
960 | 230k | val= get_bits1(&sl->gb)^1; |
961 | 230k | }else{ |
962 | 62.9k | val= get_ue_golomb_31(&sl->gb); |
963 | 62.9k | if (val >= rc) { |
964 | 4.91k | av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val); |
965 | 4.91k | return -1; |
966 | 4.91k | } |
967 | 62.9k | } |
968 | 330k | }else |
969 | 116k | val= LIST_NOT_USED&0xFF; |
970 | 441k | fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1); |
971 | 441k | } |
972 | 224k | } |
973 | 361k | for (list = 0; list < sl->list_count; list++) { |
974 | 654k | for(i=0; i<2; i++){ |
975 | 436k | unsigned int val; |
976 | 436k | if(IS_DIR(mb_type, i, list)){ |
977 | 322k | pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my); |
978 | 322k | mx += (unsigned)get_se_golomb(&sl->gb); |
979 | 322k | my += (unsigned)get_se_golomb(&sl->gb); |
980 | 322k | ff_tlog(h->avctx, "final mv:%d %d\n", mx, my); |
981 | | |
982 | 322k | val= pack16to32(mx,my); |
983 | 322k | }else |
984 | 114k | val=0; |
985 | 436k | fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4); |
986 | 436k | } |
987 | 218k | } |
988 | 154k | }else{ |
989 | 154k | av_assert2(IS_8X16(mb_type)); |
990 | 387k | for (list = 0; list < sl->list_count; list++) { |
991 | 703k | for(i=0; i<2; i++){ |
992 | 471k | unsigned int val; |
993 | 471k | if(IS_DIR(mb_type, i, list)){ //FIXME optimize |
994 | 352k | unsigned rc = sl->ref_count[list] << MB_MBAFF(sl); |
995 | 352k | if (rc == 1) { |
996 | 53.5k | val= 0; |
997 | 299k | } else if (rc == 2) { |
998 | 251k | val= get_bits1(&sl->gb)^1; |
999 | 251k | }else{ |
1000 | 48.1k | val= get_ue_golomb_31(&sl->gb); |
1001 | 48.1k | if (val >= rc) { |
1002 | 5.18k | av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val); |
1003 | 5.18k | return -1; |
1004 | 5.18k | } |
1005 | 48.1k | } |
1006 | 352k | }else |
1007 | 118k | val= LIST_NOT_USED&0xFF; |
1008 | 465k | fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1); |
1009 | 465k | } |
1010 | 237k | } |
1011 | 381k | for (list = 0; list < sl->list_count; list++) { |
1012 | 696k | for(i=0; i<2; i++){ |
1013 | 464k | unsigned int val; |
1014 | 464k | if(IS_DIR(mb_type, i, list)){ |
1015 | 346k | pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); |
1016 | 346k | mx += (unsigned)get_se_golomb(&sl->gb); |
1017 | 346k | my += (unsigned)get_se_golomb(&sl->gb); |
1018 | 346k | ff_tlog(h->avctx, "final mv:%d %d\n", mx, my); |
1019 | | |
1020 | 346k | val= pack16to32(mx,my); |
1021 | 346k | }else |
1022 | 117k | val=0; |
1023 | 464k | fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4); |
1024 | 464k | } |
1025 | 232k | } |
1026 | 149k | } |
1027 | 1.02M | } |
1028 | | |
1029 | 2.06M | if(IS_INTER(mb_type)) |
1030 | 1.52M | write_back_motion(h, sl, mb_type); |
1031 | | |
1032 | 2.06M | if(!IS_INTRA16x16(mb_type)){ |
1033 | 1.66M | cbp= get_ue_golomb(&sl->gb); |
1034 | | |
1035 | 1.66M | if(decode_chroma){ |
1036 | 1.41M | if(cbp > 47){ |
1037 | 34.7k | av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y); |
1038 | 34.7k | return -1; |
1039 | 34.7k | } |
1040 | 1.37M | if (IS_INTRA4x4(mb_type)) |
1041 | 91.2k | cbp = ff_h264_golomb_to_intra4x4_cbp[cbp]; |
1042 | 1.28M | else |
1043 | 1.28M | cbp = ff_h264_golomb_to_inter_cbp[cbp]; |
1044 | 1.37M | }else{ |
1045 | 255k | if(cbp > 15){ |
1046 | 37.5k | av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y); |
1047 | 37.5k | return -1; |
1048 | 37.5k | } |
1049 | 217k | if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp]; |
1050 | 170k | else cbp= golomb_to_inter_cbp_gray[cbp]; |
1051 | 217k | } |
1052 | 1.66M | } else { |
1053 | 395k | if (!decode_chroma && cbp>15) { |
1054 | 14.2k | av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n"); |
1055 | 14.2k | return AVERROR_INVALIDDATA; |
1056 | 14.2k | } |
1057 | 395k | } |
1058 | | |
1059 | 1.97M | if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){ |
1060 | 206k | mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb); |
1061 | 206k | } |
1062 | 1.97M | sl->cbp= |
1063 | 1.97M | h->cbp_table[mb_xy]= cbp; |
1064 | 1.97M | h->cur_pic.mb_type[mb_xy] = mb_type; |
1065 | | |
1066 | 1.97M | if(cbp || IS_INTRA16x16(mb_type)){ |
1067 | 1.10M | int i4x4, i8x8, chroma_idx; |
1068 | 1.10M | int dquant; |
1069 | 1.10M | int ret; |
1070 | 1.10M | GetBitContext *gb = &sl->gb; |
1071 | 1.10M | const uint8_t *scan, *scan8x8; |
1072 | 1.10M | const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8); |
1073 | | |
1074 | 1.10M | dquant= get_se_golomb(&sl->gb); |
1075 | | |
1076 | 1.10M | sl->qscale += (unsigned)dquant; |
1077 | | |
1078 | 1.10M | if (((unsigned)sl->qscale) > max_qp){ |
1079 | 10.5k | if (sl->qscale < 0) sl->qscale += max_qp + 1; |
1080 | 6.50k | else sl->qscale -= max_qp+1; |
1081 | 10.5k | if (((unsigned)sl->qscale) > max_qp){ |
1082 | 4.70k | av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y); |
1083 | 4.70k | sl->qscale = max_qp; |
1084 | 4.70k | return -1; |
1085 | 4.70k | } |
1086 | 10.5k | } |
1087 | | |
1088 | 1.10M | sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale); |
1089 | 1.10M | sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale); |
1090 | | |
1091 | 1.10M | if(IS_INTERLACED(mb_type)){ |
1092 | 407k | scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; |
1093 | 407k | scan = sl->qscale ? h->field_scan : h->field_scan_q0; |
1094 | 697k | }else{ |
1095 | 697k | scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; |
1096 | 697k | scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; |
1097 | 697k | } |
1098 | | |
1099 | 1.10M | if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) { |
1100 | 83.2k | return -1; |
1101 | 83.2k | } |
1102 | 1.02M | h->cbp_table[mb_xy] |= ret << 12; |
1103 | 1.02M | if (CHROMA444(h)) { |
1104 | 120k | if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) { |
1105 | 11.0k | return -1; |
1106 | 11.0k | } |
1107 | 109k | if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) { |
1108 | 17.3k | return -1; |
1109 | 17.3k | } |
1110 | 901k | } else { |
1111 | 901k | const int num_c8x8 = h->ps.sps->chroma_format_idc; |
1112 | | |
1113 | 901k | if(cbp&0x30){ |
1114 | 949k | for(chroma_idx=0; chroma_idx<2; chroma_idx++) |
1115 | 633k | if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift), |
1116 | 633k | CHROMA_DC_BLOCK_INDEX + chroma_idx, |
1117 | 633k | CHROMA422(h) ? ff_h264_chroma422_dc_scan : ff_h264_chroma_dc_scan, |
1118 | 633k | NULL, 4 * num_c8x8) < 0) { |
1119 | 1.88k | return -1; |
1120 | 1.88k | } |
1121 | 317k | } |
1122 | | |
1123 | 899k | if(cbp&0x20){ |
1124 | 366k | for(chroma_idx=0; chroma_idx<2; chroma_idx++){ |
1125 | 262k | const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]]; |
1126 | 262k | int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift); |
1127 | 561k | for (i8x8 = 0; i8x8<num_c8x8; i8x8++) { |
1128 | 1.57M | for (i4x4 = 0; i4x4 < 4; i4x4++) { |
1129 | 1.27M | const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4; |
1130 | 1.27M | if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0) |
1131 | 30.8k | return -1; |
1132 | 1.24M | mb += 16 << pixel_shift; |
1133 | 1.24M | } |
1134 | 329k | } |
1135 | 262k | } |
1136 | 764k | }else{ |
1137 | 764k | fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); |
1138 | 764k | fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); |
1139 | 764k | } |
1140 | 899k | } |
1141 | 1.02M | }else{ |
1142 | 865k | fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1); |
1143 | 865k | fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); |
1144 | 865k | fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); |
1145 | 865k | } |
1146 | 1.82M | h->cur_pic.qscale_table[mb_xy] = sl->qscale; |
1147 | 1.82M | write_back_non_zero_count(h, sl); |
1148 | | |
1149 | 1.82M | return 0; |
1150 | 1.97M | } |