/src/ffmpeg/libavcodec/h264_cavlc.c
Line | Count | Source |
1 | | /* |
2 | | * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding |
3 | | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | /** |
23 | | * @file |
24 | | * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding. |
25 | | * @author Michael Niedermayer <michaelni@gmx.at> |
26 | | */ |
27 | | |
28 | 15.2M | #define CABAC(h) 0 |
29 | | #define UNCHECKED_BITSTREAM_READER 1 |
30 | | |
31 | | #include "h264dec.h" |
32 | | #include "h264_mvpred.h" |
33 | | #include "h264data.h" |
34 | | #include "golomb.h" |
35 | | #include "mpegutils.h" |
36 | | #include "libavutil/avassert.h" |
37 | | |
38 | | |
/*
 * Lookups from a code number in 0..15 to a 4-bit coded block pattern; each
 * table is a permutation of 0..15.
 * NOTE(review): going by the names these serve inter resp. intra4x4
 * macroblocks of streams without chroma ("gray"); the users are outside this
 * part of the file -- confirm there.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,
     8,  3,  5, 10,
    12, 15,  7, 11,
    13, 14,  6,  9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11,
    13, 14,  3,  5,
    10, 12,  1,  2,
     4,  8,  6,  9,
};
46 | | |
/*
 * Code lengths and code words of the coeff_token VLCs used for chroma DC
 * blocks.  Entry 4 * total_coeff + trailing_ones describes one symbol
 * (decode_residual() splits the decoded value as coeff_token >> 2 and
 * coeff_token & 3).  A length of 0 appears wherever trailing_ones would
 * exceed total_coeff; presumably the table builder treats it as "no code".
 */

/* 4 coefficients per block: total_coeff 0..4 */
static const uint8_t chroma_dc_coeff_token_len[4*5] = {
    2, 0, 0, 0,  /* total_coeff 0 */
    6, 1, 0, 0,  /* total_coeff 1 */
    6, 6, 3, 0,  /* total_coeff 2 */
    6, 7, 7, 6,  /* total_coeff 3 */
    6, 8, 8, 7,  /* total_coeff 4 */
};

static const uint8_t chroma_dc_coeff_token_bits[4*5] = {
    1, 0, 0, 0,  /* total_coeff 0 */
    7, 1, 0, 0,  /* total_coeff 1 */
    4, 6, 1, 0,  /* total_coeff 2 */
    3, 3, 2, 5,  /* total_coeff 3 */
    2, 3, 2, 0,  /* total_coeff 4 */
};

/* 8 coefficients per block (4:2:2): total_coeff 0..8 */
static const uint8_t chroma422_dc_coeff_token_len[4*9] = {
     1,  0,  0,  0,  /* total_coeff 0 */
     7,  2,  0,  0,  /* total_coeff 1 */
     7,  7,  3,  0,  /* total_coeff 2 */
     9,  7,  7,  5,  /* total_coeff 3 */
     9,  9,  7,  6,  /* total_coeff 4 */
    10, 10,  9,  7,  /* total_coeff 5 */
    11, 11, 10,  7,  /* total_coeff 6 */
    12, 12, 11, 10,  /* total_coeff 7 */
    13, 12, 12, 11,  /* total_coeff 8 */
};

static const uint8_t chroma422_dc_coeff_token_bits[4*9] = {
     1,  0,  0,  0,  /* total_coeff 0 */
    15,  1,  0,  0,  /* total_coeff 1 */
    14, 13,  1,  0,  /* total_coeff 2 */
     7, 12, 11,  1,  /* total_coeff 3 */
     6,  5, 10,  1,  /* total_coeff 4 */
     7,  6,  4,  9,  /* total_coeff 5 */
     7,  6,  5,  8,  /* total_coeff 6 */
     7,  6,  5,  4,  /* total_coeff 7 */
     7,  5,  4,  4,  /* total_coeff 8 */
};
86 | | |
/*
 * Code lengths and code words of the four coeff_token VLCs used for blocks
 * with more than 8 coefficients.  Within each table, entry
 * 4 * total_coeff + trailing_ones describes one symbol (total_coeff 0..16).
 * The first line of each table is total_coeff 0; every following line holds
 * two consecutive total_coeff groups.
 */
static const uint8_t coeff_token_len[4][4*17] = {
    {
         1, 0, 0, 0,
         6, 2, 0, 0,    8, 6, 3, 0,
         9, 8, 7, 5,   10, 9, 8, 6,
        11,10, 9, 7,   13,11,10, 8,
        13,13,11, 9,   13,13,13,10,
        14,14,13,11,   14,14,14,13,
        15,15,14,14,   15,15,15,14,
        16,15,15,15,   16,16,16,15,
        16,16,16,16,   16,16,16,16,
    },
    {
         2, 0, 0, 0,
         6, 2, 0, 0,    6, 5, 3, 0,
         7, 6, 6, 4,    8, 6, 6, 4,
         8, 7, 7, 5,    9, 8, 8, 6,
        11, 9, 9, 6,   11,11,11, 7,
        12,11,11, 9,   12,12,12,11,
        12,12,12,11,   13,13,13,12,
        13,13,13,13,   13,14,13,13,
        14,14,14,13,   14,14,14,14,
    },
    {
         4, 0, 0, 0,
         6, 4, 0, 0,    6, 5, 4, 0,
         6, 5, 5, 4,    7, 5, 5, 4,
         7, 5, 5, 4,    7, 6, 6, 4,
         7, 6, 6, 4,    8, 7, 7, 5,
         8, 8, 7, 6,    9, 8, 8, 7,
         9, 9, 8, 8,    9, 9, 9, 8,
        10, 9, 9, 9,   10,10,10,10,
        10,10,10,10,   10,10,10,10,
    },
    {
         6, 0, 0, 0,
         6, 6, 0, 0,    6, 6, 6, 0,
         6, 6, 6, 6,    6, 6, 6, 6,
         6, 6, 6, 6,    6, 6, 6, 6,
         6, 6, 6, 6,    6, 6, 6, 6,
         6, 6, 6, 6,    6, 6, 6, 6,
         6, 6, 6, 6,    6, 6, 6, 6,
         6, 6, 6, 6,    6, 6, 6, 6,
         6, 6, 6, 6,    6, 6, 6, 6,
    }
};

static const uint8_t coeff_token_bits[4][4*17] = {
    {
         1, 0, 0, 0,
         5, 1, 0, 0,    7, 4, 1, 0,
         7, 6, 5, 3,    7, 6, 5, 3,
         7, 6, 5, 4,   15, 6, 5, 4,
        11,14, 5, 4,    8,10,13, 4,
        15,14, 9, 4,   11,10,13,12,
        15,14, 9,12,   11,10,13, 8,
        15, 1, 9,12,   11,14,13, 8,
         7,10, 9,12,    4, 6, 5, 8,
    },
    {
         3, 0, 0, 0,
        11, 2, 0, 0,    7, 7, 3, 0,
         7,10, 9, 5,    7, 6, 5, 4,
         4, 6, 5, 6,    7, 6, 5, 8,
        15, 6, 5, 4,   11,14,13, 4,
        15,10, 9, 4,   11,14,13,12,
         8,10, 9, 8,   15,14,13,12,
        11,10, 9,12,    7,11, 6, 8,
         9, 8,10, 1,    7, 6, 5, 4,
    },
    {
        15, 0, 0, 0,
        15,14, 0, 0,   11,15,13, 0,
         8,12,14,12,   15,10,11,11,
        11, 8, 9,10,    9,14,13, 9,
         8,10, 9, 8,   15,14,13,13,
        11,14,10,12,   15,10,13,12,
        11,14, 9,12,    8,10,13, 8,
        13, 7, 9,12,    9,12,11,10,
         5, 8, 7, 6,    1, 4, 3, 2,
    },
    {
         3, 0, 0, 0,
         0, 1, 0, 0,    4, 5, 6, 0,
         8, 9,10,11,   12,13,14,15,
        16,17,18,19,   20,21,22,23,
        24,25,26,27,   28,29,30,31,
        32,33,34,35,   36,37,38,39,
        40,41,42,43,   44,45,46,47,
        48,49,50,51,   52,53,54,55,
        56,57,58,59,   60,61,62,63,
    }
};
148 | | |
/*
 * Code lengths and code words of the total_zeros VLCs.  Row r feeds the VLC
 * that ff_h264_decode_init_vlc() stores in total_zeros_vlc[r + 1], i.e. the
 * one decode_residual() selects when total_coeff == r + 1; the column is the
 * symbol index.  Row 15 has no initialiser and is therefore all zero.
 */
static const uint8_t total_zeros_len[16][16] = {
    [ 0] = { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },
    [ 1] = { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },
    [ 2] = { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },
    [ 3] = { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },
    [ 4] = { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },
    [ 5] = { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },
    [ 6] = { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },
    [ 7] = { 6, 4, 5, 3, 2, 2, 3, 3, 6 },
    [ 8] = { 6, 6, 4, 2, 2, 3, 2, 5 },
    [ 9] = { 5, 5, 3, 2, 2, 2, 4 },
    [10] = { 4, 4, 3, 3, 1, 3 },
    [11] = { 4, 4, 2, 1, 3 },
    [12] = { 3, 3, 1, 2 },
    [13] = { 2, 2, 1 },
    [14] = { 1, 1 },
};

static const uint8_t total_zeros_bits[16][16] = {
    [ 0] = { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },
    [ 1] = { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },
    [ 2] = { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },
    [ 3] = { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },
    [ 4] = { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },
    [ 5] = { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },
    [ 6] = { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },
    [ 7] = { 1, 1, 1, 3, 3, 2, 2, 1, 0 },
    [ 8] = { 1, 0, 1, 3, 2, 1, 1, 1 },
    [ 9] = { 1, 0, 1, 3, 2, 1, 1 },
    [10] = { 0, 1, 1, 2, 1, 3 },
    [11] = { 0, 1, 1, 1, 1 },
    [12] = { 0, 1, 1, 1 },
    [13] = { 0, 1, 1 },
    [14] = { 0, 1 },
};
184 | | |
/*
 * Code lengths and code words of the total_zeros VLCs for chroma DC blocks.
 * Row r is the source of the VLC stored at index r + 1 of
 * chroma_dc_total_zeros_vlc / chroma422_dc_total_zeros_vlc, which
 * decode_residual() indexes with total_coeff; the column is the symbol index.
 */

/* blocks of 4 coefficients */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    [0] = { 1, 2, 3, 3 },
    [1] = { 1, 2, 2, 0 },
    [2] = { 1, 1, 0, 0 },
};

static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    [0] = { 1, 1, 1, 0 },
    [1] = { 1, 1, 0, 0 },
    [2] = { 1, 0, 0, 0 },
};

/* blocks of 8 coefficients (4:2:2) */
static const uint8_t chroma422_dc_total_zeros_len[7][8] = {
    [0] = { 1, 3, 3, 4, 4, 4, 5, 5 },
    [1] = { 3, 2, 3, 3, 3, 3, 3 },
    [2] = { 3, 3, 2, 2, 3, 3 },
    [3] = { 3, 2, 2, 2, 3 },
    [4] = { 2, 2, 2, 2 },
    [5] = { 2, 2, 1 },
    [6] = { 1, 1 },
};

static const uint8_t chroma422_dc_total_zeros_bits[7][8] = {
    [0] = { 1, 2, 3, 2, 3, 1, 1, 0 },
    [1] = { 0, 1, 1, 4, 5, 6, 7 },
    [2] = { 0, 1, 1, 2, 6, 7 },
    [3] = { 6, 0, 1, 2, 7 },
    [4] = { 0, 1, 2, 3 },
    [5] = { 0, 1, 1 },
    [6] = { 0, 1 },
};
216 | | |
/*
 * Code lengths and code words of the run_before VLCs.  Rows 0..5 feed
 * run_vlc[1..6], which decode_residual() selects while zeros_left is below 7;
 * row 6 feeds the table used for every larger zeros_left.  The column is the
 * symbol index.
 */
static const uint8_t run_len[7][16] = {
    [0] = { 1, 1 },
    [1] = { 1, 2, 2 },
    [2] = { 2, 2, 2, 2 },
    [3] = { 2, 2, 2, 3, 3 },
    [4] = { 2, 2, 3, 3, 3, 3 },
    [5] = { 2, 3, 3, 3, 3, 3, 3 },
    [6] = { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
};

static const uint8_t run_bits[7][16] = {
    [0] = { 1, 0 },
    [1] = { 1, 1, 0 },
    [2] = { 3, 2, 1, 0 },
    [3] = { 3, 2, 1, 1, 0 },
    [4] = { 3, 2, 3, 2, 1, 0 },
    [5] = { 3, 0, 1, 3, 2, 5, 4 },
    [6] = { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
};
236 | | |
237 | 2.22M | #define LEVEL_TAB_BITS 8 |
238 | | static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2]; |
239 | | |
240 | 406k | #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 |
241 | 101k | #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13 |
242 | 5.69M | #define COEFF_TOKEN_VLC_BITS 8 |
243 | 2.46M | #define TOTAL_ZEROS_VLC_BITS 9 |
244 | 281k | #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 |
245 | 45.5k | #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5 |
246 | 1.69M | #define RUN_VLC_BITS 3 |
247 | 491k | #define RUN7_VLC_BITS 6 |
248 | | |
249 | | /// 17 pointers to only four different VLCs |
250 | | static const VLCElem *coeff_token_vlc[17]; |
251 | | |
252 | | static VLCElem chroma_dc_coeff_token_vlc_table[256]; |
253 | | |
254 | | static VLCElem chroma422_dc_coeff_token_vlc_table[1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS]; |
255 | | |
256 | | static const VLCElem *total_zeros_vlc[15+1]; |
257 | | |
258 | | static const VLCElem *chroma_dc_total_zeros_vlc[3+1]; |
259 | | |
260 | | static const VLCElem *chroma422_dc_total_zeros_vlc[7+1]; |
261 | | |
262 | | static const VLCElem *run_vlc[6+1]; |
263 | | |
264 | | // The other pointers to VLCElem point into this array. |
265 | | static VLCElem run7_vlc_table[96 + (6 << RUN_VLC_BITS) |
266 | | + (15 << TOTAL_ZEROS_VLC_BITS) |
267 | | + (3 << CHROMA_DC_TOTAL_ZEROS_VLC_BITS) |
268 | | + (7 << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS) |
269 | | + (520 + 332 + 280 + 256) /* coeff token */]; |
270 | | |
271 | | /** |
272 | | * Get the predicted number of non-zero coefficients. |
273 | | * @param n block index |
274 | | */ |
275 | | static inline int pred_non_zero_count(const H264Context *h, const H264SliceContext *sl, int n) |
276 | 5.69M | { |
277 | 5.69M | const int index8= scan8[n]; |
278 | 5.69M | const int left = sl->non_zero_count_cache[index8 - 1]; |
279 | 5.69M | const int top = sl->non_zero_count_cache[index8 - 8]; |
280 | 5.69M | int i= left + top; |
281 | | |
282 | 5.69M | if(i<64) i= (i+1)>>1; |
283 | | |
284 | 5.69M | ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31); |
285 | | |
286 | 5.69M | return i&31; |
287 | 5.69M | } |
288 | | |
289 | 2 | static av_cold void init_cavlc_level_tab(void){ |
290 | 2 | int suffix_length; |
291 | 2 | unsigned int i; |
292 | | |
293 | 16 | for(suffix_length=0; suffix_length<7; suffix_length++){ |
294 | 3.59k | for(i=0; i<(1<<LEVEL_TAB_BITS); i++){ |
295 | 3.58k | int prefix= LEVEL_TAB_BITS - av_log2(2*i); |
296 | | |
297 | 3.58k | if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){ |
298 | 3.33k | int level_code = (prefix << suffix_length) + |
299 | 3.33k | (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length); |
300 | 3.33k | int mask = -(level_code&1); |
301 | 3.33k | level_code = (((2 + level_code) >> 1) ^ mask) - mask; |
302 | 3.33k | cavlc_level_tab[suffix_length][i][0]= level_code; |
303 | 3.33k | cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length; |
304 | 3.33k | }else if(prefix + 1 <= LEVEL_TAB_BITS){ |
305 | 240 | cavlc_level_tab[suffix_length][i][0]= prefix+100; |
306 | 240 | cavlc_level_tab[suffix_length][i][1]= prefix + 1; |
307 | 240 | }else{ |
308 | 14 | cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100; |
309 | 14 | cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS; |
310 | 14 | } |
311 | 3.58k | } |
312 | 14 | } |
313 | 2 | } |
314 | | |
315 | | av_cold void ff_h264_decode_init_vlc(void) |
316 | 2 | { |
317 | 2 | const VLCElem *coeff_token_vlc_original[4]; |
318 | 2 | VLCInitState state = VLC_INIT_STATE(run7_vlc_table); |
319 | | |
320 | 2 | VLC_INIT_STATIC_TABLE(chroma_dc_coeff_token_vlc_table, |
321 | 2 | CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4 * 5, |
322 | 2 | &chroma_dc_coeff_token_len [0], 1, 1, |
323 | 2 | &chroma_dc_coeff_token_bits[0], 1, 1, 0); |
324 | | |
325 | 2 | VLC_INIT_STATIC_TABLE(chroma422_dc_coeff_token_vlc_table, |
326 | 2 | CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4 * 9, |
327 | 2 | &chroma422_dc_coeff_token_len [0], 1, 1, |
328 | 2 | &chroma422_dc_coeff_token_bits[0], 1, 1, 0); |
329 | | |
330 | 2 | ff_vlc_init_tables(&state, RUN7_VLC_BITS, 16, |
331 | 2 | &run_len [6][0], 1, 1, |
332 | 2 | &run_bits[6][0], 1, 1, 0); |
333 | | |
334 | 14 | for (int i = 0; i < 6; i++) { |
335 | 12 | run_vlc[i + 1] = ff_vlc_init_tables(&state, RUN_VLC_BITS, 7, |
336 | 12 | &run_len [i][0], 1, 1, |
337 | 12 | &run_bits[i][0], 1, 1, 0); |
338 | 12 | } |
339 | | |
340 | 10 | for (int i = 0; i < 4; i++) { |
341 | 8 | coeff_token_vlc_original[i] = |
342 | 8 | ff_vlc_init_tables(&state, COEFF_TOKEN_VLC_BITS, 4*17, |
343 | 8 | &coeff_token_len [i][0], 1, 1, |
344 | 8 | &coeff_token_bits[i][0], 1, 1, 0); |
345 | 8 | } |
346 | 36 | for (int i = 0; i < FF_ARRAY_ELEMS(coeff_token_vlc); i++) { |
347 | 34 | static const uint8_t coeff_token_table_index[17] = { |
348 | 34 | 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 |
349 | 34 | }; |
350 | 34 | coeff_token_vlc[i] = coeff_token_vlc_original[coeff_token_table_index[i]]; |
351 | 34 | } |
352 | | |
353 | 8 | for (int i = 0; i < 3; i++) { |
354 | 6 | chroma_dc_total_zeros_vlc[i + 1] = |
355 | 6 | ff_vlc_init_tables(&state, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, |
356 | 6 | &chroma_dc_total_zeros_len [i][0], 1, 1, |
357 | 6 | &chroma_dc_total_zeros_bits[i][0], 1, 1, 0); |
358 | 6 | } |
359 | | |
360 | 16 | for (int i = 0; i < 7; i++) { |
361 | 14 | chroma422_dc_total_zeros_vlc[i + 1] = |
362 | 14 | ff_vlc_init_tables(&state, CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8, |
363 | 14 | &chroma422_dc_total_zeros_len [i][0], 1, 1, |
364 | 14 | &chroma422_dc_total_zeros_bits[i][0], 1, 1, 0); |
365 | 14 | } |
366 | | |
367 | 32 | for (int i = 0; i < 15; i++) { |
368 | 30 | total_zeros_vlc[i + 1] = |
369 | 30 | ff_vlc_init_tables(&state, TOTAL_ZEROS_VLC_BITS, 16, |
370 | 30 | &total_zeros_len [i][0], 1, 1, |
371 | 30 | &total_zeros_bits[i][0], 1, 1, 0); |
372 | 30 | } |
373 | | /* |
374 | | * This is a one time safety check to make sure that |
375 | | * the vlc table sizes were initialized correctly. |
376 | | */ |
377 | 2 | av_assert1(state.size == 0); |
378 | | |
379 | 2 | init_cavlc_level_tab(); |
380 | 2 | } |
381 | | |
382 | 114k | static inline int get_level_prefix(GetBitContext *gb){ |
383 | 114k | unsigned int buf; |
384 | 114k | int log; |
385 | | |
386 | 114k | OPEN_READER(re, gb); |
387 | 114k | UPDATE_CACHE(re, gb); |
388 | 114k | buf=GET_CACHE(re, gb); |
389 | | |
390 | 114k | log= 32 - av_log2(buf); |
391 | | |
392 | 114k | LAST_SKIP_BITS(re, gb, log); |
393 | 114k | CLOSE_READER(re, gb); |
394 | | |
395 | 114k | return log-1; |
396 | 114k | } |
397 | | |
398 | | /** |
399 | | * Decode a residual block. |
400 | | * @param n block index |
401 | | * @param scantable scantable |
402 | | * @param max_coeff number of coefficients in the block |
403 | | * @return <0 if an error occurred |
404 | | */ |
405 | | static int decode_residual(const H264Context *h, H264SliceContext *sl, |
406 | | GetBitContext *gb, int16_t *block, int n, |
407 | | const uint8_t *scantable, const uint32_t *qmul, |
408 | | int max_coeff) |
409 | 6.20M | { |
410 | 6.20M | int level[16]; |
411 | 6.20M | int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before; |
412 | | |
413 | | //FIXME put trailing_onex into the context |
414 | | |
415 | 6.20M | if(max_coeff <= 8){ |
416 | 507k | if (max_coeff == 4) |
417 | 406k | coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc_table, |
418 | 406k | CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); |
419 | 101k | else |
420 | 101k | coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc_table, |
421 | 101k | CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1); |
422 | 5.69M | }else{ |
423 | 5.69M | total_coeff = pred_non_zero_count(h, sl, n >= LUMA_DC_BLOCK_INDEX ? |
424 | 5.24M | (n - LUMA_DC_BLOCK_INDEX) * 16 : n); |
425 | 5.69M | coeff_token = get_vlc2(gb, coeff_token_vlc[total_coeff], |
426 | 5.69M | COEFF_TOKEN_VLC_BITS, 2); |
427 | 5.69M | } |
428 | 6.20M | total_coeff = coeff_token >> 2; |
429 | 6.20M | sl->non_zero_count_cache[scan8[n]] = total_coeff; |
430 | | |
431 | | //FIXME set last_non_zero? |
432 | | |
433 | 6.20M | if(total_coeff==0) |
434 | 3.31M | return 0; |
435 | 2.89M | if(total_coeff > (unsigned)max_coeff) { |
436 | 48.0k | av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff); |
437 | 48.0k | return -1; |
438 | 48.0k | } |
439 | | |
440 | 2.84M | trailing_ones= coeff_token&3; |
441 | 2.84M | ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff); |
442 | 2.84M | av_assert2(total_coeff<=16); |
443 | | |
444 | 2.84M | i = show_bits(gb, 3); |
445 | 2.84M | skip_bits(gb, trailing_ones); |
446 | 2.84M | level[0] = 1-((i&4)>>1); |
447 | 2.84M | level[1] = 1-((i&2) ); |
448 | 2.84M | level[2] = 1-((i&1)<<1); |
449 | | |
450 | 2.84M | if(trailing_ones<total_coeff) { |
451 | 779k | int mask, prefix; |
452 | 779k | int suffix_length = total_coeff > 10 & trailing_ones < 3; |
453 | 779k | int bitsi= show_bits(gb, LEVEL_TAB_BITS); |
454 | 779k | int level_code= cavlc_level_tab[suffix_length][bitsi][0]; |
455 | | |
456 | 779k | skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]); |
457 | 779k | if(level_code >= 100){ |
458 | 56.4k | prefix= level_code - 100; |
459 | 56.4k | if(prefix == LEVEL_TAB_BITS) |
460 | 55.9k | prefix += get_level_prefix(gb); |
461 | | |
462 | | //first coefficient has suffix_length equal to 0 or 1 |
463 | 56.4k | if(prefix<14){ //FIXME try to build a large unified VLC table for all this |
464 | 23.2k | if(suffix_length) |
465 | 1.32k | level_code= (prefix<<1) + get_bits1(gb); //part |
466 | 21.9k | else |
467 | 21.9k | level_code= prefix; //part |
468 | 33.1k | }else if(prefix==14){ |
469 | 2.17k | if(suffix_length) |
470 | 422 | level_code= (prefix<<1) + get_bits1(gb); //part |
471 | 1.75k | else |
472 | 1.75k | level_code= prefix + get_bits(gb, 4); //part |
473 | 31.0k | }else{ |
474 | 31.0k | level_code= 30; |
475 | 31.0k | if(prefix>=16){ |
476 | 28.8k | if(prefix > 25+3){ |
477 | 22.0k | av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n"); |
478 | 22.0k | return -1; |
479 | 22.0k | } |
480 | 6.81k | level_code += (1<<(prefix-3))-4096; |
481 | 6.81k | } |
482 | 8.99k | level_code += get_bits(gb, prefix-3); //part |
483 | 8.99k | } |
484 | | |
485 | 34.4k | if(trailing_ones < 3) level_code += 2; |
486 | | |
487 | 34.4k | suffix_length = 2; |
488 | 34.4k | mask= -(level_code&1); |
489 | 34.4k | level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask; |
490 | 722k | }else{ |
491 | 722k | level_code += ((level_code>>31)|1) & -(trailing_ones < 3); |
492 | | |
493 | 722k | suffix_length = 1 + (level_code + 3U > 6U); |
494 | 722k | level[trailing_ones]= level_code; |
495 | 722k | } |
496 | | |
497 | | //remaining coefficients have suffix_length > 0 |
498 | 2.02M | for(i=trailing_ones+1;i<total_coeff;i++) { |
499 | 1.28M | static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX }; |
500 | 1.28M | int bitsi= show_bits(gb, LEVEL_TAB_BITS); |
501 | 1.28M | level_code= cavlc_level_tab[suffix_length][bitsi][0]; |
502 | | |
503 | 1.28M | skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]); |
504 | 1.28M | if(level_code >= 100){ |
505 | 93.2k | prefix= level_code - 100; |
506 | 93.2k | if(prefix == LEVEL_TAB_BITS){ |
507 | 58.1k | prefix += get_level_prefix(gb); |
508 | 58.1k | } |
509 | 93.2k | if(prefix<15){ |
510 | 69.9k | level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length); |
511 | 69.9k | }else{ |
512 | 23.3k | level_code = 15<<suffix_length; |
513 | 23.3k | if (prefix>=16) { |
514 | 20.3k | if(prefix > 25+3){ |
515 | 11.7k | av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n"); |
516 | 11.7k | return AVERROR_INVALIDDATA; |
517 | 11.7k | } |
518 | 8.55k | level_code += (1<<(prefix-3))-4096; |
519 | 8.55k | } |
520 | 11.5k | level_code += get_bits(gb, prefix-3); |
521 | 11.5k | } |
522 | 81.5k | mask= -(level_code&1); |
523 | 81.5k | level_code= (((2+level_code)>>1) ^ mask) - mask; |
524 | 81.5k | } |
525 | 1.27M | level[i]= level_code; |
526 | 1.27M | suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length]; |
527 | 1.27M | } |
528 | 757k | } |
529 | | |
530 | 2.81M | if(total_coeff == max_coeff) |
531 | 14.4k | zeros_left=0; |
532 | 2.79M | else{ |
533 | 2.79M | if (max_coeff <= 8) { |
534 | 326k | if (max_coeff == 4) |
535 | 281k | zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff], |
536 | 281k | CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); |
537 | 45.5k | else |
538 | 45.5k | zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff], |
539 | 45.5k | CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1); |
540 | 2.46M | } else { |
541 | 2.46M | zeros_left = get_vlc2(gb, total_zeros_vlc[total_coeff], |
542 | 2.46M | TOTAL_ZEROS_VLC_BITS, 1); |
543 | 2.46M | } |
544 | 2.79M | } |
545 | | |
546 | 2.81M | #define STORE_BLOCK(type) \ |
547 | 2.81M | scantable += zeros_left + total_coeff - 1; \ |
548 | 2.81M | if(n >= LUMA_DC_BLOCK_INDEX){ \ |
549 | 417k | ((type*)block)[*scantable] = level[0]; \ |
550 | 534k | for(i=1;i<total_coeff && zeros_left > 0;i++) { \ |
551 | 116k | if(zeros_left < 7) \ |
552 | 116k | run_before = get_vlc2(gb, run_vlc[zeros_left], RUN_VLC_BITS, 1); \ |
553 | 116k | else \ |
554 | 116k | run_before = get_vlc2(gb, run7_vlc_table, RUN7_VLC_BITS, 2); \ |
555 | 116k | zeros_left -= run_before; \ |
556 | 116k | scantable -= 1 + run_before; \ |
557 | 116k | ((type*)block)[*scantable]= level[i]; \ |
558 | 116k | } \ |
559 | 541k | for(;i<total_coeff;i++) { \ |
560 | 123k | scantable--; \ |
561 | 123k | ((type*)block)[*scantable]= level[i]; \ |
562 | 123k | } \ |
563 | 2.39M | }else{ \ |
564 | 2.39M | ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \ |
565 | 4.46M | for(i=1;i<total_coeff && zeros_left > 0;i++) { \ |
566 | 2.07M | if(zeros_left < 7) \ |
567 | 2.07M | run_before = get_vlc2(gb, run_vlc[zeros_left], RUN_VLC_BITS, 1); \ |
568 | 2.07M | else \ |
569 | 2.07M | run_before = get_vlc2(gb, run7_vlc_table, RUN7_VLC_BITS, 2); \ |
570 | 2.07M | zeros_left -= run_before; \ |
571 | 2.07M | scantable -= 1 + run_before; \ |
572 | 2.07M | ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \ |
573 | 2.07M | } \ |
574 | 3.44M | for(;i<total_coeff;i++) { \ |
575 | 1.05M | scantable--; \ |
576 | 1.05M | ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \ |
577 | 1.05M | } \ |
578 | 2.39M | } |
579 | | |
580 | 2.81M | if (h->pixel_shift) { |
581 | 1.44M | STORE_BLOCK(int32_t) |
582 | 1.44M | } else { |
583 | 1.36M | STORE_BLOCK(int16_t) |
584 | 1.36M | } |
585 | | |
586 | 2.81M | if(zeros_left<0){ |
587 | 17.9k | av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y); |
588 | 17.9k | return -1; |
589 | 17.9k | } |
590 | | |
591 | 2.79M | return 0; |
592 | 2.81M | } |
593 | | |
594 | | static av_always_inline |
595 | | int decode_luma_residual(const H264Context *h, H264SliceContext *sl, |
596 | | GetBitContext *gb, const uint8_t *scan, |
597 | | const uint8_t *scan8x8, int pixel_shift, |
598 | | int mb_type, int cbp, int p) |
599 | 1.21M | { |
600 | 1.21M | int i4x4, i8x8; |
601 | 1.21M | int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1]; |
602 | 1.21M | if(IS_INTRA16x16(mb_type)){ |
603 | 449k | AV_ZERO128(sl->mb_luma_dc[p]+0); |
604 | 449k | AV_ZERO128(sl->mb_luma_dc[p]+8); |
605 | 449k | AV_ZERO128(sl->mb_luma_dc[p]+16); |
606 | 449k | AV_ZERO128(sl->mb_luma_dc[p]+24); |
607 | 449k | if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) { |
608 | 5.73k | return -1; //FIXME continue if partitioned and other return -1 too |
609 | 5.73k | } |
610 | | |
611 | 443k | av_assert2((cbp&15) == 0 || (cbp&15) == 15); |
612 | | |
613 | 443k | if(cbp&15){ |
614 | 224k | for(i8x8=0; i8x8<4; i8x8++){ |
615 | 886k | for(i4x4=0; i4x4<4; i4x4++){ |
616 | 713k | const int index= i4x4 + 4*i8x8 + p*16; |
617 | 713k | if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), |
618 | 713k | index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){ |
619 | 11.9k | return -1; |
620 | 11.9k | } |
621 | 713k | } |
622 | 184k | } |
623 | 39.6k | return 0xf; |
624 | 392k | }else{ |
625 | 392k | fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1); |
626 | 392k | return 0; |
627 | 392k | } |
628 | 760k | }else{ |
629 | 760k | int cqm = (IS_INTRA( mb_type ) ? 0:3)+p; |
630 | | /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */ |
631 | 760k | int new_cbp = 0; |
632 | 3.62M | for(i8x8=0; i8x8<4; i8x8++){ |
633 | 2.93M | if(cbp & (1<<i8x8)){ |
634 | 987k | if(IS_8x8DCT(mb_type)){ |
635 | 371k | int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift]; |
636 | 371k | uint8_t *nnz; |
637 | 1.79M | for(i4x4=0; i4x4<4; i4x4++){ |
638 | 1.44M | const int index= i4x4 + 4*i8x8 + p*16; |
639 | 1.44M | if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4, |
640 | 1.44M | h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 ) |
641 | 26.1k | return -1; |
642 | 1.44M | } |
643 | 345k | nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]]; |
644 | 345k | nnz[0] += nnz[1] + nnz[8] + nnz[9]; |
645 | 345k | new_cbp |= !!nnz[0] << i8x8; |
646 | 616k | }else{ |
647 | 2.97M | for(i4x4=0; i4x4<4; i4x4++){ |
648 | 2.40M | const int index= i4x4 + 4*i8x8 + p*16; |
649 | 2.40M | if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index, |
650 | 2.40M | scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){ |
651 | 41.1k | return -1; |
652 | 41.1k | } |
653 | 2.36M | new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8; |
654 | 2.36M | } |
655 | 616k | } |
656 | 1.94M | }else{ |
657 | 1.94M | uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]]; |
658 | 1.94M | nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; |
659 | 1.94M | } |
660 | 2.93M | } |
661 | 693k | return new_cbp; |
662 | 760k | } |
663 | 1.21M | } |
664 | | |
665 | | int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl) |
666 | 5.93M | { |
667 | 5.93M | int mb_xy; |
668 | 5.93M | int partition_count; |
669 | 5.93M | unsigned int mb_type, cbp; |
670 | 5.93M | int dct8x8_allowed = h->ps.pps->transform_8x8_mode; |
671 | 5.93M | const int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2; |
672 | 5.93M | const int pixel_shift = h->pixel_shift; |
673 | | |
674 | 5.93M | mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride; |
675 | | |
676 | 5.93M | ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->poc.frame_num, sl->mb_x, sl->mb_y); |
677 | 5.93M | cbp = 0; /* avoid warning. FIXME: find a solution without slowing |
678 | | down the code */ |
679 | 5.93M | if (sl->slice_type_nos != AV_PICTURE_TYPE_I) { |
680 | 5.45M | if (sl->mb_skip_run == -1) { |
681 | 1.70M | unsigned mb_skip_run = get_ue_golomb_long(&sl->gb); |
682 | 1.70M | if (mb_skip_run > h->mb_num) { |
683 | 15.0k | av_log(h->avctx, AV_LOG_ERROR, "mb_skip_run %d is invalid\n", mb_skip_run); |
684 | 15.0k | return AVERROR_INVALIDDATA; |
685 | 15.0k | } |
686 | 1.68M | sl->mb_skip_run = mb_skip_run; |
687 | 1.68M | } |
688 | | |
689 | 5.43M | if (sl->mb_skip_run--) { |
690 | 3.77M | if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) { |
691 | 1.28M | if (sl->mb_skip_run == 0) |
692 | 166k | sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb); |
693 | 1.28M | } |
694 | 3.77M | decode_mb_skip(h, sl); |
695 | 3.77M | return 0; |
696 | 3.77M | } |
697 | 5.43M | } |
698 | 2.14M | if (FRAME_MBAFF(h)) { |
699 | 917k | if ((sl->mb_y & 1) == 0) |
700 | 500k | sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb); |
701 | 917k | } |
702 | | |
703 | 2.14M | sl->prev_mb_skipped = 0; |
704 | | |
705 | 2.14M | mb_type= get_ue_golomb(&sl->gb); |
706 | 2.14M | if (sl->slice_type_nos == AV_PICTURE_TYPE_B) { |
707 | 835k | if(mb_type < 23){ |
708 | 828k | partition_count = ff_h264_b_mb_type_info[mb_type].partition_count; |
709 | 828k | mb_type = ff_h264_b_mb_type_info[mb_type].type; |
710 | 828k | }else{ |
711 | 6.77k | mb_type -= 23; |
712 | 6.77k | goto decode_intra_mb; |
713 | 6.77k | } |
714 | 1.30M | } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) { |
715 | 825k | if(mb_type < 5){ |
716 | 698k | partition_count = ff_h264_p_mb_type_info[mb_type].partition_count; |
717 | 698k | mb_type = ff_h264_p_mb_type_info[mb_type].type; |
718 | 698k | }else{ |
719 | 127k | mb_type -= 5; |
720 | 127k | goto decode_intra_mb; |
721 | 127k | } |
722 | 825k | }else{ |
723 | 482k | av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I); |
724 | 482k | if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type) |
725 | 34.5k | mb_type--; |
726 | 616k | decode_intra_mb: |
727 | 616k | if(mb_type > 25){ |
728 | 62.1k | av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y); |
729 | 62.1k | return -1; |
730 | 62.1k | } |
731 | 554k | partition_count=0; |
732 | 554k | cbp = ff_h264_i_mb_type_info[mb_type].cbp; |
733 | 554k | sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode; |
734 | 554k | mb_type = ff_h264_i_mb_type_info[mb_type].type; |
735 | 554k | } |
736 | | |
737 | 2.08M | if (MB_FIELD(sl)) |
738 | 783k | mb_type |= MB_TYPE_INTERLACED; |
739 | | |
740 | 2.08M | h->slice_table[mb_xy] = sl->slice_num; |
741 | | |
742 | 2.08M | if(IS_INTRA_PCM(mb_type)){ |
743 | 3.59k | const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] * |
744 | 3.59k | h->ps.sps->bit_depth_luma; |
745 | | |
746 | | // We assume these blocks are very rare so we do not optimize it. |
747 | 3.59k | sl->intra_pcm_ptr = align_get_bits(&sl->gb); |
748 | 3.59k | if (get_bits_left(&sl->gb) < mb_size) { |
749 | 1.61k | av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n"); |
750 | 1.61k | return AVERROR_INVALIDDATA; |
751 | 1.61k | } |
752 | 1.98k | skip_bits_long(&sl->gb, mb_size); |
753 | | |
754 | | // In deblocking, the quantizer is 0 |
755 | 1.98k | h->cur_pic.qscale_table[mb_xy] = 0; |
756 | | // All coeffs are present |
757 | 1.98k | memset(h->non_zero_count[mb_xy], 16, 48); |
758 | | |
759 | 1.98k | h->cur_pic.mb_type[mb_xy] = mb_type; |
760 | 1.98k | return 0; |
761 | 3.59k | } |
762 | | |
763 | 2.07M | fill_decode_neighbors(h, sl, mb_type); |
764 | 2.07M | fill_decode_caches(h, sl, mb_type); |
765 | | |
766 | | //mb_pred |
767 | 2.07M | if(IS_INTRA(mb_type)){ |
768 | 551k | int pred_mode; |
769 | | // init_top_left_availability(h); |
770 | 551k | if(IS_INTRA4x4(mb_type)){ |
771 | 136k | int i; |
772 | 136k | int di = 1; |
773 | 136k | if(dct8x8_allowed && get_bits1(&sl->gb)){ |
774 | 58.0k | mb_type |= MB_TYPE_8x8DCT; |
775 | 58.0k | di = 4; |
776 | 58.0k | } |
777 | | |
778 | | // fill_intra4x4_pred_table(h); |
779 | 1.61M | for(i=0; i<16; i+=di){ |
780 | 1.48M | int mode = pred_intra_mode(h, sl, i); |
781 | | |
782 | 1.48M | if(!get_bits1(&sl->gb)){ |
783 | 579k | const int rem_mode= get_bits(&sl->gb, 3); |
784 | 579k | mode = rem_mode + (rem_mode >= mode); |
785 | 579k | } |
786 | | |
787 | 1.48M | if(di==4) |
788 | 232k | fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1); |
789 | 1.24M | else |
790 | 1.24M | sl->intra4x4_pred_mode_cache[scan8[i]] = mode; |
791 | 1.48M | } |
792 | 136k | write_back_intra_pred_mode(h, sl); |
793 | 136k | if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx, |
794 | 136k | sl->top_samples_available, sl->left_samples_available) < 0) |
795 | 37.1k | return -1; |
796 | 415k | }else{ |
797 | 415k | sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available, |
798 | 415k | sl->left_samples_available, sl->intra16x16_pred_mode, 0); |
799 | 415k | if (sl->intra16x16_pred_mode < 0) |
800 | 78.0k | return -1; |
801 | 415k | } |
802 | 436k | if(decode_chroma){ |
803 | 308k | pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available, |
804 | 308k | sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1); |
805 | 308k | if(pred_mode < 0) |
806 | 11.2k | return -1; |
807 | 297k | sl->chroma_pred_mode = pred_mode; |
808 | 297k | } else { |
809 | 127k | sl->chroma_pred_mode = DC_128_PRED8x8; |
810 | 127k | } |
811 | 1.52M | }else if(partition_count==4){ |
812 | 137k | int i, j, sub_partition_count[4], list, ref[2][4]; |
813 | | |
814 | 137k | if (sl->slice_type_nos == AV_PICTURE_TYPE_B) { |
815 | 172k | for(i=0; i<4; i++){ |
816 | 138k | sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb); |
817 | 138k | if(sl->sub_mb_type[i] >=13){ |
818 | 359 | av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y); |
819 | 359 | return -1; |
820 | 359 | } |
821 | 137k | sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count; |
822 | 137k | sl->sub_mb_type[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type; |
823 | 137k | } |
824 | 34.4k | if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) { |
825 | 17.5k | ff_h264_pred_direct_motion(h, sl, &mb_type); |
826 | 17.5k | sl->ref_cache[0][scan8[4]] = |
827 | 17.5k | sl->ref_cache[1][scan8[4]] = |
828 | 17.5k | sl->ref_cache[0][scan8[12]] = |
829 | 17.5k | sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE; |
830 | 17.5k | } |
831 | 102k | }else{ |
832 | 102k | av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ? |
833 | 494k | for(i=0; i<4; i++){ |
834 | 400k | sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb); |
835 | 400k | if(sl->sub_mb_type[i] >=4){ |
836 | 8.40k | av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y); |
837 | 8.40k | return -1; |
838 | 8.40k | } |
839 | 392k | sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count; |
840 | 392k | sl->sub_mb_type[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type; |
841 | 392k | } |
842 | 102k | } |
843 | | |
844 | 288k | for (list = 0; list < sl->list_count; list++) { |
845 | 163k | int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl); |
846 | 810k | for(i=0; i<4; i++){ |
847 | 650k | if(IS_DIRECT(sl->sub_mb_type[i])) continue; |
848 | 595k | if(IS_DIR(sl->sub_mb_type[i], 0, list)){ |
849 | 494k | unsigned int tmp; |
850 | 494k | if(ref_count == 1){ |
851 | 146k | tmp= 0; |
852 | 347k | }else if(ref_count == 2){ |
853 | 177k | tmp= get_bits1(&sl->gb)^1; |
854 | 177k | }else{ |
855 | 170k | tmp= get_ue_golomb_31(&sl->gb); |
856 | 170k | if(tmp>=ref_count){ |
857 | 3.49k | av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp); |
858 | 3.49k | return -1; |
859 | 3.49k | } |
860 | 170k | } |
861 | 490k | ref[list][i]= tmp; |
862 | 490k | }else{ |
863 | | //FIXME |
864 | 101k | ref[list][i] = -1; |
865 | 101k | } |
866 | 595k | } |
867 | 163k | } |
868 | | |
869 | 125k | if(dct8x8_allowed) |
870 | 45.6k | dct8x8_allowed = get_dct8x8_allowed(h, sl); |
871 | | |
872 | 285k | for (list = 0; list < sl->list_count; list++) { |
873 | 798k | for(i=0; i<4; i++){ |
874 | 638k | if(IS_DIRECT(sl->sub_mb_type[i])) { |
875 | 54.8k | sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ]; |
876 | 54.8k | continue; |
877 | 54.8k | } |
878 | 584k | sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]= |
879 | 584k | sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; |
880 | | |
881 | 584k | if(IS_DIR(sl->sub_mb_type[i], 0, list)){ |
882 | 483k | const int sub_mb_type= sl->sub_mb_type[i]; |
883 | 483k | const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1; |
884 | 1.45M | for(j=0; j<sub_partition_count[i]; j++){ |
885 | 972k | int mx, my; |
886 | 972k | const int index= 4*i + block_width*j; |
887 | 972k | int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ]; |
888 | 972k | pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my); |
889 | 972k | mx += (unsigned)get_se_golomb(&sl->gb); |
890 | 972k | my += (unsigned)get_se_golomb(&sl->gb); |
891 | 972k | ff_tlog(h->avctx, "final mv:%d %d\n", mx, my); |
892 | | |
893 | 972k | if(IS_SUB_8X8(sub_mb_type)){ |
894 | 200k | mv_cache[ 1 ][0]= |
895 | 200k | mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; |
896 | 200k | mv_cache[ 1 ][1]= |
897 | 200k | mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; |
898 | 771k | }else if(IS_SUB_8X4(sub_mb_type)){ |
899 | 240k | mv_cache[ 1 ][0]= mx; |
900 | 240k | mv_cache[ 1 ][1]= my; |
901 | 531k | }else if(IS_SUB_4X8(sub_mb_type)){ |
902 | 117k | mv_cache[ 8 ][0]= mx; |
903 | 117k | mv_cache[ 8 ][1]= my; |
904 | 117k | } |
905 | 972k | mv_cache[ 0 ][0]= mx; |
906 | 972k | mv_cache[ 0 ][1]= my; |
907 | 972k | } |
908 | 483k | }else{ |
909 | 100k | uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0]; |
910 | 100k | p[0] = p[1]= |
911 | 100k | p[8] = p[9]= 0; |
912 | 100k | } |
913 | 584k | } |
914 | 159k | } |
915 | 1.38M | }else if(IS_DIRECT(mb_type)){ |
916 | 404k | ff_h264_pred_direct_motion(h, sl, &mb_type); |
917 | 404k | dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag; |
918 | 984k | }else{ |
919 | 984k | int list, mx, my, i; |
920 | | //FIXME we should set ref_idx_l? to 0 if we use that later ... |
921 | 984k | if(IS_16X16(mb_type)){ |
922 | 1.59M | for (list = 0; list < sl->list_count; list++) { |
923 | 913k | unsigned int val; |
924 | 913k | if(IS_DIR(mb_type, 0, list)){ |
925 | 750k | unsigned rc = sl->ref_count[list] << MB_MBAFF(sl); |
926 | 750k | if (rc == 1) { |
927 | 93.2k | val= 0; |
928 | 657k | } else if (rc == 2) { |
929 | 301k | val= get_bits1(&sl->gb)^1; |
930 | 356k | }else{ |
931 | 356k | val= get_ue_golomb_31(&sl->gb); |
932 | 356k | if (val >= rc) { |
933 | 11.8k | av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val); |
934 | 11.8k | return -1; |
935 | 11.8k | } |
936 | 356k | } |
937 | 739k | fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1); |
938 | 739k | } |
939 | 913k | } |
940 | 1.57M | for (list = 0; list < sl->list_count; list++) { |
941 | 900k | if(IS_DIR(mb_type, 0, list)){ |
942 | 738k | pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my); |
943 | 738k | mx += (unsigned)get_se_golomb(&sl->gb); |
944 | 738k | my += (unsigned)get_se_golomb(&sl->gb); |
945 | 738k | ff_tlog(h->avctx, "final mv:%d %d\n", mx, my); |
946 | | |
947 | 738k | fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); |
948 | 738k | } |
949 | 900k | } |
950 | 676k | } |
951 | 295k | else if(IS_16X8(mb_type)){ |
952 | 371k | for (list = 0; list < sl->list_count; list++) { |
953 | 675k | for(i=0; i<2; i++){ |
954 | 452k | unsigned int val; |
955 | 452k | if(IS_DIR(mb_type, i, list)){ |
956 | 334k | unsigned rc = sl->ref_count[list] << MB_MBAFF(sl); |
957 | 334k | if (rc == 1) { |
958 | 33.8k | val= 0; |
959 | 300k | } else if (rc == 2) { |
960 | 239k | val= get_bits1(&sl->gb)^1; |
961 | 239k | }else{ |
962 | 60.9k | val= get_ue_golomb_31(&sl->gb); |
963 | 60.9k | if (val >= rc) { |
964 | 4.05k | av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val); |
965 | 4.05k | return -1; |
966 | 4.05k | } |
967 | 60.9k | } |
968 | 334k | }else |
969 | 118k | val= LIST_NOT_USED&0xFF; |
970 | 448k | fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1); |
971 | 448k | } |
972 | 227k | } |
973 | 366k | for (list = 0; list < sl->list_count; list++) { |
974 | 666k | for(i=0; i<2; i++){ |
975 | 444k | unsigned int val; |
976 | 444k | if(IS_DIR(mb_type, i, list)){ |
977 | 327k | pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my); |
978 | 327k | mx += (unsigned)get_se_golomb(&sl->gb); |
979 | 327k | my += (unsigned)get_se_golomb(&sl->gb); |
980 | 327k | ff_tlog(h->avctx, "final mv:%d %d\n", mx, my); |
981 | | |
982 | 327k | val= pack16to32(mx,my); |
983 | 327k | }else |
984 | 117k | val=0; |
985 | 444k | fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4); |
986 | 444k | } |
987 | 222k | } |
988 | 147k | }else{ |
989 | 147k | av_assert2(IS_8X16(mb_type)); |
990 | 375k | for (list = 0; list < sl->list_count; list++) { |
991 | 686k | for(i=0; i<2; i++){ |
992 | 459k | unsigned int val; |
993 | 459k | if(IS_DIR(mb_type, i, list)){ //FIXME optimize |
994 | 342k | unsigned rc = sl->ref_count[list] << MB_MBAFF(sl); |
995 | 342k | if (rc == 1) { |
996 | 48.2k | val= 0; |
997 | 294k | } else if (rc == 2) { |
998 | 252k | val= get_bits1(&sl->gb)^1; |
999 | 252k | }else{ |
1000 | 42.0k | val= get_ue_golomb_31(&sl->gb); |
1001 | 42.0k | if (val >= rc) { |
1002 | 3.40k | av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val); |
1003 | 3.40k | return -1; |
1004 | 3.40k | } |
1005 | 42.0k | } |
1006 | 342k | }else |
1007 | 116k | val= LIST_NOT_USED&0xFF; |
1008 | 455k | fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1); |
1009 | 455k | } |
1010 | 230k | } |
1011 | 371k | for (list = 0; list < sl->list_count; list++) { |
1012 | 681k | for(i=0; i<2; i++){ |
1013 | 454k | unsigned int val; |
1014 | 454k | if(IS_DIR(mb_type, i, list)){ |
1015 | 338k | pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); |
1016 | 338k | mx += (unsigned)get_se_golomb(&sl->gb); |
1017 | 338k | my += (unsigned)get_se_golomb(&sl->gb); |
1018 | 338k | ff_tlog(h->avctx, "final mv:%d %d\n", mx, my); |
1019 | | |
1020 | 338k | val= pack16to32(mx,my); |
1021 | 338k | }else |
1022 | 115k | val=0; |
1023 | 454k | fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4); |
1024 | 454k | } |
1025 | 227k | } |
1026 | 144k | } |
1027 | 984k | } |
1028 | | |
1029 | 1.91M | if(IS_INTER(mb_type)) |
1030 | 1.49M | write_back_motion(h, sl, mb_type); |
1031 | | |
1032 | 1.91M | if(!IS_INTRA16x16(mb_type)){ |
1033 | 1.59M | cbp= get_ue_golomb(&sl->gb); |
1034 | | |
1035 | 1.59M | if(decode_chroma){ |
1036 | 1.35M | if(cbp > 47){ |
1037 | 31.9k | av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y); |
1038 | 31.9k | return -1; |
1039 | 31.9k | } |
1040 | 1.31M | if (IS_INTRA4x4(mb_type)) |
1041 | 52.5k | cbp = ff_h264_golomb_to_intra4x4_cbp[cbp]; |
1042 | 1.26M | else |
1043 | 1.26M | cbp = ff_h264_golomb_to_inter_cbp[cbp]; |
1044 | 1.31M | }else{ |
1045 | 240k | if(cbp > 15){ |
1046 | 35.9k | av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y); |
1047 | 35.9k | return -1; |
1048 | 35.9k | } |
1049 | 204k | if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp]; |
1050 | 163k | else cbp= golomb_to_inter_cbp_gray[cbp]; |
1051 | 204k | } |
1052 | 1.59M | } else { |
1053 | 329k | if (!decode_chroma && cbp>15) { |
1054 | 10.3k | av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n"); |
1055 | 10.3k | return AVERROR_INVALIDDATA; |
1056 | 10.3k | } |
1057 | 329k | } |
1058 | | |
1059 | 1.84M | if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){ |
1060 | 205k | mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb); |
1061 | 205k | } |
1062 | 1.84M | sl->cbp= |
1063 | 1.84M | h->cbp_table[mb_xy]= cbp; |
1064 | 1.84M | h->cur_pic.mb_type[mb_xy] = mb_type; |
1065 | | |
1066 | 1.84M | if(cbp || IS_INTRA16x16(mb_type)){ |
1067 | 982k | int i4x4, i8x8, chroma_idx; |
1068 | 982k | int dquant; |
1069 | 982k | int ret; |
1070 | 982k | GetBitContext *gb = &sl->gb; |
1071 | 982k | const uint8_t *scan, *scan8x8; |
1072 | 982k | const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8); |
1073 | | |
1074 | 982k | dquant= get_se_golomb(&sl->gb); |
1075 | | |
1076 | 982k | sl->qscale += (unsigned)dquant; |
1077 | | |
1078 | 982k | if (((unsigned)sl->qscale) > max_qp){ |
1079 | 9.45k | if (sl->qscale < 0) sl->qscale += max_qp + 1; |
1080 | 5.56k | else sl->qscale -= max_qp+1; |
1081 | 9.45k | if (((unsigned)sl->qscale) > max_qp){ |
1082 | 3.69k | av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y); |
1083 | 3.69k | sl->qscale = max_qp; |
1084 | 3.69k | return -1; |
1085 | 3.69k | } |
1086 | 9.45k | } |
1087 | | |
1088 | 978k | sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale); |
1089 | 978k | sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale); |
1090 | | |
1091 | 978k | if(IS_INTERLACED(mb_type)){ |
1092 | 344k | scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; |
1093 | 344k | scan = sl->qscale ? h->field_scan : h->field_scan_q0; |
1094 | 633k | }else{ |
1095 | 633k | scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; |
1096 | 633k | scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; |
1097 | 633k | } |
1098 | | |
1099 | 978k | if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) { |
1100 | 60.0k | return -1; |
1101 | 60.0k | } |
1102 | 918k | h->cbp_table[mb_xy] |= ret << 12; |
1103 | 918k | if (CHROMA444(h)) { |
1104 | 120k | if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) { |
1105 | 10.0k | return -1; |
1106 | 10.0k | } |
1107 | 110k | if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) { |
1108 | 14.7k | return -1; |
1109 | 14.7k | } |
1110 | 797k | } else { |
1111 | 797k | const int num_c8x8 = h->ps.sps->chroma_format_idc; |
1112 | | |
1113 | 797k | if(cbp&0x30){ |
1114 | 760k | for(chroma_idx=0; chroma_idx<2; chroma_idx++) |
1115 | 507k | if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift), |
1116 | 507k | CHROMA_DC_BLOCK_INDEX + chroma_idx, |
1117 | 507k | CHROMA422(h) ? ff_h264_chroma422_dc_scan : ff_h264_chroma_dc_scan, |
1118 | 507k | NULL, 4 * num_c8x8) < 0) { |
1119 | 1.89k | return -1; |
1120 | 1.89k | } |
1121 | 254k | } |
1122 | | |
1123 | 795k | if(cbp&0x20){ |
1124 | 232k | for(chroma_idx=0; chroma_idx<2; chroma_idx++){ |
1125 | 162k | const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]]; |
1126 | 162k | int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift); |
1127 | 325k | for (i8x8 = 0; i8x8<num_c8x8; i8x8++) { |
1128 | 847k | for (i4x4 = 0; i4x4 < 4; i4x4++) { |
1129 | 684k | const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4; |
1130 | 684k | if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0) |
1131 | 12.9k | return -1; |
1132 | 671k | mb += 16 << pixel_shift; |
1133 | 671k | } |
1134 | 176k | } |
1135 | 162k | } |
1136 | 712k | }else{ |
1137 | 712k | fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); |
1138 | 712k | fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); |
1139 | 712k | } |
1140 | 795k | } |
1141 | 918k | }else{ |
1142 | 859k | fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1); |
1143 | 859k | fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); |
1144 | 859k | fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); |
1145 | 859k | } |
1146 | 1.73M | h->cur_pic.qscale_table[mb_xy] = sl->qscale; |
1147 | 1.73M | write_back_non_zero_count(h, sl); |
1148 | | |
1149 | 1.73M | return 0; |
1150 | 1.84M | } |