Coverage Report

Created: 2026-02-14 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libavcodec/h264_cavlc.c
Line
Count
Source
1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
/**
23
 * @file
24
 * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding.
25
 * @author Michael Niedermayer <michaelni@gmx.at>
26
 */
27
28
16.7M
#define CABAC(h) 0
29
#define UNCHECKED_BITSTREAM_READER 1
30
31
#include "h264dec.h"
32
#include "h264_mvpred.h"
33
#include "h264data.h"
34
#include "golomb.h"
35
#include "mpegutils.h"
36
#include "libavutil/avassert.h"
37
38
39
/*
 * 16-entry lookup table; the entries are a permutation of 0..15.
 * NOTE(review): judging by the name, this presumably translates a
 * Golomb-coded index into the coded block pattern of inter macroblocks
 * when no chroma is coded ("gray"). The lookup site is outside the
 * visible part of the file -- confirm there.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
40
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
41
};
42
43
/*
 * 16-entry lookup table; the entries are a permutation of 0..15.
 * NOTE(review): judging by the name, this is presumably the intra 4x4
 * counterpart of golomb_to_inter_cbp_gray (Golomb-coded index -> coded
 * block pattern, no chroma). The lookup site is outside the visible part
 * of the file -- confirm there.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
44
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
45
};
46
47
/*
 * Code lengths (in bits) of the chroma DC coeff_token VLC (4*5 entries).
 * Passed together with chroma_dc_coeff_token_bits to
 * VLC_INIT_STATIC_TABLE() in ff_h264_decode_init_vlc().
 * Laid out as rows of four: decode_residual() splits the decoded token
 * into total_coeff (token >> 2) and trailing_ones (token & 3), so --
 * assuming the VLC symbol is the entry index -- row = total_coeff and
 * column = trailing_ones.
 * NOTE(review): a length of 0 presumably marks a combination that has no
 * code; confirm against the VLC builder.
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
48
 2, 0, 0, 0,
49
 6, 1, 0, 0,
50
 6, 6, 3, 0,
51
 6, 7, 7, 6,
52
 6, 8, 8, 7,
53
};
54
55
/*
 * Code words of the chroma DC coeff_token VLC (4*5 entries), in the same
 * layout as chroma_dc_coeff_token_len, which supplies the length of each
 * code. Both arrays are passed to VLC_INIT_STATIC_TABLE() in
 * ff_h264_decode_init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
56
 1, 0, 0, 0,
57
 7, 1, 0, 0,
58
 4, 6, 1, 0,
59
 3, 3, 2, 5,
60
 2, 3, 2, 0,
61
};
62
63
/*
 * Code lengths (in bits) of the 4:2:2 chroma DC coeff_token VLC
 * (4*9 entries). Passed together with chroma422_dc_coeff_token_bits to
 * VLC_INIT_STATIC_TABLE() in ff_h264_decode_init_vlc(); decode_residual()
 * reads this VLC when max_coeff is at most 8 but not 4.
 * Laid out as rows of four; assuming the VLC symbol is the entry index,
 * row = total_coeff (token >> 2) and column = trailing_ones (token & 3).
 * NOTE(review): a length of 0 presumably marks a combination that has no
 * code; confirm against the VLC builder.
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
64
  1,  0,  0,  0,
65
  7,  2,  0,  0,
66
  7,  7,  3,  0,
67
  9,  7,  7,  5,
68
  9,  9,  7,  6,
69
 10, 10,  9,  7,
70
 11, 11, 10,  7,
71
 12, 12, 11, 10,
72
 13, 12, 12, 11,
73
};
74
75
/*
 * Code words of the 4:2:2 chroma DC coeff_token VLC (4*9 entries), in the
 * same layout as chroma422_dc_coeff_token_len, which supplies the length
 * of each code. Both arrays are passed to VLC_INIT_STATIC_TABLE() in
 * ff_h264_decode_init_vlc().
 */
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
76
  1,   0,  0, 0,
77
 15,   1,  0, 0,
78
 14,  13,  1, 0,
79
  7,  12, 11, 1,
80
  6,   5, 10, 1,
81
  7,   6,  4, 9,
82
  7,   6,  5, 8,
83
  7,   6,  5, 4,
84
  7,   5,  4, 4,
85
};
86
87
/*
 * Code lengths (in bits) of the four coeff_token VLCs used for blocks with
 * more than 8 coefficients. ff_h264_decode_init_vlc() builds one VLC per
 * outer index (4*17 codes each) and then maps the 17 possible predicted
 * non-zero counts onto these four tables through coeff_token_vlc[].
 * Within one table the entries come in groups of four; assuming the VLC
 * symbol is the entry index, group = total_coeff (token >> 2) and
 * position in the group = trailing_ones (token & 3), as split by
 * decode_residual().
 * NOTE(review): a length of 0 presumably marks a combination that has no
 * code; confirm against the VLC builder.
 */
static const uint8_t coeff_token_len[4][4*17]={
88
{
89
     1, 0, 0, 0,
90
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
91
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
92
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
93
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
94
},
95
{
96
     2, 0, 0, 0,
97
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
98
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
99
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
100
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
101
},
102
{
103
     4, 0, 0, 0,
104
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
105
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
106
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
107
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
108
},
109
{
110
     6, 0, 0, 0,
111
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
112
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
113
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
114
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
115
}
116
};
117
118
/*
 * Code words of the four coeff_token VLCs, in the same layout as
 * coeff_token_len, which supplies the length of each code. Row i of both
 * arrays is turned into one VLC by ff_h264_decode_init_vlc().
 */
static const uint8_t coeff_token_bits[4][4*17]={
119
{
120
     1, 0, 0, 0,
121
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
122
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
123
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
124
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
125
},
126
{
127
     3, 0, 0, 0,
128
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
129
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
130
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
131
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
132
},
133
{
134
    15, 0, 0, 0,
135
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
136
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
137
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
138
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
139
},
140
{
141
     3, 0, 0, 0,
142
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
143
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
144
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
145
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
146
}
147
};
148
149
/*
 * Code lengths (in bits) of the total_zeros VLCs. Only 15 of the 16 rows
 * have initializers; ff_h264_decode_init_vlc() builds total_zeros_vlc[i + 1]
 * from row i (16 codes each), and decode_residual() selects the VLC with
 * total_coeff, so row i serves total_coeff == i + 1.
 * Entries not listed in a row are implicitly 0.
 * NOTE(review): assuming the VLC symbol is the column index, the column is
 * the decoded zeros_left value, and a length of 0 presumably means "no
 * code"; confirm against the VLC builder.
 */
static const uint8_t total_zeros_len[16][16]= {
150
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
151
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
152
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
153
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
154
    {4,4,4,3,3,3,3,3,4,5,4,5},
155
    {6,5,3,3,3,3,3,3,4,3,6},
156
    {6,5,3,3,3,2,3,4,3,6},
157
    {6,4,5,3,2,2,3,3,6},
158
    {6,6,4,2,2,3,2,5},
159
    {5,5,3,2,2,2,4},
160
    {4,4,3,3,1,3},
161
    {4,4,2,1,3},
162
    {3,3,1,2},
163
    {2,2,1},
164
    {1,1},
165
};
166
167
/*
 * Code words of the total_zeros VLCs, in the same layout as
 * total_zeros_len, which supplies the length of each code. Row i of both
 * arrays becomes total_zeros_vlc[i + 1] in ff_h264_decode_init_vlc().
 */
static const uint8_t total_zeros_bits[16][16]= {
168
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
169
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
170
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
171
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
172
    {5,4,3,7,6,5,4,3,2,1,1,0},
173
    {1,1,7,6,5,4,3,2,1,1,0},
174
    {1,1,5,4,3,3,2,1,1,0},
175
    {1,1,1,3,3,2,2,1,0},
176
    {1,0,1,3,2,1,1,1},
177
    {1,0,1,3,2,1,1},
178
    {0,1,1,2,1,3},
179
    {0,1,1,1,1},
180
    {0,1,1,1},
181
    {0,1,1},
182
    {0,1},
183
};
184
185
/*
 * Code lengths (in bits) of the chroma DC total_zeros VLCs.
 * ff_h264_decode_init_vlc() builds chroma_dc_total_zeros_vlc[i + 1] from
 * row i (4 codes each); decode_residual() selects the VLC with total_coeff
 * when max_coeff == 4, so row i serves total_coeff == i + 1.
 * NOTE(review): a length of 0 presumably means "no code"; confirm against
 * the VLC builder.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
186
    { 1, 2, 3, 3,},
187
    { 1, 2, 2, 0,},
188
    { 1, 1, 0, 0,},
189
};
190
191
/*
 * Code words of the chroma DC total_zeros VLCs, in the same layout as
 * chroma_dc_total_zeros_len, which supplies the length of each code.
 * Row i of both arrays becomes chroma_dc_total_zeros_vlc[i + 1] in
 * ff_h264_decode_init_vlc().
 */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
192
    { 1, 1, 1, 0,},
193
    { 1, 1, 0, 0,},
194
    { 1, 0, 0, 0,},
195
};
196
197
/*
 * Code lengths (in bits) of the 4:2:2 chroma DC total_zeros VLCs.
 * ff_h264_decode_init_vlc() builds chroma422_dc_total_zeros_vlc[i + 1]
 * from row i (8 codes each); decode_residual() selects the VLC with
 * total_coeff when max_coeff is at most 8 but not 4, so row i serves
 * total_coeff == i + 1. Entries not listed in a row are implicitly 0.
 * NOTE(review): a length of 0 presumably means "no code"; confirm against
 * the VLC builder.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
198
    { 1, 3, 3, 4, 4, 4, 5, 5 },
199
    { 3, 2, 3, 3, 3, 3, 3 },
200
    { 3, 3, 2, 2, 3, 3 },
201
    { 3, 2, 2, 2, 3 },
202
    { 2, 2, 2, 2 },
203
    { 2, 2, 1 },
204
    { 1, 1 },
205
};
206
207
/*
 * Code words of the 4:2:2 chroma DC total_zeros VLCs, in the same layout
 * as chroma422_dc_total_zeros_len, which supplies the length of each code.
 * Row i of both arrays becomes chroma422_dc_total_zeros_vlc[i + 1] in
 * ff_h264_decode_init_vlc().
 */
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
208
    { 1, 2, 3, 2, 3, 1, 1, 0 },
209
    { 0, 1, 1, 4, 5, 6, 7 },
210
    { 0, 1, 1, 2, 6, 7 },
211
    { 6, 0, 1, 2, 7 },
212
    { 0, 1, 2, 3 },
213
    { 0, 1, 1 },
214
    { 0, 1 },
215
};
216
217
/*
 * Code lengths (in bits) of the run_before VLCs.
 * ff_h264_decode_init_vlc() builds run_vlc[i + 1] from rows 0..5 (7 codes
 * each) and the table stored at the start of run7_vlc_table from row 6
 * (16 codes). decode_residual() uses run_vlc[zeros_left] while
 * zeros_left < 7 and the row-6 table otherwise.
 * Entries not listed in a row are implicitly 0.
 * NOTE(review): a length of 0 presumably means "no code"; confirm against
 * the VLC builder.
 */
static const uint8_t run_len[7][16]={
218
    {1,1},
219
    {1,2,2},
220
    {2,2,2,2},
221
    {2,2,2,3,3},
222
    {2,2,3,3,3,3},
223
    {2,3,3,3,3,3,3},
224
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
225
};
226
227
/*
 * Code words of the run_before VLCs, in the same layout as run_len, which
 * supplies the length of each code. Rows 0..5 become run_vlc[1..6] and
 * row 6 becomes the table at the start of run7_vlc_table in
 * ff_h264_decode_init_vlc().
 */
static const uint8_t run_bits[7][16]={
228
    {1,0},
229
    {1,1,0},
230
    {3,2,1,0},
231
    {3,2,1,1,0},
232
    {3,2,3,2,1,0},
233
    {3,0,1,3,2,5,4},
234
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
235
};
236
237
3.34M
/* Number of bitstream bits used to index cavlc_level_tab. */
#define LEVEL_TAB_BITS 8
238
/*
 * Level lookup table filled by init_cavlc_level_tab(), indexed by
 * [suffix_length][next LEVEL_TAB_BITS bits of the bitstream].
 * Element [0] is either a fully decoded level or, when >= 100, an escape
 * value of 100 + prefix that decode_residual() finishes decoding by hand;
 * element [1] is the number of bits to skip.
 */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
239
240
435k
/*
 * Bit widths of the VLC lookups in this file. Each value is passed both to
 * the table builder in ff_h264_decode_init_vlc() and to the matching
 * get_vlc2() call in decode_residual().
 */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
241
198k
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
242
7.07M
#define COEFF_TOKEN_VLC_BITS           8
243
3.14M
#define TOTAL_ZEROS_VLC_BITS           9
244
300k
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
245
71.8k
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
246
2.31M
#define RUN_VLC_BITS                   3
247
640k
#define RUN7_VLC_BITS                  6
248
249
/// 17 pointers to only four different VLCs
/* Indexed by the predicted non-zero count from pred_non_zero_count(). */
250
static const VLCElem *coeff_token_vlc[17];
251
252
/* Chroma DC coeff_token VLC; has its own storage (filled by VLC_INIT_STATIC_TABLE). */
static VLCElem chroma_dc_coeff_token_vlc_table[256];
253
254
/* 4:2:2 chroma DC coeff_token VLC; has its own storage as well. */
static VLCElem chroma422_dc_coeff_token_vlc_table[1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS];
255
256
/* Indexed by total_coeff; only entries 1..15 are assigned by ff_h264_decode_init_vlc(). */
static const VLCElem *total_zeros_vlc[15+1];
257
258
/* Indexed by total_coeff; only entries 1..3 are assigned. */
static const VLCElem *chroma_dc_total_zeros_vlc[3+1];
259
260
/* Indexed by total_coeff; only entries 1..7 are assigned. */
static const VLCElem *chroma422_dc_total_zeros_vlc[7+1];
261
262
/* Indexed by zeros_left; only entries 1..6 are assigned. */
static const VLCElem *run_vlc[6+1];
263
264
// The other pointers to VLCElem point into this array.
/*
 * The run7 table itself is built first, so it starts at element 0 and is
 * used directly under this name by decode_residual(). The size is the sum
 * of the space reserved for every table built from this buffer;
 * ff_h264_decode_init_vlc() asserts that nothing is left over.
 */
265
static VLCElem run7_vlc_table[96 + (6  << RUN_VLC_BITS)
266
                                 + (15 << TOTAL_ZEROS_VLC_BITS)
267
                                 + (3  << CHROMA_DC_TOTAL_ZEROS_VLC_BITS)
268
                                 + (7  << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS)
269
                                 + (520 + 332 + 280 + 256) /* coeff token */];
270
271
/**
272
 * Get the predicted number of non-zero coefficients.
273
 * @param n block index
274
 */
275
static inline int pred_non_zero_count(const H264Context *h, const H264SliceContext *sl, int n)
276
7.07M
{
277
7.07M
    const int index8= scan8[n];
278
7.07M
    const int left = sl->non_zero_count_cache[index8 - 1];
279
7.07M
    const int top  = sl->non_zero_count_cache[index8 - 8];
280
7.07M
    int i= left + top;
281
282
7.07M
    if(i<64) i= (i+1)>>1;
283
284
7.07M
    ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
285
286
7.07M
    return i&31;
287
7.07M
}
288
289
2
static av_cold void init_cavlc_level_tab(void){
290
2
    int suffix_length;
291
2
    unsigned int i;
292
293
16
    for(suffix_length=0; suffix_length<7; suffix_length++){
294
3.59k
        for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
295
3.58k
            int prefix= LEVEL_TAB_BITS - av_log2(2*i);
296
297
3.58k
            if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
298
3.33k
                int level_code = (prefix << suffix_length) +
299
3.33k
                    (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
300
3.33k
                int mask = -(level_code&1);
301
3.33k
                level_code = (((2 + level_code) >> 1) ^ mask) - mask;
302
3.33k
                cavlc_level_tab[suffix_length][i][0]= level_code;
303
3.33k
                cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
304
3.33k
            }else if(prefix + 1 <= LEVEL_TAB_BITS){
305
240
                cavlc_level_tab[suffix_length][i][0]= prefix+100;
306
240
                cavlc_level_tab[suffix_length][i][1]= prefix + 1;
307
240
            }else{
308
14
                cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
309
14
                cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
310
14
            }
311
3.58k
        }
312
14
    }
313
2
}
314
315
av_cold void ff_h264_decode_init_vlc(void)
316
2
{
317
2
    const VLCElem *coeff_token_vlc_original[4];
318
2
    VLCInitState state = VLC_INIT_STATE(run7_vlc_table);
319
320
2
    VLC_INIT_STATIC_TABLE(chroma_dc_coeff_token_vlc_table,
321
2
                          CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4 * 5,
322
2
                          &chroma_dc_coeff_token_len [0], 1, 1,
323
2
                          &chroma_dc_coeff_token_bits[0], 1, 1, 0);
324
325
2
    VLC_INIT_STATIC_TABLE(chroma422_dc_coeff_token_vlc_table,
326
2
                          CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4 * 9,
327
2
                          &chroma422_dc_coeff_token_len [0], 1, 1,
328
2
                          &chroma422_dc_coeff_token_bits[0], 1, 1, 0);
329
330
2
    ff_vlc_init_tables(&state, RUN7_VLC_BITS, 16,
331
2
                       &run_len [6][0], 1, 1,
332
2
                       &run_bits[6][0], 1, 1, 0);
333
334
14
    for (int i = 0; i < 6; i++) {
335
12
        run_vlc[i + 1] = ff_vlc_init_tables(&state, RUN_VLC_BITS, 7,
336
12
                                            &run_len [i][0], 1, 1,
337
12
                                            &run_bits[i][0], 1, 1, 0);
338
12
    }
339
340
10
    for (int i = 0; i < 4; i++) {
341
8
        coeff_token_vlc_original[i] =
342
8
            ff_vlc_init_tables(&state, COEFF_TOKEN_VLC_BITS, 4*17,
343
8
                               &coeff_token_len [i][0], 1, 1,
344
8
                               &coeff_token_bits[i][0], 1, 1, 0);
345
8
    }
346
36
    for (int i = 0; i < FF_ARRAY_ELEMS(coeff_token_vlc); i++) {
347
34
        static const uint8_t coeff_token_table_index[17] = {
348
34
            0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3
349
34
        };
350
34
        coeff_token_vlc[i] = coeff_token_vlc_original[coeff_token_table_index[i]];
351
34
    }
352
353
8
    for (int i = 0; i < 3; i++) {
354
6
        chroma_dc_total_zeros_vlc[i + 1] =
355
6
            ff_vlc_init_tables(&state, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
356
6
                               &chroma_dc_total_zeros_len [i][0], 1, 1,
357
6
                               &chroma_dc_total_zeros_bits[i][0], 1, 1, 0);
358
6
    }
359
360
16
    for (int i = 0; i < 7; i++) {
361
14
        chroma422_dc_total_zeros_vlc[i + 1] =
362
14
            ff_vlc_init_tables(&state, CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
363
14
                               &chroma422_dc_total_zeros_len [i][0], 1, 1,
364
14
                               &chroma422_dc_total_zeros_bits[i][0], 1, 1, 0);
365
14
    }
366
367
32
    for (int i = 0; i < 15; i++) {
368
30
        total_zeros_vlc[i + 1] =
369
30
            ff_vlc_init_tables(&state, TOTAL_ZEROS_VLC_BITS, 16,
370
30
                               &total_zeros_len [i][0], 1, 1,
371
30
                               &total_zeros_bits[i][0], 1, 1, 0);
372
30
    }
373
    /*
374
     * This is a one time safety check to make sure that
375
     * the vlc table sizes were initialized correctly.
376
     */
377
2
    av_assert1(state.size == 0);
378
379
2
    init_cavlc_level_tab();
380
2
}
381
382
185k
/**
 * Read a level prefix from the bitstream: consume the bits up to and
 * including the highest set bit of the current bit cache.
 *
 * @param gb bit reader; advanced past the consumed bits
 * @return the number of consumed bits minus one
 */
static inline int get_level_prefix(GetBitContext *gb)
{
    unsigned int cache;
    int consumed;

    OPEN_READER(re, gb);
    UPDATE_CACHE(re, gb);
    cache = GET_CACHE(re, gb);

    /* av_log2() locates the highest set bit of the cache word */
    consumed = 32 - av_log2(cache);

    LAST_SKIP_BITS(re, gb, consumed);
    CLOSE_READER(re, gb);

    return consumed - 1;
}
397
398
/**
399
 * Decode a residual block.
400
 * @param n block index
401
 * @param scantable scantable
402
 * @param max_coeff number of coefficients in the block
403
 * @return <0 if an error occurred
404
 */
405
static int decode_residual(const H264Context *h, H264SliceContext *sl,
406
                           GetBitContext *gb, int16_t *block, int n,
407
                           const uint8_t *scantable, const uint32_t *qmul,
408
                           int max_coeff)
409
7.70M
{
410
7.70M
    int level[16];
411
7.70M
    int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
412
413
    //FIXME put trailing_onex into the context
414
415
7.70M
    if(max_coeff <= 8){
416
633k
        if (max_coeff == 4)
417
435k
            coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc_table,
418
435k
                                   CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
419
198k
        else
420
198k
            coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc_table,
421
198k
                                   CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
422
7.07M
    }else{
423
7.07M
        total_coeff = pred_non_zero_count(h, sl, n >= LUMA_DC_BLOCK_INDEX ?
424
6.56M
                                                 (n - LUMA_DC_BLOCK_INDEX) * 16 : n);
425
7.07M
        coeff_token = get_vlc2(gb, coeff_token_vlc[total_coeff],
426
7.07M
                               COEFF_TOKEN_VLC_BITS, 2);
427
7.07M
    }
428
7.70M
    total_coeff = coeff_token >> 2;
429
7.70M
    sl->non_zero_count_cache[scan8[n]] = total_coeff;
430
431
    //FIXME set last_non_zero?
432
433
7.70M
    if(total_coeff==0)
434
4.04M
        return 0;
435
3.66M
    if(total_coeff > (unsigned)max_coeff) {
436
78.6k
        av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
437
78.6k
        return -1;
438
78.6k
    }
439
440
3.58M
    trailing_ones= coeff_token&3;
441
3.58M
    ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
442
3.58M
    av_assert2(total_coeff<=16);
443
444
3.58M
    i = show_bits(gb, 3);
445
3.58M
    skip_bits(gb, trailing_ones);
446
3.58M
    level[0] = 1-((i&4)>>1);
447
3.58M
    level[1] = 1-((i&2)   );
448
3.58M
    level[2] = 1-((i&1)<<1);
449
450
3.58M
    if(trailing_ones<total_coeff) {
451
1.06M
        int mask, prefix;
452
1.06M
        int suffix_length = total_coeff > 10 & trailing_ones < 3;
453
1.06M
        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
454
1.06M
        int level_code= cavlc_level_tab[suffix_length][bitsi][0];
455
456
1.06M
        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
457
1.06M
        if(level_code >= 100){
458
92.5k
            prefix= level_code - 100;
459
92.5k
            if(prefix == LEVEL_TAB_BITS)
460
90.4k
                prefix += get_level_prefix(gb);
461
462
            //first coefficient has suffix_length equal to 0 or 1
463
92.5k
            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
464
30.2k
                if(suffix_length)
465
3.74k
                    level_code= (prefix<<1) + get_bits1(gb); //part
466
26.4k
                else
467
26.4k
                    level_code= prefix; //part
468
62.2k
            }else if(prefix==14){
469
4.00k
                if(suffix_length)
470
434
                    level_code= (prefix<<1) + get_bits1(gb); //part
471
3.57k
                else
472
3.57k
                    level_code= prefix + get_bits(gb, 4); //part
473
58.2k
            }else{
474
58.2k
                level_code= 30;
475
58.2k
                if(prefix>=16){
476
55.6k
                    if(prefix > 25+3){
477
23.3k
                        av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
478
23.3k
                        return -1;
479
23.3k
                    }
480
32.2k
                    level_code += (1<<(prefix-3))-4096;
481
32.2k
                }
482
34.9k
                level_code += get_bits(gb, prefix-3); //part
483
34.9k
            }
484
485
69.1k
            if(trailing_ones < 3) level_code += 2;
486
487
69.1k
            suffix_length = 2;
488
69.1k
            mask= -(level_code&1);
489
69.1k
            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
490
976k
        }else{
491
976k
            level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
492
493
976k
            suffix_length = 1 + (level_code + 3U > 6U);
494
976k
            level[trailing_ones]= level_code;
495
976k
        }
496
497
        //remaining coefficients have suffix_length > 0
498
3.06M
        for(i=trailing_ones+1;i<total_coeff;i++) {
499
2.03M
            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
500
2.03M
            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
501
2.03M
            level_code= cavlc_level_tab[suffix_length][bitsi][0];
502
503
2.03M
            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
504
2.03M
            if(level_code >= 100){
505
140k
                prefix= level_code - 100;
506
140k
                if(prefix == LEVEL_TAB_BITS){
507
95.0k
                    prefix += get_level_prefix(gb);
508
95.0k
                }
509
140k
                if(prefix<15){
510
101k
                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
511
101k
                }else{
512
39.0k
                    level_code = 15<<suffix_length;
513
39.0k
                    if (prefix>=16) {
514
31.4k
                        if(prefix > 25+3){
515
15.6k
                            av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
516
15.6k
                            return AVERROR_INVALIDDATA;
517
15.6k
                        }
518
15.7k
                        level_code += (1<<(prefix-3))-4096;
519
15.7k
                    }
520
23.4k
                    level_code += get_bits(gb, prefix-3);
521
23.4k
                }
522
124k
                mask= -(level_code&1);
523
124k
                level_code= (((2+level_code)>>1) ^ mask) - mask;
524
124k
            }
525
2.02M
            level[i]= level_code;
526
2.02M
            suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
527
2.02M
        }
528
1.04M
    }
529
530
3.54M
    if(total_coeff == max_coeff)
531
26.2k
        zeros_left=0;
532
3.51M
    else{
533
3.51M
        if (max_coeff <= 8) {
534
371k
            if (max_coeff == 4)
535
300k
                zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff],
536
300k
                                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
537
71.8k
            else
538
71.8k
                zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff],
539
71.8k
                                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
540
3.14M
        } else {
541
3.14M
            zeros_left = get_vlc2(gb, total_zeros_vlc[total_coeff],
542
3.14M
                                  TOTAL_ZEROS_VLC_BITS, 1);
543
3.14M
        }
544
3.51M
    }
545
546
3.54M
#define STORE_BLOCK(type) \
547
3.54M
    scantable += zeros_left + total_coeff - 1; \
548
3.54M
    if(n >= LUMA_DC_BLOCK_INDEX){ \
549
474k
        ((type*)block)[*scantable] = level[0]; \
550
613k
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
551
138k
            if(zeros_left < 7) \
552
138k
                run_before = get_vlc2(gb, run_vlc[zeros_left], RUN_VLC_BITS, 1); \
553
138k
            else \
554
138k
                run_before = get_vlc2(gb, run7_vlc_table, RUN7_VLC_BITS, 2); \
555
138k
            zeros_left -= run_before; \
556
138k
            scantable -= 1 + run_before; \
557
138k
            ((type*)block)[*scantable]= level[i]; \
558
138k
        } \
559
631k
        for(;i<total_coeff;i++) { \
560
157k
            scantable--; \
561
157k
            ((type*)block)[*scantable]= level[i]; \
562
157k
        } \
563
3.07M
    }else{ \
564
3.07M
        ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
565
5.88M
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
566
2.81M
            if(zeros_left < 7) \
567
2.81M
                run_before = get_vlc2(gb, run_vlc[zeros_left], RUN_VLC_BITS, 1); \
568
2.81M
            else \
569
2.81M
                run_before = get_vlc2(gb, run7_vlc_table, RUN7_VLC_BITS, 2); \
570
2.81M
            zeros_left -= run_before; \
571
2.81M
            scantable -= 1 + run_before; \
572
2.81M
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
573
2.81M
        } \
574
4.76M
        for(;i<total_coeff;i++) { \
575
1.69M
            scantable--; \
576
1.69M
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
577
1.69M
        } \
578
3.07M
    }
579
580
3.54M
    if (h->pixel_shift) {
581
1.95M
        STORE_BLOCK(int32_t)
582
1.95M
    } else {
583
1.58M
        STORE_BLOCK(int16_t)
584
1.58M
    }
585
586
3.54M
    if(zeros_left<0){
587
26.7k
        av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
588
26.7k
        return -1;
589
26.7k
    }
590
591
3.51M
    return 0;
592
3.54M
}
593
594
static av_always_inline
595
int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
596
                         GetBitContext *gb, const uint8_t *scan,
597
                         const uint8_t *scan8x8, int pixel_shift,
598
                         int mb_type, int cbp, int p)
599
1.33M
{
600
1.33M
    int i4x4, i8x8;
601
1.33M
    int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
602
1.33M
    if(IS_INTRA16x16(mb_type)){
603
508k
        AV_ZERO128(sl->mb_luma_dc[p]+0);
604
508k
        AV_ZERO128(sl->mb_luma_dc[p]+8);
605
508k
        AV_ZERO128(sl->mb_luma_dc[p]+16);
606
508k
        AV_ZERO128(sl->mb_luma_dc[p]+24);
607
508k
        if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
608
7.13k
            return -1; //FIXME continue if partitioned and other return -1 too
609
7.13k
        }
610
611
501k
        av_assert2((cbp&15) == 0 || (cbp&15) == 15);
612
613
501k
        if(cbp&15){
614
274k
            for(i8x8=0; i8x8<4; i8x8++){
615
1.08M
                for(i4x4=0; i4x4<4; i4x4++){
616
871k
                    const int index= i4x4 + 4*i8x8 + p*16;
617
871k
                    if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
618
871k
                        index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){
619
14.7k
                        return -1;
620
14.7k
                    }
621
871k
                }
622
225k
            }
623
48.3k
            return 0xf;
624
438k
        }else{
625
438k
            fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
626
438k
            return 0;
627
438k
        }
628
825k
    }else{
629
825k
        int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
630
        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
631
825k
        int new_cbp = 0;
632
3.89M
        for(i8x8=0; i8x8<4; i8x8++){
633
3.15M
            if(cbp & (1<<i8x8)){
634
1.13M
                if(IS_8x8DCT(mb_type)){
635
419k
                    int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
636
419k
                    uint8_t *nnz;
637
2.01M
                    for(i4x4=0; i4x4<4; i4x4++){
638
1.63M
                        const int index= i4x4 + 4*i8x8 + p*16;
639
1.63M
                        if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
640
1.63M
                                            h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 )
641
31.8k
                            return -1;
642
1.63M
                    }
643
387k
                    nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
644
387k
                    nnz[0] += nnz[1] + nnz[8] + nnz[9];
645
387k
                    new_cbp |= !!nnz[0] << i8x8;
646
716k
                }else{
647
3.44M
                    for(i4x4=0; i4x4<4; i4x4++){
648
2.78M
                        const int index= i4x4 + 4*i8x8 + p*16;
649
2.78M
                        if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
650
2.78M
                                            scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){
651
57.9k
                            return -1;
652
57.9k
                        }
653
2.72M
                        new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
654
2.72M
                    }
655
716k
                }
656
2.02M
            }else{
657
2.02M
                uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
658
2.02M
                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
659
2.02M
            }
660
3.15M
        }
661
735k
        return new_cbp;
662
825k
    }
663
1.33M
}
664
665
int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
666
6.88M
{
667
6.88M
    int mb_xy;
668
6.88M
    int partition_count;
669
6.88M
    unsigned int mb_type, cbp;
670
6.88M
    int dct8x8_allowed = h->ps.pps->transform_8x8_mode;
671
6.88M
    const int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2;
672
6.88M
    const int pixel_shift = h->pixel_shift;
673
674
6.88M
    mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
675
676
6.88M
    ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->poc.frame_num, sl->mb_x, sl->mb_y);
677
6.88M
    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
678
                down the code */
679
6.88M
    if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
680
6.29M
        if (sl->mb_skip_run == -1) {
681
1.80M
            unsigned mb_skip_run = get_ue_golomb_long(&sl->gb);
682
1.80M
            if (mb_skip_run > h->mb_num) {
683
17.1k
                av_log(h->avctx, AV_LOG_ERROR, "mb_skip_run %d is invalid\n", mb_skip_run);
684
17.1k
                return AVERROR_INVALIDDATA;
685
17.1k
            }
686
1.79M
            sl->mb_skip_run = mb_skip_run;
687
1.79M
        }
688
689
6.27M
        if (sl->mb_skip_run--) {
690
4.51M
            if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
691
1.33M
                if (sl->mb_skip_run == 0)
692
168k
                    sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
693
1.33M
            }
694
4.51M
            decode_mb_skip(h, sl);
695
4.51M
            return 0;
696
4.51M
        }
697
6.27M
    }
698
2.35M
    if (FRAME_MBAFF(h)) {
699
934k
        if ((sl->mb_y & 1) == 0)
700
512k
            sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
701
934k
    }
702
703
2.35M
    sl->prev_mb_skipped = 0;
704
705
2.35M
    mb_type= get_ue_golomb(&sl->gb);
706
2.35M
    if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
707
821k
        if(mb_type < 23){
708
814k
            partition_count = ff_h264_b_mb_type_info[mb_type].partition_count;
709
814k
            mb_type         = ff_h264_b_mb_type_info[mb_type].type;
710
814k
        }else{
711
6.71k
            mb_type -= 23;
712
6.71k
            goto decode_intra_mb;
713
6.71k
        }
714
1.53M
    } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
715
934k
        if(mb_type < 5){
716
748k
            partition_count = ff_h264_p_mb_type_info[mb_type].partition_count;
717
748k
            mb_type         = ff_h264_p_mb_type_info[mb_type].type;
718
748k
        }else{
719
186k
            mb_type -= 5;
720
186k
            goto decode_intra_mb;
721
186k
        }
722
934k
    }else{
723
595k
       av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
724
595k
        if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
725
35.5k
            mb_type--;
726
789k
decode_intra_mb:
727
789k
        if(mb_type > 25){
728
97.0k
            av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
729
97.0k
            return -1;
730
97.0k
        }
731
691k
        partition_count=0;
732
691k
        cbp                      = ff_h264_i_mb_type_info[mb_type].cbp;
733
691k
        sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode;
734
691k
        mb_type                  = ff_h264_i_mb_type_info[mb_type].type;
735
691k
    }
736
737
2.25M
    if (MB_FIELD(sl))
738
875k
        mb_type |= MB_TYPE_INTERLACED;
739
740
2.25M
    h->slice_table[mb_xy] = sl->slice_num;
741
742
2.25M
    if(IS_INTRA_PCM(mb_type)){
743
3.51k
        const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] *
744
3.51k
                            h->ps.sps->bit_depth_luma;
745
746
        // We assume these blocks are very rare so we do not optimize it.
747
3.51k
        sl->intra_pcm_ptr = align_get_bits(&sl->gb);
748
3.51k
        if (get_bits_left(&sl->gb) < mb_size) {
749
1.52k
            av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
750
1.52k
            return AVERROR_INVALIDDATA;
751
1.52k
        }
752
1.98k
        skip_bits_long(&sl->gb, mb_size);
753
754
        // In deblocking, the quantizer is 0
755
1.98k
        h->cur_pic.qscale_table[mb_xy] = 0;
756
        // All coeffs are present
757
1.98k
        memset(h->non_zero_count[mb_xy], 16, 48);
758
759
1.98k
        h->cur_pic.mb_type[mb_xy] = mb_type;
760
1.98k
        return 0;
761
3.51k
    }
762
763
2.25M
    fill_decode_neighbors(h, sl, mb_type);
764
2.25M
    fill_decode_caches(h, sl, mb_type);
765
766
    //mb_pred
767
2.25M
    if(IS_INTRA(mb_type)){
768
688k
        int pred_mode;
769
//            init_top_left_availability(h);
770
688k
        if(IS_INTRA4x4(mb_type)){
771
200k
            int i;
772
200k
            int di = 1;
773
200k
            if(dct8x8_allowed && get_bits1(&sl->gb)){
774
71.9k
                mb_type |= MB_TYPE_8x8DCT;
775
71.9k
                di = 4;
776
71.9k
            }
777
778
//                fill_intra4x4_pred_table(h);
779
2.54M
            for(i=0; i<16; i+=di){
780
2.34M
                int mode = pred_intra_mode(h, sl, i);
781
782
2.34M
                if(!get_bits1(&sl->gb)){
783
907k
                    const int rem_mode= get_bits(&sl->gb, 3);
784
907k
                    mode = rem_mode + (rem_mode >= mode);
785
907k
                }
786
787
2.34M
                if(di==4)
788
287k
                    fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
789
2.05M
                else
790
2.05M
                    sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
791
2.34M
            }
792
200k
            write_back_intra_pred_mode(h, sl);
793
200k
            if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx,
794
200k
                                                 sl->top_samples_available, sl->left_samples_available) < 0)
795
47.7k
                return -1;
796
487k
        }else{
797
487k
            sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
798
487k
                                                                     sl->left_samples_available, sl->intra16x16_pred_mode, 0);
799
487k
            if (sl->intra16x16_pred_mode < 0)
800
79.0k
                return -1;
801
487k
        }
802
561k
        if(decode_chroma){
803
420k
            pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
804
420k
                                                     sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1);
805
420k
            if(pred_mode < 0)
806
24.2k
                return -1;
807
396k
            sl->chroma_pred_mode = pred_mode;
808
396k
        } else {
809
140k
            sl->chroma_pred_mode = DC_128_PRED8x8;
810
140k
        }
811
1.56M
    }else if(partition_count==4){
812
135k
        int i, j, sub_partition_count[4], list, ref[2][4];
813
814
135k
        if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
815
152k
            for(i=0; i<4; i++){
816
122k
                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
817
122k
                if(sl->sub_mb_type[i] >=13){
818
356
                    av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
819
356
                    return -1;
820
356
                }
821
122k
                sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
822
122k
                sl->sub_mb_type[i]     = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type;
823
122k
            }
824
30.4k
            if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
825
16.7k
                ff_h264_pred_direct_motion(h, sl, &mb_type);
826
16.7k
                sl->ref_cache[0][scan8[4]] =
827
16.7k
                sl->ref_cache[1][scan8[4]] =
828
16.7k
                sl->ref_cache[0][scan8[12]] =
829
16.7k
                sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
830
16.7k
            }
831
104k
        }else{
832
104k
            av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
833
491k
            for(i=0; i<4; i++){
834
400k
                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
835
400k
                if(sl->sub_mb_type[i] >=4){
836
12.9k
                    av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
837
12.9k
                    return -1;
838
12.9k
                }
839
387k
                sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
840
387k
                sl->sub_mb_type[i]     = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type;
841
387k
            }
842
104k
        }
843
844
271k
        for (list = 0; list < sl->list_count; list++) {
845
152k
            int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
846
757k
            for(i=0; i<4; i++){
847
608k
                if(IS_DIRECT(sl->sub_mb_type[i])) continue;
848
553k
                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
849
466k
                    unsigned int tmp;
850
466k
                    if(ref_count == 1){
851
134k
                        tmp= 0;
852
331k
                    }else if(ref_count == 2){
853
155k
                        tmp= get_bits1(&sl->gb)^1;
854
176k
                    }else{
855
176k
                        tmp= get_ue_golomb_31(&sl->gb);
856
176k
                        if(tmp>=ref_count){
857
3.56k
                            av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
858
3.56k
                            return -1;
859
3.56k
                        }
860
176k
                    }
861
462k
                    ref[list][i]= tmp;
862
462k
                }else{
863
                 //FIXME
864
87.3k
                    ref[list][i] = -1;
865
87.3k
                }
866
553k
            }
867
152k
        }
868
869
118k
        if(dct8x8_allowed)
870
47.2k
            dct8x8_allowed = get_dct8x8_allowed(h, sl);
871
872
267k
        for (list = 0; list < sl->list_count; list++) {
873
745k
            for(i=0; i<4; i++){
874
596k
                if(IS_DIRECT(sl->sub_mb_type[i])) {
875
54.3k
                    sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
876
54.3k
                    continue;
877
54.3k
                }
878
542k
                sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
879
542k
                sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
880
881
542k
                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
882
455k
                    const int sub_mb_type= sl->sub_mb_type[i];
883
455k
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
884
1.39M
                    for(j=0; j<sub_partition_count[i]; j++){
885
935k
                        int mx, my;
886
935k
                        const int index= 4*i + block_width*j;
887
935k
                        int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
888
935k
                        pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
889
935k
                        mx += (unsigned)get_se_golomb(&sl->gb);
890
935k
                        my += (unsigned)get_se_golomb(&sl->gb);
891
935k
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
892
893
935k
                        if(IS_SUB_8X8(sub_mb_type)){
894
179k
                            mv_cache[ 1 ][0]=
895
179k
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
896
179k
                            mv_cache[ 1 ][1]=
897
179k
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
898
755k
                        }else if(IS_SUB_8X4(sub_mb_type)){
899
238k
                            mv_cache[ 1 ][0]= mx;
900
238k
                            mv_cache[ 1 ][1]= my;
901
517k
                        }else if(IS_SUB_4X8(sub_mb_type)){
902
106k
                            mv_cache[ 8 ][0]= mx;
903
106k
                            mv_cache[ 8 ][1]= my;
904
106k
                        }
905
935k
                        mv_cache[ 0 ][0]= mx;
906
935k
                        mv_cache[ 0 ][1]= my;
907
935k
                    }
908
455k
                }else{
909
87.0k
                    uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
910
87.0k
                    p[0] = p[1]=
911
87.0k
                    p[8] = p[9]= 0;
912
87.0k
                }
913
542k
            }
914
149k
        }
915
1.42M
    }else if(IS_DIRECT(mb_type)){
916
400k
        ff_h264_pred_direct_motion(h, sl, &mb_type);
917
400k
        dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag;
918
1.02M
    }else{
919
1.02M
        int list, mx, my, i;
920
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
921
1.02M
        if(IS_16X16(mb_type)){
922
1.65M
            for (list = 0; list < sl->list_count; list++) {
923
947k
                    unsigned int val;
924
947k
                    if(IS_DIR(mb_type, 0, list)){
925
785k
                        unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
926
785k
                        if (rc == 1) {
927
121k
                            val= 0;
928
663k
                        } else if (rc == 2) {
929
298k
                            val= get_bits1(&sl->gb)^1;
930
365k
                        }else{
931
365k
                            val= get_ue_golomb_31(&sl->gb);
932
365k
                            if (val >= rc) {
933
12.3k
                                av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
934
12.3k
                                return -1;
935
12.3k
                            }
936
365k
                        }
937
773k
                    fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
938
773k
                    }
939
947k
            }
940
1.64M
            for (list = 0; list < sl->list_count; list++) {
941
933k
                if(IS_DIR(mb_type, 0, list)){
942
773k
                    pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
943
773k
                    mx += (unsigned)get_se_golomb(&sl->gb);
944
773k
                    my += (unsigned)get_se_golomb(&sl->gb);
945
773k
                    ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
946
947
773k
                    fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
948
773k
                }
949
933k
            }
950
712k
        }
951
302k
        else if(IS_16X8(mb_type)){
952
367k
            for (list = 0; list < sl->list_count; list++) {
953
665k
                    for(i=0; i<2; i++){
954
446k
                        unsigned int val;
955
446k
                        if(IS_DIR(mb_type, i, list)){
956
330k
                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
957
330k
                            if (rc == 1) {
958
36.7k
                                val= 0;
959
293k
                            } else if (rc == 2) {
960
230k
                                val= get_bits1(&sl->gb)^1;
961
230k
                            }else{
962
62.9k
                                val= get_ue_golomb_31(&sl->gb);
963
62.9k
                                if (val >= rc) {
964
4.91k
                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
965
4.91k
                                    return -1;
966
4.91k
                                }
967
62.9k
                            }
968
330k
                        }else
969
116k
                            val= LIST_NOT_USED&0xFF;
970
441k
                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
971
441k
                    }
972
224k
            }
973
361k
            for (list = 0; list < sl->list_count; list++) {
974
654k
                for(i=0; i<2; i++){
975
436k
                    unsigned int val;
976
436k
                    if(IS_DIR(mb_type, i, list)){
977
322k
                        pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
978
322k
                        mx += (unsigned)get_se_golomb(&sl->gb);
979
322k
                        my += (unsigned)get_se_golomb(&sl->gb);
980
322k
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
981
982
322k
                        val= pack16to32(mx,my);
983
322k
                    }else
984
114k
                        val=0;
985
436k
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
986
436k
                }
987
218k
            }
988
154k
        }else{
989
154k
            av_assert2(IS_8X16(mb_type));
990
387k
            for (list = 0; list < sl->list_count; list++) {
991
703k
                    for(i=0; i<2; i++){
992
471k
                        unsigned int val;
993
471k
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
994
352k
                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
995
352k
                            if (rc == 1) {
996
53.5k
                                val= 0;
997
299k
                            } else if (rc == 2) {
998
251k
                                val= get_bits1(&sl->gb)^1;
999
251k
                            }else{
1000
48.1k
                                val= get_ue_golomb_31(&sl->gb);
1001
48.1k
                                if (val >= rc) {
1002
5.18k
                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1003
5.18k
                                    return -1;
1004
5.18k
                                }
1005
48.1k
                            }
1006
352k
                        }else
1007
118k
                            val= LIST_NOT_USED&0xFF;
1008
465k
                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1009
465k
                    }
1010
237k
            }
1011
381k
            for (list = 0; list < sl->list_count; list++) {
1012
696k
                for(i=0; i<2; i++){
1013
464k
                    unsigned int val;
1014
464k
                    if(IS_DIR(mb_type, i, list)){
1015
346k
                        pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1016
346k
                        mx += (unsigned)get_se_golomb(&sl->gb);
1017
346k
                        my += (unsigned)get_se_golomb(&sl->gb);
1018
346k
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1019
1020
346k
                        val= pack16to32(mx,my);
1021
346k
                    }else
1022
117k
                        val=0;
1023
464k
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1024
464k
                }
1025
232k
            }
1026
149k
        }
1027
1.02M
    }
1028
1029
2.06M
    if(IS_INTER(mb_type))
1030
1.52M
        write_back_motion(h, sl, mb_type);
1031
1032
2.06M
    if(!IS_INTRA16x16(mb_type)){
1033
1.66M
        cbp= get_ue_golomb(&sl->gb);
1034
1035
1.66M
        if(decode_chroma){
1036
1.41M
            if(cbp > 47){
1037
34.7k
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1038
34.7k
                return -1;
1039
34.7k
            }
1040
1.37M
            if (IS_INTRA4x4(mb_type))
1041
91.2k
                cbp = ff_h264_golomb_to_intra4x4_cbp[cbp];
1042
1.28M
            else
1043
1.28M
                cbp = ff_h264_golomb_to_inter_cbp[cbp];
1044
1.37M
        }else{
1045
255k
            if(cbp > 15){
1046
37.5k
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1047
37.5k
                return -1;
1048
37.5k
            }
1049
217k
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1050
170k
            else                     cbp= golomb_to_inter_cbp_gray[cbp];
1051
217k
        }
1052
1.66M
    } else {
1053
395k
        if (!decode_chroma && cbp>15) {
1054
14.2k
            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1055
14.2k
            return AVERROR_INVALIDDATA;
1056
14.2k
        }
1057
395k
    }
1058
1059
1.97M
    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1060
206k
        mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1061
206k
    }
1062
1.97M
    sl->cbp=
1063
1.97M
    h->cbp_table[mb_xy]= cbp;
1064
1.97M
    h->cur_pic.mb_type[mb_xy] = mb_type;
1065
1066
1.97M
    if(cbp || IS_INTRA16x16(mb_type)){
1067
1.10M
        int i4x4, i8x8, chroma_idx;
1068
1.10M
        int dquant;
1069
1.10M
        int ret;
1070
1.10M
        GetBitContext *gb = &sl->gb;
1071
1.10M
        const uint8_t *scan, *scan8x8;
1072
1.10M
        const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
1073
1074
1.10M
        dquant= get_se_golomb(&sl->gb);
1075
1076
1.10M
        sl->qscale += (unsigned)dquant;
1077
1078
1.10M
        if (((unsigned)sl->qscale) > max_qp){
1079
10.5k
            if (sl->qscale < 0) sl->qscale += max_qp + 1;
1080
6.50k
            else                sl->qscale -= max_qp+1;
1081
10.5k
            if (((unsigned)sl->qscale) > max_qp){
1082
4.70k
                av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1083
4.70k
                sl->qscale = max_qp;
1084
4.70k
                return -1;
1085
4.70k
            }
1086
10.5k
        }
1087
1088
1.10M
        sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
1089
1.10M
        sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
1090
1091
1.10M
        if(IS_INTERLACED(mb_type)){
1092
407k
            scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1093
407k
            scan    = sl->qscale ? h->field_scan : h->field_scan_q0;
1094
697k
        }else{
1095
697k
            scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1096
697k
            scan    = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1097
697k
        }
1098
1099
1.10M
        if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1100
83.2k
            return -1;
1101
83.2k
        }
1102
1.02M
        h->cbp_table[mb_xy] |= ret << 12;
1103
1.02M
        if (CHROMA444(h)) {
1104
120k
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1105
11.0k
                return -1;
1106
11.0k
            }
1107
109k
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1108
17.3k
                return -1;
1109
17.3k
            }
1110
901k
        } else {
1111
901k
            const int num_c8x8 = h->ps.sps->chroma_format_idc;
1112
1113
901k
            if(cbp&0x30){
1114
949k
                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1115
633k
                    if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1116
633k
                                        CHROMA_DC_BLOCK_INDEX + chroma_idx,
1117
633k
                                        CHROMA422(h) ? ff_h264_chroma422_dc_scan : ff_h264_chroma_dc_scan,
1118
633k
                                        NULL, 4 * num_c8x8) < 0) {
1119
1.88k
                        return -1;
1120
1.88k
                    }
1121
317k
            }
1122
1123
899k
            if(cbp&0x20){
1124
366k
                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1125
262k
                    const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1126
262k
                    int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1127
561k
                    for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1128
1.57M
                        for (i4x4 = 0; i4x4 < 4; i4x4++) {
1129
1.27M
                            const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1130
1.27M
                            if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1131
30.8k
                                return -1;
1132
1.24M
                            mb += 16 << pixel_shift;
1133
1.24M
                        }
1134
329k
                    }
1135
262k
                }
1136
764k
            }else{
1137
764k
                fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1138
764k
                fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1139
764k
            }
1140
899k
        }
1141
1.02M
    }else{
1142
865k
        fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1143
865k
        fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1144
865k
        fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1145
865k
    }
1146
1.82M
    h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1147
1.82M
    write_back_non_zero_count(h, sl);
1148
1149
1.82M
    return 0;
1150
1.97M
}