Coverage Report

Created: 2025-08-28 07:12

/src/ffmpeg/libavcodec/h264_cavlc.c
Line
Count
Source
1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
/**
23
 * @file
24
 * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding.
25
 * @author Michael Niedermayer <michaelni@gmx.at>
26
 */
27
28
15.2M
#define CABAC(h) 0
29
#define UNCHECKED_BITSTREAM_READER 1
30
31
#include "h264dec.h"
32
#include "h264_mvpred.h"
33
#include "h264data.h"
34
#include "golomb.h"
35
#include "mpegutils.h"
36
#include "libavutil/avassert.h"
37
38
39
/* 16-entry remapping table; every value 0..15 occurs exactly once.
 * NOTE(review): the consumer is outside this view -- presumably maps a
 * Golomb-coded index to an inter coded block pattern; confirm at the use site. */
static const uint8_t golomb_to_inter_cbp_gray[16]={
40
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
41
};
42
43
/* 16-entry remapping table; every value 0..15 occurs exactly once.
 * NOTE(review): the consumer is outside this view -- presumably the intra 4x4
 * counterpart of the table above; confirm at the use site. */
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
44
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
45
};
46
47
/* Code lengths of the 4*5 chroma DC coeff_token codes; handed to
 * VLC_INIT_STATIC_TABLE in ff_h264_decode_init_vlc(). A length of 0 marks an
 * entry without a code (presumably skipped by the VLC builder -- confirm).
 * decode_residual() splits the decoded symbol as total_coeff = symbol >> 2 and
 * trailing_ones = symbol & 3, so each row of four shares one total_coeff
 * (assuming the symbol equals the table index -- confirm against the VLC API). */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
48
 2, 0, 0, 0,
49
 6, 1, 0, 0,
50
 6, 6, 3, 0,
51
 6, 7, 7, 6,
52
 6, 8, 8, 7,
53
};
54
55
/* Code words matching chroma_dc_coeff_token_len entry for entry. */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
56
 1, 0, 0, 0,
57
 7, 1, 0, 0,
58
 4, 6, 1, 0,
59
 3, 3, 2, 5,
60
 2, 3, 2, 0,
61
};
62
63
/* Code lengths of the 4*9 coeff_token codes used for the second chroma DC
 * table (selected in decode_residual() when max_coeff is <= 8 but not 4). */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
64
  1,  0,  0,  0,
65
  7,  2,  0,  0,
66
  7,  7,  3,  0,
67
  9,  7,  7,  5,
68
  9,  9,  7,  6,
69
 10, 10,  9,  7,
70
 11, 11, 10,  7,
71
 12, 12, 11, 10,
72
 13, 12, 12, 11,
73
};
74
75
/* Code words matching chroma422_dc_coeff_token_len entry for entry. */
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
76
  1,   0,  0, 0,
77
 15,   1,  0, 0,
78
 14,  13,  1, 0,
79
  7,  12, 11, 1,
80
  6,   5, 10, 1,
81
  7,   6,  4, 9,
82
  7,   6,  5, 8,
83
  7,   6,  5, 4,
84
  7,   5,  4, 4,
85
};
86
87
/* Code lengths for the four general coeff_token VLCs, 4*17 entries each.
 * ff_h264_decode_init_vlc() builds one VLC per outer index and maps the 17
 * possible predicted non-zero counts onto them via coeff_token_table_index. */
static const uint8_t coeff_token_len[4][4*17]={
88
{
89
     1, 0, 0, 0,
90
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
91
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
92
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
93
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
94
},
95
{
96
     2, 0, 0, 0,
97
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
98
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
99
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
100
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
101
},
102
{
103
     4, 0, 0, 0,
104
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
105
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
106
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
107
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
108
},
109
{
110
     6, 0, 0, 0,
111
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
112
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
113
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
114
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
115
}
116
};
117
118
/* Code words matching coeff_token_len entry for entry. */
static const uint8_t coeff_token_bits[4][4*17]={
119
{
120
     1, 0, 0, 0,
121
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
122
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
123
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
124
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
125
},
126
{
127
     3, 0, 0, 0,
128
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
129
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
130
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
131
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
132
},
133
{
134
    15, 0, 0, 0,
135
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
136
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
137
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
138
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
139
},
140
{
141
     3, 0, 0, 0,
142
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
143
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
144
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
145
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
146
}
147
};
148
149
/* Code lengths for the total_zeros VLCs. Row i (0..14) is turned into
 * total_zeros_vlc[i + 1] by ff_h264_decode_init_vlc(), which decode_residual()
 * indexes by total_coeff. Only 15 of the 16 declared rows are initialised
 * here; the rest of the array is implicitly zero. */
static const uint8_t total_zeros_len[16][16]= {
150
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
151
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
152
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
153
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
154
    {4,4,4,3,3,3,3,3,4,5,4,5},
155
    {6,5,3,3,3,3,3,3,4,3,6},
156
    {6,5,3,3,3,2,3,4,3,6},
157
    {6,4,5,3,2,2,3,3,6},
158
    {6,6,4,2,2,3,2,5},
159
    {5,5,3,2,2,2,4},
160
    {4,4,3,3,1,3},
161
    {4,4,2,1,3},
162
    {3,3,1,2},
163
    {2,2,1},
164
    {1,1},
165
};
166
167
/* Code words matching total_zeros_len entry for entry. */
static const uint8_t total_zeros_bits[16][16]= {
168
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
169
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
170
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
171
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
172
    {5,4,3,7,6,5,4,3,2,1,1,0},
173
    {1,1,7,6,5,4,3,2,1,1,0},
174
    {1,1,5,4,3,3,2,1,1,0},
175
    {1,1,1,3,3,2,2,1,0},
176
    {1,0,1,3,2,1,1,1},
177
    {1,0,1,3,2,1,1},
178
    {0,1,1,2,1,3},
179
    {0,1,1,1,1},
180
    {0,1,1,1},
181
    {0,1,1},
182
    {0,1},
183
};
184
185
/* Code lengths for the chroma DC total_zeros VLCs; row i becomes
 * chroma_dc_total_zeros_vlc[i + 1] in ff_h264_decode_init_vlc(). */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
186
    { 1, 2, 3, 3,},
187
    { 1, 2, 2, 0,},
188
    { 1, 1, 0, 0,},
189
};
190
191
/* Code words matching chroma_dc_total_zeros_len entry for entry. */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
192
    { 1, 1, 1, 0,},
193
    { 1, 1, 0, 0,},
194
    { 1, 0, 0, 0,},
195
};
196
197
/* Code lengths for the second set of chroma DC total_zeros VLCs; row i
 * becomes chroma422_dc_total_zeros_vlc[i + 1] in ff_h264_decode_init_vlc(). */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
198
    { 1, 3, 3, 4, 4, 4, 5, 5 },
199
    { 3, 2, 3, 3, 3, 3, 3 },
200
    { 3, 3, 2, 2, 3, 3 },
201
    { 3, 2, 2, 2, 3 },
202
    { 2, 2, 2, 2 },
203
    { 2, 2, 1 },
204
    { 1, 1 },
205
};
206
207
/* Code words matching chroma422_dc_total_zeros_len entry for entry. */
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
208
    { 1, 2, 3, 2, 3, 1, 1, 0 },
209
    { 0, 1, 1, 4, 5, 6, 7 },
210
    { 0, 1, 1, 2, 6, 7 },
211
    { 6, 0, 1, 2, 7 },
212
    { 0, 1, 2, 3 },
213
    { 0, 1, 1 },
214
    { 0, 1 },
215
};
216
217
/* Code lengths for the run_before VLCs. ff_h264_decode_init_vlc() builds
 * run_vlc[i + 1] from rows 0..5 (7 codes each) and the larger table stored at
 * the start of run7_vlc_table from row 6 (16 codes). */
static const uint8_t run_len[7][16]={
218
    {1,1},
219
    {1,2,2},
220
    {2,2,2,2},
221
    {2,2,2,3,3},
222
    {2,2,3,3,3,3},
223
    {2,3,3,3,3,3,3},
224
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
225
};
226
227
/* Code words matching run_len entry for entry. */
static const uint8_t run_bits[7][16]={
228
    {1,0},
229
    {1,1,0},
230
    {3,2,1,0},
231
    {3,2,1,1,0},
232
    {3,2,3,2,1,0},
233
    {3,0,1,3,2,5,4},
234
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
235
};
236
237
2.22M
/* Number of bits decode_residual() peeks with show_bits() to index cavlc_level_tab. */
#define LEVEL_TAB_BITS 8
238
/* [suffix_length][peeked bits][0] holds a decoded level or a 100+prefix escape,
 * [..][1] the number of bits to skip; filled by init_cavlc_level_tab(). */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
239
240
406k
/* Bit widths passed as the "bits" argument both when the VLC tables are built
 * in ff_h264_decode_init_vlc() and when they are read with get_vlc2(). */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
241
101k
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
242
5.69M
#define COEFF_TOKEN_VLC_BITS           8
243
2.46M
#define TOTAL_ZEROS_VLC_BITS           9
244
281k
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
245
45.5k
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
246
1.69M
#define RUN_VLC_BITS                   3
247
491k
#define RUN7_VLC_BITS                  6
248
249
/// 17 pointers to only four different VLCs
/// (indexed by the value returned from pred_non_zero_count() in decode_residual())
250
static const VLCElem *coeff_token_vlc[17];
251
252
static VLCElem chroma_dc_coeff_token_vlc_table[256];
253
254
static VLCElem chroma422_dc_coeff_token_vlc_table[1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS];
255
256
/* The "+1" pointer arrays below are filled at [i + 1] by
 * ff_h264_decode_init_vlc(); slot 0 is never assigned there. */
static const VLCElem *total_zeros_vlc[15+1];
257
258
static const VLCElem *chroma_dc_total_zeros_vlc[3+1];
259
260
static const VLCElem *chroma422_dc_total_zeros_vlc[7+1];
261
262
static const VLCElem *run_vlc[6+1];
263
264
// The other pointers to VLCElem point into this array.
// Its start holds the run_before table that decode_residual() reads directly
// as run7_vlc_table; ff_h264_decode_init_vlc() asserts that the total size
// below is consumed exactly.
265
static VLCElem run7_vlc_table[96 + (6  << RUN_VLC_BITS)
266
                                 + (15 << TOTAL_ZEROS_VLC_BITS)
267
                                 + (3  << CHROMA_DC_TOTAL_ZEROS_VLC_BITS)
268
                                 + (7  << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS)
269
                                 + (520 + 332 + 280 + 256) /* coeff token */];
270
271
/**
272
 * Get the predicted number of non-zero coefficients: combines the left (index8 - 1) and top (index8 - 8) entries of sl->non_zero_count_cache.
273
 * @param n block index (translated through scan8[] into a cache position)
 * @return the combined prediction masked to 0..31
274
 */
275
static inline int pred_non_zero_count(const H264Context *h, const H264SliceContext *sl, int n)
276
5.69M
{
277
5.69M
    const int index8= scan8[n];
278
5.69M
    const int left = sl->non_zero_count_cache[index8 - 1];
279
5.69M
    const int top  = sl->non_zero_count_cache[index8 - 8];
280
5.69M
    int i= left + top;
281
282
5.69M
    /* Sums below 64 are averaged with rounding up; larger sums are left as is.
     * NOTE(review): presumably a cache entry of 64 flags an unavailable
     * neighbour, so the final &31 leaves just the other neighbour's count -- confirm. */
    if(i<64) i= (i+1)>>1;
283
284
5.69M
    ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
285
286
5.69M
    return i&31;
287
5.69M
}
288
289
2
/**
 * Fill cavlc_level_tab for every suffix_length (0..6) and every
 * LEVEL_TAB_BITS-wide bit pattern.
 * Entry [0] is either a fully decoded signed level or an escape value of
 * 100 + prefix (decode_residual() treats values >= 100 as escapes); entry [1]
 * is the number of bits the caller has to skip.
 */
static av_cold void init_cavlc_level_tab(void){
290
2
    int suffix_length;
291
2
    unsigned int i;
292
293
16
    for(suffix_length=0; suffix_length<7; suffix_length++){
294
3.59k
        for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
295
3.58k
            /* presumably the count of leading zero bits of i within the
             * LEVEL_TAB_BITS window (relies on av_log2() semantics -- confirm) */
            int prefix= LEVEL_TAB_BITS - av_log2(2*i);
296
297
3.58k
            if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
298
3.33k
                /* prefix, terminating bit and suffix all fit in the window:
                 * store the final level and its total length */
                int level_code = (prefix << suffix_length) +
299
3.33k
                    (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
300
3.33k
                /* odd level_code -> negative level, even -> positive */
                int mask = -(level_code&1);
301
3.33k
                level_code = (((2 + level_code) >> 1) ^ mask) - mask;
302
3.33k
                cavlc_level_tab[suffix_length][i][0]= level_code;
303
3.33k
                cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
304
3.33k
            }else if(prefix + 1 <= LEVEL_TAB_BITS){
305
240
                /* only the prefix fits: store an escape, the caller reads the suffix itself */
                cavlc_level_tab[suffix_length][i][0]= prefix+100;
306
240
                cavlc_level_tab[suffix_length][i][1]= prefix + 1;
307
240
            }else{
308
14
                /* the prefix does not end inside the window: the caller
                 * continues counting with get_level_prefix() */
                cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
309
14
                cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
310
14
            }
311
3.58k
        }
312
14
    }
313
2
}
314
315
/**
 * Build all static CAVLC lookup tables of this file.
 * The two chroma DC coeff_token tables use their own arrays; every other VLC
 * is carved out of run7_vlc_table through the shared VLCInitState, so the
 * order of the ff_vlc_init_tables() calls determines the layout.
 * Finishes by filling cavlc_level_tab via init_cavlc_level_tab().
 */
av_cold void ff_h264_decode_init_vlc(void)
316
2
{
317
2
    const VLCElem *coeff_token_vlc_original[4];
318
2
    VLCInitState state = VLC_INIT_STATE(run7_vlc_table);
319
320
2
    VLC_INIT_STATIC_TABLE(chroma_dc_coeff_token_vlc_table,
321
2
                          CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4 * 5,
322
2
                          &chroma_dc_coeff_token_len [0], 1, 1,
323
2
                          &chroma_dc_coeff_token_bits[0], 1, 1, 0);
324
325
2
    VLC_INIT_STATIC_TABLE(chroma422_dc_coeff_token_vlc_table,
326
2
                          CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4 * 9,
327
2
                          &chroma422_dc_coeff_token_len [0], 1, 1,
328
2
                          &chroma422_dc_coeff_token_bits[0], 1, 1, 0);
329
330
2
    /* First table taken from state; the return value is not kept because
     * decode_residual() accesses it through run7_vlc_table itself. */
    ff_vlc_init_tables(&state, RUN7_VLC_BITS, 16,
331
2
                       &run_len [6][0], 1, 1,
332
2
                       &run_bits[6][0], 1, 1, 0);
333
334
14
    for (int i = 0; i < 6; i++) {
335
12
        run_vlc[i + 1] = ff_vlc_init_tables(&state, RUN_VLC_BITS, 7,
336
12
                                            &run_len [i][0], 1, 1,
337
12
                                            &run_bits[i][0], 1, 1, 0);
338
12
    }
339
340
10
    for (int i = 0; i < 4; i++) {
341
8
        coeff_token_vlc_original[i] =
342
8
            ff_vlc_init_tables(&state, COEFF_TOKEN_VLC_BITS, 4*17,
343
8
                               &coeff_token_len [i][0], 1, 1,
344
8
                               &coeff_token_bits[i][0], 1, 1, 0);
345
8
    }
346
36
    /* Map each of the 17 possible predicted counts to one of the four tables. */
    for (int i = 0; i < FF_ARRAY_ELEMS(coeff_token_vlc); i++) {
347
34
        static const uint8_t coeff_token_table_index[17] = {
348
34
            0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3
349
34
        };
350
34
        coeff_token_vlc[i] = coeff_token_vlc_original[coeff_token_table_index[i]];
351
34
    }
352
353
8
    for (int i = 0; i < 3; i++) {
354
6
        chroma_dc_total_zeros_vlc[i + 1] =
355
6
            ff_vlc_init_tables(&state, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
356
6
                               &chroma_dc_total_zeros_len [i][0], 1, 1,
357
6
                               &chroma_dc_total_zeros_bits[i][0], 1, 1, 0);
358
6
    }
359
360
16
    for (int i = 0; i < 7; i++) {
361
14
        chroma422_dc_total_zeros_vlc[i + 1] =
362
14
            ff_vlc_init_tables(&state, CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
363
14
                               &chroma422_dc_total_zeros_len [i][0], 1, 1,
364
14
                               &chroma422_dc_total_zeros_bits[i][0], 1, 1, 0);
365
14
    }
366
367
32
    for (int i = 0; i < 15; i++) {
368
30
        total_zeros_vlc[i + 1] =
369
30
            ff_vlc_init_tables(&state, TOTAL_ZEROS_VLC_BITS, 16,
370
30
                               &total_zeros_len [i][0], 1, 1,
371
30
                               &total_zeros_bits[i][0], 1, 1, 0);
372
30
    }
373
    /*
374
     * This is a one time safety check to make sure that
375
     * the vlc table sizes were initialized correctly.
376
     */
377
2
    av_assert1(state.size == 0);
378
379
2
    init_cavlc_level_tab();
380
2
}
381
382
114k
/**
 * Read the rest of a level prefix straight from the bit reader cache.
 * Skips log = 32 - av_log2(cache) bits and returns log - 1.
 * NOTE(review): presumably this is the number of zero bits ahead of the first
 * set bit, with that set bit consumed as well (assumes a 32-bit, MSB-aligned
 * cache -- confirm against the get_bits reader macros).
 */
static inline int get_level_prefix(GetBitContext *gb){
383
114k
    unsigned int buf;
384
114k
    int log;
385
386
114k
    OPEN_READER(re, gb);
387
114k
    UPDATE_CACHE(re, gb);
388
114k
    buf=GET_CACHE(re, gb);
389
390
114k
    log= 32 - av_log2(buf);
391
392
114k
    LAST_SKIP_BITS(re, gb, log);
393
114k
    CLOSE_READER(re, gb);
394
395
114k
    return log-1;
396
114k
}
397
398
/**
399
 * Decode a residual block: reads coeff_token, the levels, total_zeros and the run_before values, then stores the coefficients into block.
400
 * @param n block index (used with scan8[] to update sl->non_zero_count_cache; for n >= LUMA_DC_BLOCK_INDEX the levels are stored without applying qmul)
401
 * @param scantable scantable (maps coefficient positions to indices in block)
 * @param qmul per-position multipliers applied as (level * qmul + 32) >> 6; not read when n >= LUMA_DC_BLOCK_INDEX
402
 * @param max_coeff number of coefficients in the block (4 and other values <= 8 select the two chroma DC table sets)
403
 * @return <0 if an error occurred, 0 otherwise
 *
 * NOTE(review): the error paths return a mix of -1 and AVERROR_INVALIDDATA;
 * the callers visible in this file only test for < 0.
404
 */
405
static int decode_residual(const H264Context *h, H264SliceContext *sl,
406
                           GetBitContext *gb, int16_t *block, int n,
407
                           const uint8_t *scantable, const uint32_t *qmul,
408
                           int max_coeff)
409
6.20M
{
410
6.20M
    int level[16];
411
6.20M
    int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
412
413
    //FIXME put trailing_onex into the context
414
415
6.20M
    /* Pick the coeff_token VLC: dedicated chroma DC tables for max_coeff <= 8,
     * otherwise the table selected by the predicted non-zero count. */
    if(max_coeff <= 8){
416
507k
        if (max_coeff == 4)
417
406k
            coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc_table,
418
406k
                                   CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
419
101k
        else
420
101k
            coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc_table,
421
101k
                                   CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
422
5.69M
    }else{
423
5.69M
        /* total_coeff temporarily holds the prediction used as table selector */
        total_coeff = pred_non_zero_count(h, sl, n >= LUMA_DC_BLOCK_INDEX ?
424
5.24M
                                                 (n - LUMA_DC_BLOCK_INDEX) * 16 : n);
425
5.69M
        coeff_token = get_vlc2(gb, coeff_token_vlc[total_coeff],
426
5.69M
                               COEFF_TOKEN_VLC_BITS, 2);
427
5.69M
    }
428
6.20M
    /* coeff_token packs total_coeff above the low two bits, which carry trailing_ones */
    total_coeff = coeff_token >> 2;
429
6.20M
    sl->non_zero_count_cache[scan8[n]] = total_coeff;
430
431
    //FIXME set last_non_zero?
432
433
6.20M
    if(total_coeff==0)
434
3.31M
        return 0;
435
2.89M
    if(total_coeff > (unsigned)max_coeff) {
436
48.0k
        av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
437
48.0k
        return -1;
438
48.0k
    }
439
440
2.84M
    trailing_ones= coeff_token&3;
441
2.84M
    ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
442
2.84M
    av_assert2(total_coeff<=16);
443
444
2.84M
    /* Peek three sign bits but consume only trailing_ones of them;
     * a set bit gives -1, a clear bit +1. */
    i = show_bits(gb, 3);
445
2.84M
    skip_bits(gb, trailing_ones);
446
2.84M
    level[0] = 1-((i&4)>>1);
447
2.84M
    level[1] = 1-((i&2)   );
448
2.84M
    level[2] = 1-((i&1)<<1);
449
450
2.84M
    if(trailing_ones<total_coeff) {
451
779k
        int mask, prefix;
452
779k
        /* bitwise & of two 0/1 comparison results: 1 only if both hold */
        int suffix_length = total_coeff > 10 & trailing_ones < 3;
453
779k
        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
454
779k
        int level_code= cavlc_level_tab[suffix_length][bitsi][0];
455
456
779k
        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
457
779k
        /* values >= 100 are the escapes written by init_cavlc_level_tab():
         * value - 100 is the prefix seen so far */
        if(level_code >= 100){
458
56.4k
            prefix= level_code - 100;
459
56.4k
            if(prefix == LEVEL_TAB_BITS)
460
55.9k
                prefix += get_level_prefix(gb);
461
462
            //first coefficient has suffix_length equal to 0 or 1
463
56.4k
            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
464
23.2k
                if(suffix_length)
465
1.32k
                    level_code= (prefix<<1) + get_bits1(gb); //part
466
21.9k
                else
467
21.9k
                    level_code= prefix; //part
468
33.1k
            }else if(prefix==14){
469
2.17k
                if(suffix_length)
470
422
                    level_code= (prefix<<1) + get_bits1(gb); //part
471
1.75k
                else
472
1.75k
                    level_code= prefix + get_bits(gb, 4); //part
473
31.0k
            }else{
474
31.0k
                level_code= 30;
475
31.0k
                if(prefix>=16){
476
28.8k
                    /* prefixes above 28 are rejected before they are used as shift / bit counts */
                    if(prefix > 25+3){
477
22.0k
                        av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
478
22.0k
                        return -1;
479
22.0k
                    }
480
6.81k
                    level_code += (1<<(prefix-3))-4096;
481
6.81k
                }
482
8.99k
                level_code += get_bits(gb, prefix-3); //part
483
8.99k
            }
484
485
34.4k
            if(trailing_ones < 3) level_code += 2;
486
487
34.4k
            suffix_length = 2;
488
34.4k
            /* odd level_code -> negative level, even -> positive */
            mask= -(level_code&1);
489
34.4k
            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
490
722k
        }else{
491
722k
            /* when trailing_ones < 3, push the table-decoded level one step away from zero */
            level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
492
493
722k
            /* 2 when the level lies outside -3..3, else 1 */
            suffix_length = 1 + (level_code + 3U > 6U);
494
722k
            level[trailing_ones]= level_code;
495
722k
        }
496
497
        //remaining coefficients have suffix_length > 0
498
2.02M
        for(i=trailing_ones+1;i<total_coeff;i++) {
499
1.28M
            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
500
1.28M
            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
501
1.28M
            level_code= cavlc_level_tab[suffix_length][bitsi][0];
502
503
1.28M
            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
504
1.28M
            if(level_code >= 100){
505
93.2k
                prefix= level_code - 100;
506
93.2k
                if(prefix == LEVEL_TAB_BITS){
507
58.1k
                    prefix += get_level_prefix(gb);
508
58.1k
                }
509
93.2k
                if(prefix<15){
510
69.9k
                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
511
69.9k
                }else{
512
23.3k
                    level_code = 15<<suffix_length;
513
23.3k
                    if (prefix>=16) {
514
20.3k
                        if(prefix > 25+3){
515
11.7k
                            av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
516
11.7k
                            return AVERROR_INVALIDDATA;
517
11.7k
                        }
518
8.55k
                        level_code += (1<<(prefix-3))-4096;
519
8.55k
                    }
520
11.5k
                    level_code += get_bits(gb, prefix-3);
521
11.5k
                }
522
81.5k
                mask= -(level_code&1);
523
81.5k
                level_code= (((2+level_code)>>1) ^ mask) - mask;
524
81.5k
            }
525
1.27M
            level[i]= level_code;
526
1.27M
            /* single unsigned range check: increments suffix_length when
             * |level_code| exceeds suffix_limit[suffix_length] */
            suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
527
1.27M
        }
528
757k
    }
529
530
2.81M
    /* A full block has no zeros left to place; otherwise read total_zeros
     * from the table set matching the block kind. */
    if(total_coeff == max_coeff)
531
14.4k
        zeros_left=0;
532
2.79M
    else{
533
2.79M
        if (max_coeff <= 8) {
534
326k
            if (max_coeff == 4)
535
281k
                zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff],
536
281k
                                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
537
45.5k
            else
538
45.5k
                zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff],
539
45.5k
                                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
540
2.46M
        } else {
541
2.46M
            zeros_left = get_vlc2(gb, total_zeros_vlc[total_coeff],
542
2.46M
                                  TOTAL_ZEROS_VLC_BITS, 1);
543
2.46M
        }
544
2.79M
    }
545
546
2.81M
/* STORE_BLOCK walks scantable backwards from position
 * zeros_left + total_coeff - 1, writing one level per step and reading a
 * run_before value between levels while zeros remain. Blocks with
 * n >= LUMA_DC_BLOCK_INDEX are stored raw, all others as
 * (level * qmul + 32) >> 6. "type" is the element type of block. */
#define STORE_BLOCK(type) \
547
2.81M
    scantable += zeros_left + total_coeff - 1; \
548
2.81M
    if(n >= LUMA_DC_BLOCK_INDEX){ \
549
417k
        ((type*)block)[*scantable] = level[0]; \
550
534k
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
551
116k
            if(zeros_left < 7) \
552
116k
                run_before = get_vlc2(gb, run_vlc[zeros_left], RUN_VLC_BITS, 1); \
553
116k
            else \
554
116k
                run_before = get_vlc2(gb, run7_vlc_table, RUN7_VLC_BITS, 2); \
555
116k
            zeros_left -= run_before; \
556
116k
            scantable -= 1 + run_before; \
557
116k
            ((type*)block)[*scantable]= level[i]; \
558
116k
        } \
559
541k
        for(;i<total_coeff;i++) { \
560
123k
            scantable--; \
561
123k
            ((type*)block)[*scantable]= level[i]; \
562
123k
        } \
563
2.39M
    }else{ \
564
2.39M
        ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
565
4.46M
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
566
2.07M
            if(zeros_left < 7) \
567
2.07M
                run_before = get_vlc2(gb, run_vlc[zeros_left], RUN_VLC_BITS, 1); \
568
2.07M
            else \
569
2.07M
                run_before = get_vlc2(gb, run7_vlc_table, RUN7_VLC_BITS, 2); \
570
2.07M
            zeros_left -= run_before; \
571
2.07M
            scantable -= 1 + run_before; \
572
2.07M
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
573
2.07M
        } \
574
3.44M
        for(;i<total_coeff;i++) { \
575
1.05M
            scantable--; \
576
1.05M
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
577
1.05M
        } \
578
2.39M
    }
579
580
2.81M
    /* element width of block follows h->pixel_shift: int32_t when set, int16_t otherwise */
    if (h->pixel_shift) {
581
1.44M
        STORE_BLOCK(int32_t)
582
1.44M
    } else {
583
1.36M
        STORE_BLOCK(int16_t)
584
1.36M
    }
585
586
2.81M
    /* NOTE(review): this check only runs after the stores above; confirm that
     * scantable cannot be stepped before its start on corrupt input. */
    if(zeros_left<0){
587
17.9k
        av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
588
17.9k
        return -1;
589
17.9k
    }
590
591
2.79M
    return 0;
592
2.81M
}
593
594
/**
 * Decode the CAVLC residual blocks of one plane of a macroblock.
 * @param scan     scantable for 4x4 blocks
 * @param scan8x8  scantable used when IS_8x8DCT(mb_type); read in four slices of 16 entries
 * @param pixel_shift shift applied to coefficient offsets into sl->mb
 * @param cbp      coded block pattern; bit i8x8 of the low four bits gates the i8x8-th group of four 4x4 blocks
 * @param p        plane index: selects sl->qscale (p == 0) or sl->chroma_qp[p - 1] and offsets block indices by p*16
 * @return -1 if any decode_residual() call fails; for intra 16x16 macroblocks
 *         0xf when cbp&15 is non-zero and 0 otherwise; else the accumulated new_cbp
 */
static av_always_inline
595
int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
596
                         GetBitContext *gb, const uint8_t *scan,
597
                         const uint8_t *scan8x8, int pixel_shift,
598
                         int mb_type, int cbp, int p)
599
1.21M
{
600
1.21M
    int i4x4, i8x8;
601
1.21M
    int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
602
1.21M
    if(IS_INTRA16x16(mb_type)){
603
449k
        /* clear the DC block of this plane before decoding into it */
        AV_ZERO128(sl->mb_luma_dc[p]+0);
604
449k
        AV_ZERO128(sl->mb_luma_dc[p]+8);
605
449k
        AV_ZERO128(sl->mb_luma_dc[p]+16);
606
449k
        AV_ZERO128(sl->mb_luma_dc[p]+24);
607
449k
        /* qmul is NULL here: decode_residual() does not read it for n >= LUMA_DC_BLOCK_INDEX */
        if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
608
5.73k
            return -1; //FIXME continue if partitioned and other return -1 too
609
5.73k
        }
610
611
443k
        av_assert2((cbp&15) == 0 || (cbp&15) == 15);
612
613
443k
        if(cbp&15){
614
224k
            for(i8x8=0; i8x8<4; i8x8++){
615
886k
                for(i4x4=0; i4x4<4; i4x4++){
616
713k
                    const int index= i4x4 + 4*i8x8 + p*16;
617
713k
                    /* scan + 1 with max_coeff 15: presumably skips the DC
                     * position that was decoded separately above -- confirm */
                    if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
618
713k
                        index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){
619
11.9k
                        return -1;
620
11.9k
                    }
621
713k
                }
622
184k
            }
623
39.6k
            return 0xf;
624
392k
        }else{
625
392k
            /* no AC residual: zero the 4x4 region of the count cache for this plane */
            fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
626
392k
            return 0;
627
392k
        }
628
760k
    }else{
629
760k
        /* dequant table selector: 0..2 for intra, 3..5 otherwise, offset by plane */
        int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
630
        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
631
760k
        int new_cbp = 0;
632
3.62M
        for(i8x8=0; i8x8<4; i8x8++){
633
2.93M
            if(cbp & (1<<i8x8)){
634
987k
                if(IS_8x8DCT(mb_type)){
635
371k
                    int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
636
371k
                    uint8_t *nnz;
637
1.79M
                    /* four passes into the same 64-coefficient buffer, each
                     * with its own 16-entry slice of scan8x8 */
                    for(i4x4=0; i4x4<4; i4x4++){
638
1.44M
                        const int index= i4x4 + 4*i8x8 + p*16;
639
1.44M
                        if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
640
1.44M
                                            h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 )
641
26.1k
                            return -1;
642
1.44M
                    }
643
345k
                    nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
644
345k
                    /* fold the four per-pass counts into nnz[0] */
                    nnz[0] += nnz[1] + nnz[8] + nnz[9];
645
345k
                    new_cbp |= !!nnz[0] << i8x8;
646
616k
                }else{
647
2.97M
                    for(i4x4=0; i4x4<4; i4x4++){
648
2.40M
                        const int index= i4x4 + 4*i8x8 + p*16;
649
2.40M
                        if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
650
2.40M
                                            scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){
651
41.1k
                            return -1;
652
41.1k
                        }
653
2.36M
                        /* NOTE(review): ORs the raw count (not a 0/1 flag as in
                         * the 8x8 branch) shifted by i8x8 -- confirm intended */
                        new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
654
2.36M
                    }
655
616k
                }
656
1.94M
            }else{
657
1.94M
                /* group not coded: clear its four count cache entries */
                uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
658
1.94M
                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
659
1.94M
            }
660
2.93M
        }
661
693k
        return new_cbp;
662
760k
    }
663
1.21M
}
664
665
int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
666
5.93M
{
667
5.93M
    int mb_xy;
668
5.93M
    int partition_count;
669
5.93M
    unsigned int mb_type, cbp;
670
5.93M
    int dct8x8_allowed = h->ps.pps->transform_8x8_mode;
671
5.93M
    const int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2;
672
5.93M
    const int pixel_shift = h->pixel_shift;
673
674
5.93M
    mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
675
676
5.93M
    ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->poc.frame_num, sl->mb_x, sl->mb_y);
677
5.93M
    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
678
                down the code */
679
5.93M
    if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
680
5.45M
        if (sl->mb_skip_run == -1) {
681
1.70M
            unsigned mb_skip_run = get_ue_golomb_long(&sl->gb);
682
1.70M
            if (mb_skip_run > h->mb_num) {
683
15.0k
                av_log(h->avctx, AV_LOG_ERROR, "mb_skip_run %d is invalid\n", mb_skip_run);
684
15.0k
                return AVERROR_INVALIDDATA;
685
15.0k
            }
686
1.68M
            sl->mb_skip_run = mb_skip_run;
687
1.68M
        }
688
689
5.43M
        if (sl->mb_skip_run--) {
690
3.77M
            if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
691
1.28M
                if (sl->mb_skip_run == 0)
692
166k
                    sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
693
1.28M
            }
694
3.77M
            decode_mb_skip(h, sl);
695
3.77M
            return 0;
696
3.77M
        }
697
5.43M
    }
698
2.14M
    if (FRAME_MBAFF(h)) {
699
917k
        if ((sl->mb_y & 1) == 0)
700
500k
            sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
701
917k
    }
702
703
2.14M
    sl->prev_mb_skipped = 0;
704
705
2.14M
    mb_type= get_ue_golomb(&sl->gb);
706
2.14M
    if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
707
835k
        if(mb_type < 23){
708
828k
            partition_count = ff_h264_b_mb_type_info[mb_type].partition_count;
709
828k
            mb_type         = ff_h264_b_mb_type_info[mb_type].type;
710
828k
        }else{
711
6.77k
            mb_type -= 23;
712
6.77k
            goto decode_intra_mb;
713
6.77k
        }
714
1.30M
    } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
715
825k
        if(mb_type < 5){
716
698k
            partition_count = ff_h264_p_mb_type_info[mb_type].partition_count;
717
698k
            mb_type         = ff_h264_p_mb_type_info[mb_type].type;
718
698k
        }else{
719
127k
            mb_type -= 5;
720
127k
            goto decode_intra_mb;
721
127k
        }
722
825k
    }else{
723
482k
       av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
724
482k
        if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
725
34.5k
            mb_type--;
726
616k
decode_intra_mb:
727
616k
        if(mb_type > 25){
728
62.1k
            av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
729
62.1k
            return -1;
730
62.1k
        }
731
554k
        partition_count=0;
732
554k
        cbp                      = ff_h264_i_mb_type_info[mb_type].cbp;
733
554k
        sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode;
734
554k
        mb_type                  = ff_h264_i_mb_type_info[mb_type].type;
735
554k
    }
736
737
2.08M
    if (MB_FIELD(sl))
738
783k
        mb_type |= MB_TYPE_INTERLACED;
739
740
2.08M
    h->slice_table[mb_xy] = sl->slice_num;
741
742
2.08M
    if(IS_INTRA_PCM(mb_type)){
743
3.59k
        const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] *
744
3.59k
                            h->ps.sps->bit_depth_luma;
745
746
        // We assume these blocks are very rare so we do not optimize it.
747
3.59k
        sl->intra_pcm_ptr = align_get_bits(&sl->gb);
748
3.59k
        if (get_bits_left(&sl->gb) < mb_size) {
749
1.61k
            av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
750
1.61k
            return AVERROR_INVALIDDATA;
751
1.61k
        }
752
1.98k
        skip_bits_long(&sl->gb, mb_size);
753
754
        // In deblocking, the quantizer is 0
755
1.98k
        h->cur_pic.qscale_table[mb_xy] = 0;
756
        // All coeffs are present
757
1.98k
        memset(h->non_zero_count[mb_xy], 16, 48);
758
759
1.98k
        h->cur_pic.mb_type[mb_xy] = mb_type;
760
1.98k
        return 0;
761
3.59k
    }
762
763
2.07M
    fill_decode_neighbors(h, sl, mb_type);
764
2.07M
    fill_decode_caches(h, sl, mb_type);
765
766
    //mb_pred
767
2.07M
    if(IS_INTRA(mb_type)){
768
551k
        int pred_mode;
769
//            init_top_left_availability(h);
770
551k
        if(IS_INTRA4x4(mb_type)){
771
136k
            int i;
772
136k
            int di = 1;
773
136k
            if(dct8x8_allowed && get_bits1(&sl->gb)){
774
58.0k
                mb_type |= MB_TYPE_8x8DCT;
775
58.0k
                di = 4;
776
58.0k
            }
777
778
//                fill_intra4x4_pred_table(h);
779
1.61M
            for(i=0; i<16; i+=di){
780
1.48M
                int mode = pred_intra_mode(h, sl, i);
781
782
1.48M
                if(!get_bits1(&sl->gb)){
783
579k
                    const int rem_mode= get_bits(&sl->gb, 3);
784
579k
                    mode = rem_mode + (rem_mode >= mode);
785
579k
                }
786
787
1.48M
                if(di==4)
788
232k
                    fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
789
1.24M
                else
790
1.24M
                    sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
791
1.48M
            }
792
136k
            write_back_intra_pred_mode(h, sl);
793
136k
            if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx,
794
136k
                                                 sl->top_samples_available, sl->left_samples_available) < 0)
795
37.1k
                return -1;
796
415k
        }else{
797
415k
            sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
798
415k
                                                                     sl->left_samples_available, sl->intra16x16_pred_mode, 0);
799
415k
            if (sl->intra16x16_pred_mode < 0)
800
78.0k
                return -1;
801
415k
        }
802
436k
        if(decode_chroma){
803
308k
            pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
804
308k
                                                     sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1);
805
308k
            if(pred_mode < 0)
806
11.2k
                return -1;
807
297k
            sl->chroma_pred_mode = pred_mode;
808
297k
        } else {
809
127k
            sl->chroma_pred_mode = DC_128_PRED8x8;
810
127k
        }
811
1.52M
    }else if(partition_count==4){
812
137k
        int i, j, sub_partition_count[4], list, ref[2][4];
813
814
137k
        if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
815
172k
            for(i=0; i<4; i++){
816
138k
                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
817
138k
                if(sl->sub_mb_type[i] >=13){
818
359
                    av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
819
359
                    return -1;
820
359
                }
821
137k
                sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
822
137k
                sl->sub_mb_type[i]     = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type;
823
137k
            }
824
34.4k
            if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
825
17.5k
                ff_h264_pred_direct_motion(h, sl, &mb_type);
826
17.5k
                sl->ref_cache[0][scan8[4]] =
827
17.5k
                sl->ref_cache[1][scan8[4]] =
828
17.5k
                sl->ref_cache[0][scan8[12]] =
829
17.5k
                sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
830
17.5k
            }
831
102k
        }else{
832
102k
            av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
833
494k
            for(i=0; i<4; i++){
834
400k
                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
835
400k
                if(sl->sub_mb_type[i] >=4){
836
8.40k
                    av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
837
8.40k
                    return -1;
838
8.40k
                }
839
392k
                sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
840
392k
                sl->sub_mb_type[i]     = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type;
841
392k
            }
842
102k
        }
843
844
288k
        for (list = 0; list < sl->list_count; list++) {
845
163k
            int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
846
810k
            for(i=0; i<4; i++){
847
650k
                if(IS_DIRECT(sl->sub_mb_type[i])) continue;
848
595k
                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
849
494k
                    unsigned int tmp;
850
494k
                    if(ref_count == 1){
851
146k
                        tmp= 0;
852
347k
                    }else if(ref_count == 2){
853
177k
                        tmp= get_bits1(&sl->gb)^1;
854
177k
                    }else{
855
170k
                        tmp= get_ue_golomb_31(&sl->gb);
856
170k
                        if(tmp>=ref_count){
857
3.49k
                            av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
858
3.49k
                            return -1;
859
3.49k
                        }
860
170k
                    }
861
490k
                    ref[list][i]= tmp;
862
490k
                }else{
863
                 //FIXME
864
101k
                    ref[list][i] = -1;
865
101k
                }
866
595k
            }
867
163k
        }
868
869
125k
        if(dct8x8_allowed)
870
45.6k
            dct8x8_allowed = get_dct8x8_allowed(h, sl);
871
872
285k
        for (list = 0; list < sl->list_count; list++) {
873
798k
            for(i=0; i<4; i++){
874
638k
                if(IS_DIRECT(sl->sub_mb_type[i])) {
875
54.8k
                    sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
876
54.8k
                    continue;
877
54.8k
                }
878
584k
                sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
879
584k
                sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
880
881
584k
                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
882
483k
                    const int sub_mb_type= sl->sub_mb_type[i];
883
483k
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
884
1.45M
                    for(j=0; j<sub_partition_count[i]; j++){
885
972k
                        int mx, my;
886
972k
                        const int index= 4*i + block_width*j;
887
972k
                        int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
888
972k
                        pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
889
972k
                        mx += (unsigned)get_se_golomb(&sl->gb);
890
972k
                        my += (unsigned)get_se_golomb(&sl->gb);
891
972k
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
892
893
972k
                        if(IS_SUB_8X8(sub_mb_type)){
894
200k
                            mv_cache[ 1 ][0]=
895
200k
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
896
200k
                            mv_cache[ 1 ][1]=
897
200k
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
898
771k
                        }else if(IS_SUB_8X4(sub_mb_type)){
899
240k
                            mv_cache[ 1 ][0]= mx;
900
240k
                            mv_cache[ 1 ][1]= my;
901
531k
                        }else if(IS_SUB_4X8(sub_mb_type)){
902
117k
                            mv_cache[ 8 ][0]= mx;
903
117k
                            mv_cache[ 8 ][1]= my;
904
117k
                        }
905
972k
                        mv_cache[ 0 ][0]= mx;
906
972k
                        mv_cache[ 0 ][1]= my;
907
972k
                    }
908
483k
                }else{
909
100k
                    uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
910
100k
                    p[0] = p[1]=
911
100k
                    p[8] = p[9]= 0;
912
100k
                }
913
584k
            }
914
159k
        }
915
1.38M
    }else if(IS_DIRECT(mb_type)){
916
404k
        ff_h264_pred_direct_motion(h, sl, &mb_type);
917
404k
        dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag;
918
984k
    }else{
919
984k
        int list, mx, my, i;
920
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
921
984k
        if(IS_16X16(mb_type)){
922
1.59M
            for (list = 0; list < sl->list_count; list++) {
923
913k
                    unsigned int val;
924
913k
                    if(IS_DIR(mb_type, 0, list)){
925
750k
                        unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
926
750k
                        if (rc == 1) {
927
93.2k
                            val= 0;
928
657k
                        } else if (rc == 2) {
929
301k
                            val= get_bits1(&sl->gb)^1;
930
356k
                        }else{
931
356k
                            val= get_ue_golomb_31(&sl->gb);
932
356k
                            if (val >= rc) {
933
11.8k
                                av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
934
11.8k
                                return -1;
935
11.8k
                            }
936
356k
                        }
937
739k
                    fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
938
739k
                    }
939
913k
            }
940
1.57M
            for (list = 0; list < sl->list_count; list++) {
941
900k
                if(IS_DIR(mb_type, 0, list)){
942
738k
                    pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
943
738k
                    mx += (unsigned)get_se_golomb(&sl->gb);
944
738k
                    my += (unsigned)get_se_golomb(&sl->gb);
945
738k
                    ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
946
947
738k
                    fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
948
738k
                }
949
900k
            }
950
676k
        }
951
295k
        else if(IS_16X8(mb_type)){
952
371k
            for (list = 0; list < sl->list_count; list++) {
953
675k
                    for(i=0; i<2; i++){
954
452k
                        unsigned int val;
955
452k
                        if(IS_DIR(mb_type, i, list)){
956
334k
                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
957
334k
                            if (rc == 1) {
958
33.8k
                                val= 0;
959
300k
                            } else if (rc == 2) {
960
239k
                                val= get_bits1(&sl->gb)^1;
961
239k
                            }else{
962
60.9k
                                val= get_ue_golomb_31(&sl->gb);
963
60.9k
                                if (val >= rc) {
964
4.05k
                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
965
4.05k
                                    return -1;
966
4.05k
                                }
967
60.9k
                            }
968
334k
                        }else
969
118k
                            val= LIST_NOT_USED&0xFF;
970
448k
                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
971
448k
                    }
972
227k
            }
973
366k
            for (list = 0; list < sl->list_count; list++) {
974
666k
                for(i=0; i<2; i++){
975
444k
                    unsigned int val;
976
444k
                    if(IS_DIR(mb_type, i, list)){
977
327k
                        pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
978
327k
                        mx += (unsigned)get_se_golomb(&sl->gb);
979
327k
                        my += (unsigned)get_se_golomb(&sl->gb);
980
327k
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
981
982
327k
                        val= pack16to32(mx,my);
983
327k
                    }else
984
117k
                        val=0;
985
444k
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
986
444k
                }
987
222k
            }
988
147k
        }else{
989
147k
            av_assert2(IS_8X16(mb_type));
990
375k
            for (list = 0; list < sl->list_count; list++) {
991
686k
                    for(i=0; i<2; i++){
992
459k
                        unsigned int val;
993
459k
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
994
342k
                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
995
342k
                            if (rc == 1) {
996
48.2k
                                val= 0;
997
294k
                            } else if (rc == 2) {
998
252k
                                val= get_bits1(&sl->gb)^1;
999
252k
                            }else{
1000
42.0k
                                val= get_ue_golomb_31(&sl->gb);
1001
42.0k
                                if (val >= rc) {
1002
3.40k
                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1003
3.40k
                                    return -1;
1004
3.40k
                                }
1005
42.0k
                            }
1006
342k
                        }else
1007
116k
                            val= LIST_NOT_USED&0xFF;
1008
455k
                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1009
455k
                    }
1010
230k
            }
1011
371k
            for (list = 0; list < sl->list_count; list++) {
1012
681k
                for(i=0; i<2; i++){
1013
454k
                    unsigned int val;
1014
454k
                    if(IS_DIR(mb_type, i, list)){
1015
338k
                        pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1016
338k
                        mx += (unsigned)get_se_golomb(&sl->gb);
1017
338k
                        my += (unsigned)get_se_golomb(&sl->gb);
1018
338k
                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1019
1020
338k
                        val= pack16to32(mx,my);
1021
338k
                    }else
1022
115k
                        val=0;
1023
454k
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1024
454k
                }
1025
227k
            }
1026
144k
        }
1027
984k
    }
1028
1029
1.91M
    if(IS_INTER(mb_type))
1030
1.49M
        write_back_motion(h, sl, mb_type);
1031
1032
1.91M
    if(!IS_INTRA16x16(mb_type)){
1033
1.59M
        cbp= get_ue_golomb(&sl->gb);
1034
1035
1.59M
        if(decode_chroma){
1036
1.35M
            if(cbp > 47){
1037
31.9k
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1038
31.9k
                return -1;
1039
31.9k
            }
1040
1.31M
            if (IS_INTRA4x4(mb_type))
1041
52.5k
                cbp = ff_h264_golomb_to_intra4x4_cbp[cbp];
1042
1.26M
            else
1043
1.26M
                cbp = ff_h264_golomb_to_inter_cbp[cbp];
1044
1.31M
        }else{
1045
240k
            if(cbp > 15){
1046
35.9k
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1047
35.9k
                return -1;
1048
35.9k
            }
1049
204k
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1050
163k
            else                     cbp= golomb_to_inter_cbp_gray[cbp];
1051
204k
        }
1052
1.59M
    } else {
1053
329k
        if (!decode_chroma && cbp>15) {
1054
10.3k
            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1055
10.3k
            return AVERROR_INVALIDDATA;
1056
10.3k
        }
1057
329k
    }
1058
1059
1.84M
    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1060
205k
        mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1061
205k
    }
1062
1.84M
    sl->cbp=
1063
1.84M
    h->cbp_table[mb_xy]= cbp;
1064
1.84M
    h->cur_pic.mb_type[mb_xy] = mb_type;
1065
1066
1.84M
    if(cbp || IS_INTRA16x16(mb_type)){
1067
982k
        int i4x4, i8x8, chroma_idx;
1068
982k
        int dquant;
1069
982k
        int ret;
1070
982k
        GetBitContext *gb = &sl->gb;
1071
982k
        const uint8_t *scan, *scan8x8;
1072
982k
        const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
1073
1074
982k
        dquant= get_se_golomb(&sl->gb);
1075
1076
982k
        sl->qscale += (unsigned)dquant;
1077
1078
982k
        if (((unsigned)sl->qscale) > max_qp){
1079
9.45k
            if (sl->qscale < 0) sl->qscale += max_qp + 1;
1080
5.56k
            else                sl->qscale -= max_qp+1;
1081
9.45k
            if (((unsigned)sl->qscale) > max_qp){
1082
3.69k
                av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1083
3.69k
                sl->qscale = max_qp;
1084
3.69k
                return -1;
1085
3.69k
            }
1086
9.45k
        }
1087
1088
978k
        sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
1089
978k
        sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
1090
1091
978k
        if(IS_INTERLACED(mb_type)){
1092
344k
            scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1093
344k
            scan    = sl->qscale ? h->field_scan : h->field_scan_q0;
1094
633k
        }else{
1095
633k
            scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1096
633k
            scan    = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1097
633k
        }
1098
1099
978k
        if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1100
60.0k
            return -1;
1101
60.0k
        }
1102
918k
        h->cbp_table[mb_xy] |= ret << 12;
1103
918k
        if (CHROMA444(h)) {
1104
120k
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1105
10.0k
                return -1;
1106
10.0k
            }
1107
110k
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1108
14.7k
                return -1;
1109
14.7k
            }
1110
797k
        } else {
1111
797k
            const int num_c8x8 = h->ps.sps->chroma_format_idc;
1112
1113
797k
            if(cbp&0x30){
1114
760k
                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1115
507k
                    if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1116
507k
                                        CHROMA_DC_BLOCK_INDEX + chroma_idx,
1117
507k
                                        CHROMA422(h) ? ff_h264_chroma422_dc_scan : ff_h264_chroma_dc_scan,
1118
507k
                                        NULL, 4 * num_c8x8) < 0) {
1119
1.89k
                        return -1;
1120
1.89k
                    }
1121
254k
            }
1122
1123
795k
            if(cbp&0x20){
1124
232k
                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1125
162k
                    const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1126
162k
                    int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1127
325k
                    for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1128
847k
                        for (i4x4 = 0; i4x4 < 4; i4x4++) {
1129
684k
                            const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1130
684k
                            if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1131
12.9k
                                return -1;
1132
671k
                            mb += 16 << pixel_shift;
1133
671k
                        }
1134
176k
                    }
1135
162k
                }
1136
712k
            }else{
1137
712k
                fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1138
712k
                fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1139
712k
            }
1140
795k
        }
1141
918k
    }else{
1142
859k
        fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1143
859k
        fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1144
859k
        fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1145
859k
    }
1146
1.73M
    h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1147
1.73M
    write_back_non_zero_count(h, sl);
1148
1149
1.73M
    return 0;
1150
1.84M
}