Coverage Report

Created: 2025-07-23 08:18

/src/x265/source/encoder/entropy.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
0
#define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
36
0
#define CU_DQP_EG_k    0 // exp-golomb order
37
0
#define START_VALUE    8 // start value for dpcm mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
// Laid out as 3 rows of NUM_TQUANT_BYPASS_FLAG_CTX entries; every row holds the same value.
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
// Laid out as 3 rows of NUM_SPLIT_FLAG_CTX entries; the first two rows are identical.
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
// initial probability for the skip flag contexts: 3 rows of NUM_SKIP_FLAG_CTX entries
// NOTE(review): the last row is all CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
// initial probability for the merge flag contexts: 3 rows of NUM_MERGE_FLAG_EXT_CTX entries
// NOTE(review): the last row is CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
// initial probability for the merge index contexts: 3 rows of NUM_MERGE_IDX_EXT_CTX entries
// NOTE(review): the last row is CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
// initial probability for the partition size contexts: 3 rows of NUM_PART_SIZE_CTX entries
// NOTE(review): only the first entry of the last row is a real value, the rest are CNU (presumably unused) - confirm.
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
// initial probability for the prediction mode contexts: 3 rows of NUM_PRED_MODE_CTX entries
// NOTE(review): the last row is CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
// initial probability for the intra prediction mode contexts: 3 rows of NUM_ADI_CTX entries
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93
{
94
    { 183, },
95
    { 154, },
96
    { 184, },
97
};
98
99
// initial probability for the chroma prediction mode contexts: 3 rows of NUM_CHROMA_PRED_CTX entries
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100
{
101
    { 152,  139, },
102
    { 152,  139, },
103
    {  63,  139, },
104
};
105
106
// initial probability for the inter direction contexts: 3 rows of NUM_INTER_DIR_CTX entries
// NOTE(review): the last row is all CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
// initial probability for the MVD contexts: 3 rows of NUM_MV_RES_CTX entries
// NOTE(review): the last row is all CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
// initial probability for the reference picture contexts: 3 rows of NUM_REF_NO_CTX entries
// NOTE(review): the last row is all CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
// initial probability for the delta QP contexts: 3 rows of NUM_DELTA_QP_CTX entries, all set to the same value
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128
{
129
    { 154,  154,  154, },
130
    { 154,  154,  154, },
131
    { 154,  154,  154, },
132
};
133
134
// initial probability for the QT CBF contexts: 3 rows of NUM_QT_CBF_CTX entries
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135
{
136
    { 153,  111,  149,   92,  167,  154,  154 },
137
    { 153,  111,  149,  107,  167,  154,  154 },
138
    { 111,  141,   94,  138,  182,  154,  154 },
139
};
140
141
// initial probability for the QT root CBF contexts: 3 rows of NUM_QT_ROOT_CBF_CTX entries
// NOTE(review): the last row is CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
// initial probability for the "last" flag contexts: 3 rows of NUM_CTX_LAST_FLAG_XY entries
// Each row is split over two source lines (15 + 3 values).
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149
{
150
    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151
      108,  123,   93 },
152
    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153
      108,  123,  108 },
154
    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155
      108,  123,   63 },
156
};
157
158
// initial probability for the significant coefficient-group flag contexts
// Each of the 3 rows holds 2 * NUM_SIG_CG_FLAG_CTX entries, written as two source lines per row.
// NOTE(review): the two halves are presumably separate component sets (e.g. luma / chroma) - confirm.
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159
{
160
    { 121,  140,
161
      61,  154, },
162
    { 121,  140,
163
      61,  154, },
164
    {  91,  171,
165
       134,  141, },
166
};
167
168
// initial probability for the significance flag contexts: 3 rows of NUM_SIG_FLAG_CTX entries
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169
{
170
    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171
    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172
    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173
};
174
175
// initial probability for the "one" flag contexts: 3 rows of NUM_ONE_FLAG_CTX entries
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176
{
177
    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178
    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179
    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180
};
181
182
// initial probability for the "abs" flag contexts: 3 rows of NUM_ABS_FLAG_CTX entries
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183
{
184
    { 107,  167,   91,  107,  107,  167, },
185
    { 107,  167,   91,  122,  107,  167, },
186
    { 138,  153,  136,  167,  152,  152, },
187
};
188
189
// initial probability for the MVP index contexts: 3 rows of NUM_MVP_IDX_CTX entries
// NOTE(review): the last row is CNU - presumably "context not used" for that row; confirm against the CNU definition.
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
// initial probability for the SAO merge flag contexts: 3 rows of NUM_SAO_MERGE_FLAG_CTX entries, all the same value
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197
{
198
    { 153,  },
199
    { 153,  },
200
    { 153,  },
201
};
202
203
// initial probability for the SAO type index contexts: 3 rows of NUM_SAO_TYPE_IDX_CTX entries
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204
{
205
    { 160, },
206
    { 185, },
207
    { 200, },
208
};
209
210
// initial probability for the transform subdivision flag contexts: 3 rows of NUM_TRANS_SUBDIV_FLAG_CTX entries
// NOTE(review): what the row index selects (presumably the slice/init type) is not visible here - confirm.
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211
{
212
    { 224,  167,  122, },
213
    { 124,  138,   94, },
214
    { 153,  138,  138, },
215
};
216
217
// initial probability for the transform-skip flag contexts
// Each of the 3 rows holds 2 * NUM_TRANSFORMSKIP_FLAG_CTX entries; every entry has the same value.
// NOTE(review): the factor of two presumably covers two component sets (e.g. luma / chroma) - confirm.
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218
{
219
    { 139,  139 },
220
    { 139,  139 },
221
    { 139,  139 },
222
};
223
224
/* Default constructor.
 *
 * Calls markValid() first, then clears the fractional-bit counter, the padding
 * word and the mean-QP field.  In checked builds it also verifies that
 * m_contextState has room for at least MAX_OFF_CTX_MOD entries. */
Entropy::Entropy()
{
    markValid();

    // plain scalar resets; the three assignments are independent of each other
    m_meanQP = 0;
    m_pad = 0;
    m_fracBits = 0;

    X265_CHECK(sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD <= sizeof(m_contextState), "context state table is too small\n");
}
232
233
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
234
void Entropy::codeVPS(const VPS& vps, const SPS& sps)
235
#else
236
void Entropy::codeVPS(const VPS& vps)
237
#endif
238
0
{
239
0
    int maxLayers = (vps.m_numLayers > 1 || vps.m_numViews > 1) + 1;
240
0
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
241
0
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
242
0
    WRITE_CODE(maxLayers - 1, 6, "vps_reserved_zero_6bits");
243
0
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
244
0
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
245
0
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
246
247
0
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
248
249
0
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
250
251
0
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
252
0
    {
253
0
        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
254
0
        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
255
0
        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
256
0
    }
257
258
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
259
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
260
    {
261
        WRITE_CODE(maxLayers - 1, 6, "vps_max_nuh_reserved_zero_layer_id");
262
        WRITE_UVLC(vps.m_vpsNumLayerSetsMinus1, "vps_num_layer_sets_minus1");
263
        for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
264
        {
265
#if ENABLE_MULTIVIEW
266
            if (vps.m_numViews > 1)
267
            {
268
                for (int j = 0; j < vps.m_numViews; j++)
269
                {
270
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
271
                }
272
            }
273
#endif
274
#if ENABLE_ALPHA
275
            if (vps.m_numLayers > 1)
276
            {
277
                for (int j = 0; j < vps.m_numLayers; j++)
278
                {
279
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
280
                }
281
            }
282
#endif
283
        }
284
    }
285
    else
286
    {
287
        WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
288
        WRITE_UVLC(0, "vps_max_op_sets_minus1");
289
    }
290
#else
291
0
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
292
0
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
293
0
#endif
294
295
0
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
296
297
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
298
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
299
    {
300
        WRITE_FLAG(vps.vps_extension_flag, "vps_extension_flag");
301
302
        if (vps.vps_extension_flag)
303
        {
304
            while (m_bitIf->getNumberOfWrittenBits() % X265_BYTE != 0)
305
            {
306
                WRITE_FLAG(1, "vps_extension_alignment_bit_equal_to_one");
307
            }
308
309
            WRITE_CODE(vps.ptl.levelIdc, 8, "general_level_idc");
310
            if (vps.maxTempSubLayers > 1)
311
            {
312
                for (uint32_t i = 0; i < vps.maxTempSubLayers - 1; i++)
313
                {
314
                    WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
315
                    WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
316
                }
317
                for (int i = vps.maxTempSubLayers - 1; i < 8; i++)
318
                    WRITE_CODE(0, 2, "reserved_zero_2bits");
319
            }
320
321
            WRITE_FLAG(vps.splitting_flag, "splitting flag");
322
            for (int i = 0; i < MAX_VPS_NUM_SCALABILITY_TYPES; i++)
323
            {
324
                WRITE_FLAG(vps.m_scalabilityMask[i], "scalability_mask[i]");
325
            }
326
            for (int i = 0; i < vps.scalabilityTypes - vps.splitting_flag; i++)
327
            {
328
                WRITE_CODE(vps.m_dimensionIdLen[i] - 1, 3, "dimension_id_len_minus1[i]");
329
            }
330
            WRITE_FLAG(vps.m_nuhLayerIdPresentFlag, "vps_nuh_layer_id_present_flag");
331
            for (int i = 1; i < maxLayers; i++)
332
            {
333
                if (vps.m_nuhLayerIdPresentFlag)
334
                    WRITE_CODE(vps.m_layerIdInNuh[i], 6, "layer_id_in_nuh[i]");
335
336
                if (!vps.splitting_flag)
337
                {
338
                    for (int j = 0; j < vps.scalabilityTypes; j++)
339
                    {
340
                        uint8_t bits = vps.m_dimensionIdLen[j];
341
                        WRITE_CODE(vps.m_dimensionId[i][j], bits, "dimension_id[i][j]");
342
                    }
343
                }
344
            }
345
            WRITE_CODE(vps.m_viewIdLen, 4, "view_id_len");
346
347
#if ENABLE_ALPHA
348
            if (vps.m_numLayers > 1)
349
            {
350
                WRITE_FLAG(0, "direct_dependency_flag[1][0]");
351
                WRITE_UVLC(0, "num_add_layer_sets");
352
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
353
                WRITE_FLAG(0, "max_tid_ref_present_flag");
354
                WRITE_FLAG(0, "default_ref_layers_active_flag");
355
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
356
                WRITE_FLAG(1, "vps_profile_present_flag");
357
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
358
359
                WRITE_UVLC(0, "num_add_olss");
360
                WRITE_CODE(0, 2, "default_output_layer_idc");
361
                WRITE_CODE(1, 2, "profile_tier_level_idx[ i ][ j ]");
362
                WRITE_CODE(2, 2, "profile_tier_level_idx[ i ][ j ]");
363
364
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
365
366
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
367
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
368
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
369
370
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
371
372
                if (sps.chromaFormatIdc == X265_CSP_I444)
373
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
374
375
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
376
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
377
378
                const Window& conf = sps.conformanceWindow;
379
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
380
                if (conf.bEnabled)
381
                {
382
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
383
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
384
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
385
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
386
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
387
                }
388
389
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
390
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
391
                WRITE_FLAG(1, "poc_lsb_not_present_flag[");
392
393
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
394
                {
395
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
396
                    for (uint32_t j = 0; j < vps.maxTempSubLayers ; j++)
397
                    {
398
                        if(j > 0)
399
                        WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
400
401
                        for(int k = 0; k < vps.m_numLayersInIdList[i]; k++)
402
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
403
404
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
405
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
406
                    }
407
                }
408
409
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
410
411
                WRITE_FLAG(0, "default_direct_dependency_flag");
412
                WRITE_UVLC(0, "vps_non_vui_extension_length");
413
                WRITE_FLAG(0, "vps_vui_present_flag");
414
                WRITE_FLAG(0, "vps_extension2_flag");
415
        }
416
#endif
417
418
#if ENABLE_MULTIVIEW
419
            if (vps.m_numViews > 1)
420
            {
421
                for (uint8_t i = 0; i < vps.m_numViews; i++)
422
                    WRITE_CODE(i, vps.m_viewIdLen, "view_id_val[i]");
423
424
                for (int i = 1; i < vps.m_numViews; i++)
425
                {
426
                    for (int j = 0; j < i; j++)
427
                    {
428
                        if (j == 0)
429
                            WRITE_FLAG(1, "direct_dependency_flag[1][0]");
430
                        else
431
                            WRITE_FLAG(0, "direct_dependency_flag[1][0]");
432
                    }
433
                }
434
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
435
                WRITE_FLAG(0, "max_tid_ref_present_flag");
436
                WRITE_FLAG(1, "default_ref_layers_active_flag");
437
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
438
                WRITE_FLAG(1, "vps_profile_present_flag[i]");
439
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
440
                WRITE_UVLC(0, "num_add_olss");
441
                WRITE_CODE(0, 2, "default_output_layer_idc");
442
443
                for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
444
                {
445
                    for (int j = 0; j < vps.m_numViews; j++)
446
                    {
447
                        WRITE_CODE((j == 0) ? 1 : 2, 2, "profile_tier_level_idx[ i ][ j ]");
448
                    }
449
                }
450
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
451
452
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
453
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
454
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
455
456
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
457
458
                if (sps.chromaFormatIdc == X265_CSP_I444)
459
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
460
461
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
462
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
463
464
                const Window& conf = sps.conformanceWindow;
465
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
466
                if (conf.bEnabled)
467
                {
468
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
469
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
470
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
471
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
472
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
473
                }
474
475
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
476
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
477
478
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
479
                {
480
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
481
                    for (uint32_t j = 0; j < vps.maxTempSubLayers; j++)
482
                    {
483
                        if (j > 0)
484
                            WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
485
486
                        for (int k = 0; k < vps.m_numLayersInIdList[i]; k++)
487
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
488
489
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
490
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
491
                    }
492
                }
493
494
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
495
496
                WRITE_FLAG(1, "default_direct_dependency_flag");
497
                WRITE_CODE(2, 2, "default_direct_dependency_type");
498
                WRITE_UVLC(0, "vps_non_vui_extension_length");
499
                WRITE_FLAG(0, "vps_vui_present_flag");
500
                WRITE_FLAG(0, "vps_extension2_flag");
501
            }
502
#endif
503
        }
504
    }
505
    else
506
        WRITE_FLAG(0, "vps_extension_flag");
507
#else
508
0
    WRITE_FLAG(0, "vps_extension_flag");
509
0
#endif
510
0
}
511
512
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl, int layer)
513
0
{
514
0
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
515
#if ENABLE_MULTIVIEW
516
    if(layer != 0)
517
        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
518
    else
519
        WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
520
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
521
#else
522
0
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
523
0
#endif
524
0
    {
525
0
        WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
526
0
        codeProfileTier(ptl, sps.maxTempSubLayers);
527
0
    }
528
529
0
    WRITE_UVLC(layer, "sps_seq_parameter_set_id");
530
#if ENABLE_MULTIVIEW
531
    if (layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
532
        WRITE_FLAG(0, "update_rep_format_flag");
533
    else
534
#endif
535
0
    {
536
0
        WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
537
538
0
        if (sps.chromaFormatIdc == X265_CSP_I444)
539
0
            WRITE_FLAG(0,                       "separate_colour_plane_flag");
540
541
0
        WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
542
0
        WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
543
544
0
        const Window& conf = sps.conformanceWindow;
545
0
        WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
546
0
        if (conf.bEnabled)
547
0
        {
548
0
            int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
549
0
            WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
550
0
            WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
551
0
            WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
552
0
            WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
553
0
        }
554
555
0
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
556
0
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
557
0
    }
558
559
0
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
560
#if ENABLE_MULTIVIEW
561
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
562
#endif
563
0
    {
564
0
        WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
565
566
0
        for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
567
0
        {
568
0
            WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
569
0
            WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
570
0
            WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
571
0
        }
572
0
    }
573
574
0
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
575
0
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
576
0
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
577
0
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
578
0
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
579
0
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
580
0
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
581
0
    if (scalingList.m_bEnabled)
582
0
    {
583
#if ENABLE_MULTIVIEW
584
        if ((layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
585
            WRITE_FLAG(sps.spsInferScalingListFlag, "sps_infer_scaling_list_flag");
586
        if(sps.spsInferScalingListFlag)
587
            WRITE_CODE(0, 6, "sps_scaling_list_ref_layer_id");
588
        else
589
#endif
590
0
        {
591
0
            WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
592
0
            if (scalingList.m_bDataPresent)
593
0
                codeScalingList(scalingList);
594
0
        }
595
0
    }
596
0
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
597
0
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
598
599
0
    WRITE_FLAG(0, "pcm_enabled_flag");
600
0
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
601
0
    for (int i = 0; i < sps.spsrpsNum; i++)
602
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
603
0
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
604
605
0
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
606
0
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
607
608
0
    WRITE_FLAG(1, "vui_parameters_present_flag");
609
0
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo, layer);
610
611
0
    WRITE_FLAG(sps.sps_extension_flag, "sps_extension_flag");
612
613
#if ENABLE_MULTIVIEW
614
    if (sps.sps_extension_flag && sps.maxViews > 1)
615
    {
616
        WRITE_FLAG(0, "sps_range_extensions_flag");
617
        WRITE_FLAG(sps.maxViews > 1, "sps_multilayer_extension_flag");
618
        WRITE_FLAG(0, "sps_3d_extension_flag");
619
        WRITE_CODE(0, 5, "sps_extension_5bits");
620
621
        if (layer == 0)
622
            WRITE_FLAG(0, "inter_view_mv_vert_constraint_flag");
623
        else
624
            WRITE_FLAG(1, "inter_view_mv_vert_constraint_flag");
625
    }
626
#endif
627
628
#if ENABLE_SCC_EXT
629
    if (ptl.profileIdc[0] == Profile::MAINSCC)
630
    {
631
        bool sps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
632
        sps_extension_flags[SCC_EXT_IDX] = true;
633
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
634
            WRITE_FLAG(sps_extension_flags[i], "sps_extension_flag");
635
        WRITE_FLAG(1, "intra_block_copy_enabled_flag");
636
        WRITE_FLAG(0, "palette_mode_enabled_flag");
637
        WRITE_CODE(0, 2, "motion_vector_resolution_control_idc");
638
        WRITE_FLAG(0, "intra_boundary_filter_disabled_flag");
639
    }
640
#endif
641
0
}
642
643
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26, int layer)
644
0
{
645
0
    WRITE_UVLC(layer,                          "pps_pic_parameter_set_id");
646
0
    WRITE_UVLC(layer,                          "pps_seq_parameter_set_id");
647
0
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
648
0
    WRITE_FLAG(0,                          "output_flag_present_flag");
649
0
    WRITE_CODE(pps.maxViews > 1 ? 2 : 0, 3,"num_extra_slice_header_bits");
650
0
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
651
0
    WRITE_FLAG(0,                          "cabac_init_present_flag");
652
0
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
653
0
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
654
655
0
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
656
0
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
657
0
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
658
659
0
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
660
0
    if (pps.bUseDQP)
661
0
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
662
663
0
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
664
0
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
665
0
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
666
667
0
    WRITE_FLAG(layer ? 0 : pps.bUseWeightPred,            "weighted_pred_flag");
668
0
    WRITE_FLAG(layer ? 0 : pps.bUseWeightedBiPred,        "weighted_bipred_flag");
669
0
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
670
0
    WRITE_FLAG(0,                             "tiles_enabled_flag");
671
0
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
672
0
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
673
674
0
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
675
0
    if (pps.bDeblockingFilterControlPresent)
676
0
    {
677
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
678
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
679
0
        if (!pps.bPicDisableDeblockingFilter)
680
0
        {
681
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
682
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
683
0
        }
684
0
    }
685
686
0
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
687
0
    WRITE_FLAG(0, "lists_modification_present_flag");
688
0
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
689
0
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
690
0
    WRITE_FLAG(pps.pps_extension_flag, "pps_extension_flag");
691
692
#if ENABLE_MULTIVIEW
693
    if (pps.pps_extension_flag && pps.maxViews > 1)
694
    {
695
        WRITE_FLAG(0, "pps_range_extensions_flag");
696
        WRITE_FLAG(pps.maxViews > 1, "pps_multilayer_extension_flag");
697
        WRITE_FLAG(0, "pps_3d_extension_flag");
698
        WRITE_CODE(0, 5, "pps_extension_5bits");
699
700
        if (pps.maxViews > 1)
701
        {
702
            WRITE_FLAG(0, "poc_reset_info_present_flag");
703
            WRITE_FLAG(0, "pps_infer_scaling_list_flag");
704
            WRITE_UVLC(0, "num_ref_loc_offsets");
705
            WRITE_FLAG(0, "colour_mapping_enabled_flag");
706
        }
707
    }
708
#endif
709
710
711
#if ENABLE_SCC_EXT
712
    if (pps.profileIdc == Profile::MAINSCC)
713
    {
714
        bool pps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
715
        pps_extension_flags[SCC_EXT_IDX] = true;
716
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
717
            WRITE_FLAG(pps_extension_flags[i], "pps_extension_flag");
718
        WRITE_FLAG(1, "curr_pic_as_ref_enabled_pps_flag");
719
        WRITE_FLAG(0, "adaptive_colour_trans_flag");
720
        WRITE_FLAG(0, "palette_predictor_initializer_flag");
721
    }
722
#endif
723
0
}
724
725
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers, int layer)
726
0
{
727
0
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
728
0
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
729
0
    WRITE_CODE(ptl.profileIdc[layer], 5,   "XXX_profile_idc[]");
730
0
    for (int j = 0; j < 32; j++)
731
0
    {
732
0
        if (layer)
733
0
            WRITE_FLAG(j == ptl.profileIdc[layer] ? 1 : 0, "XXX_profile_compatibility_flag[][j]");
734
0
        else
735
0
            WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
736
0
    }
737
738
0
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
739
0
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
740
0
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
741
0
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
742
743
0
    if (ptl.profileIdc[layer] == Profile::MAINREXT || ptl.profileIdc[layer] == Profile::HIGHTHROUGHPUTREXT || ptl.profileIdc[layer] == Profile::SCALABLEMAIN || ptl.profileIdc[layer] == Profile::SCALABLEMAIN10 || ptl.profileIdc[layer] == Profile::MULTIVIEWMAIN || ptl.profileIdc[layer] == Profile::MAINSCC)
744
0
    {
745
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
746
0
        int csp = ptl.chromaFormatConstraint;
747
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
748
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
749
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
750
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
751
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
752
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
753
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
754
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
755
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
756
0
        if (ptl.profileIdc[layer] == Profile::MAINSCC)
757
0
        {
758
0
            WRITE_FLAG(bitDepthConstraint <= 14, "max_14bit_constraint_flag");
759
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[0..15]");
760
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[16..31]");
761
0
            WRITE_FLAG(0, "reserved_zero_33bits[32]");
762
0
        }
763
0
        else
764
0
        {
765
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[0..15]");
766
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[16..31]");
767
0
            WRITE_CODE(0, 3, "XXX_reserved_zero_35bits[32..34]");
768
0
        }
769
0
    }
770
0
    else
771
0
    {
772
0
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
773
0
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
774
0
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
775
0
    }
776
0
    if (ptl.profileIdc[layer] == Profile::MAINSCC)
777
0
        WRITE_FLAG(false, "inbld_flag");
778
779
0
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
780
781
0
    if (maxTempSubLayers > 1)
782
0
    {
783
0
        for(int i = 0; i < maxTempSubLayers - 1; i++)
784
0
        {
785
0
            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
786
0
            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
787
0
        }
788
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
789
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
790
0
    }
791
0
}
792
793
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo, int layer)
794
0
{
795
0
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
796
0
    if (vui.aspectRatioInfoPresentFlag)
797
0
    {
798
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
799
0
        if (vui.aspectRatioIdc == 255)
800
0
        {
801
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
802
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
803
0
        }
804
0
    }
805
806
0
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
807
0
    if (vui.overscanInfoPresentFlag)
808
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
809
810
0
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
811
0
    if (vui.videoSignalTypePresentFlag)
812
0
    {
813
0
        WRITE_CODE(vui.videoFormat, 3, "video_format");
814
0
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
815
0
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
816
0
        if (vui.colourDescriptionPresentFlag)
817
0
        {
818
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
819
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
820
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
821
0
        }
822
0
    }
823
824
0
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
825
0
    if (vui.chromaLocInfoPresentFlag)
826
0
    {
827
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
828
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
829
0
    }
830
831
0
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
832
0
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
833
0
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
834
835
0
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
836
0
    if (vui.defaultDisplayWindow.bEnabled)
837
0
    {
838
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
839
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
840
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
841
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
842
0
    }
843
844
0
    if(layer)
845
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
846
0
    else
847
0
    {
848
0
        if (!bEmitVUITimingInfo)
849
0
            WRITE_FLAG(0, "vui_timing_info_present_flag");
850
0
        else
851
0
        {
852
0
            WRITE_FLAG(1, "vui_timing_info_present_flag");
853
0
            WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
854
0
            WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
855
0
            WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
856
0
        }
857
858
0
        if (!bEmitVUIHRDInfo)
859
0
            WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
860
0
        else
861
0
        {
862
0
            WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
863
0
            if (vui.hrdParametersPresentFlag)
864
0
                codeHrdParameters(vui.hrdParameters, maxSubTLayers);
865
0
        }
866
0
    }
867
868
0
    WRITE_FLAG(0, "bitstream_restriction_flag");
869
0
}
870
871
void Entropy::codeScalingList(const ScalingList& scalingList)
872
0
{
873
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
874
0
    {
875
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
876
0
        {
877
0
            int predList = scalingList.checkPredMode(sizeId, listId);
878
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
879
0
            if (predList >= 0)
880
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
881
0
            else // DPCM Mode
882
0
                codeScalingList(scalingList, sizeId, listId);
883
0
        }
884
0
    }
885
0
}
886
887
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
888
0
{
889
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
890
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
891
0
    int nextCoef = START_VALUE;
892
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
893
0
    int data;
894
895
0
    if (sizeId > BLOCK_8x8)
896
0
    {
897
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
898
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
899
0
    }
900
0
    for (int i = 0; i < coefNum; i++)
901
0
    {
902
0
        data = src[scan[i]] - nextCoef;
903
0
        if (data < -128)
904
0
            data += 256;
905
0
        if (data > 127)
906
0
            data -= 256;
907
0
        nextCoef = (nextCoef + data + 256) % 256;
908
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
909
0
    }
910
0
}
911
912
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
913
0
{
914
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
915
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
916
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
917
918
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
919
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
920
921
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
922
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
923
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
924
925
0
    for (int i = 0; i < maxSubTLayers; i++)
926
0
    {
927
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
928
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
929
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
930
931
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
932
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
933
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
934
0
    }
935
0
}
936
937
void Entropy::codeAUD(const Slice& slice)
938
0
{
939
0
    int picType;
940
941
0
    switch (slice.m_sliceType)
942
0
    {
943
0
    case I_SLICE:
944
0
        picType = 0;
945
0
        break;
946
0
    case P_SLICE:
947
0
        picType = 1;
948
0
        break;
949
0
    case B_SLICE:
950
0
        picType = 2;
951
0
        break;
952
0
    default:
953
0
        picType = 7;
954
0
        break;
955
0
    }
956
957
0
    WRITE_CODE(picType, 3, "pic_type");
958
0
}
959
960
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp, int layer)
961
0
{
962
0
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
963
0
    if (slice.getRapPicFlag())
964
0
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
965
966
0
    WRITE_UVLC(layer, "slice_pic_parameter_set_id");
967
968
    /* x265 does not use dependent slices, so always write all this data */
969
0
    if (slice_addr)
970
0
    {
971
        // if( dependent_slice_segments_enabled_flag )
972
        //     dependent_slice_segment_flag             u(1)
973
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
974
0
    }
975
976
#if ENABLE_MULTIVIEW
977
    if (encData.m_param->numViews > 1)
978
    {
979
        int esb = 0;
980
        if (2 > esb)
981
        {
982
            esb++;
983
            WRITE_FLAG(0, "discardable_flag");
984
        }
985
        if (2 > esb)
986
        {
987
            esb++;
988
            WRITE_FLAG(0, "cross_layer_bla_flag");
989
        }
990
    }
991
#endif
992
993
0
    WRITE_UVLC(slice.m_sliceType, "slice_type");
994
995
0
    if ((slice.m_param->numViews > 1 && layer > 0) || !slice.getIdrPicFlag())
996
0
    {
997
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
998
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
999
0
    }
1000
0
    if (!slice.getIdrPicFlag())
1001
0
    {
1002
#if _DEBUG || CHECKED_BUILD
1003
        // check for bitstream restriction stating that:
1004
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
1005
        // Ideally this process should not be repeated for each slice in a picture
1006
        if (slice.isIRAP())
1007
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
1008
            {
1009
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
1010
            }
1011
#endif
1012
1013
0
        if (slice.m_rpsIdx < 0)
1014
0
        {
1015
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
1016
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
1017
0
        }
1018
0
        else
1019
0
        {
1020
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
1021
0
            int numBits = 0;
1022
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
1023
0
                numBits++;
1024
1025
0
            if (numBits > 0)
1026
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
1027
0
        }
1028
1029
0
        if (slice.m_sps->bTemporalMVPEnabled)
1030
#if ENABLE_SCC_EXT
1031
            WRITE_FLAG(slice.m_bTemporalMvp, "slice_temporal_mvp_enable_flag");
1032
#else
1033
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
1034
0
#endif
1035
0
    }
1036
0
    const SAOParam *saoParam = encData.m_saoParam;
1037
0
    if (slice.m_bUseSao)
1038
0
    {
1039
0
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
1040
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1041
0
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
1042
0
    }
1043
0
    else if(encData.m_param->selectiveSAO)
1044
0
    {
1045
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
1046
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1047
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
1048
0
    }
1049
1050
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
1051
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
1052
1053
0
    if (!slice.isIntra())
1054
0
    {
1055
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
1056
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
1057
0
        if (overrideFlag)
1058
0
        {
1059
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
1060
0
            if (slice.isInterB())
1061
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
1062
0
            else
1063
0
            {
1064
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
1065
0
            }
1066
0
        }
1067
0
    }
1068
0
    else
1069
0
    {
1070
0
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
1071
0
    }
1072
1073
0
    if (slice.isInterB())
1074
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
1075
1076
#if ENABLE_SCC_EXT
1077
    if (slice.m_bTemporalMvp)
1078
#else
1079
0
    if (slice.m_sps->bTemporalMVPEnabled)
1080
0
#endif
1081
0
    {
1082
0
        if (slice.m_sliceType == B_SLICE)
1083
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
1084
1085
0
        if (slice.m_sliceType != I_SLICE &&
1086
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
1087
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
1088
0
        {
1089
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
1090
0
        }
1091
0
    }
1092
0
    if (((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE)) && !layer)
1093
0
        codePredWeightTable(slice);
1094
1095
0
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
1096
0
    if (!slice.isIntra())
1097
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
1098
1099
0
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
1100
0
    WRITE_SVLC(code, "slice_qp_delta");
1101
1102
0
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
1103
0
    {
1104
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
1105
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
1106
0
    }
1107
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
1108
    //       We didn't support filter across slice board, so disable it now
1109
1110
0
    if (encData.m_param->maxSlices <= 1)
1111
0
    {
1112
0
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
1113
0
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
1114
1115
0
        if (isSAOEnabled || isDBFEnabled)
1116
0
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
1117
0
    }
1118
0
}
1119
1120
/** write wavefront substreams sizes for the slice header */
1121
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
1122
0
{
1123
0
    uint32_t offsetLen = 1;
1124
0
    while (maxOffset >= (1U << offsetLen))
1125
0
    {
1126
0
        offsetLen++;
1127
0
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
1128
0
    }
1129
1130
0
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
1131
0
    if (numSubStreams > 0)
1132
0
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
1133
1134
0
    for (uint32_t i = 0; i < numSubStreams; i++)
1135
0
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
1136
0
}
1137
1138
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
1139
0
{
1140
0
    if (idx > 0)
1141
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
1142
1143
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
1144
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
1145
0
    int prev = 0;
1146
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
1147
0
    {
1148
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
1149
0
        prev = rps.deltaPOC[j];
1150
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
1151
0
    }
1152
1153
0
    prev = 0;
1154
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
1155
0
    {
1156
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
1157
0
        prev = rps.deltaPOC[j];
1158
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
1159
0
    }
1160
0
}
1161
1162
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
1163
0
{
1164
0
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
1165
0
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
1166
0
}
1167
1168
/* encode a CU block recursively */
1169
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
1170
0
{
1171
0
    const Slice* slice = ctu.m_slice;
1172
1173
0
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
1174
0
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
1175
1176
0
    if (!cuUnsplitFlag)
1177
0
    {
1178
0
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1179
0
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1180
0
            bEncodeDQP = true;
1181
0
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1182
0
        {
1183
0
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1184
0
            if (childGeom.flags & CUGeom::PRESENT)
1185
0
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1186
0
        }
1187
0
        return;
1188
0
    }
1189
1190
0
    if (cuSplitFlag) 
1191
0
        codeSplitFlag(ctu, absPartIdx, depth);
1192
1193
0
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
1194
0
    {
1195
0
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1196
0
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1197
0
            bEncodeDQP = true;
1198
0
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1199
0
        {
1200
0
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1201
0
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1202
0
        }
1203
0
        return;
1204
0
    }
1205
1206
0
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1207
0
        bEncodeDQP = true;
1208
1209
0
    if (slice->m_pps->bTransquantBypassEnabled)
1210
0
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
1211
1212
0
    if (!slice->isIntra())
1213
0
    {
1214
0
        codeSkipFlag(ctu, absPartIdx);
1215
0
        if (ctu.isSkipped(absPartIdx))
1216
0
        {
1217
0
            codeMergeIndex(ctu, absPartIdx);
1218
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1219
0
            return;
1220
0
        }
1221
0
        codePredMode(ctu.m_predMode[absPartIdx]);
1222
0
    }
1223
1224
0
    codePartSize(ctu, absPartIdx, depth);
1225
1226
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1227
0
    codePredInfo(ctu, absPartIdx);
1228
1229
0
    uint32_t tuDepthRange[2];
1230
0
    if (ctu.isIntra(absPartIdx))
1231
0
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
1232
0
    else
1233
0
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
1234
1235
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
1236
0
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
1237
1238
    // --- write terminating bit ---
1239
0
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1240
0
}
1241
1242
/* Return bit count of signaling inter mode */
1243
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
1244
0
{
1245
0
    uint32_t bits;
1246
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
1247
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
1248
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1249
0
    switch (partSize)
1250
0
    {
1251
0
    case SIZE_2Nx2N:
1252
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1253
0
        break;
1254
1255
0
    case SIZE_2NxN:
1256
0
    case SIZE_2NxnU:
1257
0
    case SIZE_2NxnD:
1258
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1259
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1260
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1261
0
        {
1262
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1263
0
            if (partSize != SIZE_2NxN)
1264
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1265
0
        }
1266
0
        break;
1267
1268
0
    case SIZE_Nx2N:
1269
0
    case SIZE_nLx2N:
1270
0
    case SIZE_nRx2N:
1271
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1272
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1273
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1274
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1275
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1276
0
        {
1277
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1278
0
            if (partSize != SIZE_Nx2N)
1279
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1280
0
        }
1281
0
        break;
1282
0
    default:
1283
0
        X265_CHECK(0, "invalid CU partition\n");
1284
0
        break;
1285
0
    }
1286
1287
0
    return bits;
1288
0
}
1289
1290
/* finish encoding a cu and handle end-of-slice conditions */
1291
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
1292
0
{
1293
0
    const Slice* slice = ctu.m_slice;
1294
0
    uint32_t realEndAddress = slice->m_endCUAddr;
1295
0
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
1296
0
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
1297
1298
0
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
1299
0
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
1300
0
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
1301
0
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
1302
0
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
1303
0
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
1304
1305
0
    if (slice->m_pps->bUseDQP)
1306
0
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
1307
1308
0
    if (granularityBoundary)
1309
0
    {
1310
        // Encode slice finish
1311
0
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
1312
0
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
1313
0
            bTerminateSlice = 1;
1314
1315
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
1316
0
        if (!bTerminateSlice)
1317
0
            encodeBinTrm(0);    // end_of_slice_segment_flag
1318
1319
0
        if (!m_bitIf)
1320
0
            resetBits(); // TODO: most likely unnecessary
1321
0
    }
1322
0
}
1323
1324
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1325
                              bool& bCodeDQP, const uint32_t depthRange[2])
1326
0
{
1327
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1328
1329
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1330
     * so we have checks to make sure the implied value matches our intentions */
1331
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1332
0
    {
1333
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1334
0
    }
1335
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1336
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1337
0
    {
1338
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1339
0
    }
1340
0
    else if (log2CurSize > depthRange[1])
1341
0
    {
1342
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1343
0
    }
1344
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1345
0
    {
1346
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1347
0
    }
1348
0
    else
1349
0
    {
1350
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1351
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1352
0
    }
1353
1354
0
    uint32_t hChromaShift = cu.m_hChromaShift;
1355
0
    uint32_t vChromaShift = cu.m_vChromaShift;
1356
0
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
1357
0
    if (!curDepth || !bSmallChroma)
1358
0
    {
1359
0
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
1360
0
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
1361
0
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
1362
0
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
1363
0
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
1364
0
    }
1365
1366
0
    if (subdiv)
1367
0
    {
1368
0
        --log2CurSize;
1369
0
        ++curDepth;
1370
1371
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1372
1373
0
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1374
0
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1375
0
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1376
0
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1377
0
        return;
1378
0
    }
1379
1380
0
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
1381
1382
0
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
1383
0
    {
1384
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
1385
0
    }
1386
0
    else
1387
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1388
1389
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1390
0
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
1391
0
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
1392
0
    if (!(cbfY || cbfU || cbfV))
1393
0
        return;
1394
1395
    // dQP: only for CTU once
1396
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1397
0
    {
1398
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1399
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1400
0
        codeDeltaQP(cu, absPartIdxLT);
1401
0
        bCodeDQP = false;
1402
0
    }
1403
1404
0
    if (cbfY)
1405
0
    {
1406
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1407
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1408
0
        if (!(cbfU || cbfV))
1409
0
            return;
1410
0
    }
1411
1412
0
    if (bSmallChroma)
1413
0
    {
1414
0
        if ((absPartIdx & 3) != 3)
1415
0
            return;
1416
1417
0
        const uint32_t log2CurSizeC = 2;
1418
0
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1419
0
        const uint32_t curPartNum = 4;
1420
0
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1421
0
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1422
0
        {
1423
0
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1424
0
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1425
0
            do
1426
0
            {
1427
0
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1428
0
                {
1429
0
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1430
0
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1431
0
                }
1432
0
            }
1433
0
            while (tuIterator.isNextSection());
1434
0
        }
1435
0
    }
1436
0
    else
1437
0
    {
1438
0
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1439
0
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1440
0
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1441
0
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1442
0
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1443
0
        {
1444
0
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1445
0
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1446
0
            do
1447
0
            {
1448
0
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1449
0
                {
1450
0
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1451
0
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1452
0
                }
1453
0
            }
1454
0
            while (tuIterator.isNextSection());
1455
0
        }
1456
0
    }
1457
0
}
1458
1459
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1460
                              bool& bCodeDQP, const uint32_t depthRange[2])
1461
0
{
1462
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1463
1464
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1465
     * so we have checks to make sure the implied value matches our intentions */
1466
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1467
0
    {
1468
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1469
0
    }
1470
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1471
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1472
0
    {
1473
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1474
0
    }
1475
0
    else if (log2CurSize > depthRange[1])
1476
0
    {
1477
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1478
0
    }
1479
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1480
0
    {
1481
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1482
0
    }
1483
0
    else
1484
0
    {
1485
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1486
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1487
0
    }
1488
1489
0
    if (subdiv)
1490
0
    {
1491
0
        --log2CurSize;
1492
0
        ++curDepth;
1493
1494
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1495
1496
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1497
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1498
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1499
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1500
0
        return;
1501
0
    }
1502
1503
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1504
0
    {
1505
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1506
0
    }
1507
0
    else
1508
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1509
1510
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1511
1512
0
    if (!cbfY)
1513
0
        return;
1514
1515
    // dQP: only for CTU once
1516
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1517
0
    {
1518
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1519
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1520
0
        codeDeltaQP(cu, absPartIdxLT);
1521
0
        bCodeDQP = false;
1522
0
    }
1523
1524
0
    if (cbfY)
1525
0
    {
1526
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1527
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1528
0
    }
1529
0
}
1530
1531
1532
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1533
0
{
1534
0
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1535
0
    {
1536
0
        codeIntraDirLumaAng(cu, absPartIdx, true);
1537
0
        if (cu.m_chromaFormat != X265_CSP_I400)
1538
0
        {
1539
0
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1540
0
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1541
1542
0
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1543
1544
0
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1545
0
            {
1546
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1547
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1548
0
                {
1549
0
                    absPartIdx += qNumParts;
1550
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1551
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1552
0
                }
1553
0
            }
1554
0
        }
1555
0
    }
1556
0
    else // if it is inter mode, encode motion vector and reference index
1557
0
        codePUWise(cu, absPartIdx);
1558
0
}
1559
1560
/** encode motion information for every PU block */
1561
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1562
0
{
1563
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1564
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1565
1566
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1567
0
    {
1568
0
        codeMergeFlag(cu, subPartIdx);
1569
0
        if (cu.m_mergeFlag[subPartIdx])
1570
0
            codeMergeIndex(cu, subPartIdx);
1571
0
        else
1572
0
        {
1573
0
            if (cu.m_slice->isInterB())
1574
0
                codeInterDir(cu, subPartIdx);
1575
1576
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1577
0
            for (uint32_t list = 0; list < 2; list++)
1578
0
            {
1579
0
                if (interDir & (1 << list))
1580
0
                {
1581
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1582
1583
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1584
0
                    codeMvd(cu, subPartIdx, list);
1585
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1586
0
                }
1587
0
            }
1588
0
        }
1589
0
    }
1590
0
}
1591
1592
/** encode reference frame index for a PU block */
1593
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1594
0
{
1595
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1596
1597
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1598
0
        codeRefFrmIdx(cu, absPartIdx, list);
1599
0
}
1600
1601
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1602
0
{
1603
0
    if (!cu.isIntra(absPartIdx))
1604
0
    {
1605
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1606
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1607
0
        if (!cu.getQtRootCbf(absPartIdx))
1608
0
            return;
1609
0
    }
1610
1611
0
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1612
0
    if (cu.m_chromaFormat == X265_CSP_I400)
1613
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1614
0
    else
1615
0
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1616
0
}
1617
1618
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1619
0
{
1620
0
    int typeIdx = ctuParam.typeIdx;
1621
1622
0
    if (plane != 2)
1623
0
    {
1624
0
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1625
0
        if (typeIdx >= 0)
1626
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1627
0
    }
1628
1629
0
    if (typeIdx >= 0)
1630
0
    {
1631
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1632
0
        if (typeIdx == SAO_BO)
1633
0
        {
1634
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1635
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1636
1637
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1638
0
                if (ctuParam.offset[i] != 0)
1639
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1640
1641
0
            encodeBinsEP(ctuParam.bandPos, 5);
1642
0
        }
1643
0
        else // if (typeIdx < SAO_BO)
1644
0
        {
1645
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1646
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1647
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1648
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1649
0
            if (plane != 2)
1650
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1651
0
        }
1652
0
    }
1653
0
}
1654
1655
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1656
0
{
1657
0
    if (plane != 2)
1658
0
    {
1659
0
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1660
0
        encodeBinEP(1);
1661
0
    }
1662
1663
0
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1664
1665
0
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1666
0
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1667
0
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1668
0
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1669
0
    if (plane != 2)
1670
0
        encodeBinsEP((uint32_t)(typeIdx), 2);
1671
0
}
1672
1673
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1674
0
{
1675
0
    if (plane != 2)
1676
0
    {
1677
0
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1678
0
        encodeBinEP(0);
1679
0
    }
1680
1681
0
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1682
1683
0
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1684
0
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1685
1686
0
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1687
0
        if (offset[i] != 0)
1688
0
            encodeBinEP(offset[i] < 0);
1689
1690
0
    encodeBinsEP(bandPos, 5);
1691
0
}
1692
1693
/** initialize context model with respect to QP and initialization value */
1694
uint8_t sbacInit(int qp, int initValue)
1695
0
{
1696
0
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1697
1698
0
    int  slope      = (initValue >> 4) * 5 - 45;
1699
0
    int  offset     = ((initValue & 15) << 3) - 16;
1700
0
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1701
0
    uint32_t mpState = (initState >= 64);
1702
0
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1703
1704
0
    return (uint8_t)state;
1705
0
}
1706
1707
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1708
0
{
1709
0
    ctxModel += sliceType * size;
1710
1711
0
    for (int n = 0; n < size; n++)
1712
0
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1713
0
}
1714
1715
void Entropy::resetEntropy(const Slice& slice)
1716
0
{
1717
0
    int  qp              = slice.m_sliceQp;
1718
0
    SliceType sliceType  = slice.m_sliceType;
1719
1720
0
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1721
0
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1722
0
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1723
0
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1724
0
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1725
0
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1726
0
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1727
0
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1728
0
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1729
0
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1730
0
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1731
0
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1732
0
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1733
0
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1734
0
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1735
0
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1736
0
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1737
0
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1738
0
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1739
0
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1740
0
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1741
0
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1742
0
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1743
0
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1744
0
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1745
0
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1746
    // new structure
1747
1748
0
    start();
1749
0
}
1750
1751
/* code explicit wp tables */
1752
void Entropy::codePredWeightTable(const Slice& slice)
1753
0
{
1754
0
    const WeightParam *wp;
1755
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1756
0
    bool            bDenomCoded  = false;
1757
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1758
0
    uint32_t        totalSignalledWeightFlags = 0;
1759
1760
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1761
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1762
0
    {
1763
0
        for (int list = 0; list < numRefDirs; list++)
1764
0
        {
1765
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1766
0
            {
1767
0
                wp = slice.m_weightPredTable[list][ref];
1768
0
                if (!bDenomCoded)
1769
0
                {
1770
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1771
1772
0
                    if (bChroma)
1773
0
                    {
1774
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1775
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1776
0
                    }
1777
0
                    bDenomCoded = true;
1778
0
                }
1779
#if ENABLE_SCC_EXT
1780
                if (slice.m_poc == slice.m_refPOCList[list][ref])
1781
                    assert(!wp[0].wtPresent);
1782
                else
1783
#endif
1784
0
                    WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1785
0
                totalSignalledWeightFlags = totalSignalledWeightFlags + wp[0].wtPresent;
1786
0
            }
1787
1788
0
            if (bChroma)
1789
0
            {
1790
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1791
0
                {
1792
0
                    wp = slice.m_weightPredTable[list][ref];
1793
#if ENABLE_SCC_EXT
1794
                    if (slice.m_poc == slice.m_refPOCList[list][ref])
1795
                        assert(!wp[1].wtPresent);
1796
                    else
1797
#endif
1798
0
                        WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1799
0
                    totalSignalledWeightFlags = totalSignalledWeightFlags + 2 * wp[1].wtPresent;
1800
0
                }
1801
0
            }
1802
1803
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1804
0
            {
1805
0
                wp = slice.m_weightPredTable[list][ref];
1806
0
                if (wp[0].wtPresent)
1807
0
                {
1808
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1809
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1810
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1811
0
                }
1812
1813
0
                if (bChroma)
1814
0
                {
1815
0
                    if (wp[1].wtPresent)
1816
0
                    {
1817
0
                        for (int plane = 1; plane < 3; plane++)
1818
0
                        {
1819
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1820
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1821
1822
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1823
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1824
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1825
0
                        }
1826
0
                    }
1827
0
                }
1828
0
            }
1829
0
        }
1830
1831
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1832
0
    }
1833
0
}
1834
1835
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1836
0
{
1837
0
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1838
1839
0
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1840
1841
0
    if (!symbol)
1842
0
        return;
1843
1844
0
    bool bCodeLast = (maxSymbol > symbol);
1845
1846
0
    while (--symbol)
1847
0
        encodeBin(1, scmModel[offset]);
1848
1849
0
    if (bCodeLast)
1850
0
        encodeBin(0, scmModel[offset]);
1851
0
}
1852
1853
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1854
0
{
1855
0
    uint32_t bins = 0;
1856
0
    int numBins = 0;
1857
1858
0
    while (symbol >= (uint32_t)(1 << count))
1859
0
    {
1860
0
        bins = 2 * bins + 1;
1861
0
        numBins++;
1862
0
        symbol -= 1 << count;
1863
0
        count++;
1864
0
    }
1865
1866
0
    bins = 2 * bins + 0;
1867
0
    numBins++;
1868
1869
0
    bins = (bins << count) | symbol;
1870
0
    numBins += count;
1871
1872
0
    X265_CHECK(numBins <= 32, "numBins too large\n");
1873
0
    encodeBinsEP(bins, numBins);
1874
0
}
1875
1876
/** Coding of coeff_abs_level_minus3 */
1877
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1878
0
{
1879
0
    uint32_t length;
1880
0
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1881
1882
0
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1883
0
    {
1884
0
        length = codeNumber >> absGoRice;
1885
1886
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1887
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1888
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1889
0
    }
1890
0
    else
1891
0
    {
1892
0
        length = 0;
1893
0
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1894
0
        {
1895
0
            unsigned long idx;
1896
0
            BSR(idx, codeNumber + 1);
1897
0
            length = idx;
1898
0
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1899
0
            codeNumber -= (1 << idx) - 1;
1900
0
        }
1901
0
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1902
1903
0
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1904
0
        encodeBinsEP(codeNumber, length + absGoRice);
1905
0
    }
1906
0
}
1907
1908
// SBAC RD
1909
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1910
0
{
1911
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1912
0
    m_fracBits = src.m_fracBits;
1913
0
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1914
0
}
1915
1916
void Entropy::copyFrom(const Entropy& src)
1917
0
{
1918
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1919
1920
0
    copyState(src);
1921
1922
0
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1923
0
    markValid();
1924
0
}
1925
1926
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1927
0
{
1928
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1929
1930
0
    if (cu.isIntra(absPartIdx))
1931
0
    {
1932
0
        if (depth == cu.m_encData->m_param->maxCUDepth)
1933
0
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1934
0
        return;
1935
0
    }
1936
1937
0
    switch (partSize)
1938
0
    {
1939
0
    case SIZE_2Nx2N:
1940
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1941
0
        break;
1942
1943
0
    case SIZE_2NxN:
1944
0
    case SIZE_2NxnU:
1945
0
    case SIZE_2NxnD:
1946
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1947
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1948
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1949
0
        {
1950
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1951
0
            if (partSize != SIZE_2NxN)
1952
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1953
0
        }
1954
0
        break;
1955
1956
0
    case SIZE_Nx2N:
1957
0
    case SIZE_nLx2N:
1958
0
    case SIZE_nRx2N:
1959
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1960
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1961
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1962
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1963
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1964
0
        {
1965
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1966
0
            if (partSize != SIZE_Nx2N)
1967
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1968
0
        }
1969
0
        break;
1970
0
    default:
1971
0
        X265_CHECK(0, "invalid CU partition\n");
1972
0
        break;
1973
0
    }
1974
0
}
1975
1976
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1977
0
{
1978
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1979
1980
0
    if (numCand > 1)
1981
0
    {
1982
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1983
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1984
1985
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1986
1987
0
        if (unaryIdx != 0)
1988
0
        {
1989
0
            uint32_t mask = (1 << unaryIdx) - 2;
1990
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1991
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1992
0
        }
1993
0
    }
1994
0
}
1995
1996
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1997
0
{
1998
0
    uint32_t dir[4], j;
1999
0
    uint32_t preds[4][3];
2000
0
    int predIdx[4];
2001
0
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
2002
0
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
2003
2004
0
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
2005
0
    {
2006
0
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
2007
0
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
2008
0
        predIdx[j] = -1;
2009
0
        for (uint32_t i = 0; i < 3; i++)
2010
0
            if (dir[j] == preds[j][i])
2011
0
                predIdx[j] = i;
2012
2013
0
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
2014
0
    }
2015
2016
0
    for (j = 0; j < partNum; j++)
2017
0
    {
2018
0
        if (predIdx[j] != -1)
2019
0
        {
2020
0
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
2021
            // NOTE: Mapping
2022
            //       0 = 0
2023
            //       1 = 10
2024
            //       2 = 11
2025
0
            int nonzero = (!!predIdx[j]);
2026
0
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
2027
0
        }
2028
0
        else
2029
0
        {
2030
0
            if (preds[j][0] > preds[j][1])
2031
0
                std::swap(preds[j][0], preds[j][1]);
2032
2033
0
            if (preds[j][0] > preds[j][2])
2034
0
                std::swap(preds[j][0], preds[j][2]);
2035
2036
0
            if (preds[j][1] > preds[j][2])
2037
0
                std::swap(preds[j][1], preds[j][2]);
2038
2039
0
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
2040
0
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
2041
0
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
2042
2043
0
            encodeBinsEP(dir[j], 5);
2044
0
        }
2045
0
    }
2046
0
}
2047
2048
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
2049
0
{
2050
0
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
2051
2052
0
    if (intraDirChroma == DM_CHROMA_IDX)
2053
0
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
2054
0
    else
2055
0
    {
2056
0
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
2057
0
        {
2058
0
            if (intraDirChroma == chromaDirMode[i])
2059
0
            {
2060
0
                intraDirChroma = i;
2061
0
                break;
2062
0
            }
2063
0
        }
2064
2065
0
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
2066
0
        encodeBinsEP(intraDirChroma, 2);
2067
0
    }
2068
0
}
2069
2070
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
2071
0
{
2072
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
2073
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
2074
2075
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
2076
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
2077
0
    if (interDir < 2)
2078
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
2079
0
}
2080
2081
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
2082
0
{
2083
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
2084
2085
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
2086
2087
0
    if (refFrame > 0)
2088
0
    {
2089
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
2090
0
        if (refNum == 0)
2091
0
            return;
2092
2093
0
        refFrame--;
2094
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
2095
0
        if (refFrame > 0)
2096
0
        {
2097
0
            uint32_t mask = (1 << refFrame) - 2;
2098
0
            mask >>= (refFrame == refNum) ? 1 : 0;
2099
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
2100
0
        }
2101
0
    }
2102
0
}
2103
2104
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
2105
0
{
2106
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
2107
0
    const int hor = mvd.x;
2108
0
    const int ver = mvd.y;
2109
2110
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2111
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2112
2113
0
    const bool bHorAbsGr0 = hor != 0;
2114
0
    const bool bVerAbsGr0 = ver != 0;
2115
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
2116
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
2117
2118
0
    if (bHorAbsGr0)
2119
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2120
2121
0
    if (bVerAbsGr0)
2122
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2123
2124
0
    if (bHorAbsGr0)
2125
0
    {
2126
0
        if (horAbs > 1)
2127
0
            writeEpExGolomb(horAbs - 2, 1);
2128
2129
0
        encodeBinEP(0 > hor ? 1 : 0);
2130
0
    }
2131
2132
0
    if (bVerAbsGr0)
2133
0
    {
2134
0
        if (verAbs > 1)
2135
0
            writeEpExGolomb(verAbs - 2, 1);
2136
2137
0
        encodeBinEP(0 > ver ? 1 : 0);
2138
0
    }
2139
0
}
2140
2141
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
2142
0
{
2143
0
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
2144
2145
0
    int qpBdOffsetY = QP_BD_OFFSET;
2146
2147
0
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
2148
2149
0
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
2150
0
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
2151
0
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
2152
0
    if (absDQp >= CU_DQP_TU_CMAX)
2153
0
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
2154
2155
0
    if (absDQp > 0)
2156
0
    {
2157
0
        uint32_t sign = (dqp > 0 ? 0 : 1);
2158
0
        encodeBinEP(sign);
2159
0
    }
2160
0
}
2161
2162
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
2163
0
{
2164
0
    uint32_t ctx = tuDepth + 2;
2165
2166
0
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
2167
0
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
2168
0
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
2169
2170
0
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
2171
0
    {
2172
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
2173
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
2174
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
2175
2176
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2177
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2178
0
    }
2179
0
    else
2180
0
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2181
0
}
2182
2183
#if CHECKED_BUILD || _DEBUG
/* Debug-only reference used by codeCoeffNxN to cross-check primitives.costCoeffRemain.
 *
 * absCoeff   - absolute coefficient levels
 * numNonZero - number of entries to visit (at least one entry is always visited)
 *
 * Returns the accumulated bin count for the remaining-level codes of all entries
 * whose level reaches its base level. */
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
    // Packed base levels, two bits per coefficient: 2 for the first eight
    // entries, 1 for the following eight.
    uint32_t packedBaseLevels = 0x5555AAAA;
    uint32_t riceParam = 0;
    int noLevelAbove1Yet = 1;   // cleared once a level >= 2 has been seen
    uint32_t totalBins = 0;
    int pos = 0;

    do
    {
        const uint16_t level = absCoeff[pos];
        const int baseLevel = (packedBaseLevels & 3) | noLevelAbove1Yet;
        X265_CHECK(baseLevel == ((pos < C1FLAG_NUMBER) ? (2 + noLevelAbove1Yet) : 1), "baseLevel check failurr\n");
        packedBaseLevels >>= 2;

        int codeNumber = level - baseLevel;
        if (codeNumber >= 0)
        {
            // mirrors writeCoefRemainExGolomb(level - baseLevel, riceParam)
            codeNumber = ((uint32_t)codeNumber >> riceParam) - COEF_REMAIN_BIN_REDUCTION;
            if (codeNumber >= 0)
            {
                unsigned long msbPos;
                BSR(msbPos, codeNumber + 1);
                const uint32_t length = msbPos;
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");

                codeNumber = (length + length);
            }
            totalBins += (COEF_REMAIN_BIN_REDUCTION + 1 + riceParam + codeNumber);

            // grow the Rice parameter after a large level; it saturates at 4
            if (level > (COEF_REMAIN_BIN_REDUCTION << riceParam))
                riceParam = (riceParam + 1) - (riceParam >> 2);
            X265_CHECK(riceParam <= 4, "goRiceParam check failure\n");
        }

        if (level >= 2)
            noLevelAbove1Yet = 0;
    }
    while (++pos < numNonZero);

    return totalBins;
}
#endif // debug only code
2231
2232
/* Code the quantized coefficients of one transform block of size (1 << log2TrSize).
 *
 * Order of operations: optional transform-skip flag, position of the last significant
 * coefficient (context-coded prefix + bypass suffix), then for every coefficient group (CG)
 * from the one holding the last coefficient down to group 0: the CG flag, the significance
 * flags, the greater-than-1 / greater-than-2 flags, the sign bins and the remaining levels.
 *
 * When m_bitIf is NULL the significance map and the level coding take the "fast RD path":
 * estimated costs from the costCoeffNxN / costC1C2Flag / costCoeffRemain primitives are added
 * to m_fracBits instead of coding bin by bin.
 *
 * cu         - coding unit owning the block
 * coeff      - coefficients of the block, row stride trSize
 * absPartIdx - partition index inside the CU
 * log2TrSize - log2 of the block width/height
 * ttype      - plane (luma or chroma) being coded */
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
2233
0
{
2234
0
    uint32_t trSize = 1 << log2TrSize;
2235
0
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
2236
    // compute number of significant coefficients
2237
0
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
2238
0
    X265_CHECK(numSig > 0, "cbf check fail\n");
2239
0
    // sign hiding is only used when enabled in the PPS and the CU is not transquant-bypass
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
2240
2241
0
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
2242
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
2243
2244
0
    bool bIsLuma = ttype == TEXT_LUMA;
2245
2246
    // select scans
2247
0
    TUEntropyCodingParameters codingParameters;
2248
0
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
2249
2250
0
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
2251
0
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
2252
0
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
2253
2254
    //----- encode significance map -----
2255
2256
    // Find position of last coefficient
2257
0
    int scanPosLast = 0;
2258
0
    uint32_t posLast;
2259
0
    uint64_t sigCoeffGroupFlag64 = 0;
2260
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
2261
0
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
2262
2263
0
    // the primitive also fills the per-group coeffSign / coeffFlag / coeffNum arrays
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
2264
0
    posLast = codingParameters.scan[scanPosLast];
2265
2266
0
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
2267
2268
    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
2269
0
    for(int idx = 0; idx < lastScanSet; idx++)
2270
0
    {
2271
0
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
2272
0
        const uint8_t nonZero = (coeffNum[idx] != 0);
2273
0
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
2274
0
    }
2275
2276
2277
    // Code position of last coefficient
2278
0
    {
2279
        // The last position is composed of a prefix and suffix.
2280
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
2281
        // The bypass coded bins for both the x and y components are grouped together.
2282
0
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
2283
0
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
2284
        // swap
2285
0
        if (codingParameters.scanType == SCAN_VER)
2286
0
            std::swap(pos[0], pos[1]);
2287
2288
0
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
2289
0
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
2290
0
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
2291
0
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
2292
0
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
2293
2294
0
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
2295
0
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2296
0
        {
2297
0
            // table entry: low 4 bits = number of prefix ones, upper bits = suffix length
            uint32_t temp = g_lastCoeffTable[pos[i]];
2298
0
            uint32_t prefixOnes = temp & 15;
2299
0
            uint32_t suffixLen = temp >> 4;
2300
2301
0
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
2302
0
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
2303
2304
0
            // the terminating zero is omitted when the prefix already has the maximum length
            if (prefixOnes < maxGroupIdx)
2305
0
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
2306
2307
0
            packedSuffixBits <<= suffixLen;
2308
0
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
2309
0
            packedSuffixLen += suffixLen;
2310
0
        }
2311
2312
0
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
2313
0
    }
2314
2315
    // code significance flag
2316
0
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
2317
0
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
2318
0
    uint32_t c1 = 1;
2319
0
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
2320
0
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
2321
0
    uint32_t numNonZero = 1;
2322
0
    unsigned long lastNZPosInCG;
2323
0
    unsigned long firstNZPosInCG;
2324
2325
#if _DEBUG
2326
    // Unnecessary, for Valgrind-3.10.0 only
2327
    memset(absCoeff, 0, sizeof(absCoeff));
2328
#endif
2329
2330
0
    // seed absCoeff with the last significant coefficient; numNonZero starts at 1 to account for it
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
2331
2332
0
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
2333
0
    {
2334
0
        const uint32_t subCoeffFlag = coeffFlag[subSet];
2335
0
        uint32_t scanFlagMask = subCoeffFlag;
2336
0
        int subPosBase = subSet << MLS_CG_SIZE;
2337
        
2338
0
        if (subSet == lastScanSet)
2339
0
        {
2340
0
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
2341
0
            // drop the flag bit of the last coefficient, it is not coded as a significance flag
            scanFlagMask >>= 1;
2342
0
        }
2343
2344
        // encode significant_coeffgroup_flag
2345
0
        const int cgBlkPos = codingParameters.scanCG[subSet];
2346
0
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
2347
0
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
2348
0
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
2349
2350
0
        // no group flag is coded for the group holding the last coefficient nor for group 0;
        // both are simply marked as significant
        if (subSet == lastScanSet || !subSet)
2351
0
            sigCoeffGroupFlag64 |= cgBlkPosMask;
2352
0
        else
2353
0
        {
2354
0
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
2355
0
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2356
0
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
2357
0
        }
2358
2359
        // encode significant_coeff_flag
2360
0
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
2361
0
        {
2362
0
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
2363
0
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2364
0
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
2365
2366
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
2367
0
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
2368
0
            {
2369
                // patternSigCtx = 0
2370
0
                {
2371
0
                    2, 1, 1, 0,
2372
0
                    1, 1, 0, 0,
2373
0
                    1, 0, 0, 0,
2374
0
                    0, 0, 0, 0,
2375
0
                },
2376
                // patternSigCtx = 1
2377
0
                {
2378
0
                    2, 2, 2, 2,
2379
0
                    1, 1, 1, 1,
2380
0
                    0, 0, 0, 0,
2381
0
                    0, 0, 0, 0,
2382
0
                },
2383
                // patternSigCtx = 2
2384
0
                {
2385
0
                    2, 1, 0, 0,
2386
0
                    2, 1, 0, 0,
2387
0
                    2, 1, 0, 0,
2388
0
                    2, 1, 0, 0,
2389
0
                },
2390
                // patternSigCtx = 3
2391
0
                {
2392
0
                    2, 2, 2, 2,
2393
0
                    2, 2, 2, 2,
2394
0
                    2, 2, 2, 2,
2395
0
                    2, 2, 2, 2,
2396
0
                },
2397
                // 4x4
2398
0
                {
2399
0
                    0, 1, 4, 5,
2400
0
                    2, 3, 4, 5,
2401
0
                    6, 6, 8, 8,
2402
0
                    7, 7, 8, 8
2403
0
                }
2404
0
            };
2405
2406
0
            const int offset = codingParameters.firstSignificanceMapContext;
2407
0
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2408
2409
0
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2410
0
            if (m_bitIf)
2411
0
            {
2412
0
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2413
2414
                // TODO: accelerate by PABSW
2415
0
                for (int i = 0; i < MLS_CG_SIZE; i++)
2416
0
                {
2417
0
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2418
0
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2419
0
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2420
0
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2421
0
                }
2422
2423
0
                if (log2TrSize == 2)
2424
0
                {
2425
0
                    do
2426
0
                    {
2427
0
                        uint32_t blkPos, sig, ctxSig;
2428
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2429
0
                        sig     = scanFlagMask & 1;
2430
0
                        scanFlagMask >>= 1;
2431
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2432
0
                        {
2433
0
                            ctxSig = table_cnt[4][blkPos];
2434
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2435
0
                            encodeBin(sig, baseCtx[ctxSig]);
2436
0
                        }
2437
0
                        // always store the level; the slot is only kept when sig advances numNonZero
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2438
0
                        numNonZero += sig;
2439
0
                        scanPosSigOff--;
2440
0
                    }
2441
0
                    while(scanPosSigOff >= 0);
2442
0
                }
2443
0
                else
2444
0
                {
2445
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2446
2447
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2448
0
                    do
2449
0
                    {
2450
0
                        uint32_t blkPos, sig, ctxSig;
2451
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2452
0
                        // all-zero mask only for scan position 0 of the block, forcing ctxSig to 0 there
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2453
0
                        sig     = scanFlagMask & 1;
2454
0
                        scanFlagMask >>= 1;
2455
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2456
0
                        // the flag at offset 0 of a non-first group is skipped when no coefficient
                        // has been found in the group so far
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2457
0
                        {
2458
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2459
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2460
2461
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2462
0
                            encodeBin(sig, baseCtx[ctxSig]);
2463
0
                        }
2464
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2465
0
                        numNonZero += sig;
2466
0
                        scanPosSigOff--;
2467
0
                    }
2468
0
                    while(scanPosSigOff >= 0);
2469
0
                }
2470
0
            }
2471
0
            else // fast RD path
2472
0
            {
2473
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2474
0
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2475
0
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2476
0
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2477
2478
#if CHECKED_BUILD || _DEBUG
2479
                numNonZero = coeffNum[subSet];
2480
#endif
2481
                // update RD cost
2482
0
                m_fracBits += sum;
2483
0
            } // end of fast RD path -- !m_bitIf
2484
0
        }
2485
0
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2486
2487
0
        uint32_t coeffSigns = coeffSign[subSet];
2488
0
        numNonZero = coeffNum[subSet];
2489
0
        if (numNonZero > 0)
2490
0
        {
2491
0
            uint32_t idx;
2492
0
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2493
0
            BSR(lastNZPosInCG, subCoeffFlag);
2494
0
            BSF(firstNZPosInCG, subCoeffFlag);
2495
2496
0
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2497
0
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2498
0
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2499
2500
0
            c1 = 1;
2501
0
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2502
2503
0
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2504
0
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2505
2506
0
            if (!m_bitIf)
2507
0
            {
2508
0
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
2509
0
                // the return value is unpacked as: bits 28 and up = firstC2Idx, bits 26-27 = c1, low 24 bits = cost
                uint32_t firstC2Idx = (sum >> 28);
2510
0
                c1 = ((sum >> 26) & 3);
2511
0
                m_fracBits += sum & 0x00FFFFFF;
2512
2513
0
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2514
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2515
0
                // sign bins cost 1 << 15 each; hiddenShift is -1 here, so a hidden sign costs nothing
                m_fracBits += (numNonZero + hiddenShift) << 15;
2516
2517
0
                if (numNonZero > firstC2Idx)
2518
0
                {
2519
0
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2520
0
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2521
0
                    m_fracBits += ((uint64_t)sum << 15);
2522
0
                }
2523
0
            }
2524
            // Standard path
2525
0
            else
2526
0
            {
2527
0
                // 8 and 2 act as "not found yet" markers; they are replaced at the first coefficient > 1
                uint32_t firstC2Idx = 8;
2528
0
                uint32_t firstC2Flag = 2;
2529
0
                // successive c1 values packed two bits each (low bits first); zeroed once a level > 1 is seen
                uint32_t c1Next = 0xFFFFFFFE;
2530
2531
0
                idx = 0;
2532
0
                do
2533
0
                {
2534
0
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2535
0
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2536
0
                    encodeBin(symbol1, baseCtxMod[c1]);
2537
2538
0
                    if (symbol1)
2539
0
                        c1Next = 0;
2540
2541
0
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2542
0
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2543
2544
0
                    c1 = (c1Next & 3);
2545
0
                    c1Next >>= 2;
2546
0
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2547
0
                    idx++;
2548
0
                }
2549
0
                while(idx < numC1Flag);
2550
2551
0
                // c1 == 0 means a coefficient > 1 was found, so its greater-than-2 flag is coded
                if (!c1)
2552
0
                {
2553
0
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2554
2555
0
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2556
0
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2557
0
                }
2558
2559
0
                // with sign hiding, the lowest sign bit is dropped and one fewer bin is written
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2560
0
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2561
2562
0
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2563
0
                {
2564
                    // Standard path
2565
0
                    uint32_t goRiceParam = 0;
2566
0
                    int baseLevel = 3;
2567
0
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2568
#if CHECKED_BUILD || _DEBUG
2569
                    int firstCoeff2 = 1;
2570
#endif
2571
0
                    idx = firstC2Idx;
2572
0
                    do
2573
0
                    {
2574
0
                        if (idx >= C1FLAG_NUMBER)
2575
0
                            baseLevel = 1;
2576
                        // TODO: fast algorithm maybe broken this check logic
2577
0
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2578
2579
0
                        if (absCoeff[idx] >= baseLevel)
2580
0
                        {
2581
0
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2582
0
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2583
0
                            // raise the Rice parameter (capped at 4) after a level above the threshold;
                            // the threshold doubles along with it
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2584
0
                            goRiceParam += adjust;
2585
0
                            threshold += (adjust) ? threshold : 0;
2586
0
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2587
0
                        }
2588
#if CHECKED_BUILD || _DEBUG
2589
                        firstCoeff2 = 0;
2590
#endif
2591
0
                        baseLevel = 2;
2592
0
                        idx++;
2593
0
                    }
2594
0
                    while(idx < numNonZero);
2595
0
                }
2596
0
            } // end of !bitIf
2597
0
        } // end of (numNonZero > 0)
2598
2599
        // Initialize value for next loop
2600
0
        numNonZero = 0;
2601
0
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2602
0
    }
2603
0
}
2604
2605
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2606
0
{
2607
0
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2608
2609
0
    uint32_t isCodeNonZero = !!code;
2610
2611
0
    encodeBinEP(isCodeNonZero);
2612
0
    if (isCodeNonZero)
2613
0
    {
2614
0
        uint32_t isCodeLast = (maxSymbol > code);
2615
0
        uint32_t mask = (1 << (code - 1)) - 1;
2616
0
        uint32_t len = code - 1 + isCodeLast;
2617
0
        mask <<= isCodeLast;
2618
2619
0
        encodeBinsEP(mask, len);
2620
0
    }
2621
0
}
2622
2623
/* estimate bit cost for CBP, significant map and significant coefficients */
2624
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2625
0
{
2626
0
    estCBFBit(estBitsSbac);
2627
2628
0
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2629
2630
    // encode significance map
2631
0
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2632
2633
    // encode significant coefficients
2634
0
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2635
0
}
2636
2637
/* estimate bit cost for each CBP bit */
2638
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2639
0
{
2640
0
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2641
2642
0
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2643
0
    {
2644
0
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2645
0
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2646
0
    }
2647
2648
0
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2649
2650
0
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2651
0
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2652
0
}
2653
2654
/* estimate SAMBAC bit cost for significant coefficient group map */
2655
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2656
0
{
2657
0
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2658
2659
0
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2660
0
        for (uint32_t bin = 0; bin < 2; bin++)
2661
0
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2662
0
}
2663
2664
/* estimate SAMBAC bit cost for significant coefficient map */
2665
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2666
0
{
2667
0
    int firstCtx = 1, numCtx = 8;
2668
2669
0
    if (log2TrSize >= 4)
2670
0
    {
2671
0
        firstCtx = bIsLuma ? 21 : 12;
2672
0
        numCtx = bIsLuma ? 6 : 3;
2673
0
    }
2674
0
    else if (log2TrSize == 3)
2675
0
    {
2676
0
        firstCtx = 9;
2677
0
        numCtx = bIsLuma ? 12 : 3;
2678
0
    }
2679
2680
0
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2681
2682
0
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2683
0
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2684
2685
0
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2686
0
    {
2687
0
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2688
0
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2689
0
    }
2690
2691
0
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2692
0
    if (bIsLuma)
2693
0
    {
2694
0
        if (log2TrSize == 2)
2695
0
        {
2696
0
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2697
0
            {
2698
0
                int bits = 0;
2699
0
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2700
2701
0
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2702
0
                {
2703
0
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2704
0
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2705
0
                }
2706
2707
0
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2708
0
            }
2709
0
        }
2710
0
        else
2711
0
        {
2712
0
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2713
2714
0
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2715
0
            {
2716
0
                int bits = 0;
2717
0
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2718
0
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2719
2720
0
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2721
0
                {
2722
0
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2723
0
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2724
0
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2725
0
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2726
0
                    bits += 2 * cost1;
2727
0
                }
2728
                // correct latest bit cost, it didn't include cost0
2729
0
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2730
0
            }
2731
0
        }
2732
0
    }
2733
0
    else
2734
0
    {
2735
0
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2736
0
        const int ctxShift = log2TrSize - 2;
2737
2738
0
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2739
0
        {
2740
0
            int bits = 0;
2741
0
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2742
2743
0
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2744
0
            {
2745
0
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2746
0
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2747
0
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2748
0
            }
2749
2750
0
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2751
0
        }
2752
0
    }
2753
0
}
2754
2755
/* estimate bit cost of significant coefficient */
2756
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2757
0
{
2758
0
    if (bIsLuma)
2759
0
    {
2760
0
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2761
0
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2762
2763
0
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2764
0
        {
2765
0
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2766
0
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2767
0
        }
2768
2769
0
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2770
0
        {
2771
0
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2772
0
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2773
0
        }
2774
0
    }
2775
0
    else
2776
0
    {
2777
0
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2778
0
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2779
2780
0
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2781
0
        {
2782
0
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2783
0
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2784
0
        }
2785
2786
0
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2787
0
        {
2788
0
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2789
0
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2790
0
        }
2791
0
    }
2792
0
}
2793
2794
/* Initialize our context information from the nominated source */
2795
void Entropy::copyContextsFrom(const Entropy& src)
2796
0
{
2797
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2798
2799
0
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2800
0
    markValid();
2801
0
}
2802
2803
void Entropy::start()
2804
0
{
2805
0
    m_low = 0;
2806
0
    m_range = 510;
2807
0
    m_bitsLeft = -12;
2808
0
    m_numBufferedBytes = 0;
2809
0
    m_bufferedByte = 0xff;
2810
0
}
2811
2812
void Entropy::finish()
2813
0
{
2814
0
    if (m_low >> (21 + m_bitsLeft))
2815
0
    {
2816
0
        m_bitIf->writeByte(m_bufferedByte + 1);
2817
0
        while (m_numBufferedBytes > 1)
2818
0
        {
2819
0
            m_bitIf->writeByte(0x00);
2820
0
            m_numBufferedBytes--;
2821
0
        }
2822
2823
0
        m_low -= 1 << (21 + m_bitsLeft);
2824
0
    }
2825
0
    else
2826
0
    {
2827
0
        if (m_numBufferedBytes > 0)
2828
0
            m_bitIf->writeByte(m_bufferedByte);
2829
2830
0
        while (m_numBufferedBytes > 1)
2831
0
        {
2832
0
            m_bitIf->writeByte(0xff);
2833
0
            m_numBufferedBytes--;
2834
0
        }
2835
0
    }
2836
0
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2837
0
}
2838
2839
void Entropy::copyState(const Entropy& other)
2840
0
{
2841
0
    m_low = other.m_low;
2842
0
    m_range = other.m_range;
2843
0
    m_bitsLeft = other.m_bitsLeft;
2844
0
    m_bufferedByte = other.m_bufferedByte;
2845
0
    m_numBufferedBytes = other.m_numBufferedBytes;
2846
0
    m_fracBits = other.m_fracBits;
2847
0
}
2848
2849
void Entropy::resetBits()
2850
0
{
2851
0
    m_low = 0;
2852
0
    m_bitsLeft = -12;
2853
0
    m_numBufferedBytes = 0;
2854
0
    m_bufferedByte = 0xff;
2855
0
    m_fracBits &= 32767;
2856
0
    if (m_bitIf)
2857
0
        m_bitIf->resetBits();
2858
0
}
2859
2860
/** Encode bin */
2861
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2862
0
{
2863
0
    uint32_t mstate = ctxModel;
2864
2865
0
    ctxModel = sbacNext(mstate, binValue);
2866
2867
0
    if (!m_bitIf)
2868
0
    {
2869
0
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2870
0
        return;
2871
0
    }
2872
2873
0
    uint32_t range = m_range;
2874
0
    uint32_t state = sbacGetState(mstate);
2875
0
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2876
0
    range -= lps;
2877
2878
0
    X265_CHECK(lps >= 2, "lps is too small\n");
2879
2880
0
    int numBits = (uint32_t)(range - 256) >> 31;
2881
0
    uint32_t low = m_low;
2882
2883
    // NOTE: MPS must be LOWEST bit in mstate
2884
0
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2885
0
    if ((binValue ^ mstate) & 1)
2886
0
    {
2887
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2888
        //numBits = g_renormTable[lps >> 3];
2889
0
        unsigned long idx;
2890
0
        BSR(idx, lps);
2891
0
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2892
2893
0
        numBits = 8 - idx;
2894
0
        if (state >= 63)
2895
0
            numBits = 6;
2896
0
        X265_CHECK(numBits <= 6, "numBits failure\n");
2897
2898
0
        low += range;
2899
0
        range = lps;
2900
0
    }
2901
0
    m_low = (low << numBits);
2902
0
    m_range = (range << numBits);
2903
0
    m_bitsLeft += numBits;
2904
2905
0
    if (m_bitsLeft >= 0)
2906
0
        writeOut();
2907
0
}
2908
2909
/** Encode equiprobable bin */
2910
void Entropy::encodeBinEP(uint32_t binValue)
2911
0
{
2912
0
    if (!m_bitIf)
2913
0
    {
2914
0
        m_fracBits += 32768;
2915
0
        return;
2916
0
    }
2917
0
    m_low <<= 1;
2918
0
    if (binValue)
2919
0
        m_low += m_range;
2920
0
    m_bitsLeft++;
2921
2922
0
    if (m_bitsLeft >= 0)
2923
0
        writeOut();
2924
0
}
2925
2926
/** Encode equiprobable bins */
2927
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2928
0
{
2929
0
    if (!m_bitIf)
2930
0
    {
2931
0
        m_fracBits += 32768 * numBins;
2932
0
        return;
2933
0
    }
2934
2935
0
    while (numBins > 8)
2936
0
    {
2937
0
        numBins -= 8;
2938
0
        uint32_t pattern = binValues >> numBins;
2939
0
        m_low <<= 8;
2940
0
        m_low += m_range * pattern;
2941
0
        binValues -= pattern << numBins;
2942
0
        m_bitsLeft += 8;
2943
2944
0
        if (m_bitsLeft >= 0)
2945
0
            writeOut();
2946
0
    }
2947
2948
0
    m_low <<= numBins;
2949
0
    m_low += m_range * binValues;
2950
0
    m_bitsLeft += numBins;
2951
2952
0
    if (m_bitsLeft >= 0)
2953
0
        writeOut();
2954
0
}
2955
2956
/** Encode terminating bin */
2957
void Entropy::encodeBinTrm(uint32_t binValue)
2958
0
{
2959
0
    if (!m_bitIf)
2960
0
    {
2961
0
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2962
0
        return;
2963
0
    }
2964
2965
0
    m_range -= 2;
2966
0
    if (binValue)
2967
0
    {
2968
0
        m_low += m_range;
2969
0
        m_low <<= 7;
2970
0
        m_range = 2 << 7;
2971
0
        m_bitsLeft += 7;
2972
0
    }
2973
0
    else if (m_range >= 256)
2974
0
        return;
2975
0
    else
2976
0
    {
2977
0
        m_low <<= 1;
2978
0
        m_range <<= 1;
2979
0
        m_bitsLeft++;
2980
0
    }
2981
2982
0
    if (m_bitsLeft >= 0)
2983
0
        writeOut();
2984
0
}
2985
2986
/** Move bits from register into bitstream */
2987
void Entropy::writeOut()
2988
0
{
2989
0
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2990
0
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2991
2992
0
    m_bitsLeft -= 8;
2993
0
    m_low &= low_mask;
2994
2995
0
    if (leadByte == 0xff)
2996
0
        m_numBufferedBytes++;
2997
0
    else
2998
0
    {
2999
0
        uint32_t numBufferedBytes = m_numBufferedBytes;
3000
0
        if (numBufferedBytes > 0)
3001
0
        {
3002
0
            uint32_t carry = leadByte >> 8;
3003
0
            uint32_t byteTowrite = m_bufferedByte + carry;
3004
0
            m_bitIf->writeByte(byteTowrite);
3005
3006
0
            byteTowrite = (0xff + carry) & 0xff;
3007
0
            while (numBufferedBytes > 1)
3008
0
            {
3009
0
                m_bitIf->writeByte(byteTowrite);
3010
0
                numBufferedBytes--;
3011
0
            }
3012
0
        }
3013
0
        m_numBufferedBytes = 1;
3014
0
        m_bufferedByte = (uint8_t)leadByte;
3015
0
    }
3016
0
}
3017
3018
/* Bit-cost table with 128 entries.
 * This is the corrected version of the table; the most notable correction
 * is in the last state.
 * NOTE(review): presumably indexed by the packed context state that
 * sbacGetEntropyBits() works on -- confirm against its definition. */
const uint32_t g_entropyBits[128] =
{
    /*   0 */ 0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b,
    /*   8 */ 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
    /*  16 */ 0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937,
    /*  24 */ 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
    /*  32 */ 0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df,
    /*  40 */ 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
    /*  48 */ 0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327,
    /*  56 */ 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
    /*  64 */ 0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e,
    /*  72 */ 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
    /*  80 */ 0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46,
    /*  88 */ 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
    /*  96 */ 0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26,
    /* 104 */ 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
    /* 112 */ 0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f,
    /* 120 */ 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
};
3030
3031
/* Successor table: 128 rows of two values each.
 * NOTE(review): presumably row = current packed context state and
 * column = coded bin value, as consumed by sbacNext() -- confirm against
 * its definition. */
const uint8_t g_nextState[128][2] =
{
    /*   0 */ { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 },
    /*   4 */ { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
    /*   8 */ { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 },
    /*  12 */ { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
    /*  16 */ { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 },
    /*  20 */ { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
    /*  24 */ { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 },
    /*  28 */ { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
    /*  32 */ { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 },
    /*  36 */ { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
    /*  40 */ { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 },
    /*  44 */ { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
    /*  48 */ { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 },
    /*  52 */ { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
    /*  56 */ { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 },
    /*  60 */ { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
    /*  64 */ { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 },
    /*  68 */ { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
    /*  72 */ { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 },
    /*  76 */ { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
    /*  80 */ { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 },
    /*  84 */ { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
    /*  88 */ { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 },
    /*  92 */ { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
    /*  96 */ { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 },
    /* 100 */ { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
    /* 104 */ { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 },
    /* 108 */ { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
    /* 112 */ { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 },
    /* 116 */ { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
    /* 120 */ { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 },
    /* 124 */ { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
};
3050
3051
}
3052
3053
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
3054
extern "C" const uint32_t PFX(entropyStateBits)[128] =
3055
{
3056
    // Corrected table, most notably for last state
3057
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
3058
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
3059
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
3060
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
3061
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
3062
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
3063
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
3064
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
3065
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
3066
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
3067
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
3068
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
3069
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
3070
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
3071
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
3072
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
3073
};
3074