Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/entropy.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
0
#define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
36
0
#define CU_DQP_EG_k    0 // exp-golomb order
37
0
#define START_VALUE    8 // start value for dpcm mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93
{
94
    { 183, },
95
    { 154, },
96
    { 184, },
97
};
98
99
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100
{
101
    { 152,  139, },
102
    { 152,  139, },
103
    {  63,  139, },
104
};
105
106
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128
{
129
    { 154,  154,  154, },
130
    { 154,  154,  154, },
131
    { 154,  154,  154, },
132
};
133
134
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135
{
136
    { 153,  111,  149,   92,  167,  154,  154 },
137
    { 153,  111,  149,  107,  167,  154,  154 },
138
    { 111,  141,   94,  138,  182,  154,  154 },
139
};
140
141
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149
{
150
    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151
      108,  123,   93 },
152
    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153
      108,  123,  108 },
154
    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155
      108,  123,   63 },
156
};
157
158
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159
{
160
    { 121,  140,
161
      61,  154, },
162
    { 121,  140,
163
      61,  154, },
164
    {  91,  171,
165
       134,  141, },
166
};
167
168
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169
{
170
    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171
    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172
    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173
};
174
175
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176
{
177
    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178
    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179
    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180
};
181
182
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183
{
184
    { 107,  167,   91,  107,  107,  167, },
185
    { 107,  167,   91,  122,  107,  167, },
186
    { 138,  153,  136,  167,  152,  152, },
187
};
188
189
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197
{
198
    { 153,  },
199
    { 153,  },
200
    { 153,  },
201
};
202
203
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204
{
205
    { 160, },
206
    { 185, },
207
    { 200, },
208
};
209
210
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211
{
212
    { 224,  167,  122, },
213
    { 124,  138,   94, },
214
    { 153,  138,  138, },
215
};
216
217
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218
{
219
    { 139,  139 },
220
    { 139,  139 },
221
    { 139,  139 },
222
};
223
224
Entropy::Entropy()
225
0
{
226
0
    markValid();
227
0
    m_fracBits = 0;
228
0
    m_pad = 0;
229
0
    m_meanQP = 0;
230
0
    X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
231
0
}
232
233
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
234
void Entropy::codeVPS(const VPS& vps, const SPS& sps)
235
#else
236
void Entropy::codeVPS(const VPS& vps)
237
#endif
238
0
{
239
0
    int maxLayers = (vps.m_numLayers > 1 || vps.m_numViews > 1) + 1;
240
0
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
241
0
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
242
0
    WRITE_CODE(maxLayers - 1, 6, "vps_reserved_zero_6bits");
243
0
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
244
0
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
245
0
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
246
247
0
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
248
249
0
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
250
251
0
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
252
0
    {
253
0
        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
254
0
        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
255
0
        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
256
0
    }
257
258
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
259
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
260
    {
261
        WRITE_CODE(maxLayers - 1, 6, "vps_max_nuh_reserved_zero_layer_id");
262
        WRITE_UVLC(vps.m_vpsNumLayerSetsMinus1, "vps_num_layer_sets_minus1");
263
        for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
264
        {
265
#if ENABLE_MULTIVIEW
266
            if (vps.m_numViews > 1)
267
            {
268
                for (int j = 0; j < vps.m_numViews; j++)
269
                {
270
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
271
                }
272
            }
273
#endif
274
#if ENABLE_ALPHA
275
            if (vps.m_numLayers > 1)
276
            {
277
                for (int j = 0; j < vps.m_numLayers; j++)
278
                {
279
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
280
                }
281
            }
282
#endif
283
        }
284
    }
285
    else
286
    {
287
        WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
288
        WRITE_UVLC(0, "vps_max_op_sets_minus1");
289
    }
290
#else
291
0
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
292
0
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
293
0
#endif
294
295
0
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
296
297
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
298
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
299
    {
300
        WRITE_FLAG(vps.vps_extension_flag, "vps_extension_flag");
301
302
        if (vps.vps_extension_flag)
303
        {
304
            while (m_bitIf->getNumberOfWrittenBits() % X265_BYTE != 0)
305
            {
306
                WRITE_FLAG(1, "vps_extension_alignment_bit_equal_to_one");
307
            }
308
309
            WRITE_CODE(vps.ptl.levelIdc, 8, "general_level_idc");
310
            if (vps.maxTempSubLayers > 1)
311
            {
312
                for (uint32_t i = 0; i < vps.maxTempSubLayers - 1; i++)
313
                {
314
                    WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
315
                    WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
316
                }
317
                for (int i = vps.maxTempSubLayers - 1; i < 8; i++)
318
                    WRITE_CODE(0, 2, "reserved_zero_2bits");
319
            }
320
321
            WRITE_FLAG(vps.splitting_flag, "splitting flag");
322
            for (int i = 0; i < MAX_VPS_NUM_SCALABILITY_TYPES; i++)
323
            {
324
                WRITE_FLAG(vps.m_scalabilityMask[i], "scalability_mask[i]");
325
            }
326
            for (int i = 0; i < vps.scalabilityTypes - vps.splitting_flag; i++)
327
            {
328
                WRITE_CODE(vps.m_dimensionIdLen[i] - 1, 3, "dimension_id_len_minus1[i]");
329
            }
330
            WRITE_FLAG(vps.m_nuhLayerIdPresentFlag, "vps_nuh_layer_id_present_flag");
331
            for (int i = 1; i < maxLayers; i++)
332
            {
333
                if (vps.m_nuhLayerIdPresentFlag)
334
                    WRITE_CODE(vps.m_layerIdInNuh[i], 6, "layer_id_in_nuh[i]");
335
336
                if (!vps.splitting_flag)
337
                {
338
                    for (int j = 0; j < vps.scalabilityTypes; j++)
339
                    {
340
                        uint8_t bits = vps.m_dimensionIdLen[j];
341
                        WRITE_CODE(vps.m_dimensionId[i][j], bits, "dimension_id[i][j]");
342
                    }
343
                }
344
            }
345
            WRITE_CODE(vps.m_viewIdLen, 4, "view_id_len");
346
347
#if ENABLE_ALPHA
348
            if (vps.m_numLayers > 1)
349
            {
350
                WRITE_FLAG(0, "direct_dependency_flag[1][0]");
351
                WRITE_UVLC(0, "num_add_layer_sets");
352
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
353
                WRITE_FLAG(0, "max_tid_ref_present_flag");
354
                WRITE_FLAG(0, "default_ref_layers_active_flag");
355
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
356
                WRITE_FLAG(1, "vps_profile_present_flag");
357
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
358
359
                WRITE_UVLC(0, "num_add_olss");
360
                WRITE_CODE(0, 2, "default_output_layer_idc");
361
                WRITE_CODE(1, 2, "profile_tier_level_idx[ i ][ j ]");
362
                WRITE_CODE(2, 2, "profile_tier_level_idx[ i ][ j ]");
363
364
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
365
366
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
367
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
368
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
369
370
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
371
372
                if (sps.chromaFormatIdc == X265_CSP_I444)
373
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
374
375
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
376
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
377
378
                const Window& conf = sps.conformanceWindow;
379
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
380
                if (conf.bEnabled)
381
                {
382
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
383
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
384
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
385
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
386
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
387
                }
388
389
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
390
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
391
                WRITE_FLAG(1, "poc_lsb_not_present_flag[");
392
393
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
394
                {
395
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
396
                    for (uint32_t j = 0; j < vps.maxTempSubLayers ; j++)
397
                    {
398
                        if(j > 0)
399
                        WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
400
401
                        for(int k = 0; k < vps.m_numLayersInIdList[i]; k++)
402
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
403
404
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
405
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
406
                    }
407
                }
408
409
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
410
411
                WRITE_FLAG(0, "default_direct_dependency_flag");
412
                WRITE_UVLC(0, "vps_non_vui_extension_length");
413
                WRITE_FLAG(0, "vps_vui_present_flag");
414
                WRITE_FLAG(0, "vps_extension2_flag");
415
        }
416
#endif
417
418
#if ENABLE_MULTIVIEW
419
            if (vps.m_numViews > 1)
420
            {
421
                for (uint8_t i = 0; i < vps.m_numViews; i++)
422
                    WRITE_CODE(i, vps.m_viewIdLen, "view_id_val[i]");
423
424
                for (int i = 1; i < vps.m_numViews; i++)
425
                {
426
                    for (int j = 0; j < i; j++)
427
                    {
428
                        if (j == 0)
429
                            WRITE_FLAG(1, "direct_dependency_flag[1][0]");
430
                        else
431
                            WRITE_FLAG(0, "direct_dependency_flag[1][0]");
432
                    }
433
                }
434
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
435
                WRITE_FLAG(0, "max_tid_ref_present_flag");
436
                WRITE_FLAG(1, "default_ref_layers_active_flag");
437
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
438
                WRITE_FLAG(1, "vps_profile_present_flag[i]");
439
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
440
                WRITE_UVLC(0, "num_add_olss");
441
                WRITE_CODE(0, 2, "default_output_layer_idc");
442
443
                for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
444
                {
445
                    for (int j = 0; j < vps.m_numViews; j++)
446
                    {
447
                        WRITE_CODE((j == 0) ? 1 : 2, 2, "profile_tier_level_idx[ i ][ j ]");
448
                    }
449
                }
450
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
451
452
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
453
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
454
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
455
456
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
457
458
                if (sps.chromaFormatIdc == X265_CSP_I444)
459
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
460
461
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
462
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
463
464
                const Window& conf = sps.conformanceWindow;
465
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
466
                if (conf.bEnabled)
467
                {
468
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
469
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
470
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
471
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
472
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
473
                }
474
475
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
476
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
477
478
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
479
                {
480
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
481
                    for (uint32_t j = 0; j < vps.maxTempSubLayers; j++)
482
                    {
483
                        if (j > 0)
484
                            WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
485
486
                        for (int k = 0; k < vps.m_numLayersInIdList[i]; k++)
487
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
488
489
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
490
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
491
                    }
492
                }
493
494
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
495
496
                WRITE_FLAG(1, "default_direct_dependency_flag");
497
                WRITE_CODE(2, 2, "default_direct_dependency_type");
498
                WRITE_UVLC(0, "vps_non_vui_extension_length");
499
                WRITE_FLAG(0, "vps_vui_present_flag");
500
                WRITE_FLAG(0, "vps_extension2_flag");
501
            }
502
#endif
503
        }
504
    }
505
    else
506
        WRITE_FLAG(0, "vps_extension_flag");
507
#else
508
0
    WRITE_FLAG(0, "vps_extension_flag");
509
0
#endif
510
0
}
511
512
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl, int layer)
513
0
{
514
0
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
515
#if ENABLE_MULTIVIEW
516
    if(layer != 0)
517
        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
518
    else
519
        WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
520
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
521
#else
522
0
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
523
0
#endif
524
0
    {
525
0
        WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
526
0
        codeProfileTier(ptl, sps.maxTempSubLayers);
527
0
    }
528
529
0
    WRITE_UVLC(layer, "sps_seq_parameter_set_id");
530
#if ENABLE_MULTIVIEW
531
    if (layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
532
        WRITE_FLAG(0, "update_rep_format_flag");
533
    else
534
#endif
535
0
    {
536
0
        WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
537
538
0
        if (sps.chromaFormatIdc == X265_CSP_I444)
539
0
            WRITE_FLAG(0,                       "separate_colour_plane_flag");
540
541
0
        WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
542
0
        WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
543
544
0
        const Window& conf = sps.conformanceWindow;
545
0
        WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
546
0
        if (conf.bEnabled)
547
0
        {
548
0
            int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
549
0
            WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
550
0
            WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
551
0
            WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
552
0
            WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
553
0
        }
554
555
0
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
556
0
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
557
0
    }
558
559
0
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
560
#if ENABLE_MULTIVIEW
561
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
562
#endif
563
0
    {
564
0
        WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
565
566
0
        for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
567
0
        {
568
0
            WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
569
0
            WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
570
0
            WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
571
0
        }
572
0
    }
573
574
0
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
575
0
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
576
0
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
577
0
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
578
0
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
579
0
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
580
0
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
581
0
    if (scalingList.m_bEnabled)
582
0
    {
583
#if ENABLE_MULTIVIEW
584
        if ((layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
585
            WRITE_FLAG(sps.spsInferScalingListFlag, "sps_infer_scaling_list_flag");
586
        if(sps.spsInferScalingListFlag)
587
            WRITE_CODE(0, 6, "sps_scaling_list_ref_layer_id");
588
        else
589
#endif
590
0
        {
591
0
            WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
592
0
            if (scalingList.m_bDataPresent)
593
0
                codeScalingList(scalingList);
594
0
        }
595
0
    }
596
0
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
597
0
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
598
599
0
    WRITE_FLAG(0, "pcm_enabled_flag");
600
0
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
601
0
    for (int i = 0; i < sps.spsrpsNum; i++)
602
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
603
0
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
604
605
0
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
606
0
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
607
608
0
    WRITE_FLAG(1, "vui_parameters_present_flag");
609
0
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo, layer);
610
611
0
    WRITE_FLAG(sps.sps_extension_flag, "sps_extension_flag");
612
613
#if ENABLE_MULTIVIEW
614
    if (sps.sps_extension_flag && sps.maxViews > 1)
615
    {
616
        WRITE_FLAG(0, "sps_range_extensions_flag");
617
        WRITE_FLAG(sps.maxViews > 1, "sps_multilayer_extension_flag");
618
        WRITE_FLAG(0, "sps_3d_extension_flag");
619
        WRITE_CODE(0, 5, "sps_extension_5bits");
620
621
        if (layer == 0)
622
            WRITE_FLAG(0, "inter_view_mv_vert_constraint_flag");
623
        else
624
            WRITE_FLAG(1, "inter_view_mv_vert_constraint_flag");
625
    }
626
#endif
627
628
#if ENABLE_SCC_EXT
629
    if (ptl.profileIdc[0] == Profile::MAINSCC)
630
    {
631
        bool sps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
632
        sps_extension_flags[SCC_EXT_IDX] = true;
633
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
634
            WRITE_FLAG(sps_extension_flags[i], "sps_extension_flag");
635
        WRITE_FLAG(1, "intra_block_copy_enabled_flag");
636
        WRITE_FLAG(0, "palette_mode_enabled_flag");
637
        WRITE_CODE(0, 2, "motion_vector_resolution_control_idc");
638
        WRITE_FLAG(0, "intra_boundary_filter_disabled_flag");
639
    }
640
#endif
641
0
}
642
643
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26, int layer)
644
0
{
645
0
    WRITE_UVLC(layer,                          "pps_pic_parameter_set_id");
646
0
    WRITE_UVLC(layer,                          "pps_seq_parameter_set_id");
647
0
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
648
0
    WRITE_FLAG(0,                          "output_flag_present_flag");
649
0
    WRITE_CODE(pps.maxViews > 1 ? 2 : 0, 3,"num_extra_slice_header_bits");
650
0
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
651
0
    WRITE_FLAG(0,                          "cabac_init_present_flag");
652
0
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
653
0
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
654
655
0
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
656
0
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
657
0
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
658
659
0
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
660
0
    if (pps.bUseDQP)
661
0
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
662
663
0
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
664
0
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
665
0
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
666
667
0
    WRITE_FLAG(layer ? 0 : pps.bUseWeightPred,            "weighted_pred_flag");
668
0
    WRITE_FLAG(layer ? 0 : pps.bUseWeightedBiPred,        "weighted_bipred_flag");
669
0
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
670
0
    WRITE_FLAG(0,                             "tiles_enabled_flag");
671
0
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
672
0
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
673
674
0
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
675
0
    if (pps.bDeblockingFilterControlPresent)
676
0
    {
677
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
678
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
679
0
        if (!pps.bPicDisableDeblockingFilter)
680
0
        {
681
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
682
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
683
0
        }
684
0
    }
685
686
0
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
687
0
    WRITE_FLAG(0, "lists_modification_present_flag");
688
0
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
689
0
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
690
0
    WRITE_FLAG(pps.pps_extension_flag, "pps_extension_flag");
691
692
#if ENABLE_MULTIVIEW
693
    if (pps.pps_extension_flag && pps.maxViews > 1)
694
    {
695
        WRITE_FLAG(0, "pps_range_extensions_flag");
696
        WRITE_FLAG(pps.maxViews > 1, "pps_multilayer_extension_flag");
697
        WRITE_FLAG(0, "pps_3d_extension_flag");
698
        WRITE_CODE(0, 5, "pps_extension_5bits");
699
700
        if (pps.maxViews > 1)
701
        {
702
            WRITE_FLAG(0, "poc_reset_info_present_flag");
703
            WRITE_FLAG(0, "pps_infer_scaling_list_flag");
704
            WRITE_UVLC(0, "num_ref_loc_offsets");
705
            WRITE_FLAG(0, "colour_mapping_enabled_flag");
706
        }
707
    }
708
#endif
709
710
711
#if ENABLE_SCC_EXT
712
    if (pps.profileIdc == Profile::MAINSCC)
713
    {
714
        bool pps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
715
        pps_extension_flags[SCC_EXT_IDX] = true;
716
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
717
            WRITE_FLAG(pps_extension_flags[i], "pps_extension_flag");
718
        WRITE_FLAG(1, "curr_pic_as_ref_enabled_pps_flag");
719
        WRITE_FLAG(0, "adaptive_colour_trans_flag");
720
        WRITE_FLAG(0, "palette_predictor_initializer_flag");
721
    }
722
#endif
723
0
}
724
725
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers, int layer)
726
0
{
727
0
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
728
0
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
729
0
    WRITE_CODE(ptl.profileIdc[layer], 5,   "XXX_profile_idc[]");
730
0
    for (int j = 0; j < 32; j++)
731
0
    {
732
0
        if (layer)
733
0
            WRITE_FLAG(j == ptl.profileIdc[layer] ? 1 : 0, "XXX_profile_compatibility_flag[][j]");
734
0
        else
735
0
            WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
736
0
    }
737
738
0
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
739
0
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
740
0
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
741
0
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
742
743
0
    if (ptl.profileIdc[layer] == Profile::MAINREXT || ptl.profileIdc[layer] == Profile::HIGHTHROUGHPUTREXT || ptl.profileIdc[layer] == Profile::SCALABLEMAIN || ptl.profileIdc[layer] == Profile::SCALABLEMAIN10 || ptl.profileIdc[layer] == Profile::MULTIVIEWMAIN || ptl.profileIdc[layer] == Profile::MAINSCC)
744
0
    {
745
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
746
0
        int csp = ptl.chromaFormatConstraint;
747
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
748
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
749
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
750
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
751
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
752
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
753
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
754
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
755
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
756
0
        if (ptl.profileIdc[layer] == Profile::MAINSCC)
757
0
        {
758
0
            WRITE_FLAG(bitDepthConstraint <= 14, "max_14bit_constraint_flag");
759
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[0..15]");
760
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[16..31]");
761
0
            WRITE_FLAG(0, "reserved_zero_33bits[32]");
762
0
        }
763
0
        else
764
0
        {
765
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[0..15]");
766
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[16..31]");
767
0
            WRITE_CODE(0, 3, "XXX_reserved_zero_35bits[32..34]");
768
0
        }
769
0
    }
770
0
    else
771
0
    {
772
0
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
773
0
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
774
0
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
775
0
    }
776
0
    if (ptl.profileIdc[layer] == Profile::MAINSCC)
777
0
        WRITE_FLAG(false, "inbld_flag");
778
779
0
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
780
781
0
    if (maxTempSubLayers > 1)
782
0
    {
783
0
        for(int i = 0; i < maxTempSubLayers - 1; i++)
784
0
        {
785
0
            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
786
0
            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
787
0
        }
788
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
789
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
790
0
    }
791
0
}
792
793
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo, int layer)
794
0
{
795
0
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
796
0
    if (vui.aspectRatioInfoPresentFlag)
797
0
    {
798
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
799
0
        if (vui.aspectRatioIdc == 255)
800
0
        {
801
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
802
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
803
0
        }
804
0
    }
805
806
0
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
807
0
    if (vui.overscanInfoPresentFlag)
808
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
809
810
0
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
811
0
    if (vui.videoSignalTypePresentFlag)
812
0
    {
813
0
        WRITE_CODE(vui.videoFormat, 3, "video_format");
814
0
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
815
0
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
816
0
        if (vui.colourDescriptionPresentFlag)
817
0
        {
818
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
819
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
820
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
821
0
        }
822
0
    }
823
824
0
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
825
0
    if (vui.chromaLocInfoPresentFlag)
826
0
    {
827
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
828
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
829
0
    }
830
831
0
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
832
0
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
833
0
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
834
835
0
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
836
0
    if (vui.defaultDisplayWindow.bEnabled)
837
0
    {
838
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
839
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
840
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
841
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
842
0
    }
843
844
0
    if(layer)
845
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
846
0
    else
847
0
    {
848
0
        if (!bEmitVUITimingInfo)
849
0
            WRITE_FLAG(0, "vui_timing_info_present_flag");
850
0
        else
851
0
        {
852
0
            WRITE_FLAG(1, "vui_timing_info_present_flag");
853
0
            WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
854
0
            WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
855
0
            WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
856
0
            if (!bEmitVUIHRDInfo)
857
0
                WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
858
0
            else
859
0
            {
860
0
                WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
861
0
                if (vui.hrdParametersPresentFlag)
862
0
                    codeHrdParameters(vui.hrdParameters, maxSubTLayers);
863
0
            }
864
0
        }
865
0
    }
866
867
0
    WRITE_FLAG(0, "bitstream_restriction_flag");
868
0
}
869
870
void Entropy::codeScalingList(const ScalingList& scalingList)
871
0
{
872
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
873
0
    {
874
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
875
0
        {
876
0
            int predList = scalingList.checkPredMode(sizeId, listId);
877
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
878
0
            if (predList >= 0)
879
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
880
0
            else // DPCM Mode
881
0
                codeScalingList(scalingList, sizeId, listId);
882
0
        }
883
0
    }
884
0
}
885
886
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
887
0
{
888
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
889
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
890
0
    int nextCoef = START_VALUE;
891
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
892
0
    int data;
893
894
0
    if (sizeId > BLOCK_8x8)
895
0
    {
896
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
897
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
898
0
    }
899
0
    for (int i = 0; i < coefNum; i++)
900
0
    {
901
0
        data = src[scan[i]] - nextCoef;
902
0
        if (data < -128)
903
0
            data += 256;
904
0
        if (data > 127)
905
0
            data -= 256;
906
0
        nextCoef = (nextCoef + data + 256) % 256;
907
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
908
0
    }
909
0
}
910
911
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
912
0
{
913
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
914
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
915
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
916
917
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
918
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
919
920
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
921
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
922
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
923
924
0
    for (int i = 0; i < maxSubTLayers; i++)
925
0
    {
926
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
927
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
928
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
929
930
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
931
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
932
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
933
0
    }
934
0
}
935
936
void Entropy::codeAUD(const Slice& slice)
937
0
{
938
0
    int picType;
939
940
0
    switch (slice.m_sliceType)
941
0
    {
942
0
    case I_SLICE:
943
0
        picType = 0;
944
0
        break;
945
0
    case P_SLICE:
946
0
        picType = 1;
947
0
        break;
948
0
    case B_SLICE:
949
0
        picType = 2;
950
0
        break;
951
0
    default:
952
0
        picType = 7;
953
0
        break;
954
0
    }
955
956
0
    WRITE_CODE(picType, 3, "pic_type");
957
0
}
958
959
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp, int layer)
960
0
{
961
0
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
962
0
    if (slice.getRapPicFlag())
963
0
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
964
965
0
    WRITE_UVLC(layer, "slice_pic_parameter_set_id");
966
967
    /* x265 does not use dependent slices, so always write all this data */
968
0
    if (slice_addr)
969
0
    {
970
        // if( dependent_slice_segments_enabled_flag )
971
        //     dependent_slice_segment_flag             u(1)
972
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
973
0
    }
974
975
#if ENABLE_MULTIVIEW
976
    if (encData.m_param->numViews > 1)
977
    {
978
        int esb = 0;
979
        if (2 > esb)
980
        {
981
            esb++;
982
            WRITE_FLAG(0, "discardable_flag");
983
        }
984
        if (2 > esb)
985
        {
986
            esb++;
987
            WRITE_FLAG(0, "cross_layer_bla_flag");
988
        }
989
    }
990
#endif
991
992
0
    WRITE_UVLC(slice.m_sliceType, "slice_type");
993
994
0
    if ((slice.m_param->numViews > 1 && layer > 0) || !slice.getIdrPicFlag())
995
0
    {
996
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
997
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
998
0
    }
999
0
    if (!slice.getIdrPicFlag())
1000
0
    {
1001
#if _DEBUG || CHECKED_BUILD
1002
        // check for bitstream restriction stating that:
1003
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
1004
        // Ideally this process should not be repeated for each slice in a picture
1005
        if (slice.isIRAP())
1006
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
1007
            {
1008
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
1009
            }
1010
#endif
1011
1012
0
        if (slice.m_rpsIdx < 0)
1013
0
        {
1014
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
1015
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
1016
0
        }
1017
0
        else
1018
0
        {
1019
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
1020
0
            int numBits = 0;
1021
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
1022
0
                numBits++;
1023
1024
0
            if (numBits > 0)
1025
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
1026
0
        }
1027
1028
0
        if (slice.m_sps->bTemporalMVPEnabled)
1029
#if ENABLE_SCC_EXT
1030
            WRITE_FLAG(slice.m_bTemporalMvp, "slice_temporal_mvp_enable_flag");
1031
#else
1032
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
1033
0
#endif
1034
0
    }
1035
0
    const SAOParam *saoParam = encData.m_saoParam;
1036
0
    if (slice.m_bUseSao)
1037
0
    {
1038
0
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
1039
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1040
0
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
1041
0
    }
1042
0
    else if(encData.m_param->selectiveSAO)
1043
0
    {
1044
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
1045
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1046
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
1047
0
    }
1048
1049
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
1050
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
1051
1052
0
    if (!slice.isIntra())
1053
0
    {
1054
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
1055
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
1056
0
        if (overrideFlag)
1057
0
        {
1058
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
1059
0
            if (slice.isInterB())
1060
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
1061
0
            else
1062
0
            {
1063
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
1064
0
            }
1065
0
        }
1066
0
    }
1067
0
    else
1068
0
    {
1069
0
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
1070
0
    }
1071
1072
0
    if (slice.isInterB())
1073
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
1074
1075
#if ENABLE_SCC_EXT
1076
    if (slice.m_bTemporalMvp)
1077
#else
1078
0
    if (slice.m_sps->bTemporalMVPEnabled)
1079
0
#endif
1080
0
    {
1081
0
        if (slice.m_sliceType == B_SLICE)
1082
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
1083
1084
0
        if (slice.m_sliceType != I_SLICE &&
1085
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
1086
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
1087
0
        {
1088
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
1089
0
        }
1090
0
    }
1091
0
    if (((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE)) && !layer)
1092
0
        codePredWeightTable(slice);
1093
1094
0
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
1095
0
    if (!slice.isIntra())
1096
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
1097
1098
0
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
1099
0
    WRITE_SVLC(code, "slice_qp_delta");
1100
1101
0
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
1102
0
    {
1103
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
1104
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
1105
0
    }
1106
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
1107
    //       We didn't support filter across slice board, so disable it now
1108
1109
0
    if (encData.m_param->maxSlices <= 1)
1110
0
    {
1111
0
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
1112
0
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
1113
1114
0
        if (isSAOEnabled || isDBFEnabled)
1115
0
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
1116
0
    }
1117
0
}
1118
1119
/** write wavefront substreams sizes for the slice header */
1120
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
1121
0
{
1122
0
    uint32_t offsetLen = 1;
1123
0
    while (maxOffset >= (1U << offsetLen))
1124
0
    {
1125
0
        offsetLen++;
1126
0
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
1127
0
    }
1128
1129
0
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
1130
0
    if (numSubStreams > 0)
1131
0
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
1132
1133
0
    for (uint32_t i = 0; i < numSubStreams; i++)
1134
0
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
1135
0
}
1136
1137
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
1138
0
{
1139
0
    if (idx > 0)
1140
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
1141
1142
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
1143
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
1144
0
    int prev = 0;
1145
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
1146
0
    {
1147
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
1148
0
        prev = rps.deltaPOC[j];
1149
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
1150
0
    }
1151
1152
0
    prev = 0;
1153
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
1154
0
    {
1155
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
1156
0
        prev = rps.deltaPOC[j];
1157
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
1158
0
    }
1159
0
}
1160
1161
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
1162
0
{
1163
0
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
1164
0
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
1165
0
}
1166
1167
/* encode a CU block recursively */
1168
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
1169
0
{
1170
0
    const Slice* slice = ctu.m_slice;
1171
1172
0
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
1173
0
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
1174
1175
0
    if (!cuUnsplitFlag)
1176
0
    {
1177
0
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1178
0
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1179
0
            bEncodeDQP = true;
1180
0
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1181
0
        {
1182
0
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1183
0
            if (childGeom.flags & CUGeom::PRESENT)
1184
0
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1185
0
        }
1186
0
        return;
1187
0
    }
1188
1189
0
    if (cuSplitFlag) 
1190
0
        codeSplitFlag(ctu, absPartIdx, depth);
1191
1192
0
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
1193
0
    {
1194
0
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1195
0
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1196
0
            bEncodeDQP = true;
1197
0
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1198
0
        {
1199
0
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1200
0
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1201
0
        }
1202
0
        return;
1203
0
    }
1204
1205
0
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1206
0
        bEncodeDQP = true;
1207
1208
0
    if (slice->m_pps->bTransquantBypassEnabled)
1209
0
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
1210
1211
0
    if (!slice->isIntra())
1212
0
    {
1213
0
        codeSkipFlag(ctu, absPartIdx);
1214
0
        if (ctu.isSkipped(absPartIdx))
1215
0
        {
1216
0
            codeMergeIndex(ctu, absPartIdx);
1217
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1218
0
            return;
1219
0
        }
1220
0
        codePredMode(ctu.m_predMode[absPartIdx]);
1221
0
    }
1222
1223
0
    codePartSize(ctu, absPartIdx, depth);
1224
1225
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1226
0
    codePredInfo(ctu, absPartIdx);
1227
1228
0
    uint32_t tuDepthRange[2];
1229
0
    if (ctu.isIntra(absPartIdx))
1230
0
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
1231
0
    else
1232
0
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
1233
1234
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
1235
0
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
1236
1237
    // --- write terminating bit ---
1238
0
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1239
0
}
1240
1241
/* Return bit count of signaling inter mode */
1242
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
1243
0
{
1244
0
    uint32_t bits;
1245
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
1246
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
1247
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1248
0
    switch (partSize)
1249
0
    {
1250
0
    case SIZE_2Nx2N:
1251
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1252
0
        break;
1253
1254
0
    case SIZE_2NxN:
1255
0
    case SIZE_2NxnU:
1256
0
    case SIZE_2NxnD:
1257
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1258
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1259
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1260
0
        {
1261
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1262
0
            if (partSize != SIZE_2NxN)
1263
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1264
0
        }
1265
0
        break;
1266
1267
0
    case SIZE_Nx2N:
1268
0
    case SIZE_nLx2N:
1269
0
    case SIZE_nRx2N:
1270
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1271
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1272
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1273
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1274
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1275
0
        {
1276
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1277
0
            if (partSize != SIZE_Nx2N)
1278
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1279
0
        }
1280
0
        break;
1281
0
    default:
1282
0
        X265_CHECK(0, "invalid CU partition\n");
1283
0
        break;
1284
0
    }
1285
1286
0
    return bits;
1287
0
}
1288
1289
/* finish encoding a cu and handle end-of-slice conditions */
1290
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
1291
0
{
1292
0
    const Slice* slice = ctu.m_slice;
1293
0
    uint32_t realEndAddress = slice->m_endCUAddr;
1294
0
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
1295
0
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
1296
1297
0
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
1298
0
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
1299
0
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
1300
0
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
1301
0
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
1302
0
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
1303
1304
0
    if (slice->m_pps->bUseDQP)
1305
0
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
1306
1307
0
    if (granularityBoundary)
1308
0
    {
1309
        // Encode slice finish
1310
0
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
1311
0
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
1312
0
            bTerminateSlice = 1;
1313
1314
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
1315
0
        if (!bTerminateSlice)
1316
0
            encodeBinTrm(0);    // end_of_slice_segment_flag
1317
1318
0
        if (!m_bitIf)
1319
0
            resetBits(); // TODO: most likely unnecessary
1320
0
    }
1321
0
}
1322
1323
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1324
                              bool& bCodeDQP, const uint32_t depthRange[2])
1325
0
{
1326
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1327
1328
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1329
     * so we have checks to make sure the implied value matches our intentions */
1330
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1331
0
    {
1332
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1333
0
    }
1334
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1335
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1336
0
    {
1337
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1338
0
    }
1339
0
    else if (log2CurSize > depthRange[1])
1340
0
    {
1341
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1342
0
    }
1343
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1344
0
    {
1345
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1346
0
    }
1347
0
    else
1348
0
    {
1349
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1350
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1351
0
    }
1352
1353
0
    uint32_t hChromaShift = cu.m_hChromaShift;
1354
0
    uint32_t vChromaShift = cu.m_vChromaShift;
1355
0
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
1356
0
    if (!curDepth || !bSmallChroma)
1357
0
    {
1358
0
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
1359
0
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
1360
0
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
1361
0
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
1362
0
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
1363
0
    }
1364
1365
0
    if (subdiv)
1366
0
    {
1367
0
        --log2CurSize;
1368
0
        ++curDepth;
1369
1370
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1371
1372
0
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1373
0
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1374
0
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1375
0
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1376
0
        return;
1377
0
    }
1378
1379
0
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
1380
1381
0
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
1382
0
    {
1383
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
1384
0
    }
1385
0
    else
1386
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1387
1388
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1389
0
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
1390
0
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
1391
0
    if (!(cbfY || cbfU || cbfV))
1392
0
        return;
1393
1394
    // dQP: only for CTU once
1395
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1396
0
    {
1397
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1398
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1399
0
        codeDeltaQP(cu, absPartIdxLT);
1400
0
        bCodeDQP = false;
1401
0
    }
1402
1403
0
    if (cbfY)
1404
0
    {
1405
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1406
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1407
0
        if (!(cbfU || cbfV))
1408
0
            return;
1409
0
    }
1410
1411
0
    if (bSmallChroma)
1412
0
    {
1413
0
        if ((absPartIdx & 3) != 3)
1414
0
            return;
1415
1416
0
        const uint32_t log2CurSizeC = 2;
1417
0
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1418
0
        const uint32_t curPartNum = 4;
1419
0
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1420
0
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1421
0
        {
1422
0
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1423
0
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1424
0
            do
1425
0
            {
1426
0
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1427
0
                {
1428
0
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1429
0
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1430
0
                }
1431
0
            }
1432
0
            while (tuIterator.isNextSection());
1433
0
        }
1434
0
    }
1435
0
    else
1436
0
    {
1437
0
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1438
0
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1439
0
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1440
0
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1441
0
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1442
0
        {
1443
0
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1444
0
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1445
0
            do
1446
0
            {
1447
0
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1448
0
                {
1449
0
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1450
0
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1451
0
                }
1452
0
            }
1453
0
            while (tuIterator.isNextSection());
1454
0
        }
1455
0
    }
1456
0
}
1457
1458
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1459
                              bool& bCodeDQP, const uint32_t depthRange[2])
1460
0
{
1461
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1462
1463
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1464
     * so we have checks to make sure the implied value matches our intentions */
1465
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1466
0
    {
1467
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1468
0
    }
1469
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1470
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1471
0
    {
1472
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1473
0
    }
1474
0
    else if (log2CurSize > depthRange[1])
1475
0
    {
1476
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1477
0
    }
1478
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1479
0
    {
1480
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1481
0
    }
1482
0
    else
1483
0
    {
1484
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1485
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1486
0
    }
1487
1488
0
    if (subdiv)
1489
0
    {
1490
0
        --log2CurSize;
1491
0
        ++curDepth;
1492
1493
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1494
1495
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1496
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1497
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1498
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1499
0
        return;
1500
0
    }
1501
1502
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1503
0
    {
1504
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1505
0
    }
1506
0
    else
1507
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1508
1509
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1510
1511
0
    if (!cbfY)
1512
0
        return;
1513
1514
    // dQP: only for CTU once
1515
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1516
0
    {
1517
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1518
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1519
0
        codeDeltaQP(cu, absPartIdxLT);
1520
0
        bCodeDQP = false;
1521
0
    }
1522
1523
0
    if (cbfY)
1524
0
    {
1525
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1526
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1527
0
    }
1528
0
}
1529
1530
1531
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1532
0
{
1533
0
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1534
0
    {
1535
0
        codeIntraDirLumaAng(cu, absPartIdx, true);
1536
0
        if (cu.m_chromaFormat != X265_CSP_I400)
1537
0
        {
1538
0
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1539
0
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1540
1541
0
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1542
1543
0
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1544
0
            {
1545
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1546
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1547
0
                {
1548
0
                    absPartIdx += qNumParts;
1549
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1550
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1551
0
                }
1552
0
            }
1553
0
        }
1554
0
    }
1555
0
    else // if it is inter mode, encode motion vector and reference index
1556
0
        codePUWise(cu, absPartIdx);
1557
0
}
1558
1559
/** encode motion information for every PU block */
1560
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1561
0
{
1562
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1563
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1564
1565
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1566
0
    {
1567
0
        codeMergeFlag(cu, subPartIdx);
1568
0
        if (cu.m_mergeFlag[subPartIdx])
1569
0
            codeMergeIndex(cu, subPartIdx);
1570
0
        else
1571
0
        {
1572
0
            if (cu.m_slice->isInterB())
1573
0
                codeInterDir(cu, subPartIdx);
1574
1575
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1576
0
            for (uint32_t list = 0; list < 2; list++)
1577
0
            {
1578
0
                if (interDir & (1 << list))
1579
0
                {
1580
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1581
1582
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1583
0
                    codeMvd(cu, subPartIdx, list);
1584
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1585
0
                }
1586
0
            }
1587
0
        }
1588
0
    }
1589
0
}
1590
1591
/** encode reference frame index for a PU block */
1592
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1593
0
{
1594
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1595
1596
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1597
0
        codeRefFrmIdx(cu, absPartIdx, list);
1598
0
}
1599
1600
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1601
0
{
1602
0
    if (!cu.isIntra(absPartIdx))
1603
0
    {
1604
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1605
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1606
0
        if (!cu.getQtRootCbf(absPartIdx))
1607
0
            return;
1608
0
    }
1609
1610
0
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1611
0
    if (cu.m_chromaFormat == X265_CSP_I400)
1612
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1613
0
    else
1614
0
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1615
0
}
1616
1617
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1618
0
{
1619
0
    int typeIdx = ctuParam.typeIdx;
1620
1621
0
    if (plane != 2)
1622
0
    {
1623
0
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1624
0
        if (typeIdx >= 0)
1625
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1626
0
    }
1627
1628
0
    if (typeIdx >= 0)
1629
0
    {
1630
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1631
0
        if (typeIdx == SAO_BO)
1632
0
        {
1633
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1634
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1635
1636
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1637
0
                if (ctuParam.offset[i] != 0)
1638
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1639
1640
0
            encodeBinsEP(ctuParam.bandPos, 5);
1641
0
        }
1642
0
        else // if (typeIdx < SAO_BO)
1643
0
        {
1644
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1645
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1646
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1647
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1648
0
            if (plane != 2)
1649
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1650
0
        }
1651
0
    }
1652
0
}
1653
1654
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1655
0
{
1656
0
    if (plane != 2)
1657
0
    {
1658
0
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1659
0
        encodeBinEP(1);
1660
0
    }
1661
1662
0
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1663
1664
0
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1665
0
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1666
0
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1667
0
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1668
0
    if (plane != 2)
1669
0
        encodeBinsEP((uint32_t)(typeIdx), 2);
1670
0
}
1671
1672
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1673
0
{
1674
0
    if (plane != 2)
1675
0
    {
1676
0
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1677
0
        encodeBinEP(0);
1678
0
    }
1679
1680
0
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1681
1682
0
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1683
0
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1684
1685
0
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1686
0
        if (offset[i] != 0)
1687
0
            encodeBinEP(offset[i] < 0);
1688
1689
0
    encodeBinsEP(bandPos, 5);
1690
0
}
1691
1692
/** initialize context model with respect to QP and initialization value */
1693
uint8_t sbacInit(int qp, int initValue)
1694
0
{
1695
0
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1696
1697
0
    int  slope      = (initValue >> 4) * 5 - 45;
1698
0
    int  offset     = ((initValue & 15) << 3) - 16;
1699
0
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1700
0
    uint32_t mpState = (initState >= 64);
1701
0
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1702
1703
0
    return (uint8_t)state;
1704
0
}
1705
1706
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1707
0
{
1708
0
    ctxModel += sliceType * size;
1709
1710
0
    for (int n = 0; n < size; n++)
1711
0
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1712
0
}
1713
1714
void Entropy::resetEntropy(const Slice& slice)
1715
0
{
1716
0
    int  qp              = slice.m_sliceQp;
1717
0
    SliceType sliceType  = slice.m_sliceType;
1718
1719
0
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1720
0
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1721
0
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1722
0
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1723
0
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1724
0
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1725
0
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1726
0
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1727
0
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1728
0
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1729
0
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1730
0
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1731
0
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1732
0
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1733
0
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1734
0
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1735
0
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1736
0
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1737
0
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1738
0
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1739
0
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1740
0
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1741
0
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1742
0
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1743
0
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1744
0
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1745
    // new structure
1746
1747
0
    start();
1748
0
}
1749
1750
/* code explicit wp tables */
1751
void Entropy::codePredWeightTable(const Slice& slice)
1752
0
{
1753
0
    const WeightParam *wp;
1754
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1755
0
    bool            bDenomCoded  = false;
1756
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1757
0
    uint32_t        totalSignalledWeightFlags = 0;
1758
1759
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1760
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1761
0
    {
1762
0
        for (int list = 0; list < numRefDirs; list++)
1763
0
        {
1764
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1765
0
            {
1766
0
                wp = slice.m_weightPredTable[list][ref];
1767
0
                if (!bDenomCoded)
1768
0
                {
1769
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1770
1771
0
                    if (bChroma)
1772
0
                    {
1773
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1774
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1775
0
                    }
1776
0
                    bDenomCoded = true;
1777
0
                }
1778
#if ENABLE_SCC_EXT
1779
                if (slice.m_poc == slice.m_refPOCList[list][ref])
1780
                    assert(!wp[0].wtPresent);
1781
                else
1782
#endif
1783
0
                    WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1784
0
                totalSignalledWeightFlags = totalSignalledWeightFlags + wp[0].wtPresent;
1785
0
            }
1786
1787
0
            if (bChroma)
1788
0
            {
1789
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1790
0
                {
1791
0
                    wp = slice.m_weightPredTable[list][ref];
1792
#if ENABLE_SCC_EXT
1793
                    if (slice.m_poc == slice.m_refPOCList[list][ref])
1794
                        assert(!wp[1].wtPresent);
1795
                    else
1796
#endif
1797
0
                        WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1798
0
                    totalSignalledWeightFlags = totalSignalledWeightFlags + 2 * wp[1].wtPresent;
1799
0
                }
1800
0
            }
1801
1802
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1803
0
            {
1804
0
                wp = slice.m_weightPredTable[list][ref];
1805
0
                if (wp[0].wtPresent)
1806
0
                {
1807
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1808
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1809
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1810
0
                }
1811
1812
0
                if (bChroma)
1813
0
                {
1814
0
                    if (wp[1].wtPresent)
1815
0
                    {
1816
0
                        for (int plane = 1; plane < 3; plane++)
1817
0
                        {
1818
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1819
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1820
1821
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1822
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1823
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1824
0
                        }
1825
0
                    }
1826
0
                }
1827
0
            }
1828
0
        }
1829
1830
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1831
0
    }
1832
0
}
1833
1834
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1835
0
{
1836
0
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1837
1838
0
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1839
1840
0
    if (!symbol)
1841
0
        return;
1842
1843
0
    bool bCodeLast = (maxSymbol > symbol);
1844
1845
0
    while (--symbol)
1846
0
        encodeBin(1, scmModel[offset]);
1847
1848
0
    if (bCodeLast)
1849
0
        encodeBin(0, scmModel[offset]);
1850
0
}
1851
1852
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1853
0
{
1854
0
    uint32_t bins = 0;
1855
0
    int numBins = 0;
1856
1857
0
    while (symbol >= (uint32_t)(1 << count))
1858
0
    {
1859
0
        bins = 2 * bins + 1;
1860
0
        numBins++;
1861
0
        symbol -= 1 << count;
1862
0
        count++;
1863
0
    }
1864
1865
0
    bins = 2 * bins + 0;
1866
0
    numBins++;
1867
1868
0
    bins = (bins << count) | symbol;
1869
0
    numBins += count;
1870
1871
0
    X265_CHECK(numBins <= 32, "numBins too large\n");
1872
0
    encodeBinsEP(bins, numBins);
1873
0
}
1874
1875
/** Coding of coeff_abs_level_minus3 */
1876
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1877
0
{
1878
0
    uint32_t length;
1879
0
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1880
1881
0
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1882
0
    {
1883
0
        length = codeNumber >> absGoRice;
1884
1885
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1886
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1887
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1888
0
    }
1889
0
    else
1890
0
    {
1891
0
        length = 0;
1892
0
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1893
0
        {
1894
0
            unsigned long idx;
1895
0
            BSR(idx, codeNumber + 1);
1896
0
            length = idx;
1897
0
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1898
0
            codeNumber -= (1 << idx) - 1;
1899
0
        }
1900
0
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1901
1902
0
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1903
0
        encodeBinsEP(codeNumber, length + absGoRice);
1904
0
    }
1905
0
}
1906
1907
// SBAC RD
1908
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1909
0
{
1910
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1911
0
    m_fracBits = src.m_fracBits;
1912
0
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1913
0
}
1914
1915
void Entropy::copyFrom(const Entropy& src)
1916
0
{
1917
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1918
1919
0
    copyState(src);
1920
1921
0
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1922
0
    markValid();
1923
0
}
1924
1925
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1926
0
{
1927
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1928
1929
0
    if (cu.isIntra(absPartIdx))
1930
0
    {
1931
0
        if (depth == cu.m_encData->m_param->maxCUDepth)
1932
0
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1933
0
        return;
1934
0
    }
1935
1936
0
    switch (partSize)
1937
0
    {
1938
0
    case SIZE_2Nx2N:
1939
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1940
0
        break;
1941
1942
0
    case SIZE_2NxN:
1943
0
    case SIZE_2NxnU:
1944
0
    case SIZE_2NxnD:
1945
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1946
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1947
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1948
0
        {
1949
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1950
0
            if (partSize != SIZE_2NxN)
1951
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1952
0
        }
1953
0
        break;
1954
1955
0
    case SIZE_Nx2N:
1956
0
    case SIZE_nLx2N:
1957
0
    case SIZE_nRx2N:
1958
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1959
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1960
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1961
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1962
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1963
0
        {
1964
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1965
0
            if (partSize != SIZE_Nx2N)
1966
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1967
0
        }
1968
0
        break;
1969
0
    default:
1970
0
        X265_CHECK(0, "invalid CU partition\n");
1971
0
        break;
1972
0
    }
1973
0
}
1974
1975
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1976
0
{
1977
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1978
1979
0
    if (numCand > 1)
1980
0
    {
1981
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1982
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1983
1984
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1985
1986
0
        if (unaryIdx != 0)
1987
0
        {
1988
0
            uint32_t mask = (1 << unaryIdx) - 2;
1989
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1990
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1991
0
        }
1992
0
    }
1993
0
}
1994
1995
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1996
0
{
1997
0
    uint32_t dir[4], j;
1998
0
    uint32_t preds[4][3];
1999
0
    int predIdx[4];
2000
0
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
2001
0
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
2002
2003
0
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
2004
0
    {
2005
0
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
2006
0
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
2007
0
        predIdx[j] = -1;
2008
0
        for (uint32_t i = 0; i < 3; i++)
2009
0
            if (dir[j] == preds[j][i])
2010
0
                predIdx[j] = i;
2011
2012
0
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
2013
0
    }
2014
2015
0
    for (j = 0; j < partNum; j++)
2016
0
    {
2017
0
        if (predIdx[j] != -1)
2018
0
        {
2019
0
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
2020
            // NOTE: Mapping
2021
            //       0 = 0
2022
            //       1 = 10
2023
            //       2 = 11
2024
0
            int nonzero = (!!predIdx[j]);
2025
0
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
2026
0
        }
2027
0
        else
2028
0
        {
2029
0
            if (preds[j][0] > preds[j][1])
2030
0
                std::swap(preds[j][0], preds[j][1]);
2031
2032
0
            if (preds[j][0] > preds[j][2])
2033
0
                std::swap(preds[j][0], preds[j][2]);
2034
2035
0
            if (preds[j][1] > preds[j][2])
2036
0
                std::swap(preds[j][1], preds[j][2]);
2037
2038
0
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
2039
0
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
2040
0
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
2041
2042
0
            encodeBinsEP(dir[j], 5);
2043
0
        }
2044
0
    }
2045
0
}
2046
2047
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
2048
0
{
2049
0
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
2050
2051
0
    if (intraDirChroma == DM_CHROMA_IDX)
2052
0
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
2053
0
    else
2054
0
    {
2055
0
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
2056
0
        {
2057
0
            if (intraDirChroma == chromaDirMode[i])
2058
0
            {
2059
0
                intraDirChroma = i;
2060
0
                break;
2061
0
            }
2062
0
        }
2063
2064
0
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
2065
0
        encodeBinsEP(intraDirChroma, 2);
2066
0
    }
2067
0
}
2068
2069
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
2070
0
{
2071
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
2072
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
2073
2074
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
2075
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
2076
0
    if (interDir < 2)
2077
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
2078
0
}
2079
2080
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
2081
0
{
2082
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
2083
2084
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
2085
2086
0
    if (refFrame > 0)
2087
0
    {
2088
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
2089
0
        if (refNum == 0)
2090
0
            return;
2091
2092
0
        refFrame--;
2093
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
2094
0
        if (refFrame > 0)
2095
0
        {
2096
0
            uint32_t mask = (1 << refFrame) - 2;
2097
0
            mask >>= (refFrame == refNum) ? 1 : 0;
2098
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
2099
0
        }
2100
0
    }
2101
0
}
2102
2103
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
2104
0
{
2105
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
2106
0
    const int hor = mvd.x;
2107
0
    const int ver = mvd.y;
2108
2109
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2110
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2111
2112
0
    const bool bHorAbsGr0 = hor != 0;
2113
0
    const bool bVerAbsGr0 = ver != 0;
2114
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
2115
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
2116
2117
0
    if (bHorAbsGr0)
2118
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2119
2120
0
    if (bVerAbsGr0)
2121
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2122
2123
0
    if (bHorAbsGr0)
2124
0
    {
2125
0
        if (horAbs > 1)
2126
0
            writeEpExGolomb(horAbs - 2, 1);
2127
2128
0
        encodeBinEP(0 > hor ? 1 : 0);
2129
0
    }
2130
2131
0
    if (bVerAbsGr0)
2132
0
    {
2133
0
        if (verAbs > 1)
2134
0
            writeEpExGolomb(verAbs - 2, 1);
2135
2136
0
        encodeBinEP(0 > ver ? 1 : 0);
2137
0
    }
2138
0
}
2139
2140
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
2141
0
{
2142
0
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
2143
2144
0
    int qpBdOffsetY = QP_BD_OFFSET;
2145
2146
0
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
2147
2148
0
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
2149
0
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
2150
0
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
2151
0
    if (absDQp >= CU_DQP_TU_CMAX)
2152
0
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
2153
2154
0
    if (absDQp > 0)
2155
0
    {
2156
0
        uint32_t sign = (dqp > 0 ? 0 : 1);
2157
0
        encodeBinEP(sign);
2158
0
    }
2159
0
}
2160
2161
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
2162
0
{
2163
0
    uint32_t ctx = tuDepth + 2;
2164
2165
0
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
2166
0
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
2167
0
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
2168
2169
0
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
2170
0
    {
2171
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
2172
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
2173
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
2174
2175
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2176
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2177
0
    }
2178
0
    else
2179
0
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2180
0
}
2181
2182
#if CHECKED_BUILD || _DEBUG
/* Reference C computation of the number of bins needed for the remaining absolute
 * levels of one coefficient group. It is only built in checked/debug builds, where
 * codeCoeffNxN() compares it against primitives.costCoeffRemain().
 *
 * absCoeff   - absolute coefficient levels of the group, in coding order
 * numNonZero - number of valid entries in absCoeff (must be at least 1)
 *
 * Returns the accumulated bin count (not scaled to fractional bits). */
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
    // the do/while below always inspects absCoeff[0], so an empty group is a caller error
    X265_CHECK(numNonZero > 0, "numNonZero check failure\n");

    uint32_t goRiceParam = 0;
    int firstCoeff2 = 1;
    uint32_t baseLevelN = 0x5555AAAA; // 2-bits encode format baseLevel

    uint32_t sum = 0;
    int idx = 0;
    do
    {
        // consume the next 2-bit base level; the very first coefficient gets +1 via firstCoeff2
        int baseLevel = (baseLevelN & 3) | firstCoeff2;
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failure\n");
        baseLevelN >>= 2;
        int codeNumber = absCoeff[idx] - baseLevel;

        if (codeNumber >= 0)
        {
            // bin count corresponding to writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam)
            uint32_t length = 0;

            codeNumber = ((uint32_t)codeNumber >> goRiceParam) - COEF_REMAIN_BIN_REDUCTION;
            if (codeNumber >= 0)
            {
                {
                    unsigned long cidx;
                    BSR(cidx, codeNumber + 1);
                    length = cidx;
                }
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");

                codeNumber = (length + length);
            }
            sum += (COEF_REMAIN_BIN_REDUCTION + 1 + goRiceParam + codeNumber);

            // adapt the Rice parameter; the (goRiceParam >> 2) term stops it growing past 4
            if (absCoeff[idx] > (COEF_REMAIN_BIN_REDUCTION << goRiceParam))
                goRiceParam = (goRiceParam + 1) - (goRiceParam >> 2);
            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
        }
        if (absCoeff[idx] >= 2)
            firstCoeff2 = 0;
        idx++;
    }
    while(idx < numNonZero);

    return sum;
}
#endif // debug only code
2230
2231
/* Code the quantized coefficients of one transform block.
 *
 * cu         - coding unit that owns the block
 * coeff      - coefficients of the block, trSize values per row
 * absPartIdx - partition index of the block inside the CU
 * log2TrSize - log2 of the transform width/height
 * ttype      - plane being coded (luma selects a different set of contexts than chroma)
 *
 * Steps, in order: optional transform-skip flag, position of the last significant
 * coefficient, then for every coefficient group (CG) from the one holding the last
 * coefficient down to CG 0: the CG significance flag, the per-coefficient
 * significance flags, the greater-than-1/greater-than-2 flags, the sign bits and
 * the remaining absolute levels.
 *
 * When m_bitIf is NULL the "fast RD path" branches are taken: instead of coding the
 * bins one by one, the primitives.cost* helpers are used and their result is added
 * to m_fracBits. */
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
2232
0
{
2233
0
    uint32_t trSize = 1 << log2TrSize;
2234
0
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
2235
    // compute number of significant coefficients
2236
0
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
2237
0
    X265_CHECK(numSig > 0, "cbf check fail\n");
2238
0
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
2239
2240
0
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
2241
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
2242
2243
0
    bool bIsLuma = ttype == TEXT_LUMA;
2244
2245
    // select scans
2246
0
    TUEntropyCodingParameters codingParameters;
2247
0
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
2248
2249
0
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
2250
0
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
2251
0
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
2252
2253
    //----- encode significance map -----
2254
2255
    // Find position of last coefficient
2256
0
    int scanPosLast = 0;
2257
0
    uint32_t posLast;
2258
0
    uint64_t sigCoeffGroupFlag64 = 0;
2259
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
2260
0
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
2261
2262
0
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
2263
0
    posLast = codingParameters.scan[scanPosLast];
2264
2265
0
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
2266
2267
    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
2268
0
    for(int idx = 0; idx < lastScanSet; idx++)
2269
0
    {
2270
0
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
2271
0
        const uint8_t nonZero = (coeffNum[idx] != 0);
2272
0
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
2273
0
    }
2274
2275
2276
    // Code position of last coefficient
2277
0
    {
2278
        // The last position is composed of a prefix and suffix.
2279
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
2280
        // The bypass coded bins for both the x and y components are grouped together.
2281
0
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
2282
0
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
2283
        // swap
2284
0
        if (codingParameters.scanType == SCAN_VER)
2285
0
            std::swap(pos[0], pos[1]);
2286
2287
0
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
2288
0
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
2289
0
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
2290
0
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
2291
0
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
2292
2293
0
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
2294
0
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2295
0
        {
2296
            // each g_lastCoeffTable entry packs the prefix length (low 4 bits) and the suffix length (upper bits)
0
            uint32_t temp = g_lastCoeffTable[pos[i]];
2297
0
            uint32_t prefixOnes = temp & 15;
2298
0
            uint32_t suffixLen = temp >> 4;
2299
2300
0
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
2301
0
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
2302
2303
            // the terminating zero is omitted when the prefix already has the maximum length
0
            if (prefixOnes < maxGroupIdx)
2304
0
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
2305
2306
0
            packedSuffixBits <<= suffixLen;
2307
0
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
2308
0
            packedSuffixLen += suffixLen;
2309
0
        }
2310
2311
0
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
2312
0
    }
2313
2314
    // code significance flag
2315
0
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
2316
0
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
2317
0
    uint32_t c1 = 1;
2318
    // scan offset, inside the last CG, of the position just before the last significant
    // coefficient; it is -1 when the last coefficient is the first one of its CG
0
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
2319
0
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
2320
    // starts at 1 because the last significant coefficient is stored in absCoeff[0] below
0
    uint32_t numNonZero = 1;
2321
0
    unsigned long lastNZPosInCG = 0;
2322
0
    unsigned long firstNZPosInCG = 0;
2323
2324
#if _DEBUG
2325
    // Unnecessary, for Valgrind-3.10.0 only
2326
    memset(absCoeff, 0, sizeof(absCoeff));
2327
#endif
2328
2329
0
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
2330
2331
0
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
2332
0
    {
2333
0
        const uint32_t subCoeffFlag = coeffFlag[subSet];
2334
0
        uint32_t scanFlagMask = subCoeffFlag;
2335
0
        int subPosBase = subSet << MLS_CG_SIZE;
2336
        
2337
0
        if (subSet == lastScanSet)
2338
0
        {
2339
0
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
2340
            // NOTE(review): presumably bit 0 of the mask belongs to the last significant
            // coefficient, which needs no significance flag - confirm against primitives.scanPosLast
0
            scanFlagMask >>= 1;
2341
0
        }
2342
2343
        // encode significant_coeffgroup_flag
2344
0
        const int cgBlkPos = codingParameters.scanCG[subSet];
2345
0
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
2346
0
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
2347
0
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
2348
2349
        // the CG holding the last coefficient and CG 0 are marked significant without coding a flag
0
        if (subSet == lastScanSet || !subSet)
2350
0
            sigCoeffGroupFlag64 |= cgBlkPosMask;
2351
0
        else
2352
0
        {
2353
0
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
2354
0
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2355
0
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
2356
0
        }
2357
2358
        // encode significant_coeff_flag
2359
0
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
2360
0
        {
2361
0
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
2362
0
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2363
0
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
2364
2365
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
2366
0
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
2367
0
            {
2368
                // patternSigCtx = 0
2369
0
                {
2370
0
                    2, 1, 1, 0,
2371
0
                    1, 1, 0, 0,
2372
0
                    1, 0, 0, 0,
2373
0
                    0, 0, 0, 0,
2374
0
                },
2375
                // patternSigCtx = 1
2376
0
                {
2377
0
                    2, 2, 2, 2,
2378
0
                    1, 1, 1, 1,
2379
0
                    0, 0, 0, 0,
2380
0
                    0, 0, 0, 0,
2381
0
                },
2382
                // patternSigCtx = 2
2383
0
                {
2384
0
                    2, 1, 0, 0,
2385
0
                    2, 1, 0, 0,
2386
0
                    2, 1, 0, 0,
2387
0
                    2, 1, 0, 0,
2388
0
                },
2389
                // patternSigCtx = 3
2390
0
                {
2391
0
                    2, 2, 2, 2,
2392
0
                    2, 2, 2, 2,
2393
0
                    2, 2, 2, 2,
2394
0
                    2, 2, 2, 2,
2395
0
                },
2396
                // 4x4
2397
0
                {
2398
0
                    0, 1, 4, 5,
2399
0
                    2, 3, 4, 5,
2400
0
                    6, 6, 8, 8,
2401
0
                    7, 7, 8, 8
2402
0
                }
2403
0
            };
2404
2405
0
            const int offset = codingParameters.firstSignificanceMapContext;
2406
0
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2407
2408
0
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2409
0
            if (m_bitIf)
2410
0
            {
2411
0
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2412
0
                memset(tmpCoeff, 0, sizeof(tmpCoeff));
2413
2414
                // TODO: accelerate by PABSW
2415
                // NOTE(review): MLS_CG_SIZE is used elsewhere in this function as a shift count
                // and here as the row count/stride of the gathered block; this only works
                // if both meanings have the same value - confirm against its definition
0
                for (int i = 0; i < MLS_CG_SIZE; i++)
2416
0
                {
2417
0
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2418
0
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2419
0
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2420
0
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2421
0
                }
2422
2423
0
                if (log2TrSize == 2)
2424
0
                {
2425
0
                    do
2426
0
                    {
2427
0
                        uint32_t blkPos, sig, ctxSig;
2428
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2429
0
                        sig     = scanFlagMask & 1;
2430
0
                        scanFlagMask >>= 1;
2431
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2432
0
                        {
2433
0
                            ctxSig = table_cnt[4][blkPos];
2434
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2435
0
                            encodeBin(sig, baseCtx[ctxSig]);
2436
0
                        }
2437
                        // always stored; the slot is kept only when sig advances numNonZero
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2438
0
                        numNonZero += sig;
2439
0
                        scanPosSigOff--;
2440
0
                    }
2441
0
                    while(scanPosSigOff >= 0);
2442
0
                }
2443
0
                else
2444
0
                {
2445
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2446
2447
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2448
0
                    do
2449
0
                    {
2450
0
                        uint32_t blkPos, sig, ctxSig;
2451
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2452
                        // all-ones except for scan position 0 of the whole block, which forces ctxSig to 0
0
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2453
0
                        sig     = scanFlagMask & 1;
2454
0
                        scanFlagMask >>= 1;
2455
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2456
                        // the flag of the first position of a non-zero CG is skipped when no other
                        // coefficient of the CG was significant (it is then necessarily significant)
0
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2457
0
                        {
2458
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2459
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2460
2461
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2462
0
                            encodeBin(sig, baseCtx[ctxSig]);
2463
0
                        }
2464
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2465
0
                        numNonZero += sig;
2466
0
                        scanPosSigOff--;
2467
0
                    }
2468
0
                    while(scanPosSigOff >= 0);
2469
0
                }
2470
0
            }
2471
0
            else // fast RD path
2472
0
            {
2473
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2474
0
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2475
0
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2476
0
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2477
2478
#if CHECKED_BUILD || _DEBUG
2479
    // only needed so that the coefNum X265_CHECK after this block holds on the fast path
                numNonZero = coeffNum[subSet];
2480
#endif
2481
                // update RD cost
2482
0
                m_fracBits += sum;
2483
0
            } // end of fast RD path -- !m_bitIf
2484
0
        }
2485
0
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2486
2487
0
        uint32_t coeffSigns = coeffSign[subSet];
2488
0
        numNonZero = coeffNum[subSet];
2489
0
        if (numNonZero > 0)
2490
0
        {
2491
0
            uint32_t idx = 0;
2492
0
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2493
0
            BSR(lastNZPosInCG, subCoeffFlag);
2494
0
            BSF(firstNZPosInCG, subCoeffFlag);
2495
2496
0
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2497
0
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2498
0
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2499
2500
0
            c1 = 1;
2501
0
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2502
2503
0
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2504
0
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2505
2506
0
            if (!m_bitIf)
2507
0
            {
2508
                // the result packs three fields, unpacked below: bits 28+ hold firstC2Idx,
                // bits 26-27 hold c1 and the low 24 bits hold the bit cost
0
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
2509
0
                uint32_t firstC2Idx = (sum >> 28);
2510
0
                c1 = ((sum >> 26) & 3);
2511
0
                m_fracBits += sum & 0x00FFFFFF;
2512
2513
                // one sign bit fewer is counted when the first sign is hidden
0
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2514
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2515
0
                m_fracBits += (numNonZero + hiddenShift) << 15;
2516
2517
0
                if (numNonZero > firstC2Idx)
2518
0
                {
2519
0
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2520
0
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2521
0
                    m_fracBits += ((uint64_t)sum << 15);
2522
0
                }
2523
0
            }
2524
            // Standard path
2525
0
            else
2526
0
            {
2527
0
                uint32_t firstC2Idx = 8;
2528
0
                uint32_t firstC2Flag = 2;
2529
0
                uint32_t c1Next = 0xFFFFFFFE;
2530
2531
0
                idx = 0;
2532
0
                do
2533
0
                {
2534
0
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2535
0
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2536
0
                    encodeBin(symbol1, baseCtxMod[c1]);
2537
2538
0
                    if (symbol1)
2539
0
                        c1Next = 0;
2540
2541
                    // latch the greater-than-2 flag and the index of the first coefficient larger than 1
                    // (the sentinels 2 and 8 mean "not found yet")
0
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2542
0
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2543
2544
0
                    c1 = (c1Next & 3);
2545
0
                    c1Next >>= 2;
2546
0
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2547
0
                    idx++;
2548
0
                }
2549
0
                while(idx < numC1Flag);
2550
2551
0
                if (!c1)
2552
0
                {
2553
0
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2554
2555
0
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2556
0
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2557
0
                }
2558
2559
0
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2560
0
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2561
2562
0
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2563
0
                {
2564
                    // Standard path
2565
0
                    uint32_t goRiceParam = 0;
2566
0
                    int baseLevel = 3;
2567
0
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2568
#if CHECKED_BUILD || _DEBUG
2569
                    int firstCoeff2 = 1;
2570
#endif
2571
0
                    idx = firstC2Idx;
2572
0
                    do
2573
0
                    {
2574
0
                        if (idx >= C1FLAG_NUMBER)
2575
0
                            baseLevel = 1;
2576
                        // TODO: fast algorithm maybe broken this check logic
2577
0
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2578
2579
0
                        if (absCoeff[idx] >= baseLevel)
2580
0
                        {
2581
0
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2582
0
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2583
                            // raise the Rice parameter (at most to 4) and double the threshold along with it
0
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2584
0
                            goRiceParam += adjust;
2585
0
                            threshold += (adjust) ? threshold : 0;
2586
0
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2587
0
                        }
2588
#if CHECKED_BUILD || _DEBUG
2589
                        firstCoeff2 = 0;
2590
#endif
2591
0
                        baseLevel = 2;
2592
0
                        idx++;
2593
0
                    }
2594
0
                    while(idx < numNonZero);
2595
0
                }
2596
0
            } // end of !bitIf
2597
0
        } // end of (numNonZero > 0)
2598
2599
        // Initialize value for next loop
2600
0
        numNonZero = 0;
2601
0
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2602
0
    }
2603
0
}
2604
2605
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2606
0
{
2607
0
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2608
2609
0
    uint32_t isCodeNonZero = !!code;
2610
2611
0
    encodeBinEP(isCodeNonZero);
2612
0
    if (isCodeNonZero)
2613
0
    {
2614
0
        uint32_t isCodeLast = (maxSymbol > code);
2615
0
        uint32_t mask = (1 << (code - 1)) - 1;
2616
0
        uint32_t len = code - 1 + isCodeLast;
2617
0
        mask <<= isCodeLast;
2618
2619
0
        encodeBinsEP(mask, len);
2620
0
    }
2621
0
}
2622
2623
/* estimate bit cost for CBP, significant map and significant coefficients */
2624
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2625
0
{
2626
0
    estCBFBit(estBitsSbac);
2627
2628
0
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2629
2630
    // encode significance map
2631
0
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2632
2633
    // encode significant coefficients
2634
0
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2635
0
}
2636
2637
/* estimate bit cost for each CBP bit */
2638
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2639
0
{
2640
0
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2641
2642
0
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2643
0
    {
2644
0
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2645
0
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2646
0
    }
2647
2648
0
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2649
2650
0
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2651
0
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2652
0
}
2653
2654
/* estimate SAMBAC bit cost for significant coefficient group map */
2655
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2656
0
{
2657
0
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2658
2659
0
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2660
0
        for (uint32_t bin = 0; bin < 2; bin++)
2661
0
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2662
0
}
2663
2664
/* estimate SAMBAC bit cost for significant coefficient map */
2665
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2666
0
{
2667
0
    int firstCtx = 1, numCtx = 8;
2668
2669
0
    if (log2TrSize >= 4)
2670
0
    {
2671
0
        firstCtx = bIsLuma ? 21 : 12;
2672
0
        numCtx = bIsLuma ? 6 : 3;
2673
0
    }
2674
0
    else if (log2TrSize == 3)
2675
0
    {
2676
0
        firstCtx = 9;
2677
0
        numCtx = bIsLuma ? 12 : 3;
2678
0
    }
2679
2680
0
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2681
2682
0
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2683
0
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2684
2685
0
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2686
0
    {
2687
0
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2688
0
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2689
0
    }
2690
2691
0
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2692
0
    if (bIsLuma)
2693
0
    {
2694
0
        if (log2TrSize == 2)
2695
0
        {
2696
0
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2697
0
            {
2698
0
                int bits = 0;
2699
0
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2700
2701
0
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2702
0
                {
2703
0
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2704
0
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2705
0
                }
2706
2707
0
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2708
0
            }
2709
0
        }
2710
0
        else
2711
0
        {
2712
0
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2713
2714
0
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2715
0
            {
2716
0
                int bits = 0;
2717
0
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2718
0
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2719
2720
0
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2721
0
                {
2722
0
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2723
0
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2724
0
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2725
0
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2726
0
                    bits += 2 * cost1;
2727
0
                }
2728
                // correct latest bit cost, it didn't include cost0
2729
0
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2730
0
            }
2731
0
        }
2732
0
    }
2733
0
    else
2734
0
    {
2735
0
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2736
0
        const int ctxShift = log2TrSize - 2;
2737
2738
0
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2739
0
        {
2740
0
            int bits = 0;
2741
0
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2742
2743
0
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2744
0
            {
2745
0
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2746
0
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2747
0
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2748
0
            }
2749
2750
0
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2751
0
        }
2752
0
    }
2753
0
}
2754
2755
/* estimate bit cost of significant coefficient */
2756
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2757
0
{
2758
0
    if (bIsLuma)
2759
0
    {
2760
0
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2761
0
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2762
2763
0
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2764
0
        {
2765
0
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2766
0
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2767
0
        }
2768
2769
0
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2770
0
        {
2771
0
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2772
0
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2773
0
        }
2774
0
    }
2775
0
    else
2776
0
    {
2777
0
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2778
0
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2779
2780
0
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2781
0
        {
2782
0
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2783
0
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2784
0
        }
2785
2786
0
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2787
0
        {
2788
0
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2789
0
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2790
0
        }
2791
0
    }
2792
0
}
2793
2794
/* Initialize our context information from the nominated source */
2795
void Entropy::copyContextsFrom(const Entropy& src)
2796
0
{
2797
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2798
2799
0
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2800
0
    markValid();
2801
0
}
2802
2803
void Entropy::start()
2804
0
{
2805
0
    m_low = 0;
2806
0
    m_range = 510;
2807
0
    m_bitsLeft = -12;
2808
0
    m_numBufferedBytes = 0;
2809
0
    m_bufferedByte = 0xff;
2810
0
}
2811
2812
void Entropy::finish()
2813
0
{
2814
0
    if (m_low >> (21 + m_bitsLeft))
2815
0
    {
2816
0
        m_bitIf->writeByte(m_bufferedByte + 1);
2817
0
        while (m_numBufferedBytes > 1)
2818
0
        {
2819
0
            m_bitIf->writeByte(0x00);
2820
0
            m_numBufferedBytes--;
2821
0
        }
2822
2823
0
        m_low -= 1 << (21 + m_bitsLeft);
2824
0
    }
2825
0
    else
2826
0
    {
2827
0
        if (m_numBufferedBytes > 0)
2828
0
            m_bitIf->writeByte(m_bufferedByte);
2829
2830
0
        while (m_numBufferedBytes > 1)
2831
0
        {
2832
0
            m_bitIf->writeByte(0xff);
2833
0
            m_numBufferedBytes--;
2834
0
        }
2835
0
    }
2836
0
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2837
0
}
2838
2839
void Entropy::copyState(const Entropy& other)
2840
0
{
2841
0
    m_low = other.m_low;
2842
0
    m_range = other.m_range;
2843
0
    m_bitsLeft = other.m_bitsLeft;
2844
0
    m_bufferedByte = other.m_bufferedByte;
2845
0
    m_numBufferedBytes = other.m_numBufferedBytes;
2846
0
    m_fracBits = other.m_fracBits;
2847
0
}
2848
2849
void Entropy::resetBits()
2850
0
{
2851
0
    m_low = 0;
2852
0
    m_bitsLeft = -12;
2853
0
    m_numBufferedBytes = 0;
2854
0
    m_bufferedByte = 0xff;
2855
0
    m_fracBits &= 32767;
2856
0
    if (m_bitIf)
2857
0
        m_bitIf->resetBits();
2858
0
}
2859
2860
/** Encode bin */
2861
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2862
0
{
2863
0
    uint32_t mstate = ctxModel;
2864
2865
0
    ctxModel = sbacNext(mstate, binValue);
2866
2867
0
    if (!m_bitIf)
2868
0
    {
2869
0
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2870
0
        return;
2871
0
    }
2872
2873
0
    uint32_t range = m_range;
2874
0
    uint32_t state = sbacGetState(mstate);
2875
0
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2876
0
    range -= lps;
2877
2878
0
    X265_CHECK(lps >= 2, "lps is too small\n");
2879
2880
0
    int numBits = (uint32_t)(range - 256) >> 31;
2881
0
    uint32_t low = m_low;
2882
2883
    // NOTE: MPS must be LOWEST bit in mstate
2884
0
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2885
0
    if ((binValue ^ mstate) & 1)
2886
0
    {
2887
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2888
        //numBits = g_renormTable[lps >> 3];
2889
0
        unsigned long idx;
2890
0
        BSR(idx, lps);
2891
0
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2892
2893
0
        numBits = 8 - idx;
2894
0
        if (state >= 63)
2895
0
            numBits = 6;
2896
0
        X265_CHECK(numBits <= 6, "numBits failure\n");
2897
2898
0
        low += range;
2899
0
        range = lps;
2900
0
    }
2901
0
    m_low = (low << numBits);
2902
0
    m_range = (range << numBits);
2903
0
    m_bitsLeft += numBits;
2904
2905
0
    if (m_bitsLeft >= 0)
2906
0
        writeOut();
2907
0
}
2908
2909
/** Encode equiprobable bin */
2910
void Entropy::encodeBinEP(uint32_t binValue)
2911
0
{
2912
0
    if (!m_bitIf)
2913
0
    {
2914
0
        m_fracBits += 32768;
2915
0
        return;
2916
0
    }
2917
0
    m_low <<= 1;
2918
0
    if (binValue)
2919
0
        m_low += m_range;
2920
0
    m_bitsLeft++;
2921
2922
0
    if (m_bitsLeft >= 0)
2923
0
        writeOut();
2924
0
}
2925
2926
/** Encode equiprobable bins */
2927
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2928
0
{
2929
0
    if (!m_bitIf)
2930
0
    {
2931
0
        m_fracBits += 32768 * numBins;
2932
0
        return;
2933
0
    }
2934
2935
0
    while (numBins > 8)
2936
0
    {
2937
0
        numBins -= 8;
2938
0
        uint32_t pattern = binValues >> numBins;
2939
0
        m_low <<= 8;
2940
0
        m_low += m_range * pattern;
2941
0
        binValues -= pattern << numBins;
2942
0
        m_bitsLeft += 8;
2943
2944
0
        if (m_bitsLeft >= 0)
2945
0
            writeOut();
2946
0
    }
2947
2948
0
    m_low <<= numBins;
2949
0
    m_low += m_range * binValues;
2950
0
    m_bitsLeft += numBins;
2951
2952
0
    if (m_bitsLeft >= 0)
2953
0
        writeOut();
2954
0
}
2955
2956
/** Encode terminating bin */
2957
void Entropy::encodeBinTrm(uint32_t binValue)
2958
0
{
2959
0
    if (!m_bitIf)
2960
0
    {
2961
0
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2962
0
        return;
2963
0
    }
2964
2965
0
    m_range -= 2;
2966
0
    if (binValue)
2967
0
    {
2968
0
        m_low += m_range;
2969
0
        m_low <<= 7;
2970
0
        m_range = 2 << 7;
2971
0
        m_bitsLeft += 7;
2972
0
    }
2973
0
    else if (m_range >= 256)
2974
0
        return;
2975
0
    else
2976
0
    {
2977
0
        m_low <<= 1;
2978
0
        m_range <<= 1;
2979
0
        m_bitsLeft++;
2980
0
    }
2981
2982
0
    if (m_bitsLeft >= 0)
2983
0
        writeOut();
2984
0
}
2985
2986
/** Move bits from register into bitstream */
2987
void Entropy::writeOut()
2988
0
{
2989
0
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2990
0
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2991
2992
0
    m_bitsLeft -= 8;
2993
0
    m_low &= low_mask;
2994
2995
0
    if (leadByte == 0xff)
2996
0
        m_numBufferedBytes++;
2997
0
    else
2998
0
    {
2999
0
        uint32_t numBufferedBytes = m_numBufferedBytes;
3000
0
        if (numBufferedBytes > 0)
3001
0
        {
3002
0
            uint32_t carry = leadByte >> 8;
3003
0
            uint32_t byteTowrite = m_bufferedByte + carry;
3004
0
            m_bitIf->writeByte(byteTowrite);
3005
3006
0
            byteTowrite = (0xff + carry) & 0xff;
3007
0
            while (numBufferedBytes > 1)
3008
0
            {
3009
0
                m_bitIf->writeByte(byteTowrite);
3010
0
                numBufferedBytes--;
3011
0
            }
3012
0
        }
3013
0
        m_numBufferedBytes = 1;
3014
0
        m_bufferedByte = (uint8_t)leadByte;
3015
0
    }
3016
0
}
3017
3018
/* Bit-cost table with 128 entries.
 * NOTE(review): presumably read through sbacGetEntropyBits(); the exact
 * indexing is defined by that macro, which is not in this part of the file. */
const uint32_t g_entropyBits[128] =
{
    // Corrected table, most notably for last state
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b,
    0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937,
    0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df,
    0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327,
    0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e,
    0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46,
    0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26,
    0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f,
    0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
};
3030
3031
/* Context state transition table: 128 rows of two next-state values.
 * NOTE(review): presumably read through sbacNext(state, bin), i.e. row =
 * current state and column = coded bin value; confirm against that macro. */
const uint8_t g_nextState[128][2] =
{
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 },
    { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 },
    { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 },
    { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 },
    { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 },
    { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 },
    { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 },
    { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 },
    { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 },
    { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 },
    { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 },
    { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 },
    { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 },
    { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 },
    { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 },
    { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 },
    { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
};
3050
3051
}
3052
3053
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
3054
extern "C" const uint32_t PFX(entropyStateBits)[128] =
3055
{
3056
    // Corrected table, most notably for last state
3057
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
3058
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
3059
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
3060
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
3061
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
3062
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
3063
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
3064
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
3065
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
3066
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
3067
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
3068
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
3069
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
3070
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
3071
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
3072
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
3073
};
3074