Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/entropy.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
13.9k
#define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
36
4.26k
#define CU_DQP_EG_k    0 // exp-golomb order
37
0
#define START_VALUE    8 // start value for dpcm mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93
{
94
    { 183, },
95
    { 154, },
96
    { 184, },
97
};
98
99
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100
{
101
    { 152,  139, },
102
    { 152,  139, },
103
    {  63,  139, },
104
};
105
106
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128
{
129
    { 154,  154,  154, },
130
    { 154,  154,  154, },
131
    { 154,  154,  154, },
132
};
133
134
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135
{
136
    { 153,  111,  149,   92,  167,  154,  154 },
137
    { 153,  111,  149,  107,  167,  154,  154 },
138
    { 111,  141,   94,  138,  182,  154,  154 },
139
};
140
141
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149
{
150
    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151
      108,  123,   93 },
152
    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153
      108,  123,  108 },
154
    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155
      108,  123,   63 },
156
};
157
158
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159
{
160
    { 121,  140,
161
      61,  154, },
162
    { 121,  140,
163
      61,  154, },
164
    {  91,  171,
165
       134,  141, },
166
};
167
168
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169
{
170
    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171
    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172
    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173
};
174
175
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176
{
177
    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178
    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179
    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180
};
181
182
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183
{
184
    { 107,  167,   91,  107,  107,  167, },
185
    { 107,  167,   91,  122,  107,  167, },
186
    { 138,  153,  136,  167,  152,  152, },
187
};
188
189
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197
{
198
    { 153,  },
199
    { 153,  },
200
    { 153,  },
201
};
202
203
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204
{
205
    { 160, },
206
    { 185, },
207
    { 200, },
208
};
209
210
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211
{
212
    { 224,  167,  122, },
213
    { 124,  138,   94, },
214
    { 153,  138,  138, },
215
};
216
217
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218
{
219
    { 139,  139 },
220
    { 139,  139 },
221
    { 139,  139 },
222
};
223
224
Entropy::Entropy()
225
2.06M
{
226
2.06M
    markValid();
227
2.06M
    m_fracBits = 0;
228
2.06M
    m_pad = 0;
229
2.06M
    m_meanQP = 0;
230
2.06M
    X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
231
2.06M
}
232
233
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
234
void Entropy::codeVPS(const VPS& vps, const SPS& sps)
235
#else
236
void Entropy::codeVPS(const VPS& vps)
237
#endif
238
651
{
239
651
    int maxLayers = (vps.m_numLayers > 1 || vps.m_numViews > 1) + 1;
240
651
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
241
651
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
242
651
    WRITE_CODE(maxLayers - 1, 6, "vps_reserved_zero_6bits");
243
651
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
244
651
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
245
651
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
246
247
651
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
248
249
651
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
250
251
1.30k
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
252
651
    {
253
651
        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
254
651
        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
255
651
        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
256
651
    }
257
258
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
259
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
260
    {
261
        WRITE_CODE(maxLayers - 1, 6, "vps_max_nuh_reserved_zero_layer_id");
262
        WRITE_UVLC(vps.m_vpsNumLayerSetsMinus1, "vps_num_layer_sets_minus1");
263
        for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
264
        {
265
#if ENABLE_MULTIVIEW
266
            if (vps.m_numViews > 1)
267
            {
268
                for (int j = 0; j < vps.m_numViews; j++)
269
                {
270
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
271
                }
272
            }
273
#endif
274
#if ENABLE_ALPHA
275
            if (vps.m_numLayers > 1)
276
            {
277
                for (int j = 0; j < vps.m_numLayers; j++)
278
                {
279
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
280
                }
281
            }
282
#endif
283
        }
284
    }
285
    else
286
    {
287
        WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
288
        WRITE_UVLC(0, "vps_max_op_sets_minus1");
289
    }
290
#else
291
651
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
292
651
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
293
651
#endif
294
295
651
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
296
297
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
298
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
299
    {
300
        WRITE_FLAG(vps.vps_extension_flag, "vps_extension_flag");
301
302
        if (vps.vps_extension_flag)
303
        {
304
            while (m_bitIf->getNumberOfWrittenBits() % X265_BYTE != 0)
305
            {
306
                WRITE_FLAG(1, "vps_extension_alignment_bit_equal_to_one");
307
            }
308
309
            WRITE_CODE(vps.ptl.levelIdc, 8, "general_level_idc");
310
            if (vps.maxTempSubLayers > 1)
311
            {
312
                for (uint32_t i = 0; i < vps.maxTempSubLayers - 1; i++)
313
                {
314
                    WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
315
                    WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
316
                }
317
                for (int i = vps.maxTempSubLayers - 1; i < 8; i++)
318
                    WRITE_CODE(0, 2, "reserved_zero_2bits");
319
            }
320
321
            WRITE_FLAG(vps.splitting_flag, "splitting flag");
322
            for (int i = 0; i < MAX_VPS_NUM_SCALABILITY_TYPES; i++)
323
            {
324
                WRITE_FLAG(vps.m_scalabilityMask[i], "scalability_mask[i]");
325
            }
326
            for (int i = 0; i < vps.scalabilityTypes - vps.splitting_flag; i++)
327
            {
328
                WRITE_CODE(vps.m_dimensionIdLen[i] - 1, 3, "dimension_id_len_minus1[i]");
329
            }
330
            WRITE_FLAG(vps.m_nuhLayerIdPresentFlag, "vps_nuh_layer_id_present_flag");
331
            for (int i = 1; i < maxLayers; i++)
332
            {
333
                if (vps.m_nuhLayerIdPresentFlag)
334
                    WRITE_CODE(vps.m_layerIdInNuh[i], 6, "layer_id_in_nuh[i]");
335
336
                if (!vps.splitting_flag)
337
                {
338
                    for (int j = 0; j < vps.scalabilityTypes; j++)
339
                    {
340
                        uint8_t bits = vps.m_dimensionIdLen[j];
341
                        WRITE_CODE(vps.m_dimensionId[i][j], bits, "dimension_id[i][j]");
342
                    }
343
                }
344
            }
345
            WRITE_CODE(vps.m_viewIdLen, 4, "view_id_len");
346
347
#if ENABLE_ALPHA
348
            if (vps.m_numLayers > 1)
349
            {
350
                WRITE_FLAG(0, "direct_dependency_flag[1][0]");
351
                WRITE_UVLC(0, "num_add_layer_sets");
352
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
353
                WRITE_FLAG(0, "max_tid_ref_present_flag");
354
                WRITE_FLAG(0, "default_ref_layers_active_flag");
355
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
356
                WRITE_FLAG(1, "vps_profile_present_flag");
357
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
358
359
                WRITE_UVLC(0, "num_add_olss");
360
                WRITE_CODE(0, 2, "default_output_layer_idc");
361
                WRITE_CODE(1, 2, "profile_tier_level_idx[ i ][ j ]");
362
                WRITE_CODE(2, 2, "profile_tier_level_idx[ i ][ j ]");
363
364
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
365
366
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
367
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
368
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
369
370
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
371
372
                if (sps.chromaFormatIdc == X265_CSP_I444)
373
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
374
375
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
376
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
377
378
                const Window& conf = sps.conformanceWindow;
379
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
380
                if (conf.bEnabled)
381
                {
382
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
383
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
384
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
385
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
386
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
387
                }
388
389
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
390
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
391
                WRITE_FLAG(1, "poc_lsb_not_present_flag[");
392
393
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
394
                {
395
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
396
                    for (uint32_t j = 0; j < vps.maxTempSubLayers ; j++)
397
                    {
398
                        if(j > 0)
399
                        WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
400
401
                        for(int k = 0; k < vps.m_numLayersInIdList[i]; k++)
402
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
403
404
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
405
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
406
                    }
407
                }
408
409
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
410
411
                WRITE_FLAG(0, "default_direct_dependency_flag");
412
                WRITE_UVLC(0, "vps_non_vui_extension_length");
413
                WRITE_FLAG(0, "vps_vui_present_flag");
414
                WRITE_FLAG(0, "vps_extension2_flag");
415
        }
416
#endif
417
418
#if ENABLE_MULTIVIEW
419
            if (vps.m_numViews > 1)
420
            {
421
                for (uint8_t i = 0; i < vps.m_numViews; i++)
422
                    WRITE_CODE(i, vps.m_viewIdLen, "view_id_val[i]");
423
424
                for (int i = 1; i < vps.m_numViews; i++)
425
                {
426
                    for (int j = 0; j < i; j++)
427
                    {
428
                        if (j == 0)
429
                            WRITE_FLAG(1, "direct_dependency_flag[1][0]");
430
                        else
431
                            WRITE_FLAG(0, "direct_dependency_flag[1][0]");
432
                    }
433
                }
434
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
435
                WRITE_FLAG(0, "max_tid_ref_present_flag");
436
                WRITE_FLAG(1, "default_ref_layers_active_flag");
437
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
438
                WRITE_FLAG(1, "vps_profile_present_flag[i]");
439
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
440
                WRITE_UVLC(0, "num_add_olss");
441
                WRITE_CODE(0, 2, "default_output_layer_idc");
442
443
                for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
444
                {
445
                    for (int j = 0; j < vps.m_numViews; j++)
446
                    {
447
                        WRITE_CODE((j == 0) ? 1 : 2, 2, "profile_tier_level_idx[ i ][ j ]");
448
                    }
449
                }
450
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
451
452
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
453
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
454
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
455
456
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
457
458
                if (sps.chromaFormatIdc == X265_CSP_I444)
459
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
460
461
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
462
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
463
464
                const Window& conf = sps.conformanceWindow;
465
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
466
                if (conf.bEnabled)
467
                {
468
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
469
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
470
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
471
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
472
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
473
                }
474
475
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
476
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
477
478
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
479
                {
480
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
481
                    for (uint32_t j = 0; j < vps.maxTempSubLayers; j++)
482
                    {
483
                        if (j > 0)
484
                            WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
485
486
                        for (int k = 0; k < vps.m_numLayersInIdList[i]; k++)
487
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
488
489
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
490
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
491
                    }
492
                }
493
494
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
495
496
                WRITE_FLAG(1, "default_direct_dependency_flag");
497
                WRITE_CODE(2, 2, "default_direct_dependency_type");
498
                WRITE_UVLC(0, "vps_non_vui_extension_length");
499
                WRITE_FLAG(0, "vps_vui_present_flag");
500
                WRITE_FLAG(0, "vps_extension2_flag");
501
            }
502
#endif
503
        }
504
    }
505
    else
506
        WRITE_FLAG(0, "vps_extension_flag");
507
#else
508
651
    WRITE_FLAG(0, "vps_extension_flag");
509
651
#endif
510
651
}
511
512
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl, int layer)
513
651
{
514
651
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
515
#if ENABLE_MULTIVIEW
516
    if(layer != 0)
517
        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
518
    else
519
        WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
520
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
521
#else
522
651
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
523
651
#endif
524
651
    {
525
651
        WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
526
651
        codeProfileTier(ptl, sps.maxTempSubLayers);
527
651
    }
528
529
651
    WRITE_UVLC(layer, "sps_seq_parameter_set_id");
530
#if ENABLE_MULTIVIEW
531
    if (layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
532
        WRITE_FLAG(0, "update_rep_format_flag");
533
    else
534
#endif
535
651
    {
536
651
        WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
537
538
651
        if (sps.chromaFormatIdc == X265_CSP_I444)
539
0
            WRITE_FLAG(0,                       "separate_colour_plane_flag");
540
541
651
        WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
542
651
        WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
543
544
651
        const Window& conf = sps.conformanceWindow;
545
651
        WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
546
651
        if (conf.bEnabled)
547
488
        {
548
488
            int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
549
488
            WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
550
488
            WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
551
488
            WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
552
488
            WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
553
488
        }
554
555
651
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
556
651
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
557
651
    }
558
559
651
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
560
#if ENABLE_MULTIVIEW
561
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
562
#endif
563
651
    {
564
651
        WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
565
566
1.30k
        for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
567
651
        {
568
651
            WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
569
651
            WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
570
651
            WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
571
651
        }
572
651
    }
573
574
651
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
575
651
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
576
651
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
577
651
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
578
651
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
579
651
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
580
651
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
581
651
    if (scalingList.m_bEnabled)
582
0
    {
583
#if ENABLE_MULTIVIEW
584
        if ((layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
585
            WRITE_FLAG(sps.spsInferScalingListFlag, "sps_infer_scaling_list_flag");
586
        if(sps.spsInferScalingListFlag)
587
            WRITE_CODE(0, 6, "sps_scaling_list_ref_layer_id");
588
        else
589
#endif
590
0
        {
591
0
            WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
592
0
            if (scalingList.m_bDataPresent)
593
0
                codeScalingList(scalingList);
594
0
        }
595
0
    }
596
651
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
597
651
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
598
599
651
    WRITE_FLAG(0, "pcm_enabled_flag");
600
651
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
601
651
    for (int i = 0; i < sps.spsrpsNum; i++)
602
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
603
651
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
604
605
651
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
606
651
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
607
608
651
    WRITE_FLAG(1, "vui_parameters_present_flag");
609
651
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo, layer);
610
611
651
    WRITE_FLAG(sps.sps_extension_flag, "sps_extension_flag");
612
613
#if ENABLE_MULTIVIEW
614
    if (sps.sps_extension_flag && sps.maxViews > 1)
615
    {
616
        WRITE_FLAG(0, "sps_range_extensions_flag");
617
        WRITE_FLAG(sps.maxViews > 1, "sps_multilayer_extension_flag");
618
        WRITE_FLAG(0, "sps_3d_extension_flag");
619
        WRITE_CODE(0, 5, "sps_extension_5bits");
620
621
        if (layer == 0)
622
            WRITE_FLAG(0, "inter_view_mv_vert_constraint_flag");
623
        else
624
            WRITE_FLAG(1, "inter_view_mv_vert_constraint_flag");
625
    }
626
#endif
627
628
#if ENABLE_SCC_EXT
629
    if (ptl.profileIdc[0] == Profile::MAINSCC)
630
    {
631
        bool sps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
632
        sps_extension_flags[SCC_EXT_IDX] = true;
633
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
634
            WRITE_FLAG(sps_extension_flags[i], "sps_extension_flag");
635
        WRITE_FLAG(1, "intra_block_copy_enabled_flag");
636
        WRITE_FLAG(0, "palette_mode_enabled_flag");
637
        WRITE_CODE(0, 2, "motion_vector_resolution_control_idc");
638
        WRITE_FLAG(0, "intra_boundary_filter_disabled_flag");
639
    }
640
#endif
641
651
}
642
643
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26, int layer)
644
651
{
645
651
    WRITE_UVLC(layer,                          "pps_pic_parameter_set_id");
646
651
    WRITE_UVLC(layer,                          "pps_seq_parameter_set_id");
647
651
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
648
651
    WRITE_FLAG(0,                          "output_flag_present_flag");
649
651
    WRITE_CODE(pps.maxViews > 1 ? 2 : 0, 3,"num_extra_slice_header_bits");
650
651
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
651
651
    WRITE_FLAG(0,                          "cabac_init_present_flag");
652
651
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
653
651
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
654
655
651
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
656
651
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
657
651
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
658
659
651
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
660
651
    if (pps.bUseDQP)
661
500
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
662
663
651
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
664
651
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
665
651
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
666
667
651
    WRITE_FLAG(layer ? 0 : pps.bUseWeightPred,            "weighted_pred_flag");
668
651
    WRITE_FLAG(layer ? 0 : pps.bUseWeightedBiPred,        "weighted_bipred_flag");
669
651
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
670
651
    WRITE_FLAG(0,                             "tiles_enabled_flag");
671
651
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
672
651
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
673
674
651
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
675
651
    if (pps.bDeblockingFilterControlPresent)
676
0
    {
677
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
678
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
679
0
        if (!pps.bPicDisableDeblockingFilter)
680
0
        {
681
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
682
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
683
0
        }
684
0
    }
685
686
651
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
687
651
    WRITE_FLAG(0, "lists_modification_present_flag");
688
651
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
689
651
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
690
651
    WRITE_FLAG(pps.pps_extension_flag, "pps_extension_flag");
691
692
#if ENABLE_MULTIVIEW
693
    if (pps.pps_extension_flag && pps.maxViews > 1)
694
    {
695
        WRITE_FLAG(0, "pps_range_extensions_flag");
696
        WRITE_FLAG(pps.maxViews > 1, "pps_multilayer_extension_flag");
697
        WRITE_FLAG(0, "pps_3d_extension_flag");
698
        WRITE_CODE(0, 5, "pps_extension_5bits");
699
700
        if (pps.maxViews > 1)
701
        {
702
            WRITE_FLAG(0, "poc_reset_info_present_flag");
703
            WRITE_FLAG(0, "pps_infer_scaling_list_flag");
704
            WRITE_UVLC(0, "num_ref_loc_offsets");
705
            WRITE_FLAG(0, "colour_mapping_enabled_flag");
706
        }
707
    }
708
#endif
709
710
711
#if ENABLE_SCC_EXT
712
    if (pps.profileIdc == Profile::MAINSCC)
713
    {
714
        bool pps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
715
        pps_extension_flags[SCC_EXT_IDX] = true;
716
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
717
            WRITE_FLAG(pps_extension_flags[i], "pps_extension_flag");
718
        WRITE_FLAG(1, "curr_pic_as_ref_enabled_pps_flag");
719
        WRITE_FLAG(0, "adaptive_colour_trans_flag");
720
        WRITE_FLAG(0, "palette_predictor_initializer_flag");
721
    }
722
#endif
723
651
}
724
725
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers, int layer)
726
1.30k
{
727
1.30k
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
728
1.30k
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
729
1.30k
    WRITE_CODE(ptl.profileIdc[layer], 5,   "XXX_profile_idc[]");
730
42.9k
    for (int j = 0; j < 32; j++)
731
41.6k
    {
732
41.6k
        if (layer)
733
0
            WRITE_FLAG(j == ptl.profileIdc[layer] ? 1 : 0, "XXX_profile_compatibility_flag[][j]");
734
41.6k
        else
735
41.6k
            WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
736
41.6k
    }
737
738
1.30k
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
739
1.30k
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
740
1.30k
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
741
1.30k
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
742
743
1.30k
    if (ptl.profileIdc[layer] == Profile::MAINREXT || ptl.profileIdc[layer] == Profile::HIGHTHROUGHPUTREXT || ptl.profileIdc[layer] == Profile::SCALABLEMAIN || ptl.profileIdc[layer] == Profile::SCALABLEMAIN10 || ptl.profileIdc[layer] == Profile::MULTIVIEWMAIN || ptl.profileIdc[layer] == Profile::MAINSCC)
744
0
    {
745
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
746
0
        int csp = ptl.chromaFormatConstraint;
747
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
748
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
749
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
750
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
751
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
752
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
753
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
754
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
755
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
756
0
        if (ptl.profileIdc[layer] == Profile::MAINSCC)
757
0
        {
758
0
            WRITE_FLAG(bitDepthConstraint <= 14, "max_14bit_constraint_flag");
759
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[0..15]");
760
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[16..31]");
761
0
            WRITE_FLAG(0, "reserved_zero_33bits[32]");
762
0
        }
763
0
        else
764
0
        {
765
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[0..15]");
766
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[16..31]");
767
0
            WRITE_CODE(0, 3, "XXX_reserved_zero_35bits[32..34]");
768
0
        }
769
0
    }
770
1.30k
    else
771
1.30k
    {
772
1.30k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
773
1.30k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
774
1.30k
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
775
1.30k
    }
776
1.30k
    if (ptl.profileIdc[layer] == Profile::MAINSCC)
777
0
        WRITE_FLAG(false, "inbld_flag");
778
779
1.30k
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
780
781
1.30k
    if (maxTempSubLayers > 1)
782
0
    {
783
0
        for(int i = 0; i < maxTempSubLayers - 1; i++)
784
0
        {
785
0
            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
786
0
            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
787
0
        }
788
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
789
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
790
0
    }
791
1.30k
}
792
793
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo, int layer)
794
651
{
795
651
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
796
651
    if (vui.aspectRatioInfoPresentFlag)
797
0
    {
798
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
799
0
        if (vui.aspectRatioIdc == 255)
800
0
        {
801
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
802
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
803
0
        }
804
0
    }
805
806
651
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
807
651
    if (vui.overscanInfoPresentFlag)
808
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
809
810
651
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
811
651
    if (vui.videoSignalTypePresentFlag)
812
651
    {
813
651
        WRITE_CODE(vui.videoFormat, 3, "video_format");
814
651
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
815
651
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
816
651
        if (vui.colourDescriptionPresentFlag)
817
0
        {
818
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
819
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
820
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
821
0
        }
822
651
    }
823
824
651
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
825
651
    if (vui.chromaLocInfoPresentFlag)
826
0
    {
827
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
828
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
829
0
    }
830
831
651
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
832
651
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
833
651
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
834
835
651
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
836
651
    if (vui.defaultDisplayWindow.bEnabled)
837
0
    {
838
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
839
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
840
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
841
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
842
0
    }
843
844
651
    if(layer)
845
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
846
651
    else
847
651
    {
848
651
        if (!bEmitVUITimingInfo)
849
0
            WRITE_FLAG(0, "vui_timing_info_present_flag");
850
651
        else
851
651
        {
852
651
            WRITE_FLAG(1, "vui_timing_info_present_flag");
853
651
            WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
854
651
            WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
855
651
            WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
856
651
            if (!bEmitVUIHRDInfo)
857
0
                WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
858
651
            else
859
651
            {
860
651
                WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
861
651
                if (vui.hrdParametersPresentFlag)
862
0
                    codeHrdParameters(vui.hrdParameters, maxSubTLayers);
863
651
            }
864
651
        }
865
651
    }
866
867
651
    WRITE_FLAG(0, "bitstream_restriction_flag");
868
651
}
869
870
void Entropy::codeScalingList(const ScalingList& scalingList)
871
0
{
872
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
873
0
    {
874
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
875
0
        {
876
0
            int predList = scalingList.checkPredMode(sizeId, listId);
877
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
878
0
            if (predList >= 0)
879
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
880
0
            else // DPCM Mode
881
0
                codeScalingList(scalingList, sizeId, listId);
882
0
        }
883
0
    }
884
0
}
885
886
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
887
0
{
888
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
889
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
890
0
    int nextCoef = START_VALUE;
891
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
892
0
    int data;
893
894
0
    if (sizeId > BLOCK_8x8)
895
0
    {
896
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
897
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
898
0
    }
899
0
    for (int i = 0; i < coefNum; i++)
900
0
    {
901
0
        data = src[scan[i]] - nextCoef;
902
0
        if (data < -128)
903
0
            data += 256;
904
0
        if (data > 127)
905
0
            data -= 256;
906
0
        nextCoef = (nextCoef + data + 256) % 256;
907
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
908
0
    }
909
0
}
910
911
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
912
0
{
913
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
914
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
915
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
916
917
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
918
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
919
920
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
921
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
922
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
923
924
0
    for (int i = 0; i < maxSubTLayers; i++)
925
0
    {
926
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
927
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
928
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
929
930
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
931
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
932
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
933
0
    }
934
0
}
935
936
void Entropy::codeAUD(const Slice& slice)
937
0
{
938
0
    int picType;
939
940
0
    switch (slice.m_sliceType)
941
0
    {
942
0
    case I_SLICE:
943
0
        picType = 0;
944
0
        break;
945
0
    case P_SLICE:
946
0
        picType = 1;
947
0
        break;
948
0
    case B_SLICE:
949
0
        picType = 2;
950
0
        break;
951
0
    default:
952
0
        picType = 7;
953
0
        break;
954
0
    }
955
956
0
    WRITE_CODE(picType, 3, "pic_type");
957
0
}
958
959
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp, int layer)
960
651
{
961
651
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
962
651
    if (slice.getRapPicFlag())
963
651
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
964
965
651
    WRITE_UVLC(layer, "slice_pic_parameter_set_id");
966
967
    /* x265 does not use dependent slices, so always write all this data */
968
651
    if (slice_addr)
969
0
    {
970
        // if( dependent_slice_segments_enabled_flag )
971
        //     dependent_slice_segment_flag             u(1)
972
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
973
0
    }
974
975
#if ENABLE_MULTIVIEW
976
    if (encData.m_param->numViews > 1)
977
    {
978
        int esb = 0;
979
        if (2 > esb)
980
        {
981
            esb++;
982
            WRITE_FLAG(0, "discardable_flag");
983
        }
984
        if (2 > esb)
985
        {
986
            esb++;
987
            WRITE_FLAG(0, "cross_layer_bla_flag");
988
        }
989
    }
990
#endif
991
992
651
    WRITE_UVLC(slice.m_sliceType, "slice_type");
993
994
651
    if ((slice.m_param->numViews > 1 && layer > 0) || !slice.getIdrPicFlag())
995
0
    {
996
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
997
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
998
0
    }
999
651
    if (!slice.getIdrPicFlag())
1000
0
    {
1001
#if _DEBUG || CHECKED_BUILD
1002
        // check for bitstream restriction stating that:
1003
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
1004
        // Ideally this process should not be repeated for each slice in a picture
1005
        if (slice.isIRAP())
1006
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
1007
            {
1008
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
1009
            }
1010
#endif
1011
1012
0
        if (slice.m_rpsIdx < 0)
1013
0
        {
1014
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
1015
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
1016
0
        }
1017
0
        else
1018
0
        {
1019
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
1020
0
            int numBits = 0;
1021
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
1022
0
                numBits++;
1023
1024
0
            if (numBits > 0)
1025
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
1026
0
        }
1027
1028
0
        if (slice.m_sps->bTemporalMVPEnabled)
1029
#if ENABLE_SCC_EXT
1030
            WRITE_FLAG(slice.m_bTemporalMvp, "slice_temporal_mvp_enable_flag");
1031
#else
1032
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
1033
0
#endif
1034
0
    }
1035
651
    const SAOParam *saoParam = encData.m_saoParam;
1036
651
    if (slice.m_bUseSao)
1037
651
    {
1038
651
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
1039
651
        if (encData.m_param->internalCsp != X265_CSP_I400)
1040
651
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
1041
651
    }
1042
0
    else if(encData.m_param->selectiveSAO)
1043
0
    {
1044
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
1045
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1046
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
1047
0
    }
1048
1049
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
1050
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
1051
1052
651
    if (!slice.isIntra())
1053
0
    {
1054
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
1055
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
1056
0
        if (overrideFlag)
1057
0
        {
1058
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
1059
0
            if (slice.isInterB())
1060
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
1061
0
            else
1062
0
            {
1063
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
1064
0
            }
1065
0
        }
1066
0
    }
1067
651
    else
1068
651
    {
1069
651
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
1070
651
    }
1071
1072
651
    if (slice.isInterB())
1073
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
1074
1075
#if ENABLE_SCC_EXT
1076
    if (slice.m_bTemporalMvp)
1077
#else
1078
651
    if (slice.m_sps->bTemporalMVPEnabled)
1079
651
#endif
1080
651
    {
1081
651
        if (slice.m_sliceType == B_SLICE)
1082
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
1083
1084
651
        if (slice.m_sliceType != I_SLICE &&
1085
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
1086
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
1087
0
        {
1088
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
1089
0
        }
1090
651
    }
1091
651
    if (((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE)) && !layer)
1092
0
        codePredWeightTable(slice);
1093
1094
651
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
1095
651
    if (!slice.isIntra())
1096
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
1097
1098
651
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
1099
651
    WRITE_SVLC(code, "slice_qp_delta");
1100
1101
651
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
1102
0
    {
1103
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
1104
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
1105
0
    }
1106
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
1107
    //       We didn't support filter across slice board, so disable it now
1108
1109
651
    if (encData.m_param->maxSlices <= 1)
1110
651
    {
1111
651
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
1112
651
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
1113
1114
651
        if (isSAOEnabled || isDBFEnabled)
1115
651
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
1116
651
    }
1117
651
}
1118
1119
/** write wavefront substreams sizes for the slice header */
1120
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
1121
532
{
1122
532
    uint32_t offsetLen = 1;
1123
2.99k
    while (maxOffset >= (1U << offsetLen))
1124
2.46k
    {
1125
2.46k
        offsetLen++;
1126
2.46k
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
1127
2.46k
    }
1128
1129
532
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
1130
532
    if (numSubStreams > 0)
1131
532
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
1132
1133
2.94k
    for (uint32_t i = 0; i < numSubStreams; i++)
1134
2.41k
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
1135
532
}
1136
1137
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
1138
0
{
1139
0
    if (idx > 0)
1140
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
1141
1142
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
1143
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
1144
0
    int prev = 0;
1145
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
1146
0
    {
1147
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
1148
0
        prev = rps.deltaPOC[j];
1149
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
1150
0
    }
1151
1152
0
    prev = 0;
1153
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
1154
0
    {
1155
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
1156
0
        prev = rps.deltaPOC[j];
1157
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
1158
0
    }
1159
0
}
1160
1161
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
1162
27.8k
{
1163
27.8k
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
1164
27.8k
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
1165
27.8k
}
1166
1167
/* encode a CU block recursively */
1168
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
1169
113k
{
1170
113k
    const Slice* slice = ctu.m_slice;
1171
1172
113k
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
1173
113k
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
1174
1175
113k
    if (!cuUnsplitFlag)
1176
25.2k
    {
1177
25.2k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1178
25.2k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1179
6.90k
            bEncodeDQP = true;
1180
126k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1181
101k
        {
1182
101k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1183
101k
            if (childGeom.flags & CUGeom::PRESENT)
1184
56.7k
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1185
101k
        }
1186
25.2k
        return;
1187
25.2k
    }
1188
1189
88.4k
    if (cuSplitFlag) 
1190
62.7k
        codeSplitFlag(ctu, absPartIdx, depth);
1191
1192
88.4k
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
1193
7.29k
    {
1194
7.29k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1195
7.29k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1196
288
            bEncodeDQP = true;
1197
36.4k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1198
29.1k
        {
1199
29.1k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1200
29.1k
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1201
29.1k
        }
1202
7.29k
        return;
1203
7.29k
    }
1204
1205
81.1k
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1206
34.7k
        bEncodeDQP = true;
1207
1208
81.1k
    if (slice->m_pps->bTransquantBypassEnabled)
1209
20.8k
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
1210
1211
81.1k
    if (!slice->isIntra())
1212
0
    {
1213
0
        codeSkipFlag(ctu, absPartIdx);
1214
0
        if (ctu.isSkipped(absPartIdx))
1215
0
        {
1216
0
            codeMergeIndex(ctu, absPartIdx);
1217
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1218
0
            return;
1219
0
        }
1220
0
        codePredMode(ctu.m_predMode[absPartIdx]);
1221
0
    }
1222
1223
81.1k
    codePartSize(ctu, absPartIdx, depth);
1224
1225
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1226
81.1k
    codePredInfo(ctu, absPartIdx);
1227
1228
81.1k
    uint32_t tuDepthRange[2];
1229
81.1k
    if (ctu.isIntra(absPartIdx))
1230
81.1k
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
1231
0
    else
1232
0
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
1233
1234
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
1235
81.1k
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
1236
1237
    // --- write terminating bit ---
1238
81.1k
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1239
81.1k
}
1240
1241
/* Return bit count of signaling inter mode */
1242
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
1243
0
{
1244
0
    uint32_t bits;
1245
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
1246
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
1247
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1248
0
    switch (partSize)
1249
0
    {
1250
0
    case SIZE_2Nx2N:
1251
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1252
0
        break;
1253
1254
0
    case SIZE_2NxN:
1255
0
    case SIZE_2NxnU:
1256
0
    case SIZE_2NxnD:
1257
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1258
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1259
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1260
0
        {
1261
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1262
0
            if (partSize != SIZE_2NxN)
1263
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1264
0
        }
1265
0
        break;
1266
1267
0
    case SIZE_Nx2N:
1268
0
    case SIZE_nLx2N:
1269
0
    case SIZE_nRx2N:
1270
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1271
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1272
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1273
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1274
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1275
0
        {
1276
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1277
0
            if (partSize != SIZE_Nx2N)
1278
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1279
0
        }
1280
0
        break;
1281
0
    default:
1282
0
        X265_CHECK(0, "invalid CU partition\n");
1283
0
        break;
1284
0
    }
1285
1286
0
    return bits;
1287
0
}
1288
1289
/* finish encoding a cu and handle end-of-slice conditions */
1290
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
1291
81.1k
{
1292
81.1k
    const Slice* slice = ctu.m_slice;
1293
81.1k
    uint32_t realEndAddress = slice->m_endCUAddr;
1294
81.1k
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
1295
81.1k
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
1296
1297
81.1k
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
1298
81.1k
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
1299
81.1k
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
1300
81.1k
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
1301
81.1k
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
1302
47.3k
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
1303
1304
81.1k
    if (slice->m_pps->bUseDQP)
1305
60.3k
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
1306
1307
81.1k
    if (granularityBoundary)
1308
27.8k
    {
1309
        // Encode slice finish
1310
27.8k
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
1311
27.8k
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
1312
1.30k
            bTerminateSlice = 1;
1313
1314
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
1315
27.8k
        if (!bTerminateSlice)
1316
26.5k
            encodeBinTrm(0);    // end_of_slice_segment_flag
1317
1318
27.8k
        if (!m_bitIf)
1319
13.9k
            resetBits(); // TODO: most likely unnecessary
1320
27.8k
    }
1321
81.1k
}
1322
1323
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1324
                              bool& bCodeDQP, const uint32_t depthRange[2])
1325
2.00M
{
1326
2.00M
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1327
1328
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1329
     * so we have checks to make sure the implied value matches our intentions */
1330
2.00M
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1331
305k
    {
1332
305k
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1333
305k
    }
1334
1.69M
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1335
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1336
0
    {
1337
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1338
0
    }
1339
1.69M
    else if (log2CurSize > depthRange[1])
1340
0
    {
1341
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1342
0
    }
1343
1.69M
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1344
1.22M
    {
1345
1.22M
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1346
1.22M
    }
1347
474k
    else
1348
474k
    {
1349
474k
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1350
474k
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1351
474k
    }
1352
1353
2.00M
    uint32_t hChromaShift = cu.m_hChromaShift;
1354
2.00M
    uint32_t vChromaShift = cu.m_vChromaShift;
1355
2.00M
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
1356
2.00M
    if (!curDepth || !bSmallChroma)
1357
781k
    {
1358
781k
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
1359
781k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
1360
781k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
1361
781k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
1362
781k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
1363
781k
    }
1364
1365
2.00M
    if (subdiv)
1366
306k
    {
1367
306k
        --log2CurSize;
1368
306k
        ++curDepth;
1369
1370
306k
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1371
1372
306k
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1373
306k
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1374
306k
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1375
306k
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1376
306k
        return;
1377
306k
    }
1378
1379
1.69M
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
1380
1381
1.69M
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
1382
0
    {
1383
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
1384
0
    }
1385
1.69M
    else
1386
1.69M
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1387
1388
1.69M
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1389
1.69M
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
1390
1.69M
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
1391
1.69M
    if (!(cbfY || cbfU || cbfV))
1392
1.69M
        return;
1393
1394
    // dQP: only for CTU once
1395
7.71k
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1396
3.61k
    {
1397
3.61k
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1398
3.61k
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1399
3.61k
        codeDeltaQP(cu, absPartIdxLT);
1400
3.61k
        bCodeDQP = false;
1401
3.61k
    }
1402
1403
7.71k
    if (cbfY)
1404
4.62k
    {
1405
4.62k
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1406
4.62k
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1407
4.62k
        if (!(cbfU || cbfV))
1408
600
            return;
1409
4.62k
    }
1410
1411
7.11k
    if (bSmallChroma)
1412
4.42k
    {
1413
4.42k
        if ((absPartIdx & 3) != 3)
1414
3.31k
            return;
1415
1416
1.10k
        const uint32_t log2CurSizeC = 2;
1417
1.10k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1418
1.10k
        const uint32_t curPartNum = 4;
1419
1.10k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1420
3.31k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1421
2.21k
        {
1422
2.21k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1423
2.21k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1424
2.21k
            do
1425
2.21k
            {
1426
2.21k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1427
2.21k
                {
1428
2.21k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1429
2.21k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1430
2.21k
                }
1431
2.21k
            }
1432
2.21k
            while (tuIterator.isNextSection());
1433
2.21k
        }
1434
1.10k
    }
1435
2.68k
    else
1436
2.68k
    {
1437
2.68k
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1438
2.68k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1439
2.68k
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1440
2.68k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1441
8.27k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1442
5.58k
        {
1443
5.58k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1444
5.58k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1445
5.58k
            do
1446
5.58k
            {
1447
5.58k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1448
5.58k
                {
1449
5.58k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1450
5.58k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1451
5.58k
                }
1452
5.58k
            }
1453
5.58k
            while (tuIterator.isNextSection());
1454
5.58k
        }
1455
2.68k
    }
1456
7.11k
}
1457
1458
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1459
                              bool& bCodeDQP, const uint32_t depthRange[2])
1460
0
{
1461
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1462
1463
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1464
     * so we have checks to make sure the implied value matches our intentions */
1465
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1466
0
    {
1467
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1468
0
    }
1469
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1470
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1471
0
    {
1472
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1473
0
    }
1474
0
    else if (log2CurSize > depthRange[1])
1475
0
    {
1476
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1477
0
    }
1478
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1479
0
    {
1480
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1481
0
    }
1482
0
    else
1483
0
    {
1484
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1485
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1486
0
    }
1487
1488
0
    if (subdiv)
1489
0
    {
1490
0
        --log2CurSize;
1491
0
        ++curDepth;
1492
1493
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1494
1495
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1496
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1497
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1498
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1499
0
        return;
1500
0
    }
1501
1502
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1503
0
    {
1504
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1505
0
    }
1506
0
    else
1507
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1508
1509
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1510
1511
0
    if (!cbfY)
1512
0
        return;
1513
1514
    // dQP: only for CTU once
1515
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1516
0
    {
1517
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1518
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1519
0
        codeDeltaQP(cu, absPartIdxLT);
1520
0
        bCodeDQP = false;
1521
0
    }
1522
1523
0
    if (cbfY)
1524
0
    {
1525
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1526
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1527
0
    }
1528
0
}
1529
1530
1531
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1532
780k
{
1533
780k
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1534
780k
    {
1535
780k
        codeIntraDirLumaAng(cu, absPartIdx, true);
1536
780k
        if (cu.m_chromaFormat != X265_CSP_I400)
1537
780k
        {
1538
780k
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1539
780k
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1540
1541
780k
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1542
1543
780k
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1544
0
            {
1545
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1546
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1547
0
                {
1548
0
                    absPartIdx += qNumParts;
1549
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1550
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1551
0
                }
1552
0
            }
1553
780k
        }
1554
780k
    }
1555
18.4E
    else // if it is inter mode, encode motion vector and reference index
1556
18.4E
        codePUWise(cu, absPartIdx);
1557
780k
}
1558
1559
/** encode motion information for every PU block */
1560
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1561
0
{
1562
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1563
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1564
1565
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1566
0
    {
1567
0
        codeMergeFlag(cu, subPartIdx);
1568
0
        if (cu.m_mergeFlag[subPartIdx])
1569
0
            codeMergeIndex(cu, subPartIdx);
1570
0
        else
1571
0
        {
1572
0
            if (cu.m_slice->isInterB())
1573
0
                codeInterDir(cu, subPartIdx);
1574
1575
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1576
0
            for (uint32_t list = 0; list < 2; list++)
1577
0
            {
1578
0
                if (interDir & (1 << list))
1579
0
                {
1580
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1581
1582
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1583
0
                    codeMvd(cu, subPartIdx, list);
1584
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1585
0
                }
1586
0
            }
1587
0
        }
1588
0
    }
1589
0
}
1590
1591
/** encode reference frame index for a PU block */
1592
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1593
0
{
1594
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1595
1596
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1597
0
        codeRefFrmIdx(cu, absPartIdx, list);
1598
0
}
1599
1600
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1601
780k
{
1602
780k
    if (!cu.isIntra(absPartIdx))
1603
0
    {
1604
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1605
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1606
0
        if (!cu.getQtRootCbf(absPartIdx))
1607
0
            return;
1608
0
    }
1609
1610
780k
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1611
780k
    if (cu.m_chromaFormat == X265_CSP_I400)
1612
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1613
780k
    else
1614
780k
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1615
780k
}
1616
1617
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1618
54.4k
{
1619
54.4k
    int typeIdx = ctuParam.typeIdx;
1620
1621
54.4k
    if (plane != 2)
1622
36.2k
    {
1623
36.2k
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1624
36.2k
        if (typeIdx >= 0)
1625
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1626
36.2k
    }
1627
1628
54.4k
    if (typeIdx >= 0)
1629
0
    {
1630
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1631
0
        if (typeIdx == SAO_BO)
1632
0
        {
1633
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1634
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1635
1636
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1637
0
                if (ctuParam.offset[i] != 0)
1638
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1639
1640
0
            encodeBinsEP(ctuParam.bandPos, 5);
1641
0
        }
1642
0
        else // if (typeIdx < SAO_BO)
1643
0
        {
1644
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1645
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1646
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1647
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1648
0
            if (plane != 2)
1649
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1650
0
        }
1651
0
    }
1652
54.4k
}
1653
1654
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1655
167k
{
1656
167k
    if (plane != 2)
1657
111k
    {
1658
111k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1659
111k
        encodeBinEP(1);
1660
111k
    }
1661
1662
167k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1663
1664
167k
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1665
167k
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1666
167k
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1667
167k
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1668
167k
    if (plane != 2)
1669
111k
        encodeBinsEP((uint32_t)(typeIdx), 2);
1670
167k
}
1671
1672
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1673
41.7k
{
1674
41.7k
    if (plane != 2)
1675
27.8k
    {
1676
27.8k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1677
27.8k
        encodeBinEP(0);
1678
27.8k
    }
1679
1680
41.7k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1681
1682
208k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1683
167k
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1684
1685
208k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1686
167k
        if (offset[i] != 0)
1687
91
            encodeBinEP(offset[i] < 0);
1688
1689
41.7k
    encodeBinsEP(bandPos, 5);
1690
41.7k
}
1691
1692
/** initialize context model with respect to QP and initialization value */
1693
uint8_t sbacInit(int qp, int initValue)
1694
102k
{
1695
102k
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1696
1697
102k
    int  slope      = (initValue >> 4) * 5 - 45;
1698
102k
    int  offset     = ((initValue & 15) << 3) - 16;
1699
102k
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1700
102k
    uint32_t mpState = (initState >= 64);
1701
102k
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1702
1703
102k
    return (uint8_t)state;
1704
102k
}
1705
1706
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1707
16.9k
{
1708
16.9k
    ctxModel += sliceType * size;
1709
1710
119k
    for (int n = 0; n < size; n++)
1711
102k
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1712
16.9k
}
1713
1714
void Entropy::resetEntropy(const Slice& slice)
1715
651
{
1716
651
    int  qp              = slice.m_sliceQp;
1717
651
    SliceType sliceType  = slice.m_sliceType;
1718
1719
651
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1720
651
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1721
651
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1722
651
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1723
651
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1724
651
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1725
651
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1726
651
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1727
651
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1728
651
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1729
651
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1730
651
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1731
651
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1732
651
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1733
651
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1734
651
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1735
651
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1736
651
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1737
651
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1738
651
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1739
651
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1740
651
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1741
651
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1742
651
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1743
651
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1744
651
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1745
    // new structure
1746
1747
651
    start();
1748
651
}
1749
1750
/* code explicit wp tables */
1751
void Entropy::codePredWeightTable(const Slice& slice)
1752
0
{
1753
0
    const WeightParam *wp;
1754
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1755
0
    bool            bDenomCoded  = false;
1756
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1757
0
    uint32_t        totalSignalledWeightFlags = 0;
1758
1759
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1760
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1761
0
    {
1762
0
        for (int list = 0; list < numRefDirs; list++)
1763
0
        {
1764
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1765
0
            {
1766
0
                wp = slice.m_weightPredTable[list][ref];
1767
0
                if (!bDenomCoded)
1768
0
                {
1769
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1770
1771
0
                    if (bChroma)
1772
0
                    {
1773
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1774
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1775
0
                    }
1776
0
                    bDenomCoded = true;
1777
0
                }
1778
#if ENABLE_SCC_EXT
1779
                if (slice.m_poc == slice.m_refPOCList[list][ref])
1780
                    assert(!wp[0].wtPresent);
1781
                else
1782
#endif
1783
0
                    WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1784
0
                totalSignalledWeightFlags = totalSignalledWeightFlags + wp[0].wtPresent;
1785
0
            }
1786
1787
0
            if (bChroma)
1788
0
            {
1789
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1790
0
                {
1791
0
                    wp = slice.m_weightPredTable[list][ref];
1792
#if ENABLE_SCC_EXT
1793
                    if (slice.m_poc == slice.m_refPOCList[list][ref])
1794
                        assert(!wp[1].wtPresent);
1795
                    else
1796
#endif
1797
0
                        WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1798
0
                    totalSignalledWeightFlags = totalSignalledWeightFlags + 2 * wp[1].wtPresent;
1799
0
                }
1800
0
            }
1801
1802
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1803
0
            {
1804
0
                wp = slice.m_weightPredTable[list][ref];
1805
0
                if (wp[0].wtPresent)
1806
0
                {
1807
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1808
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1809
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1810
0
                }
1811
1812
0
                if (bChroma)
1813
0
                {
1814
0
                    if (wp[1].wtPresent)
1815
0
                    {
1816
0
                        for (int plane = 1; plane < 3; plane++)
1817
0
                        {
1818
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1819
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1820
1821
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1822
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1823
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1824
0
                        }
1825
0
                    }
1826
0
                }
1827
0
            }
1828
0
        }
1829
1830
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1831
0
    }
1832
0
}
1833
1834
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1835
4.83k
{
1836
4.83k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1837
1838
4.83k
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1839
1840
4.83k
    if (!symbol)
1841
428
        return;
1842
1843
4.40k
    bool bCodeLast = (maxSymbol > symbol);
1844
1845
21.7k
    while (--symbol)
1846
17.3k
        encodeBin(1, scmModel[offset]);
1847
1848
4.40k
    if (bCodeLast)
1849
147
        encodeBin(0, scmModel[offset]);
1850
4.40k
}
1851
1852
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1853
4.26k
{
1854
4.26k
    uint32_t bins = 0;
1855
4.26k
    int numBins = 0;
1856
1857
16.7k
    while (symbol >= (uint32_t)(1 << count))
1858
12.4k
    {
1859
12.4k
        bins = 2 * bins + 1;
1860
12.4k
        numBins++;
1861
12.4k
        symbol -= 1 << count;
1862
12.4k
        count++;
1863
12.4k
    }
1864
1865
4.26k
    bins = 2 * bins + 0;
1866
4.26k
    numBins++;
1867
1868
4.26k
    bins = (bins << count) | symbol;
1869
4.26k
    numBins += count;
1870
1871
4.26k
    X265_CHECK(numBins <= 32, "numBins too large\n");
1872
4.26k
    encodeBinsEP(bins, numBins);
1873
4.26k
}
1874
1875
/** Coding of coeff_abs_level_minus3 */
1876
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1877
8.74k
{
1878
8.74k
    uint32_t length;
1879
8.74k
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1880
1881
8.74k
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1882
0
    {
1883
0
        length = codeNumber >> absGoRice;
1884
1885
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1886
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1887
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1888
0
    }
1889
8.74k
    else
1890
8.74k
    {
1891
8.74k
        length = 0;
1892
8.74k
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1893
8.74k
        {
1894
8.74k
            unsigned long idx;
1895
8.74k
            BSR(idx, codeNumber + 1);
1896
8.74k
            length = idx;
1897
8.74k
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1898
8.74k
            codeNumber -= (1 << idx) - 1;
1899
8.74k
        }
1900
8.74k
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1901
1902
8.74k
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1903
8.74k
        encodeBinsEP(codeNumber, length + absGoRice);
1904
8.74k
    }
1905
8.74k
}
1906
1907
// SBAC RD
1908
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1909
1.61M
{
1910
1.61M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1911
1.61M
    m_fracBits = src.m_fracBits;
1912
1.61M
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1913
1.61M
}
1914
1915
void Entropy::copyFrom(const Entropy& src)
1916
11.2M
{
1917
11.2M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1918
1919
11.2M
    copyState(src);
1920
1921
11.2M
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1922
11.2M
    markValid();
1923
11.2M
}
1924
1925
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1926
2.58M
{
1927
2.58M
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1928
1929
2.58M
    if (cu.isIntra(absPartIdx))
1930
2.58M
    {
1931
2.58M
        if (depth == cu.m_encData->m_param->maxCUDepth)
1932
2.17M
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1933
2.58M
        return;
1934
2.58M
    }
1935
1936
18.4E
    switch (partSize)
1937
18.4E
    {
1938
0
    case SIZE_2Nx2N:
1939
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1940
0
        break;
1941
1942
0
    case SIZE_2NxN:
1943
0
    case SIZE_2NxnU:
1944
0
    case SIZE_2NxnD:
1945
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1946
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1947
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1948
0
        {
1949
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1950
0
            if (partSize != SIZE_2NxN)
1951
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1952
0
        }
1953
0
        break;
1954
1955
0
    case SIZE_Nx2N:
1956
0
    case SIZE_nLx2N:
1957
0
    case SIZE_nRx2N:
1958
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1959
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1960
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1961
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1962
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1963
0
        {
1964
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1965
0
            if (partSize != SIZE_Nx2N)
1966
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1967
0
        }
1968
0
        break;
1969
0
    default:
1970
0
        X265_CHECK(0, "invalid CU partition\n");
1971
0
        break;
1972
18.4E
    }
1973
18.4E
}
1974
1975
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1976
0
{
1977
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1978
1979
0
    if (numCand > 1)
1980
0
    {
1981
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1982
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1983
1984
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1985
1986
0
        if (unaryIdx != 0)
1987
0
        {
1988
0
            uint32_t mask = (1 << unaryIdx) - 2;
1989
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1990
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1991
0
        }
1992
0
    }
1993
0
}
1994
1995
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1996
4.42M
{
1997
4.42M
    uint32_t dir[4], j;
1998
4.42M
    uint32_t preds[4][3];
1999
4.42M
    int predIdx[4];
2000
4.42M
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
2001
4.42M
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
2002
2003
9.76M
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
2004
5.33M
    {
2005
5.33M
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
2006
5.33M
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
2007
5.33M
        predIdx[j] = -1;
2008
21.3M
        for (uint32_t i = 0; i < 3; i++)
2009
16.0M
            if (dir[j] == preds[j][i])
2010
5.32M
                predIdx[j] = i;
2011
2012
5.33M
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
2013
5.33M
    }
2014
2015
9.75M
    for (j = 0; j < partNum; j++)
2016
5.33M
    {
2017
5.33M
        if (predIdx[j] != -1)
2018
5.32M
        {
2019
5.32M
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
2020
            // NOTE: Mapping
2021
            //       0 = 0
2022
            //       1 = 10
2023
            //       2 = 11
2024
5.32M
            int nonzero = (!!predIdx[j]);
2025
5.32M
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
2026
5.32M
        }
2027
10.7k
        else
2028
10.7k
        {
2029
10.7k
            if (preds[j][0] > preds[j][1])
2030
491
                std::swap(preds[j][0], preds[j][1]);
2031
2032
10.7k
            if (preds[j][0] > preds[j][2])
2033
0
                std::swap(preds[j][0], preds[j][2]);
2034
2035
10.7k
            if (preds[j][1] > preds[j][2])
2036
0
                std::swap(preds[j][1], preds[j][2]);
2037
2038
10.7k
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
2039
18.4E
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
2040
18.4E
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
2041
2042
10.7k
            encodeBinsEP(dir[j], 5);
2043
10.7k
        }
2044
5.33M
    }
2045
4.42M
}
2046
2047
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
2048
4.27M
{
2049
4.27M
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
2050
2051
4.27M
    if (intraDirChroma == DM_CHROMA_IDX)
2052
1.10M
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
2053
3.17M
    else
2054
3.17M
    {
2055
7.36M
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
2056
7.36M
        {
2057
7.36M
            if (intraDirChroma == chromaDirMode[i])
2058
3.17M
            {
2059
3.17M
                intraDirChroma = i;
2060
3.17M
                break;
2061
3.17M
            }
2062
7.36M
        }
2063
2064
3.17M
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
2065
3.17M
        encodeBinsEP(intraDirChroma, 2);
2066
3.17M
    }
2067
4.27M
}
2068
2069
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
2070
0
{
2071
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
2072
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
2073
2074
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
2075
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
2076
0
    if (interDir < 2)
2077
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
2078
0
}
2079
2080
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
2081
0
{
2082
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
2083
2084
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
2085
2086
0
    if (refFrame > 0)
2087
0
    {
2088
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
2089
0
        if (refNum == 0)
2090
0
            return;
2091
2092
0
        refFrame--;
2093
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
2094
0
        if (refFrame > 0)
2095
0
        {
2096
0
            uint32_t mask = (1 << refFrame) - 2;
2097
0
            mask >>= (refFrame == refNum) ? 1 : 0;
2098
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
2099
0
        }
2100
0
    }
2101
0
}
2102
2103
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
2104
0
{
2105
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
2106
0
    const int hor = mvd.x;
2107
0
    const int ver = mvd.y;
2108
2109
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2110
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2111
2112
0
    const bool bHorAbsGr0 = hor != 0;
2113
0
    const bool bVerAbsGr0 = ver != 0;
2114
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
2115
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
2116
2117
0
    if (bHorAbsGr0)
2118
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2119
2120
0
    if (bVerAbsGr0)
2121
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2122
2123
0
    if (bHorAbsGr0)
2124
0
    {
2125
0
        if (horAbs > 1)
2126
0
            writeEpExGolomb(horAbs - 2, 1);
2127
2128
0
        encodeBinEP(0 > hor ? 1 : 0);
2129
0
    }
2130
2131
0
    if (bVerAbsGr0)
2132
0
    {
2133
0
        if (verAbs > 1)
2134
0
            writeEpExGolomb(verAbs - 2, 1);
2135
2136
0
        encodeBinEP(0 > ver ? 1 : 0);
2137
0
    }
2138
0
}
2139
2140
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
2141
4.83k
{
2142
4.83k
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
2143
2144
4.83k
    int qpBdOffsetY = QP_BD_OFFSET;
2145
2146
4.83k
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
2147
2148
4.83k
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
2149
4.83k
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
2150
4.83k
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
2151
4.83k
    if (absDQp >= CU_DQP_TU_CMAX)
2152
4.26k
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
2153
2154
4.83k
    if (absDQp > 0)
2155
4.40k
    {
2156
4.40k
        uint32_t sign = (dqp > 0 ? 0 : 1);
2157
4.40k
        encodeBinEP(sign);
2158
4.40k
    }
2159
4.83k
}
2160
2161
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
2162
8.56M
{
2163
8.56M
    uint32_t ctx = tuDepth + 2;
2164
2165
8.56M
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
2166
8.56M
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
2167
8.56M
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
2168
2169
8.56M
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
2170
0
    {
2171
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
2172
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
2173
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
2174
2175
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2176
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2177
0
    }
2178
8.56M
    else
2179
8.56M
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2180
8.56M
}
2181
2182
#if CHECKED_BUILD || _DEBUG
/* Debug-only reference computation of the number of bins needed for the
 * coeff_abs_level_remaining syntax of one coefficient group.
 *
 * absCoeff   - absolute coefficient levels; at least one entry is always read
 * numNonZero - number of entries to process
 * returns the accumulated bin count */
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
    uint32_t riceParam = 0;
    int firstCoeff2 = 1;
    uint32_t packedBaseLevels = 0x5555AAAA; // base levels packed two bits per coefficient, lowest bits first
    uint32_t totalBins = 0;

    int idx = 0;
    do
    {
        const uint16_t level = absCoeff[idx];

        const int baseLevel = (packedBaseLevels & 3) | firstCoeff2;
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
        packedBaseLevels >>= 2;

        const int remainder = level - baseLevel;
        if (remainder >= 0)
        {
            // same arithmetic as writeCoefRemainExGolomb(level - baseLevel, riceParam), counting bins only
            int suffix = ((uint32_t)remainder >> riceParam) - COEF_REMAIN_BIN_REDUCTION;
            if (suffix >= 0)
            {
                unsigned long msb;
                BSR(msb, suffix + 1);
                const uint32_t length = msb;
                X265_CHECK((suffix != 0) || (length == 0), "length check failure\n");

                suffix = (length + length);
            }
            totalBins += (COEF_REMAIN_BIN_REDUCTION + 1 + riceParam + suffix);

            // adapt the Rice parameter; it saturates at 4
            if (level > (COEF_REMAIN_BIN_REDUCTION << riceParam))
                riceParam = (riceParam + 1) - (riceParam >> 2);
            X265_CHECK(riceParam <= 4, "goRiceParam check failure\n");
        }

        if (level >= 2)
            firstCoeff2 = 0;
        idx++;
    }
    while (idx < numNonZero);

    return totalBins;
}
#endif // debug only code
2230
2231
/* Code the quantized coefficients of one transform block of size
 * (1 << log2TrSize) for plane ttype.
 *
 * Order of operations: optional transform-skip flag, position of the last
 * significant coefficient, then for every 4x4 coefficient group (CG) from the
 * last one down to CG 0: the group flag, the per-coefficient significance
 * flags, the greater-than-1 / greater-than-2 flags, the sign bits and the
 * remaining absolute levels.
 *
 * Two paths exist for the per-CG data.  When m_bitIf is set the bins are coded
 * one by one.  When m_bitIf is NULL ("fast RD path") the costCoeffNxN,
 * costC1C2Flag and costCoeffRemain primitives are used and only m_fracBits is
 * updated.
 *
 * The block must contain at least one non-zero coefficient (checked below). */
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
2232
61.6k
{
2233
61.6k
    uint32_t trSize = 1 << log2TrSize;
2234
61.6k
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
2235
    // compute number of significant coefficients
2236
61.6k
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
2237
61.6k
    X265_CHECK(numSig > 0, "cbf check fail\n");
2238
61.6k
    // sign hiding is only used when the PPS enables it and the CU is not transquant-bypass
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
2239
2240
61.6k
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
2241
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
2242
2243
61.6k
    bool bIsLuma = ttype == TEXT_LUMA;
2244
2245
    // select scans
2246
61.6k
    TUEntropyCodingParameters codingParameters;
2247
61.6k
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
2248
2249
61.6k
    // per-CG summaries filled in by primitives.scanPosLast() below, indexed in CG scan order
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
2250
61.6k
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
2251
61.6k
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
2252
2253
    //----- encode significance map -----
2254
2255
    // Find position of last coefficient
2256
61.6k
    int scanPosLast = 0;
2257
61.6k
    uint32_t posLast;
2258
61.6k
    uint64_t sigCoeffGroupFlag64 = 0;
2259
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
2260
61.6k
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
2261
2262
61.6k
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
2263
61.6k
    posLast = codingParameters.scan[scanPosLast];
2264
2265
61.6k
    // index (in scan order) of the CG that holds the last significant coefficient
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
2266
2267
    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
2268
150k
    for(int idx = 0; idx < lastScanSet; idx++)
2269
89.0k
    {
2270
89.0k
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
2271
89.0k
        const uint8_t nonZero = (coeffNum[idx] != 0);
2272
89.0k
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
2273
89.0k
    }
2274
2275
2276
    // Code position of last coefficient
2277
61.6k
    {
2278
        // The last position is composed of a prefix and suffix.
2279
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
2280
        // The bypass coded bins for both the x and y components are grouped together.
2281
61.6k
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
2282
61.6k
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
2283
        // swap
2284
61.6k
        if (codingParameters.scanType == SCAN_VER)
2285
4.50k
            std::swap(pos[0], pos[1]);
2286
2287
61.6k
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
2288
61.6k
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
2289
61.6k
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
2290
61.6k
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
2291
61.6k
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
2292
2293
61.6k
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
2294
184k
        // i == 0 codes pos[0], i == 1 codes pos[1]; the second pass uses contexts NUM_CTX_LAST_FLAG_XY further on
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2295
123k
        {
2296
123k
            // table entry packs the prefix length (low 4 bits) and the suffix length (remaining bits)
            uint32_t temp = g_lastCoeffTable[pos[i]];
2297
123k
            uint32_t prefixOnes = temp & 15;
2298
123k
            uint32_t suffixLen = temp >> 4;
2299
2300
263k
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
2301
140k
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
2302
2303
123k
            // the terminating 0 is omitted when the prefix already has the maximum length
            if (prefixOnes < maxGroupIdx)
2304
89.1k
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
2305
2306
123k
            packedSuffixBits <<= suffixLen;
2307
123k
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
2308
123k
            packedSuffixLen += suffixLen;
2309
123k
        }
2310
2311
61.6k
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
2312
61.6k
    }
2313
2314
    // code significance flag
2315
61.6k
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
2316
61.6k
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
2317
61.6k
    uint32_t c1 = 1;
2318
61.6k
    // scan offset, inside the last CG, of the coefficient just below the last significant one
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
2319
61.6k
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
2320
61.6k
    uint32_t numNonZero = 1;
2321
61.6k
    unsigned long lastNZPosInCG = 0;
2322
61.6k
    unsigned long firstNZPosInCG = 0;
2323
2324
#if _DEBUG
2325
    // Unnecessary, for Valgrind-3.10.0 only
2326
    memset(absCoeff, 0, sizeof(absCoeff));
2327
#endif
2328
2329
61.6k
    // the last significant coefficient gets no significance flag; it seeds absCoeff[] (numNonZero starts at 1)
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
2330
2331
212k
    // walk the coefficient groups from the one holding the last coefficient down to CG 0
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
2332
150k
    {
2333
150k
        const uint32_t subCoeffFlag = coeffFlag[subSet];
2334
150k
        uint32_t scanFlagMask = subCoeffFlag;
2335
150k
        int subPosBase = subSet << MLS_CG_SIZE;
2336
        
2337
150k
        if (subSet == lastScanSet)
2338
61.6k
        {
2339
61.6k
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
2340
61.6k
            // drop the bit of the last significant coefficient, already accounted for above
            scanFlagMask >>= 1;
2341
61.6k
        }
2342
2343
        // encode significant_coeffgroup_flag
2344
150k
        const int cgBlkPos = codingParameters.scanCG[subSet];
2345
150k
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
2346
150k
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
2347
150k
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
2348
2349
150k
        // no group flag bin is coded for the last CG or for CG 0; both are marked as non-zero
        if (subSet == lastScanSet || !subSet)
2350
67.0k
            sigCoeffGroupFlag64 |= cgBlkPosMask;
2351
83.6k
        else
2352
83.6k
        {
2353
83.6k
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
2354
83.6k
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2355
83.6k
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
2356
83.6k
        }
2357
2358
        // encode significant_coeff_flag
2359
150k
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
2360
108k
        {
2361
108k
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
2362
108k
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2363
108k
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
2364
2365
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
2366
108k
            // rows 0..3 are selected by patternSigCtx; row 4 is used for 4x4 blocks (log2TrSize == 2)
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
2367
108k
            {
2368
                // patternSigCtx = 0
2369
108k
                {
2370
108k
                    2, 1, 1, 0,
2371
108k
                    1, 1, 0, 0,
2372
108k
                    1, 0, 0, 0,
2373
108k
                    0, 0, 0, 0,
2374
108k
                },
2375
                // patternSigCtx = 1
2376
108k
                {
2377
108k
                    2, 2, 2, 2,
2378
108k
                    1, 1, 1, 1,
2379
108k
                    0, 0, 0, 0,
2380
108k
                    0, 0, 0, 0,
2381
108k
                },
2382
                // patternSigCtx = 2
2383
108k
                {
2384
108k
                    2, 1, 0, 0,
2385
108k
                    2, 1, 0, 0,
2386
108k
                    2, 1, 0, 0,
2387
108k
                    2, 1, 0, 0,
2388
108k
                },
2389
                // patternSigCtx = 3
2390
108k
                {
2391
108k
                    2, 2, 2, 2,
2392
108k
                    2, 2, 2, 2,
2393
108k
                    2, 2, 2, 2,
2394
108k
                    2, 2, 2, 2,
2395
108k
                },
2396
                // 4x4
2397
108k
                {
2398
108k
                    0, 1, 4, 5,
2399
108k
                    2, 3, 4, 5,
2400
108k
                    6, 6, 8, 8,
2401
108k
                    7, 7, 8, 8
2402
108k
                }
2403
108k
            };
2404
2405
108k
            const int offset = codingParameters.firstSignificanceMapContext;
2406
108k
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2407
2408
108k
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2409
108k
            // output path: code every significance flag as an individual bin
            if (m_bitIf)
2410
453
            {
2411
453
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2412
453
                memset(tmpCoeff, 0, sizeof(tmpCoeff));
2413
2414
                // TODO: accelerate by PABSW
2415
2.26k
                // gather the absolute values of this CG's 4x4 coefficients into a contiguous array
                for (int i = 0; i < MLS_CG_SIZE; i++)
2416
1.81k
                {
2417
1.81k
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2418
1.81k
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2419
1.81k
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2420
1.81k
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2421
1.81k
                }
2422
2423
453
                if (log2TrSize == 2)
2424
453
                {
2425
453
                    do
2426
6.79k
                    {
2427
6.79k
                        uint32_t blkPos, sig, ctxSig;
2428
6.79k
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2429
6.79k
                        sig     = scanFlagMask & 1;
2430
6.79k
                        scanFlagMask >>= 1;
2431
6.79k
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2432
6.79k
                        {
2433
6.79k
                            ctxSig = table_cnt[4][blkPos];
2434
6.79k
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2435
6.79k
                            encodeBin(sig, baseCtx[ctxSig]);
2436
6.79k
                        }
2437
6.79k
                        // always store the level; the slot is only kept when sig advances numNonZero
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2438
6.79k
                        numNonZero += sig;
2439
6.79k
                        scanPosSigOff--;
2440
6.79k
                    }
2441
6.79k
                    while(scanPosSigOff >= 0);
2442
453
                }
2443
0
                else
2444
0
                {
2445
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2446
2447
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2448
0
                    do
2449
0
                    {
2450
0
                        uint32_t blkPos, sig, ctxSig;
2451
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2452
0
                        // forces ctxSig to 0 for scan position 0 of the block
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2453
0
                        sig     = scanFlagMask & 1;
2454
0
                        scanFlagMask >>= 1;
2455
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2456
0
                        // skipped only for position 0 of a non-first CG when no non-zero coefficient has been seen in it yet
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2457
0
                        {
2458
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2459
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2460
2461
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2462
0
                            encodeBin(sig, baseCtx[ctxSig]);
2463
0
                        }
2464
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2465
0
                        numNonZero += sig;
2466
0
                        scanPosSigOff--;
2467
0
                    }
2468
0
                    while(scanPosSigOff >= 0);
2469
0
                }
2470
453
            }
2471
107k
            else // fast RD path
2472
107k
            {
2473
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2474
107k
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2475
107k
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2476
107k
                // the primitive receives absCoeff + numNonZero as its output position and returns the bit cost
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2477
2478
#if CHECKED_BUILD || _DEBUG
2479
                numNonZero = coeffNum[subSet];
2480
#endif
2481
                // update RD cost
2482
107k
                m_fracBits += sum;
2483
107k
            } // end of fast RD path -- !m_bitIf
2484
108k
        }
2485
150k
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2486
2487
150k
        uint32_t coeffSigns = coeffSign[subSet];
2488
150k
        numNonZero = coeffNum[subSet];
2489
150k
        if (numNonZero > 0)
2490
150k
        {
2491
150k
            uint32_t idx = 0;
2492
150k
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2493
150k
            BSR(lastNZPosInCG, subCoeffFlag);
2494
150k
            BSF(firstNZPosInCG, subCoeffFlag);
2495
2496
150k
            // a sign may be hidden only when first and last non-zero positions are at least SBH_THRESHOLD apart
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2497
150k
            // c1 still holds the value left by the previously processed CG at this point
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2498
150k
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2499
2500
150k
            c1 = 1;
2501
150k
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2502
2503
150k
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2504
150k
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2505
2506
150k
            // RD estimation path: accumulate costs in m_fracBits without coding bins
            if (!m_bitIf)
2507
148k
            {
2508
148k
                // packed result: bits 28+ = firstC2Idx, bits 26..27 = c1 for the next CG, low 24 bits = cost
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
2509
148k
                uint32_t firstC2Idx = (sum >> 28);
2510
148k
                c1 = ((sum >> 26) & 3);
2511
148k
                m_fracBits += sum & 0x00FFFFFF;
2512
2513
148k
                // -1 removes the hidden sign from the sign-bit count below
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2514
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2515
148k
                // each sign bit is charged 1 << 15 units of m_fracBits
                m_fracBits += (numNonZero + hiddenShift) << 15;
2516
2517
148k
                if (numNonZero > firstC2Idx)
2518
143k
                {
2519
143k
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2520
143k
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2521
143k
                    m_fracBits += ((uint64_t)sum << 15);
2522
143k
                }
2523
148k
            }
2524
            // Standard path
2525
2.07k
            else
2526
2.07k
            {
2527
2.07k
                // 8 and 2 act as "not found yet" markers; both are overwritten at the first coefficient > 1
                uint32_t firstC2Idx = 8;
2528
2.07k
                uint32_t firstC2Flag = 2;
2529
2.07k
                // successive 2-bit values of c1 (2, 3, 3, ...); cleared once a coefficient > 1 is seen
                uint32_t c1Next = 0xFFFFFFFE;
2530
2531
2.07k
                idx = 0;
2532
2.07k
                do
2533
5.24k
                {
2534
5.24k
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2535
5.24k
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2536
5.24k
                    encodeBin(symbol1, baseCtxMod[c1]);
2537
2538
5.24k
                    if (symbol1)
2539
5.12k
                        c1Next = 0;
2540
2541
5.24k
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2542
5.24k
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2543
2544
5.24k
                    c1 = (c1Next & 3);
2545
5.24k
                    c1Next >>= 2;
2546
5.24k
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2547
5.24k
                    idx++;
2548
5.24k
                }
2549
5.24k
                while(idx < numC1Flag);
2550
2551
2.07k
                // c1 == 0 means some coefficient > 1 was seen, so the greater-than-2 flag of the first one is coded
                if (!c1)
2552
1.95k
                {
2553
1.95k
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2554
2555
1.95k
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2556
1.95k
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2557
1.95k
                }
2558
2559
2.07k
                // when a sign is hidden, the lowest bit of coeffSigns is dropped and one fewer bin is written
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2560
2.07k
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2561
2562
2.07k
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2563
1.95k
                {
2564
                    // Standard path
2565
1.95k
                    uint32_t goRiceParam = 0;
2566
1.95k
                    int baseLevel = 3;
2567
1.95k
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2568
#if CHECKED_BUILD || _DEBUG
2569
                    int firstCoeff2 = 1;
2570
#endif
2571
1.95k
                    idx = firstC2Idx;
2572
1.95k
                    do
2573
8.74k
                    {
2574
8.74k
                        if (idx >= C1FLAG_NUMBER)
2575
3.62k
                            baseLevel = 1;
2576
                        // TODO: fast algorithm maybe broken this check logic
2577
8.74k
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2578
2579
8.74k
                        if (absCoeff[idx] >= baseLevel)
2580
8.74k
                        {
2581
8.74k
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2582
8.74k
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2583
8.74k
                            // raise the Rice parameter (capped at 4) and double the threshold alongside it
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2584
8.74k
                            goRiceParam += adjust;
2585
8.74k
                            threshold += (adjust) ? threshold : 0;
2586
8.74k
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2587
8.74k
                        }
2588
#if CHECKED_BUILD || _DEBUG
2589
                        firstCoeff2 = 0;
2590
#endif
2591
8.74k
                        baseLevel = 2;
2592
8.74k
                        idx++;
2593
8.74k
                    }
2594
8.74k
                    while(idx < numNonZero);
2595
1.95k
                }
2596
2.07k
            } // end of !bitIf
2597
150k
        } // end of (numNonZero > 0)
2598
2599
        // Initialize value for next loop
2600
150k
        numNonZero = 0;
2601
150k
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2602
150k
    }
2603
61.6k
}
2604
2605
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2606
835k
{
2607
835k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2608
2609
835k
    uint32_t isCodeNonZero = !!code;
2610
2611
835k
    encodeBinEP(isCodeNonZero);
2612
835k
    if (isCodeNonZero)
2613
91
    {
2614
91
        uint32_t isCodeLast = (maxSymbol > code);
2615
91
        uint32_t mask = (1 << (code - 1)) - 1;
2616
91
        uint32_t len = code - 1 + isCodeLast;
2617
91
        mask <<= isCodeLast;
2618
2619
91
        encodeBinsEP(mask, len);
2620
91
    }
2621
835k
}
2622
2623
/* estimate bit cost for CBP, significant map and significant coefficients */
2624
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2625
8.31M
{
2626
8.31M
    estCBFBit(estBitsSbac);
2627
2628
8.31M
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2629
2630
    // encode significance map
2631
8.31M
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2632
2633
    // encode significant coefficients
2634
8.31M
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2635
8.31M
}
2636
2637
/* estimate bit cost for each CBP bit */
2638
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2639
8.31M
{
2640
8.31M
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2641
2642
66.5M
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2643
58.2M
    {
2644
58.2M
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2645
58.2M
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2646
58.2M
    }
2647
2648
8.31M
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2649
2650
8.31M
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2651
8.31M
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2652
8.31M
}
2653
2654
/* estimate SAMBAC bit cost for significant coefficient group map */
2655
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2656
8.31M
{
2657
8.31M
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2658
2659
24.9M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2660
49.8M
        for (uint32_t bin = 0; bin < 2; bin++)
2661
33.2M
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2662
8.31M
}
2663
2664
/* estimate SAMBAC bit cost for significant coefficient map */
2665
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2666
8.31M
{
2667
8.31M
    int firstCtx = 1, numCtx = 8;
2668
2669
8.31M
    if (log2TrSize >= 4)
2670
321k
    {
2671
321k
        firstCtx = bIsLuma ? 21 : 12;
2672
321k
        numCtx = bIsLuma ? 6 : 3;
2673
321k
    }
2674
7.99M
    else if (log2TrSize == 3)
2675
1.27M
    {
2676
1.27M
        firstCtx = 9;
2677
1.27M
        numCtx = bIsLuma ? 12 : 3;
2678
1.27M
    }
2679
2680
8.31M
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2681
2682
8.31M
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2683
8.31M
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2684
2685
75.7M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2686
67.4M
    {
2687
67.4M
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2688
67.4M
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2689
67.4M
    }
2690
2691
8.31M
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2692
8.31M
    if (bIsLuma)
2693
4.82M
    {
2694
4.82M
        if (log2TrSize == 2)
2695
3.66M
        {
2696
11.0M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2697
7.33M
            {
2698
7.33M
                int bits = 0;
2699
7.33M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2700
2701
29.3M
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2702
22.0M
                {
2703
22.0M
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2704
22.0M
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2705
22.0M
                }
2706
2707
7.33M
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2708
7.33M
            }
2709
3.66M
        }
2710
1.15M
        else
2711
1.15M
        {
2712
1.15M
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2713
2714
3.46M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2715
2.30M
            {
2716
2.30M
                int bits = 0;
2717
2.30M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2718
2.30M
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2719
2720
9.79M
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2721
7.48M
                {
2722
7.48M
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2723
7.48M
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2724
7.48M
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2725
7.48M
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2726
7.48M
                    bits += 2 * cost1;
2727
7.48M
                }
2728
                // correct latest bit cost, it didn't include cost0
2729
2.30M
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2730
2.30M
            }
2731
1.15M
        }
2732
4.82M
    }
2733
3.49M
    else
2734
3.49M
    {
2735
3.49M
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2736
3.49M
        const int ctxShift = log2TrSize - 2;
2737
2738
10.4M
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2739
6.99M
        {
2740
6.99M
            int bits = 0;
2741
6.99M
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2742
2743
30.0M
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2744
23.0M
            {
2745
23.0M
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2746
23.0M
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2747
23.0M
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2748
23.0M
            }
2749
2750
6.99M
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2751
6.99M
        }
2752
3.49M
    }
2753
8.31M
}
2754
2755
/* estimate bit cost of significant coefficient */
2756
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2757
8.31M
{
2758
8.31M
    if (bIsLuma)
2759
4.82M
    {
2760
4.82M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2761
4.82M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2762
2763
82.0M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2764
77.1M
        {
2765
77.1M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2766
77.1M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2767
77.1M
        }
2768
2769
24.1M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2770
19.2M
        {
2771
19.2M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2772
19.2M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2773
19.2M
        }
2774
4.82M
    }
2775
3.49M
    else
2776
3.49M
    {
2777
3.49M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2778
3.49M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2779
2780
31.4M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2781
27.9M
        {
2782
27.9M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2783
27.9M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2784
27.9M
        }
2785
2786
10.4M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2787
6.99M
        {
2788
6.99M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2789
6.99M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2790
6.99M
        }
2791
3.49M
    }
2792
8.31M
}
2793
2794
/* Initialize our context information from the nominated source */
2795
void Entropy::copyContextsFrom(const Entropy& src)
2796
10.7k
{
2797
10.7k
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2798
2799
10.7k
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2800
10.7k
    markValid();
2801
10.7k
}
2802
2803
void Entropy::start()
2804
651
{
2805
651
    m_low = 0;
2806
651
    m_range = 510;
2807
651
    m_bitsLeft = -12;
2808
651
    m_numBufferedBytes = 0;
2809
651
    m_bufferedByte = 0xff;
2810
651
}
2811
2812
void Entropy::finish()
2813
3.06k
{
2814
3.06k
    if (m_low >> (21 + m_bitsLeft))
2815
5
    {
2816
5
        m_bitIf->writeByte(m_bufferedByte + 1);
2817
7
        while (m_numBufferedBytes > 1)
2818
2
        {
2819
2
            m_bitIf->writeByte(0x00);
2820
2
            m_numBufferedBytes--;
2821
2
        }
2822
2823
5
        m_low -= 1 << (21 + m_bitsLeft);
2824
5
    }
2825
3.05k
    else
2826
3.05k
    {
2827
3.05k
        if (m_numBufferedBytes > 0)
2828
3.05k
            m_bitIf->writeByte(m_bufferedByte);
2829
2830
3.06k
        while (m_numBufferedBytes > 1)
2831
4
        {
2832
4
            m_bitIf->writeByte(0xff);
2833
4
            m_numBufferedBytes--;
2834
4
        }
2835
3.05k
    }
2836
3.06k
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2837
3.06k
}
2838
2839
/* Copy the arithmetic-coder registers (low, range, bit counter, buffered
 * byte and buffered-byte count) and the fractional bit counter from other.
 * m_contextState and m_bitIf are not copied by this function. */
void Entropy::copyState(const Entropy& other)
2840
11.2M
{
2841
11.2M
    m_low = other.m_low;
2842
11.2M
    m_range = other.m_range;
2843
11.2M
    m_bitsLeft = other.m_bitsLeft;
2844
11.2M
    m_bufferedByte = other.m_bufferedByte;
2845
11.2M
    m_numBufferedBytes = other.m_numBufferedBytes;
2846
11.2M
    m_fracBits = other.m_fracBits;
2847
11.2M
}
2848
2849
/* Reset the bit accounting.  m_low, m_bitsLeft, m_numBufferedBytes and
 * m_bufferedByte get the same values that start() assigns; m_range is left
 * unchanged.  m_fracBits keeps only its low 15 bits.  When a bitstream is
 * attached, its resetBits() is called as well. */
void Entropy::resetBits()
2850
9.80M
{
2851
9.80M
    m_low = 0;
2852
9.80M
    m_bitsLeft = -12;
2853
9.80M
    m_numBufferedBytes = 0;
2854
9.80M
    m_bufferedByte = 0xff;
2855
9.80M
    // keep the part below 32768 (the amount encodeBinEP() adds per bin)
    m_fracBits &= 32767;
2856
9.80M
    if (m_bitIf)
2857
0
        m_bitIf->resetBits();
2858
9.80M
}
2859
2860
/** Encode bin
 *
 * Codes one context-modelled bin.  ctxModel is always advanced to
 * sbacNext(mstate, binValue).  When no bitstream is attached (m_bitIf is
 * null) only the cost sbacGetEntropyBits(mstate, binValue) is added to
 * m_fracBits.  Otherwise m_low, m_range and m_bitsLeft are updated and
 * writeOut() is called once m_bitsLeft becomes non-negative.
 *
 * binValue: the bin to code
 * ctxModel: context state for this bin; read, then updated in place */
2861
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2862
29.5M
{
2863
29.5M
    // keep the pre-update state; everything below works on this copy
    uint32_t mstate = ctxModel;
2864
2865
29.5M
    ctxModel = sbacNext(mstate, binValue);
2866
2867
29.5M
    // no bitstream attached: only accumulate the cost of the bin
    if (!m_bitIf)
2868
29.2M
    {
2869
29.2M
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2870
29.2M
        return;
2871
29.2M
    }
2872
2873
341k
    uint32_t range = m_range;
2874
341k
    uint32_t state = sbacGetState(mstate);
2875
341k
    // second index is bits 6..7 of range (cast to uint8_t first, then shift)
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2876
341k
    range -= lps;
2877
2878
341k
    X265_CHECK(lps >= 2, "lps is too small\n");
2879
2880
341k
    // 1 when range dropped below 256 (the subtraction wraps, setting bit 31), else 0
    int numBits = (uint32_t)(range - 256) >> 31;
2881
341k
    uint32_t low = m_low;
2882
2883
    // NOTE: MPS must be LOWEST bit in mstate
2884
341k
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2885
341k
    // bin differs from the MPS bit of mstate: code the LPS interval
    if ((binValue ^ mstate) & 1)
2886
35.4k
    {
2887
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2888
        //numBits = g_renormTable[lps >> 3];
2889
35.4k
        unsigned long idx;
2890
35.4k
        // NOTE(review): BSR is defined elsewhere; presumably stores the index
        // of the highest set bit of lps in idx - confirm
        BSR(idx, lps);
2891
35.4k
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2892
2893
35.4k
        numBits = 8 - idx;
2894
35.4k
        if (state >= 63)
2895
0
            numBits = 6;
2896
35.4k
        X265_CHECK(numBits <= 6, "numBits failure\n");
2897
2898
35.4k
        // the new interval starts above the MPS part and has width lps
        low += range;
2899
35.4k
        range = lps;
2900
35.4k
    }
2901
341k
    // renormalize: shift low and range left by numBits and account for the bits
    m_low = (low << numBits);
2902
341k
    m_range = (range << numBits);
2903
341k
    m_bitsLeft += numBits;
2904
2905
341k
    if (m_bitsLeft >= 0)
2906
15.8k
        writeOut();
2907
341k
}
2908
2909
/** Encode equiprobable bin
 *
 * Codes one bin without a context model.  When no bitstream is attached
 * only a fixed cost of 32768 is added to m_fracBits.  Otherwise m_low is
 * doubled, m_range is added to it when binValue is non-zero, and writeOut()
 * is called once m_bitsLeft becomes non-negative.  m_range is not changed. */
2910
void Entropy::encodeBinEP(uint32_t binValue)
2911
978k
{
2912
978k
    if (!m_bitIf)
2913
978k
    {
2914
978k
        m_fracBits += 32768;
2915
978k
        return;
2916
978k
    }
2917
488
    m_low <<= 1;
2918
488
    if (binValue)
2919
488
        m_low += m_range;
2920
488
    m_bitsLeft++;
2921
2922
488
    if (m_bitsLeft >= 0)
2923
87
        writeOut();
2924
488
}
2925
2926
/** Encode equiprobable bins
 *
 * Codes numBins bins held in binValues without a context model.  When no
 * bitstream is attached only 32768 * numBins is added to m_fracBits.
 * Otherwise the bins are consumed from the most significant end, eight at a
 * time while more than eight remain, and the rest in one final step.
 *
 * binValues: the bins, packed into the low numBins bits
 *            NOTE(review): assumes no bits are set above bit numBins-1 - confirm callers
 * numBins:   number of bins in binValues */
2927
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2928
8.74M
{
2929
8.74M
    if (!m_bitIf)
2930
8.66M
    {
2931
8.66M
        m_fracBits += 32768 * numBins;
2932
8.66M
        return;
2933
8.66M
    }
2934
2935
85.4k
    while (numBins > 8)
2936
3.49k
    {
2937
3.49k
        numBins -= 8;
2938
3.49k
        // the top eight of the remaining bins
        uint32_t pattern = binValues >> numBins;
2939
3.49k
        m_low <<= 8;
2940
3.49k
        m_low += m_range * pattern;
2941
3.49k
        // drop the bins just coded from binValues
        binValues -= pattern << numBins;
2942
3.49k
        m_bitsLeft += 8;
2943
2944
3.49k
        if (m_bitsLeft >= 0)
2945
3.49k
            writeOut();
2946
3.49k
    }
2947
2948
81.9k
    // remaining bins (at most eight) in a single step
    m_low <<= numBins;
2949
81.9k
    m_low += m_range * binValues;
2950
81.9k
    m_bitsLeft += numBins;
2951
2952
81.9k
    if (m_bitsLeft >= 0)
2953
22.6k
        writeOut();
2954
81.9k
}
2955
2956
/** Encode terminating bin
 *
 * When no bitstream is attached only sbacGetEntropyBitsTrm(binValue) is
 * added to m_fracBits.  Otherwise m_range is reduced by 2 and:
 *  - binValue != 0: m_low is moved up by the reduced range, m_low is
 *    shifted left by 7, m_range becomes 2 << 7 and m_bitsLeft grows by 7;
 *  - binValue == 0 and the reduced range is still >= 256: nothing else
 *    is done;
 *  - binValue == 0 otherwise: m_low and m_range are shifted left by one.
 * writeOut() is called once m_bitsLeft becomes non-negative. */
2957
void Entropy::encodeBinTrm(uint32_t binValue)
2958
29.6k
{
2959
29.6k
    if (!m_bitIf)
2960
13.2k
    {
2961
13.2k
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2962
13.2k
        return;
2963
13.2k
    }
2964
2965
16.3k
    m_range -= 2;
2966
16.3k
    if (binValue)
2967
3.06k
    {
2968
3.06k
        m_low += m_range;
2969
3.06k
        m_low <<= 7;
2970
3.06k
        m_range = 2 << 7;
2971
3.06k
        m_bitsLeft += 7;
2972
3.06k
    }
2973
13.2k
    // no shift needed, so no bits were produced: skip the writeOut() check
    else if (m_range >= 256)
2974
12.5k
        return;
2975
694
    else
2976
694
    {
2977
694
        m_low <<= 1;
2978
694
        m_range <<= 1;
2979
694
        m_bitsLeft++;
2980
694
    }
2981
2982
3.75k
    if (m_bitsLeft >= 0)
2983
2.82k
        writeOut();
2984
3.75k
}
2985
2986
/** Move bits from register into bitstream
 *
 * Takes the bits of m_low at and above position (13 + m_bitsLeft) as
 * leadByte, removes them from m_low and lowers m_bitsLeft by 8.  A leadByte
 * of exactly 0xff is not written but counted in m_numBufferedBytes.  Any
 * other value first writes out the bytes held back so far, with the carry
 * (bit 8 of leadByte) added in, and then becomes the new held-back byte.
 *
 * m_bitIf is dereferenced without a null check; the callers visible in this
 * file return early when it is null. */
2987
void Entropy::writeOut()
2988
44.9k
{
2989
44.9k
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2990
44.9k
    // keeps the low (13 + m_bitsLeft) bits, i.e. everything below leadByte
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2991
2992
44.9k
    m_bitsLeft -= 8;
2993
44.9k
    m_low &= low_mask;
2994
2995
44.9k
    // 0xff cannot be emitted yet: a later carry would still change it
    if (leadByte == 0xff)
2996
3.41k
        m_numBufferedBytes++;
2997
41.5k
    else
2998
41.5k
    {
2999
41.5k
        uint32_t numBufferedBytes = m_numBufferedBytes;
3000
41.5k
        if (numBufferedBytes > 0)
3001
38.4k
        {
3002
38.4k
            // bit 8 of leadByte is the carry into the bytes held back so far
            uint32_t carry = leadByte >> 8;
3003
38.4k
            uint32_t byteTowrite = m_bufferedByte + carry;
3004
38.4k
            m_bitIf->writeByte(byteTowrite);
3005
3006
38.4k
            // the counted 0xff bytes become 0x00 with a carry, stay 0xff without
            byteTowrite = (0xff + carry) & 0xff;
3007
41.8k
            while (numBufferedBytes > 1)
3008
3.40k
            {
3009
3.40k
                m_bitIf->writeByte(byteTowrite);
3010
3.40k
                numBufferedBytes--;
3011
3.40k
            }
3012
38.4k
        }
3013
41.5k
        // hold back the low 8 bits of leadByte as the new buffered byte
        m_numBufferedBytes = 1;
3014
41.5k
        m_bufferedByte = (uint8_t)leadByte;
3015
41.5k
    }
3016
44.9k
}
3017
3018
/* 128-entry table of bit costs.
 * NOTE(review): presumably indexed by a context state combined with the bin
 * value, in the same fixed-point unit as m_fracBits (32768 per bin in
 * encodeBinEP) - confirm against sbacGetEntropyBits().
 * The values appear to repeat in the low 24 bits of PFX(entropyStateBits);
 * keep the two tables in sync when editing. */
const uint32_t g_entropyBits[128] =
3019
{
3020
    // Corrected table, most notably for last state
3021
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
3022
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
3023
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
3024
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
3025
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
3026
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
3027
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
3028
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
3029
};
3030
3031
/* Context-state transition table: 128 rows of two successor states each.
 * NOTE(review): presumably the row is the current context state and the
 * column is the coded bin value, as used by sbacNext() - confirm against its
 * definition. */
const uint8_t g_nextState[128][2] =
3032
{
3033
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
3034
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
3035
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
3036
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
3037
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
3038
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
3039
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
3040
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
3041
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
3042
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
3043
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
3044
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
3045
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
3046
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
3047
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
3048
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
3049
};
3050
3051
}
3052
3053
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
// Each 32-bit entry packs an 8-bit field (top byte) with a 24-bit cost (low
// bits).  The low 24 bits appear to repeat the g_entropyBits entry at the same
// index.  Declared extern "C" with the PFX() name decoration.
// NOTE(review): presumably exported for assembly routines - confirm.
3054
extern "C" const uint32_t PFX(entropyStateBits)[128] =
3055
{
3056
    // Corrected table, most notably for last state
3057
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
3058
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
3059
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
3060
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
3061
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
3062
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
3063
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
3064
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
3065
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
3066
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
3067
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
3068
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
3069
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
3070
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
3071
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
3072
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
3073
};
3074