Coverage Report

Created: 2026-06-15 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/entropy.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
12.5k
#define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
36
3.72k
#define CU_DQP_EG_k    0 // exp-golomb order
37
0
#define START_VALUE    8 // start value for dpcm mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93
{
94
    { 183, },
95
    { 154, },
96
    { 184, },
97
};
98
99
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100
{
101
    { 152,  139, },
102
    { 152,  139, },
103
    {  63,  139, },
104
};
105
106
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128
{
129
    { 154,  154,  154, },
130
    { 154,  154,  154, },
131
    { 154,  154,  154, },
132
};
133
134
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135
{
136
    { 153,  111,  149,   92,  167,  154,  154 },
137
    { 153,  111,  149,  107,  167,  154,  154 },
138
    { 111,  141,   94,  138,  182,  154,  154 },
139
};
140
141
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149
{
150
    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151
      108,  123,   93 },
152
    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153
      108,  123,  108 },
154
    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155
      108,  123,   63 },
156
};
157
158
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159
{
160
    { 121,  140,
161
      61,  154, },
162
    { 121,  140,
163
      61,  154, },
164
    {  91,  171,
165
       134,  141, },
166
};
167
168
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169
{
170
    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171
    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172
    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173
};
174
175
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176
{
177
    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178
    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179
    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180
};
181
182
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183
{
184
    { 107,  167,   91,  107,  107,  167, },
185
    { 107,  167,   91,  122,  107,  167, },
186
    { 138,  153,  136,  167,  152,  152, },
187
};
188
189
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197
{
198
    { 153,  },
199
    { 153,  },
200
    { 153,  },
201
};
202
203
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204
{
205
    { 160, },
206
    { 185, },
207
    { 200, },
208
};
209
210
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211
{
212
    { 224,  167,  122, },
213
    { 124,  138,   94, },
214
    { 153,  138,  138, },
215
};
216
217
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218
{
219
    { 139,  139 },
220
    { 139,  139 },
221
    { 139,  139 },
222
};
223
224
Entropy::Entropy()
225
1.84M
{
226
1.84M
    markValid();
227
1.84M
    m_fracBits = 0;
228
1.84M
    m_pad = 0;
229
1.84M
    m_meanQP = 0;
230
1.84M
    X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
231
1.84M
}
232
233
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
234
void Entropy::codeVPS(const VPS& vps, const SPS& sps)
235
#else
236
void Entropy::codeVPS(const VPS& vps)
237
#endif
238
578
{
239
578
    int maxLayers = (vps.m_numLayers > 1 || vps.m_numViews > 1) + 1;
240
578
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
241
578
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
242
578
    WRITE_CODE(maxLayers - 1, 6, "vps_reserved_zero_6bits");
243
578
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
244
578
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
245
578
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
246
247
578
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
248
249
578
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
250
251
1.15k
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
252
578
    {
253
578
        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
254
578
        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
255
578
        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
256
578
    }
257
258
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
259
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
260
    {
261
        WRITE_CODE(maxLayers - 1, 6, "vps_max_nuh_reserved_zero_layer_id");
262
        WRITE_UVLC(vps.m_vpsNumLayerSetsMinus1, "vps_num_layer_sets_minus1");
263
        for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
264
        {
265
#if ENABLE_MULTIVIEW
266
            if (vps.m_numViews > 1)
267
            {
268
                for (int j = 0; j < vps.m_numViews; j++)
269
                {
270
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
271
                }
272
            }
273
#endif
274
#if ENABLE_ALPHA
275
            if (vps.m_numLayers > 1)
276
            {
277
                for (int j = 0; j < vps.m_numLayers; j++)
278
                {
279
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
280
                }
281
            }
282
#endif
283
        }
284
    }
285
    else
286
    {
287
        WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
288
        WRITE_UVLC(0, "vps_max_op_sets_minus1");
289
    }
290
#else
291
578
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
292
578
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
293
578
#endif
294
295
578
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
296
297
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
298
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
299
    {
300
        WRITE_FLAG(vps.vps_extension_flag, "vps_extension_flag");
301
302
        if (vps.vps_extension_flag)
303
        {
304
            while (m_bitIf->getNumberOfWrittenBits() % X265_BYTE != 0)
305
            {
306
                WRITE_FLAG(1, "vps_extension_alignment_bit_equal_to_one");
307
            }
308
309
            WRITE_CODE(vps.ptl.levelIdc, 8, "general_level_idc");
310
            if (vps.maxTempSubLayers > 1)
311
            {
312
                for (uint32_t i = 0; i < vps.maxTempSubLayers - 1; i++)
313
                {
314
                    WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
315
                    WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
316
                }
317
                for (int i = vps.maxTempSubLayers - 1; i < 8; i++)
318
                    WRITE_CODE(0, 2, "reserved_zero_2bits");
319
            }
320
321
            WRITE_FLAG(vps.splitting_flag, "splitting flag");
322
            for (int i = 0; i < MAX_VPS_NUM_SCALABILITY_TYPES; i++)
323
            {
324
                WRITE_FLAG(vps.m_scalabilityMask[i], "scalability_mask[i]");
325
            }
326
            for (int i = 0; i < vps.scalabilityTypes - vps.splitting_flag; i++)
327
            {
328
                WRITE_CODE(vps.m_dimensionIdLen[i] - 1, 3, "dimension_id_len_minus1[i]");
329
            }
330
            WRITE_FLAG(vps.m_nuhLayerIdPresentFlag, "vps_nuh_layer_id_present_flag");
331
            for (int i = 1; i < maxLayers; i++)
332
            {
333
                if (vps.m_nuhLayerIdPresentFlag)
334
                    WRITE_CODE(vps.m_layerIdInNuh[i], 6, "layer_id_in_nuh[i]");
335
336
                if (!vps.splitting_flag)
337
                {
338
                    for (int j = 0; j < vps.scalabilityTypes; j++)
339
                    {
340
                        uint8_t bits = vps.m_dimensionIdLen[j];
341
                        WRITE_CODE(vps.m_dimensionId[i][j], bits, "dimension_id[i][j]");
342
                    }
343
                }
344
            }
345
            WRITE_CODE(vps.m_viewIdLen, 4, "view_id_len");
346
347
#if ENABLE_ALPHA
348
            if (vps.m_numLayers > 1)
349
            {
350
                WRITE_FLAG(0, "direct_dependency_flag[1][0]");
351
                WRITE_UVLC(0, "num_add_layer_sets");
352
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
353
                WRITE_FLAG(0, "max_tid_ref_present_flag");
354
                WRITE_FLAG(0, "default_ref_layers_active_flag");
355
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
356
                WRITE_FLAG(1, "vps_profile_present_flag");
357
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
358
359
                WRITE_UVLC(0, "num_add_olss");
360
                WRITE_CODE(0, 2, "default_output_layer_idc");
361
                WRITE_CODE(1, 2, "profile_tier_level_idx[ i ][ j ]");
362
                WRITE_CODE(2, 2, "profile_tier_level_idx[ i ][ j ]");
363
364
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
365
366
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
367
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
368
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
369
370
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
371
372
                if (sps.chromaFormatIdc == X265_CSP_I444)
373
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
374
375
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
376
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
377
378
                const Window& conf = sps.conformanceWindow;
379
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
380
                if (conf.bEnabled)
381
                {
382
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
383
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
384
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
385
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
386
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
387
                }
388
389
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
390
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
391
                WRITE_FLAG(1, "poc_lsb_not_present_flag[");
392
393
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
394
                {
395
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
396
                    for (uint32_t j = 0; j < vps.maxTempSubLayers ; j++)
397
                    {
398
                        if(j > 0)
399
                        WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
400
401
                        for(int k = 0; k < vps.m_numLayersInIdList[i]; k++)
402
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
403
404
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
405
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
406
                    }
407
                }
408
409
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
410
411
                WRITE_FLAG(0, "default_direct_dependency_flag");
412
                WRITE_UVLC(0, "vps_non_vui_extension_length");
413
                WRITE_FLAG(0, "vps_vui_present_flag");
414
                WRITE_FLAG(0, "vps_extension2_flag");
415
        }
416
#endif
417
418
#if ENABLE_MULTIVIEW
419
            if (vps.m_numViews > 1)
420
            {
421
                for (uint8_t i = 0; i < vps.m_numViews; i++)
422
                    WRITE_CODE(i, vps.m_viewIdLen, "view_id_val[i]");
423
424
                for (int i = 1; i < vps.m_numViews; i++)
425
                {
426
                    for (int j = 0; j < i; j++)
427
                    {
428
                        if (j == 0)
429
                            WRITE_FLAG(1, "direct_dependency_flag[1][0]");
430
                        else
431
                            WRITE_FLAG(0, "direct_dependency_flag[1][0]");
432
                    }
433
                }
434
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
435
                WRITE_FLAG(0, "max_tid_ref_present_flag");
436
                WRITE_FLAG(1, "default_ref_layers_active_flag");
437
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
438
                WRITE_FLAG(1, "vps_profile_present_flag[i]");
439
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
440
                WRITE_UVLC(0, "num_add_olss");
441
                WRITE_CODE(0, 2, "default_output_layer_idc");
442
443
                for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
444
                {
445
                    for (int j = 0; j < vps.m_numViews; j++)
446
                    {
447
                        WRITE_CODE((j == 0) ? 1 : 2, 2, "profile_tier_level_idx[ i ][ j ]");
448
                    }
449
                }
450
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
451
452
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
453
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
454
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
455
456
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
457
458
                if (sps.chromaFormatIdc == X265_CSP_I444)
459
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
460
461
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
462
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
463
464
                const Window& conf = sps.conformanceWindow;
465
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
466
                if (conf.bEnabled)
467
                {
468
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
469
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
470
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
471
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
472
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
473
                }
474
475
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
476
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
477
478
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
479
                {
480
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
481
                    for (uint32_t j = 0; j < vps.maxTempSubLayers; j++)
482
                    {
483
                        if (j > 0)
484
                            WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
485
486
                        for (int k = 0; k < vps.m_numLayersInIdList[i]; k++)
487
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
488
489
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
490
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
491
                    }
492
                }
493
494
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
495
496
                WRITE_FLAG(1, "default_direct_dependency_flag");
497
                WRITE_CODE(2, 2, "default_direct_dependency_type");
498
                WRITE_UVLC(0, "vps_non_vui_extension_length");
499
                WRITE_FLAG(0, "vps_vui_present_flag");
500
                WRITE_FLAG(0, "vps_extension2_flag");
501
            }
502
#endif
503
        }
504
    }
505
    else
506
        WRITE_FLAG(0, "vps_extension_flag");
507
#else
508
578
    WRITE_FLAG(0, "vps_extension_flag");
509
578
#endif
510
578
}
511
512
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl, int layer)
513
578
{
514
578
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
515
#if ENABLE_MULTIVIEW
516
    if(layer != 0)
517
        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
518
    else
519
        WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
520
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
521
#else
522
578
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
523
578
#endif
524
578
    {
525
578
        WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
526
578
        codeProfileTier(ptl, sps.maxTempSubLayers);
527
578
    }
528
529
578
    WRITE_UVLC(layer, "sps_seq_parameter_set_id");
530
#if ENABLE_MULTIVIEW
531
    if (layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
532
        WRITE_FLAG(0, "update_rep_format_flag");
533
    else
534
#endif
535
578
    {
536
578
        WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
537
538
578
        if (sps.chromaFormatIdc == X265_CSP_I444)
539
0
            WRITE_FLAG(0,                       "separate_colour_plane_flag");
540
541
578
        WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
542
578
        WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
543
544
578
        const Window& conf = sps.conformanceWindow;
545
578
        WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
546
578
        if (conf.bEnabled)
547
430
        {
548
430
            int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
549
430
            WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
550
430
            WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
551
430
            WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
552
430
            WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
553
430
        }
554
555
578
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
556
578
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
557
578
    }
558
559
578
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
560
#if ENABLE_MULTIVIEW
561
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
562
#endif
563
578
    {
564
578
        WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
565
566
1.15k
        for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
567
578
        {
568
578
            WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
569
578
            WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
570
578
            WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
571
578
        }
572
578
    }
573
574
578
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
575
578
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
576
578
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
577
578
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
578
578
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
579
578
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
580
578
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
581
578
    if (scalingList.m_bEnabled)
582
0
    {
583
#if ENABLE_MULTIVIEW
584
        if ((layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
585
            WRITE_FLAG(sps.spsInferScalingListFlag, "sps_infer_scaling_list_flag");
586
        if(sps.spsInferScalingListFlag)
587
            WRITE_CODE(0, 6, "sps_scaling_list_ref_layer_id");
588
        else
589
#endif
590
0
        {
591
0
            WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
592
0
            if (scalingList.m_bDataPresent)
593
0
                codeScalingList(scalingList);
594
0
        }
595
0
    }
596
578
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
597
578
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
598
599
578
    WRITE_FLAG(0, "pcm_enabled_flag");
600
578
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
601
578
    for (int i = 0; i < sps.spsrpsNum; i++)
602
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
603
578
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
604
605
578
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
606
578
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
607
608
578
    WRITE_FLAG(1, "vui_parameters_present_flag");
609
578
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo, layer);
610
611
578
    WRITE_FLAG(sps.sps_extension_flag, "sps_extension_flag");
612
613
#if ENABLE_MULTIVIEW
614
    if (sps.sps_extension_flag && sps.maxViews > 1)
615
    {
616
        WRITE_FLAG(0, "sps_range_extensions_flag");
617
        WRITE_FLAG(sps.maxViews > 1, "sps_multilayer_extension_flag");
618
        WRITE_FLAG(0, "sps_3d_extension_flag");
619
        WRITE_CODE(0, 5, "sps_extension_5bits");
620
621
        if (layer == 0)
622
            WRITE_FLAG(0, "inter_view_mv_vert_constraint_flag");
623
        else
624
            WRITE_FLAG(1, "inter_view_mv_vert_constraint_flag");
625
    }
626
#endif
627
628
#if ENABLE_SCC_EXT
629
    if (ptl.profileIdc[0] == Profile::MAINSCC)
630
    {
631
        bool sps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
632
        sps_extension_flags[SCC_EXT_IDX] = true;
633
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
634
            WRITE_FLAG(sps_extension_flags[i], "sps_extension_flag");
635
        WRITE_FLAG(1, "intra_block_copy_enabled_flag");
636
        WRITE_FLAG(0, "palette_mode_enabled_flag");
637
        WRITE_CODE(0, 2, "motion_vector_resolution_control_idc");
638
        WRITE_FLAG(0, "intra_boundary_filter_disabled_flag");
639
    }
640
#endif
641
578
}
642
643
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26, int layer)
644
578
{
645
578
    WRITE_UVLC(layer,                          "pps_pic_parameter_set_id");
646
578
    WRITE_UVLC(layer,                          "pps_seq_parameter_set_id");
647
578
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
648
578
    WRITE_FLAG(0,                          "output_flag_present_flag");
649
578
    WRITE_CODE(pps.maxViews > 1 ? 2 : 0, 3,"num_extra_slice_header_bits");
650
578
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
651
578
    WRITE_FLAG(0,                          "cabac_init_present_flag");
652
578
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
653
578
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
654
655
578
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
656
578
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
657
578
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
658
659
578
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
660
578
    if (pps.bUseDQP)
661
450
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
662
663
578
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
664
578
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
665
578
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
666
667
578
    WRITE_FLAG(layer ? 0 : pps.bUseWeightPred,            "weighted_pred_flag");
668
578
    WRITE_FLAG(layer ? 0 : pps.bUseWeightedBiPred,        "weighted_bipred_flag");
669
578
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
670
578
    WRITE_FLAG(0,                             "tiles_enabled_flag");
671
578
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
672
578
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
673
674
578
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
675
578
    if (pps.bDeblockingFilterControlPresent)
676
0
    {
677
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
678
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
679
0
        if (!pps.bPicDisableDeblockingFilter)
680
0
        {
681
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
682
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
683
0
        }
684
0
    }
685
686
578
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
687
578
    WRITE_FLAG(0, "lists_modification_present_flag");
688
578
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
689
578
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
690
578
    WRITE_FLAG(pps.pps_extension_flag, "pps_extension_flag");
691
692
#if ENABLE_MULTIVIEW
693
    if (pps.pps_extension_flag && pps.maxViews > 1)
694
    {
695
        WRITE_FLAG(0, "pps_range_extensions_flag");
696
        WRITE_FLAG(pps.maxViews > 1, "pps_multilayer_extension_flag");
697
        WRITE_FLAG(0, "pps_3d_extension_flag");
698
        WRITE_CODE(0, 5, "pps_extension_5bits");
699
700
        if (pps.maxViews > 1)
701
        {
702
            WRITE_FLAG(0, "poc_reset_info_present_flag");
703
            WRITE_FLAG(0, "pps_infer_scaling_list_flag");
704
            WRITE_UVLC(0, "num_ref_loc_offsets");
705
            WRITE_FLAG(0, "colour_mapping_enabled_flag");
706
        }
707
    }
708
#endif
709
710
711
#if ENABLE_SCC_EXT
712
    if (pps.profileIdc == Profile::MAINSCC)
713
    {
714
        bool pps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
715
        pps_extension_flags[SCC_EXT_IDX] = true;
716
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
717
            WRITE_FLAG(pps_extension_flags[i], "pps_extension_flag");
718
        WRITE_FLAG(1, "curr_pic_as_ref_enabled_pps_flag");
719
        WRITE_FLAG(0, "adaptive_colour_trans_flag");
720
        WRITE_FLAG(0, "palette_predictor_initializer_flag");
721
    }
722
#endif
723
578
}
724
725
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers, int layer)
726
1.15k
{
727
1.15k
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
728
1.15k
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
729
1.15k
    WRITE_CODE(ptl.profileIdc[layer], 5,   "XXX_profile_idc[]");
730
38.1k
    for (int j = 0; j < 32; j++)
731
36.9k
    {
732
36.9k
        if (layer)
733
0
            WRITE_FLAG(j == ptl.profileIdc[layer] ? 1 : 0, "XXX_profile_compatibility_flag[][j]");
734
36.9k
        else
735
36.9k
            WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
736
36.9k
    }
737
738
1.15k
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
739
1.15k
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
740
1.15k
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
741
1.15k
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
742
743
1.15k
    if (ptl.profileIdc[layer] == Profile::MAINREXT || ptl.profileIdc[layer] == Profile::HIGHTHROUGHPUTREXT || ptl.profileIdc[layer] == Profile::SCALABLEMAIN || ptl.profileIdc[layer] == Profile::SCALABLEMAIN10 || ptl.profileIdc[layer] == Profile::MULTIVIEWMAIN || ptl.profileIdc[layer] == Profile::MAINSCC)
744
0
    {
745
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
746
0
        int csp = ptl.chromaFormatConstraint;
747
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
748
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
749
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
750
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
751
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
752
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
753
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
754
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
755
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
756
0
        if (ptl.profileIdc[layer] == Profile::MAINSCC)
757
0
        {
758
0
            WRITE_FLAG(bitDepthConstraint <= 14, "max_14bit_constraint_flag");
759
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[0..15]");
760
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[16..31]");
761
0
            WRITE_FLAG(0, "reserved_zero_33bits[32]");
762
0
        }
763
0
        else
764
0
        {
765
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[0..15]");
766
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[16..31]");
767
0
            WRITE_CODE(0, 3, "XXX_reserved_zero_35bits[32..34]");
768
0
        }
769
0
    }
770
1.15k
    else
771
1.15k
    {
772
1.15k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
773
1.15k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
774
1.15k
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
775
1.15k
    }
776
1.15k
    if (ptl.profileIdc[layer] == Profile::MAINSCC)
777
0
        WRITE_FLAG(false, "inbld_flag");
778
779
1.15k
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
780
781
1.15k
    if (maxTempSubLayers > 1)
782
0
    {
783
0
        for(int i = 0; i < maxTempSubLayers - 1; i++)
784
0
        {
785
0
            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
786
0
            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
787
0
        }
788
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
789
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
790
0
    }
791
1.15k
}
792
793
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo, int layer)
794
578
{
795
578
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
796
578
    if (vui.aspectRatioInfoPresentFlag)
797
0
    {
798
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
799
0
        if (vui.aspectRatioIdc == 255)
800
0
        {
801
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
802
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
803
0
        }
804
0
    }
805
806
578
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
807
578
    if (vui.overscanInfoPresentFlag)
808
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
809
810
578
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
811
578
    if (vui.videoSignalTypePresentFlag)
812
578
    {
813
578
        WRITE_CODE(vui.videoFormat, 3, "video_format");
814
578
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
815
578
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
816
578
        if (vui.colourDescriptionPresentFlag)
817
0
        {
818
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
819
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
820
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
821
0
        }
822
578
    }
823
824
578
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
825
578
    if (vui.chromaLocInfoPresentFlag)
826
0
    {
827
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
828
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
829
0
    }
830
831
578
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
832
578
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
833
578
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
834
835
578
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
836
578
    if (vui.defaultDisplayWindow.bEnabled)
837
0
    {
838
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
839
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
840
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
841
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
842
0
    }
843
844
578
    if(layer)
845
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
846
578
    else
847
578
    {
848
578
        if (!bEmitVUITimingInfo)
849
0
            WRITE_FLAG(0, "vui_timing_info_present_flag");
850
578
        else
851
578
        {
852
578
            WRITE_FLAG(1, "vui_timing_info_present_flag");
853
578
            WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
854
578
            WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
855
578
            WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
856
578
            if (!bEmitVUIHRDInfo)
857
0
                WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
858
578
            else
859
578
            {
860
578
                WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
861
578
                if (vui.hrdParametersPresentFlag)
862
0
                    codeHrdParameters(vui.hrdParameters, maxSubTLayers);
863
578
            }
864
578
        }
865
578
    }
866
867
578
    WRITE_FLAG(0, "bitstream_restriction_flag");
868
578
}
869
870
void Entropy::codeScalingList(const ScalingList& scalingList)
871
0
{
872
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
873
0
    {
874
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
875
0
        {
876
0
            int predList = scalingList.checkPredMode(sizeId, listId);
877
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
878
0
            if (predList >= 0)
879
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
880
0
            else // DPCM Mode
881
0
                codeScalingList(scalingList, sizeId, listId);
882
0
        }
883
0
    }
884
0
}
885
886
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
887
0
{
888
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
889
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
890
0
    int nextCoef = START_VALUE;
891
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
892
0
    int data;
893
894
0
    if (sizeId > BLOCK_8x8)
895
0
    {
896
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
897
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
898
0
    }
899
0
    for (int i = 0; i < coefNum; i++)
900
0
    {
901
0
        data = src[scan[i]] - nextCoef;
902
0
        if (data < -128)
903
0
            data += 256;
904
0
        if (data > 127)
905
0
            data -= 256;
906
0
        nextCoef = (nextCoef + data + 256) % 256;
907
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
908
0
    }
909
0
}
910
911
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
912
0
{
913
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
914
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
915
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
916
917
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
918
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
919
920
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
921
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
922
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
923
924
0
    for (int i = 0; i < maxSubTLayers; i++)
925
0
    {
926
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
927
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
928
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
929
930
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
931
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
932
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
933
0
    }
934
0
}
935
936
void Entropy::codeAUD(const Slice& slice)
937
0
{
938
0
    int picType;
939
940
0
    switch (slice.m_sliceType)
941
0
    {
942
0
    case I_SLICE:
943
0
        picType = 0;
944
0
        break;
945
0
    case P_SLICE:
946
0
        picType = 1;
947
0
        break;
948
0
    case B_SLICE:
949
0
        picType = 2;
950
0
        break;
951
0
    default:
952
0
        picType = 7;
953
0
        break;
954
0
    }
955
956
0
    WRITE_CODE(picType, 3, "pic_type");
957
0
}
958
959
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp, int layer)
960
578
{
961
578
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
962
578
    if (slice.getRapPicFlag())
963
578
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
964
965
578
    WRITE_UVLC(layer, "slice_pic_parameter_set_id");
966
967
    /* x265 does not use dependent slices, so always write all this data */
968
578
    if (slice_addr)
969
0
    {
970
        // if( dependent_slice_segments_enabled_flag )
971
        //     dependent_slice_segment_flag             u(1)
972
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
973
0
    }
974
975
#if ENABLE_MULTIVIEW
976
    if (encData.m_param->numViews > 1)
977
    {
978
        int esb = 0;
979
        if (2 > esb)
980
        {
981
            esb++;
982
            WRITE_FLAG(0, "discardable_flag");
983
        }
984
        if (2 > esb)
985
        {
986
            esb++;
987
            WRITE_FLAG(0, "cross_layer_bla_flag");
988
        }
989
    }
990
#endif
991
992
578
    WRITE_UVLC(slice.m_sliceType, "slice_type");
993
994
578
    if ((slice.m_param->numViews > 1 && layer > 0) || !slice.getIdrPicFlag())
995
0
    {
996
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
997
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
998
0
    }
999
578
    if (!slice.getIdrPicFlag())
1000
0
    {
1001
#if _DEBUG || CHECKED_BUILD
1002
        // check for bitstream restriction stating that:
1003
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
1004
        // Ideally this process should not be repeated for each slice in a picture
1005
        if (slice.isIRAP())
1006
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
1007
            {
1008
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
1009
            }
1010
#endif
1011
1012
0
        if (slice.m_rpsIdx < 0)
1013
0
        {
1014
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
1015
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
1016
0
        }
1017
0
        else
1018
0
        {
1019
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
1020
0
            int numBits = 0;
1021
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
1022
0
                numBits++;
1023
1024
0
            if (numBits > 0)
1025
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
1026
0
        }
1027
1028
0
        if (slice.m_sps->bTemporalMVPEnabled)
1029
#if ENABLE_SCC_EXT
1030
            WRITE_FLAG(slice.m_bTemporalMvp, "slice_temporal_mvp_enable_flag");
1031
#else
1032
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
1033
0
#endif
1034
0
    }
1035
578
    const SAOParam *saoParam = encData.m_saoParam;
1036
578
    if (slice.m_bUseSao)
1037
578
    {
1038
578
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
1039
578
        if (encData.m_param->internalCsp != X265_CSP_I400)
1040
578
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
1041
578
    }
1042
0
    else if(encData.m_param->selectiveSAO)
1043
0
    {
1044
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
1045
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1046
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
1047
0
    }
1048
1049
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
1050
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
1051
1052
578
    if (!slice.isIntra())
1053
0
    {
1054
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
1055
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
1056
0
        if (overrideFlag)
1057
0
        {
1058
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
1059
0
            if (slice.isInterB())
1060
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
1061
0
            else
1062
0
            {
1063
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
1064
0
            }
1065
0
        }
1066
0
    }
1067
578
    else
1068
578
    {
1069
578
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
1070
578
    }
1071
1072
578
    if (slice.isInterB())
1073
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
1074
1075
#if ENABLE_SCC_EXT
1076
    if (slice.m_bTemporalMvp)
1077
#else
1078
578
    if (slice.m_sps->bTemporalMVPEnabled)
1079
578
#endif
1080
578
    {
1081
578
        if (slice.m_sliceType == B_SLICE)
1082
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
1083
1084
578
        if (slice.m_sliceType != I_SLICE &&
1085
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
1086
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
1087
0
        {
1088
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
1089
0
        }
1090
578
    }
1091
578
    if (((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE)) && !layer)
1092
0
        codePredWeightTable(slice);
1093
1094
578
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
1095
578
    if (!slice.isIntra())
1096
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
1097
1098
578
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
1099
578
    WRITE_SVLC(code, "slice_qp_delta");
1100
1101
578
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
1102
0
    {
1103
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
1104
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
1105
0
    }
1106
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
1107
    //       We didn't support filter across slice board, so disable it now
1108
1109
578
    if (encData.m_param->maxSlices <= 1)
1110
578
    {
1111
578
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
1112
578
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
1113
1114
578
        if (isSAOEnabled || isDBFEnabled)
1115
578
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
1116
578
    }
1117
578
}
1118
1119
/** write wavefront substreams sizes for the slice header */
1120
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
1121
470
{
1122
470
    uint32_t offsetLen = 1;
1123
2.64k
    while (maxOffset >= (1U << offsetLen))
1124
2.17k
    {
1125
2.17k
        offsetLen++;
1126
2.17k
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
1127
2.17k
    }
1128
1129
470
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
1130
470
    if (numSubStreams > 0)
1131
470
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
1132
1133
2.66k
    for (uint32_t i = 0; i < numSubStreams; i++)
1134
2.19k
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
1135
470
}
1136
1137
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
1138
0
{
1139
0
    if (idx > 0)
1140
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
1141
1142
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
1143
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
1144
0
    int prev = 0;
1145
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
1146
0
    {
1147
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
1148
0
        prev = rps.deltaPOC[j];
1149
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
1150
0
    }
1151
1152
0
    prev = 0;
1153
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
1154
0
    {
1155
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
1156
0
        prev = rps.deltaPOC[j];
1157
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
1158
0
    }
1159
0
}
1160
1161
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
1162
25.8k
{
1163
25.8k
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
1164
25.8k
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
1165
25.8k
}
1166
1167
/* encode a CU block recursively */
1168
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
1169
96.4k
{
1170
96.4k
    const Slice* slice = ctu.m_slice;
1171
1172
96.4k
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
1173
96.4k
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
1174
1175
96.4k
    if (!cuUnsplitFlag)
1176
20.6k
    {
1177
20.6k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1178
20.6k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1179
5.74k
            bEncodeDQP = true;
1180
103k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1181
82.4k
        {
1182
82.4k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1183
82.4k
            if (childGeom.flags & CUGeom::PRESENT)
1184
45.9k
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1185
82.4k
        }
1186
20.6k
        return;
1187
20.6k
    }
1188
1189
75.8k
    if (cuSplitFlag) 
1190
55.4k
        codeSplitFlag(ctu, absPartIdx, depth);
1191
1192
75.8k
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
1193
6.15k
    {
1194
6.15k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1195
6.15k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1196
262
            bEncodeDQP = true;
1197
30.7k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1198
24.6k
        {
1199
24.6k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1200
24.6k
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1201
24.6k
        }
1202
6.15k
        return;
1203
6.15k
    }
1204
1205
69.6k
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1206
30.6k
        bEncodeDQP = true;
1207
1208
69.6k
    if (slice->m_pps->bTransquantBypassEnabled)
1209
18.4k
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
1210
1211
69.6k
    if (!slice->isIntra())
1212
0
    {
1213
0
        codeSkipFlag(ctu, absPartIdx);
1214
0
        if (ctu.isSkipped(absPartIdx))
1215
0
        {
1216
0
            codeMergeIndex(ctu, absPartIdx);
1217
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1218
0
            return;
1219
0
        }
1220
0
        codePredMode(ctu.m_predMode[absPartIdx]);
1221
0
    }
1222
1223
69.6k
    codePartSize(ctu, absPartIdx, depth);
1224
1225
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1226
69.6k
    codePredInfo(ctu, absPartIdx);
1227
1228
69.6k
    uint32_t tuDepthRange[2];
1229
69.6k
    if (ctu.isIntra(absPartIdx))
1230
69.6k
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
1231
1
    else
1232
1
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
1233
1234
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
1235
69.6k
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
1236
1237
    // --- write terminating bit ---
1238
69.6k
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1239
69.6k
}
1240
1241
/* Return bit count of signaling inter mode */
1242
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
1243
0
{
1244
0
    uint32_t bits;
1245
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
1246
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
1247
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1248
0
    switch (partSize)
1249
0
    {
1250
0
    case SIZE_2Nx2N:
1251
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1252
0
        break;
1253
1254
0
    case SIZE_2NxN:
1255
0
    case SIZE_2NxnU:
1256
0
    case SIZE_2NxnD:
1257
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1258
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1259
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1260
0
        {
1261
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1262
0
            if (partSize != SIZE_2NxN)
1263
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1264
0
        }
1265
0
        break;
1266
1267
0
    case SIZE_Nx2N:
1268
0
    case SIZE_nLx2N:
1269
0
    case SIZE_nRx2N:
1270
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1271
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1272
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1273
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1274
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1275
0
        {
1276
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1277
0
            if (partSize != SIZE_Nx2N)
1278
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1279
0
        }
1280
0
        break;
1281
0
    default:
1282
0
        X265_CHECK(0, "invalid CU partition\n");
1283
0
        break;
1284
0
    }
1285
1286
0
    return bits;
1287
0
}
1288
1289
/* finish encoding a cu and handle end-of-slice conditions */
1290
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
1291
69.6k
{
1292
69.6k
    const Slice* slice = ctu.m_slice;
1293
69.6k
    uint32_t realEndAddress = slice->m_endCUAddr;
1294
69.6k
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
1295
69.6k
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
1296
1297
69.6k
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
1298
69.6k
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
1299
69.6k
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
1300
69.6k
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
1301
69.6k
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
1302
42.0k
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
1303
1304
69.6k
    if (slice->m_pps->bUseDQP)
1305
51.1k
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
1306
1307
69.6k
    if (granularityBoundary)
1308
25.8k
    {
1309
        // Encode slice finish
1310
25.8k
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
1311
25.8k
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
1312
1.15k
            bTerminateSlice = 1;
1313
1314
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
1315
25.8k
        if (!bTerminateSlice)
1316
24.6k
            encodeBinTrm(0);    // end_of_slice_segment_flag
1317
1318
25.8k
        if (!m_bitIf)
1319
12.9k
            resetBits(); // TODO: most likely unnecessary
1320
25.8k
    }
1321
69.6k
}
1322
1323
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1324
                              bool& bCodeDQP, const uint32_t depthRange[2])
1325
1.71M
{
1326
1.71M
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1327
1328
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1329
     * so we have checks to make sure the implied value matches our intentions */
1330
1.71M
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1331
261k
    {
1332
261k
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1333
261k
    }
1334
1.45M
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1335
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1336
0
    {
1337
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1338
0
    }
1339
1.45M
    else if (log2CurSize > depthRange[1])
1340
0
    {
1341
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1342
0
    }
1343
1.45M
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1344
1.05M
    {
1345
1.05M
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1346
1.05M
    }
1347
407k
    else
1348
407k
    {
1349
407k
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1350
407k
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1351
407k
    }
1352
1353
1.71M
    uint32_t hChromaShift = cu.m_hChromaShift;
1354
1.71M
    uint32_t vChromaShift = cu.m_vChromaShift;
1355
1.71M
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
1356
1.71M
    if (!curDepth || !bSmallChroma)
1357
670k
    {
1358
670k
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
1359
670k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
1360
670k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
1361
670k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
1362
670k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
1363
670k
    }
1364
1365
1.71M
    if (subdiv)
1366
262k
    {
1367
262k
        --log2CurSize;
1368
262k
        ++curDepth;
1369
1370
262k
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1371
1372
262k
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1373
262k
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1374
262k
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1375
262k
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1376
262k
        return;
1377
262k
    }
1378
1379
1.45M
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
1380
1381
1.45M
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
1382
0
    {
1383
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
1384
0
    }
1385
1.45M
    else
1386
1.45M
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1387
1388
1.45M
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1389
1.45M
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
1390
1.45M
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
1391
1.45M
    if (!(cbfY || cbfU || cbfV))
1392
1.45M
        return;
1393
1394
    // dQP: only for CTU once
1395
6.78k
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1396
3.29k
    {
1397
3.29k
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1398
3.29k
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1399
3.29k
        codeDeltaQP(cu, absPartIdxLT);
1400
3.29k
        bCodeDQP = false;
1401
3.29k
    }
1402
1403
6.78k
    if (cbfY)
1404
4.07k
    {
1405
4.07k
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1406
4.07k
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1407
4.07k
        if (!(cbfU || cbfV))
1408
531
            return;
1409
4.07k
    }
1410
1411
6.25k
    if (bSmallChroma)
1412
3.87k
    {
1413
3.87k
        if ((absPartIdx & 3) != 3)
1414
2.90k
            return;
1415
1416
968
        const uint32_t log2CurSizeC = 2;
1417
968
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1418
968
        const uint32_t curPartNum = 4;
1419
968
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1420
2.90k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1421
1.93k
        {
1422
1.93k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1423
1.93k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1424
1.93k
            do
1425
1.93k
            {
1426
1.93k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1427
1.93k
                {
1428
1.93k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1429
1.93k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1430
1.93k
                }
1431
1.93k
            }
1432
1.93k
            while (tuIterator.isNextSection());
1433
1.93k
        }
1434
968
    }
1435
2.38k
    else
1436
2.38k
    {
1437
2.38k
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1438
2.38k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1439
2.38k
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1440
2.38k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1441
7.47k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1442
5.09k
        {
1443
5.09k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1444
5.09k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1445
5.09k
            do
1446
5.09k
            {
1447
5.09k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1448
5.09k
                {
1449
5.09k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1450
5.09k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1451
5.09k
                }
1452
5.09k
            }
1453
5.09k
            while (tuIterator.isNextSection());
1454
5.09k
        }
1455
2.38k
    }
1456
6.25k
}
1457
1458
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1459
                              bool& bCodeDQP, const uint32_t depthRange[2])
1460
0
{
1461
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1462
1463
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1464
     * so we have checks to make sure the implied value matches our intentions */
1465
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1466
0
    {
1467
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1468
0
    }
1469
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1470
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1471
0
    {
1472
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1473
0
    }
1474
0
    else if (log2CurSize > depthRange[1])
1475
0
    {
1476
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1477
0
    }
1478
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1479
0
    {
1480
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1481
0
    }
1482
0
    else
1483
0
    {
1484
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1485
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1486
0
    }
1487
1488
0
    if (subdiv)
1489
0
    {
1490
0
        --log2CurSize;
1491
0
        ++curDepth;
1492
1493
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1494
1495
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1496
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1497
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1498
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1499
0
        return;
1500
0
    }
1501
1502
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1503
0
    {
1504
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1505
0
    }
1506
0
    else
1507
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1508
1509
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1510
1511
0
    if (!cbfY)
1512
0
        return;
1513
1514
    // dQP: only for CTU once
1515
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1516
0
    {
1517
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1518
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1519
0
        codeDeltaQP(cu, absPartIdxLT);
1520
0
        bCodeDQP = false;
1521
0
    }
1522
1523
0
    if (cbfY)
1524
0
    {
1525
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1526
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1527
0
    }
1528
0
}
1529
1530
1531
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1532
669k
{
1533
669k
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1534
669k
    {
1535
669k
        codeIntraDirLumaAng(cu, absPartIdx, true);
1536
669k
        if (cu.m_chromaFormat != X265_CSP_I400)
1537
669k
        {
1538
669k
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1539
669k
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1540
1541
669k
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1542
1543
669k
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1544
0
            {
1545
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1546
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1547
0
                {
1548
0
                    absPartIdx += qNumParts;
1549
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1550
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1551
0
                }
1552
0
            }
1553
669k
        }
1554
669k
    }
1555
18.4E
    else // if it is inter mode, encode motion vector and reference index
1556
18.4E
        codePUWise(cu, absPartIdx);
1557
669k
}
1558
1559
/** encode motion information for every PU block */
1560
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1561
0
{
1562
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1563
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1564
1565
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1566
0
    {
1567
0
        codeMergeFlag(cu, subPartIdx);
1568
0
        if (cu.m_mergeFlag[subPartIdx])
1569
0
            codeMergeIndex(cu, subPartIdx);
1570
0
        else
1571
0
        {
1572
0
            if (cu.m_slice->isInterB())
1573
0
                codeInterDir(cu, subPartIdx);
1574
1575
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1576
0
            for (uint32_t list = 0; list < 2; list++)
1577
0
            {
1578
0
                if (interDir & (1 << list))
1579
0
                {
1580
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1581
1582
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1583
0
                    codeMvd(cu, subPartIdx, list);
1584
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1585
0
                }
1586
0
            }
1587
0
        }
1588
0
    }
1589
0
}
1590
1591
/** encode reference frame index for a PU block */
1592
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1593
0
{
1594
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1595
1596
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1597
0
        codeRefFrmIdx(cu, absPartIdx, list);
1598
0
}
1599
1600
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1601
669k
{
1602
669k
    if (!cu.isIntra(absPartIdx))
1603
0
    {
1604
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1605
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1606
0
        if (!cu.getQtRootCbf(absPartIdx))
1607
0
            return;
1608
0
    }
1609
1610
669k
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1611
669k
    if (cu.m_chromaFormat == X265_CSP_I400)
1612
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1613
669k
    else
1614
669k
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1615
669k
}
1616
1617
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1618
51.3k
{
1619
51.3k
    int typeIdx = ctuParam.typeIdx;
1620
1621
51.3k
    if (plane != 2)
1622
34.2k
    {
1623
34.2k
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1624
34.2k
        if (typeIdx >= 0)
1625
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1626
34.2k
    }
1627
1628
51.3k
    if (typeIdx >= 0)
1629
0
    {
1630
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1631
0
        if (typeIdx == SAO_BO)
1632
0
        {
1633
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1634
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1635
1636
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1637
0
                if (ctuParam.offset[i] != 0)
1638
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1639
1640
0
            encodeBinsEP(ctuParam.bandPos, 5);
1641
0
        }
1642
0
        else // if (typeIdx < SAO_BO)
1643
0
        {
1644
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1645
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1646
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1647
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1648
0
            if (plane != 2)
1649
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1650
0
        }
1651
0
    }
1652
51.3k
}
1653
1654
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1655
155k
{
1656
155k
    if (plane != 2)
1657
103k
    {
1658
103k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1659
103k
        encodeBinEP(1);
1660
103k
    }
1661
1662
155k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1663
1664
155k
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1665
155k
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1666
155k
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1667
155k
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1668
155k
    if (plane != 2)
1669
103k
        encodeBinsEP((uint32_t)(typeIdx), 2);
1670
155k
}
1671
1672
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1673
38.7k
{
1674
38.7k
    if (plane != 2)
1675
25.8k
    {
1676
25.8k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1677
25.8k
        encodeBinEP(0);
1678
25.8k
    }
1679
1680
38.7k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1681
1682
193k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1683
155k
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1684
1685
193k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1686
155k
        if (offset[i] != 0)
1687
86
            encodeBinEP(offset[i] < 0);
1688
1689
38.7k
    encodeBinsEP(bandPos, 5);
1690
38.7k
}
1691
1692
/** initialize context model with respect to QP and initialization value */
1693
uint8_t sbacInit(int qp, int initValue)
1694
90.7k
{
1695
90.7k
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1696
1697
90.7k
    int  slope      = (initValue >> 4) * 5 - 45;
1698
90.7k
    int  offset     = ((initValue & 15) << 3) - 16;
1699
90.7k
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1700
90.7k
    uint32_t mpState = (initState >= 64);
1701
90.7k
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1702
1703
90.7k
    return (uint8_t)state;
1704
90.7k
}
1705
1706
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1707
15.0k
{
1708
15.0k
    ctxModel += sliceType * size;
1709
1710
105k
    for (int n = 0; n < size; n++)
1711
90.7k
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1712
15.0k
}
1713
1714
void Entropy::resetEntropy(const Slice& slice)
1715
578
{
1716
578
    int  qp              = slice.m_sliceQp;
1717
578
    SliceType sliceType  = slice.m_sliceType;
1718
1719
578
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1720
578
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1721
578
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1722
578
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1723
578
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1724
578
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1725
578
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1726
578
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1727
578
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1728
578
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1729
578
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1730
578
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1731
578
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1732
578
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1733
578
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1734
578
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1735
578
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1736
578
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1737
578
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1738
578
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1739
578
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1740
578
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1741
578
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1742
578
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1743
578
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1744
578
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1745
    // new structure
1746
1747
578
    start();
1748
578
}
1749
1750
/* code explicit wp tables */
1751
void Entropy::codePredWeightTable(const Slice& slice)
1752
0
{
1753
0
    const WeightParam *wp;
1754
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1755
0
    bool            bDenomCoded  = false;
1756
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1757
0
    uint32_t        totalSignalledWeightFlags = 0;
1758
1759
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1760
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1761
0
    {
1762
0
        for (int list = 0; list < numRefDirs; list++)
1763
0
        {
1764
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1765
0
            {
1766
0
                wp = slice.m_weightPredTable[list][ref];
1767
0
                if (!bDenomCoded)
1768
0
                {
1769
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1770
1771
0
                    if (bChroma)
1772
0
                    {
1773
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1774
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1775
0
                    }
1776
0
                    bDenomCoded = true;
1777
0
                }
1778
#if ENABLE_SCC_EXT
1779
                if (slice.m_poc == slice.m_refPOCList[list][ref])
1780
                    assert(!wp[0].wtPresent);
1781
                else
1782
#endif
1783
0
                    WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1784
0
                totalSignalledWeightFlags = totalSignalledWeightFlags + wp[0].wtPresent;
1785
0
            }
1786
1787
0
            if (bChroma)
1788
0
            {
1789
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1790
0
                {
1791
0
                    wp = slice.m_weightPredTable[list][ref];
1792
#if ENABLE_SCC_EXT
1793
                    if (slice.m_poc == slice.m_refPOCList[list][ref])
1794
                        assert(!wp[1].wtPresent);
1795
                    else
1796
#endif
1797
0
                        WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1798
0
                    totalSignalledWeightFlags = totalSignalledWeightFlags + 2 * wp[1].wtPresent;
1799
0
                }
1800
0
            }
1801
1802
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1803
0
            {
1804
0
                wp = slice.m_weightPredTable[list][ref];
1805
0
                if (wp[0].wtPresent)
1806
0
                {
1807
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1808
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1809
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1810
0
                }
1811
1812
0
                if (bChroma)
1813
0
                {
1814
0
                    if (wp[1].wtPresent)
1815
0
                    {
1816
0
                        for (int plane = 1; plane < 3; plane++)
1817
0
                        {
1818
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1819
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1820
1821
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1822
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1823
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1824
0
                        }
1825
0
                    }
1826
0
                }
1827
0
            }
1828
0
        }
1829
1830
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1831
0
    }
1832
0
}
1833
1834
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1835
4.41k
{
1836
4.41k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1837
1838
4.41k
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1839
1840
4.41k
    if (!symbol)
1841
498
        return;
1842
1843
3.91k
    bool bCodeLast = (maxSymbol > symbol);
1844
1845
19.0k
    while (--symbol)
1846
15.1k
        encodeBin(1, scmModel[offset]);
1847
1848
3.91k
    if (bCodeLast)
1849
195
        encodeBin(0, scmModel[offset]);
1850
3.91k
}
1851
1852
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1853
3.72k
{
1854
3.72k
    uint32_t bins = 0;
1855
3.72k
    int numBins = 0;
1856
1857
14.6k
    while (symbol >= (uint32_t)(1 << count))
1858
10.9k
    {
1859
10.9k
        bins = 2 * bins + 1;
1860
10.9k
        numBins++;
1861
10.9k
        symbol -= 1 << count;
1862
10.9k
        count++;
1863
10.9k
    }
1864
1865
3.72k
    bins = 2 * bins + 0;
1866
3.72k
    numBins++;
1867
1868
3.72k
    bins = (bins << count) | symbol;
1869
3.72k
    numBins += count;
1870
1871
3.72k
    X265_CHECK(numBins <= 32, "numBins too large\n");
1872
3.72k
    encodeBinsEP(bins, numBins);
1873
3.72k
}
1874
1875
/** Coding of coeff_abs_level_minus3 */
1876
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1877
7.49k
{
1878
7.49k
    uint32_t length;
1879
7.49k
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1880
1881
7.49k
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1882
0
    {
1883
0
        length = codeNumber >> absGoRice;
1884
1885
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1886
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1887
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1888
0
    }
1889
7.49k
    else
1890
7.49k
    {
1891
7.49k
        length = 0;
1892
7.49k
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1893
7.49k
        {
1894
7.49k
            unsigned long idx;
1895
7.49k
            BSR(idx, codeNumber + 1);
1896
7.49k
            length = idx;
1897
7.49k
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1898
7.49k
            codeNumber -= (1 << idx) - 1;
1899
7.49k
        }
1900
7.49k
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1901
1902
7.49k
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1903
7.49k
        encodeBinsEP(codeNumber, length + absGoRice);
1904
7.49k
    }
1905
7.49k
}
1906
1907
// SBAC RD
1908
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1909
1.38M
{
1910
1.38M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1911
1.38M
    m_fracBits = src.m_fracBits;
1912
1.38M
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1913
1.38M
}
1914
1915
void Entropy::copyFrom(const Entropy& src)
1916
9.67M
{
1917
9.67M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1918
1919
9.67M
    copyState(src);
1920
1921
9.67M
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1922
9.67M
    markValid();
1923
9.67M
}
1924
1925
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1926
2.22M
{
1927
2.22M
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1928
1929
2.22M
    if (cu.isIntra(absPartIdx))
1930
2.22M
    {
1931
2.22M
        if (depth == cu.m_encData->m_param->maxCUDepth)
1932
1.86M
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1933
2.22M
        return;
1934
2.22M
    }
1935
1936
3
    switch (partSize)
1937
3
    {
1938
0
    case SIZE_2Nx2N:
1939
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1940
0
        break;
1941
1942
0
    case SIZE_2NxN:
1943
0
    case SIZE_2NxnU:
1944
0
    case SIZE_2NxnD:
1945
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1946
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1947
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1948
0
        {
1949
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1950
0
            if (partSize != SIZE_2NxN)
1951
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1952
0
        }
1953
0
        break;
1954
1955
0
    case SIZE_Nx2N:
1956
0
    case SIZE_nLx2N:
1957
0
    case SIZE_nRx2N:
1958
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1959
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1960
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1961
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1962
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1963
0
        {
1964
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1965
0
            if (partSize != SIZE_Nx2N)
1966
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1967
0
        }
1968
0
        break;
1969
0
    default:
1970
0
        X265_CHECK(0, "invalid CU partition\n");
1971
0
        break;
1972
3
    }
1973
3
}
1974
1975
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1976
0
{
1977
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1978
1979
0
    if (numCand > 1)
1980
0
    {
1981
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1982
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1983
1984
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1985
1986
0
        if (unaryIdx != 0)
1987
0
        {
1988
0
            uint32_t mask = (1 << unaryIdx) - 2;
1989
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1990
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1991
0
        }
1992
0
    }
1993
0
}
1994
1995
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1996
3.79M
{
1997
3.79M
    uint32_t dir[4], j;
1998
3.79M
    uint32_t preds[4][3];
1999
3.79M
    int predIdx[4];
2000
3.79M
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
2001
3.79M
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
2002
2003
8.37M
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
2004
4.58M
    {
2005
4.58M
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
2006
4.58M
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
2007
4.58M
        predIdx[j] = -1;
2008
18.3M
        for (uint32_t i = 0; i < 3; i++)
2009
13.7M
            if (dir[j] == preds[j][i])
2010
4.56M
                predIdx[j] = i;
2011
2012
4.58M
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
2013
4.58M
    }
2014
2015
8.37M
    for (j = 0; j < partNum; j++)
2016
4.57M
    {
2017
4.57M
        if (predIdx[j] != -1)
2018
4.57M
        {
2019
4.57M
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
2020
            // NOTE: Mapping
2021
            //       0 = 0
2022
            //       1 = 10
2023
            //       2 = 11
2024
4.57M
            int nonzero = (!!predIdx[j]);
2025
4.57M
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
2026
4.57M
        }
2027
9.52k
        else
2028
9.52k
        {
2029
9.52k
            if (preds[j][0] > preds[j][1])
2030
415
                std::swap(preds[j][0], preds[j][1]);
2031
2032
9.52k
            if (preds[j][0] > preds[j][2])
2033
0
                std::swap(preds[j][0], preds[j][2]);
2034
2035
9.52k
            if (preds[j][1] > preds[j][2])
2036
0
                std::swap(preds[j][1], preds[j][2]);
2037
2038
9.52k
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
2039
18.4E
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
2040
18.4E
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
2041
2042
9.52k
            encodeBinsEP(dir[j], 5);
2043
9.52k
        }
2044
4.57M
    }
2045
3.79M
}
2046
2047
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
2048
3.66M
{
2049
3.66M
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
2050
2051
3.66M
    if (intraDirChroma == DM_CHROMA_IDX)
2052
947k
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
2053
2.72M
    else
2054
2.72M
    {
2055
6.32M
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
2056
6.32M
        {
2057
6.32M
            if (intraDirChroma == chromaDirMode[i])
2058
2.72M
            {
2059
2.72M
                intraDirChroma = i;
2060
2.72M
                break;
2061
2.72M
            }
2062
6.32M
        }
2063
2064
2.72M
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
2065
2.72M
        encodeBinsEP(intraDirChroma, 2);
2066
2.72M
    }
2067
3.66M
}
2068
2069
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
2070
0
{
2071
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
2072
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
2073
2074
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
2075
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
2076
0
    if (interDir < 2)
2077
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
2078
0
}
2079
2080
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
2081
0
{
2082
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
2083
2084
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
2085
2086
0
    if (refFrame > 0)
2087
0
    {
2088
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
2089
0
        if (refNum == 0)
2090
0
            return;
2091
2092
0
        refFrame--;
2093
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
2094
0
        if (refFrame > 0)
2095
0
        {
2096
0
            uint32_t mask = (1 << refFrame) - 2;
2097
0
            mask >>= (refFrame == refNum) ? 1 : 0;
2098
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
2099
0
        }
2100
0
    }
2101
0
}
2102
2103
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
2104
0
{
2105
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
2106
0
    const int hor = mvd.x;
2107
0
    const int ver = mvd.y;
2108
2109
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2110
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2111
2112
0
    const bool bHorAbsGr0 = hor != 0;
2113
0
    const bool bVerAbsGr0 = ver != 0;
2114
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
2115
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
2116
2117
0
    if (bHorAbsGr0)
2118
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2119
2120
0
    if (bVerAbsGr0)
2121
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2122
2123
0
    if (bHorAbsGr0)
2124
0
    {
2125
0
        if (horAbs > 1)
2126
0
            writeEpExGolomb(horAbs - 2, 1);
2127
2128
0
        encodeBinEP(0 > hor ? 1 : 0);
2129
0
    }
2130
2131
0
    if (bVerAbsGr0)
2132
0
    {
2133
0
        if (verAbs > 1)
2134
0
            writeEpExGolomb(verAbs - 2, 1);
2135
2136
0
        encodeBinEP(0 > ver ? 1 : 0);
2137
0
    }
2138
0
}
2139
2140
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
2141
4.41k
{
2142
4.41k
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
2143
2144
4.41k
    int qpBdOffsetY = QP_BD_OFFSET;
2145
2146
4.41k
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
2147
2148
4.41k
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
2149
4.41k
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
2150
4.41k
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
2151
4.41k
    if (absDQp >= CU_DQP_TU_CMAX)
2152
3.72k
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
2153
2154
4.41k
    if (absDQp > 0)
2155
3.91k
    {
2156
3.91k
        uint32_t sign = (dqp > 0 ? 0 : 1);
2157
3.91k
        encodeBinEP(sign);
2158
3.91k
    }
2159
4.41k
}
2160
2161
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
2162
7.34M
{
2163
7.34M
    uint32_t ctx = tuDepth + 2;
2164
2165
7.34M
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
2166
7.34M
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
2167
7.34M
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
2168
2169
7.34M
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
2170
0
    {
2171
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
2172
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
2173
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
2174
2175
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2176
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2177
0
    }
2178
7.34M
    else
2179
7.34M
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2180
7.34M
}
2181
2182
#if CHECKED_BUILD || _DEBUG
2183
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
2184
{
2185
    uint32_t goRiceParam = 0;
2186
    int firstCoeff2 = 1;
2187
    uint32_t baseLevelN = 0x5555AAAA; // 2-bits encode format baseLevel
2188
2189
    uint32_t sum = 0;
2190
    int idx = 0;
2191
    do
2192
    {
2193
        int baseLevel = (baseLevelN & 3) | firstCoeff2;
2194
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2195
        baseLevelN >>= 2;
2196
        int codeNumber = absCoeff[idx] - baseLevel;
2197
2198
        if (codeNumber >= 0)
2199
        {
2200
            //writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2201
            uint32_t length = 0;
2202
2203
            codeNumber = ((uint32_t)codeNumber >> goRiceParam) - COEF_REMAIN_BIN_REDUCTION;
2204
            if (codeNumber >= 0)
2205
            {
2206
                {
2207
                    unsigned long cidx;
2208
                    BSR(cidx, codeNumber + 1);
2209
                    length = cidx;
2210
                }
2211
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
2212
2213
                codeNumber = (length + length);
2214
            }
2215
            sum += (COEF_REMAIN_BIN_REDUCTION + 1 + goRiceParam + codeNumber);
2216
2217
            if (absCoeff[idx] > (COEF_REMAIN_BIN_REDUCTION << goRiceParam))
2218
                goRiceParam = (goRiceParam + 1) - (goRiceParam >> 2);
2219
            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2220
        }
2221
        if (absCoeff[idx] >= 2)
2222
            firstCoeff2 = 0;
2223
        idx++;
2224
    }
2225
    while(idx < numNonZero);
2226
2227
    return sum;
2228
}
2229
#endif // debug only code
2230
2231
/* Code the quantized coefficients of one transform block, or estimate their cost.
 *
 * The function runs in one of two modes, selected by m_bitIf:
 *   - m_bitIf set:   every bin is written through encodeBin/encodeBinsEP
 *   - m_bitIf null:  "fast RD path"; significance and level costs are computed by
 *                    primitives (costCoeffNxN, costC1C2Flag, costCoeffRemain) and
 *                    added to m_fracBits
 *
 * cu         - CU providing the bypass / transform-skip flags, slice PPS settings
 *              and the scan parameters for this TU
 * coeff      - coefficients of the block, addressed with a row stride of trSize
 * absPartIdx - partition index used to look up per-partition CU data
 * log2TrSize - log2 of the transform size (trSize = 1 << log2TrSize)
 * ttype      - TEXT_LUMA selects the luma context sets, anything else the chroma sets
 *
 * Order of work: optional transform-skip flag, position of the last significant
 * coefficient, then for each coefficient group (from the last one down to group 0)
 * the group flag, the significance flags, the greater-than-1 / greater-than-2
 * flags, the sign bins and the remaining levels. */
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
2232
54.7k
{
2233
54.7k
    uint32_t trSize = 1 << log2TrSize;
2234
54.7k
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
2235
    // compute number of significant coefficients
2236
54.7k
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
2237
54.7k
    X265_CHECK(numSig > 0, "cbf check fail\n");
2238
54.7k
    // sign hiding is only considered when the PPS enables it and the CU is not bypassed
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
2239
2240
54.7k
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
2241
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
2242
2243
54.7k
    bool bIsLuma = ttype == TEXT_LUMA;
2244
2245
    // select scans
2246
54.7k
    TUEntropyCodingParameters codingParameters;
2247
54.7k
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
2248
2249
54.7k
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
2250
54.7k
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
2251
54.7k
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
2252
2253
    //----- encode significance map -----
2254
2255
    // Find position of last coefficient
2256
54.7k
    int scanPosLast = 0;
2257
54.7k
    uint32_t posLast;
2258
54.7k
    uint64_t sigCoeffGroupFlag64 = 0;
2259
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
2260
54.7k
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
2261
2262
54.7k
    // the primitive also fills coeffSign / coeffFlag / coeffNum for each coefficient group
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
2263
54.7k
    posLast = codingParameters.scan[scanPosLast];
2264
2265
54.7k
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
2266
2267
    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
2268
127k
    for(int idx = 0; idx < lastScanSet; idx++)
2269
72.9k
    {
2270
72.9k
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
2271
72.9k
        const uint8_t nonZero = (coeffNum[idx] != 0);
2272
72.9k
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
2273
72.9k
    }
2274
2275
2276
    // Code position of last coefficient
2277
54.7k
    {
2278
        // The last position is composed of a prefix and suffix.
2279
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
2280
        // The bypass coded bins for both the x and y components are grouped together.
2281
54.7k
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
2282
54.7k
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
2283
        // swap
2284
54.7k
        if (codingParameters.scanType == SCAN_VER)
2285
3.98k
            std::swap(pos[0], pos[1]);
2286
2287
54.7k
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
2288
54.7k
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
2289
54.7k
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
2290
54.7k
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
2291
54.7k
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
2292
2293
54.7k
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
2294
164k
        // i == 0 codes pos[0], i == 1 codes pos[1]; the second pass uses contexts NUM_CTX_LAST_FLAG_XY further on
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2295
109k
        {
2296
109k
            // table entry packs the prefix length (low 4 bits) and the suffix length (upper bits)
            uint32_t temp = g_lastCoeffTable[pos[i]];
2297
109k
            uint32_t prefixOnes = temp & 15;
2298
109k
            uint32_t suffixLen = temp >> 4;
2299
2300
227k
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
2301
118k
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
2302
2303
109k
            // the terminating zero is omitted when the prefix already has the maximum length
            if (prefixOnes < maxGroupIdx)
2304
80.6k
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
2305
2306
109k
            packedSuffixBits <<= suffixLen;
2307
109k
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
2308
109k
            packedSuffixLen += suffixLen;
2309
109k
        }
2310
2311
54.7k
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
2312
54.7k
    }
2313
2314
    // code significance flag
2315
54.7k
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
2316
54.7k
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
2317
54.7k
    uint32_t c1 = 1;
2318
54.7k
    // offset, inside the last coefficient group, of the scan position just before the last
    // significant coefficient; -1 when that coefficient is the first position of its group
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
2319
54.7k
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
2320
54.7k
    uint32_t numNonZero = 1;
2321
54.7k
    unsigned long lastNZPosInCG = 0;
2322
54.7k
    unsigned long firstNZPosInCG = 0;
2323
2324
#if _DEBUG
2325
    // Unnecessary, for Valgrind-3.10.0 only
2326
    memset(absCoeff, 0, sizeof(absCoeff));
2327
#endif
2328
2329
54.7k
    // the last significant coefficient is known to be non-zero, so it seeds absCoeff[0]
    // (numNonZero starts at 1 for the same reason)
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
2330
2331
182k
    // walk the coefficient groups from the one holding the last significant coefficient down to group 0
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
2332
127k
    {
2333
127k
        const uint32_t subCoeffFlag = coeffFlag[subSet];
2334
127k
        uint32_t scanFlagMask = subCoeffFlag;
2335
127k
        int subPosBase = subSet << MLS_CG_SIZE;
2336
        
2337
127k
        if (subSet == lastScanSet)
2338
54.7k
        {
2339
54.7k
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
2340
54.7k
            // NOTE(review): this appears to discard the mask bit of the last significant
            // coefficient itself, whose position was already coded above - confirm against scanPosLast
            scanFlagMask >>= 1;
2341
54.7k
        }
2342
2343
        // encode significant_coeffgroup_flag
2344
127k
        const int cgBlkPos = codingParameters.scanCG[subSet];
2345
127k
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
2346
127k
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
2347
127k
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
2348
2349
127k
        // the last group and group 0 are marked significant without coding a flag;
        // for every other group the flag is coded explicitly
        if (subSet == lastScanSet || !subSet)
2350
59.2k
            sigCoeffGroupFlag64 |= cgBlkPosMask;
2351
68.4k
        else
2352
68.4k
        {
2353
68.4k
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
2354
68.4k
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2355
68.4k
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
2356
68.4k
        }
2357
2358
        // encode significant_coeff_flag
2359
127k
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
2360
89.1k
        {
2361
89.1k
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
2362
89.1k
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2363
89.1k
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
2364
2365
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
2366
89.1k
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
2367
89.1k
            {
2368
                // patternSigCtx = 0
2369
89.1k
                {
2370
89.1k
                    2, 1, 1, 0,
2371
89.1k
                    1, 1, 0, 0,
2372
89.1k
                    1, 0, 0, 0,
2373
89.1k
                    0, 0, 0, 0,
2374
89.1k
                },
2375
                // patternSigCtx = 1
2376
89.1k
                {
2377
89.1k
                    2, 2, 2, 2,
2378
89.1k
                    1, 1, 1, 1,
2379
89.1k
                    0, 0, 0, 0,
2380
89.1k
                    0, 0, 0, 0,
2381
89.1k
                },
2382
                // patternSigCtx = 2
2383
89.1k
                {
2384
89.1k
                    2, 1, 0, 0,
2385
89.1k
                    2, 1, 0, 0,
2386
89.1k
                    2, 1, 0, 0,
2387
89.1k
                    2, 1, 0, 0,
2388
89.1k
                },
2389
                // patternSigCtx = 3
2390
89.1k
                {
2391
89.1k
                    2, 2, 2, 2,
2392
89.1k
                    2, 2, 2, 2,
2393
89.1k
                    2, 2, 2, 2,
2394
89.1k
                    2, 2, 2, 2,
2395
89.1k
                },
2396
                // 4x4
2397
89.1k
                {
2398
89.1k
                    0, 1, 4, 5,
2399
89.1k
                    2, 3, 4, 5,
2400
89.1k
                    6, 6, 8, 8,
2401
89.1k
                    7, 7, 8, 8
2402
89.1k
                }
2403
89.1k
            };
2404
2405
89.1k
            const int offset = codingParameters.firstSignificanceMapContext;
2406
89.1k
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2407
2408
89.1k
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2409
89.1k
            // bitstream attached: encode every significance bin individually
            if (m_bitIf)
2410
384
            {
2411
384
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2412
384
                memset(tmpCoeff, 0, sizeof(tmpCoeff));
2413
2414
                // TODO: accelerate by PABSW
2415
1.92k
                // gather the absolute values of this 4x4 group into a contiguous buffer
                for (int i = 0; i < MLS_CG_SIZE; i++)
2416
1.53k
                {
2417
1.53k
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2418
1.53k
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2419
1.53k
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2420
1.53k
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2421
1.53k
                }
2422
2423
384
                if (log2TrSize == 2)
2424
384
                {
2425
384
                    do
2426
5.76k
                    {
2427
5.76k
                        uint32_t blkPos, sig, ctxSig;
2428
5.76k
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2429
5.76k
                        sig     = scanFlagMask & 1;
2430
5.76k
                        scanFlagMask >>= 1;
2431
5.76k
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2432
5.76k
                        {
2433
5.76k
                            ctxSig = table_cnt[4][blkPos];
2434
5.76k
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2435
5.76k
                            encodeBin(sig, baseCtx[ctxSig]);
2436
5.76k
                        }
2437
5.76k
                        // always store the value, but only advance the write index when it is non-zero
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2438
5.76k
                        numNonZero += sig;
2439
5.76k
                        scanPosSigOff--;
2440
5.76k
                    }
2441
5.76k
                    while(scanPosSigOff >= 0);
2442
384
                }
2443
0
                else
2444
0
                {
2445
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2446
2447
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2448
0
                    do
2449
0
                    {
2450
0
                        uint32_t blkPos, sig, ctxSig;
2451
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2452
0
                        // all-zero mask at scan position 0 of the block forces ctxSig to 0 there
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2453
0
                        sig     = scanFlagMask & 1;
2454
0
                        scanFlagMask >>= 1;
2455
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2456
0
                        // no bin is coded for the first position of a non-zero-index group when
                        // nothing else in the group was significant
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2457
0
                        {
2458
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2459
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2460
2461
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2462
0
                            encodeBin(sig, baseCtx[ctxSig]);
2463
0
                        }
2464
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2465
0
                        numNonZero += sig;
2466
0
                        scanPosSigOff--;
2467
0
                    }
2468
0
                    while(scanPosSigOff >= 0);
2469
0
                }
2470
384
            }
2471
88.7k
            else // fast RD path
2472
88.7k
            {
2473
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2474
88.7k
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2475
88.7k
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2476
88.7k
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2477
2478
#if CHECKED_BUILD || _DEBUG
2479
                numNonZero = coeffNum[subSet];
2480
#endif
2481
                // update RD cost
2482
88.7k
                m_fracBits += sum;
2483
88.7k
            } // end of fast RD path -- !m_bitIf
2484
89.1k
        }
2485
127k
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2486
2487
127k
        uint32_t coeffSigns = coeffSign[subSet];
2488
127k
        numNonZero = coeffNum[subSet];
2489
127k
        if (numNonZero > 0)
2490
127k
        {
2491
127k
            uint32_t idx = 0;
2492
127k
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2493
127k
            BSR(lastNZPosInCG, subCoeffFlag);
2494
127k
            BSF(firstNZPosInCG, subCoeffFlag);
2495
2496
127k
            // one sign may be hidden when the non-zero coefficients of the group span at least SBH_THRESHOLD positions
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2497
127k
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2498
127k
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2499
2500
127k
            c1 = 1;
2501
127k
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2502
2503
127k
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2504
127k
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2505
2506
127k
            // no bitstream: estimate the cost of the level flags, signs and remainders
            if (!m_bitIf)
2507
125k
            {
2508
125k
                // the primitive's return value packs: bits 28+ firstC2Idx, bits 26-27 next c1, low 24 bits the cost
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
2509
125k
                uint32_t firstC2Idx = (sum >> 28);
2510
125k
                c1 = ((sum >> 26) & 3);
2511
125k
                m_fracBits += sum & 0x00FFFFFF;
2512
2513
125k
                // note the sign convention: -1 here (added below), +1 in the standard path (subtracted there)
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2514
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2515
125k
                m_fracBits += (numNonZero + hiddenShift) << 15;
2516
2517
125k
                if (numNonZero > firstC2Idx)
2518
121k
                {
2519
121k
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2520
121k
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2521
121k
                    m_fracBits += ((uint64_t)sum << 15);
2522
121k
                }
2523
125k
            }
2524
            // Standard path
2525
1.86k
            else
2526
1.86k
            {
2527
1.86k
                // 8 and 2 act as "not found yet" markers; see the (== 9) / (== 3) tests below
                uint32_t firstC2Idx = 8;
2528
1.86k
                uint32_t firstC2Flag = 2;
2529
1.86k
                // packed sequence of upcoming c1 values, two bits each, consumed LSB first
                uint32_t c1Next = 0xFFFFFFFE;
2530
2531
1.86k
                idx = 0;
2532
1.86k
                do
2533
4.55k
                {
2534
4.55k
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2535
4.55k
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2536
4.55k
                    encodeBin(symbol1, baseCtxMod[c1]);
2537
2538
4.55k
                    // once a coefficient greater than 1 is seen, c1 stays 0 for the rest of the group
                    if (symbol1)
2539
4.42k
                        c1Next = 0;
2540
2541
4.55k
                    // latch the greater-than-2 flag and the index of the first coefficient greater than 1
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2542
4.55k
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2543
2544
4.55k
                    c1 = (c1Next & 3);
2545
4.55k
                    c1Next >>= 2;
2546
4.55k
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2547
4.55k
                    idx++;
2548
4.55k
                }
2549
4.55k
                while(idx < numC1Flag);
2550
2551
1.86k
                if (!c1)
2552
1.73k
                {
2553
1.73k
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2554
2555
1.73k
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2556
1.73k
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2557
1.73k
                }
2558
2559
1.86k
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2560
1.86k
                // when a sign is hidden, the lowest sign bit is dropped and one fewer bypass bin is written
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2561
2562
1.86k
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2563
1.73k
                {
2564
                    // Standard path
2565
1.73k
                    uint32_t goRiceParam = 0;
2566
1.73k
                    int baseLevel = 3;
2567
1.73k
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2568
#if CHECKED_BUILD || _DEBUG
2569
                    int firstCoeff2 = 1;
2570
#endif
2571
1.73k
                    idx = firstC2Idx;
2572
1.73k
                    do
2573
7.49k
                    {
2574
7.49k
                        if (idx >= C1FLAG_NUMBER)
2575
3.07k
                            baseLevel = 1;
2576
                        // TODO: fast algorithm maybe broken this check logic
2577
7.49k
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2578
2579
7.49k
                        if (absCoeff[idx] >= baseLevel)
2580
7.49k
                        {
2581
7.49k
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2582
7.49k
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2583
7.49k
                            // raise the Rice parameter (capped at 4) and double the threshold after a large value
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2584
7.49k
                            goRiceParam += adjust;
2585
7.49k
                            threshold += (adjust) ? threshold : 0;
2586
7.49k
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2587
7.49k
                        }
2588
#if CHECKED_BUILD || _DEBUG
2589
                        firstCoeff2 = 0;
2590
#endif
2591
7.49k
                        baseLevel = 2;
2592
7.49k
                        idx++;
2593
7.49k
                    }
2594
7.49k
                    while(idx < numNonZero);
2595
1.73k
                }
2596
1.86k
            } // end of !bitIf
2597
127k
        } // end of (numNonZero > 0)
2598
2599
        // Initialize value for next loop
2600
127k
        numNonZero = 0;
2601
127k
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2602
127k
    }
2603
54.7k
}
2604
2605
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2606
775k
{
2607
775k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2608
2609
775k
    uint32_t isCodeNonZero = !!code;
2610
2611
775k
    encodeBinEP(isCodeNonZero);
2612
775k
    if (isCodeNonZero)
2613
86
    {
2614
86
        uint32_t isCodeLast = (maxSymbol > code);
2615
86
        uint32_t mask = (1 << (code - 1)) - 1;
2616
86
        uint32_t len = code - 1 + isCodeLast;
2617
86
        mask <<= isCodeLast;
2618
2619
86
        encodeBinsEP(mask, len);
2620
86
    }
2621
775k
}
2622
2623
/* estimate bit cost for CBP, significant map and significant coefficients */
2624
/* Fill estBitsSbac with bit-cost tables taken from the current context states.
 *
 * estBitsSbac - destination tables
 * log2TrSize  - log2 of the transform size the tables are prepared for
 * bIsLuma     - true for the luma context sets, false for chroma */
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
{
    // coded block flags
    estCBFBit(estBitsSbac);

    // coefficient-group significance flags
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);

    // significance map and last-position tables
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);

    // greater-than-1 / greater-than-2 level flags
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
}
2636
2637
/* estimate bit cost for each CBP bit */
2638
/* Tabulate the cost of coding a 0 and a 1 with each CBF context and with the
 * root CBF context. Results go to blockCbpBits and blockRootCbpBits. */
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
{
    const uint8_t* const cbfCtx = &m_contextState[OFF_QT_CBF_CTX];

    for (uint32_t c = 0; c < NUM_QT_CBF_CTX; c++)
    {
        const uint8_t state = cbfCtx[c];
        estBitsSbac.blockCbpBits[c][0] = sbacGetEntropyBits(state, 0);
        estBitsSbac.blockCbpBits[c][1] = sbacGetEntropyBits(state, 1);
    }

    // the root CBF has a single context
    const uint8_t rootState = m_contextState[OFF_QT_ROOT_CBF_CTX];

    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(rootState, 0);
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(rootState, 1);
}
2653
2654
/* estimate SBAC bit cost for significant coefficient group map */
2655
/* Tabulate the cost of coding a 0 and a 1 with each coefficient-group
 * significance context. Chroma contexts follow the luma ones in m_contextState,
 * NUM_SIG_CG_FLAG_CTX entries further on. */
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
{
    const uint8_t* const cgCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];

    for (int c = 0; c < NUM_SIG_CG_FLAG_CTX; c++)
    {
        estBitsSbac.significantCoeffGroupBits[c][0] = sbacGetEntropyBits(cgCtx[c], 0);
        estBitsSbac.significantCoeffGroupBits[c][1] = sbacGetEntropyBits(cgCtx[c], 1);
    }
}
2663
2664
/* estimate SBAC bit cost for significant coefficient map */
2665
/* Tabulate the bit costs needed to estimate a significance map:
 *   - significantBits[bin][ctx] for context 0 and for the range of contexts
 *     used by the given transform size
 *   - lastBits[i][group] (i = 0, 1) for each last-position prefix length
 *
 * estBitsSbac - destination tables
 * log2TrSize  - log2 of the transform size
 * bIsLuma     - true for the luma context sets, false for chroma */
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
{
    // contexts [ctxBegin, ctxBegin + ctxCount) are the ones used by this block size
    int ctxBegin, ctxCount;

    if (log2TrSize >= 4)
    {
        ctxBegin = bIsLuma ? 21 : 12;
        ctxCount = bIsLuma ? 6 : 3;
    }
    else if (log2TrSize == 3)
    {
        ctxBegin = 9;
        ctxCount = bIsLuma ? 12 : 3;
    }
    else
    {
        ctxBegin = 1;
        ctxCount = 8;
    }

    const uint8_t* const sigCtx = &m_contextState[OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA)];

    // context 0 is tabulated for every block size
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(sigCtx[0], 0);
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(sigCtx[0], 1);

    for (int c = ctxBegin, ctxEnd = ctxBegin + ctxCount; c < ctxEnd; c++)
    {
        estBitsSbac.significantBits[0][c] = sbacGetEntropyBits(sigCtx[c], 0);
        estBitsSbac.significantBits[1][c] = sbacGetEntropyBits(sigCtx[c], 1);
    }

    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;

    // i selects the first or second set of last-position contexts
    for (int i = 0; i < 2; i++)
    {
        const uint8_t* const lastCtx = &m_contextState[OFF_CTX_LAST_FLAG_X + i * NUM_CTX_LAST_FLAG_XY];
        int bits = 0; // running cost of the one-bins coded so far

        if (!bIsLuma)
        {
            // chroma: contexts start after the luma ones; each context serves (1 << ctxShift) prefix positions
            const int ctxShift = log2TrSize - 2;

            for (uint32_t g = 0; g < maxGroupIdx; g++)
            {
                const int ctxOffset = NUM_CTX_LAST_FLAG_XY_LUMA + (g >> ctxShift);
                estBitsSbac.lastBits[i][g] = bits + sbacGetEntropyBits(lastCtx[ctxOffset], 0);
                bits += sbacGetEntropyBits(lastCtx[ctxOffset], 1);
            }

            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
        }
        else if (log2TrSize == 2)
        {
            // luma 4x4: one context per prefix position
            for (uint32_t g = 0; g < 3; g++)
            {
                estBitsSbac.lastBits[i][g] = bits + sbacGetEntropyBits(lastCtx[g], 0);
                bits += sbacGetEntropyBits(lastCtx[g], 1);
            }

            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
        }
        else
        {
            // larger luma blocks: each context is shared by two consecutive prefix positions
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
            X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");

            const uint32_t numPairs = (maxGroupIdx >> 1) + 1;
            for (uint32_t p = 0; p < numPairs; p++)
            {
                const int cost0 = sbacGetEntropyBits(lastCtx[blkSizeOffset + p], 0);
                const int cost1 = sbacGetEntropyBits(lastCtx[blkSizeOffset + p], 1);
                estBitsSbac.lastBits[i][p * 2 + 0] = bits + cost0;
                estBitsSbac.lastBits[i][p * 2 + 1] = bits + cost1 + cost0;
                bits += 2 * cost1;
            }

            // the longest prefix has no terminating zero-bin, so take its cost back out
            estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(lastCtx[blkSizeOffset + (maxGroupIdx >> 1)], 0);
        }
    }
}
2754
2755
/* estimate bit cost of significant coefficient */
2756
/* Tabulate the cost of coding a 0 and a 1 with each greater-than-1 context
 * (greaterOneBits) and each greater-than-2 context (levelAbsBits). Chroma
 * contexts are stored after the luma ones in m_contextState. */
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
{
    const uint8_t* ctxOne;
    const uint8_t* ctxAbs;
    int numOne, numAbs;

    if (bIsLuma)
    {
        ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
        ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
        numOne = NUM_ONE_FLAG_CTX_LUMA;
        numAbs = NUM_ABS_FLAG_CTX_LUMA;
    }
    else
    {
        ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
        ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
        numOne = NUM_ONE_FLAG_CTX_CHROMA;
        numAbs = NUM_ABS_FLAG_CTX_CHROMA;
    }

    for (int c = 0; c < numOne; c++)
    {
        estBitsSbac.greaterOneBits[c][0] = sbacGetEntropyBits(ctxOne[c], 0);
        estBitsSbac.greaterOneBits[c][1] = sbacGetEntropyBits(ctxOne[c], 1);
    }

    for (int c = 0; c < numAbs; c++)
    {
        estBitsSbac.levelAbsBits[c][0] = sbacGetEntropyBits(ctxAbs[c], 0);
        estBitsSbac.levelAbsBits[c][1] = sbacGetEntropyBits(ctxAbs[c], 1);
    }
}
2793
2794
/* Initialize our context information from the nominated source */
2795
void Entropy::copyContextsFrom(const Entropy& src)
2796
9.73k
{
2797
9.73k
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2798
2799
9.73k
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2800
9.73k
    markValid();
2801
9.73k
}
2802
2803
void Entropy::start()
2804
578
{
2805
578
    m_low = 0;
2806
578
    m_range = 510;
2807
578
    m_bitsLeft = -12;
2808
578
    m_numBufferedBytes = 0;
2809
578
    m_bufferedByte = 0xff;
2810
578
}
2811
2812
void Entropy::finish()
2813
2.77k
{
2814
2.77k
    if (m_low >> (21 + m_bitsLeft))
2815
4
    {
2816
4
        m_bitIf->writeByte(m_bufferedByte + 1);
2817
5
        while (m_numBufferedBytes > 1)
2818
1
        {
2819
1
            m_bitIf->writeByte(0x00);
2820
1
            m_numBufferedBytes--;
2821
1
        }
2822
2823
4
        m_low -= 1 << (21 + m_bitsLeft);
2824
4
    }
2825
2.77k
    else
2826
2.77k
    {
2827
2.77k
        if (m_numBufferedBytes > 0)
2828
2.77k
            m_bitIf->writeByte(m_bufferedByte);
2829
2830
2.77k
        while (m_numBufferedBytes > 1)
2831
5
        {
2832
5
            m_bitIf->writeByte(0xff);
2833
5
            m_numBufferedBytes--;
2834
5
        }
2835
2.77k
    }
2836
2.77k
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2837
2.77k
}
2838
2839
void Entropy::copyState(const Entropy& other)
2840
9.67M
{
2841
9.67M
    m_low = other.m_low;
2842
9.67M
    m_range = other.m_range;
2843
9.67M
    m_bitsLeft = other.m_bitsLeft;
2844
9.67M
    m_bufferedByte = other.m_bufferedByte;
2845
9.67M
    m_numBufferedBytes = other.m_numBufferedBytes;
2846
9.67M
    m_fracBits = other.m_fracBits;
2847
9.67M
}
2848
2849
void Entropy::resetBits()
2850
8.43M
{
2851
8.43M
    m_low = 0;
2852
8.43M
    m_bitsLeft = -12;
2853
8.43M
    m_numBufferedBytes = 0;
2854
8.43M
    m_bufferedByte = 0xff;
2855
8.43M
    m_fracBits &= 32767;
2856
8.43M
    if (m_bitIf)
2857
0
        m_bitIf->resetBits();
2858
8.43M
}
2859
2860
/** Encode bin */
2861
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2862
25.3M
{
2863
25.3M
    uint32_t mstate = ctxModel;
2864
2865
25.3M
    ctxModel = sbacNext(mstate, binValue);
2866
2867
25.3M
    if (!m_bitIf)
2868
25.0M
    {
2869
25.0M
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2870
25.0M
        return;
2871
25.0M
    }
2872
2873
296k
    uint32_t range = m_range;
2874
296k
    uint32_t state = sbacGetState(mstate);
2875
296k
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2876
296k
    range -= lps;
2877
2878
296k
    X265_CHECK(lps >= 2, "lps is too small\n");
2879
2880
296k
    int numBits = (uint32_t)(range - 256) >> 31;
2881
296k
    uint32_t low = m_low;
2882
2883
    // NOTE: MPS must be LOWEST bit in mstate
2884
296k
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2885
296k
    if ((binValue ^ mstate) & 1)
2886
31.3k
    {
2887
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2888
        //numBits = g_renormTable[lps >> 3];
2889
31.3k
        unsigned long idx;
2890
31.3k
        BSR(idx, lps);
2891
31.3k
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2892
2893
31.3k
        numBits = 8 - idx;
2894
31.3k
        if (state >= 63)
2895
0
            numBits = 6;
2896
31.3k
        X265_CHECK(numBits <= 6, "numBits failure\n");
2897
2898
31.3k
        low += range;
2899
31.3k
        range = lps;
2900
31.3k
    }
2901
296k
    m_low = (low << numBits);
2902
296k
    m_range = (range << numBits);
2903
296k
    m_bitsLeft += numBits;
2904
2905
296k
    if (m_bitsLeft >= 0)
2906
13.9k
        writeOut();
2907
296k
}
2908
2909
/** Encode equiprobable bin */
2910
void Entropy::encodeBinEP(uint32_t binValue)
2911
908k
{
2912
908k
    if (!m_bitIf)
2913
907k
    {
2914
907k
        m_fracBits += 32768;
2915
907k
        return;
2916
907k
    }
2917
433
    m_low <<= 1;
2918
433
    if (binValue)
2919
433
        m_low += m_range;
2920
433
    m_bitsLeft++;
2921
2922
433
    if (m_bitsLeft >= 0)
2923
86
        writeOut();
2924
433
}
2925
2926
/** Encode equiprobable bins */
2927
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2928
7.51M
{
2929
7.51M
    if (!m_bitIf)
2930
7.44M
    {
2931
7.44M
        m_fracBits += 32768 * numBins;
2932
7.44M
        return;
2933
7.44M
    }
2934
2935
73.3k
    while (numBins > 8)
2936
3.05k
    {
2937
3.05k
        numBins -= 8;
2938
3.05k
        uint32_t pattern = binValues >> numBins;
2939
3.05k
        m_low <<= 8;
2940
3.05k
        m_low += m_range * pattern;
2941
3.05k
        binValues -= pattern << numBins;
2942
3.05k
        m_bitsLeft += 8;
2943
2944
3.05k
        if (m_bitsLeft >= 0)
2945
3.05k
            writeOut();
2946
3.05k
    }
2947
2948
70.2k
    m_low <<= numBins;
2949
70.2k
    m_low += m_range * binValues;
2950
70.2k
    m_bitsLeft += numBins;
2951
2952
70.2k
    if (m_bitsLeft >= 0)
2953
19.2k
        writeOut();
2954
70.2k
}
2955
2956
/** Encode terminating bin */
2957
void Entropy::encodeBinTrm(uint32_t binValue)
2958
27.4k
{
2959
27.4k
    if (!m_bitIf)
2960
12.3k
    {
2961
12.3k
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2962
12.3k
        return;
2963
12.3k
    }
2964
2965
15.1k
    m_range -= 2;
2966
15.1k
    if (binValue)
2967
2.77k
    {
2968
2.77k
        m_low += m_range;
2969
2.77k
        m_low <<= 7;
2970
2.77k
        m_range = 2 << 7;
2971
2.77k
        m_bitsLeft += 7;
2972
2.77k
    }
2973
12.3k
    else if (m_range >= 256)
2974
11.6k
        return;
2975
669
    else
2976
669
    {
2977
669
        m_low <<= 1;
2978
669
        m_range <<= 1;
2979
669
        m_bitsLeft++;
2980
669
    }
2981
2982
3.44k
    if (m_bitsLeft >= 0)
2983
2.56k
        writeOut();
2984
3.44k
}
2985
2986
/** Move bits from register into bitstream */
2987
void Entropy::writeOut()
2988
38.8k
{
2989
38.8k
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2990
38.8k
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2991
2992
38.8k
    m_bitsLeft -= 8;
2993
38.8k
    m_low &= low_mask;
2994
2995
38.8k
    if (leadByte == 0xff)
2996
2.94k
        m_numBufferedBytes++;
2997
35.8k
    else
2998
35.8k
    {
2999
35.8k
        uint32_t numBufferedBytes = m_numBufferedBytes;
3000
35.8k
        if (numBufferedBytes > 0)
3001
33.1k
        {
3002
33.1k
            uint32_t carry = leadByte >> 8;
3003
33.1k
            uint32_t byteTowrite = m_bufferedByte + carry;
3004
33.1k
            m_bitIf->writeByte(byteTowrite);
3005
3006
33.1k
            byteTowrite = (0xff + carry) & 0xff;
3007
36.0k
            while (numBufferedBytes > 1)
3008
2.93k
            {
3009
2.93k
                m_bitIf->writeByte(byteTowrite);
3010
2.93k
                numBufferedBytes--;
3011
2.93k
            }
3012
33.1k
        }
3013
35.8k
        m_numBufferedBytes = 1;
3014
35.8k
        m_bufferedByte = (uint8_t)leadByte;
3015
35.8k
    }
3016
38.8k
}
3017
3018
/* Bit-cost lookup table with 128 entries.
 * The values are in the same fixed-point units as m_fracBits, where
 * encodeBinEP() charges 32768 for one equiprobable bin (i.e. one bit).
 * NOTE(review): presumably read through sbacGetEntropyBits(); entries look
 * paired per state as (bin == MPS, bin != MPS) -- confirm against the
 * macro definition. */
const uint32_t g_entropyBits[128] =
3019
{
3020
    // Corrected table, most notably for last state
3021
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
3022
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
3023
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
3024
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
3025
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
3026
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
3027
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
3028
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
3029
};
3030
3031
/* Context-model transition table: 128 rows of two successor values each.
 * The last two rows (126 and 127) map to themselves for both columns.
 * NOTE(review): presumably indexed as [current model byte][bin value] via
 * sbacNext(), with the MPS held in the lowest bit of the model byte (see
 * the note in encodeBin) -- confirm against the macro definition. */
const uint8_t g_nextState[128][2] =
3032
{
3033
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
3034
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
3035
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
3036
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
3037
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
3038
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
3039
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
3040
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
3041
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
3042
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
3043
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
3044
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
3045
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
3046
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
3047
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
3048
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
3049
};
3050
3051
}
3052
3053
// Packed 128-entry table defined outside the X265_NS namespace with C
// linkage, under a PFX()-prefixed symbol name.
// Each 32-bit entry is split as an 8-bit upper field and a 24-bit lower field.
// NOTE(review): the lower 24 bits appear to repeat g_entropyBits at the same
// index, and the upper 8 bits look like a successor state value -- confirm
// against the code that consumes this table (presumably assembly, given the
// C linkage).
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
3054
extern "C" const uint32_t PFX(entropyStateBits)[128] =
3055
{
3056
    // Corrected table, most notably for last state
3057
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
3058
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
3059
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
3060
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
3061
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
3062
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
3063
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
3064
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
3065
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
3066
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
3067
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
3068
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
3069
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
3070
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
3071
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
3072
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
3073
};
3074