Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/entropy.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
13.7k
#define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
36
4.10k
#define CU_DQP_EG_k    0 // exp-golomb order
37
0
#define START_VALUE    8 // start value for dpcm mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93
{
94
    { 183, },
95
    { 154, },
96
    { 184, },
97
};
98
99
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100
{
101
    { 152,  139, },
102
    { 152,  139, },
103
    {  63,  139, },
104
};
105
106
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128
{
129
    { 154,  154,  154, },
130
    { 154,  154,  154, },
131
    { 154,  154,  154, },
132
};
133
134
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135
{
136
    { 153,  111,  149,   92,  167,  154,  154 },
137
    { 153,  111,  149,  107,  167,  154,  154 },
138
    { 111,  141,   94,  138,  182,  154,  154 },
139
};
140
141
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149
{
150
    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151
      108,  123,   93 },
152
    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153
      108,  123,  108 },
154
    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155
      108,  123,   63 },
156
};
157
158
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159
{
160
    { 121,  140,
161
      61,  154, },
162
    { 121,  140,
163
      61,  154, },
164
    {  91,  171,
165
       134,  141, },
166
};
167
168
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169
{
170
    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171
    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172
    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173
};
174
175
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176
{
177
    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178
    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179
    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180
};
181
182
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183
{
184
    { 107,  167,   91,  107,  107,  167, },
185
    { 107,  167,   91,  122,  107,  167, },
186
    { 138,  153,  136,  167,  152,  152, },
187
};
188
189
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197
{
198
    { 153,  },
199
    { 153,  },
200
    { 153,  },
201
};
202
203
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204
{
205
    { 160, },
206
    { 185, },
207
    { 200, },
208
};
209
210
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211
{
212
    { 224,  167,  122, },
213
    { 124,  138,   94, },
214
    { 153,  138,  138, },
215
};
216
217
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218
{
219
    { 139,  139 },
220
    { 139,  139 },
221
    { 139,  139 },
222
};
223
224
Entropy::Entropy()
225
1.96M
{
226
1.96M
    markValid();
227
1.96M
    m_fracBits = 0;
228
1.96M
    m_pad = 0;
229
1.96M
    m_meanQP = 0;
230
1.96M
    X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
231
1.96M
}
232
233
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
234
void Entropy::codeVPS(const VPS& vps, const SPS& sps)
235
#else
236
void Entropy::codeVPS(const VPS& vps)
237
#endif
238
619
{
239
619
    int maxLayers = (vps.m_numLayers > 1 || vps.m_numViews > 1) + 1;
240
619
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
241
619
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
242
619
    WRITE_CODE(maxLayers - 1, 6, "vps_reserved_zero_6bits");
243
619
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
244
619
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
245
619
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
246
247
619
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
248
249
619
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
250
251
1.23k
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
252
619
    {
253
619
        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
254
619
        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
255
619
        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
256
619
    }
257
258
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
259
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
260
    {
261
        WRITE_CODE(maxLayers - 1, 6, "vps_max_nuh_reserved_zero_layer_id");
262
        WRITE_UVLC(vps.m_vpsNumLayerSetsMinus1, "vps_num_layer_sets_minus1");
263
        for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
264
        {
265
#if ENABLE_MULTIVIEW
266
            if (vps.m_numViews > 1)
267
            {
268
                for (int j = 0; j < vps.m_numViews; j++)
269
                {
270
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
271
                }
272
            }
273
#endif
274
#if ENABLE_ALPHA
275
            if (vps.m_numLayers > 1)
276
            {
277
                for (int j = 0; j < vps.m_numLayers; j++)
278
                {
279
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
280
                }
281
            }
282
#endif
283
        }
284
    }
285
    else
286
    {
287
        WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
288
        WRITE_UVLC(0, "vps_max_op_sets_minus1");
289
    }
290
#else
291
619
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
292
619
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
293
619
#endif
294
295
619
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
296
297
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
298
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
299
    {
300
        WRITE_FLAG(vps.vps_extension_flag, "vps_extension_flag");
301
302
        if (vps.vps_extension_flag)
303
        {
304
            while (m_bitIf->getNumberOfWrittenBits() % X265_BYTE != 0)
305
            {
306
                WRITE_FLAG(1, "vps_extension_alignment_bit_equal_to_one");
307
            }
308
309
            WRITE_CODE(vps.ptl.levelIdc, 8, "general_level_idc");
310
            if (vps.maxTempSubLayers > 1)
311
            {
312
                for (uint32_t i = 0; i < vps.maxTempSubLayers - 1; i++)
313
                {
314
                    WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
315
                    WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
316
                }
317
                for (int i = vps.maxTempSubLayers - 1; i < 8; i++)
318
                    WRITE_CODE(0, 2, "reserved_zero_2bits");
319
            }
320
321
            WRITE_FLAG(vps.splitting_flag, "splitting flag");
322
            for (int i = 0; i < MAX_VPS_NUM_SCALABILITY_TYPES; i++)
323
            {
324
                WRITE_FLAG(vps.m_scalabilityMask[i], "scalability_mask[i]");
325
            }
326
            for (int i = 0; i < vps.scalabilityTypes - vps.splitting_flag; i++)
327
            {
328
                WRITE_CODE(vps.m_dimensionIdLen[i] - 1, 3, "dimension_id_len_minus1[i]");
329
            }
330
            WRITE_FLAG(vps.m_nuhLayerIdPresentFlag, "vps_nuh_layer_id_present_flag");
331
            for (int i = 1; i < maxLayers; i++)
332
            {
333
                if (vps.m_nuhLayerIdPresentFlag)
334
                    WRITE_CODE(vps.m_layerIdInNuh[i], 6, "layer_id_in_nuh[i]");
335
336
                if (!vps.splitting_flag)
337
                {
338
                    for (int j = 0; j < vps.scalabilityTypes; j++)
339
                    {
340
                        uint8_t bits = vps.m_dimensionIdLen[j];
341
                        WRITE_CODE(vps.m_dimensionId[i][j], bits, "dimension_id[i][j]");
342
                    }
343
                }
344
            }
345
            WRITE_CODE(vps.m_viewIdLen, 4, "view_id_len");
346
347
#if ENABLE_ALPHA
348
            if (vps.m_numLayers > 1)
349
            {
350
                WRITE_FLAG(0, "direct_dependency_flag[1][0]");
351
                WRITE_UVLC(0, "num_add_layer_sets");
352
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
353
                WRITE_FLAG(0, "max_tid_ref_present_flag");
354
                WRITE_FLAG(0, "default_ref_layers_active_flag");
355
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
356
                WRITE_FLAG(1, "vps_profile_present_flag");
357
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
358
359
                WRITE_UVLC(0, "num_add_olss");
360
                WRITE_CODE(0, 2, "default_output_layer_idc");
361
                WRITE_CODE(1, 2, "profile_tier_level_idx[ i ][ j ]");
362
                WRITE_CODE(2, 2, "profile_tier_level_idx[ i ][ j ]");
363
364
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
365
366
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
367
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
368
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
369
370
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
371
372
                if (sps.chromaFormatIdc == X265_CSP_I444)
373
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
374
375
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
376
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
377
378
                const Window& conf = sps.conformanceWindow;
379
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
380
                if (conf.bEnabled)
381
                {
382
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
383
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
384
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
385
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
386
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
387
                }
388
389
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
390
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
391
                WRITE_FLAG(1, "poc_lsb_not_present_flag[");
392
393
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
394
                {
395
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
396
                    for (uint32_t j = 0; j < vps.maxTempSubLayers ; j++)
397
                    {
398
                        if(j > 0)
399
                        WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
400
401
                        for(int k = 0; k < vps.m_numLayersInIdList[i]; k++)
402
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
403
404
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
405
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
406
                    }
407
                }
408
409
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
410
411
                WRITE_FLAG(0, "default_direct_dependency_flag");
412
                WRITE_UVLC(0, "vps_non_vui_extension_length");
413
                WRITE_FLAG(0, "vps_vui_present_flag");
414
                WRITE_FLAG(0, "vps_extension2_flag");
415
        }
416
#endif
417
418
#if ENABLE_MULTIVIEW
419
            if (vps.m_numViews > 1)
420
            {
421
                for (uint8_t i = 0; i < vps.m_numViews; i++)
422
                    WRITE_CODE(i, vps.m_viewIdLen, "view_id_val[i]");
423
424
                for (int i = 1; i < vps.m_numViews; i++)
425
                {
426
                    for (int j = 0; j < i; j++)
427
                    {
428
                        if (j == 0)
429
                            WRITE_FLAG(1, "direct_dependency_flag[1][0]");
430
                        else
431
                            WRITE_FLAG(0, "direct_dependency_flag[1][0]");
432
                    }
433
                }
434
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
435
                WRITE_FLAG(0, "max_tid_ref_present_flag");
436
                WRITE_FLAG(1, "default_ref_layers_active_flag");
437
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
438
                WRITE_FLAG(1, "vps_profile_present_flag[i]");
439
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
440
                WRITE_UVLC(0, "num_add_olss");
441
                WRITE_CODE(0, 2, "default_output_layer_idc");
442
443
                for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
444
                {
445
                    for (int j = 0; j < vps.m_numViews; j++)
446
                    {
447
                        WRITE_CODE((j == 0) ? 1 : 2, 2, "profile_tier_level_idx[ i ][ j ]");
448
                    }
449
                }
450
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
451
452
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
453
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
454
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
455
456
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
457
458
                if (sps.chromaFormatIdc == X265_CSP_I444)
459
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
460
461
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
462
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
463
464
                const Window& conf = sps.conformanceWindow;
465
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
466
                if (conf.bEnabled)
467
                {
468
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
469
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
470
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
471
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
472
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
473
                }
474
475
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
476
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
477
478
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
479
                {
480
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
481
                    for (uint32_t j = 0; j < vps.maxTempSubLayers; j++)
482
                    {
483
                        if (j > 0)
484
                            WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
485
486
                        for (int k = 0; k < vps.m_numLayersInIdList[i]; k++)
487
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
488
489
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
490
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
491
                    }
492
                }
493
494
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
495
496
                WRITE_FLAG(1, "default_direct_dependency_flag");
497
                WRITE_CODE(2, 2, "default_direct_dependency_type");
498
                WRITE_UVLC(0, "vps_non_vui_extension_length");
499
                WRITE_FLAG(0, "vps_vui_present_flag");
500
                WRITE_FLAG(0, "vps_extension2_flag");
501
            }
502
#endif
503
        }
504
    }
505
    else
506
        WRITE_FLAG(0, "vps_extension_flag");
507
#else
508
619
    WRITE_FLAG(0, "vps_extension_flag");
509
619
#endif
510
619
}
511
512
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl, int layer)
513
619
{
514
619
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
515
#if ENABLE_MULTIVIEW
516
    if(layer != 0)
517
        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
518
    else
519
        WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
520
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
521
#else
522
619
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
523
619
#endif
524
619
    {
525
619
        WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
526
619
        codeProfileTier(ptl, sps.maxTempSubLayers);
527
619
    }
528
529
619
    WRITE_UVLC(layer, "sps_seq_parameter_set_id");
530
#if ENABLE_MULTIVIEW
531
    if (layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
532
        WRITE_FLAG(0, "update_rep_format_flag");
533
    else
534
#endif
535
619
    {
536
619
        WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
537
538
619
        if (sps.chromaFormatIdc == X265_CSP_I444)
539
0
            WRITE_FLAG(0,                       "separate_colour_plane_flag");
540
541
619
        WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
542
619
        WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
543
544
619
        const Window& conf = sps.conformanceWindow;
545
619
        WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
546
619
        if (conf.bEnabled)
547
464
        {
548
464
            int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
549
464
            WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
550
464
            WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
551
464
            WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
552
464
            WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
553
464
        }
554
555
619
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
556
619
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
557
619
    }
558
559
619
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
560
#if ENABLE_MULTIVIEW
561
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
562
#endif
563
619
    {
564
619
        WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
565
566
1.23k
        for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
567
619
        {
568
619
            WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
569
619
            WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
570
619
            WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
571
619
        }
572
619
    }
573
574
619
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
575
619
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
576
619
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
577
619
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
578
619
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
579
619
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
580
619
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
581
619
    if (scalingList.m_bEnabled)
582
0
    {
583
#if ENABLE_MULTIVIEW
584
        if ((layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
585
            WRITE_FLAG(sps.spsInferScalingListFlag, "sps_infer_scaling_list_flag");
586
        if(sps.spsInferScalingListFlag)
587
            WRITE_CODE(0, 6, "sps_scaling_list_ref_layer_id");
588
        else
589
#endif
590
0
        {
591
0
            WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
592
0
            if (scalingList.m_bDataPresent)
593
0
                codeScalingList(scalingList);
594
0
        }
595
0
    }
596
619
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
597
619
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
598
599
619
    WRITE_FLAG(0, "pcm_enabled_flag");
600
619
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
601
619
    for (int i = 0; i < sps.spsrpsNum; i++)
602
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
603
619
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
604
605
619
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
606
619
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
607
608
619
    WRITE_FLAG(1, "vui_parameters_present_flag");
609
619
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo, layer);
610
611
619
    WRITE_FLAG(sps.sps_extension_flag, "sps_extension_flag");
612
613
#if ENABLE_MULTIVIEW
614
    if (sps.sps_extension_flag && sps.maxViews > 1)
615
    {
616
        WRITE_FLAG(0, "sps_range_extensions_flag");
617
        WRITE_FLAG(sps.maxViews > 1, "sps_multilayer_extension_flag");
618
        WRITE_FLAG(0, "sps_3d_extension_flag");
619
        WRITE_CODE(0, 5, "sps_extension_5bits");
620
621
        if (layer == 0)
622
            WRITE_FLAG(0, "inter_view_mv_vert_constraint_flag");
623
        else
624
            WRITE_FLAG(1, "inter_view_mv_vert_constraint_flag");
625
    }
626
#endif
627
628
#if ENABLE_SCC_EXT
629
    if (ptl.profileIdc[0] == Profile::MAINSCC)
630
    {
631
        bool sps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
632
        sps_extension_flags[SCC_EXT_IDX] = true;
633
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
634
            WRITE_FLAG(sps_extension_flags[i], "sps_extension_flag");
635
        WRITE_FLAG(1, "intra_block_copy_enabled_flag");
636
        WRITE_FLAG(0, "palette_mode_enabled_flag");
637
        WRITE_CODE(0, 2, "motion_vector_resolution_control_idc");
638
        WRITE_FLAG(0, "intra_boundary_filter_disabled_flag");
639
    }
640
#endif
641
619
}
642
643
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26, int layer)
644
619
{
645
619
    WRITE_UVLC(layer,                          "pps_pic_parameter_set_id");
646
619
    WRITE_UVLC(layer,                          "pps_seq_parameter_set_id");
647
619
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
648
619
    WRITE_FLAG(0,                          "output_flag_present_flag");
649
619
    WRITE_CODE(pps.maxViews > 1 ? 2 : 0, 3,"num_extra_slice_header_bits");
650
619
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
651
619
    WRITE_FLAG(0,                          "cabac_init_present_flag");
652
619
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
653
619
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
654
655
619
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
656
619
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
657
619
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
658
659
619
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
660
619
    if (pps.bUseDQP)
661
491
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
662
663
619
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
664
619
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
665
619
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
666
667
619
    WRITE_FLAG(layer ? 0 : pps.bUseWeightPred,            "weighted_pred_flag");
668
619
    WRITE_FLAG(layer ? 0 : pps.bUseWeightedBiPred,        "weighted_bipred_flag");
669
619
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
670
619
    WRITE_FLAG(0,                             "tiles_enabled_flag");
671
619
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
672
619
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
673
674
619
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
675
619
    if (pps.bDeblockingFilterControlPresent)
676
0
    {
677
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
678
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
679
0
        if (!pps.bPicDisableDeblockingFilter)
680
0
        {
681
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
682
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
683
0
        }
684
0
    }
685
686
619
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
687
619
    WRITE_FLAG(0, "lists_modification_present_flag");
688
619
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
689
619
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
690
619
    WRITE_FLAG(pps.pps_extension_flag, "pps_extension_flag");
691
692
#if ENABLE_MULTIVIEW
693
    if (pps.pps_extension_flag && pps.maxViews > 1)
694
    {
695
        WRITE_FLAG(0, "pps_range_extensions_flag");
696
        WRITE_FLAG(pps.maxViews > 1, "pps_multilayer_extension_flag");
697
        WRITE_FLAG(0, "pps_3d_extension_flag");
698
        WRITE_CODE(0, 5, "pps_extension_5bits");
699
700
        if (pps.maxViews > 1)
701
        {
702
            WRITE_FLAG(0, "poc_reset_info_present_flag");
703
            WRITE_FLAG(0, "pps_infer_scaling_list_flag");
704
            WRITE_UVLC(0, "num_ref_loc_offsets");
705
            WRITE_FLAG(0, "colour_mapping_enabled_flag");
706
        }
707
    }
708
#endif
709
710
711
#if ENABLE_SCC_EXT
712
    if (pps.profileIdc == Profile::MAINSCC)
713
    {
714
        bool pps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
715
        pps_extension_flags[SCC_EXT_IDX] = true;
716
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
717
            WRITE_FLAG(pps_extension_flags[i], "pps_extension_flag");
718
        WRITE_FLAG(1, "curr_pic_as_ref_enabled_pps_flag");
719
        WRITE_FLAG(0, "adaptive_colour_trans_flag");
720
        WRITE_FLAG(0, "palette_predictor_initializer_flag");
721
    }
722
#endif
723
619
}
724
725
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers, int layer)
726
1.23k
{
727
1.23k
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
728
1.23k
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
729
1.23k
    WRITE_CODE(ptl.profileIdc[layer], 5,   "XXX_profile_idc[]");
730
40.8k
    for (int j = 0; j < 32; j++)
731
39.6k
    {
732
39.6k
        if (layer)
733
0
            WRITE_FLAG(j == ptl.profileIdc[layer] ? 1 : 0, "XXX_profile_compatibility_flag[][j]");
734
39.6k
        else
735
39.6k
            WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
736
39.6k
    }
737
738
1.23k
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
739
1.23k
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
740
1.23k
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
741
1.23k
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
742
743
1.23k
    if (ptl.profileIdc[layer] == Profile::MAINREXT || ptl.profileIdc[layer] == Profile::HIGHTHROUGHPUTREXT || ptl.profileIdc[layer] == Profile::SCALABLEMAIN || ptl.profileIdc[layer] == Profile::SCALABLEMAIN10 || ptl.profileIdc[layer] == Profile::MULTIVIEWMAIN || ptl.profileIdc[layer] == Profile::MAINSCC)
744
0
    {
745
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
746
0
        int csp = ptl.chromaFormatConstraint;
747
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
748
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
749
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
750
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
751
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
752
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
753
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
754
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
755
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
756
0
        if (ptl.profileIdc[layer] == Profile::MAINSCC)
757
0
        {
758
0
            WRITE_FLAG(bitDepthConstraint <= 14, "max_14bit_constraint_flag");
759
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[0..15]");
760
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[16..31]");
761
0
            WRITE_FLAG(0, "reserved_zero_33bits[32]");
762
0
        }
763
0
        else
764
0
        {
765
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[0..15]");
766
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[16..31]");
767
0
            WRITE_CODE(0, 3, "XXX_reserved_zero_35bits[32..34]");
768
0
        }
769
0
    }
770
1.23k
    else
771
1.23k
    {
772
1.23k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
773
1.23k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
774
1.23k
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
775
1.23k
    }
776
1.23k
    if (ptl.profileIdc[layer] == Profile::MAINSCC)
777
0
        WRITE_FLAG(false, "inbld_flag");
778
779
1.23k
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
780
781
1.23k
    if (maxTempSubLayers > 1)
782
0
    {
783
0
        for(int i = 0; i < maxTempSubLayers - 1; i++)
784
0
        {
785
0
            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
786
0
            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
787
0
        }
788
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
789
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
790
0
    }
791
1.23k
}
792
793
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo, int layer)
794
619
{
795
619
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
796
619
    if (vui.aspectRatioInfoPresentFlag)
797
0
    {
798
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
799
0
        if (vui.aspectRatioIdc == 255)
800
0
        {
801
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
802
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
803
0
        }
804
0
    }
805
806
619
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
807
619
    if (vui.overscanInfoPresentFlag)
808
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
809
810
619
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
811
619
    if (vui.videoSignalTypePresentFlag)
812
619
    {
813
619
        WRITE_CODE(vui.videoFormat, 3, "video_format");
814
619
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
815
619
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
816
619
        if (vui.colourDescriptionPresentFlag)
817
0
        {
818
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
819
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
820
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
821
0
        }
822
619
    }
823
824
619
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
825
619
    if (vui.chromaLocInfoPresentFlag)
826
0
    {
827
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
828
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
829
0
    }
830
831
619
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
832
619
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
833
619
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
834
835
619
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
836
619
    if (vui.defaultDisplayWindow.bEnabled)
837
0
    {
838
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
839
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
840
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
841
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
842
0
    }
843
844
619
    if(layer)
845
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
846
619
    else
847
619
    {
848
619
        if (!bEmitVUITimingInfo)
849
0
            WRITE_FLAG(0, "vui_timing_info_present_flag");
850
619
        else
851
619
        {
852
619
            WRITE_FLAG(1, "vui_timing_info_present_flag");
853
619
            WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
854
619
            WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
855
619
            WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
856
619
            if (!bEmitVUIHRDInfo)
857
0
                WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
858
619
            else
859
619
            {
860
619
                WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
861
619
                if (vui.hrdParametersPresentFlag)
862
0
                    codeHrdParameters(vui.hrdParameters, maxSubTLayers);
863
619
            }
864
619
        }
865
619
    }
866
867
619
    WRITE_FLAG(0, "bitstream_restriction_flag");
868
619
}
869
870
void Entropy::codeScalingList(const ScalingList& scalingList)
871
0
{
872
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
873
0
    {
874
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
875
0
        {
876
0
            int predList = scalingList.checkPredMode(sizeId, listId);
877
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
878
0
            if (predList >= 0)
879
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
880
0
            else // DPCM Mode
881
0
                codeScalingList(scalingList, sizeId, listId);
882
0
        }
883
0
    }
884
0
}
885
886
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
887
0
{
888
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
889
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
890
0
    int nextCoef = START_VALUE;
891
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
892
0
    int data;
893
894
0
    if (sizeId > BLOCK_8x8)
895
0
    {
896
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
897
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
898
0
    }
899
0
    for (int i = 0; i < coefNum; i++)
900
0
    {
901
0
        data = src[scan[i]] - nextCoef;
902
0
        if (data < -128)
903
0
            data += 256;
904
0
        if (data > 127)
905
0
            data -= 256;
906
0
        nextCoef = (nextCoef + data + 256) % 256;
907
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
908
0
    }
909
0
}
910
911
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
912
0
{
913
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
914
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
915
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
916
917
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
918
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
919
920
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
921
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
922
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
923
924
0
    for (int i = 0; i < maxSubTLayers; i++)
925
0
    {
926
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
927
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
928
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
929
930
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
931
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
932
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
933
0
    }
934
0
}
935
936
void Entropy::codeAUD(const Slice& slice)
937
0
{
938
0
    int picType;
939
940
0
    switch (slice.m_sliceType)
941
0
    {
942
0
    case I_SLICE:
943
0
        picType = 0;
944
0
        break;
945
0
    case P_SLICE:
946
0
        picType = 1;
947
0
        break;
948
0
    case B_SLICE:
949
0
        picType = 2;
950
0
        break;
951
0
    default:
952
0
        picType = 7;
953
0
        break;
954
0
    }
955
956
0
    WRITE_CODE(picType, 3, "pic_type");
957
0
}
958
959
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp, int layer)
960
619
{
961
619
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
962
619
    if (slice.getRapPicFlag())
963
619
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
964
965
619
    WRITE_UVLC(layer, "slice_pic_parameter_set_id");
966
967
    /* x265 does not use dependent slices, so always write all this data */
968
619
    if (slice_addr)
969
0
    {
970
        // if( dependent_slice_segments_enabled_flag )
971
        //     dependent_slice_segment_flag             u(1)
972
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
973
0
    }
974
975
#if ENABLE_MULTIVIEW
976
    if (encData.m_param->numViews > 1)
977
    {
978
        int esb = 0;
979
        if (2 > esb)
980
        {
981
            esb++;
982
            WRITE_FLAG(0, "discardable_flag");
983
        }
984
        if (2 > esb)
985
        {
986
            esb++;
987
            WRITE_FLAG(0, "cross_layer_bla_flag");
988
        }
989
    }
990
#endif
991
992
619
    WRITE_UVLC(slice.m_sliceType, "slice_type");
993
994
619
    if ((slice.m_param->numViews > 1 && layer > 0) || !slice.getIdrPicFlag())
995
0
    {
996
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
997
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
998
0
    }
999
619
    if (!slice.getIdrPicFlag())
1000
0
    {
1001
#if _DEBUG || CHECKED_BUILD
1002
        // check for bitstream restriction stating that:
1003
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
1004
        // Ideally this process should not be repeated for each slice in a picture
1005
        if (slice.isIRAP())
1006
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
1007
            {
1008
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
1009
            }
1010
#endif
1011
1012
0
        if (slice.m_rpsIdx < 0)
1013
0
        {
1014
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
1015
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
1016
0
        }
1017
0
        else
1018
0
        {
1019
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
1020
0
            int numBits = 0;
1021
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
1022
0
                numBits++;
1023
1024
0
            if (numBits > 0)
1025
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
1026
0
        }
1027
1028
0
        if (slice.m_sps->bTemporalMVPEnabled)
1029
#if ENABLE_SCC_EXT
1030
            WRITE_FLAG(slice.m_bTemporalMvp, "slice_temporal_mvp_enable_flag");
1031
#else
1032
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
1033
0
#endif
1034
0
    }
1035
619
    const SAOParam *saoParam = encData.m_saoParam;
1036
619
    if (slice.m_bUseSao)
1037
619
    {
1038
619
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
1039
619
        if (encData.m_param->internalCsp != X265_CSP_I400)
1040
619
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
1041
619
    }
1042
0
    else if(encData.m_param->selectiveSAO)
1043
0
    {
1044
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
1045
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1046
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
1047
0
    }
1048
1049
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
1050
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
1051
1052
619
    if (!slice.isIntra())
1053
0
    {
1054
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
1055
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
1056
0
        if (overrideFlag)
1057
0
        {
1058
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
1059
0
            if (slice.isInterB())
1060
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
1061
0
            else
1062
0
            {
1063
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
1064
0
            }
1065
0
        }
1066
0
    }
1067
619
    else
1068
619
    {
1069
619
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
1070
619
    }
1071
1072
619
    if (slice.isInterB())
1073
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
1074
1075
#if ENABLE_SCC_EXT
1076
    if (slice.m_bTemporalMvp)
1077
#else
1078
619
    if (slice.m_sps->bTemporalMVPEnabled)
1079
619
#endif
1080
619
    {
1081
619
        if (slice.m_sliceType == B_SLICE)
1082
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
1083
1084
619
        if (slice.m_sliceType != I_SLICE &&
1085
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
1086
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
1087
0
        {
1088
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
1089
0
        }
1090
619
    }
1091
619
    if (((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE)) && !layer)
1092
0
        codePredWeightTable(slice);
1093
1094
619
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
1095
619
    if (!slice.isIntra())
1096
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
1097
1098
619
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
1099
619
    WRITE_SVLC(code, "slice_qp_delta");
1100
1101
619
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
1102
0
    {
1103
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
1104
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
1105
0
    }
1106
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
1107
    //       We didn't support filter across slice board, so disable it now
1108
1109
619
    if (encData.m_param->maxSlices <= 1)
1110
619
    {
1111
619
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
1112
619
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
1113
1114
619
        if (isSAOEnabled || isDBFEnabled)
1115
619
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
1116
619
    }
1117
619
}
1118
1119
/** write wavefront substreams sizes for the slice header */
1120
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
1121
503
{
1122
503
    uint32_t offsetLen = 1;
1123
2.79k
    while (maxOffset >= (1U << offsetLen))
1124
2.29k
    {
1125
2.29k
        offsetLen++;
1126
2.29k
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
1127
2.29k
    }
1128
1129
503
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
1130
503
    if (numSubStreams > 0)
1131
503
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
1132
1133
2.71k
    for (uint32_t i = 0; i < numSubStreams; i++)
1134
2.21k
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
1135
503
}
1136
1137
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
1138
0
{
1139
0
    if (idx > 0)
1140
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
1141
1142
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
1143
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
1144
0
    int prev = 0;
1145
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
1146
0
    {
1147
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
1148
0
        prev = rps.deltaPOC[j];
1149
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
1150
0
    }
1151
1152
0
    prev = 0;
1153
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
1154
0
    {
1155
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
1156
0
        prev = rps.deltaPOC[j];
1157
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
1158
0
    }
1159
0
}
1160
1161
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
1162
26.7k
{
1163
26.7k
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
1164
26.7k
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
1165
26.7k
}
1166
1167
/* encode a CU block recursively */
1168
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
1169
106k
{
1170
106k
    const Slice* slice = ctu.m_slice;
1171
1172
106k
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
1173
106k
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
1174
1175
106k
    if (!cuUnsplitFlag)
1176
23.4k
    {
1177
23.4k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1178
23.4k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1179
6.63k
            bEncodeDQP = true;
1180
117k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1181
93.9k
        {
1182
93.9k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1183
93.9k
            if (childGeom.flags & CUGeom::PRESENT)
1184
52.2k
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1185
93.9k
        }
1186
23.4k
        return;
1187
23.4k
    }
1188
1189
83.0k
    if (cuSplitFlag) 
1190
59.4k
        codeSplitFlag(ctu, absPartIdx, depth);
1191
1192
83.0k
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
1193
6.87k
    {
1194
6.87k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1195
6.87k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1196
274
            bEncodeDQP = true;
1197
34.3k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1198
27.4k
        {
1199
27.4k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1200
27.4k
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1201
27.4k
        }
1202
6.87k
        return;
1203
6.87k
    }
1204
1205
76.1k
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1206
34.4k
        bEncodeDQP = true;
1207
1208
76.1k
    if (slice->m_pps->bTransquantBypassEnabled)
1209
17.4k
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
1210
1211
76.1k
    if (!slice->isIntra())
1212
0
    {
1213
0
        codeSkipFlag(ctu, absPartIdx);
1214
0
        if (ctu.isSkipped(absPartIdx))
1215
0
        {
1216
0
            codeMergeIndex(ctu, absPartIdx);
1217
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1218
0
            return;
1219
0
        }
1220
0
        codePredMode(ctu.m_predMode[absPartIdx]);
1221
0
    }
1222
1223
76.1k
    codePartSize(ctu, absPartIdx, depth);
1224
1225
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1226
76.1k
    codePredInfo(ctu, absPartIdx);
1227
1228
76.1k
    uint32_t tuDepthRange[2];
1229
76.1k
    if (ctu.isIntra(absPartIdx))
1230
76.1k
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
1231
5
    else
1232
5
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
1233
1234
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
1235
76.1k
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
1236
1237
    // --- write terminating bit ---
1238
76.1k
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1239
76.1k
}
1240
1241
/* Return bit count of signaling inter mode */
1242
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
1243
0
{
1244
0
    uint32_t bits;
1245
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
1246
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
1247
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1248
0
    switch (partSize)
1249
0
    {
1250
0
    case SIZE_2Nx2N:
1251
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1252
0
        break;
1253
1254
0
    case SIZE_2NxN:
1255
0
    case SIZE_2NxnU:
1256
0
    case SIZE_2NxnD:
1257
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1258
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1259
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1260
0
        {
1261
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1262
0
            if (partSize != SIZE_2NxN)
1263
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1264
0
        }
1265
0
        break;
1266
1267
0
    case SIZE_Nx2N:
1268
0
    case SIZE_nLx2N:
1269
0
    case SIZE_nRx2N:
1270
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1271
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1272
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1273
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1274
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1275
0
        {
1276
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1277
0
            if (partSize != SIZE_Nx2N)
1278
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1279
0
        }
1280
0
        break;
1281
0
    default:
1282
0
        X265_CHECK(0, "invalid CU partition\n");
1283
0
        break;
1284
0
    }
1285
1286
0
    return bits;
1287
0
}
1288
1289
/* finish encoding a cu and handle end-of-slice conditions */
1290
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
1291
76.1k
{
1292
76.1k
    const Slice* slice = ctu.m_slice;
1293
76.1k
    uint32_t realEndAddress = slice->m_endCUAddr;
1294
76.1k
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
1295
76.1k
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
1296
1297
76.1k
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
1298
76.1k
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
1299
76.1k
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
1300
76.1k
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
1301
76.1k
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
1302
45.1k
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
1303
1304
76.1k
    if (slice->m_pps->bUseDQP)
1305
58.6k
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
1306
1307
76.1k
    if (granularityBoundary)
1308
26.7k
    {
1309
        // Encode slice finish
1310
26.7k
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
1311
26.7k
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
1312
1.23k
            bTerminateSlice = 1;
1313
1314
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
1315
26.7k
        if (!bTerminateSlice)
1316
25.5k
            encodeBinTrm(0);    // end_of_slice_segment_flag
1317
1318
26.7k
        if (!m_bitIf)
1319
13.3k
            resetBits(); // TODO: most likely unnecessary
1320
26.7k
    }
1321
76.1k
}
1322
1323
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1324
                              bool& bCodeDQP, const uint32_t depthRange[2])
1325
1.89M
{
1326
1.89M
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1327
1328
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1329
     * so we have checks to make sure the implied value matches our intentions */
1330
1.89M
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1331
288k
    {
1332
288k
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1333
288k
    }
1334
1.60M
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1335
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1336
0
    {
1337
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1338
0
    }
1339
1.60M
    else if (log2CurSize > depthRange[1])
1340
0
    {
1341
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1342
0
    }
1343
1.60M
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1344
1.15M
    {
1345
1.15M
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1346
1.15M
    }
1347
448k
    else
1348
448k
    {
1349
448k
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1350
448k
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1351
448k
    }
1352
1353
1.89M
    uint32_t hChromaShift = cu.m_hChromaShift;
1354
1.89M
    uint32_t vChromaShift = cu.m_vChromaShift;
1355
1.89M
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
1356
1.89M
    if (!curDepth || !bSmallChroma)
1357
738k
    {
1358
738k
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
1359
738k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
1360
738k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
1361
738k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
1362
738k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
1363
738k
    }
1364
1365
1.89M
    if (subdiv)
1366
289k
    {
1367
289k
        --log2CurSize;
1368
289k
        ++curDepth;
1369
1370
289k
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1371
1372
289k
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1373
289k
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1374
289k
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1375
289k
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1376
289k
        return;
1377
289k
    }
1378
1379
1.60M
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
1380
1381
1.60M
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
1382
0
    {
1383
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
1384
0
    }
1385
1.60M
    else
1386
1.60M
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1387
1388
1.60M
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1389
1.60M
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
1390
1.60M
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
1391
1.60M
    if (!(cbfY || cbfU || cbfV))
1392
1.59M
        return;
1393
1394
    // dQP: only for CTU once
1395
7.36k
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1396
3.62k
    {
1397
3.62k
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1398
3.62k
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1399
3.62k
        codeDeltaQP(cu, absPartIdxLT);
1400
3.62k
        bCodeDQP = false;
1401
3.62k
    }
1402
1403
7.36k
    if (cbfY)
1404
4.38k
    {
1405
4.38k
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1406
4.38k
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1407
4.38k
        if (!(cbfU || cbfV))
1408
583
            return;
1409
4.38k
    }
1410
1411
6.77k
    if (bSmallChroma)
1412
4.02k
    {
1413
4.02k
        if ((absPartIdx & 3) != 3)
1414
3.02k
            return;
1415
1416
1.00k
        const uint32_t log2CurSizeC = 2;
1417
1.00k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1418
1.00k
        const uint32_t curPartNum = 4;
1419
1.00k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1420
3.02k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1421
2.01k
        {
1422
2.01k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1423
2.01k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1424
2.01k
            do
1425
2.01k
            {
1426
2.01k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1427
2.01k
                {
1428
2.01k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1429
2.01k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1430
2.01k
                }
1431
2.01k
            }
1432
2.01k
            while (tuIterator.isNextSection());
1433
2.01k
        }
1434
1.00k
    }
1435
2.75k
    else
1436
2.75k
    {
1437
2.75k
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1438
2.75k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1439
2.75k
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1440
2.75k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1441
8.31k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1442
5.56k
        {
1443
5.56k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1444
5.56k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1445
5.56k
            do
1446
5.56k
            {
1447
5.56k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1448
5.56k
                {
1449
5.56k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1450
5.56k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1451
5.56k
                }
1452
5.56k
            }
1453
5.56k
            while (tuIterator.isNextSection());
1454
5.56k
        }
1455
2.75k
    }
1456
6.77k
}
1457
1458
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1459
                              bool& bCodeDQP, const uint32_t depthRange[2])
1460
0
{
1461
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1462
1463
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1464
     * so we have checks to make sure the implied value matches our intentions */
1465
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1466
0
    {
1467
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1468
0
    }
1469
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1470
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1471
0
    {
1472
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1473
0
    }
1474
0
    else if (log2CurSize > depthRange[1])
1475
0
    {
1476
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1477
0
    }
1478
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1479
0
    {
1480
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1481
0
    }
1482
0
    else
1483
0
    {
1484
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1485
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1486
0
    }
1487
1488
0
    if (subdiv)
1489
0
    {
1490
0
        --log2CurSize;
1491
0
        ++curDepth;
1492
1493
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1494
1495
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1496
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1497
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1498
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1499
0
        return;
1500
0
    }
1501
1502
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1503
0
    {
1504
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1505
0
    }
1506
0
    else
1507
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1508
1509
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1510
1511
0
    if (!cbfY)
1512
0
        return;
1513
1514
    // dQP: only for CTU once
1515
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1516
0
    {
1517
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1518
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1519
0
        codeDeltaQP(cu, absPartIdxLT);
1520
0
        bCodeDQP = false;
1521
0
    }
1522
1523
0
    if (cbfY)
1524
0
    {
1525
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1526
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1527
0
    }
1528
0
}
1529
1530
1531
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1532
737k
{
1533
737k
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1534
737k
    {
1535
737k
        codeIntraDirLumaAng(cu, absPartIdx, true);
1536
737k
        if (cu.m_chromaFormat != X265_CSP_I400)
1537
737k
        {
1538
737k
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1539
737k
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1540
1541
737k
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1542
1543
737k
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1544
0
            {
1545
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1546
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1547
0
                {
1548
0
                    absPartIdx += qNumParts;
1549
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1550
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1551
0
                }
1552
0
            }
1553
737k
        }
1554
737k
    }
1555
123
    else // if it is inter mode, encode motion vector and reference index
1556
123
        codePUWise(cu, absPartIdx);
1557
737k
}
1558
1559
/** encode motion information for every PU block */
1560
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1561
0
{
1562
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1563
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1564
1565
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1566
0
    {
1567
0
        codeMergeFlag(cu, subPartIdx);
1568
0
        if (cu.m_mergeFlag[subPartIdx])
1569
0
            codeMergeIndex(cu, subPartIdx);
1570
0
        else
1571
0
        {
1572
0
            if (cu.m_slice->isInterB())
1573
0
                codeInterDir(cu, subPartIdx);
1574
1575
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1576
0
            for (uint32_t list = 0; list < 2; list++)
1577
0
            {
1578
0
                if (interDir & (1 << list))
1579
0
                {
1580
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1581
1582
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1583
0
                    codeMvd(cu, subPartIdx, list);
1584
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1585
0
                }
1586
0
            }
1587
0
        }
1588
0
    }
1589
0
}
1590
1591
/** encode reference frame index for a PU block */
1592
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1593
0
{
1594
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1595
1596
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1597
0
        codeRefFrmIdx(cu, absPartIdx, list);
1598
0
}
1599
1600
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1601
737k
{
1602
737k
    if (!cu.isIntra(absPartIdx))
1603
0
    {
1604
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1605
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1606
0
        if (!cu.getQtRootCbf(absPartIdx))
1607
0
            return;
1608
0
    }
1609
1610
737k
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1611
737k
    if (cu.m_chromaFormat == X265_CSP_I400)
1612
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1613
737k
    else
1614
737k
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1615
737k
}
1616
1617
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1618
52.3k
{
1619
52.3k
    int typeIdx = ctuParam.typeIdx;
1620
1621
52.3k
    if (plane != 2)
1622
34.9k
    {
1623
34.9k
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1624
34.9k
        if (typeIdx >= 0)
1625
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1626
34.9k
    }
1627
1628
52.3k
    if (typeIdx >= 0)
1629
0
    {
1630
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1631
0
        if (typeIdx == SAO_BO)
1632
0
        {
1633
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1634
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1635
1636
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1637
0
                if (ctuParam.offset[i] != 0)
1638
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1639
1640
0
            encodeBinsEP(ctuParam.bandPos, 5);
1641
0
        }
1642
0
        else // if (typeIdx < SAO_BO)
1643
0
        {
1644
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1645
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1646
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1647
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1648
0
            if (plane != 2)
1649
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1650
0
        }
1651
0
    }
1652
52.3k
}
1653
1654
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1655
160k
{
1656
160k
    if (plane != 2)
1657
107k
    {
1658
107k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1659
107k
        encodeBinEP(1);
1660
107k
    }
1661
1662
160k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1663
1664
160k
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1665
160k
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1666
160k
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1667
160k
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1668
160k
    if (plane != 2)
1669
107k
        encodeBinsEP((uint32_t)(typeIdx), 2);
1670
160k
}
1671
1672
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1673
40.1k
{
1674
40.1k
    if (plane != 2)
1675
26.7k
    {
1676
26.7k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1677
26.7k
        encodeBinEP(0);
1678
26.7k
    }
1679
1680
40.1k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1681
1682
200k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1683
160k
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1684
1685
200k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1686
160k
        if (offset[i] != 0)
1687
93
            encodeBinEP(offset[i] < 0);
1688
1689
40.1k
    encodeBinsEP(bandPos, 5);
1690
40.1k
}
1691
1692
/** initialize context model with respect to QP and initialization value */
1693
uint8_t sbacInit(int qp, int initValue)
1694
97.1k
{
1695
97.1k
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1696
1697
97.1k
    int  slope      = (initValue >> 4) * 5 - 45;
1698
97.1k
    int  offset     = ((initValue & 15) << 3) - 16;
1699
97.1k
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1700
97.1k
    uint32_t mpState = (initState >= 64);
1701
97.1k
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1702
1703
97.1k
    return (uint8_t)state;
1704
97.1k
}
1705
1706
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1707
16.0k
{
1708
16.0k
    ctxModel += sliceType * size;
1709
1710
113k
    for (int n = 0; n < size; n++)
1711
97.1k
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1712
16.0k
}
1713
1714
void Entropy::resetEntropy(const Slice& slice)
1715
619
{
1716
619
    int  qp              = slice.m_sliceQp;
1717
619
    SliceType sliceType  = slice.m_sliceType;
1718
1719
619
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1720
619
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1721
619
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1722
619
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1723
619
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1724
619
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1725
619
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1726
619
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1727
619
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1728
619
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1729
619
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1730
619
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1731
619
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1732
619
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1733
619
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1734
619
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1735
619
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1736
619
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1737
619
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1738
619
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1739
619
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1740
619
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1741
619
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1742
619
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1743
619
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1744
619
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1745
    // new structure
1746
1747
619
    start();
1748
619
}
1749
1750
/* code explicit wp tables */
1751
void Entropy::codePredWeightTable(const Slice& slice)
1752
0
{
1753
0
    const WeightParam *wp;
1754
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1755
0
    bool            bDenomCoded  = false;
1756
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1757
0
    uint32_t        totalSignalledWeightFlags = 0;
1758
1759
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1760
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1761
0
    {
1762
0
        for (int list = 0; list < numRefDirs; list++)
1763
0
        {
1764
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1765
0
            {
1766
0
                wp = slice.m_weightPredTable[list][ref];
1767
0
                if (!bDenomCoded)
1768
0
                {
1769
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1770
1771
0
                    if (bChroma)
1772
0
                    {
1773
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1774
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1775
0
                    }
1776
0
                    bDenomCoded = true;
1777
0
                }
1778
#if ENABLE_SCC_EXT
1779
                if (slice.m_poc == slice.m_refPOCList[list][ref])
1780
                    assert(!wp[0].wtPresent);
1781
                else
1782
#endif
1783
0
                    WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1784
0
                totalSignalledWeightFlags = totalSignalledWeightFlags + wp[0].wtPresent;
1785
0
            }
1786
1787
0
            if (bChroma)
1788
0
            {
1789
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1790
0
                {
1791
0
                    wp = slice.m_weightPredTable[list][ref];
1792
#if ENABLE_SCC_EXT
1793
                    if (slice.m_poc == slice.m_refPOCList[list][ref])
1794
                        assert(!wp[1].wtPresent);
1795
                    else
1796
#endif
1797
0
                        WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1798
0
                    totalSignalledWeightFlags = totalSignalledWeightFlags + 2 * wp[1].wtPresent;
1799
0
                }
1800
0
            }
1801
1802
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1803
0
            {
1804
0
                wp = slice.m_weightPredTable[list][ref];
1805
0
                if (wp[0].wtPresent)
1806
0
                {
1807
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1808
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1809
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1810
0
                }
1811
1812
0
                if (bChroma)
1813
0
                {
1814
0
                    if (wp[1].wtPresent)
1815
0
                    {
1816
0
                        for (int plane = 1; plane < 3; plane++)
1817
0
                        {
1818
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1819
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1820
1821
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1822
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1823
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1824
0
                        }
1825
0
                    }
1826
0
                }
1827
0
            }
1828
0
        }
1829
1830
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1831
0
    }
1832
0
}
1833
1834
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1835
4.82k
{
1836
4.82k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1837
1838
4.82k
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1839
1840
4.82k
    if (!symbol)
1841
527
        return;
1842
1843
4.30k
    bool bCodeLast = (maxSymbol > symbol);
1844
1845
20.9k
    while (--symbol)
1846
16.6k
        encodeBin(1, scmModel[offset]);
1847
1848
4.30k
    if (bCodeLast)
1849
194
        encodeBin(0, scmModel[offset]);
1850
4.30k
}
1851
1852
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1853
4.10k
{
1854
4.10k
    uint32_t bins = 0;
1855
4.10k
    int numBins = 0;
1856
1857
16.2k
    while (symbol >= (uint32_t)(1 << count))
1858
12.0k
    {
1859
12.0k
        bins = 2 * bins + 1;
1860
12.0k
        numBins++;
1861
12.0k
        symbol -= 1 << count;
1862
12.0k
        count++;
1863
12.0k
    }
1864
1865
4.10k
    bins = 2 * bins + 0;
1866
4.10k
    numBins++;
1867
1868
4.10k
    bins = (bins << count) | symbol;
1869
4.10k
    numBins += count;
1870
1871
4.10k
    X265_CHECK(numBins <= 32, "numBins too large\n");
1872
4.10k
    encodeBinsEP(bins, numBins);
1873
4.10k
}
1874
1875
/** Coding of coeff_abs_level_minus3 */
1876
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1877
7.61k
{
1878
7.61k
    uint32_t length;
1879
7.61k
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1880
1881
7.61k
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1882
0
    {
1883
0
        length = codeNumber >> absGoRice;
1884
1885
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1886
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1887
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1888
0
    }
1889
7.61k
    else
1890
7.61k
    {
1891
7.61k
        length = 0;
1892
7.61k
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1893
7.61k
        {
1894
7.61k
            unsigned long idx;
1895
7.61k
            BSR(idx, codeNumber + 1);
1896
7.61k
            length = idx;
1897
7.61k
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1898
7.61k
            codeNumber -= (1 << idx) - 1;
1899
7.61k
        }
1900
7.61k
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1901
1902
7.61k
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1903
7.61k
        encodeBinsEP(codeNumber, length + absGoRice);
1904
7.61k
    }
1905
7.61k
}
1906
1907
// SBAC RD
1908
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1909
1.52M
{
1910
1.52M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1911
1.52M
    m_fracBits = src.m_fracBits;
1912
1.52M
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1913
1.52M
}
1914
1915
void Entropy::copyFrom(const Entropy& src)
1916
10.5M
{
1917
10.5M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1918
1919
10.5M
    copyState(src);
1920
1921
10.5M
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1922
10.5M
    markValid();
1923
10.5M
}
1924
1925
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1926
2.44M
{
1927
2.44M
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1928
1929
2.44M
    if (cu.isIntra(absPartIdx))
1930
2.44M
    {
1931
2.44M
        if (depth == cu.m_encData->m_param->maxCUDepth)
1932
2.05M
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1933
2.44M
        return;
1934
2.44M
    }
1935
1936
272
    switch (partSize)
1937
272
    {
1938
0
    case SIZE_2Nx2N:
1939
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1940
0
        break;
1941
1942
0
    case SIZE_2NxN:
1943
0
    case SIZE_2NxnU:
1944
0
    case SIZE_2NxnD:
1945
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1946
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1947
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1948
0
        {
1949
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1950
0
            if (partSize != SIZE_2NxN)
1951
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1952
0
        }
1953
0
        break;
1954
1955
0
    case SIZE_Nx2N:
1956
0
    case SIZE_nLx2N:
1957
0
    case SIZE_nRx2N:
1958
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1959
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1960
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1961
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1962
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1963
0
        {
1964
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1965
0
            if (partSize != SIZE_Nx2N)
1966
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1967
0
        }
1968
0
        break;
1969
0
    default:
1970
0
        X265_CHECK(0, "invalid CU partition\n");
1971
0
        break;
1972
272
    }
1973
272
}
1974
1975
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1976
0
{
1977
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1978
1979
0
    if (numCand > 1)
1980
0
    {
1981
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1982
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1983
1984
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1985
1986
0
        if (unaryIdx != 0)
1987
0
        {
1988
0
            uint32_t mask = (1 << unaryIdx) - 2;
1989
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1990
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1991
0
        }
1992
0
    }
1993
0
}
1994
1995
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1996
4.16M
{
1997
4.16M
    uint32_t dir[4], j;
1998
4.16M
    uint32_t preds[4][3];
1999
4.16M
    int predIdx[4];
2000
4.16M
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
2001
4.16M
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
2002
2003
9.19M
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
2004
5.02M
    {
2005
5.02M
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
2006
5.02M
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
2007
5.02M
        predIdx[j] = -1;
2008
20.0M
        for (uint32_t i = 0; i < 3; i++)
2009
15.0M
            if (dir[j] == preds[j][i])
2010
5.01M
                predIdx[j] = i;
2011
2012
5.02M
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
2013
5.02M
    }
2014
2015
9.18M
    for (j = 0; j < partNum; j++)
2016
5.02M
    {
2017
5.02M
        if (predIdx[j] != -1)
2018
5.01M
        {
2019
5.01M
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
2020
            // NOTE: Mapping
2021
            //       0 = 0
2022
            //       1 = 10
2023
            //       2 = 11
2024
5.01M
            int nonzero = (!!predIdx[j]);
2025
5.01M
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
2026
5.01M
        }
2027
10.1k
        else
2028
10.1k
        {
2029
10.1k
            if (preds[j][0] > preds[j][1])
2030
408
                std::swap(preds[j][0], preds[j][1]);
2031
2032
10.1k
            if (preds[j][0] > preds[j][2])
2033
0
                std::swap(preds[j][0], preds[j][2]);
2034
2035
10.1k
            if (preds[j][1] > preds[j][2])
2036
0
                std::swap(preds[j][1], preds[j][2]);
2037
2038
10.1k
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
2039
18.4E
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
2040
18.4E
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
2041
2042
10.1k
            encodeBinsEP(dir[j], 5);
2043
10.1k
        }
2044
5.02M
    }
2045
4.16M
}
2046
2047
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
2048
4.04M
{
2049
4.04M
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
2050
2051
4.04M
    if (intraDirChroma == DM_CHROMA_IDX)
2052
1.07M
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
2053
2.96M
    else
2054
2.96M
    {
2055
6.93M
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
2056
6.93M
        {
2057
6.93M
            if (intraDirChroma == chromaDirMode[i])
2058
2.96M
            {
2059
2.96M
                intraDirChroma = i;
2060
2.96M
                break;
2061
2.96M
            }
2062
6.93M
        }
2063
2064
2.96M
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
2065
2.96M
        encodeBinsEP(intraDirChroma, 2);
2066
2.96M
    }
2067
4.04M
}
2068
2069
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
2070
0
{
2071
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
2072
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
2073
2074
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
2075
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
2076
0
    if (interDir < 2)
2077
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
2078
0
}
2079
2080
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
2081
0
{
2082
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
2083
2084
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
2085
2086
0
    if (refFrame > 0)
2087
0
    {
2088
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
2089
0
        if (refNum == 0)
2090
0
            return;
2091
2092
0
        refFrame--;
2093
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
2094
0
        if (refFrame > 0)
2095
0
        {
2096
0
            uint32_t mask = (1 << refFrame) - 2;
2097
0
            mask >>= (refFrame == refNum) ? 1 : 0;
2098
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
2099
0
        }
2100
0
    }
2101
0
}
2102
2103
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
2104
0
{
2105
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
2106
0
    const int hor = mvd.x;
2107
0
    const int ver = mvd.y;
2108
2109
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2110
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2111
2112
0
    const bool bHorAbsGr0 = hor != 0;
2113
0
    const bool bVerAbsGr0 = ver != 0;
2114
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
2115
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
2116
2117
0
    if (bHorAbsGr0)
2118
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2119
2120
0
    if (bVerAbsGr0)
2121
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2122
2123
0
    if (bHorAbsGr0)
2124
0
    {
2125
0
        if (horAbs > 1)
2126
0
            writeEpExGolomb(horAbs - 2, 1);
2127
2128
0
        encodeBinEP(0 > hor ? 1 : 0);
2129
0
    }
2130
2131
0
    if (bVerAbsGr0)
2132
0
    {
2133
0
        if (verAbs > 1)
2134
0
            writeEpExGolomb(verAbs - 2, 1);
2135
2136
0
        encodeBinEP(0 > ver ? 1 : 0);
2137
0
    }
2138
0
}
2139
2140
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
2141
4.82k
{
2142
4.82k
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
2143
2144
4.82k
    int qpBdOffsetY = QP_BD_OFFSET;
2145
2146
4.82k
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
2147
2148
4.82k
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
2149
4.82k
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
2150
4.82k
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
2151
4.82k
    if (absDQp >= CU_DQP_TU_CMAX)
2152
4.10k
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
2153
2154
4.82k
    if (absDQp > 0)
2155
4.30k
    {
2156
4.30k
        uint32_t sign = (dqp > 0 ? 0 : 1);
2157
4.30k
        encodeBinEP(sign);
2158
4.30k
    }
2159
4.82k
}
2160
2161
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
2162
8.08M
{
2163
8.08M
    uint32_t ctx = tuDepth + 2;
2164
2165
8.08M
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
2166
8.08M
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
2167
8.08M
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
2168
2169
8.08M
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
2170
0
    {
2171
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
2172
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
2173
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
2174
2175
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2176
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2177
0
    }
2178
8.08M
    else
2179
8.08M
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2180
8.08M
}
2181
2182
#if CHECKED_BUILD || _DEBUG
/* Reference model (checked/debug builds only) used to cross-check
 * primitives.costCoeffRemain(): returns the number of bins needed to code the
 * remaining absolute levels of the coefficients in absCoeff[0 .. numNonZero-1].
 * At least one coefficient is always processed. */
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
    uint32_t riceParam = 0;
    uint32_t totalBins = 0;
    uint32_t packedBaseLevels = 0x5555AAAA; // 2-bits encode format baseLevel
    int noLevel2Yet = 1;
    int pos = 0;

    do
    {
        const int level = absCoeff[pos];
        const int baseLevel = (packedBaseLevels & 3) | noLevel2Yet;
        X265_CHECK(baseLevel == ((pos < C1FLAG_NUMBER) ? (2 + noLevel2Yet) : 1), "baseLevel check failurr\n");
        packedBaseLevels >>= 2;

        int remaining = level - baseLevel;
        if (remaining >= 0)
        {
            // mirrors writeCoefRemainExGolomb(level - baseLevel, riceParam)
            remaining = ((uint32_t)remaining >> riceParam) - COEF_REMAIN_BIN_REDUCTION;
            if (remaining >= 0)
            {
                uint32_t length = 0;
                {
                    unsigned long msb;
                    BSR(msb, remaining + 1);
                    length = msb;
                }
                X265_CHECK((remaining != 0) || (length == 0), "length check failure\n");

                remaining = (length + length);
            }
            totalBins += (COEF_REMAIN_BIN_REDUCTION + 1 + riceParam + remaining);

            // adapt the Rice parameter; the (riceParam >> 2) term stops it growing past 4
            if (level > (COEF_REMAIN_BIN_REDUCTION << riceParam))
                riceParam = (riceParam + 1) - (riceParam >> 2);
            X265_CHECK(riceParam <= 4, "goRiceParam check failure\n");
        }

        if (level >= 2)
            noLevel2Yet = 0;
        pos++;
    }
    while (pos < numNonZero);

    return totalBins;
}
#endif // debug only code
2230
2231
/* Code (or, when m_bitIf is NULL, estimate the cost of) the residual
 * coefficients of one transform block.
 *
 * cu         - coding unit the block belongs to
 * coeff      - quantized coefficients of the block (trSize x trSize, row stride trSize)
 * absPartIdx - partition index of the block inside the CU
 * log2TrSize - log2 of the transform size
 * ttype      - plane being coded (TEXT_LUMA or a chroma plane)
 *
 * Stages, in order: optional transform-skip flag, position of the last
 * significant coefficient, then for each coefficient group from the last one
 * down to group 0: group flag, significance flags, greater-than-1 /
 * greater-than-2 flags, sign bits and remaining levels.  When m_bitIf is NULL
 * the significance and level stages add their cost to m_fracBits through the
 * costCoeffNxN / costC1C2Flag / costCoeffRemain primitives instead of emitting
 * bins one by one. */
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
{
    uint32_t trSize = 1 << log2TrSize;
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
    // compute number of significant coefficients
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
    X265_CHECK(numSig > 0, "cbf check fail\n");
    // sign hiding is never applied to transquant-bypass blocks
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;

    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);

    bool bIsLuma = ttype == TEXT_LUMA;

    // select scans
    TUEntropyCodingParameters codingParameters;
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);

    // per coefficient-group summaries, filled in by primitives.scanPosLast() below
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff

    //----- encode significance map -----

    // Find position of last coefficient
    int scanPosLast = 0;
    uint32_t posLast;
    uint64_t sigCoeffGroupFlag64 = 0;
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");

    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
    posLast = codingParameters.scan[scanPosLast];

    // index (in scan order) of the coefficient group that holds the last coefficient
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;

    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
    for(int idx = 0; idx < lastScanSet; idx++)
    {
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
        const uint8_t nonZero = (coeffNum[idx] != 0);
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
    }


    // Code position of last coefficient
    {
        // The last position is composed of a prefix and suffix.
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
        // The bypass coded bins for both the x and y components are grouped together.
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
        // swap the two components for the vertical scan
        if (codingParameters.scanType == SCAN_VER)
            std::swap(pos[0], pos[1]);

        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");

        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
        // i == 0 codes pos[0], i == 1 codes pos[1]; the second pass uses the
        // contexts NUM_CTX_LAST_FLAG_XY further on
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
        {
            // table entry packs the prefix length (low 4 bits) and the suffix length (remaining bits)
            uint32_t temp = g_lastCoeffTable[pos[i]];
            uint32_t prefixOnes = temp & 15;
            uint32_t suffixLen = temp >> 4;

            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));

            // the terminating zero is omitted when the prefix has its maximum length
            if (prefixOnes < maxGroupIdx)
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));

            packedSuffixBits <<= suffixLen;
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
            packedSuffixLen += suffixLen;
        }

        encodeBinsEP(packedSuffixBits, packedSuffixLen);
    }

    // code significance flag
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
    uint32_t c1 = 1;
    // scan offset, inside the last group, of the coefficient just before the last significant one
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
    // the last significant coefficient is known to be non-zero, so it is pre-counted
    uint32_t numNonZero = 1;
    unsigned long lastNZPosInCG = 0;
    unsigned long firstNZPosInCG = 0;

#if _DEBUG
    // Unnecessary, for Valgrind-3.10.0 only
    memset(absCoeff, 0, sizeof(absCoeff));
#endif

    absCoeff[0] = (uint16_t)abs(coeff[posLast]);

    // walk the coefficient groups from the one holding the last coefficient down to group 0
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
    {
        const uint32_t subCoeffFlag = coeffFlag[subSet];
        uint32_t scanFlagMask = subCoeffFlag;
        int subPosBase = subSet << MLS_CG_SIZE;

        if (subSet == lastScanSet)
        {
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
            // drop the bit of the last coefficient: its significance is implied by the coded position
            scanFlagMask >>= 1;
        }

        // encode significant_coeffgroup_flag
        const int cgBlkPos = codingParameters.scanCG[subSet];
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);

        // the last group and group 0 are always treated as significant; no flag is coded for them
        if (subSet == lastScanSet || !subSet)
            sigCoeffGroupFlag64 |= cgBlkPosMask;
        else
        {
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
        }

        // encode significant_coeff_flag
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
        {
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;

            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
            {
                // patternSigCtx = 0
                {
                    2, 1, 1, 0,
                    1, 1, 0, 0,
                    1, 0, 0, 0,
                    0, 0, 0, 0,
                },
                // patternSigCtx = 1
                {
                    2, 2, 2, 2,
                    1, 1, 1, 1,
                    0, 0, 0, 0,
                    0, 0, 0, 0,
                },
                // patternSigCtx = 2
                {
                    2, 1, 0, 0,
                    2, 1, 0, 0,
                    2, 1, 0, 0,
                    2, 1, 0, 0,
                },
                // patternSigCtx = 3
                {
                    2, 2, 2, 2,
                    2, 2, 2, 2,
                    2, 2, 2, 2,
                    2, 2, 2, 2,
                },
                // 4x4
                {
                    0, 1, 4, 5,
                    2, 3, 4, 5,
                    6, 6, 8, 8,
                    7, 7, 8, 8
                }
            };

            const int offset = codingParameters.firstSignificanceMapContext;
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];

            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
            // a bitstream is attached: emit every significance bin
            if (m_bitIf)
            {
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
                memset(tmpCoeff, 0, sizeof(tmpCoeff));

                // gather the absolute values of this group's coefficients
                // TODO: accelerate by PABSW
                for (int i = 0; i < MLS_CG_SIZE; i++)
                {
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
                }

                if (log2TrSize == 2)
                {
                    // 4x4 blocks take the context straight from the dedicated table row
                    do
                    {
                        uint32_t blkPos, sig, ctxSig;
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
                        sig     = scanFlagMask & 1;
                        scanFlagMask >>= 1;
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
                        {
                            ctxSig = table_cnt[4][blkPos];
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
                            encodeBin(sig, baseCtx[ctxSig]);
                        }
                        // always stored; the slot is only kept (index advanced) when the coefficient is significant
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
                        numNonZero += sig;
                        scanPosSigOff--;
                    }
                    while(scanPosSigOff >= 0);
                }
                else
                {
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");

                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
                    do
                    {
                        uint32_t blkPos, sig, ctxSig;
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
                        // all-ones except at scan position 0 of the block, which forces context 0
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
                        sig     = scanFlagMask & 1;
                        scanFlagMask >>= 1;
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
                        // the flag at offset 0 of a group other than group 0 is skipped
                        // when no coefficient has been counted in this group yet
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
                        {
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
                            ctxSig = (cnt + posOffset) & posZeroMask;

                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
                            encodeBin(sig, baseCtx[ctxSig]);
                        }
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
                        numNonZero += sig;
                        scanPosSigOff--;
                    }
                    while(scanPosSigOff >= 0);
                }
            }
            else // fast RD path
            {
                // g_entropyBits values are at most 18 bits and at most 16 of them are summed, so the intermediate sum needs 22 bits
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);

#if CHECKED_BUILD || _DEBUG
                numNonZero = coeffNum[subSet];
#endif
                // update RD cost
                m_fracBits += sum;
            } // end of fast RD path -- !m_bitIf
        }
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");

        uint32_t coeffSigns = coeffSign[subSet];
        numNonZero = coeffNum[subSet];
        if (numNonZero > 0)
        {
            uint32_t idx = 0;
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
            BSR(lastNZPosInCG, subCoeffFlag);
            BSF(firstNZPosInCG, subCoeffFlag);

            // a sign may be hidden only when first and last non-zero positions are far enough apart
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");

            c1 = 1;
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];

            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");

            // RD estimation path: accumulate costs instead of emitting bins
            if (!m_bitIf)
            {
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
                // packed result: bits 28 and up = firstC2Idx, bits 26-27 = c1, low 24 bits = cost
                uint32_t firstC2Idx = (sum >> 28);
                c1 = ((sum >> 26) & 3);
                m_fracBits += sum & 0x00FFFFFF;

                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
                // each sign bit adds 1 << 15 to m_fracBits; a hidden sign is not counted
                m_fracBits += (numNonZero + hiddenShift) << 15;

                if (numNonZero > firstC2Idx)
                {
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
                    m_fracBits += ((uint64_t)sum << 15);
                }
            }
            // Standard path
            else
            {
                uint32_t firstC2Idx = 8;          // 8 means "no coefficient greater than 1 seen yet"
                uint32_t firstC2Flag = 2;         // 2 means "not decided yet"
                uint32_t c1Next = 0xFFFFFFFE;     // packed 2-bit sequence of upcoming c1 values

                idx = 0;
                do
                {
                    const uint32_t symbol1 = absCoeff[idx] > 1;
                    const uint32_t symbol2 = absCoeff[idx] > 2;
                    encodeBin(symbol1, baseCtxMod[c1]);

                    // once a level above 1 is seen, all following c1 values are 0
                    if (symbol1)
                        c1Next = 0;

                    // latch the greater-than-2 flag and the index of the first coefficient above 1
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;

                    c1 = (c1Next & 3);
                    c1Next >>= 2;
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
                    idx++;
                }
                while(idx < numC1Flag);

                if (!c1)
                {
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];

                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
                    encodeBin(firstC2Flag, baseCtxMod[0]);
                }

                // when a sign is hidden, the lowest sign bit is dropped and one fewer bit is written
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);

                if (!c1 || numNonZero > C1FLAG_NUMBER)
                {
                    // Standard path
                    uint32_t goRiceParam = 0;
                    int baseLevel = 3;
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
#if CHECKED_BUILD || _DEBUG
                    int firstCoeff2 = 1;
#endif
                    idx = firstC2Idx;
                    do
                    {
                        if (idx >= C1FLAG_NUMBER)
                            baseLevel = 1;
                        // TODO: the fast algorithm may have broken this check logic
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");

                        if (absCoeff[idx] >= baseLevel)
                        {
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
                            // raise the Rice parameter (up to 4) when the level exceeds the threshold; the threshold doubles with it
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
                            goRiceParam += adjust;
                            threshold += (adjust) ? threshold : 0;
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
                        }
#if CHECKED_BUILD || _DEBUG
                        firstCoeff2 = 0;
#endif
                        baseLevel = 2;
                        idx++;
                    }
                    while(idx < numNonZero);
                }
            } // end of !bitIf
        } // end of (numNonZero > 0)

        // Initialize value for next loop
        numNonZero = 0;
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
    }
}
2604
2605
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2606
803k
{
2607
803k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2608
2609
803k
    uint32_t isCodeNonZero = !!code;
2610
2611
803k
    encodeBinEP(isCodeNonZero);
2612
803k
    if (isCodeNonZero)
2613
93
    {
2614
93
        uint32_t isCodeLast = (maxSymbol > code);
2615
93
        uint32_t mask = (1 << (code - 1)) - 1;
2616
93
        uint32_t len = code - 1 + isCodeLast;
2617
93
        mask <<= isCodeLast;
2618
2619
93
        encodeBinsEP(mask, len);
2620
93
    }
2621
803k
}
2622
2623
/* Fill estBitsSbac with bit-cost estimates derived from the current context
 * states: coded block flags, significant coefficient group map, significance
 * map and significant coefficient levels.
 *
 * estBitsSbac - output tables
 * log2TrSize  - log2 of the transform size; forwarded to the significance map estimate
 * bIsLuma     - selects the luma or the chroma contexts */
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
{
    // coded block flag and root coded block flag costs
    estCBFBit(estBitsSbac);

    // coefficient group flag costs
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);

    // encode significance map
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);

    // encode significant coefficients
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
}
2636
2637
/* estimate bit cost for each CBP bit */
2638
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2639
7.83M
{
2640
7.83M
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2641
2642
62.6M
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2643
54.8M
    {
2644
54.8M
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2645
54.8M
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2646
54.8M
    }
2647
2648
7.83M
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2649
2650
7.83M
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2651
7.83M
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2652
7.83M
}
2653
2654
/* estimate SAMBAC bit cost for significant coefficient group map */
2655
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2656
7.83M
{
2657
7.83M
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2658
2659
23.5M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2660
47.0M
        for (uint32_t bin = 0; bin < 2; bin++)
2661
31.3M
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2662
7.83M
}
2663
2664
/* estimate SAMBAC bit cost for significant coefficient map */
2665
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2666
7.83M
{
2667
7.83M
    int firstCtx = 1, numCtx = 8;
2668
2669
7.83M
    if (log2TrSize >= 4)
2670
304k
    {
2671
304k
        firstCtx = bIsLuma ? 21 : 12;
2672
304k
        numCtx = bIsLuma ? 6 : 3;
2673
304k
    }
2674
7.53M
    else if (log2TrSize == 3)
2675
1.20M
    {
2676
1.20M
        firstCtx = 9;
2677
1.20M
        numCtx = bIsLuma ? 12 : 3;
2678
1.20M
    }
2679
2680
7.83M
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2681
2682
7.83M
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2683
7.83M
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2684
2685
71.4M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2686
63.5M
    {
2687
63.5M
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2688
63.5M
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2689
63.5M
    }
2690
2691
7.83M
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2692
7.83M
    if (bIsLuma)
2693
4.55M
    {
2694
4.55M
        if (log2TrSize == 2)
2695
3.47M
        {
2696
10.4M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2697
6.94M
            {
2698
6.94M
                int bits = 0;
2699
6.94M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2700
2701
27.7M
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2702
20.8M
                {
2703
20.8M
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2704
20.8M
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2705
20.8M
                }
2706
2707
6.94M
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2708
6.94M
            }
2709
3.47M
        }
2710
1.08M
        else
2711
1.08M
        {
2712
1.08M
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2713
2714
3.27M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2715
2.18M
            {
2716
2.18M
                int bits = 0;
2717
2.18M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2718
2.18M
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2719
2720
9.27M
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2721
7.08M
                {
2722
7.08M
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2723
7.08M
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2724
7.08M
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2725
7.08M
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2726
7.08M
                    bits += 2 * cost1;
2727
7.08M
                }
2728
                // correct latest bit cost, it didn't include cost0
2729
2.18M
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2730
2.18M
            }
2731
1.08M
        }
2732
4.55M
    }
2733
3.27M
    else
2734
3.27M
    {
2735
3.27M
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2736
3.27M
        const int ctxShift = log2TrSize - 2;
2737
2738
9.89M
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2739
6.61M
        {
2740
6.61M
            int bits = 0;
2741
6.61M
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2742
2743
28.4M
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2744
21.8M
            {
2745
21.8M
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2746
21.8M
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2747
21.8M
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2748
21.8M
            }
2749
2750
6.61M
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2751
6.61M
        }
2752
3.27M
    }
2753
7.83M
}
2754
2755
/* estimate bit cost of significant coefficient */
2756
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2757
7.86M
{
2758
7.86M
    if (bIsLuma)
2759
4.56M
    {
2760
4.56M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2761
4.56M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2762
2763
77.6M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2764
73.0M
        {
2765
73.0M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2766
73.0M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2767
73.0M
        }
2768
2769
22.8M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2770
18.2M
        {
2771
18.2M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2772
18.2M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2773
18.2M
        }
2774
4.56M
    }
2775
3.29M
    else
2776
3.29M
    {
2777
3.29M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2778
3.29M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2779
2780
29.7M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2781
26.4M
        {
2782
26.4M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2783
26.4M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2784
26.4M
        }
2785
2786
9.92M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2787
6.62M
        {
2788
6.62M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2789
6.62M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2790
6.62M
        }
2791
3.29M
    }
2792
7.86M
}
2793
2794
/* Initialize our context information from the nominated source: copies the
 * first MAX_OFF_CTX_MOD context states from src and marks this coder valid.
 * src is expected to be valid (checked only through X265_CHECK). */
void Entropy::copyContextsFrom(const Entropy& src)
{
    X265_CHECK(src.m_valid, "invalid copy source context\n");

    // only the context states are copied; the other members are left untouched
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
    markValid();
}
2802
2803
/* Reset the arithmetic coder to its initial state: empty low register, range
 * of 510, and no buffered output bytes.  Context states and m_fracBits are not
 * touched here. */
void Entropy::start()
{
    m_low = 0;
    m_range = 510;
    m_bitsLeft = -12;
    m_numBufferedBytes = 0;
    m_bufferedByte = 0xff;
}
2811
2812
void Entropy::finish()
2813
2.83k
{
2814
2.83k
    if (m_low >> (21 + m_bitsLeft))
2815
6
    {
2816
6
        m_bitIf->writeByte(m_bufferedByte + 1);
2817
7
        while (m_numBufferedBytes > 1)
2818
1
        {
2819
1
            m_bitIf->writeByte(0x00);
2820
1
            m_numBufferedBytes--;
2821
1
        }
2822
2823
6
        m_low -= 1 << (21 + m_bitsLeft);
2824
6
    }
2825
2.82k
    else
2826
2.82k
    {
2827
2.82k
        if (m_numBufferedBytes > 0)
2828
2.82k
            m_bitIf->writeByte(m_bufferedByte);
2829
2830
2.83k
        while (m_numBufferedBytes > 1)
2831
6
        {
2832
6
            m_bitIf->writeByte(0xff);
2833
6
            m_numBufferedBytes--;
2834
6
        }
2835
2.82k
    }
2836
2.83k
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2837
2.83k
}
2838
2839
void Entropy::copyState(const Entropy& other)
2840
10.5M
{
2841
10.5M
    m_low = other.m_low;
2842
10.5M
    m_range = other.m_range;
2843
10.5M
    m_bitsLeft = other.m_bitsLeft;
2844
10.5M
    m_bufferedByte = other.m_bufferedByte;
2845
10.5M
    m_numBufferedBytes = other.m_numBufferedBytes;
2846
10.5M
    m_fracBits = other.m_fracBits;
2847
10.5M
}
2848
2849
void Entropy::resetBits()
2850
9.24M
{
2851
9.24M
    m_low = 0;
2852
9.24M
    m_bitsLeft = -12;
2853
9.24M
    m_numBufferedBytes = 0;
2854
9.24M
    m_bufferedByte = 0xff;
2855
9.24M
    m_fracBits &= 32767;
2856
9.24M
    if (m_bitIf)
2857
0
        m_bitIf->resetBits();
2858
9.24M
}
2859
2860
/** Encode bin */
2861
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2862
27.6M
{
2863
27.6M
    uint32_t mstate = ctxModel;
2864
2865
27.6M
    ctxModel = sbacNext(mstate, binValue);
2866
2867
27.6M
    if (!m_bitIf)
2868
27.3M
    {
2869
27.3M
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2870
27.3M
        return;
2871
27.3M
    }
2872
2873
319k
    uint32_t range = m_range;
2874
319k
    uint32_t state = sbacGetState(mstate);
2875
319k
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2876
319k
    range -= lps;
2877
2878
319k
    X265_CHECK(lps >= 2, "lps is too small\n");
2879
2880
319k
    int numBits = (uint32_t)(range - 256) >> 31;
2881
319k
    uint32_t low = m_low;
2882
2883
    // NOTE: MPS must be LOWEST bit in mstate
2884
319k
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2885
319k
    if ((binValue ^ mstate) & 1)
2886
33.0k
    {
2887
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2888
        //numBits = g_renormTable[lps >> 3];
2889
33.0k
        unsigned long idx;
2890
33.0k
        BSR(idx, lps);
2891
33.0k
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2892
2893
33.0k
        numBits = 8 - idx;
2894
33.0k
        if (state >= 63)
2895
0
            numBits = 6;
2896
33.0k
        X265_CHECK(numBits <= 6, "numBits failure\n");
2897
2898
33.0k
        low += range;
2899
33.0k
        range = lps;
2900
33.0k
    }
2901
319k
    m_low = (low << numBits);
2902
319k
    m_range = (range << numBits);
2903
319k
    m_bitsLeft += numBits;
2904
2905
319k
    if (m_bitsLeft >= 0)
2906
14.8k
        writeOut();
2907
319k
}
2908
2909
/** Encode equiprobable bin */
2910
void Entropy::encodeBinEP(uint32_t binValue)
2911
941k
{
2912
941k
    if (!m_bitIf)
2913
941k
    {
2914
941k
        m_fracBits += 32768;
2915
941k
        return;
2916
941k
    }
2917
473
    m_low <<= 1;
2918
473
    if (binValue)
2919
473
        m_low += m_range;
2920
473
    m_bitsLeft++;
2921
2922
473
    if (m_bitsLeft >= 0)
2923
99
        writeOut();
2924
473
}
2925
2926
/** Encode equiprobable bins */
2927
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2928
8.20M
{
2929
8.20M
    if (!m_bitIf)
2930
8.12M
    {
2931
8.12M
        m_fracBits += 32768 * numBins;
2932
8.12M
        return;
2933
8.12M
    }
2934
2935
77.4k
    while (numBins > 8)
2936
3.21k
    {
2937
3.21k
        numBins -= 8;
2938
3.21k
        uint32_t pattern = binValues >> numBins;
2939
3.21k
        m_low <<= 8;
2940
3.21k
        m_low += m_range * pattern;
2941
3.21k
        binValues -= pattern << numBins;
2942
3.21k
        m_bitsLeft += 8;
2943
2944
3.21k
        if (m_bitsLeft >= 0)
2945
3.21k
            writeOut();
2946
3.21k
    }
2947
2948
74.2k
    m_low <<= numBins;
2949
74.2k
    m_low += m_range * binValues;
2950
74.2k
    m_bitsLeft += numBins;
2951
2952
74.2k
    if (m_bitsLeft >= 0)
2953
19.8k
        writeOut();
2954
74.2k
}
2955
2956
/** Encode terminating bin */
2957
void Entropy::encodeBinTrm(uint32_t binValue)
2958
28.3k
{
2959
28.3k
    if (!m_bitIf)
2960
12.7k
    {
2961
12.7k
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2962
12.7k
        return;
2963
12.7k
    }
2964
2965
15.6k
    m_range -= 2;
2966
15.6k
    if (binValue)
2967
2.83k
    {
2968
2.83k
        m_low += m_range;
2969
2.83k
        m_low <<= 7;
2970
2.83k
        m_range = 2 << 7;
2971
2.83k
        m_bitsLeft += 7;
2972
2.83k
    }
2973
12.7k
    else if (m_range >= 256)
2974
12.0k
        return;
2975
717
    else
2976
717
    {
2977
717
        m_low <<= 1;
2978
717
        m_range <<= 1;
2979
717
        m_bitsLeft++;
2980
717
    }
2981
2982
3.54k
    if (m_bitsLeft >= 0)
2983
2.62k
        writeOut();
2984
3.54k
}
2985
2986
/** Move bits from register into bitstream */
2987
void Entropy::writeOut()
2988
40.6k
{
2989
40.6k
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2990
40.6k
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2991
2992
40.6k
    m_bitsLeft -= 8;
2993
40.6k
    m_low &= low_mask;
2994
2995
40.6k
    if (leadByte == 0xff)
2996
3.00k
        m_numBufferedBytes++;
2997
37.6k
    else
2998
37.6k
    {
2999
37.6k
        uint32_t numBufferedBytes = m_numBufferedBytes;
3000
37.6k
        if (numBufferedBytes > 0)
3001
34.8k
        {
3002
34.8k
            uint32_t carry = leadByte >> 8;
3003
34.8k
            uint32_t byteTowrite = m_bufferedByte + carry;
3004
34.8k
            m_bitIf->writeByte(byteTowrite);
3005
3006
34.8k
            byteTowrite = (0xff + carry) & 0xff;
3007
37.8k
            while (numBufferedBytes > 1)
3008
2.99k
            {
3009
2.99k
                m_bitIf->writeByte(byteTowrite);
3010
2.99k
                numBufferedBytes--;
3011
2.99k
            }
3012
34.8k
        }
3013
37.6k
        m_numBufferedBytes = 1;
3014
37.6k
        m_bufferedByte = (uint8_t)leadByte;
3015
37.6k
    }
3016
40.6k
}
3017
3018
// Table of 128 bit-cost values.
// NOTE(review): presumably the table read by sbacGetEntropyBits(), indexed by
// the context model value XOR the bin (encodeBin adds the result to
// m_fracBits) -- confirm against contexts.h.
// The values look like fixed-point costs on the scale where encodeBinEP
// charges 32768 for one bypass bin, e.g. 0x07b23 is a little under 32768.
const uint32_t g_entropyBits[128] =
3019
{
3020
    // Corrected table, most notably for last state
3021
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
3022
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
3023
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
3024
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
3025
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
3026
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
3027
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
3028
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
3029
};
3030
3031
// Transition table: 128 rows of two successor values each.
// NOTE(review): presumably consulted by sbacNext() as
// g_nextState[model value][bin] -- confirm against contexts.h.
// In each row the column whose number equals the low bit of the row index
// holds index + 2, except that rows 124 and 125 stay where they are.
// Rows 126 and 127 map to themselves in both columns.
const uint8_t g_nextState[128][2] =
3032
{
3033
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
3034
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
3035
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
3036
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
3037
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
3038
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
3039
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
3040
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
3041
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
3042
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
3043
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
3044
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
3045
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
3046
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
3047
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
3048
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
3049
};
3050
3051
}
3052
3053
// Packed table of 128 32-bit words, defined outside the X265_NS namespace
// with C linkage under a PFX()-decorated name.
// NOTE(review): the C linkage suggests it is meant for assembly or C
// callers -- confirm where it is referenced.
// Each word holds an 8-bit field in bits 31..24 above a 24-bit field in
// bits 23..0 (the "[8 24]" in the line below).
// NOTE(review): spot checks (entries 0, 1, 2, 126, 127) show the low 24 bits
// equal to the corresponding g_entropyBits entry; verify the whole table
// before relying on that.
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
3054
extern "C" const uint32_t PFX(entropyStateBits)[128] =
3055
{
3056
    // Corrected table, most notably for last state
3057
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
3058
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
3059
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
3060
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
3061
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
3062
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
3063
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
3064
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
3065
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
3066
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
3067
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
3068
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
3069
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
3070
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
3071
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
3072
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
3073
};
3074