Coverage Report

Created: 2026-03-08 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/entropy.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
13.6k
// Binarization constants used by this file.
// NOTE(review): the CU_DQP_* pair presumably applies to cu_qp_delta coding; their uses are outside this excerpt - confirm.
#define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
36
4.10k
#define CU_DQP_EG_k    0 // exp-golomb order
37
0
#define START_VALUE    8 // start value for dpcm mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
// (three rows of NUM_TQUANT_BYPASS_FLAG_CTX entries; what selects a row is not shown in this excerpt)
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
// (three rows of NUM_SPLIT_FLAG_CTX entries; the first two rows are identical)
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
// initial probability for skip flag (three rows of NUM_SKIP_FLAG_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably a placeholder for contexts the last row never uses - confirm.
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
// initial probability for merge flag (three rows of NUM_MERGE_FLAG_EXT_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks an unused context - confirm.
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
// initial probability for merge index (three rows of NUM_MERGE_IDX_EXT_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks an unused context - confirm.
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
// initial probability for part size (three rows of NUM_PART_SIZE_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks contexts the last row never uses - confirm.
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
// initial probability for prediction mode (three rows of NUM_PRED_MODE_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks an unused context - confirm.
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
// initial probability for intra prediction mode (three rows of NUM_ADI_CTX entries)
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93
{
94
    { 183, },
95
    { 154, },
96
    { 184, },
97
};
98
99
// initial probability for chroma prediction mode (three rows of NUM_CHROMA_PRED_CTX entries)
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100
{
101
    { 152,  139, },
102
    { 152,  139, },
103
    {  63,  139, },
104
};
105
106
// initial probability for inter direction (three rows of NUM_INTER_DIR_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks contexts the last row never uses - confirm.
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
// initial probability for MVD (three rows of NUM_MV_RES_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks contexts the last row never uses - confirm.
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
// initial probability for reference picture (three rows of NUM_REF_NO_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks contexts the last row never uses - confirm.
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
// initial probability for delta QP (three rows of NUM_DELTA_QP_CTX entries, all 154)
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128
{
129
    { 154,  154,  154, },
130
    { 154,  154,  154, },
131
    { 154,  154,  154, },
132
};
133
134
// initial probability for QT CBF (three rows of NUM_QT_CBF_CTX entries)
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135
{
136
    { 153,  111,  149,   92,  167,  154,  154 },
137
    { 153,  111,  149,  107,  167,  154,  154 },
138
    { 111,  141,   94,  138,  182,  154,  154 },
139
};
140
141
// initial probability for QT root CBF (three rows of NUM_QT_ROOT_CBF_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks an unused context - confirm.
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
// initial probability for the "last" contexts (three rows of NUM_CTX_LAST_FLAG_XY entries;
// each row is wrapped over two source lines)
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149
{
150
    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151
      108,  123,   93 },
152
    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153
      108,  123,  108 },
154
    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155
      108,  123,   63 },
156
};
157
158
// initial probability for sig CG flag; each of the three rows holds 2 * NUM_SIG_CG_FLAG_CTX
// entries and is wrapped over two source lines
// NOTE(review): what the two halves of a row correspond to is not shown in this excerpt.
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159
{
160
    { 121,  140,
161
      61,  154, },
162
    { 121,  140,
163
      61,  154, },
164
    {  91,  171,
165
       134,  141, },
166
};
167
168
// initial probability for sig flag (three rows of NUM_SIG_FLAG_CTX entries)
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169
{
170
    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171
    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172
    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173
};
174
175
// initial probability for one flag (three rows of NUM_ONE_FLAG_CTX entries)
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176
{
177
    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178
    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179
    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180
};
181
182
// initial probability for abs flag (three rows of NUM_ABS_FLAG_CTX entries)
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183
{
184
    { 107,  167,   91,  107,  107,  167, },
185
    { 107,  167,   91,  122,  107,  167, },
186
    { 138,  153,  136,  167,  152,  152, },
187
};
188
189
// initial probability for MVP index (three rows of NUM_MVP_IDX_CTX entries)
// NOTE(review): CNU is defined elsewhere; presumably marks an unused context - confirm.
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
// initial probability for SAO merge flag (three rows of NUM_SAO_MERGE_FLAG_CTX entries, all 153)
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197
{
198
    { 153,  },
199
    { 153,  },
200
    { 153,  },
201
};
202
203
// initial probability for SAO type index (three rows of NUM_SAO_TYPE_IDX_CTX entries)
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204
{
205
    { 160, },
206
    { 185, },
207
    { 200, },
208
};
209
210
// initial probability for transform subdivision flag (three rows of NUM_TRANS_SUBDIV_FLAG_CTX entries)
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211
{
212
    { 224,  167,  122, },
213
    { 124,  138,   94, },
214
    { 153,  138,  138, },
215
};
216
217
// initial probability for transform skip flag; each of the three rows holds
// 2 * NUM_TRANSFORMSKIP_FLAG_CTX entries (all 139)
// NOTE(review): what the two halves of a row correspond to is not shown in this excerpt.
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218
{
219
    { 139,  139 },
220
    { 139,  139 },
221
    { 139,  139 },
222
};
223
224
/* Default constructor: marks the coder valid, clears the scalar bookkeeping
 * members, and (in checked builds) verifies the context table is big enough. */
Entropy::Entropy()
{
    markValid();

    // reset scalar state; these three assignments are independent of each other
    m_meanQP = 0;
    m_pad = 0;
    m_fracBits = 0;

    // the context array must hold at least MAX_OFF_CTX_MOD entries
    X265_CHECK(sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD <= sizeof(m_contextState), "context state table is too small\n");
}
232
233
/* Writes the video parameter set (VPS) syntax elements, in the fixed order
 * below, through the WRITE_CODE / WRITE_FLAG / WRITE_UVLC macros.
 *
 * vps - supplies the sub-layer count, profile/tier/level, DPB sizes and, in
 *       ENABLE_ALPHA / ENABLE_MULTIVIEW builds, the layer/view description.
 * sps - only a parameter in ENABLE_ALPHA / ENABLE_MULTIVIEW builds; supplies
 *       picture size, chroma format and conformance window for the
 *       representation-format part of the VPS extension.
 *
 * In builds with neither option the layer id, op-set count and extension flag
 * are written as constant zeros. The statement order is the bitstream order,
 * so statements must not be reordered. */
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
234
void Entropy::codeVPS(const VPS& vps, const SPS& sps)
235
#else
236
void Entropy::codeVPS(const VPS& vps)
237
#endif
238
654
{
239
654
    // 2 when more than one layer or view is configured, otherwise 1
    int maxLayers = (vps.m_numLayers > 1 || vps.m_numViews > 1) + 1;
240
654
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
241
654
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
242
654
    WRITE_CODE(maxLayers - 1, 6, "vps_reserved_zero_6bits");
243
654
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
244
654
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
245
654
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
246
247
654
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
248
249
654
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
250
251
1.30k
    // per-sub-layer DPB size, reorder count and latency
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
252
654
    {
253
654
        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
254
654
        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
255
654
        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
256
654
    }
257
258
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
259
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
260
    {
261
        WRITE_CODE(maxLayers - 1, 6, "vps_max_nuh_reserved_zero_layer_id");
262
        WRITE_UVLC(vps.m_vpsNumLayerSetsMinus1, "vps_num_layer_sets_minus1");
263
        // every view/layer is flagged as included in every additional layer set
        for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
264
        {
265
#if ENABLE_MULTIVIEW
266
            if (vps.m_numViews > 1)
267
            {
268
                for (int j = 0; j < vps.m_numViews; j++)
269
                {
270
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
271
                }
272
            }
273
#endif
274
#if ENABLE_ALPHA
275
            if (vps.m_numLayers > 1)
276
            {
277
                for (int j = 0; j < vps.m_numLayers; j++)
278
                {
279
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
280
                }
281
            }
282
#endif
283
        }
284
    }
285
    else
286
    {
287
        WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
288
        WRITE_UVLC(0, "vps_max_op_sets_minus1");
289
    }
290
#else
291
654
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
292
654
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
293
654
#endif
294
295
654
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
296
297
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
298
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
299
    {
300
        WRITE_FLAG(vps.vps_extension_flag, "vps_extension_flag");
301
302
        if (vps.vps_extension_flag)
303
        {
304
            // pad with 1-bits until the written bit count is a multiple of X265_BYTE
            while (m_bitIf->getNumberOfWrittenBits() % X265_BYTE != 0)
305
            {
306
                WRITE_FLAG(1, "vps_extension_alignment_bit_equal_to_one");
307
            }
308
309
            WRITE_CODE(vps.ptl.levelIdc, 8, "general_level_idc");
310
            if (vps.maxTempSubLayers > 1)
311
            {
312
                for (uint32_t i = 0; i < vps.maxTempSubLayers - 1; i++)
313
                {
314
                    WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
315
                    WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
316
                }
317
                for (int i = vps.maxTempSubLayers - 1; i < 8; i++)
318
                    WRITE_CODE(0, 2, "reserved_zero_2bits");
319
            }
320
321
            WRITE_FLAG(vps.splitting_flag, "splitting flag");
322
            for (int i = 0; i < MAX_VPS_NUM_SCALABILITY_TYPES; i++)
323
            {
324
                WRITE_FLAG(vps.m_scalabilityMask[i], "scalability_mask[i]");
325
            }
326
            for (int i = 0; i < vps.scalabilityTypes - vps.splitting_flag; i++)
327
            {
328
                WRITE_CODE(vps.m_dimensionIdLen[i] - 1, 3, "dimension_id_len_minus1[i]");
329
            }
330
            WRITE_FLAG(vps.m_nuhLayerIdPresentFlag, "vps_nuh_layer_id_present_flag");
331
            // layer 0 is skipped; only layers 1..maxLayers-1 are described
            for (int i = 1; i < maxLayers; i++)
332
            {
333
                if (vps.m_nuhLayerIdPresentFlag)
334
                    WRITE_CODE(vps.m_layerIdInNuh[i], 6, "layer_id_in_nuh[i]");
335
336
                if (!vps.splitting_flag)
337
                {
338
                    for (int j = 0; j < vps.scalabilityTypes; j++)
339
                    {
340
                        uint8_t bits = vps.m_dimensionIdLen[j];
341
                        WRITE_CODE(vps.m_dimensionId[i][j], bits, "dimension_id[i][j]");
342
                    }
343
                }
344
            }
345
            WRITE_CODE(vps.m_viewIdLen, 4, "view_id_len");
346
347
#if ENABLE_ALPHA
348
            // extension tail for the multi-layer (ENABLE_ALPHA) case
            if (vps.m_numLayers > 1)
349
            {
350
                WRITE_FLAG(0, "direct_dependency_flag[1][0]");
351
                WRITE_UVLC(0, "num_add_layer_sets");
352
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
353
                WRITE_FLAG(0, "max_tid_ref_present_flag");
354
                WRITE_FLAG(0, "default_ref_layers_active_flag");
355
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
356
                WRITE_FLAG(1, "vps_profile_present_flag");
357
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
358
359
                WRITE_UVLC(0, "num_add_olss");
360
                WRITE_CODE(0, 2, "default_output_layer_idc");
361
                WRITE_CODE(1, 2, "profile_tier_level_idx[ i ][ j ]");
362
                WRITE_CODE(2, 2, "profile_tier_level_idx[ i ][ j ]");
363
364
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
365
366
                // representation format: taken from the SPS passed in
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
367
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
368
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
369
370
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
371
372
                if (sps.chromaFormatIdc == X265_CSP_I444)
373
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
374
375
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
376
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
377
378
                const Window& conf = sps.conformanceWindow;
379
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
380
                if (conf.bEnabled)
381
                {
382
                    // offsets are written after shifting right by the chroma subsampling shifts
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
383
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
384
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
385
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
386
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
387
                }
388
389
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
390
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
391
                // NOTE(review): this label ends with an unbalanced '['; this flag is also not written in the ENABLE_MULTIVIEW branch below - confirm both are intended
                WRITE_FLAG(1, "poc_lsb_not_present_flag[");
392
393
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
394
                {
395
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
396
                    for (uint32_t j = 0; j < vps.maxTempSubLayers ; j++)
397
                    {
398
                        // NOTE: the WRITE_FLAG on the next source line is the body of this if, despite its indentation
                        if(j > 0)
399
                        WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
400
401
                        for(int k = 0; k < vps.m_numLayersInIdList[i]; k++)
402
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
403
404
                        // NOTE(review): index 0 is used for every sub-layer j here, while the base loop near the top of this function indexes these arrays per sub-layer - confirm intended
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
405
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
406
                    }
407
                }
408
409
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
410
411
                WRITE_FLAG(0, "default_direct_dependency_flag");
412
                WRITE_UVLC(0, "vps_non_vui_extension_length");
413
                WRITE_FLAG(0, "vps_vui_present_flag");
414
                WRITE_FLAG(0, "vps_extension2_flag");
415
        // closes "if (vps.m_numLayers > 1)" (brace is under-indented)
        }
416
#endif
417
418
#if ENABLE_MULTIVIEW
419
            // extension tail for the multi-view (ENABLE_MULTIVIEW) case
            if (vps.m_numViews > 1)
420
            {
421
                // view i is given view id i
                for (uint8_t i = 0; i < vps.m_numViews; i++)
422
                    WRITE_CODE(i, vps.m_viewIdLen, "view_id_val[i]");
423
424
                // each view i >= 1 is flagged as depending on view 0 only
                for (int i = 1; i < vps.m_numViews; i++)
425
                {
426
                    for (int j = 0; j < i; j++)
427
                    {
428
                        if (j == 0)
429
                            WRITE_FLAG(1, "direct_dependency_flag[1][0]");
430
                        else
431
                            WRITE_FLAG(0, "direct_dependency_flag[1][0]");
432
                    }
433
                }
434
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
435
                WRITE_FLAG(0, "max_tid_ref_present_flag");
436
                WRITE_FLAG(1, "default_ref_layers_active_flag");
437
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
438
                WRITE_FLAG(1, "vps_profile_present_flag[i]");
439
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
440
                WRITE_UVLC(0, "num_add_olss");
441
                WRITE_CODE(0, 2, "default_output_layer_idc");
442
443
                for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
444
                {
445
                    for (int j = 0; j < vps.m_numViews; j++)
446
                    {
447
                        // view 0 gets index 1, every other view gets index 2
                        WRITE_CODE((j == 0) ? 1 : 2, 2, "profile_tier_level_idx[ i ][ j ]");
448
                    }
449
                }
450
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
451
452
                // representation format: taken from the SPS passed in
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
453
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
454
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
455
456
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
457
458
                if (sps.chromaFormatIdc == X265_CSP_I444)
459
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
460
461
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
462
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
463
464
                const Window& conf = sps.conformanceWindow;
465
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
466
                if (conf.bEnabled)
467
                {
468
                    // offsets are written after shifting right by the chroma subsampling shifts
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
469
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
470
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
471
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
472
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
473
                }
474
475
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
476
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
477
478
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
479
                {
480
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
481
                    for (uint32_t j = 0; j < vps.maxTempSubLayers; j++)
482
                    {
483
                        if (j > 0)
484
                            WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
485
486
                        for (int k = 0; k < vps.m_numLayersInIdList[i]; k++)
487
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
488
489
                        // NOTE(review): index 0 is used for every sub-layer j here, while the base loop near the top of this function indexes these arrays per sub-layer - confirm intended
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
490
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
491
                    }
492
                }
493
494
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
495
496
                WRITE_FLAG(1, "default_direct_dependency_flag");
497
                WRITE_CODE(2, 2, "default_direct_dependency_type");
498
                WRITE_UVLC(0, "vps_non_vui_extension_length");
499
                WRITE_FLAG(0, "vps_vui_present_flag");
500
                WRITE_FLAG(0, "vps_extension2_flag");
501
            }
502
#endif
503
        }
504
    }
505
    else
506
        WRITE_FLAG(0, "vps_extension_flag");
507
#else
508
654
    WRITE_FLAG(0, "vps_extension_flag");
509
654
#endif
510
654
}
511
512
/* Writes the sequence parameter set (SPS) syntax elements, in the fixed order
 * below, through the WRITE_CODE / WRITE_FLAG / WRITE_UVLC macros.
 *
 * sps         - sequence-level settings being serialized.
 * scalingList - its m_bEnabled / m_bDataPresent flags are written; the list
 *               itself is emitted via codeScalingList() when data is present.
 * ptl         - profile/tier/level, forwarded to codeProfileTier().
 * layer       - written as sps_seq_parameter_set_id and forwarded to codeVUI();
 *               in ENABLE_MULTIVIEW builds a non-zero layer also switches
 *               several sections to their extension form.
 *
 * Several "#if ENABLE_MULTIVIEW" sections end in a dangling if/else that
 * governs the brace block following the #endif; without ENABLE_MULTIVIEW those
 * brace blocks execute unconditionally. The statement order is the bitstream
 * order, so statements must not be reordered. */
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl, int layer)
513
654
{
514
654
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
515
#if ENABLE_MULTIVIEW
516
    if(layer != 0)
517
        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
518
    else
519
        WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
520
    // this if governs the brace block after the #endif
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
521
#else
522
654
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
523
654
#endif
524
654
    {
525
654
        WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
526
654
        codeProfileTier(ptl, sps.maxTempSubLayers);
527
654
    }
528
529
654
    // the layer index doubles as the SPS id
    WRITE_UVLC(layer, "sps_seq_parameter_set_id");
530
#if ENABLE_MULTIVIEW
531
    if (layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
532
        WRITE_FLAG(0, "update_rep_format_flag");
533
    // this else governs the brace block after the #endif
    else
534
#endif
535
654
    {
536
654
        WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
537
538
654
        if (sps.chromaFormatIdc == X265_CSP_I444)
539
0
            WRITE_FLAG(0,                       "separate_colour_plane_flag");
540
541
654
        WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
542
654
        WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
543
544
654
        const Window& conf = sps.conformanceWindow;
545
654
        WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
546
654
        if (conf.bEnabled)
547
495
        {
548
495
            // offsets are written after shifting right by the chroma subsampling shifts
            int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
549
495
            WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
550
495
            WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
551
495
            WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
552
495
            WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
553
495
        }
554
555
654
        // luma and chroma bit depth both come from the build-time X265_DEPTH
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
556
654
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
557
654
    }
558
559
654
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
560
#if ENABLE_MULTIVIEW
561
    // this if governs the brace block after the #endif
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
562
#endif
563
654
    {
564
654
        WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
565
566
1.30k
        for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
567
654
        {
568
654
            WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
569
654
            WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
570
654
            WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
571
654
        }
572
654
    }
573
574
654
    // coding block and transform block size limits
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
575
654
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
576
654
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
577
654
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
578
654
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
579
654
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
580
654
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
581
654
    if (scalingList.m_bEnabled)
582
0
    {
583
#if ENABLE_MULTIVIEW
584
        // NOTE: the two ifs below are independent; the second is not nested under the first,
        // and its else governs the brace block after the #endif
        if ((layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
585
            WRITE_FLAG(sps.spsInferScalingListFlag, "sps_infer_scaling_list_flag");
586
        if(sps.spsInferScalingListFlag)
587
            WRITE_CODE(0, 6, "sps_scaling_list_ref_layer_id");
588
        else
589
#endif
590
0
        {
591
0
            WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
592
0
            if (scalingList.m_bDataPresent)
593
0
                codeScalingList(scalingList);
594
0
        }
595
0
    }
596
654
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
597
654
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
598
599
654
    // PCM and long-term reference pictures are always signalled as off
    WRITE_FLAG(0, "pcm_enabled_flag");
600
654
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
601
654
    for (int i = 0; i < sps.spsrpsNum; i++)
602
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
603
654
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
604
605
654
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
606
654
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
607
608
654
    // VUI is always present
    WRITE_FLAG(1, "vui_parameters_present_flag");
609
654
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo, layer);
610
611
654
    WRITE_FLAG(sps.sps_extension_flag, "sps_extension_flag");
612
613
#if ENABLE_MULTIVIEW
614
    // NOTE(review): extension bits are written only when maxViews > 1; if sps_extension_flag can be set otherwise, nothing follows it here - confirm
    if (sps.sps_extension_flag && sps.maxViews > 1)
615
    {
616
        WRITE_FLAG(0, "sps_range_extensions_flag");
617
        WRITE_FLAG(sps.maxViews > 1, "sps_multilayer_extension_flag");
618
        WRITE_FLAG(0, "sps_3d_extension_flag");
619
        WRITE_CODE(0, 5, "sps_extension_5bits");
620
621
        // the constraint flag is 0 for layer 0 and 1 for any other layer
        if (layer == 0)
622
            WRITE_FLAG(0, "inter_view_mv_vert_constraint_flag");
623
        else
624
            WRITE_FLAG(1, "inter_view_mv_vert_constraint_flag");
625
    }
626
#endif
627
628
#if ENABLE_SCC_EXT
629
    if (ptl.profileIdc[0] == Profile::MAINSCC)
630
    {
631
        // only the SCC_EXT_IDX entry of the extension flag array is set
        bool sps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
632
        sps_extension_flags[SCC_EXT_IDX] = true;
633
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
634
            WRITE_FLAG(sps_extension_flags[i], "sps_extension_flag");
635
        WRITE_FLAG(1, "intra_block_copy_enabled_flag");
636
        WRITE_FLAG(0, "palette_mode_enabled_flag");
637
        WRITE_CODE(0, 2, "motion_vector_resolution_control_idc");
638
        WRITE_FLAG(0, "intra_boundary_filter_disabled_flag");
639
    }
640
#endif
641
654
}
642
643
/* Writes the picture parameter set (PPS) syntax elements, in the fixed order
 * below, through the WRITE_CODE / WRITE_FLAG / WRITE_UVLC / WRITE_SVLC macros.
 *
 * pps               - picture-level settings being serialized.
 * filerAcross       - (sic) written as loop_filter_across_slices_enabled_flag.
 * iPPSInitQpMinus26 - written as init_qp_minus26.
 * layer             - written as both the PPS id and the referenced SPS id;
 *                     a non-zero layer forces both weighted-prediction flags to 0.
 *
 * The statement order is the bitstream order, so statements must not be
 * reordered. */
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26, int layer)
644
654
{
645
654
    WRITE_UVLC(layer,                          "pps_pic_parameter_set_id");
646
654
    WRITE_UVLC(layer,                          "pps_seq_parameter_set_id");
647
654
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
648
654
    WRITE_FLAG(0,                          "output_flag_present_flag");
649
654
    // 2 extra slice header bits when more than one view is configured, else none
    WRITE_CODE(pps.maxViews > 1 ? 2 : 0, 3,"num_extra_slice_header_bits");
650
654
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
651
654
    WRITE_FLAG(0,                          "cabac_init_present_flag");
652
654
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
653
654
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
654
655
654
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
656
654
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
657
654
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
658
659
654
    // the delta-QP depth is only written when delta QP is enabled
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
660
654
    if (pps.bUseDQP)
661
489
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
662
663
654
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
664
654
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
665
654
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
666
667
654
    // weighted prediction is signalled as off for any non-zero layer
    WRITE_FLAG(layer ? 0 : pps.bUseWeightPred,            "weighted_pred_flag");
668
654
    WRITE_FLAG(layer ? 0 : pps.bUseWeightedBiPred,        "weighted_bipred_flag");
669
654
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
670
654
    WRITE_FLAG(0,                             "tiles_enabled_flag");
671
654
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
672
654
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
673
674
654
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
675
654
    if (pps.bDeblockingFilterControlPresent)
676
0
    {
677
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
678
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
679
0
        // beta/tc offsets are only written while deblocking stays enabled
        if (!pps.bPicDisableDeblockingFilter)
680
0
        {
681
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
682
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
683
0
        }
684
0
    }
685
686
654
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
687
654
    WRITE_FLAG(0, "lists_modification_present_flag");
688
654
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
689
654
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
690
654
    WRITE_FLAG(pps.pps_extension_flag, "pps_extension_flag");
691
692
#if ENABLE_MULTIVIEW
693
    // NOTE(review): extension bits are written only when maxViews > 1; if pps_extension_flag can be set otherwise, nothing follows it here - confirm
    if (pps.pps_extension_flag && pps.maxViews > 1)
694
    {
695
        WRITE_FLAG(0, "pps_range_extensions_flag");
696
        WRITE_FLAG(pps.maxViews > 1, "pps_multilayer_extension_flag");
697
        WRITE_FLAG(0, "pps_3d_extension_flag");
698
        WRITE_CODE(0, 5, "pps_extension_5bits");
699
700
        // always true here: the enclosing if already requires maxViews > 1
        if (pps.maxViews > 1)
701
        {
702
            WRITE_FLAG(0, "poc_reset_info_present_flag");
703
            WRITE_FLAG(0, "pps_infer_scaling_list_flag");
704
            WRITE_UVLC(0, "num_ref_loc_offsets");
705
            WRITE_FLAG(0, "colour_mapping_enabled_flag");
706
        }
707
    }
708
#endif
709
710
711
#if ENABLE_SCC_EXT
712
    if (pps.profileIdc == Profile::MAINSCC)
713
    {
714
        // only the SCC_EXT_IDX entry of the extension flag array is set
        bool pps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
715
        pps_extension_flags[SCC_EXT_IDX] = true;
716
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
717
            WRITE_FLAG(pps_extension_flags[i], "pps_extension_flag");
718
        WRITE_FLAG(1, "curr_pic_as_ref_enabled_pps_flag");
719
        WRITE_FLAG(0, "adaptive_colour_trans_flag");
720
        WRITE_FLAG(0, "palette_predictor_initializer_flag");
721
    }
722
#endif
723
654
}
724
725
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers, int layer)
726
1.30k
{
727
1.30k
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
728
1.30k
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
729
1.30k
    WRITE_CODE(ptl.profileIdc[layer], 5,   "XXX_profile_idc[]");
730
43.1k
    for (int j = 0; j < 32; j++)
731
41.8k
    {
732
41.8k
        if (layer)
733
0
            WRITE_FLAG(j == ptl.profileIdc[layer] ? 1 : 0, "XXX_profile_compatibility_flag[][j]");
734
41.8k
        else
735
41.8k
            WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
736
41.8k
    }
737
738
1.30k
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
739
1.30k
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
740
1.30k
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
741
1.30k
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
742
743
1.30k
    if (ptl.profileIdc[layer] == Profile::MAINREXT || ptl.profileIdc[layer] == Profile::HIGHTHROUGHPUTREXT || ptl.profileIdc[layer] == Profile::SCALABLEMAIN || ptl.profileIdc[layer] == Profile::SCALABLEMAIN10 || ptl.profileIdc[layer] == Profile::MULTIVIEWMAIN || ptl.profileIdc[layer] == Profile::MAINSCC)
744
0
    {
745
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
746
0
        int csp = ptl.chromaFormatConstraint;
747
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
748
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
749
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
750
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
751
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
752
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
753
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
754
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
755
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
756
0
        if (ptl.profileIdc[layer] == Profile::MAINSCC)
757
0
        {
758
0
            WRITE_FLAG(bitDepthConstraint <= 14, "max_14bit_constraint_flag");
759
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[0..15]");
760
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[16..31]");
761
0
            WRITE_FLAG(0, "reserved_zero_33bits[32]");
762
0
        }
763
0
        else
764
0
        {
765
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[0..15]");
766
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[16..31]");
767
0
            WRITE_CODE(0, 3, "XXX_reserved_zero_35bits[32..34]");
768
0
        }
769
0
    }
770
1.30k
    else
771
1.30k
    {
772
1.30k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
773
1.30k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
774
1.30k
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
775
1.30k
    }
776
1.30k
    if (ptl.profileIdc[layer] == Profile::MAINSCC)
777
0
        WRITE_FLAG(false, "inbld_flag");
778
779
1.30k
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
780
781
1.30k
    if (maxTempSubLayers > 1)
782
0
    {
783
0
        for(int i = 0; i < maxTempSubLayers - 1; i++)
784
0
        {
785
0
            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
786
0
            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
787
0
        }
788
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
789
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
790
0
    }
791
1.30k
}
792
793
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo, int layer)
794
654
{
795
654
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
796
654
    if (vui.aspectRatioInfoPresentFlag)
797
0
    {
798
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
799
0
        if (vui.aspectRatioIdc == 255)
800
0
        {
801
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
802
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
803
0
        }
804
0
    }
805
806
654
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
807
654
    if (vui.overscanInfoPresentFlag)
808
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
809
810
654
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
811
654
    if (vui.videoSignalTypePresentFlag)
812
654
    {
813
654
        WRITE_CODE(vui.videoFormat, 3, "video_format");
814
654
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
815
654
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
816
654
        if (vui.colourDescriptionPresentFlag)
817
0
        {
818
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
819
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
820
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
821
0
        }
822
654
    }
823
824
654
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
825
654
    if (vui.chromaLocInfoPresentFlag)
826
0
    {
827
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
828
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
829
0
    }
830
831
654
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
832
654
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
833
654
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
834
835
654
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
836
654
    if (vui.defaultDisplayWindow.bEnabled)
837
0
    {
838
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
839
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
840
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
841
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
842
0
    }
843
844
654
    if(layer)
845
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
846
654
    else
847
654
    {
848
654
        if (!bEmitVUITimingInfo)
849
0
            WRITE_FLAG(0, "vui_timing_info_present_flag");
850
654
        else
851
654
        {
852
654
            WRITE_FLAG(1, "vui_timing_info_present_flag");
853
654
            WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
854
654
            WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
855
654
            WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
856
654
        }
857
858
654
        if (!bEmitVUIHRDInfo)
859
0
            WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
860
654
        else
861
654
        {
862
654
            WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
863
654
            if (vui.hrdParametersPresentFlag)
864
0
                codeHrdParameters(vui.hrdParameters, maxSubTLayers);
865
654
        }
866
654
    }
867
868
654
    WRITE_FLAG(0, "bitstream_restriction_flag");
869
654
}
870
871
void Entropy::codeScalingList(const ScalingList& scalingList)
872
0
{
873
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
874
0
    {
875
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
876
0
        {
877
0
            int predList = scalingList.checkPredMode(sizeId, listId);
878
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
879
0
            if (predList >= 0)
880
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
881
0
            else // DPCM Mode
882
0
                codeScalingList(scalingList, sizeId, listId);
883
0
        }
884
0
    }
885
0
}
886
887
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
888
0
{
889
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
890
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
891
0
    int nextCoef = START_VALUE;
892
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
893
0
    int data;
894
895
0
    if (sizeId > BLOCK_8x8)
896
0
    {
897
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
898
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
899
0
    }
900
0
    for (int i = 0; i < coefNum; i++)
901
0
    {
902
0
        data = src[scan[i]] - nextCoef;
903
0
        if (data < -128)
904
0
            data += 256;
905
0
        if (data > 127)
906
0
            data -= 256;
907
0
        nextCoef = (nextCoef + data + 256) % 256;
908
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
909
0
    }
910
0
}
911
912
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
913
0
{
914
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
915
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
916
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
917
918
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
919
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
920
921
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
922
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
923
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
924
925
0
    for (int i = 0; i < maxSubTLayers; i++)
926
0
    {
927
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
928
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
929
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
930
931
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
932
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
933
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
934
0
    }
935
0
}
936
937
void Entropy::codeAUD(const Slice& slice)
938
0
{
939
0
    int picType;
940
941
0
    switch (slice.m_sliceType)
942
0
    {
943
0
    case I_SLICE:
944
0
        picType = 0;
945
0
        break;
946
0
    case P_SLICE:
947
0
        picType = 1;
948
0
        break;
949
0
    case B_SLICE:
950
0
        picType = 2;
951
0
        break;
952
0
    default:
953
0
        picType = 7;
954
0
        break;
955
0
    }
956
957
0
    WRITE_CODE(picType, 3, "pic_type");
958
0
}
959
960
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp, int layer)
961
654
{
962
654
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
963
654
    if (slice.getRapPicFlag())
964
654
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
965
966
654
    WRITE_UVLC(layer, "slice_pic_parameter_set_id");
967
968
    /* x265 does not use dependent slices, so always write all this data */
969
654
    if (slice_addr)
970
0
    {
971
        // if( dependent_slice_segments_enabled_flag )
972
        //     dependent_slice_segment_flag             u(1)
973
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
974
0
    }
975
976
#if ENABLE_MULTIVIEW
977
    if (encData.m_param->numViews > 1)
978
    {
979
        int esb = 0;
980
        if (2 > esb)
981
        {
982
            esb++;
983
            WRITE_FLAG(0, "discardable_flag");
984
        }
985
        if (2 > esb)
986
        {
987
            esb++;
988
            WRITE_FLAG(0, "cross_layer_bla_flag");
989
        }
990
    }
991
#endif
992
993
654
    WRITE_UVLC(slice.m_sliceType, "slice_type");
994
995
654
    if ((slice.m_param->numViews > 1 && layer > 0) || !slice.getIdrPicFlag())
996
0
    {
997
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
998
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
999
0
    }
1000
654
    if (!slice.getIdrPicFlag())
1001
0
    {
1002
#if _DEBUG || CHECKED_BUILD
1003
        // check for bitstream restriction stating that:
1004
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
1005
        // Ideally this process should not be repeated for each slice in a picture
1006
        if (slice.isIRAP())
1007
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
1008
            {
1009
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
1010
            }
1011
#endif
1012
1013
0
        if (slice.m_rpsIdx < 0)
1014
0
        {
1015
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
1016
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
1017
0
        }
1018
0
        else
1019
0
        {
1020
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
1021
0
            int numBits = 0;
1022
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
1023
0
                numBits++;
1024
1025
0
            if (numBits > 0)
1026
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
1027
0
        }
1028
1029
0
        if (slice.m_sps->bTemporalMVPEnabled)
1030
#if ENABLE_SCC_EXT
1031
            WRITE_FLAG(slice.m_bTemporalMvp, "slice_temporal_mvp_enable_flag");
1032
#else
1033
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
1034
0
#endif
1035
0
    }
1036
654
    const SAOParam *saoParam = encData.m_saoParam;
1037
654
    if (slice.m_bUseSao)
1038
654
    {
1039
654
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
1040
654
        if (encData.m_param->internalCsp != X265_CSP_I400)
1041
654
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
1042
654
    }
1043
0
    else if(encData.m_param->selectiveSAO)
1044
0
    {
1045
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
1046
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1047
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
1048
0
    }
1049
1050
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
1051
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
1052
1053
654
    if (!slice.isIntra())
1054
0
    {
1055
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
1056
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
1057
0
        if (overrideFlag)
1058
0
        {
1059
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
1060
0
            if (slice.isInterB())
1061
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
1062
0
            else
1063
0
            {
1064
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
1065
0
            }
1066
0
        }
1067
0
    }
1068
654
    else
1069
654
    {
1070
654
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
1071
654
    }
1072
1073
654
    if (slice.isInterB())
1074
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
1075
1076
#if ENABLE_SCC_EXT
1077
    if (slice.m_bTemporalMvp)
1078
#else
1079
654
    if (slice.m_sps->bTemporalMVPEnabled)
1080
654
#endif
1081
654
    {
1082
654
        if (slice.m_sliceType == B_SLICE)
1083
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
1084
1085
654
        if (slice.m_sliceType != I_SLICE &&
1086
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
1087
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
1088
0
        {
1089
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
1090
0
        }
1091
654
    }
1092
654
    if (((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE)) && !layer)
1093
0
        codePredWeightTable(slice);
1094
1095
654
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
1096
654
    if (!slice.isIntra())
1097
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
1098
1099
654
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
1100
654
    WRITE_SVLC(code, "slice_qp_delta");
1101
1102
654
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
1103
0
    {
1104
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
1105
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
1106
0
    }
1107
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
1108
    //       We didn't support filter across slice board, so disable it now
1109
1110
654
    if (encData.m_param->maxSlices <= 1)
1111
654
    {
1112
654
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
1113
654
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
1114
1115
654
        if (isSAOEnabled || isDBFEnabled)
1116
654
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
1117
654
    }
1118
654
}
1119
1120
/** write wavefront substreams sizes for the slice header */
1121
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
1122
531
{
1123
531
    uint32_t offsetLen = 1;
1124
2.99k
    while (maxOffset >= (1U << offsetLen))
1125
2.46k
    {
1126
2.46k
        offsetLen++;
1127
2.46k
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
1128
2.46k
    }
1129
1130
531
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
1131
531
    if (numSubStreams > 0)
1132
531
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
1133
1134
2.84k
    for (uint32_t i = 0; i < numSubStreams; i++)
1135
2.31k
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
1136
531
}
1137
1138
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
1139
0
{
1140
0
    if (idx > 0)
1141
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
1142
1143
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
1144
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
1145
0
    int prev = 0;
1146
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
1147
0
    {
1148
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
1149
0
        prev = rps.deltaPOC[j];
1150
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
1151
0
    }
1152
1153
0
    prev = 0;
1154
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
1155
0
    {
1156
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
1157
0
        prev = rps.deltaPOC[j];
1158
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
1159
0
    }
1160
0
}
1161
1162
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
1163
27.4k
{
1164
27.4k
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
1165
27.4k
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
1166
27.4k
}
1167
1168
/* encode a CU block recursively */
1169
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
1170
111k
{
1171
111k
    const Slice* slice = ctu.m_slice;
1172
1173
111k
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
1174
111k
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
1175
1176
111k
    if (!cuUnsplitFlag)
1177
24.7k
    {
1178
24.7k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1179
24.7k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1180
6.53k
            bEncodeDQP = true;
1181
123k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1182
98.9k
        {
1183
98.9k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1184
98.9k
            if (childGeom.flags & CUGeom::PRESENT)
1185
55.8k
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1186
98.9k
        }
1187
24.7k
        return;
1188
24.7k
    }
1189
1190
86.8k
    if (cuSplitFlag) 
1191
62.3k
        codeSplitFlag(ctu, absPartIdx, depth);
1192
1193
86.8k
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
1194
7.09k
    {
1195
7.09k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1196
7.09k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1197
280
            bEncodeDQP = true;
1198
35.4k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1199
28.3k
        {
1200
28.3k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1201
28.3k
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1202
28.3k
        }
1203
7.09k
        return;
1204
7.09k
    }
1205
1206
79.7k
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1207
32.7k
        bEncodeDQP = true;
1208
1209
79.7k
    if (slice->m_pps->bTransquantBypassEnabled)
1210
23.3k
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
1211
1212
79.7k
    if (!slice->isIntra())
1213
0
    {
1214
0
        codeSkipFlag(ctu, absPartIdx);
1215
0
        if (ctu.isSkipped(absPartIdx))
1216
0
        {
1217
0
            codeMergeIndex(ctu, absPartIdx);
1218
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1219
0
            return;
1220
0
        }
1221
0
        codePredMode(ctu.m_predMode[absPartIdx]);
1222
0
    }
1223
1224
79.7k
    codePartSize(ctu, absPartIdx, depth);
1225
1226
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1227
79.7k
    codePredInfo(ctu, absPartIdx);
1228
1229
79.7k
    uint32_t tuDepthRange[2];
1230
79.7k
    if (ctu.isIntra(absPartIdx))
1231
79.7k
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
1232
5
    else
1233
5
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
1234
1235
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
1236
79.7k
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
1237
1238
    // --- write terminating bit ---
1239
79.7k
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1240
79.7k
}
1241
1242
/* Return bit count of signaling inter mode */
1243
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
1244
0
{
1245
0
    uint32_t bits;
1246
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
1247
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
1248
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1249
0
    switch (partSize)
1250
0
    {
1251
0
    case SIZE_2Nx2N:
1252
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1253
0
        break;
1254
1255
0
    case SIZE_2NxN:
1256
0
    case SIZE_2NxnU:
1257
0
    case SIZE_2NxnD:
1258
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1259
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1260
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1261
0
        {
1262
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1263
0
            if (partSize != SIZE_2NxN)
1264
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1265
0
        }
1266
0
        break;
1267
1268
0
    case SIZE_Nx2N:
1269
0
    case SIZE_nLx2N:
1270
0
    case SIZE_nRx2N:
1271
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1272
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1273
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1274
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1275
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1276
0
        {
1277
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1278
0
            if (partSize != SIZE_Nx2N)
1279
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1280
0
        }
1281
0
        break;
1282
0
    default:
1283
0
        X265_CHECK(0, "invalid CU partition\n");
1284
0
        break;
1285
0
    }
1286
1287
0
    return bits;
1288
0
}
1289
1290
/* finish encoding a cu and handle end-of-slice conditions */
1291
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
1292
79.7k
{
1293
79.7k
    const Slice* slice = ctu.m_slice;
1294
79.7k
    uint32_t realEndAddress = slice->m_endCUAddr;
1295
79.7k
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
1296
79.7k
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
1297
1298
79.7k
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
1299
79.7k
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
1300
79.7k
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
1301
79.7k
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
1302
79.7k
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
1303
47.2k
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
1304
1305
79.7k
    if (slice->m_pps->bUseDQP)
1306
56.4k
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
1307
1308
79.7k
    if (granularityBoundary)
1309
27.4k
    {
1310
        // Encode slice finish
1311
27.4k
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
1312
27.4k
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
1313
1.30k
            bTerminateSlice = 1;
1314
1315
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
1316
27.4k
        if (!bTerminateSlice)
1317
26.1k
            encodeBinTrm(0);    // end_of_slice_segment_flag
1318
1319
27.4k
        if (!m_bitIf)
1320
13.7k
            resetBits(); // TODO: most likely unnecessary
1321
27.4k
    }
1322
79.7k
}
1323
1324
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1325
                              bool& bCodeDQP, const uint32_t depthRange[2])
1326
1.95M
{
1327
1.95M
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1328
1329
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1330
     * so we have checks to make sure the implied value matches our intentions */
1331
1.95M
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1332
298k
    {
1333
298k
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1334
298k
    }
1335
1.66M
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1336
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1337
0
    {
1338
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1339
0
    }
1340
1.66M
    else if (log2CurSize > depthRange[1])
1341
0
    {
1342
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1343
0
    }
1344
1.66M
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1345
1.19M
    {
1346
1.19M
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1347
1.19M
    }
1348
464k
    else
1349
464k
    {
1350
464k
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1351
464k
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1352
464k
    }
1353
1354
1.95M
    uint32_t hChromaShift = cu.m_hChromaShift;
1355
1.95M
    uint32_t vChromaShift = cu.m_vChromaShift;
1356
1.95M
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
1357
1.95M
    if (!curDepth || !bSmallChroma)
1358
764k
    {
1359
764k
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
1360
764k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
1361
764k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
1362
764k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
1363
764k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
1364
764k
    }
1365
1366
1.95M
    if (subdiv)
1367
299k
    {
1368
299k
        --log2CurSize;
1369
299k
        ++curDepth;
1370
1371
299k
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1372
1373
299k
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1374
299k
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1375
299k
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1376
299k
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1377
299k
        return;
1378
299k
    }
1379
1380
1.66M
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
1381
1382
1.66M
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
1383
0
    {
1384
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
1385
0
    }
1386
1.66M
    else
1387
1.66M
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1388
1389
1.66M
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1390
1.66M
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
1391
1.66M
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
1392
1.66M
    if (!(cbfY || cbfU || cbfV))
1393
1.65M
        return;
1394
1395
    // dQP: only for CTU once
1396
8.01k
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1397
3.56k
    {
1398
3.56k
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1399
3.56k
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1400
3.56k
        codeDeltaQP(cu, absPartIdxLT);
1401
3.56k
        bCodeDQP = false;
1402
3.56k
    }
1403
1404
8.01k
    if (cbfY)
1405
4.57k
    {
1406
4.57k
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1407
4.57k
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1408
4.57k
        if (!(cbfU || cbfV))
1409
535
            return;
1410
4.57k
    }
1411
1412
7.47k
    if (bSmallChroma)
1413
4.62k
    {
1414
4.62k
        if ((absPartIdx & 3) != 3)
1415
3.47k
            return;
1416
1417
1.15k
        const uint32_t log2CurSizeC = 2;
1418
1.15k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1419
1.15k
        const uint32_t curPartNum = 4;
1420
1.15k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1421
3.47k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1422
2.31k
        {
1423
2.31k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1424
2.31k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1425
2.31k
            do
1426
2.31k
            {
1427
2.31k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1428
2.31k
                {
1429
2.31k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1430
2.31k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1431
2.31k
                }
1432
2.31k
            }
1433
2.31k
            while (tuIterator.isNextSection());
1434
2.31k
        }
1435
1.15k
    }
1436
2.84k
    else
1437
2.84k
    {
1438
2.84k
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1439
2.84k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1440
2.84k
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1441
2.84k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1442
8.52k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1443
5.68k
        {
1444
5.68k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1445
5.68k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1446
5.68k
            do
1447
5.68k
            {
1448
5.68k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1449
5.68k
                {
1450
5.68k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1451
5.68k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1452
5.68k
                }
1453
5.68k
            }
1454
5.68k
            while (tuIterator.isNextSection());
1455
5.68k
        }
1456
2.84k
    }
1457
7.47k
}
1458
1459
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1460
                              bool& bCodeDQP, const uint32_t depthRange[2])
1461
0
{
1462
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1463
1464
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1465
     * so we have checks to make sure the implied value matches our intentions */
1466
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1467
0
    {
1468
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1469
0
    }
1470
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1471
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1472
0
    {
1473
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1474
0
    }
1475
0
    else if (log2CurSize > depthRange[1])
1476
0
    {
1477
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1478
0
    }
1479
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1480
0
    {
1481
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1482
0
    }
1483
0
    else
1484
0
    {
1485
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1486
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1487
0
    }
1488
1489
0
    if (subdiv)
1490
0
    {
1491
0
        --log2CurSize;
1492
0
        ++curDepth;
1493
1494
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1495
1496
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1497
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1498
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1499
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1500
0
        return;
1501
0
    }
1502
1503
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1504
0
    {
1505
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1506
0
    }
1507
0
    else
1508
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1509
1510
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1511
1512
0
    if (!cbfY)
1513
0
        return;
1514
1515
    // dQP: only for CTU once
1516
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1517
0
    {
1518
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1519
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1520
0
        codeDeltaQP(cu, absPartIdxLT);
1521
0
        bCodeDQP = false;
1522
0
    }
1523
1524
0
    if (cbfY)
1525
0
    {
1526
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1527
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1528
0
    }
1529
0
}
1530
1531
1532
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1533
762k
{
1534
762k
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1535
762k
    {
1536
762k
        codeIntraDirLumaAng(cu, absPartIdx, true);
1537
762k
        if (cu.m_chromaFormat != X265_CSP_I400)
1538
762k
        {
1539
762k
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1540
762k
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1541
1542
762k
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1543
1544
762k
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1545
0
            {
1546
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1547
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1548
0
                {
1549
0
                    absPartIdx += qNumParts;
1550
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1551
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1552
0
                }
1553
0
            }
1554
762k
        }
1555
762k
    }
1556
158
    else // if it is inter mode, encode motion vector and reference index
1557
158
        codePUWise(cu, absPartIdx);
1558
762k
}
1559
1560
/** encode motion information for every PU block */
1561
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1562
0
{
1563
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1564
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1565
1566
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1567
0
    {
1568
0
        codeMergeFlag(cu, subPartIdx);
1569
0
        if (cu.m_mergeFlag[subPartIdx])
1570
0
            codeMergeIndex(cu, subPartIdx);
1571
0
        else
1572
0
        {
1573
0
            if (cu.m_slice->isInterB())
1574
0
                codeInterDir(cu, subPartIdx);
1575
1576
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1577
0
            for (uint32_t list = 0; list < 2; list++)
1578
0
            {
1579
0
                if (interDir & (1 << list))
1580
0
                {
1581
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1582
1583
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1584
0
                    codeMvd(cu, subPartIdx, list);
1585
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1586
0
                }
1587
0
            }
1588
0
        }
1589
0
    }
1590
0
}
1591
1592
/** encode reference frame index for a PU block */
1593
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1594
0
{
1595
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1596
1597
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1598
0
        codeRefFrmIdx(cu, absPartIdx, list);
1599
0
}
1600
1601
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1602
762k
{
1603
762k
    if (!cu.isIntra(absPartIdx))
1604
0
    {
1605
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1606
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1607
0
        if (!cu.getQtRootCbf(absPartIdx))
1608
0
            return;
1609
0
    }
1610
1611
762k
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1612
762k
    if (cu.m_chromaFormat == X265_CSP_I400)
1613
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1614
762k
    else
1615
762k
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1616
762k
}
1617
1618
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1619
54.2k
{
1620
54.2k
    int typeIdx = ctuParam.typeIdx;
1621
1622
54.2k
    if (plane != 2)
1623
36.1k
    {
1624
36.1k
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1625
36.1k
        if (typeIdx >= 0)
1626
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1627
36.1k
    }
1628
1629
54.2k
    if (typeIdx >= 0)
1630
0
    {
1631
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1632
0
        if (typeIdx == SAO_BO)
1633
0
        {
1634
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1635
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1636
1637
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1638
0
                if (ctuParam.offset[i] != 0)
1639
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1640
1641
0
            encodeBinsEP(ctuParam.bandPos, 5);
1642
0
        }
1643
0
        else // if (typeIdx < SAO_BO)
1644
0
        {
1645
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1646
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1647
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1648
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1649
0
            if (plane != 2)
1650
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1651
0
        }
1652
0
    }
1653
54.2k
}
1654
1655
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1656
164k
{
1657
164k
    if (plane != 2)
1658
109k
    {
1659
109k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1660
109k
        encodeBinEP(1);
1661
109k
    }
1662
1663
164k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1664
1665
164k
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1666
164k
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1667
164k
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1668
164k
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1669
164k
    if (plane != 2)
1670
109k
        encodeBinsEP((uint32_t)(typeIdx), 2);
1671
164k
}
1672
1673
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1674
41.1k
{
1675
41.1k
    if (plane != 2)
1676
27.4k
    {
1677
27.4k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1678
27.4k
        encodeBinEP(0);
1679
27.4k
    }
1680
1681
41.1k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1682
1683
205k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1684
164k
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1685
1686
205k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1687
164k
        if (offset[i] != 0)
1688
80
            encodeBinEP(offset[i] < 0);
1689
1690
41.1k
    encodeBinsEP(bandPos, 5);
1691
41.1k
}
1692
1693
/** initialize context model with respect to QP and initialization value */
1694
uint8_t sbacInit(int qp, int initValue)
1695
102k
{
1696
102k
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1697
1698
102k
    int  slope      = (initValue >> 4) * 5 - 45;
1699
102k
    int  offset     = ((initValue & 15) << 3) - 16;
1700
102k
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1701
102k
    uint32_t mpState = (initState >= 64);
1702
102k
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1703
1704
102k
    return (uint8_t)state;
1705
102k
}
1706
1707
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1708
17.0k
{
1709
17.0k
    ctxModel += sliceType * size;
1710
1711
119k
    for (int n = 0; n < size; n++)
1712
102k
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1713
17.0k
}
1714
1715
void Entropy::resetEntropy(const Slice& slice)
1716
654
{
1717
654
    int  qp              = slice.m_sliceQp;
1718
654
    SliceType sliceType  = slice.m_sliceType;
1719
1720
654
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1721
654
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1722
654
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1723
654
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1724
654
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1725
654
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1726
654
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1727
654
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1728
654
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1729
654
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1730
654
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1731
654
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1732
654
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1733
654
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1734
654
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1735
654
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1736
654
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1737
654
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1738
654
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1739
654
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1740
654
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1741
654
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1742
654
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1743
654
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1744
654
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1745
654
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1746
    // new structure
1747
1748
654
    start();
1749
654
}
1750
1751
/* code explicit wp tables */
1752
void Entropy::codePredWeightTable(const Slice& slice)
1753
0
{
1754
0
    const WeightParam *wp;
1755
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1756
0
    bool            bDenomCoded  = false;
1757
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1758
0
    uint32_t        totalSignalledWeightFlags = 0;
1759
1760
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1761
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1762
0
    {
1763
0
        for (int list = 0; list < numRefDirs; list++)
1764
0
        {
1765
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1766
0
            {
1767
0
                wp = slice.m_weightPredTable[list][ref];
1768
0
                if (!bDenomCoded)
1769
0
                {
1770
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1771
1772
0
                    if (bChroma)
1773
0
                    {
1774
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1775
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1776
0
                    }
1777
0
                    bDenomCoded = true;
1778
0
                }
1779
#if ENABLE_SCC_EXT
1780
                if (slice.m_poc == slice.m_refPOCList[list][ref])
1781
                    assert(!wp[0].wtPresent);
1782
                else
1783
#endif
1784
0
                    WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1785
0
                totalSignalledWeightFlags = totalSignalledWeightFlags + wp[0].wtPresent;
1786
0
            }
1787
1788
0
            if (bChroma)
1789
0
            {
1790
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1791
0
                {
1792
0
                    wp = slice.m_weightPredTable[list][ref];
1793
#if ENABLE_SCC_EXT
1794
                    if (slice.m_poc == slice.m_refPOCList[list][ref])
1795
                        assert(!wp[1].wtPresent);
1796
                    else
1797
#endif
1798
0
                        WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1799
0
                    totalSignalledWeightFlags = totalSignalledWeightFlags + 2 * wp[1].wtPresent;
1800
0
                }
1801
0
            }
1802
1803
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1804
0
            {
1805
0
                wp = slice.m_weightPredTable[list][ref];
1806
0
                if (wp[0].wtPresent)
1807
0
                {
1808
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1809
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1810
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1811
0
                }
1812
1813
0
                if (bChroma)
1814
0
                {
1815
0
                    if (wp[1].wtPresent)
1816
0
                    {
1817
0
                        for (int plane = 1; plane < 3; plane++)
1818
0
                        {
1819
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1820
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1821
1822
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1823
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1824
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1825
0
                        }
1826
0
                    }
1827
0
                }
1828
0
            }
1829
0
        }
1830
1831
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1832
0
    }
1833
0
}
1834
1835
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1836
4.78k
{
1837
4.78k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1838
1839
4.78k
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1840
1841
4.78k
    if (!symbol)
1842
556
        return;
1843
1844
4.23k
    bool bCodeLast = (maxSymbol > symbol);
1845
1846
20.8k
    while (--symbol)
1847
16.5k
        encodeBin(1, scmModel[offset]);
1848
1849
4.23k
    if (bCodeLast)
1850
123
        encodeBin(0, scmModel[offset]);
1851
4.23k
}
1852
1853
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1854
4.10k
{
1855
4.10k
    uint32_t bins = 0;
1856
4.10k
    int numBins = 0;
1857
1858
16.1k
    while (symbol >= (uint32_t)(1 << count))
1859
12.0k
    {
1860
12.0k
        bins = 2 * bins + 1;
1861
12.0k
        numBins++;
1862
12.0k
        symbol -= 1 << count;
1863
12.0k
        count++;
1864
12.0k
    }
1865
1866
4.10k
    bins = 2 * bins + 0;
1867
4.10k
    numBins++;
1868
1869
4.10k
    bins = (bins << count) | symbol;
1870
4.10k
    numBins += count;
1871
1872
4.10k
    X265_CHECK(numBins <= 32, "numBins too large\n");
1873
4.10k
    encodeBinsEP(bins, numBins);
1874
4.10k
}
1875
1876
/** Coding of coeff_abs_level_minus3 */
1877
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1878
9.38k
{
1879
9.38k
    uint32_t length;
1880
9.38k
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1881
1882
9.38k
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1883
0
    {
1884
0
        length = codeNumber >> absGoRice;
1885
1886
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1887
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1888
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1889
0
    }
1890
9.38k
    else
1891
9.38k
    {
1892
9.38k
        length = 0;
1893
9.38k
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1894
9.38k
        {
1895
9.38k
            unsigned long idx;
1896
9.38k
            BSR(idx, codeNumber + 1);
1897
9.38k
            length = idx;
1898
9.38k
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1899
9.38k
            codeNumber -= (1 << idx) - 1;
1900
9.38k
        }
1901
9.38k
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1902
1903
9.38k
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1904
9.38k
        encodeBinsEP(codeNumber, length + absGoRice);
1905
9.38k
    }
1906
9.38k
}
1907
1908
// SBAC RD
1909
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1910
1.57M
{
1911
1.57M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1912
1.57M
    m_fracBits = src.m_fracBits;
1913
1.57M
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1914
1.57M
}
1915
1916
void Entropy::copyFrom(const Entropy& src)
1917
10.9M
{
1918
10.9M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1919
1920
10.9M
    copyState(src);
1921
1922
10.9M
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1923
10.9M
    markValid();
1924
10.9M
}
1925
1926
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1927
2.52M
{
1928
2.52M
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1929
1930
2.52M
    if (cu.isIntra(absPartIdx))
1931
2.52M
    {
1932
2.52M
        if (depth == cu.m_encData->m_param->maxCUDepth)
1933
2.11M
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1934
2.52M
        return;
1935
2.52M
    }
1936
1937
437
    switch (partSize)
1938
437
    {
1939
0
    case SIZE_2Nx2N:
1940
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1941
0
        break;
1942
1943
0
    case SIZE_2NxN:
1944
0
    case SIZE_2NxnU:
1945
0
    case SIZE_2NxnD:
1946
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1947
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1948
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1949
0
        {
1950
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1951
0
            if (partSize != SIZE_2NxN)
1952
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1953
0
        }
1954
0
        break;
1955
1956
0
    case SIZE_Nx2N:
1957
0
    case SIZE_nLx2N:
1958
0
    case SIZE_nRx2N:
1959
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1960
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1961
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1962
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1963
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1964
0
        {
1965
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1966
0
            if (partSize != SIZE_Nx2N)
1967
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1968
0
        }
1969
0
        break;
1970
0
    default:
1971
0
        X265_CHECK(0, "invalid CU partition\n");
1972
0
        break;
1973
437
    }
1974
437
}
1975
1976
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1977
0
{
1978
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1979
1980
0
    if (numCand > 1)
1981
0
    {
1982
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1983
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1984
1985
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1986
1987
0
        if (unaryIdx != 0)
1988
0
        {
1989
0
            uint32_t mask = (1 << unaryIdx) - 2;
1990
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1991
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1992
0
        }
1993
0
    }
1994
0
}
1995
1996
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1997
4.30M
{
1998
4.30M
    uint32_t dir[4], j;
1999
4.30M
    uint32_t preds[4][3];
2000
4.30M
    int predIdx[4];
2001
4.30M
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
2002
4.30M
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
2003
2004
9.49M
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
2005
5.19M
    {
2006
5.19M
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
2007
5.19M
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
2008
5.19M
        predIdx[j] = -1;
2009
20.7M
        for (uint32_t i = 0; i < 3; i++)
2010
15.5M
            if (dir[j] == preds[j][i])
2011
5.17M
                predIdx[j] = i;
2012
2013
5.19M
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
2014
5.19M
    }
2015
2016
9.49M
    for (j = 0; j < partNum; j++)
2017
5.18M
    {
2018
5.18M
        if (predIdx[j] != -1)
2019
5.17M
        {
2020
5.17M
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
2021
            // NOTE: Mapping
2022
            //       0 = 0
2023
            //       1 = 10
2024
            //       2 = 11
2025
5.17M
            int nonzero = (!!predIdx[j]);
2026
5.17M
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
2027
5.17M
        }
2028
10.5k
        else
2029
10.5k
        {
2030
10.5k
            if (preds[j][0] > preds[j][1])
2031
471
                std::swap(preds[j][0], preds[j][1]);
2032
2033
10.5k
            if (preds[j][0] > preds[j][2])
2034
0
                std::swap(preds[j][0], preds[j][2]);
2035
2036
10.5k
            if (preds[j][1] > preds[j][2])
2037
0
                std::swap(preds[j][1], preds[j][2]);
2038
2039
10.5k
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
2040
18.4E
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
2041
18.4E
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
2042
2043
10.5k
            encodeBinsEP(dir[j], 5);
2044
10.5k
        }
2045
5.18M
    }
2046
4.30M
}
2047
2048
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
2049
4.17M
{
2050
4.17M
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
2051
2052
4.17M
    if (intraDirChroma == DM_CHROMA_IDX)
2053
1.09M
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
2054
3.07M
    else
2055
3.07M
    {
2056
7.17M
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
2057
7.17M
        {
2058
7.17M
            if (intraDirChroma == chromaDirMode[i])
2059
3.07M
            {
2060
3.07M
                intraDirChroma = i;
2061
3.07M
                break;
2062
3.07M
            }
2063
7.17M
        }
2064
2065
3.07M
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
2066
3.07M
        encodeBinsEP(intraDirChroma, 2);
2067
3.07M
    }
2068
4.17M
}
2069
2070
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
2071
0
{
2072
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
2073
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
2074
2075
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
2076
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
2077
0
    if (interDir < 2)
2078
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
2079
0
}
2080
2081
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
2082
0
{
2083
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
2084
2085
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
2086
2087
0
    if (refFrame > 0)
2088
0
    {
2089
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
2090
0
        if (refNum == 0)
2091
0
            return;
2092
2093
0
        refFrame--;
2094
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
2095
0
        if (refFrame > 0)
2096
0
        {
2097
0
            uint32_t mask = (1 << refFrame) - 2;
2098
0
            mask >>= (refFrame == refNum) ? 1 : 0;
2099
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
2100
0
        }
2101
0
    }
2102
0
}
2103
2104
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
2105
0
{
2106
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
2107
0
    const int hor = mvd.x;
2108
0
    const int ver = mvd.y;
2109
2110
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2111
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2112
2113
0
    const bool bHorAbsGr0 = hor != 0;
2114
0
    const bool bVerAbsGr0 = ver != 0;
2115
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
2116
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
2117
2118
0
    if (bHorAbsGr0)
2119
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2120
2121
0
    if (bVerAbsGr0)
2122
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2123
2124
0
    if (bHorAbsGr0)
2125
0
    {
2126
0
        if (horAbs > 1)
2127
0
            writeEpExGolomb(horAbs - 2, 1);
2128
2129
0
        encodeBinEP(0 > hor ? 1 : 0);
2130
0
    }
2131
2132
0
    if (bVerAbsGr0)
2133
0
    {
2134
0
        if (verAbs > 1)
2135
0
            writeEpExGolomb(verAbs - 2, 1);
2136
2137
0
        encodeBinEP(0 > ver ? 1 : 0);
2138
0
    }
2139
0
}
2140
2141
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
2142
4.78k
{
2143
4.78k
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
2144
2145
4.78k
    int qpBdOffsetY = QP_BD_OFFSET;
2146
2147
4.78k
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
2148
2149
4.78k
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
2150
4.78k
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
2151
4.78k
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
2152
4.78k
    if (absDQp >= CU_DQP_TU_CMAX)
2153
4.10k
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
2154
2155
4.78k
    if (absDQp > 0)
2156
4.23k
    {
2157
4.23k
        uint32_t sign = (dqp > 0 ? 0 : 1);
2158
4.23k
        encodeBinEP(sign);
2159
4.23k
    }
2160
4.78k
}
2161
2162
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
2163
8.35M
{
2164
8.35M
    uint32_t ctx = tuDepth + 2;
2165
2166
8.35M
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
2167
8.35M
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
2168
8.35M
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
2169
2170
8.35M
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
2171
0
    {
2172
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
2173
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
2174
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
2175
2176
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2177
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2178
0
    }
2179
8.35M
    else
2180
8.35M
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2181
8.35M
}
2182
2183
#if CHECKED_BUILD || _DEBUG
/* Reference (debug-only) count of the bins needed to code the remaining
 * levels of absCoeff[0 .. numNonZero-1].  The loop body always runs at least
 * once.  Used to cross-check primitives.costCoeffRemain(). */
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
    uint32_t riceParam = 0;
    int firstCoeff2 = 1;                        // cleared once a level >= 2 has been seen
    uint32_t packedBaseLevels = 0x5555AAAA;     // 2 bits per coefficient: base level before OR-ing firstCoeff2
    uint32_t totalBins = 0;

    int idx = 0;
    do
    {
        const uint16_t level = absCoeff[idx];

        int baseLevel = (packedBaseLevels & 3) | firstCoeff2;
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
        packedBaseLevels >>= 2;

        int codeNumber = level - baseLevel;
        if (codeNumber >= 0)
        {
            // mirrors writeCoefRemainExGolomb(level - baseLevel, riceParam), counting bins only
            uint32_t length = 0;

            codeNumber = ((uint32_t)codeNumber >> riceParam) - COEF_REMAIN_BIN_REDUCTION;
            if (codeNumber >= 0)
            {
                unsigned long msb;
                BSR(msb, codeNumber + 1);
                length = msb;
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");

                codeNumber = (length + length);
            }
            totalBins += (COEF_REMAIN_BIN_REDUCTION + 1 + riceParam + codeNumber);

            // the Rice parameter grows by one per large level and saturates at 4
            if (level > (COEF_REMAIN_BIN_REDUCTION << riceParam))
                riceParam = (riceParam + 1) - (riceParam >> 2);
            X265_CHECK(riceParam <= 4, "goRiceParam check failure\n");
        }

        firstCoeff2 &= (level < 2);
    }
    while (++idx < numNonZero);

    return totalBins;
}
#endif // debug only code
2231
2232
/* Code the quantized coefficients of one transform block.
 *
 * cu         - coding unit that owns the block (supplies bypass / transform-skip
 *              flags, PPS settings and the scan tables)
 * coeff      - trSize x trSize coefficient array; must contain at least one
 *              non-zero value (see the numSig check below)
 * absPartIdx - partition index of the block inside cu
 * log2TrSize - log2 of the block width
 * ttype      - plane; TEXT_LUMA selects the luma context sets, anything else
 *              the chroma ones
 *
 * In order, the function codes: the transform-skip flag (when applicable), the
 * position of the last significant coefficient, and then, for every 4x4
 * coefficient group from the last one back to group 0, the group flag, the
 * significance map, the greater-than-1 / greater-than-2 flags, the sign bins
 * and the remaining levels.
 *
 * When m_bitIf is NULL nothing is written: the cost of those syntax elements is
 * accumulated into m_fracBits through the costCoeffNxN / costC1C2Flag /
 * costCoeffRemain primitives instead (the "fast RD path"). */
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
2233
61.9k
{
2234
61.9k
    uint32_t trSize = 1 << log2TrSize;
2235
61.9k
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
2236
    // compute number of significant coefficients
2237
61.9k
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
2238
61.9k
    X265_CHECK(numSig > 0, "cbf check fail\n");
2239
61.9k
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
2240
2241
61.9k
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
2242
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
2243
2244
61.9k
    bool bIsLuma = ttype == TEXT_LUMA;
2245
2246
    // select scans
2247
61.9k
    TUEntropyCodingParameters codingParameters;
2248
61.9k
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
2249
2250
61.9k
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
2251
61.9k
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
2252
61.9k
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
2253
2254
    //----- encode significance map -----
2255
2256
    // Find position of last coefficient
2257
61.9k
    int scanPosLast = 0;
2258
61.9k
    uint32_t posLast;
2259
61.9k
    uint64_t sigCoeffGroupFlag64 = 0;
2260
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
2261
61.9k
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
2262
2263
61.9k
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
2264
61.9k
    posLast = codingParameters.scan[scanPosLast];
2265
2266
61.9k
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
2267
2268
    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
2269
159k
    for(int idx = 0; idx < lastScanSet; idx++)
2270
97.6k
    {
2271
97.6k
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
2272
97.6k
        const uint8_t nonZero = (coeffNum[idx] != 0);
2273
97.6k
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
2274
97.6k
    }
2275
2276
2277
    // Code position of last coefficient
2278
61.9k
    {
2279
        // The last position is composed of a prefix and suffix.
2280
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
2281
        // The bypass coded bins for both the x and y components are grouped together.
2282
61.9k
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
2283
61.9k
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
2284
        // swap
2285
61.9k
        if (codingParameters.scanType == SCAN_VER)
2286
4.57k
            std::swap(pos[0], pos[1]);
2287
2288
61.9k
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
2289
61.9k
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
2290
61.9k
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
2291
61.9k
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
2292
61.9k
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
2293
2294
61.9k
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
2295
185k
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2296
123k
        {
2297
123k
            uint32_t temp = g_lastCoeffTable[pos[i]];
2298
123k
            uint32_t prefixOnes = temp & 15;
2299
123k
            uint32_t suffixLen = temp >> 4;
2300
2301
274k
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
2302
150k
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
2303
2304
123k
            if (prefixOnes < maxGroupIdx)
2305
87.6k
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
2306
2307
123k
            packedSuffixBits <<= suffixLen;
2308
123k
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
2309
123k
            packedSuffixLen += suffixLen;
2310
123k
        }
2311
2312
61.9k
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
2313
61.9k
    }
2314
2315
    // code significance flag
2316
61.9k
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
2317
61.9k
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
2318
61.9k
    uint32_t c1 = 1;
2319
61.9k
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
2320
61.9k
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
2321
61.9k
    uint32_t numNonZero = 1;
2322
61.9k
    unsigned long lastNZPosInCG = 0;
2323
61.9k
    unsigned long firstNZPosInCG = 0;
2324
2325
#if _DEBUG
2326
    // Unnecessary, for Valgrind-3.10.0 only
2327
    memset(absCoeff, 0, sizeof(absCoeff));
2328
#endif
2329
2330
    // The coefficient at the last position needs no significance flag: it is
    // stored as absCoeff[0] up front, which is why numNonZero starts at 1.
61.9k
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
2331
2332
221k
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
2333
159k
    {
2334
159k
        const uint32_t subCoeffFlag = coeffFlag[subSet];
2335
159k
        uint32_t scanFlagMask = subCoeffFlag;
2336
159k
        int subPosBase = subSet << MLS_CG_SIZE;
2337
        
2338
        // In the last group the lowest mask bit is shifted out before the
        // significance loop starts at scanPosSigOff (the position just before
        // the last coefficient) -- presumably that bit belongs to the last
        // coefficient itself; confirm against primitives.scanPosLast.
159k
        if (subSet == lastScanSet)
2339
61.9k
        {
2340
61.9k
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
2341
61.9k
            scanFlagMask >>= 1;
2342
61.9k
        }
2343
2344
        // encode significant_coeffgroup_flag
2345
159k
        const int cgBlkPos = codingParameters.scanCG[subSet];
2346
159k
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
2347
159k
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
2348
159k
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
2349
2350
        // The flag of the last group and of group 0 is never coded; it is
        // simply forced to 1 in the group mask.
159k
        if (subSet == lastScanSet || !subSet)
2351
67.9k
            sigCoeffGroupFlag64 |= cgBlkPosMask;
2352
91.7k
        else
2353
91.7k
        {
2354
91.7k
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
2355
91.7k
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2356
91.7k
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
2357
91.7k
        }
2358
2359
        // encode significant_coeff_flag
2360
159k
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
2361
117k
        {
2362
117k
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
2363
117k
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2364
117k
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
2365
2366
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
2367
117k
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
2368
117k
            {
2369
                // patternSigCtx = 0
2370
117k
                {
2371
117k
                    2, 1, 1, 0,
2372
117k
                    1, 1, 0, 0,
2373
117k
                    1, 0, 0, 0,
2374
117k
                    0, 0, 0, 0,
2375
117k
                },
2376
                // patternSigCtx = 1
2377
117k
                {
2378
117k
                    2, 2, 2, 2,
2379
117k
                    1, 1, 1, 1,
2380
117k
                    0, 0, 0, 0,
2381
117k
                    0, 0, 0, 0,
2382
117k
                },
2383
                // patternSigCtx = 2
2384
117k
                {
2385
117k
                    2, 1, 0, 0,
2386
117k
                    2, 1, 0, 0,
2387
117k
                    2, 1, 0, 0,
2388
117k
                    2, 1, 0, 0,
2389
117k
                },
2390
                // patternSigCtx = 3
2391
117k
                {
2392
117k
                    2, 2, 2, 2,
2393
117k
                    2, 2, 2, 2,
2394
117k
                    2, 2, 2, 2,
2395
117k
                    2, 2, 2, 2,
2396
117k
                },
2397
                // 4x4
2398
117k
                {
2399
117k
                    0, 1, 4, 5,
2400
117k
                    2, 3, 4, 5,
2401
117k
                    6, 6, 8, 8,
2402
117k
                    7, 7, 8, 8
2403
117k
                }
2404
117k
            };
2405
2406
117k
            const int offset = codingParameters.firstSignificanceMapContext;
2407
117k
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2408
2409
117k
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2410
117k
            if (m_bitIf)
2411
495
            {
2412
495
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2413
495
                memset(tmpCoeff, 0, sizeof(tmpCoeff));
2414
2415
                // TODO: accelerate by PABSW
2416
2.47k
                for (int i = 0; i < MLS_CG_SIZE; i++)
2417
1.98k
                {
2418
1.98k
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2419
1.98k
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2420
1.98k
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2421
1.98k
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2422
1.98k
                }
2423
2424
495
                if (log2TrSize == 2)
2425
495
                {
2426
495
                    do
2427
7.42k
                    {
2428
7.42k
                        uint32_t blkPos, sig, ctxSig;
2429
7.42k
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2430
7.42k
                        sig     = scanFlagMask & 1;
2431
7.42k
                        scanFlagMask >>= 1;
2432
7.42k
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2433
7.42k
                        {
2434
7.42k
                            ctxSig = table_cnt[4][blkPos];
2435
7.42k
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2436
7.42k
                            encodeBin(sig, baseCtx[ctxSig]);
2437
7.42k
                        }
2438
7.42k
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2439
7.42k
                        numNonZero += sig;
2440
7.42k
                        scanPosSigOff--;
2441
7.42k
                    }
2442
7.42k
                    while(scanPosSigOff >= 0);
2443
495
                }
2444
0
                else
2445
0
                {
2446
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2447
2448
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2449
0
                    do
2450
0
                    {
2451
0
                        uint32_t blkPos, sig, ctxSig;
2452
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2453
0
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2454
0
                        sig     = scanFlagMask & 1;
2455
0
                        scanFlagMask >>= 1;
2456
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2457
                        // The flag is not coded for scan position 0 of a group
                        // other than group 0 when no earlier coefficient of the
                        // group was significant.
0
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2458
0
                        {
2459
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2460
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2461
2462
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2463
0
                            encodeBin(sig, baseCtx[ctxSig]);
2464
0
                        }
2465
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2466
0
                        numNonZero += sig;
2467
0
                        scanPosSigOff--;
2468
0
                    }
2469
0
                    while(scanPosSigOff >= 0);
2470
0
                }
2471
495
            }
2472
117k
            else // fast RD path
2473
117k
            {
2474
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2475
117k
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2476
117k
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2477
117k
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2478
2479
#if CHECKED_BUILD || _DEBUG
2480
                numNonZero = coeffNum[subSet];
2481
#endif
2482
                // update RD cost
2483
117k
                m_fracBits += sum;
2484
117k
            } // end of fast RD path -- !m_bitIf
2485
117k
        }
2486
159k
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2487
2488
159k
        uint32_t coeffSigns = coeffSign[subSet];
2489
159k
        numNonZero = coeffNum[subSet];
2490
159k
        if (numNonZero > 0)
2491
159k
        {
2492
159k
            uint32_t idx = 0;
2493
159k
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2494
159k
            BSR(lastNZPosInCG, subCoeffFlag);
2495
159k
            BSF(firstNZPosInCG, subCoeffFlag);
2496
2497
159k
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2498
159k
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2499
159k
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2500
2501
159k
            c1 = 1;
2502
159k
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2503
2504
159k
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2505
159k
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2506
2507
159k
            if (!m_bitIf)
2508
157k
            {
2509
157k
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
2510
157k
                uint32_t firstC2Idx = (sum >> 28);
2511
157k
                c1 = ((sum >> 26) & 3);
2512
157k
                m_fracBits += sum & 0x00FFFFFF;
2513
2514
                // hiddenShift is -1 when the first sign is hidden, so one sign
                // bin fewer is charged below (each bin is charged 1 << 15).
157k
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2515
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2516
157k
                m_fracBits += (numNonZero + hiddenShift) << 15;
2517
2518
157k
                if (numNonZero > firstC2Idx)
2519
152k
                {
2520
152k
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2521
152k
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2522
152k
                    m_fracBits += ((uint64_t)sum << 15);
2523
152k
                }
2524
157k
            }
2525
            // Standard path
2526
2.13k
            else
2527
2.13k
            {
2528
2.13k
                uint32_t firstC2Idx = 8;
2529
2.13k
                uint32_t firstC2Flag = 2;
2530
2.13k
                uint32_t c1Next = 0xFFFFFFFE;
2531
2532
2.13k
                idx = 0;
2533
2.13k
                do
2534
5.59k
                {
2535
5.59k
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2536
5.59k
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2537
5.59k
                    encodeBin(symbol1, baseCtxMod[c1]);
2538
2539
5.59k
                    if (symbol1)
2540
5.42k
                        c1Next = 0;
2541
2542
5.59k
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2543
5.59k
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2544
2545
5.59k
                    c1 = (c1Next & 3);
2546
5.59k
                    c1Next >>= 2;
2547
5.59k
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2548
5.59k
                    idx++;
2549
5.59k
                }
2550
5.59k
                while(idx < numC1Flag);
2551
2552
2.13k
                if (!c1)
2553
1.96k
                {
2554
1.96k
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2555
2556
1.96k
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2557
1.96k
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2558
1.96k
                }
2559
2560
                // When the first sign is hidden, the lowest bit of coeffSigns
                // is dropped and one bin fewer is written.
2.13k
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2561
2.13k
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2562
2563
2.13k
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2564
1.96k
                {
2565
                    // Standard path
2566
1.96k
                    uint32_t goRiceParam = 0;
2567
1.96k
                    int baseLevel = 3;
2568
1.96k
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2569
#if CHECKED_BUILD || _DEBUG
2570
                    int firstCoeff2 = 1;
2571
#endif
2572
1.96k
                    idx = firstC2Idx;
2573
1.96k
                    do
2574
9.38k
                    {
2575
9.38k
                        if (idx >= C1FLAG_NUMBER)
2576
3.96k
                            baseLevel = 1;
2577
                        // TODO: fast algorithm maybe broken this check logic
2578
9.38k
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2579
2580
9.38k
                        if (absCoeff[idx] >= baseLevel)
2581
9.38k
                        {
2582
9.38k
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2583
9.38k
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2584
9.38k
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2585
9.38k
                            goRiceParam += adjust;
2586
9.38k
                            threshold += (adjust) ? threshold : 0;
2587
9.38k
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2588
9.38k
                        }
2589
#if CHECKED_BUILD || _DEBUG
2590
                        firstCoeff2 = 0;
2591
#endif
2592
9.38k
                        baseLevel = 2;
2593
9.38k
                        idx++;
2594
9.38k
                    }
2595
9.38k
                    while(idx < numNonZero);
2596
1.96k
                }
2597
2.13k
            } // end of !bitIf
2598
159k
        } // end of (numNonZero > 0)
2599
2600
        // Initialize value for next loop
2601
159k
        numNonZero = 0;
2602
159k
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2603
159k
    }
2604
61.9k
}
2605
2606
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2607
822k
{
2608
822k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2609
2610
822k
    uint32_t isCodeNonZero = !!code;
2611
2612
822k
    encodeBinEP(isCodeNonZero);
2613
822k
    if (isCodeNonZero)
2614
80
    {
2615
80
        uint32_t isCodeLast = (maxSymbol > code);
2616
80
        uint32_t mask = (1 << (code - 1)) - 1;
2617
80
        uint32_t len = code - 1 + isCodeLast;
2618
80
        mask <<= isCodeLast;
2619
2620
80
        encodeBinsEP(mask, len);
2621
80
    }
2622
822k
}
2623
2624
/* estimate bit cost for CBP, significant map and significant coefficients */
2625
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2626
8.07M
{
2627
8.07M
    estCBFBit(estBitsSbac);
2628
2629
8.07M
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2630
2631
    // encode significance map
2632
8.07M
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2633
2634
    // encode significant coefficients
2635
8.07M
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2636
8.07M
}
2637
2638
/* estimate bit cost for each CBP bit */
2639
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2640
8.08M
{
2641
8.08M
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2642
2643
64.6M
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2644
56.5M
    {
2645
56.5M
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2646
56.5M
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2647
56.5M
    }
2648
2649
8.08M
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2650
2651
8.08M
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2652
8.08M
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2653
8.08M
}
2654
2655
/* estimate SAMBAC bit cost for significant coefficient group map */
2656
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2657
8.08M
{
2658
8.08M
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2659
2660
24.2M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2661
48.5M
        for (uint32_t bin = 0; bin < 2; bin++)
2662
32.3M
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2663
8.08M
}
2664
2665
/* estimate SAMBAC bit cost for significant coefficient map */
2666
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2667
8.08M
{
2668
8.08M
    int firstCtx = 1, numCtx = 8;
2669
2670
8.08M
    if (log2TrSize >= 4)
2671
312k
    {
2672
312k
        firstCtx = bIsLuma ? 21 : 12;
2673
312k
        numCtx = bIsLuma ? 6 : 3;
2674
312k
    }
2675
7.77M
    else if (log2TrSize == 3)
2676
1.24M
    {
2677
1.24M
        firstCtx = 9;
2678
1.24M
        numCtx = bIsLuma ? 12 : 3;
2679
1.24M
    }
2680
2681
8.08M
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2682
2683
8.08M
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2684
8.08M
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2685
2686
73.7M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2687
65.6M
    {
2688
65.6M
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2689
65.6M
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2690
65.6M
    }
2691
2692
8.08M
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2693
8.08M
    if (bIsLuma)
2694
4.70M
    {
2695
4.70M
        if (log2TrSize == 2)
2696
3.58M
        {
2697
10.7M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2698
7.16M
            {
2699
7.16M
                int bits = 0;
2700
7.16M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2701
2702
28.6M
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2703
21.4M
                {
2704
21.4M
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2705
21.4M
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2706
21.4M
                }
2707
2708
7.16M
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2709
7.16M
            }
2710
3.58M
        }
2711
1.12M
        else
2712
1.12M
        {
2713
1.12M
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2714
2715
3.37M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2716
2.25M
            {
2717
2.25M
                int bits = 0;
2718
2.25M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2719
2.25M
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2720
2721
9.56M
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2722
7.31M
                {
2723
7.31M
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2724
7.31M
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2725
7.31M
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2726
7.31M
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2727
7.31M
                    bits += 2 * cost1;
2728
7.31M
                }
2729
                // correct latest bit cost, it didn't include cost0
2730
2.25M
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2731
2.25M
            }
2732
1.12M
        }
2733
4.70M
    }
2734
3.37M
    else
2735
3.37M
    {
2736
3.37M
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2737
3.37M
        const int ctxShift = log2TrSize - 2;
2738
2739
10.2M
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2740
6.83M
        {
2741
6.83M
            int bits = 0;
2742
6.83M
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2743
2744
29.3M
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2745
22.5M
            {
2746
22.5M
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2747
22.5M
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2748
22.5M
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2749
22.5M
            }
2750
2751
6.83M
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2752
6.83M
        }
2753
3.37M
    }
2754
8.08M
}
2755
2756
/* estimate bit cost of significant coefficient */
2757
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2758
8.11M
{
2759
8.11M
    if (bIsLuma)
2760
4.71M
    {
2761
4.71M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2762
4.71M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2763
2764
80.1M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2765
75.4M
        {
2766
75.4M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2767
75.4M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2768
75.4M
        }
2769
2770
23.5M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2771
18.8M
        {
2772
18.8M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2773
18.8M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2774
18.8M
        }
2775
4.71M
    }
2776
3.40M
    else
2777
3.40M
    {
2778
3.40M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2779
3.40M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2780
2781
30.7M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2782
27.3M
        {
2783
27.3M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2784
27.3M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2785
27.3M
        }
2786
2787
10.2M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2788
6.83M
        {
2789
6.83M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2790
6.83M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2791
6.83M
        }
2792
3.40M
    }
2793
8.11M
}
2794
2795
/* Initialize our context information from the nominated source */
2796
void Entropy::copyContextsFrom(const Entropy& src)
2797
10.3k
{
2798
10.3k
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2799
2800
10.3k
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2801
10.3k
    markValid();
2802
10.3k
}
2803
2804
void Entropy::start()
2805
654
{
2806
654
    m_low = 0;
2807
654
    m_range = 510;
2808
654
    m_bitsLeft = -12;
2809
654
    m_numBufferedBytes = 0;
2810
654
    m_bufferedByte = 0xff;
2811
654
}
2812
2813
void Entropy::finish()
2814
2.96k
{
2815
2.96k
    if (m_low >> (21 + m_bitsLeft))
2816
6
    {
2817
6
        m_bitIf->writeByte(m_bufferedByte + 1);
2818
7
        while (m_numBufferedBytes > 1)
2819
1
        {
2820
1
            m_bitIf->writeByte(0x00);
2821
1
            m_numBufferedBytes--;
2822
1
        }
2823
2824
6
        m_low -= 1 << (21 + m_bitsLeft);
2825
6
    }
2826
2.95k
    else
2827
2.95k
    {
2828
2.95k
        if (m_numBufferedBytes > 0)
2829
2.95k
            m_bitIf->writeByte(m_bufferedByte);
2830
2831
2.96k
        while (m_numBufferedBytes > 1)
2832
7
        {
2833
7
            m_bitIf->writeByte(0xff);
2834
7
            m_numBufferedBytes--;
2835
7
        }
2836
2.95k
    }
2837
2.96k
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2838
2.96k
}
2839
2840
void Entropy::copyState(const Entropy& other)
2841
10.9M
{
2842
10.9M
    m_low = other.m_low;
2843
10.9M
    m_range = other.m_range;
2844
10.9M
    m_bitsLeft = other.m_bitsLeft;
2845
10.9M
    m_bufferedByte = other.m_bufferedByte;
2846
10.9M
    m_numBufferedBytes = other.m_numBufferedBytes;
2847
10.9M
    m_fracBits = other.m_fracBits;
2848
10.9M
}
2849
2850
void Entropy::resetBits()
2851
9.54M
{
2852
9.54M
    m_low = 0;
2853
9.54M
    m_bitsLeft = -12;
2854
9.54M
    m_numBufferedBytes = 0;
2855
9.54M
    m_bufferedByte = 0xff;
2856
9.54M
    m_fracBits &= 32767;
2857
9.54M
    if (m_bitIf)
2858
0
        m_bitIf->resetBits();
2859
9.54M
}
2860
2861
/** Encode bin */
2862
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2863
28.5M
{
2864
28.5M
    uint32_t mstate = ctxModel;
2865
2866
28.5M
    ctxModel = sbacNext(mstate, binValue);
2867
2868
28.5M
    if (!m_bitIf)
2869
28.2M
    {
2870
28.2M
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2871
28.2M
        return;
2872
28.2M
    }
2873
2874
340k
    uint32_t range = m_range;
2875
340k
    uint32_t state = sbacGetState(mstate);
2876
340k
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2877
340k
    range -= lps;
2878
2879
340k
    X265_CHECK(lps >= 2, "lps is too small\n");
2880
2881
340k
    int numBits = (uint32_t)(range - 256) >> 31;
2882
340k
    uint32_t low = m_low;
2883
2884
    // NOTE: MPS must be LOWEST bit in mstate
2885
340k
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2886
340k
    if ((binValue ^ mstate) & 1)
2887
35.9k
    {
2888
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2889
        //numBits = g_renormTable[lps >> 3];
2890
35.9k
        unsigned long idx;
2891
35.9k
        BSR(idx, lps);
2892
35.9k
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2893
2894
35.9k
        numBits = 8 - idx;
2895
35.9k
        if (state >= 63)
2896
0
            numBits = 6;
2897
35.9k
        X265_CHECK(numBits <= 6, "numBits failure\n");
2898
2899
35.9k
        low += range;
2900
35.9k
        range = lps;
2901
35.9k
    }
2902
340k
    m_low = (low << numBits);
2903
340k
    m_range = (range << numBits);
2904
340k
    m_bitsLeft += numBits;
2905
2906
340k
    if (m_bitsLeft >= 0)
2907
15.9k
        writeOut();
2908
340k
}
2909
2910
/** Encode equiprobable bin */
2911
void Entropy::encodeBinEP(uint32_t binValue)
2912
963k
{
2913
963k
    if (!m_bitIf)
2914
963k
    {
2915
963k
        m_fracBits += 32768;
2916
963k
        return;
2917
963k
    }
2918
475
    m_low <<= 1;
2919
475
    if (binValue)
2920
475
        m_low += m_range;
2921
475
    m_bitsLeft++;
2922
2923
475
    if (m_bitsLeft >= 0)
2924
96
        writeOut();
2925
475
}
2926
2927
/** Encode equiprobable bins */
2928
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2929
8.48M
{
2930
8.48M
    if (!m_bitIf)
2931
8.40M
    {
2932
8.40M
        m_fracBits += 32768 * numBins;
2933
8.40M
        return;
2934
8.40M
    }
2935
2936
85.0k
    while (numBins > 8)
2937
3.51k
    {
2938
3.51k
        numBins -= 8;
2939
3.51k
        uint32_t pattern = binValues >> numBins;
2940
3.51k
        m_low <<= 8;
2941
3.51k
        m_low += m_range * pattern;
2942
3.51k
        binValues -= pattern << numBins;
2943
3.51k
        m_bitsLeft += 8;
2944
2945
3.51k
        if (m_bitsLeft >= 0)
2946
3.51k
            writeOut();
2947
3.51k
    }
2948
2949
81.4k
    m_low <<= numBins;
2950
81.4k
    m_low += m_range * binValues;
2951
81.4k
    m_bitsLeft += numBins;
2952
2953
81.4k
    if (m_bitsLeft >= 0)
2954
23.3k
        writeOut();
2955
81.4k
}
2956
2957
/** Encode terminating bin */
2958
void Entropy::encodeBinTrm(uint32_t binValue)
2959
29.0k
{
2960
29.0k
    if (!m_bitIf)
2961
13.0k
    {
2962
13.0k
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2963
13.0k
        return;
2964
13.0k
    }
2965
2966
16.0k
    m_range -= 2;
2967
16.0k
    if (binValue)
2968
2.96k
    {
2969
2.96k
        m_low += m_range;
2970
2.96k
        m_low <<= 7;
2971
2.96k
        m_range = 2 << 7;
2972
2.96k
        m_bitsLeft += 7;
2973
2.96k
    }
2974
13.0k
    else if (m_range >= 256)
2975
12.3k
        return;
2976
733
    else
2977
733
    {
2978
733
        m_low <<= 1;
2979
733
        m_range <<= 1;
2980
733
        m_bitsLeft++;
2981
733
    }
2982
2983
3.69k
    if (m_bitsLeft >= 0)
2984
2.73k
        writeOut();
2985
3.69k
}
2986
2987
/** Move bits from register into bitstream */
2988
void Entropy::writeOut()
2989
45.6k
{
2990
45.6k
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2991
45.6k
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2992
2993
45.6k
    m_bitsLeft -= 8;
2994
45.6k
    m_low &= low_mask;
2995
2996
45.6k
    if (leadByte == 0xff)
2997
3.53k
        m_numBufferedBytes++;
2998
42.1k
    else
2999
42.1k
    {
3000
42.1k
        uint32_t numBufferedBytes = m_numBufferedBytes;
3001
42.1k
        if (numBufferedBytes > 0)
3002
39.1k
        {
3003
39.1k
            uint32_t carry = leadByte >> 8;
3004
39.1k
            uint32_t byteTowrite = m_bufferedByte + carry;
3005
39.1k
            m_bitIf->writeByte(byteTowrite);
3006
3007
39.1k
            byteTowrite = (0xff + carry) & 0xff;
3008
42.6k
            while (numBufferedBytes > 1)
3009
3.53k
            {
3010
3.53k
                m_bitIf->writeByte(byteTowrite);
3011
3.53k
                numBufferedBytes--;
3012
3.53k
            }
3013
39.1k
        }
3014
42.1k
        m_numBufferedBytes = 1;
3015
42.1k
        m_bufferedByte = (uint8_t)leadByte;
3016
42.1k
    }
3017
45.6k
}
3018
3019
/* Bit-cost lookup table with 128 entries.  The low 24 bits of every
 * PFX(entropyStateBits) entry carry the same value at the same index.
 * NOTE(review): presumably read through sbacGetEntropyBits() and expressed in
 * units of 1/32768 bit (encodeBinEP charges 32768 per bypass bin) - confirm
 * against the macro definition. */
const uint32_t g_entropyBits[128] =
3020
{
3021
    // Corrected table, most notably for last state
3022
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
3023
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
3024
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
3025
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
3026
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
3027
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
3028
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
3029
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
3030
};
3031
3032
/* State-transition table: 128 rows of two successor values each.  Rows 126
 * and 127 map to themselves in both columns.
 * NOTE(review): presumably indexed as [current context state][coded bin value]
 * through sbacNext(), as its use in encodeBin suggests - confirm against the
 * macro definition. */
const uint8_t g_nextState[128][2] =
3033
{
3034
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
3035
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
3036
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
3037
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
3038
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
3039
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
3040
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
3041
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
3042
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
3043
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
3044
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
3045
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
3046
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
3047
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
3048
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
3049
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
3050
};
3051
3052
}
3053
3054
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
// Each 32-bit entry packs two fields: bits [31:24] hold a next-state value and
// bits [23:0] hold a bit cost.  For every index i the cost field equals
// g_entropyBits[i] and the state field equals g_nextState[i][0] with its
// lowest bit cleared.  Entries alternate between the two cases named above:
// even indices are the stateMPS form, odd indices the stateLPS form.
// NOTE(review): the extern "C" linkage suggests this combined table is meant
// for non-C++ (e.g. assembly) consumers - confirm against the callers.
3055
extern "C" const uint32_t PFX(entropyStateBits)[128] =
3056
{
3057
    // Corrected table, most notably for last state
3058
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
3059
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
3060
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
3061
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
3062
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
3063
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
3064
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
3065
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
3066
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
3067
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
3068
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
3069
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
3070
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
3071
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
3072
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
3073
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
3074
};
3075