Coverage Report

Created: 2026-05-16 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/entropy.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
13.0k
#define CU_DQP_TU_CMAX 5 // maximum number of bins for the truncated-unary binarization (the CU_DQP prefix suggests CU delta-QP coding; the use site is outside this excerpt)
36
4.00k
#define CU_DQP_EG_k    0 // Exp-Golomb order k (the CU_DQP prefix suggests CU delta-QP coding; the use site is outside this excerpt)
37
0
#define START_VALUE    8 // start value for DPCM mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93
{
94
    { 183, },
95
    { 154, },
96
    { 184, },
97
};
98
99
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100
{
101
    { 152,  139, },
102
    { 152,  139, },
103
    {  63,  139, },
104
};
105
106
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128
{
129
    { 154,  154,  154, },
130
    { 154,  154,  154, },
131
    { 154,  154,  154, },
132
};
133
134
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135
{
136
    { 153,  111,  149,   92,  167,  154,  154 },
137
    { 153,  111,  149,  107,  167,  154,  154 },
138
    { 111,  141,   94,  138,  182,  154,  154 },
139
};
140
141
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149
{
150
    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151
      108,  123,   93 },
152
    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153
      108,  123,  108 },
154
    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155
      108,  123,   63 },
156
};
157
158
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159
{
160
    { 121,  140,
161
      61,  154, },
162
    { 121,  140,
163
      61,  154, },
164
    {  91,  171,
165
       134,  141, },
166
};
167
168
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169
{
170
    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171
    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172
    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173
};
174
175
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176
{
177
    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178
    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179
    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180
};
181
182
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183
{
184
    { 107,  167,   91,  107,  107,  167, },
185
    { 107,  167,   91,  122,  107,  167, },
186
    { 138,  153,  136,  167,  152,  152, },
187
};
188
189
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197
{
198
    { 153,  },
199
    { 153,  },
200
    { 153,  },
201
};
202
203
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204
{
205
    { 160, },
206
    { 185, },
207
    { 200, },
208
};
209
210
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211
{
212
    { 224,  167,  122, },
213
    { 124,  138,   94, },
214
    { 153,  138,  138, },
215
};
216
217
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218
{
219
    { 139,  139 },
220
    { 139,  139 },
221
    { 139,  139 },
222
};
223
224
/* Construct an entropy coder: mark it valid, zero the scalar bookkeeping
 * members, and (in checked builds) verify that the context-state array can
 * hold MAX_OFF_CTX_MOD entries. */
Entropy::Entropy()
{
    markValid();

    m_fracBits = 0;
    m_pad      = 0;
    m_meanQP   = 0;

    X265_CHECK(sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD <= sizeof(m_contextState),
               "context state table is too small\n");
}
232
233
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
234
void Entropy::codeVPS(const VPS& vps, const SPS& sps)
235
#else
236
void Entropy::codeVPS(const VPS& vps)
237
#endif
238
605
{
239
605
    int maxLayers = (vps.m_numLayers > 1 || vps.m_numViews > 1) + 1;
240
605
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
241
605
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
242
605
    WRITE_CODE(maxLayers - 1, 6, "vps_reserved_zero_6bits");
243
605
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
244
605
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
245
605
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
246
247
605
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
248
249
605
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
250
251
1.21k
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
252
605
    {
253
605
        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
254
605
        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
255
605
        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
256
605
    }
257
258
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
259
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
260
    {
261
        WRITE_CODE(maxLayers - 1, 6, "vps_max_nuh_reserved_zero_layer_id");
262
        WRITE_UVLC(vps.m_vpsNumLayerSetsMinus1, "vps_num_layer_sets_minus1");
263
        for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
264
        {
265
#if ENABLE_MULTIVIEW
266
            if (vps.m_numViews > 1)
267
            {
268
                for (int j = 0; j < vps.m_numViews; j++)
269
                {
270
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
271
                }
272
            }
273
#endif
274
#if ENABLE_ALPHA
275
            if (vps.m_numLayers > 1)
276
            {
277
                for (int j = 0; j < vps.m_numLayers; j++)
278
                {
279
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
280
                }
281
            }
282
#endif
283
        }
284
    }
285
    else
286
    {
287
        WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
288
        WRITE_UVLC(0, "vps_max_op_sets_minus1");
289
    }
290
#else
291
605
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
292
605
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
293
605
#endif
294
295
605
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
296
297
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
298
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
299
    {
300
        WRITE_FLAG(vps.vps_extension_flag, "vps_extension_flag");
301
302
        if (vps.vps_extension_flag)
303
        {
304
            while (m_bitIf->getNumberOfWrittenBits() % X265_BYTE != 0)
305
            {
306
                WRITE_FLAG(1, "vps_extension_alignment_bit_equal_to_one");
307
            }
308
309
            WRITE_CODE(vps.ptl.levelIdc, 8, "general_level_idc");
310
            if (vps.maxTempSubLayers > 1)
311
            {
312
                for (uint32_t i = 0; i < vps.maxTempSubLayers - 1; i++)
313
                {
314
                    WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
315
                    WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
316
                }
317
                for (int i = vps.maxTempSubLayers - 1; i < 8; i++)
318
                    WRITE_CODE(0, 2, "reserved_zero_2bits");
319
            }
320
321
            WRITE_FLAG(vps.splitting_flag, "splitting flag");
322
            for (int i = 0; i < MAX_VPS_NUM_SCALABILITY_TYPES; i++)
323
            {
324
                WRITE_FLAG(vps.m_scalabilityMask[i], "scalability_mask[i]");
325
            }
326
            for (int i = 0; i < vps.scalabilityTypes - vps.splitting_flag; i++)
327
            {
328
                WRITE_CODE(vps.m_dimensionIdLen[i] - 1, 3, "dimension_id_len_minus1[i]");
329
            }
330
            WRITE_FLAG(vps.m_nuhLayerIdPresentFlag, "vps_nuh_layer_id_present_flag");
331
            for (int i = 1; i < maxLayers; i++)
332
            {
333
                if (vps.m_nuhLayerIdPresentFlag)
334
                    WRITE_CODE(vps.m_layerIdInNuh[i], 6, "layer_id_in_nuh[i]");
335
336
                if (!vps.splitting_flag)
337
                {
338
                    for (int j = 0; j < vps.scalabilityTypes; j++)
339
                    {
340
                        uint8_t bits = vps.m_dimensionIdLen[j];
341
                        WRITE_CODE(vps.m_dimensionId[i][j], bits, "dimension_id[i][j]");
342
                    }
343
                }
344
            }
345
            WRITE_CODE(vps.m_viewIdLen, 4, "view_id_len");
346
347
#if ENABLE_ALPHA
348
            if (vps.m_numLayers > 1)
349
            {
350
                WRITE_FLAG(0, "direct_dependency_flag[1][0]");
351
                WRITE_UVLC(0, "num_add_layer_sets");
352
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
353
                WRITE_FLAG(0, "max_tid_ref_present_flag");
354
                WRITE_FLAG(0, "default_ref_layers_active_flag");
355
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
356
                WRITE_FLAG(1, "vps_profile_present_flag");
357
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
358
359
                WRITE_UVLC(0, "num_add_olss");
360
                WRITE_CODE(0, 2, "default_output_layer_idc");
361
                WRITE_CODE(1, 2, "profile_tier_level_idx[ i ][ j ]");
362
                WRITE_CODE(2, 2, "profile_tier_level_idx[ i ][ j ]");
363
364
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
365
366
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
367
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
368
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
369
370
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
371
372
                if (sps.chromaFormatIdc == X265_CSP_I444)
373
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
374
375
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
376
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
377
378
                const Window& conf = sps.conformanceWindow;
379
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
380
                if (conf.bEnabled)
381
                {
382
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
383
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
384
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
385
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
386
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
387
                }
388
389
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
390
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
391
                WRITE_FLAG(1, "poc_lsb_not_present_flag[");
392
393
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
394
                {
395
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
396
                    for (uint32_t j = 0; j < vps.maxTempSubLayers ; j++)
397
                    {
398
                        if(j > 0)
399
                        WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
400
401
                        for(int k = 0; k < vps.m_numLayersInIdList[i]; k++)
402
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
403
404
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
405
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
406
                    }
407
                }
408
409
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
410
411
                WRITE_FLAG(0, "default_direct_dependency_flag");
412
                WRITE_UVLC(0, "vps_non_vui_extension_length");
413
                WRITE_FLAG(0, "vps_vui_present_flag");
414
                WRITE_FLAG(0, "vps_extension2_flag");
415
        }
416
#endif
417
418
#if ENABLE_MULTIVIEW
419
            if (vps.m_numViews > 1)
420
            {
421
                for (uint8_t i = 0; i < vps.m_numViews; i++)
422
                    WRITE_CODE(i, vps.m_viewIdLen, "view_id_val[i]");
423
424
                for (int i = 1; i < vps.m_numViews; i++)
425
                {
426
                    for (int j = 0; j < i; j++)
427
                    {
428
                        if (j == 0)
429
                            WRITE_FLAG(1, "direct_dependency_flag[1][0]");
430
                        else
431
                            WRITE_FLAG(0, "direct_dependency_flag[1][0]");
432
                    }
433
                }
434
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
435
                WRITE_FLAG(0, "max_tid_ref_present_flag");
436
                WRITE_FLAG(1, "default_ref_layers_active_flag");
437
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
438
                WRITE_FLAG(1, "vps_profile_present_flag[i]");
439
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
440
                WRITE_UVLC(0, "num_add_olss");
441
                WRITE_CODE(0, 2, "default_output_layer_idc");
442
443
                for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
444
                {
445
                    for (int j = 0; j < vps.m_numViews; j++)
446
                    {
447
                        WRITE_CODE((j == 0) ? 1 : 2, 2, "profile_tier_level_idx[ i ][ j ]");
448
                    }
449
                }
450
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
451
452
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
453
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
454
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
455
456
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
457
458
                if (sps.chromaFormatIdc == X265_CSP_I444)
459
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
460
461
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
462
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
463
464
                const Window& conf = sps.conformanceWindow;
465
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
466
                if (conf.bEnabled)
467
                {
468
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
469
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
470
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
471
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
472
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
473
                }
474
475
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
476
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
477
478
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
479
                {
480
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
481
                    for (uint32_t j = 0; j < vps.maxTempSubLayers; j++)
482
                    {
483
                        if (j > 0)
484
                            WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
485
486
                        for (int k = 0; k < vps.m_numLayersInIdList[i]; k++)
487
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
488
489
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
490
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
491
                    }
492
                }
493
494
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
495
496
                WRITE_FLAG(1, "default_direct_dependency_flag");
497
                WRITE_CODE(2, 2, "default_direct_dependency_type");
498
                WRITE_UVLC(0, "vps_non_vui_extension_length");
499
                WRITE_FLAG(0, "vps_vui_present_flag");
500
                WRITE_FLAG(0, "vps_extension2_flag");
501
            }
502
#endif
503
        }
504
    }
505
    else
506
        WRITE_FLAG(0, "vps_extension_flag");
507
#else
508
605
    WRITE_FLAG(0, "vps_extension_flag");
509
605
#endif
510
605
}
511
512
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl, int layer)
513
605
{
514
605
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
515
#if ENABLE_MULTIVIEW
516
    if(layer != 0)
517
        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
518
    else
519
        WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
520
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
521
#else
522
605
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
523
605
#endif
524
605
    {
525
605
        WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
526
605
        codeProfileTier(ptl, sps.maxTempSubLayers);
527
605
    }
528
529
605
    WRITE_UVLC(layer, "sps_seq_parameter_set_id");
530
#if ENABLE_MULTIVIEW
531
    if (layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
532
        WRITE_FLAG(0, "update_rep_format_flag");
533
    else
534
#endif
535
605
    {
536
605
        WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
537
538
605
        if (sps.chromaFormatIdc == X265_CSP_I444)
539
0
            WRITE_FLAG(0,                       "separate_colour_plane_flag");
540
541
605
        WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
542
605
        WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
543
544
605
        const Window& conf = sps.conformanceWindow;
545
605
        WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
546
605
        if (conf.bEnabled)
547
444
        {
548
444
            int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
549
444
            WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
550
444
            WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
551
444
            WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
552
444
            WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
553
444
        }
554
555
605
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
556
605
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
557
605
    }
558
559
605
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
560
#if ENABLE_MULTIVIEW
561
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
562
#endif
563
605
    {
564
605
        WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
565
566
1.21k
        for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
567
605
        {
568
605
            WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
569
605
            WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
570
605
            WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
571
605
        }
572
605
    }
573
574
605
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
575
605
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
576
605
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
577
605
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
578
605
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
579
605
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
580
605
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
581
605
    if (scalingList.m_bEnabled)
582
0
    {
583
#if ENABLE_MULTIVIEW
584
        if ((layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
585
            WRITE_FLAG(sps.spsInferScalingListFlag, "sps_infer_scaling_list_flag");
586
        if(sps.spsInferScalingListFlag)
587
            WRITE_CODE(0, 6, "sps_scaling_list_ref_layer_id");
588
        else
589
#endif
590
0
        {
591
0
            WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
592
0
            if (scalingList.m_bDataPresent)
593
0
                codeScalingList(scalingList);
594
0
        }
595
0
    }
596
605
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
597
605
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
598
599
605
    WRITE_FLAG(0, "pcm_enabled_flag");
600
605
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
601
605
    for (int i = 0; i < sps.spsrpsNum; i++)
602
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
603
605
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
604
605
605
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
606
605
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
607
608
605
    WRITE_FLAG(1, "vui_parameters_present_flag");
609
605
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo, layer);
610
611
605
    WRITE_FLAG(sps.sps_extension_flag, "sps_extension_flag");
612
613
#if ENABLE_MULTIVIEW
614
    if (sps.sps_extension_flag && sps.maxViews > 1)
615
    {
616
        WRITE_FLAG(0, "sps_range_extensions_flag");
617
        WRITE_FLAG(sps.maxViews > 1, "sps_multilayer_extension_flag");
618
        WRITE_FLAG(0, "sps_3d_extension_flag");
619
        WRITE_CODE(0, 5, "sps_extension_5bits");
620
621
        if (layer == 0)
622
            WRITE_FLAG(0, "inter_view_mv_vert_constraint_flag");
623
        else
624
            WRITE_FLAG(1, "inter_view_mv_vert_constraint_flag");
625
    }
626
#endif
627
628
#if ENABLE_SCC_EXT
629
    if (ptl.profileIdc[0] == Profile::MAINSCC)
630
    {
631
        bool sps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
632
        sps_extension_flags[SCC_EXT_IDX] = true;
633
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
634
            WRITE_FLAG(sps_extension_flags[i], "sps_extension_flag");
635
        WRITE_FLAG(1, "intra_block_copy_enabled_flag");
636
        WRITE_FLAG(0, "palette_mode_enabled_flag");
637
        WRITE_CODE(0, 2, "motion_vector_resolution_control_idc");
638
        WRITE_FLAG(0, "intra_boundary_filter_disabled_flag");
639
    }
640
#endif
641
605
}
642
643
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26, int layer)
644
605
{
645
605
    WRITE_UVLC(layer,                          "pps_pic_parameter_set_id");
646
605
    WRITE_UVLC(layer,                          "pps_seq_parameter_set_id");
647
605
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
648
605
    WRITE_FLAG(0,                          "output_flag_present_flag");
649
605
    WRITE_CODE(pps.maxViews > 1 ? 2 : 0, 3,"num_extra_slice_header_bits");
650
605
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
651
605
    WRITE_FLAG(0,                          "cabac_init_present_flag");
652
605
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
653
605
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
654
655
605
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
656
605
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
657
605
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
658
659
605
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
660
605
    if (pps.bUseDQP)
661
462
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
662
663
605
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
664
605
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
665
605
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
666
667
605
    WRITE_FLAG(layer ? 0 : pps.bUseWeightPred,            "weighted_pred_flag");
668
605
    WRITE_FLAG(layer ? 0 : pps.bUseWeightedBiPred,        "weighted_bipred_flag");
669
605
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
670
605
    WRITE_FLAG(0,                             "tiles_enabled_flag");
671
605
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
672
605
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
673
674
605
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
675
605
    if (pps.bDeblockingFilterControlPresent)
676
0
    {
677
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
678
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
679
0
        if (!pps.bPicDisableDeblockingFilter)
680
0
        {
681
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
682
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
683
0
        }
684
0
    }
685
686
605
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
687
605
    WRITE_FLAG(0, "lists_modification_present_flag");
688
605
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
689
605
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
690
605
    WRITE_FLAG(pps.pps_extension_flag, "pps_extension_flag");
691
692
#if ENABLE_MULTIVIEW
693
    if (pps.pps_extension_flag && pps.maxViews > 1)
694
    {
695
        WRITE_FLAG(0, "pps_range_extensions_flag");
696
        WRITE_FLAG(pps.maxViews > 1, "pps_multilayer_extension_flag");
697
        WRITE_FLAG(0, "pps_3d_extension_flag");
698
        WRITE_CODE(0, 5, "pps_extension_5bits");
699
700
        if (pps.maxViews > 1)
701
        {
702
            WRITE_FLAG(0, "poc_reset_info_present_flag");
703
            WRITE_FLAG(0, "pps_infer_scaling_list_flag");
704
            WRITE_UVLC(0, "num_ref_loc_offsets");
705
            WRITE_FLAG(0, "colour_mapping_enabled_flag");
706
        }
707
    }
708
#endif
709
710
711
#if ENABLE_SCC_EXT
712
    if (pps.profileIdc == Profile::MAINSCC)
713
    {
714
        bool pps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
715
        pps_extension_flags[SCC_EXT_IDX] = true;
716
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
717
            WRITE_FLAG(pps_extension_flags[i], "pps_extension_flag");
718
        WRITE_FLAG(1, "curr_pic_as_ref_enabled_pps_flag");
719
        WRITE_FLAG(0, "adaptive_colour_trans_flag");
720
        WRITE_FLAG(0, "palette_predictor_initializer_flag");
721
    }
722
#endif
723
605
}
724
725
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers, int layer)
726
1.21k
{
727
1.21k
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
728
1.21k
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
729
1.21k
    WRITE_CODE(ptl.profileIdc[layer], 5,   "XXX_profile_idc[]");
730
39.9k
    for (int j = 0; j < 32; j++)
731
38.7k
    {
732
38.7k
        if (layer)
733
0
            WRITE_FLAG(j == ptl.profileIdc[layer] ? 1 : 0, "XXX_profile_compatibility_flag[][j]");
734
38.7k
        else
735
38.7k
            WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
736
38.7k
    }
737
738
1.21k
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
739
1.21k
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
740
1.21k
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
741
1.21k
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
742
743
1.21k
    if (ptl.profileIdc[layer] == Profile::MAINREXT || ptl.profileIdc[layer] == Profile::HIGHTHROUGHPUTREXT || ptl.profileIdc[layer] == Profile::SCALABLEMAIN || ptl.profileIdc[layer] == Profile::SCALABLEMAIN10 || ptl.profileIdc[layer] == Profile::MULTIVIEWMAIN || ptl.profileIdc[layer] == Profile::MAINSCC)
744
0
    {
745
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
746
0
        int csp = ptl.chromaFormatConstraint;
747
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
748
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
749
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
750
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
751
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
752
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
753
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
754
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
755
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
756
0
        if (ptl.profileIdc[layer] == Profile::MAINSCC)
757
0
        {
758
0
            WRITE_FLAG(bitDepthConstraint <= 14, "max_14bit_constraint_flag");
759
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[0..15]");
760
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[16..31]");
761
0
            WRITE_FLAG(0, "reserved_zero_33bits[32]");
762
0
        }
763
0
        else
764
0
        {
765
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[0..15]");
766
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[16..31]");
767
0
            WRITE_CODE(0, 3, "XXX_reserved_zero_35bits[32..34]");
768
0
        }
769
0
    }
770
1.21k
    else
771
1.21k
    {
772
1.21k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
773
1.21k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
774
1.21k
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
775
1.21k
    }
776
1.21k
    if (ptl.profileIdc[layer] == Profile::MAINSCC)
777
0
        WRITE_FLAG(false, "inbld_flag");
778
779
1.21k
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
780
781
1.21k
    if (maxTempSubLayers > 1)
782
0
    {
783
0
        for(int i = 0; i < maxTempSubLayers - 1; i++)
784
0
        {
785
0
            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
786
0
            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
787
0
        }
788
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
789
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
790
0
    }
791
1.21k
}
792
793
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo, int layer)
794
605
{
795
605
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
796
605
    if (vui.aspectRatioInfoPresentFlag)
797
0
    {
798
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
799
0
        if (vui.aspectRatioIdc == 255)
800
0
        {
801
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
802
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
803
0
        }
804
0
    }
805
806
605
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
807
605
    if (vui.overscanInfoPresentFlag)
808
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
809
810
605
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
811
605
    if (vui.videoSignalTypePresentFlag)
812
605
    {
813
605
        WRITE_CODE(vui.videoFormat, 3, "video_format");
814
605
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
815
605
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
816
605
        if (vui.colourDescriptionPresentFlag)
817
0
        {
818
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
819
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
820
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
821
0
        }
822
605
    }
823
824
605
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
825
605
    if (vui.chromaLocInfoPresentFlag)
826
0
    {
827
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
828
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
829
0
    }
830
831
605
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
832
605
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
833
605
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
834
835
605
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
836
605
    if (vui.defaultDisplayWindow.bEnabled)
837
0
    {
838
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
839
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
840
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
841
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
842
0
    }
843
844
605
    if(layer)
845
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
846
605
    else
847
605
    {
848
605
        if (!bEmitVUITimingInfo)
849
0
            WRITE_FLAG(0, "vui_timing_info_present_flag");
850
605
        else
851
605
        {
852
605
            WRITE_FLAG(1, "vui_timing_info_present_flag");
853
605
            WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
854
605
            WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
855
605
            WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
856
605
            if (!bEmitVUIHRDInfo)
857
0
                WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
858
605
            else
859
605
            {
860
605
                WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
861
605
                if (vui.hrdParametersPresentFlag)
862
0
                    codeHrdParameters(vui.hrdParameters, maxSubTLayers);
863
605
            }
864
605
        }
865
605
    }
866
867
605
    WRITE_FLAG(0, "bitstream_restriction_flag");
868
605
}
869
870
void Entropy::codeScalingList(const ScalingList& scalingList)
871
0
{
872
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
873
0
    {
874
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
875
0
        {
876
0
            int predList = scalingList.checkPredMode(sizeId, listId);
877
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
878
0
            if (predList >= 0)
879
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
880
0
            else // DPCM Mode
881
0
                codeScalingList(scalingList, sizeId, listId);
882
0
        }
883
0
    }
884
0
}
885
886
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
887
0
{
888
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
889
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
890
0
    int nextCoef = START_VALUE;
891
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
892
0
    int data;
893
894
0
    if (sizeId > BLOCK_8x8)
895
0
    {
896
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
897
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
898
0
    }
899
0
    for (int i = 0; i < coefNum; i++)
900
0
    {
901
0
        data = src[scan[i]] - nextCoef;
902
0
        if (data < -128)
903
0
            data += 256;
904
0
        if (data > 127)
905
0
            data -= 256;
906
0
        nextCoef = (nextCoef + data + 256) % 256;
907
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
908
0
    }
909
0
}
910
911
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
912
0
{
913
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
914
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
915
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
916
917
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
918
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
919
920
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
921
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
922
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
923
924
0
    for (int i = 0; i < maxSubTLayers; i++)
925
0
    {
926
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
927
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
928
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
929
930
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
931
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
932
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
933
0
    }
934
0
}
935
936
void Entropy::codeAUD(const Slice& slice)
937
0
{
938
0
    int picType;
939
940
0
    switch (slice.m_sliceType)
941
0
    {
942
0
    case I_SLICE:
943
0
        picType = 0;
944
0
        break;
945
0
    case P_SLICE:
946
0
        picType = 1;
947
0
        break;
948
0
    case B_SLICE:
949
0
        picType = 2;
950
0
        break;
951
0
    default:
952
0
        picType = 7;
953
0
        break;
954
0
    }
955
956
0
    WRITE_CODE(picType, 3, "pic_type");
957
0
}
958
959
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp, int layer)
960
605
{
961
605
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
962
605
    if (slice.getRapPicFlag())
963
605
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
964
965
605
    WRITE_UVLC(layer, "slice_pic_parameter_set_id");
966
967
    /* x265 does not use dependent slices, so always write all this data */
968
605
    if (slice_addr)
969
0
    {
970
        // if( dependent_slice_segments_enabled_flag )
971
        //     dependent_slice_segment_flag             u(1)
972
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
973
0
    }
974
975
#if ENABLE_MULTIVIEW
976
    if (encData.m_param->numViews > 1)
977
    {
978
        int esb = 0;
979
        if (2 > esb)
980
        {
981
            esb++;
982
            WRITE_FLAG(0, "discardable_flag");
983
        }
984
        if (2 > esb)
985
        {
986
            esb++;
987
            WRITE_FLAG(0, "cross_layer_bla_flag");
988
        }
989
    }
990
#endif
991
992
605
    WRITE_UVLC(slice.m_sliceType, "slice_type");
993
994
605
    if ((slice.m_param->numViews > 1 && layer > 0) || !slice.getIdrPicFlag())
995
0
    {
996
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
997
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
998
0
    }
999
605
    if (!slice.getIdrPicFlag())
1000
0
    {
1001
#if _DEBUG || CHECKED_BUILD
1002
        // check for bitstream restriction stating that:
1003
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
1004
        // Ideally this process should not be repeated for each slice in a picture
1005
        if (slice.isIRAP())
1006
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
1007
            {
1008
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
1009
            }
1010
#endif
1011
1012
0
        if (slice.m_rpsIdx < 0)
1013
0
        {
1014
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
1015
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
1016
0
        }
1017
0
        else
1018
0
        {
1019
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
1020
0
            int numBits = 0;
1021
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
1022
0
                numBits++;
1023
1024
0
            if (numBits > 0)
1025
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
1026
0
        }
1027
1028
0
        if (slice.m_sps->bTemporalMVPEnabled)
1029
#if ENABLE_SCC_EXT
1030
            WRITE_FLAG(slice.m_bTemporalMvp, "slice_temporal_mvp_enable_flag");
1031
#else
1032
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
1033
0
#endif
1034
0
    }
1035
605
    const SAOParam *saoParam = encData.m_saoParam;
1036
605
    if (slice.m_bUseSao)
1037
605
    {
1038
605
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
1039
605
        if (encData.m_param->internalCsp != X265_CSP_I400)
1040
605
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
1041
605
    }
1042
0
    else if(encData.m_param->selectiveSAO)
1043
0
    {
1044
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
1045
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1046
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
1047
0
    }
1048
1049
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
1050
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
1051
1052
605
    if (!slice.isIntra())
1053
0
    {
1054
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
1055
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
1056
0
        if (overrideFlag)
1057
0
        {
1058
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
1059
0
            if (slice.isInterB())
1060
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
1061
0
            else
1062
0
            {
1063
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
1064
0
            }
1065
0
        }
1066
0
    }
1067
605
    else
1068
605
    {
1069
605
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
1070
605
    }
1071
1072
605
    if (slice.isInterB())
1073
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
1074
1075
#if ENABLE_SCC_EXT
1076
    if (slice.m_bTemporalMvp)
1077
#else
1078
605
    if (slice.m_sps->bTemporalMVPEnabled)
1079
605
#endif
1080
605
    {
1081
605
        if (slice.m_sliceType == B_SLICE)
1082
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
1083
1084
605
        if (slice.m_sliceType != I_SLICE &&
1085
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
1086
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
1087
0
        {
1088
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
1089
0
        }
1090
605
    }
1091
605
    if (((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE)) && !layer)
1092
0
        codePredWeightTable(slice);
1093
1094
605
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
1095
605
    if (!slice.isIntra())
1096
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
1097
1098
605
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
1099
605
    WRITE_SVLC(code, "slice_qp_delta");
1100
1101
605
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
1102
0
    {
1103
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
1104
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
1105
0
    }
1106
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
1107
    //       We didn't support filter across slice board, so disable it now
1108
1109
605
    if (encData.m_param->maxSlices <= 1)
1110
605
    {
1111
605
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
1112
605
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
1113
1114
605
        if (isSAOEnabled || isDBFEnabled)
1115
605
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
1116
605
    }
1117
605
}
1118
1119
/** write wavefront substreams sizes for the slice header */
1120
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
1121
484
{
1122
484
    uint32_t offsetLen = 1;
1123
2.72k
    while (maxOffset >= (1U << offsetLen))
1124
2.23k
    {
1125
2.23k
        offsetLen++;
1126
2.23k
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
1127
2.23k
    }
1128
1129
484
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
1130
484
    if (numSubStreams > 0)
1131
484
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
1132
1133
2.58k
    for (uint32_t i = 0; i < numSubStreams; i++)
1134
2.09k
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
1135
484
}
1136
1137
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
1138
0
{
1139
0
    if (idx > 0)
1140
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
1141
1142
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
1143
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
1144
0
    int prev = 0;
1145
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
1146
0
    {
1147
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
1148
0
        prev = rps.deltaPOC[j];
1149
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
1150
0
    }
1151
1152
0
    prev = 0;
1153
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
1154
0
    {
1155
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
1156
0
        prev = rps.deltaPOC[j];
1157
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
1158
0
    }
1159
0
}
1160
1161
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
1162
25.5k
{
1163
25.5k
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
1164
25.5k
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
1165
25.5k
}
1166
1167
/* encode a CU block recursively */
1168
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
1169
99.5k
{
1170
99.5k
    const Slice* slice = ctu.m_slice;
1171
1172
99.5k
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
1173
99.5k
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
1174
1175
99.5k
    if (!cuUnsplitFlag)
1176
21.5k
    {
1177
21.5k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1178
21.5k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1179
5.99k
            bEncodeDQP = true;
1180
107k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1181
86.2k
        {
1182
86.2k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1183
86.2k
            if (childGeom.flags & CUGeom::PRESENT)
1184
48.4k
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1185
86.2k
        }
1186
21.5k
        return;
1187
21.5k
    }
1188
1189
77.9k
    if (cuSplitFlag) 
1190
56.5k
        codeSplitFlag(ctu, absPartIdx, depth);
1191
1192
77.9k
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
1193
6.38k
    {
1194
6.38k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1195
6.38k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1196
270
            bEncodeDQP = true;
1197
31.9k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1198
25.5k
        {
1199
25.5k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1200
25.5k
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1201
25.5k
        }
1202
6.38k
        return;
1203
6.38k
    }
1204
1205
71.5k
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1206
31.0k
        bEncodeDQP = true;
1207
1208
71.5k
    if (slice->m_pps->bTransquantBypassEnabled)
1209
19.0k
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
1210
1211
71.5k
    if (!slice->isIntra())
1212
0
    {
1213
0
        codeSkipFlag(ctu, absPartIdx);
1214
0
        if (ctu.isSkipped(absPartIdx))
1215
0
        {
1216
0
            codeMergeIndex(ctu, absPartIdx);
1217
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1218
0
            return;
1219
0
        }
1220
0
        codePredMode(ctu.m_predMode[absPartIdx]);
1221
0
    }
1222
1223
71.5k
    codePartSize(ctu, absPartIdx, depth);
1224
1225
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1226
71.5k
    codePredInfo(ctu, absPartIdx);
1227
1228
71.5k
    uint32_t tuDepthRange[2];
1229
71.5k
    if (ctu.isIntra(absPartIdx))
1230
71.5k
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
1231
18.4E
    else
1232
18.4E
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
1233
1234
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
1235
71.5k
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
1236
1237
    // --- write terminating bit ---
1238
71.5k
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1239
71.5k
}
1240
1241
/* Return bit count of signaling inter mode */
1242
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
1243
0
{
1244
0
    uint32_t bits;
1245
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
1246
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
1247
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1248
0
    switch (partSize)
1249
0
    {
1250
0
    case SIZE_2Nx2N:
1251
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1252
0
        break;
1253
1254
0
    case SIZE_2NxN:
1255
0
    case SIZE_2NxnU:
1256
0
    case SIZE_2NxnD:
1257
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1258
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1259
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1260
0
        {
1261
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1262
0
            if (partSize != SIZE_2NxN)
1263
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1264
0
        }
1265
0
        break;
1266
1267
0
    case SIZE_Nx2N:
1268
0
    case SIZE_nLx2N:
1269
0
    case SIZE_nRx2N:
1270
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1271
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1272
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1273
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1274
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1275
0
        {
1276
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1277
0
            if (partSize != SIZE_Nx2N)
1278
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1279
0
        }
1280
0
        break;
1281
0
    default:
1282
0
        X265_CHECK(0, "invalid CU partition\n");
1283
0
        break;
1284
0
    }
1285
1286
0
    return bits;
1287
0
}
1288
1289
/* finish encoding a cu and handle end-of-slice conditions */
1290
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
1291
71.5k
{
1292
71.5k
    const Slice* slice = ctu.m_slice;
1293
71.5k
    uint32_t realEndAddress = slice->m_endCUAddr;
1294
71.5k
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
1295
71.5k
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
1296
1297
71.5k
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
1298
71.5k
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
1299
71.5k
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
1300
71.5k
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
1301
71.5k
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
1302
42.4k
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
1303
1304
71.5k
    if (slice->m_pps->bUseDQP)
1305
52.5k
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
1306
1307
71.5k
    if (granularityBoundary)
1308
25.5k
    {
1309
        // Encode slice finish
1310
25.5k
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
1311
25.5k
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
1312
1.21k
            bTerminateSlice = 1;
1313
1314
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
1315
25.5k
        if (!bTerminateSlice)
1316
24.2k
            encodeBinTrm(0);    // end_of_slice_segment_flag
1317
1318
25.5k
        if (!m_bitIf)
1319
12.7k
            resetBits(); // TODO: most likely unnecessary
1320
25.5k
    }
1321
71.5k
}
1322
1323
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1324
                              bool& bCodeDQP, const uint32_t depthRange[2])
1325
1.76M
{
1326
1.76M
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1327
1328
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1329
     * so we have checks to make sure the implied value matches our intentions */
1330
1.76M
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1331
268k
    {
1332
268k
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1333
268k
    }
1334
1.49M
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1335
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1336
0
    {
1337
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1338
0
    }
1339
1.49M
    else if (log2CurSize > depthRange[1])
1340
0
    {
1341
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1342
0
    }
1343
1.49M
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1344
1.07M
    {
1345
1.07M
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1346
1.07M
    }
1347
417k
    else
1348
417k
    {
1349
417k
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1350
417k
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1351
417k
    }
1352
1353
1.76M
    uint32_t hChromaShift = cu.m_hChromaShift;
1354
1.76M
    uint32_t vChromaShift = cu.m_vChromaShift;
1355
1.76M
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
1356
1.76M
    if (!curDepth || !bSmallChroma)
1357
687k
    {
1358
687k
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
1359
687k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
1360
687k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
1361
687k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
1362
687k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
1363
687k
    }
1364
1365
1.76M
    if (subdiv)
1366
269k
    {
1367
269k
        --log2CurSize;
1368
269k
        ++curDepth;
1369
1370
269k
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1371
1372
269k
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1373
269k
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1374
269k
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1375
269k
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1376
269k
        return;
1377
269k
    }
1378
1379
1.49M
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
1380
1381
1.49M
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
1382
0
    {
1383
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
1384
0
    }
1385
1.49M
    else
1386
1.49M
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1387
1388
1.49M
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1389
1.49M
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
1390
1.49M
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
1391
1.49M
    if (!(cbfY || cbfU || cbfV))
1392
1.48M
        return;
1393
1394
    // dQP: only for CTU once
1395
7.17k
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1396
3.38k
    {
1397
3.38k
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1398
3.38k
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1399
3.38k
        codeDeltaQP(cu, absPartIdxLT);
1400
3.38k
        bCodeDQP = false;
1401
3.38k
    }
1402
1403
7.17k
    if (cbfY)
1404
4.29k
    {
1405
4.29k
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1406
4.29k
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1407
4.29k
        if (!(cbfU || cbfV))
1408
567
            return;
1409
4.29k
    }
1410
1411
6.60k
    if (bSmallChroma)
1412
4.15k
    {
1413
4.15k
        if ((absPartIdx & 3) != 3)
1414
3.11k
            return;
1415
1416
1.03k
        const uint32_t log2CurSizeC = 2;
1417
1.03k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1418
1.03k
        const uint32_t curPartNum = 4;
1419
1.03k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1420
3.11k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1421
2.07k
        {
1422
2.07k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1423
2.07k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1424
2.07k
            do
1425
2.07k
            {
1426
2.07k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1427
2.07k
                {
1428
2.07k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1429
2.07k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1430
2.07k
                }
1431
2.07k
            }
1432
2.07k
            while (tuIterator.isNextSection());
1433
2.07k
        }
1434
1.03k
    }
1435
2.45k
    else
1436
2.45k
    {
1437
2.45k
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1438
2.45k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1439
2.45k
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1440
2.45k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1441
7.69k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1442
5.23k
        {
1443
5.23k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1444
5.23k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1445
5.23k
            do
1446
5.23k
            {
1447
5.23k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1448
5.23k
                {
1449
5.23k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1450
5.23k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1451
5.23k
                }
1452
5.23k
            }
1453
5.23k
            while (tuIterator.isNextSection());
1454
5.23k
        }
1455
2.45k
    }
1456
6.60k
}
1457
1458
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1459
                              bool& bCodeDQP, const uint32_t depthRange[2])
1460
0
{
1461
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1462
1463
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1464
     * so we have checks to make sure the implied value matches our intentions */
1465
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1466
0
    {
1467
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1468
0
    }
1469
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1470
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1471
0
    {
1472
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1473
0
    }
1474
0
    else if (log2CurSize > depthRange[1])
1475
0
    {
1476
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1477
0
    }
1478
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1479
0
    {
1480
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1481
0
    }
1482
0
    else
1483
0
    {
1484
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1485
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1486
0
    }
1487
1488
0
    if (subdiv)
1489
0
    {
1490
0
        --log2CurSize;
1491
0
        ++curDepth;
1492
1493
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1494
1495
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1496
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1497
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1498
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1499
0
        return;
1500
0
    }
1501
1502
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1503
0
    {
1504
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1505
0
    }
1506
0
    else
1507
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1508
1509
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1510
1511
0
    if (!cbfY)
1512
0
        return;
1513
1514
    // dQP: only for CTU once
1515
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1516
0
    {
1517
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1518
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1519
0
        codeDeltaQP(cu, absPartIdxLT);
1520
0
        bCodeDQP = false;
1521
0
    }
1522
1523
0
    if (cbfY)
1524
0
    {
1525
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1526
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1527
0
    }
1528
0
}
1529
1530
1531
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1532
686k
{
1533
686k
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1534
686k
    {
1535
686k
        codeIntraDirLumaAng(cu, absPartIdx, true);
1536
686k
        if (cu.m_chromaFormat != X265_CSP_I400)
1537
686k
        {
1538
686k
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1539
686k
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1540
1541
686k
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1542
1543
686k
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1544
0
            {
1545
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1546
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1547
0
                {
1548
0
                    absPartIdx += qNumParts;
1549
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1550
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1551
0
                }
1552
0
            }
1553
686k
        }
1554
686k
    }
1555
18.4E
    else // if it is inter mode, encode motion vector and reference index
1556
18.4E
        codePUWise(cu, absPartIdx);
1557
686k
}
1558
1559
/** encode motion information for every PU block */
1560
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1561
0
{
1562
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1563
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1564
1565
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1566
0
    {
1567
0
        codeMergeFlag(cu, subPartIdx);
1568
0
        if (cu.m_mergeFlag[subPartIdx])
1569
0
            codeMergeIndex(cu, subPartIdx);
1570
0
        else
1571
0
        {
1572
0
            if (cu.m_slice->isInterB())
1573
0
                codeInterDir(cu, subPartIdx);
1574
1575
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1576
0
            for (uint32_t list = 0; list < 2; list++)
1577
0
            {
1578
0
                if (interDir & (1 << list))
1579
0
                {
1580
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1581
1582
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1583
0
                    codeMvd(cu, subPartIdx, list);
1584
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1585
0
                }
1586
0
            }
1587
0
        }
1588
0
    }
1589
0
}
1590
1591
/** encode reference frame index for a PU block */
1592
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1593
0
{
1594
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1595
1596
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1597
0
        codeRefFrmIdx(cu, absPartIdx, list);
1598
0
}
1599
1600
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1601
686k
{
1602
686k
    if (!cu.isIntra(absPartIdx))
1603
0
    {
1604
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1605
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1606
0
        if (!cu.getQtRootCbf(absPartIdx))
1607
0
            return;
1608
0
    }
1609
1610
686k
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1611
686k
    if (cu.m_chromaFormat == X265_CSP_I400)
1612
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1613
686k
    else
1614
686k
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1615
686k
}
1616
1617
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1618
49.9k
{
1619
49.9k
    int typeIdx = ctuParam.typeIdx;
1620
1621
49.9k
    if (plane != 2)
1622
33.3k
    {
1623
33.3k
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1624
33.3k
        if (typeIdx >= 0)
1625
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1626
33.3k
    }
1627
1628
49.9k
    if (typeIdx >= 0)
1629
0
    {
1630
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1631
0
        if (typeIdx == SAO_BO)
1632
0
        {
1633
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1634
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1635
1636
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1637
0
                if (ctuParam.offset[i] != 0)
1638
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1639
1640
0
            encodeBinsEP(ctuParam.bandPos, 5);
1641
0
        }
1642
0
        else // if (typeIdx < SAO_BO)
1643
0
        {
1644
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1645
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1646
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1647
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1648
0
            if (plane != 2)
1649
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1650
0
        }
1651
0
    }
1652
49.9k
}
1653
1654
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1655
153k
{
1656
153k
    if (plane != 2)
1657
102k
    {
1658
102k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1659
102k
        encodeBinEP(1);
1660
102k
    }
1661
1662
153k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1663
1664
153k
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1665
153k
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1666
153k
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1667
153k
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1668
153k
    if (plane != 2)
1669
102k
        encodeBinsEP((uint32_t)(typeIdx), 2);
1670
153k
}
1671
1672
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1673
38.2k
{
1674
38.2k
    if (plane != 2)
1675
25.5k
    {
1676
25.5k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1677
25.5k
        encodeBinEP(0);
1678
25.5k
    }
1679
1680
38.2k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1681
1682
191k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1683
153k
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1684
1685
191k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1686
153k
        if (offset[i] != 0)
1687
88
            encodeBinEP(offset[i] < 0);
1688
1689
38.2k
    encodeBinsEP(bandPos, 5);
1690
38.2k
}
1691
1692
/** initialize context model with respect to QP and initialization value */
1693
uint8_t sbacInit(int qp, int initValue)
1694
94.9k
{
1695
94.9k
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1696
1697
94.9k
    int  slope      = (initValue >> 4) * 5 - 45;
1698
94.9k
    int  offset     = ((initValue & 15) << 3) - 16;
1699
94.9k
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1700
94.9k
    uint32_t mpState = (initState >= 64);
1701
94.9k
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1702
1703
94.9k
    return (uint8_t)state;
1704
94.9k
}
1705
1706
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1707
15.7k
{
1708
15.7k
    ctxModel += sliceType * size;
1709
1710
110k
    for (int n = 0; n < size; n++)
1711
94.9k
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1712
15.7k
}
1713
1714
void Entropy::resetEntropy(const Slice& slice)
1715
605
{
1716
605
    int  qp              = slice.m_sliceQp;
1717
605
    SliceType sliceType  = slice.m_sliceType;
1718
1719
605
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1720
605
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1721
605
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1722
605
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1723
605
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1724
605
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1725
605
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1726
605
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1727
605
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1728
605
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1729
605
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1730
605
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1731
605
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1732
605
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1733
605
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1734
605
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1735
605
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1736
605
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1737
605
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1738
605
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1739
605
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1740
605
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1741
605
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1742
605
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1743
605
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1744
605
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1745
    // new structure
1746
1747
605
    start();
1748
605
}
1749
1750
/* code explicit wp tables */
1751
void Entropy::codePredWeightTable(const Slice& slice)
1752
0
{
1753
0
    const WeightParam *wp;
1754
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1755
0
    bool            bDenomCoded  = false;
1756
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1757
0
    uint32_t        totalSignalledWeightFlags = 0;
1758
1759
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1760
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1761
0
    {
1762
0
        for (int list = 0; list < numRefDirs; list++)
1763
0
        {
1764
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1765
0
            {
1766
0
                wp = slice.m_weightPredTable[list][ref];
1767
0
                if (!bDenomCoded)
1768
0
                {
1769
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1770
1771
0
                    if (bChroma)
1772
0
                    {
1773
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1774
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1775
0
                    }
1776
0
                    bDenomCoded = true;
1777
0
                }
1778
#if ENABLE_SCC_EXT
1779
                if (slice.m_poc == slice.m_refPOCList[list][ref])
1780
                    assert(!wp[0].wtPresent);
1781
                else
1782
#endif
1783
0
                    WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1784
0
                totalSignalledWeightFlags = totalSignalledWeightFlags + wp[0].wtPresent;
1785
0
            }
1786
1787
0
            if (bChroma)
1788
0
            {
1789
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1790
0
                {
1791
0
                    wp = slice.m_weightPredTable[list][ref];
1792
#if ENABLE_SCC_EXT
1793
                    if (slice.m_poc == slice.m_refPOCList[list][ref])
1794
                        assert(!wp[1].wtPresent);
1795
                    else
1796
#endif
1797
0
                        WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1798
0
                    totalSignalledWeightFlags = totalSignalledWeightFlags + 2 * wp[1].wtPresent;
1799
0
                }
1800
0
            }
1801
1802
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1803
0
            {
1804
0
                wp = slice.m_weightPredTable[list][ref];
1805
0
                if (wp[0].wtPresent)
1806
0
                {
1807
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1808
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1809
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1810
0
                }
1811
1812
0
                if (bChroma)
1813
0
                {
1814
0
                    if (wp[1].wtPresent)
1815
0
                    {
1816
0
                        for (int plane = 1; plane < 3; plane++)
1817
0
                        {
1818
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1819
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1820
1821
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1822
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1823
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1824
0
                        }
1825
0
                    }
1826
0
                }
1827
0
            }
1828
0
        }
1829
1830
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1831
0
    }
1832
0
}
1833
1834
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1835
4.52k
{
1836
4.52k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1837
1838
4.52k
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1839
1840
4.52k
    if (!symbol)
1841
389
        return;
1842
1843
4.13k
    bool bCodeLast = (maxSymbol > symbol);
1844
1845
20.3k
    while (--symbol)
1846
16.2k
        encodeBin(1, scmModel[offset]);
1847
1848
4.13k
    if (bCodeLast)
1849
131
        encodeBin(0, scmModel[offset]);
1850
4.13k
}
1851
1852
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1853
4.00k
{
1854
4.00k
    uint32_t bins = 0;
1855
4.00k
    int numBins = 0;
1856
1857
15.6k
    while (symbol >= (uint32_t)(1 << count))
1858
11.6k
    {
1859
11.6k
        bins = 2 * bins + 1;
1860
11.6k
        numBins++;
1861
11.6k
        symbol -= 1 << count;
1862
11.6k
        count++;
1863
11.6k
    }
1864
1865
4.00k
    bins = 2 * bins + 0;
1866
4.00k
    numBins++;
1867
1868
4.00k
    bins = (bins << count) | symbol;
1869
4.00k
    numBins += count;
1870
1871
4.00k
    X265_CHECK(numBins <= 32, "numBins too large\n");
1872
4.00k
    encodeBinsEP(bins, numBins);
1873
4.00k
}
1874
1875
/** Coding of coeff_abs_level_minus3 */
1876
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1877
8.25k
{
1878
8.25k
    uint32_t length;
1879
8.25k
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1880
1881
8.25k
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1882
0
    {
1883
0
        length = codeNumber >> absGoRice;
1884
1885
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1886
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1887
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1888
0
    }
1889
8.25k
    else
1890
8.25k
    {
1891
8.25k
        length = 0;
1892
8.25k
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1893
8.25k
        {
1894
8.25k
            unsigned long idx;
1895
8.25k
            BSR(idx, codeNumber + 1);
1896
8.25k
            length = idx;
1897
8.25k
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1898
8.25k
            codeNumber -= (1 << idx) - 1;
1899
8.25k
        }
1900
8.25k
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1901
1902
8.25k
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1903
8.25k
        encodeBinsEP(codeNumber, length + absGoRice);
1904
8.25k
    }
1905
8.25k
}
1906
1907
// SBAC RD
1908
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1909
1.41M
{
1910
1.41M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1911
1.41M
    m_fracBits = src.m_fracBits;
1912
1.41M
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1913
1.41M
}
1914
1915
void Entropy::copyFrom(const Entropy& src)
1916
9.89M
{
1917
9.89M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1918
1919
9.89M
    copyState(src);
1920
1921
9.89M
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1922
9.89M
    markValid();
1923
9.89M
}
1924
1925
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1926
2.27M
{
1927
2.27M
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1928
1929
2.27M
    if (cu.isIntra(absPartIdx))
1930
2.27M
    {
1931
2.27M
        if (depth == cu.m_encData->m_param->maxCUDepth)
1932
1.90M
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1933
2.27M
        return;
1934
2.27M
    }
1935
1936
36
    switch (partSize)
1937
36
    {
1938
0
    case SIZE_2Nx2N:
1939
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1940
0
        break;
1941
1942
0
    case SIZE_2NxN:
1943
0
    case SIZE_2NxnU:
1944
0
    case SIZE_2NxnD:
1945
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1946
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1947
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1948
0
        {
1949
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1950
0
            if (partSize != SIZE_2NxN)
1951
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1952
0
        }
1953
0
        break;
1954
1955
0
    case SIZE_Nx2N:
1956
0
    case SIZE_nLx2N:
1957
0
    case SIZE_nRx2N:
1958
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1959
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1960
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1961
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1962
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1963
0
        {
1964
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1965
0
            if (partSize != SIZE_Nx2N)
1966
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1967
0
        }
1968
0
        break;
1969
0
    default:
1970
0
        X265_CHECK(0, "invalid CU partition\n");
1971
0
        break;
1972
36
    }
1973
36
}
1974
1975
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1976
0
{
1977
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1978
1979
0
    if (numCand > 1)
1980
0
    {
1981
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1982
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1983
1984
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1985
1986
0
        if (unaryIdx != 0)
1987
0
        {
1988
0
            uint32_t mask = (1 << unaryIdx) - 2;
1989
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1990
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1991
0
        }
1992
0
    }
1993
0
}
1994
1995
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1996
3.88M
{
1997
3.88M
    uint32_t dir[4], j;
1998
3.88M
    uint32_t preds[4][3];
1999
3.88M
    int predIdx[4];
2000
3.88M
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
2001
3.88M
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
2002
2003
8.58M
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
2004
4.69M
    {
2005
4.69M
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
2006
4.69M
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
2007
4.69M
        predIdx[j] = -1;
2008
18.7M
        for (uint32_t i = 0; i < 3; i++)
2009
14.0M
            if (dir[j] == preds[j][i])
2010
4.68M
                predIdx[j] = i;
2011
2012
4.69M
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
2013
4.69M
    }
2014
2015
8.58M
    for (j = 0; j < partNum; j++)
2016
4.69M
    {
2017
4.69M
        if (predIdx[j] != -1)
2018
4.68M
        {
2019
4.68M
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
2020
            // NOTE: Mapping
2021
            //       0 = 0
2022
            //       1 = 10
2023
            //       2 = 11
2024
4.68M
            int nonzero = (!!predIdx[j]);
2025
4.68M
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
2026
4.68M
        }
2027
10.0k
        else
2028
10.0k
        {
2029
10.0k
            if (preds[j][0] > preds[j][1])
2030
350
                std::swap(preds[j][0], preds[j][1]);
2031
2032
10.0k
            if (preds[j][0] > preds[j][2])
2033
0
                std::swap(preds[j][0], preds[j][2]);
2034
2035
10.0k
            if (preds[j][1] > preds[j][2])
2036
0
                std::swap(preds[j][1], preds[j][2]);
2037
2038
10.0k
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
2039
18.4E
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
2040
18.4E
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
2041
2042
10.0k
            encodeBinsEP(dir[j], 5);
2043
10.0k
        }
2044
4.69M
    }
2045
3.88M
}
2046
2047
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
2048
3.75M
{
2049
3.75M
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
2050
2051
3.75M
    if (intraDirChroma == DM_CHROMA_IDX)
2052
987k
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
2053
2.77M
    else
2054
2.77M
    {
2055
6.45M
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
2056
6.45M
        {
2057
6.45M
            if (intraDirChroma == chromaDirMode[i])
2058
2.77M
            {
2059
2.77M
                intraDirChroma = i;
2060
2.77M
                break;
2061
2.77M
            }
2062
6.45M
        }
2063
2064
2.77M
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
2065
2.77M
        encodeBinsEP(intraDirChroma, 2);
2066
2.77M
    }
2067
3.75M
}
2068
2069
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
2070
0
{
2071
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
2072
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
2073
2074
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
2075
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
2076
0
    if (interDir < 2)
2077
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
2078
0
}
2079
2080
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
2081
0
{
2082
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
2083
2084
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
2085
2086
0
    if (refFrame > 0)
2087
0
    {
2088
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
2089
0
        if (refNum == 0)
2090
0
            return;
2091
2092
0
        refFrame--;
2093
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
2094
0
        if (refFrame > 0)
2095
0
        {
2096
0
            uint32_t mask = (1 << refFrame) - 2;
2097
0
            mask >>= (refFrame == refNum) ? 1 : 0;
2098
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
2099
0
        }
2100
0
    }
2101
0
}
2102
2103
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
2104
0
{
2105
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
2106
0
    const int hor = mvd.x;
2107
0
    const int ver = mvd.y;
2108
2109
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2110
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2111
2112
0
    const bool bHorAbsGr0 = hor != 0;
2113
0
    const bool bVerAbsGr0 = ver != 0;
2114
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
2115
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
2116
2117
0
    if (bHorAbsGr0)
2118
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2119
2120
0
    if (bVerAbsGr0)
2121
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2122
2123
0
    if (bHorAbsGr0)
2124
0
    {
2125
0
        if (horAbs > 1)
2126
0
            writeEpExGolomb(horAbs - 2, 1);
2127
2128
0
        encodeBinEP(0 > hor ? 1 : 0);
2129
0
    }
2130
2131
0
    if (bVerAbsGr0)
2132
0
    {
2133
0
        if (verAbs > 1)
2134
0
            writeEpExGolomb(verAbs - 2, 1);
2135
2136
0
        encodeBinEP(0 > ver ? 1 : 0);
2137
0
    }
2138
0
}
2139
2140
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
2141
4.52k
{
2142
4.52k
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
2143
2144
4.52k
    int qpBdOffsetY = QP_BD_OFFSET;
2145
2146
4.52k
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
2147
2148
4.52k
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
2149
4.52k
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
2150
4.52k
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
2151
4.52k
    if (absDQp >= CU_DQP_TU_CMAX)
2152
4.00k
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
2153
2154
4.52k
    if (absDQp > 0)
2155
4.13k
    {
2156
4.13k
        uint32_t sign = (dqp > 0 ? 0 : 1);
2157
4.13k
        encodeBinEP(sign);
2158
4.13k
    }
2159
4.52k
}
2160
2161
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
2162
7.52M
{
2163
7.52M
    uint32_t ctx = tuDepth + 2;
2164
2165
7.52M
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
2166
7.52M
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
2167
7.52M
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
2168
2169
7.52M
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
2170
0
    {
2171
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
2172
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
2173
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
2174
2175
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2176
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2177
0
    }
2178
7.52M
    else
2179
7.52M
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2180
7.52M
}
2181
2182
#if CHECKED_BUILD || _DEBUG
2183
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
2184
{
2185
    uint32_t goRiceParam = 0;
2186
    int firstCoeff2 = 1;
2187
    uint32_t baseLevelN = 0x5555AAAA; // 2-bits encode format baseLevel
2188
2189
    uint32_t sum = 0;
2190
    int idx = 0;
2191
    do
2192
    {
2193
        int baseLevel = (baseLevelN & 3) | firstCoeff2;
2194
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2195
        baseLevelN >>= 2;
2196
        int codeNumber = absCoeff[idx] - baseLevel;
2197
2198
        if (codeNumber >= 0)
2199
        {
2200
            //writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2201
            uint32_t length = 0;
2202
2203
            codeNumber = ((uint32_t)codeNumber >> goRiceParam) - COEF_REMAIN_BIN_REDUCTION;
2204
            if (codeNumber >= 0)
2205
            {
2206
                {
2207
                    unsigned long cidx;
2208
                    BSR(cidx, codeNumber + 1);
2209
                    length = cidx;
2210
                }
2211
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
2212
2213
                codeNumber = (length + length);
2214
            }
2215
            sum += (COEF_REMAIN_BIN_REDUCTION + 1 + goRiceParam + codeNumber);
2216
2217
            if (absCoeff[idx] > (COEF_REMAIN_BIN_REDUCTION << goRiceParam))
2218
                goRiceParam = (goRiceParam + 1) - (goRiceParam >> 2);
2219
            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2220
        }
2221
        if (absCoeff[idx] >= 2)
2222
            firstCoeff2 = 0;
2223
        idx++;
2224
    }
2225
    while(idx < numNonZero);
2226
2227
    return sum;
2228
}
2229
#endif // debug only code
2230
2231
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
2232
57.4k
{
2233
57.4k
    uint32_t trSize = 1 << log2TrSize;
2234
57.4k
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
2235
    // compute number of significant coefficients
2236
57.4k
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
2237
57.4k
    X265_CHECK(numSig > 0, "cbf check fail\n");
2238
57.4k
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
2239
2240
57.4k
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
2241
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
2242
2243
57.4k
    bool bIsLuma = ttype == TEXT_LUMA;
2244
2245
    // select scans
2246
57.4k
    TUEntropyCodingParameters codingParameters;
2247
57.4k
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
2248
2249
57.4k
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
2250
57.4k
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
2251
57.4k
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
2252
2253
    //----- encode significance map -----
2254
2255
    // Find position of last coefficient
2256
57.4k
    int scanPosLast = 0;
2257
57.4k
    uint32_t posLast;
2258
57.4k
    uint64_t sigCoeffGroupFlag64 = 0;
2259
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
2260
57.4k
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
2261
2262
57.4k
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
2263
57.4k
    posLast = codingParameters.scan[scanPosLast];
2264
2265
57.4k
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
2266
2267
    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
2268
141k
    for(int idx = 0; idx < lastScanSet; idx++)
2269
83.7k
    {
2270
83.7k
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
2271
83.7k
        const uint8_t nonZero = (coeffNum[idx] != 0);
2272
83.7k
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
2273
83.7k
    }
2274
2275
2276
    // Code position of last coefficient
2277
57.4k
    {
2278
        // The last position is composed of a prefix and suffix.
2279
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
2280
        // The bypass coded bins for both the x and y components are grouped together.
2281
57.4k
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
2282
57.4k
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
2283
        // swap
2284
57.4k
        if (codingParameters.scanType == SCAN_VER)
2285
4.19k
            std::swap(pos[0], pos[1]);
2286
2287
57.4k
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
2288
57.4k
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
2289
57.4k
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
2290
57.4k
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
2291
57.4k
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
2292
2293
57.4k
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
2294
172k
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2295
114k
        {
2296
114k
            uint32_t temp = g_lastCoeffTable[pos[i]];
2297
114k
            uint32_t prefixOnes = temp & 15;
2298
114k
            uint32_t suffixLen = temp >> 4;
2299
2300
246k
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
2301
131k
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
2302
2303
114k
            if (prefixOnes < maxGroupIdx)
2304
82.8k
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
2305
2306
114k
            packedSuffixBits <<= suffixLen;
2307
114k
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
2308
114k
            packedSuffixLen += suffixLen;
2309
114k
        }
2310
2311
57.4k
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
2312
57.4k
    }
2313
2314
    // code significance flag
2315
57.4k
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
2316
57.4k
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
2317
57.4k
    uint32_t c1 = 1;
2318
57.4k
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
2319
57.4k
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
2320
57.4k
    uint32_t numNonZero = 1;
2321
57.4k
    unsigned long lastNZPosInCG = 0;
2322
57.4k
    unsigned long firstNZPosInCG = 0;
2323
2324
#if _DEBUG
2325
    // Unnecessary, for Valgrind-3.10.0 only
2326
    memset(absCoeff, 0, sizeof(absCoeff));
2327
#endif
2328
2329
57.4k
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
2330
2331
198k
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
2332
141k
    {
2333
141k
        const uint32_t subCoeffFlag = coeffFlag[subSet];
2334
141k
        uint32_t scanFlagMask = subCoeffFlag;
2335
141k
        int subPosBase = subSet << MLS_CG_SIZE;
2336
        
2337
141k
        if (subSet == lastScanSet)
2338
57.4k
        {
2339
57.4k
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
2340
57.4k
            scanFlagMask >>= 1;
2341
57.4k
        }
2342
2343
        // encode significant_coeffgroup_flag
2344
141k
        const int cgBlkPos = codingParameters.scanCG[subSet];
2345
141k
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
2346
141k
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
2347
141k
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
2348
2349
141k
        if (subSet == lastScanSet || !subSet)
2350
62.5k
            sigCoeffGroupFlag64 |= cgBlkPosMask;
2351
78.6k
        else
2352
78.6k
        {
2353
78.6k
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
2354
78.6k
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2355
78.6k
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
2356
78.6k
        }
2357
2358
        // encode significant_coeff_flag
2359
141k
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
2360
101k
        {
2361
101k
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
2362
101k
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2363
101k
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
2364
2365
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
2366
101k
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
2367
101k
            {
2368
                // patternSigCtx = 0
2369
101k
                {
2370
101k
                    2, 1, 1, 0,
2371
101k
                    1, 1, 0, 0,
2372
101k
                    1, 0, 0, 0,
2373
101k
                    0, 0, 0, 0,
2374
101k
                },
2375
                // patternSigCtx = 1
2376
101k
                {
2377
101k
                    2, 2, 2, 2,
2378
101k
                    1, 1, 1, 1,
2379
101k
                    0, 0, 0, 0,
2380
101k
                    0, 0, 0, 0,
2381
101k
                },
2382
                // patternSigCtx = 2
2383
101k
                {
2384
101k
                    2, 1, 0, 0,
2385
101k
                    2, 1, 0, 0,
2386
101k
                    2, 1, 0, 0,
2387
101k
                    2, 1, 0, 0,
2388
101k
                },
2389
                // patternSigCtx = 3
2390
101k
                {
2391
101k
                    2, 2, 2, 2,
2392
101k
                    2, 2, 2, 2,
2393
101k
                    2, 2, 2, 2,
2394
101k
                    2, 2, 2, 2,
2395
101k
                },
2396
                // 4x4
2397
101k
                {
2398
101k
                    0, 1, 4, 5,
2399
101k
                    2, 3, 4, 5,
2400
101k
                    6, 6, 8, 8,
2401
101k
                    7, 7, 8, 8
2402
101k
                }
2403
101k
            };
2404
2405
101k
            const int offset = codingParameters.firstSignificanceMapContext;
2406
101k
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2407
2408
101k
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2409
101k
            if (m_bitIf)
2410
429
            {
2411
429
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2412
429
                memset(tmpCoeff, 0, sizeof(tmpCoeff));
2413
2414
                // TODO: accelerate by PABSW
2415
2.14k
                for (int i = 0; i < MLS_CG_SIZE; i++)
2416
1.71k
                {
2417
1.71k
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2418
1.71k
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2419
1.71k
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2420
1.71k
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2421
1.71k
                }
2422
2423
429
                if (log2TrSize == 2)
2424
429
                {
2425
429
                    do
2426
6.43k
                    {
2427
6.43k
                        uint32_t blkPos, sig, ctxSig;
2428
6.43k
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2429
6.43k
                        sig     = scanFlagMask & 1;
2430
6.43k
                        scanFlagMask >>= 1;
2431
6.43k
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2432
6.43k
                        {
2433
6.43k
                            ctxSig = table_cnt[4][blkPos];
2434
6.43k
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2435
6.43k
                            encodeBin(sig, baseCtx[ctxSig]);
2436
6.43k
                        }
2437
6.43k
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2438
6.43k
                        numNonZero += sig;
2439
6.43k
                        scanPosSigOff--;
2440
6.43k
                    }
2441
6.43k
                    while(scanPosSigOff >= 0);
2442
429
                }
2443
0
                else
2444
0
                {
2445
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2446
2447
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2448
0
                    do
2449
0
                    {
2450
0
                        uint32_t blkPos, sig, ctxSig;
2451
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2452
0
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2453
0
                        sig     = scanFlagMask & 1;
2454
0
                        scanFlagMask >>= 1;
2455
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2456
0
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2457
0
                        {
2458
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2459
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2460
2461
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2462
0
                            encodeBin(sig, baseCtx[ctxSig]);
2463
0
                        }
2464
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2465
0
                        numNonZero += sig;
2466
0
                        scanPosSigOff--;
2467
0
                    }
2468
0
                    while(scanPosSigOff >= 0);
2469
0
                }
2470
429
            }
2471
101k
            else // fast RD path
2472
101k
            {
2473
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2474
101k
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2475
101k
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2476
101k
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2477
2478
#if CHECKED_BUILD || _DEBUG
2479
                numNonZero = coeffNum[subSet];
2480
#endif
2481
                // update RD cost
2482
101k
                m_fracBits += sum;
2483
101k
            } // end of fast RD path -- !m_bitIf
2484
101k
        }
2485
141k
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2486
2487
141k
        uint32_t coeffSigns = coeffSign[subSet];
2488
141k
        numNonZero = coeffNum[subSet];
2489
141k
        if (numNonZero > 0)
2490
141k
        {
2491
141k
            uint32_t idx = 0;
2492
141k
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2493
141k
            BSR(lastNZPosInCG, subCoeffFlag);
2494
141k
            BSF(firstNZPosInCG, subCoeffFlag);
2495
2496
141k
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2497
141k
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2498
141k
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2499
2500
141k
            c1 = 1;
2501
141k
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2502
2503
141k
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2504
141k
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2505
2506
141k
            if (!m_bitIf)
2507
139k
            {
2508
139k
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
2509
139k
                uint32_t firstC2Idx = (sum >> 28);
2510
139k
                c1 = ((sum >> 26) & 3);
2511
139k
                m_fracBits += sum & 0x00FFFFFF;
2512
2513
139k
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2514
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2515
139k
                m_fracBits += (numNonZero + hiddenShift) << 15;
2516
2517
139k
                if (numNonZero > firstC2Idx)
2518
134k
                {
2519
134k
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2520
134k
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2521
134k
                    m_fracBits += ((uint64_t)sum << 15);
2522
134k
                }
2523
139k
            }
2524
            // Standard path
2525
1.93k
            else
2526
1.93k
            {
2527
1.93k
                uint32_t firstC2Idx = 8;
2528
1.93k
                uint32_t firstC2Flag = 2;
2529
1.93k
                uint32_t c1Next = 0xFFFFFFFE;
2530
2531
1.93k
                idx = 0;
2532
1.93k
                do
2533
4.93k
                {
2534
4.93k
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2535
4.93k
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2536
4.93k
                    encodeBin(symbol1, baseCtxMod[c1]);
2537
2538
4.93k
                    if (symbol1)
2539
4.81k
                        c1Next = 0;
2540
2541
4.93k
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2542
4.93k
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2543
2544
4.93k
                    c1 = (c1Next & 3);
2545
4.93k
                    c1Next >>= 2;
2546
4.93k
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2547
4.93k
                    idx++;
2548
4.93k
                }
2549
4.93k
                while(idx < numC1Flag);
2550
2551
1.93k
                if (!c1)
2552
1.81k
                {
2553
1.81k
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2554
2555
1.81k
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2556
1.81k
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2557
1.81k
                }
2558
2559
1.93k
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2560
1.93k
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2561
2562
1.93k
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2563
1.81k
                {
2564
                    // Standard path
2565
1.81k
                    uint32_t goRiceParam = 0;
2566
1.81k
                    int baseLevel = 3;
2567
1.81k
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2568
#if CHECKED_BUILD || _DEBUG
2569
                    int firstCoeff2 = 1;
2570
#endif
2571
1.81k
                    idx = firstC2Idx;
2572
1.81k
                    do
2573
8.25k
                    {
2574
8.25k
                        if (idx >= C1FLAG_NUMBER)
2575
3.43k
                            baseLevel = 1;
2576
                        // TODO: fast algorithm maybe broken this check logic
2577
8.25k
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2578
2579
8.25k
                        if (absCoeff[idx] >= baseLevel)
2580
8.25k
                        {
2581
8.25k
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2582
8.25k
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2583
8.25k
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2584
8.25k
                            goRiceParam += adjust;
2585
8.25k
                            threshold += (adjust) ? threshold : 0;
2586
8.25k
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2587
8.25k
                        }
2588
#if CHECKED_BUILD || _DEBUG
2589
                        firstCoeff2 = 0;
2590
#endif
2591
8.25k
                        baseLevel = 2;
2592
8.25k
                        idx++;
2593
8.25k
                    }
2594
8.25k
                    while(idx < numNonZero);
2595
1.81k
                }
2596
1.93k
            } // end of !bitIf
2597
141k
        } // end of (numNonZero > 0)
2598
2599
        // Initialize value for next loop
2600
141k
        numNonZero = 0;
2601
141k
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2602
141k
    }
2603
57.4k
}
2604
2605
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2606
765k
{
2607
765k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2608
2609
765k
    uint32_t isCodeNonZero = !!code;
2610
2611
765k
    encodeBinEP(isCodeNonZero);
2612
765k
    if (isCodeNonZero)
2613
88
    {
2614
88
        uint32_t isCodeLast = (maxSymbol > code);
2615
88
        uint32_t mask = (1 << (code - 1)) - 1;
2616
88
        uint32_t len = code - 1 + isCodeLast;
2617
88
        mask <<= isCodeLast;
2618
2619
88
        encodeBinsEP(mask, len);
2620
88
    }
2621
765k
}
2622
2623
/* estimate bit cost for CBP, significant map and significant coefficients */
2624
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2625
7.31M
{
2626
7.31M
    estCBFBit(estBitsSbac);
2627
2628
7.31M
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2629
2630
    // encode significance map
2631
7.31M
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2632
2633
    // encode significant coefficients
2634
7.31M
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2635
7.31M
}
2636
2637
/* estimate bit cost for each CBP bit */
2638
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2639
7.31M
{
2640
7.31M
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2641
2642
58.4M
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2643
51.1M
    {
2644
51.1M
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2645
51.1M
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2646
51.1M
    }
2647
2648
7.31M
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2649
2650
7.31M
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2651
7.31M
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2652
7.31M
}
2653
2654
/* estimate SAMBAC bit cost for significant coefficient group map */
2655
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2656
7.30M
{
2657
7.30M
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2658
2659
21.9M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2660
43.8M
        for (uint32_t bin = 0; bin < 2; bin++)
2661
29.2M
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2662
7.30M
}
2663
2664
/* estimate SAMBAC bit cost for significant coefficient map */
2665
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2666
7.30M
{
2667
7.30M
    int firstCtx = 1, numCtx = 8;
2668
2669
7.30M
    if (log2TrSize >= 4)
2670
281k
    {
2671
281k
        firstCtx = bIsLuma ? 21 : 12;
2672
281k
        numCtx = bIsLuma ? 6 : 3;
2673
281k
    }
2674
7.02M
    else if (log2TrSize == 3)
2675
1.12M
    {
2676
1.12M
        firstCtx = 9;
2677
1.12M
        numCtx = bIsLuma ? 12 : 3;
2678
1.12M
    }
2679
2680
7.30M
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2681
2682
7.30M
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2683
7.30M
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2684
2685
66.5M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2686
59.2M
    {
2687
59.2M
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2688
59.2M
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2689
59.2M
    }
2690
2691
7.30M
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2692
7.30M
    if (bIsLuma)
2693
4.23M
    {
2694
4.23M
        if (log2TrSize == 2)
2695
3.22M
        {
2696
9.67M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2697
6.45M
            {
2698
6.45M
                int bits = 0;
2699
6.45M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2700
2701
25.7M
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2702
19.3M
                {
2703
19.3M
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2704
19.3M
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2705
19.3M
                }
2706
2707
6.45M
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2708
6.45M
            }
2709
3.22M
        }
2710
1.01M
        else
2711
1.01M
        {
2712
1.01M
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2713
2714
3.04M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2715
2.03M
            {
2716
2.03M
                int bits = 0;
2717
2.03M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2718
2.03M
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2719
2720
8.60M
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2721
6.57M
                {
2722
6.57M
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2723
6.57M
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2724
6.57M
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2725
6.57M
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2726
6.57M
                    bits += 2 * cost1;
2727
6.57M
                }
2728
                // correct latest bit cost, it didn't include cost0
2729
2.03M
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2730
2.03M
            }
2731
1.01M
        }
2732
4.23M
    }
2733
3.07M
    else
2734
3.07M
    {
2735
3.07M
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2736
3.07M
        const int ctxShift = log2TrSize - 2;
2737
2738
9.21M
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2739
6.14M
        {
2740
6.14M
            int bits = 0;
2741
6.14M
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2742
2743
26.4M
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2744
20.2M
            {
2745
20.2M
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2746
20.2M
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2747
20.2M
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2748
20.2M
            }
2749
2750
6.14M
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2751
6.14M
        }
2752
3.07M
    }
2753
7.30M
}
2754
2755
/* estimate bit cost of significant coefficient */
2756
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2757
7.30M
{
2758
7.30M
    if (bIsLuma)
2759
4.24M
    {
2760
4.24M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2761
4.24M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2762
2763
72.0M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2764
67.8M
        {
2765
67.8M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2766
67.8M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2767
67.8M
        }
2768
2769
21.1M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2770
16.9M
        {
2771
16.9M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2772
16.9M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2773
16.9M
        }
2774
4.24M
    }
2775
3.06M
    else
2776
3.06M
    {
2777
3.06M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2778
3.06M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2779
2780
27.6M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2781
24.5M
        {
2782
24.5M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2783
24.5M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2784
24.5M
        }
2785
2786
9.21M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2787
6.14M
        {
2788
6.14M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2789
6.14M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2790
6.14M
        }
2791
3.06M
    }
2792
7.30M
}
2793
2794
/* Initialize our context information from the nominated source */
2795
void Entropy::copyContextsFrom(const Entropy& src)
2796
9.35k
{
2797
9.35k
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2798
2799
9.35k
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2800
9.35k
    markValid();
2801
9.35k
}
2802
2803
void Entropy::start()
2804
605
{
2805
605
    m_low = 0;
2806
605
    m_range = 510;
2807
605
    m_bitsLeft = -12;
2808
605
    m_numBufferedBytes = 0;
2809
605
    m_bufferedByte = 0xff;
2810
605
}
2811
2812
void Entropy::finish()
2813
2.70k
{
2814
2.70k
    if (m_low >> (21 + m_bitsLeft))
2815
5
    {
2816
5
        m_bitIf->writeByte(m_bufferedByte + 1);
2817
6
        while (m_numBufferedBytes > 1)
2818
1
        {
2819
1
            m_bitIf->writeByte(0x00);
2820
1
            m_numBufferedBytes--;
2821
1
        }
2822
2823
5
        m_low -= 1 << (21 + m_bitsLeft);
2824
5
    }
2825
2.69k
    else
2826
2.69k
    {
2827
2.69k
        if (m_numBufferedBytes > 0)
2828
2.69k
            m_bitIf->writeByte(m_bufferedByte);
2829
2830
2.70k
        while (m_numBufferedBytes > 1)
2831
4
        {
2832
4
            m_bitIf->writeByte(0xff);
2833
4
            m_numBufferedBytes--;
2834
4
        }
2835
2.69k
    }
2836
2.70k
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2837
2.70k
}
2838
2839
void Entropy::copyState(const Entropy& other)
2840
9.90M
{
2841
9.90M
    m_low = other.m_low;
2842
9.90M
    m_range = other.m_range;
2843
9.90M
    m_bitsLeft = other.m_bitsLeft;
2844
9.90M
    m_bufferedByte = other.m_bufferedByte;
2845
9.90M
    m_numBufferedBytes = other.m_numBufferedBytes;
2846
9.90M
    m_fracBits = other.m_fracBits;
2847
9.90M
}
2848
2849
void Entropy::resetBits()
2850
8.62M
{
2851
8.62M
    m_low = 0;
2852
8.62M
    m_bitsLeft = -12;
2853
8.62M
    m_numBufferedBytes = 0;
2854
8.62M
    m_bufferedByte = 0xff;
2855
8.62M
    m_fracBits &= 32767;
2856
8.62M
    if (m_bitIf)
2857
0
        m_bitIf->resetBits();
2858
8.62M
}
2859
2860
/** Encode bin */
2861
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2862
26.0M
{
2863
26.0M
    uint32_t mstate = ctxModel;
2864
2865
26.0M
    ctxModel = sbacNext(mstate, binValue);
2866
2867
26.0M
    if (!m_bitIf)
2868
25.7M
    {
2869
25.7M
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2870
25.7M
        return;
2871
25.7M
    }
2872
2873
303k
    uint32_t range = m_range;
2874
303k
    uint32_t state = sbacGetState(mstate);
2875
303k
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2876
303k
    range -= lps;
2877
2878
303k
    X265_CHECK(lps >= 2, "lps is too small\n");
2879
2880
303k
    int numBits = (uint32_t)(range - 256) >> 31;
2881
303k
    uint32_t low = m_low;
2882
2883
    // NOTE: MPS must be LOWEST bit in mstate
2884
303k
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2885
303k
    if ((binValue ^ mstate) & 1)
2886
32.6k
    {
2887
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2888
        //numBits = g_renormTable[lps >> 3];
2889
32.6k
        unsigned long idx;
2890
32.6k
        BSR(idx, lps);
2891
32.6k
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2892
2893
32.6k
        numBits = 8 - idx;
2894
32.6k
        if (state >= 63)
2895
0
            numBits = 6;
2896
32.6k
        X265_CHECK(numBits <= 6, "numBits failure\n");
2897
2898
32.6k
        low += range;
2899
32.6k
        range = lps;
2900
32.6k
    }
2901
303k
    m_low = (low << numBits);
2902
303k
    m_range = (range << numBits);
2903
303k
    m_bitsLeft += numBits;
2904
2905
303k
    if (m_bitsLeft >= 0)
2906
14.5k
        writeOut();
2907
303k
}
2908
2909
/** Encode equiprobable bin */
2910
void Entropy::encodeBinEP(uint32_t binValue)
2911
896k
{
2912
896k
    if (!m_bitIf)
2913
896k
    {
2914
896k
        m_fracBits += 32768;
2915
896k
        return;
2916
896k
    }
2917
456
    m_low <<= 1;
2918
456
    if (binValue)
2919
456
        m_low += m_range;
2920
456
    m_bitsLeft++;
2921
2922
456
    if (m_bitsLeft >= 0)
2923
82
        writeOut();
2924
456
}
2925
2926
/** Encode equiprobable bins */
2927
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2928
7.68M
{
2929
7.68M
    if (!m_bitIf)
2930
7.60M
    {
2931
7.60M
        m_fracBits += 32768 * numBins;
2932
7.60M
        return;
2933
7.60M
    }
2934
2935
76.0k
    while (numBins > 8)
2936
3.23k
    {
2937
3.23k
        numBins -= 8;
2938
3.23k
        uint32_t pattern = binValues >> numBins;
2939
3.23k
        m_low <<= 8;
2940
3.23k
        m_low += m_range * pattern;
2941
3.23k
        binValues -= pattern << numBins;
2942
3.23k
        m_bitsLeft += 8;
2943
2944
3.23k
        if (m_bitsLeft >= 0)
2945
3.23k
            writeOut();
2946
3.23k
    }
2947
2948
72.8k
    m_low <<= numBins;
2949
72.8k
    m_low += m_range * binValues;
2950
72.8k
    m_bitsLeft += numBins;
2951
2952
72.8k
    if (m_bitsLeft >= 0)
2953
20.4k
        writeOut();
2954
72.8k
}
2955
2956
/** Encode terminating bin */
2957
void Entropy::encodeBinTrm(uint32_t binValue)
2958
26.9k
{
2959
26.9k
    if (!m_bitIf)
2960
12.1k
    {
2961
12.1k
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2962
12.1k
        return;
2963
12.1k
    }
2964
2965
14.8k
    m_range -= 2;
2966
14.8k
    if (binValue)
2967
2.70k
    {
2968
2.70k
        m_low += m_range;
2969
2.70k
        m_low <<= 7;
2970
2.70k
        m_range = 2 << 7;
2971
2.70k
        m_bitsLeft += 7;
2972
2.70k
    }
2973
12.1k
    else if (m_range >= 256)
2974
11.5k
        return;
2975
635
    else
2976
635
    {
2977
635
        m_low <<= 1;
2978
635
        m_range <<= 1;
2979
635
        m_bitsLeft++;
2980
635
    }
2981
2982
3.33k
    if (m_bitsLeft >= 0)
2983
2.49k
        writeOut();
2984
3.33k
}
2985
2986
/** Move bits from register into bitstream */
2987
void Entropy::writeOut()
2988
40.8k
{
2989
40.8k
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2990
40.8k
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2991
2992
40.8k
    m_bitsLeft -= 8;
2993
40.8k
    m_low &= low_mask;
2994
2995
40.8k
    if (leadByte == 0xff)
2996
3.18k
        m_numBufferedBytes++;
2997
37.6k
    else
2998
37.6k
    {
2999
37.6k
        uint32_t numBufferedBytes = m_numBufferedBytes;
3000
37.6k
        if (numBufferedBytes > 0)
3001
34.9k
        {
3002
34.9k
            uint32_t carry = leadByte >> 8;
3003
34.9k
            uint32_t byteTowrite = m_bufferedByte + carry;
3004
34.9k
            m_bitIf->writeByte(byteTowrite);
3005
3006
34.9k
            byteTowrite = (0xff + carry) & 0xff;
3007
38.1k
            while (numBufferedBytes > 1)
3008
3.17k
            {
3009
3.17k
                m_bitIf->writeByte(byteTowrite);
3010
3.17k
                numBufferedBytes--;
3011
3.17k
            }
3012
34.9k
        }
3013
37.6k
        m_numBufferedBytes = 1;
3014
37.6k
        m_bufferedByte = (uint8_t)leadByte;
3015
37.6k
    }
3016
40.8k
}
3017
3018
// Table of 128 bit-cost values used for entropy estimation.
// NOTE(review): presumably looked up by sbacGetEntropyBits() with a 7-bit context
// value combined with the bin, and expressed in the same fixed-point units as
// m_fracBits (the equiprobable paths in this file charge 32768 per bin) --
// confirm against contexts.h.
const uint32_t g_entropyBits[128] =
3019
{
3020
    // Corrected table, most notably for last state
3021
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
3022
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
3023
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
3024
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
3025
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
3026
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
3027
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
3028
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
3029
};
3030
3031
// Transition table with 128 rows of two uint8_t entries each.
// NOTE(review): presumably indexed as [current context value][bin value] and read
// through sbacNext() to obtain the updated context value -- confirm in contexts.h.
// The last two rows (126 and 127) map to themselves for both columns.
const uint8_t g_nextState[128][2] =
3032
{
3033
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
3034
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
3035
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
3036
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
3037
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
3038
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
3039
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
3040
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
3041
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
3042
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
3043
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
3044
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
3045
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
3046
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
3047
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
3048
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
3049
};
3050
3051
}
3052
3053
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
3054
// Packed 128-entry table exported with C linkage (defined outside the X265_NS namespace).
// Each 32-bit word is split 8/24: the low 24 bits of every entry equal the
// corresponding g_entropyBits value; the high 8 bits hold a state value.
// NOTE(review): the exact meaning of the high byte and the consumers of this
// symbol (presumably assembly routines) are not visible here -- confirm before changing.
extern "C" const uint32_t PFX(entropyStateBits)[128] =
3055
{
3056
    // Corrected table, most notably for last state
3057
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
3058
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
3059
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
3060
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
3061
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
3062
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
3063
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
3064
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
3065
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
3066
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
3067
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
3068
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
3069
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
3070
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
3071
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
3072
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
3073
};
3074