Coverage Report

Created: 2026-02-26 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/entropy.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
14.4k
#define CU_DQP_TU_CMAX 5 // maximum number of bins for the truncated unary code
36
4.36k
#define CU_DQP_EG_k    0 // Exp-Golomb order
37
0
#define START_VALUE    8 // start value for DPCM mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93
{
94
    { 183, },
95
    { 154, },
96
    { 184, },
97
};
98
99
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100
{
101
    { 152,  139, },
102
    { 152,  139, },
103
    {  63,  139, },
104
};
105
106
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128
{
129
    { 154,  154,  154, },
130
    { 154,  154,  154, },
131
    { 154,  154,  154, },
132
};
133
134
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135
{
136
    { 153,  111,  149,   92,  167,  154,  154 },
137
    { 153,  111,  149,  107,  167,  154,  154 },
138
    { 111,  141,   94,  138,  182,  154,  154 },
139
};
140
141
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149
{
150
    { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151
      108,  123,   93 },
152
    { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153
      108,  123,  108 },
154
    { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155
      108,  123,   63 },
156
};
157
158
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159
{
160
    { 121,  140,
161
      61,  154, },
162
    { 121,  140,
163
      61,  154, },
164
    {  91,  171,
165
       134,  141, },
166
};
167
168
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169
{
170
    { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171
    { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172
    { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173
};
174
175
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176
{
177
    { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178
    { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179
    { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180
};
181
182
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183
{
184
    { 107,  167,   91,  107,  107,  167, },
185
    { 107,  167,   91,  122,  107,  167, },
186
    { 138,  153,  136,  167,  152,  152, },
187
};
188
189
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197
{
198
    { 153,  },
199
    { 153,  },
200
    { 153,  },
201
};
202
203
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204
{
205
    { 160, },
206
    { 185, },
207
    { 200, },
208
};
209
210
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211
{
212
    { 224,  167,  122, },
213
    { 124,  138,   94, },
214
    { 153,  138,  138, },
215
};
216
217
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218
{
219
    { 139,  139 },
220
    { 139,  139 },
221
    { 139,  139 },
222
};
223
224
/* Default constructor: calls markValid(), clears m_meanQP, m_pad and
 * m_fracBits, then checks that m_contextState has room for at least
 * MAX_OFF_CTX_MOD entries. */
Entropy::Entropy()
{
    markValid();

    m_meanQP = 0;
    m_pad = 0;
    m_fracBits = 0;

    X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
}
232
233
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
234
void Entropy::codeVPS(const VPS& vps, const SPS& sps)
235
#else
236
void Entropy::codeVPS(const VPS& vps)
237
#endif
238
714
{
239
714
    int maxLayers = (vps.m_numLayers > 1 || vps.m_numViews > 1) + 1;
240
714
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
241
714
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
242
714
    WRITE_CODE(maxLayers - 1, 6, "vps_reserved_zero_6bits");
243
714
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
244
714
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
245
714
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
246
247
714
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
248
249
714
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
250
251
1.42k
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
252
714
    {
253
714
        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
254
714
        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
255
714
        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
256
714
    }
257
258
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
259
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
260
    {
261
        WRITE_CODE(maxLayers - 1, 6, "vps_max_nuh_reserved_zero_layer_id");
262
        WRITE_UVLC(vps.m_vpsNumLayerSetsMinus1, "vps_num_layer_sets_minus1");
263
        for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
264
        {
265
#if ENABLE_MULTIVIEW
266
            if (vps.m_numViews > 1)
267
            {
268
                for (int j = 0; j < vps.m_numViews; j++)
269
                {
270
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
271
                }
272
            }
273
#endif
274
#if ENABLE_ALPHA
275
            if (vps.m_numLayers > 1)
276
            {
277
                for (int j = 0; j < vps.m_numLayers; j++)
278
                {
279
                    WRITE_FLAG(1, "layer_id_included_flag[opsIdx][i]");
280
                }
281
            }
282
#endif
283
        }
284
    }
285
    else
286
    {
287
        WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
288
        WRITE_UVLC(0, "vps_max_op_sets_minus1");
289
    }
290
#else
291
714
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
292
714
    WRITE_UVLC(0, "vps_max_op_sets_minus1");
293
714
#endif
294
295
714
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
296
297
#if ENABLE_ALPHA || ENABLE_MULTIVIEW
298
    if (vps.m_numLayers > 1 || vps.m_numViews > 1)
299
    {
300
        WRITE_FLAG(vps.vps_extension_flag, "vps_extension_flag");
301
302
        if (vps.vps_extension_flag)
303
        {
304
            while (m_bitIf->getNumberOfWrittenBits() % X265_BYTE != 0)
305
            {
306
                WRITE_FLAG(1, "vps_extension_alignment_bit_equal_to_one");
307
            }
308
309
            WRITE_CODE(vps.ptl.levelIdc, 8, "general_level_idc");
310
            if (vps.maxTempSubLayers > 1)
311
            {
312
                for (uint32_t i = 0; i < vps.maxTempSubLayers - 1; i++)
313
                {
314
                    WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
315
                    WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
316
                }
317
                for (int i = vps.maxTempSubLayers - 1; i < 8; i++)
318
                    WRITE_CODE(0, 2, "reserved_zero_2bits");
319
            }
320
321
            WRITE_FLAG(vps.splitting_flag, "splitting flag");
322
            for (int i = 0; i < MAX_VPS_NUM_SCALABILITY_TYPES; i++)
323
            {
324
                WRITE_FLAG(vps.m_scalabilityMask[i], "scalability_mask[i]");
325
            }
326
            for (int i = 0; i < vps.scalabilityTypes - vps.splitting_flag; i++)
327
            {
328
                WRITE_CODE(vps.m_dimensionIdLen[i] - 1, 3, "dimension_id_len_minus1[i]");
329
            }
330
            WRITE_FLAG(vps.m_nuhLayerIdPresentFlag, "vps_nuh_layer_id_present_flag");
331
            for (int i = 1; i < maxLayers; i++)
332
            {
333
                if (vps.m_nuhLayerIdPresentFlag)
334
                    WRITE_CODE(vps.m_layerIdInNuh[i], 6, "layer_id_in_nuh[i]");
335
336
                if (!vps.splitting_flag)
337
                {
338
                    for (int j = 0; j < vps.scalabilityTypes; j++)
339
                    {
340
                        uint8_t bits = vps.m_dimensionIdLen[j];
341
                        WRITE_CODE(vps.m_dimensionId[i][j], bits, "dimension_id[i][j]");
342
                    }
343
                }
344
            }
345
            WRITE_CODE(vps.m_viewIdLen, 4, "view_id_len");
346
347
#if ENABLE_ALPHA
348
            if (vps.m_numLayers > 1)
349
            {
350
                WRITE_FLAG(0, "direct_dependency_flag[1][0]");
351
                WRITE_UVLC(0, "num_add_layer_sets");
352
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
353
                WRITE_FLAG(0, "max_tid_ref_present_flag");
354
                WRITE_FLAG(0, "default_ref_layers_active_flag");
355
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
356
                WRITE_FLAG(1, "vps_profile_present_flag");
357
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
358
359
                WRITE_UVLC(0, "num_add_olss");
360
                WRITE_CODE(0, 2, "default_output_layer_idc");
361
                WRITE_CODE(1, 2, "profile_tier_level_idx[ i ][ j ]");
362
                WRITE_CODE(2, 2, "profile_tier_level_idx[ i ][ j ]");
363
364
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
365
366
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
367
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
368
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
369
370
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
371
372
                if (sps.chromaFormatIdc == X265_CSP_I444)
373
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
374
375
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
376
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
377
378
                const Window& conf = sps.conformanceWindow;
379
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
380
                if (conf.bEnabled)
381
                {
382
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
383
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
384
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
385
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
386
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
387
                }
388
389
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
390
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
391
                WRITE_FLAG(1, "poc_lsb_not_present_flag[");
392
393
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
394
                {
395
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
396
                    for (uint32_t j = 0; j < vps.maxTempSubLayers ; j++)
397
                    {
398
                        if(j > 0)
399
                        WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
400
401
                        for(int k = 0; k < vps.m_numLayersInIdList[i]; k++)
402
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
403
404
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
405
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
406
                    }
407
                }
408
409
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
410
411
                WRITE_FLAG(0, "default_direct_dependency_flag");
412
                WRITE_UVLC(0, "vps_non_vui_extension_length");
413
                WRITE_FLAG(0, "vps_vui_present_flag");
414
                WRITE_FLAG(0, "vps_extension2_flag");
415
        }
416
#endif
417
418
#if ENABLE_MULTIVIEW
419
            if (vps.m_numViews > 1)
420
            {
421
                for (uint8_t i = 0; i < vps.m_numViews; i++)
422
                    WRITE_CODE(i, vps.m_viewIdLen, "view_id_val[i]");
423
424
                for (int i = 1; i < vps.m_numViews; i++)
425
                {
426
                    for (int j = 0; j < i; j++)
427
                    {
428
                        if (j == 0)
429
                            WRITE_FLAG(1, "direct_dependency_flag[1][0]");
430
                        else
431
                            WRITE_FLAG(0, "direct_dependency_flag[1][0]");
432
                    }
433
                }
434
                WRITE_FLAG(0, "vps_sub_layers_max_minus1_present_flag");
435
                WRITE_FLAG(0, "max_tid_ref_present_flag");
436
                WRITE_FLAG(1, "default_ref_layers_active_flag");
437
                WRITE_UVLC(2, "vps_num_profile_tier_level_minus1");
438
                WRITE_FLAG(1, "vps_profile_present_flag[i]");
439
                codeProfileTier(vps.ptl, vps.maxTempSubLayers, 1);
440
                WRITE_UVLC(0, "num_add_olss");
441
                WRITE_CODE(0, 2, "default_output_layer_idc");
442
443
                for (int i = 1; i <= vps.m_vpsNumLayerSetsMinus1; i++)
444
                {
445
                    for (int j = 0; j < vps.m_numViews; j++)
446
                    {
447
                        WRITE_CODE((j == 0) ? 1 : 2, 2, "profile_tier_level_idx[ i ][ j ]");
448
                    }
449
                }
450
                WRITE_UVLC(0, "vps_num_rep_formats_minus1");
451
452
                WRITE_CODE(sps.picWidthInLumaSamples, 16, "pic_width_vps_in_luma_samples");
453
                WRITE_CODE(sps.picHeightInLumaSamples, 16, "pic_height_vps_in_luma_samples");
454
                WRITE_FLAG(1, "chroma_and_bit_depth_vps_present_flag");
455
456
                WRITE_CODE(sps.chromaFormatIdc, 2, "chroma_format_vps_idc");
457
458
                if (sps.chromaFormatIdc == X265_CSP_I444)
459
                    WRITE_FLAG(0, "separate_colour_plane_vps_flag");
460
461
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_luma_minus8");
462
                WRITE_CODE(X265_DEPTH - 8, 4, "bit_depth_vps_chroma_minus8");
463
464
                const Window& conf = sps.conformanceWindow;
465
                WRITE_FLAG(conf.bEnabled, "conformance_window_vps_flag");
466
                if (conf.bEnabled)
467
                {
468
                    int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
469
                    WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_vps_left_offset");
470
                    WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_vps_right_offset");
471
                    WRITE_UVLC(conf.topOffset >> vShift, "conf_win_vps_top_offset");
472
                    WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_vps_bottom_offset");
473
                }
474
475
                WRITE_FLAG(1, "max_one_active_ref_layer_flag");
476
                WRITE_FLAG(0, "vps_poc_lsb_aligned_flag");
477
478
                for (int i = 1; i < vps.m_vpsNumLayerSetsMinus1 + 1; i++)
479
                {
480
                    WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_flag_info_present_flag");
481
                    for (uint32_t j = 0; j < vps.maxTempSubLayers; j++)
482
                    {
483
                        if (j > 0)
484
                            WRITE_FLAG(vps.maxTempSubLayers > 1, "sub_layer_dpb_info_present_flag");
485
486
                        for (int k = 0; k < vps.m_numLayersInIdList[i]; k++)
487
                            WRITE_UVLC(vps.maxDecPicBuffering[j] - 1, "vps_max_dec_pic_buffering_minus1[i]");
488
489
                        WRITE_UVLC(vps.numReorderPics[0], "vps_num_reorder_pics[i]");
490
                        WRITE_UVLC(vps.maxLatencyIncrease[0] + 1, "vps_max_latency_increase_plus1[i]");
491
                    }
492
                }
493
494
                WRITE_UVLC(0, "direct_dep_type_len_minus2");
495
496
                WRITE_FLAG(1, "default_direct_dependency_flag");
497
                WRITE_CODE(2, 2, "default_direct_dependency_type");
498
                WRITE_UVLC(0, "vps_non_vui_extension_length");
499
                WRITE_FLAG(0, "vps_vui_present_flag");
500
                WRITE_FLAG(0, "vps_extension2_flag");
501
            }
502
#endif
503
        }
504
    }
505
    else
506
        WRITE_FLAG(0, "vps_extension_flag");
507
#else
508
714
    WRITE_FLAG(0, "vps_extension_flag");
509
714
#endif
510
714
}
511
512
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl, int layer)
513
714
{
514
714
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
515
#if ENABLE_MULTIVIEW
516
    if(layer != 0)
517
        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
518
    else
519
        WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
520
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
521
#else
522
714
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
523
714
#endif
524
714
    {
525
714
        WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
526
714
        codeProfileTier(ptl, sps.maxTempSubLayers);
527
714
    }
528
529
714
    WRITE_UVLC(layer, "sps_seq_parameter_set_id");
530
#if ENABLE_MULTIVIEW
531
    if (layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
532
        WRITE_FLAG(0, "update_rep_format_flag");
533
    else
534
#endif
535
714
    {
536
714
        WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
537
538
714
        if (sps.chromaFormatIdc == X265_CSP_I444)
539
0
            WRITE_FLAG(0,                       "separate_colour_plane_flag");
540
541
714
        WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
542
714
        WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
543
544
714
        const Window& conf = sps.conformanceWindow;
545
714
        WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
546
714
        if (conf.bEnabled)
547
537
        {
548
537
            int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
549
537
            WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
550
537
            WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
551
537
            WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
552
537
            WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
553
537
        }
554
555
714
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
556
714
        WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
557
714
    }
558
559
714
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
560
#if ENABLE_MULTIVIEW
561
    if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
562
#endif
563
714
    {
564
714
        WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
565
566
1.42k
        for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
567
714
        {
568
714
            WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
569
714
            WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
570
714
            WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
571
714
        }
572
714
    }
573
574
714
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
575
714
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
576
714
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
577
714
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
578
714
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
579
714
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
580
714
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
581
714
    if (scalingList.m_bEnabled)
582
0
    {
583
#if ENABLE_MULTIVIEW
584
        if ((layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
585
            WRITE_FLAG(sps.spsInferScalingListFlag, "sps_infer_scaling_list_flag");
586
        if(sps.spsInferScalingListFlag)
587
            WRITE_CODE(0, 6, "sps_scaling_list_ref_layer_id");
588
        else
589
#endif
590
0
        {
591
0
            WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
592
0
            if (scalingList.m_bDataPresent)
593
0
                codeScalingList(scalingList);
594
0
        }
595
0
    }
596
714
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
597
714
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
598
599
714
    WRITE_FLAG(0, "pcm_enabled_flag");
600
714
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
601
714
    for (int i = 0; i < sps.spsrpsNum; i++)
602
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
603
714
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
604
605
714
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
606
714
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
607
608
714
    WRITE_FLAG(1, "vui_parameters_present_flag");
609
714
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo, layer);
610
611
714
    WRITE_FLAG(sps.sps_extension_flag, "sps_extension_flag");
612
613
#if ENABLE_MULTIVIEW
614
    if (sps.sps_extension_flag && sps.maxViews > 1)
615
    {
616
        WRITE_FLAG(0, "sps_range_extensions_flag");
617
        WRITE_FLAG(sps.maxViews > 1, "sps_multilayer_extension_flag");
618
        WRITE_FLAG(0, "sps_3d_extension_flag");
619
        WRITE_CODE(0, 5, "sps_extension_5bits");
620
621
        if (layer == 0)
622
            WRITE_FLAG(0, "inter_view_mv_vert_constraint_flag");
623
        else
624
            WRITE_FLAG(1, "inter_view_mv_vert_constraint_flag");
625
    }
626
#endif
627
628
#if ENABLE_SCC_EXT
629
    if (ptl.profileIdc[0] == Profile::MAINSCC)
630
    {
631
        bool sps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
632
        sps_extension_flags[SCC_EXT_IDX] = true;
633
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
634
            WRITE_FLAG(sps_extension_flags[i], "sps_extension_flag");
635
        WRITE_FLAG(1, "intra_block_copy_enabled_flag");
636
        WRITE_FLAG(0, "palette_mode_enabled_flag");
637
        WRITE_CODE(0, 2, "motion_vector_resolution_control_idc");
638
        WRITE_FLAG(0, "intra_boundary_filter_disabled_flag");
639
    }
640
#endif
641
714
}
642
643
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26, int layer)
644
714
{
645
714
    WRITE_UVLC(layer,                          "pps_pic_parameter_set_id");
646
714
    WRITE_UVLC(layer,                          "pps_seq_parameter_set_id");
647
714
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
648
714
    WRITE_FLAG(0,                          "output_flag_present_flag");
649
714
    WRITE_CODE(pps.maxViews > 1 ? 2 : 0, 3,"num_extra_slice_header_bits");
650
714
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
651
714
    WRITE_FLAG(0,                          "cabac_init_present_flag");
652
714
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
653
714
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
654
655
714
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
656
714
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
657
714
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
658
659
714
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
660
714
    if (pps.bUseDQP)
661
531
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
662
663
714
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
664
714
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
665
714
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
666
667
714
    WRITE_FLAG(layer ? 0 : pps.bUseWeightPred,            "weighted_pred_flag");
668
714
    WRITE_FLAG(layer ? 0 : pps.bUseWeightedBiPred,        "weighted_bipred_flag");
669
714
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
670
714
    WRITE_FLAG(0,                             "tiles_enabled_flag");
671
714
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
672
714
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
673
674
714
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
675
714
    if (pps.bDeblockingFilterControlPresent)
676
0
    {
677
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
678
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
679
0
        if (!pps.bPicDisableDeblockingFilter)
680
0
        {
681
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
682
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
683
0
        }
684
0
    }
685
686
714
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
687
714
    WRITE_FLAG(0, "lists_modification_present_flag");
688
714
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
689
714
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
690
714
    WRITE_FLAG(pps.pps_extension_flag, "pps_extension_flag");
691
692
#if ENABLE_MULTIVIEW
693
    if (pps.pps_extension_flag && pps.maxViews > 1)
694
    {
695
        WRITE_FLAG(0, "pps_range_extensions_flag");
696
        WRITE_FLAG(pps.maxViews > 1, "pps_multilayer_extension_flag");
697
        WRITE_FLAG(0, "pps_3d_extension_flag");
698
        WRITE_CODE(0, 5, "pps_extension_5bits");
699
700
        if (pps.maxViews > 1)
701
        {
702
            WRITE_FLAG(0, "poc_reset_info_present_flag");
703
            WRITE_FLAG(0, "pps_infer_scaling_list_flag");
704
            WRITE_UVLC(0, "num_ref_loc_offsets");
705
            WRITE_FLAG(0, "colour_mapping_enabled_flag");
706
        }
707
    }
708
#endif
709
710
711
#if ENABLE_SCC_EXT
712
    if (pps.profileIdc == Profile::MAINSCC)
713
    {
714
        bool pps_extension_flags[NUM_EXTENSION_FLAGS] = { false };
715
        pps_extension_flags[SCC_EXT_IDX] = true;
716
        for (int i = 0; i < NUM_EXTENSION_FLAGS; i++)
717
            WRITE_FLAG(pps_extension_flags[i], "pps_extension_flag");
718
        WRITE_FLAG(1, "curr_pic_as_ref_enabled_pps_flag");
719
        WRITE_FLAG(0, "adaptive_colour_trans_flag");
720
        WRITE_FLAG(0, "palette_predictor_initializer_flag");
721
    }
722
#endif
723
714
}
724
725
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers, int layer)
726
1.42k
{
727
1.42k
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
728
1.42k
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
729
1.42k
    WRITE_CODE(ptl.profileIdc[layer], 5,   "XXX_profile_idc[]");
730
47.1k
    for (int j = 0; j < 32; j++)
731
45.6k
    {
732
45.6k
        if (layer)
733
0
            WRITE_FLAG(j == ptl.profileIdc[layer] ? 1 : 0, "XXX_profile_compatibility_flag[][j]");
734
45.6k
        else
735
45.6k
            WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
736
45.6k
    }
737
738
1.42k
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
739
1.42k
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
740
1.42k
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
741
1.42k
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
742
743
1.42k
    if (ptl.profileIdc[layer] == Profile::MAINREXT || ptl.profileIdc[layer] == Profile::HIGHTHROUGHPUTREXT || ptl.profileIdc[layer] == Profile::SCALABLEMAIN || ptl.profileIdc[layer] == Profile::SCALABLEMAIN10 || ptl.profileIdc[layer] == Profile::MULTIVIEWMAIN || ptl.profileIdc[layer] == Profile::MAINSCC)
744
0
    {
745
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
746
0
        int csp = ptl.chromaFormatConstraint;
747
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
748
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
749
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
750
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
751
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
752
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
753
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
754
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
755
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
756
0
        if (ptl.profileIdc[layer] == Profile::MAINSCC)
757
0
        {
758
0
            WRITE_FLAG(bitDepthConstraint <= 14, "max_14bit_constraint_flag");
759
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[0..15]");
760
0
            WRITE_CODE(0, 16, "reserved_zero_33bits[16..31]");
761
0
            WRITE_FLAG(0, "reserved_zero_33bits[32]");
762
0
        }
763
0
        else
764
0
        {
765
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[0..15]");
766
0
            WRITE_CODE(0, 16, "XXX_reserved_zero_35bits[16..31]");
767
0
            WRITE_CODE(0, 3, "XXX_reserved_zero_35bits[32..34]");
768
0
        }
769
0
    }
770
1.42k
    else
771
1.42k
    {
772
1.42k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
773
1.42k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
774
1.42k
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
775
1.42k
    }
776
1.42k
    if (ptl.profileIdc[layer] == Profile::MAINSCC)
777
0
        WRITE_FLAG(false, "inbld_flag");
778
779
1.42k
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
780
781
1.42k
    if (maxTempSubLayers > 1)
782
0
    {
783
0
        for(int i = 0; i < maxTempSubLayers - 1; i++)
784
0
        {
785
0
            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
786
0
            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
787
0
        }
788
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
789
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
790
0
    }
791
1.42k
}
792
793
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo, int layer)
794
714
{
795
714
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
796
714
    if (vui.aspectRatioInfoPresentFlag)
797
0
    {
798
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
799
0
        if (vui.aspectRatioIdc == 255)
800
0
        {
801
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
802
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
803
0
        }
804
0
    }
805
806
714
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
807
714
    if (vui.overscanInfoPresentFlag)
808
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
809
810
714
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
811
714
    if (vui.videoSignalTypePresentFlag)
812
714
    {
813
714
        WRITE_CODE(vui.videoFormat, 3, "video_format");
814
714
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
815
714
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
816
714
        if (vui.colourDescriptionPresentFlag)
817
0
        {
818
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
819
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
820
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
821
0
        }
822
714
    }
823
824
714
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
825
714
    if (vui.chromaLocInfoPresentFlag)
826
0
    {
827
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
828
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
829
0
    }
830
831
714
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
832
714
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
833
714
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
834
835
714
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
836
714
    if (vui.defaultDisplayWindow.bEnabled)
837
0
    {
838
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
839
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
840
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
841
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
842
0
    }
843
844
714
    if(layer)
845
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
846
714
    else
847
714
    {
848
714
        if (!bEmitVUITimingInfo)
849
0
            WRITE_FLAG(0, "vui_timing_info_present_flag");
850
714
        else
851
714
        {
852
714
            WRITE_FLAG(1, "vui_timing_info_present_flag");
853
714
            WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
854
714
            WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
855
714
            WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
856
714
        }
857
858
714
        if (!bEmitVUIHRDInfo)
859
0
            WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
860
714
        else
861
714
        {
862
714
            WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
863
714
            if (vui.hrdParametersPresentFlag)
864
0
                codeHrdParameters(vui.hrdParameters, maxSubTLayers);
865
714
        }
866
714
    }
867
868
714
    WRITE_FLAG(0, "bitstream_restriction_flag");
869
714
}
870
871
void Entropy::codeScalingList(const ScalingList& scalingList)
872
0
{
873
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
874
0
    {
875
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
876
0
        {
877
0
            int predList = scalingList.checkPredMode(sizeId, listId);
878
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
879
0
            if (predList >= 0)
880
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
881
0
            else // DPCM Mode
882
0
                codeScalingList(scalingList, sizeId, listId);
883
0
        }
884
0
    }
885
0
}
886
887
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
888
0
{
889
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
890
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
891
0
    int nextCoef = START_VALUE;
892
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
893
0
    int data;
894
895
0
    if (sizeId > BLOCK_8x8)
896
0
    {
897
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
898
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
899
0
    }
900
0
    for (int i = 0; i < coefNum; i++)
901
0
    {
902
0
        data = src[scan[i]] - nextCoef;
903
0
        if (data < -128)
904
0
            data += 256;
905
0
        if (data > 127)
906
0
            data -= 256;
907
0
        nextCoef = (nextCoef + data + 256) % 256;
908
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
909
0
    }
910
0
}
911
912
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
913
0
{
914
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
915
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
916
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
917
918
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
919
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
920
921
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
922
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
923
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
924
925
0
    for (int i = 0; i < maxSubTLayers; i++)
926
0
    {
927
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
928
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
929
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
930
931
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
932
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
933
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
934
0
    }
935
0
}
936
937
void Entropy::codeAUD(const Slice& slice)
938
0
{
939
0
    int picType;
940
941
0
    switch (slice.m_sliceType)
942
0
    {
943
0
    case I_SLICE:
944
0
        picType = 0;
945
0
        break;
946
0
    case P_SLICE:
947
0
        picType = 1;
948
0
        break;
949
0
    case B_SLICE:
950
0
        picType = 2;
951
0
        break;
952
0
    default:
953
0
        picType = 7;
954
0
        break;
955
0
    }
956
957
0
    WRITE_CODE(picType, 3, "pic_type");
958
0
}
959
960
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp, int layer)
961
714
{
962
714
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
963
714
    if (slice.getRapPicFlag())
964
714
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
965
966
714
    WRITE_UVLC(layer, "slice_pic_parameter_set_id");
967
968
    /* x265 does not use dependent slices, so always write all this data */
969
714
    if (slice_addr)
970
0
    {
971
        // if( dependent_slice_segments_enabled_flag )
972
        //     dependent_slice_segment_flag             u(1)
973
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
974
0
    }
975
976
#if ENABLE_MULTIVIEW
977
    if (encData.m_param->numViews > 1)
978
    {
979
        int esb = 0;
980
        if (2 > esb)
981
        {
982
            esb++;
983
            WRITE_FLAG(0, "discardable_flag");
984
        }
985
        if (2 > esb)
986
        {
987
            esb++;
988
            WRITE_FLAG(0, "cross_layer_bla_flag");
989
        }
990
    }
991
#endif
992
993
714
    WRITE_UVLC(slice.m_sliceType, "slice_type");
994
995
714
    if ((slice.m_param->numViews > 1 && layer > 0) || !slice.getIdrPicFlag())
996
0
    {
997
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
998
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
999
0
    }
1000
714
    if (!slice.getIdrPicFlag())
1001
0
    {
1002
#if _DEBUG || CHECKED_BUILD
1003
        // check for bitstream restriction stating that:
1004
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
1005
        // Ideally this process should not be repeated for each slice in a picture
1006
        if (slice.isIRAP())
1007
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
1008
            {
1009
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
1010
            }
1011
#endif
1012
1013
0
        if (slice.m_rpsIdx < 0)
1014
0
        {
1015
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
1016
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
1017
0
        }
1018
0
        else
1019
0
        {
1020
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
1021
0
            int numBits = 0;
1022
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
1023
0
                numBits++;
1024
1025
0
            if (numBits > 0)
1026
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
1027
0
        }
1028
1029
0
        if (slice.m_sps->bTemporalMVPEnabled)
1030
#if ENABLE_SCC_EXT
1031
            WRITE_FLAG(slice.m_bTemporalMvp, "slice_temporal_mvp_enable_flag");
1032
#else
1033
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
1034
0
#endif
1035
0
    }
1036
714
    const SAOParam *saoParam = encData.m_saoParam;
1037
714
    if (slice.m_bUseSao)
1038
714
    {
1039
714
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
1040
714
        if (encData.m_param->internalCsp != X265_CSP_I400)
1041
714
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
1042
714
    }
1043
0
    else if(encData.m_param->selectiveSAO)
1044
0
    {
1045
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
1046
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
1047
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
1048
0
    }
1049
1050
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
1051
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
1052
1053
714
    if (!slice.isIntra())
1054
0
    {
1055
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
1056
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
1057
0
        if (overrideFlag)
1058
0
        {
1059
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
1060
0
            if (slice.isInterB())
1061
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
1062
0
            else
1063
0
            {
1064
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
1065
0
            }
1066
0
        }
1067
0
    }
1068
714
    else
1069
714
    {
1070
714
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
1071
714
    }
1072
1073
714
    if (slice.isInterB())
1074
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
1075
1076
#if ENABLE_SCC_EXT
1077
    if (slice.m_bTemporalMvp)
1078
#else
1079
714
    if (slice.m_sps->bTemporalMVPEnabled)
1080
714
#endif
1081
714
    {
1082
714
        if (slice.m_sliceType == B_SLICE)
1083
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
1084
1085
714
        if (slice.m_sliceType != I_SLICE &&
1086
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
1087
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
1088
0
        {
1089
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
1090
0
        }
1091
714
    }
1092
714
    if (((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE)) && !layer)
1093
0
        codePredWeightTable(slice);
1094
1095
714
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
1096
714
    if (!slice.isIntra())
1097
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
1098
1099
714
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
1100
714
    WRITE_SVLC(code, "slice_qp_delta");
1101
1102
714
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
1103
0
    {
1104
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
1105
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
1106
0
    }
1107
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
1108
    //       We didn't support filter across slice board, so disable it now
1109
1110
714
    if (encData.m_param->maxSlices <= 1)
1111
714
    {
1112
714
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
1113
714
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
1114
1115
714
        if (isSAOEnabled || isDBFEnabled)
1116
714
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
1117
714
    }
1118
714
}
1119
1120
/** write wavefront substreams sizes for the slice header */
1121
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
1122
535
{
1123
535
    uint32_t offsetLen = 1;
1124
3.02k
    while (maxOffset >= (1U << offsetLen))
1125
2.48k
    {
1126
2.48k
        offsetLen++;
1127
2.48k
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
1128
2.48k
    }
1129
1130
535
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
1131
535
    if (numSubStreams > 0)
1132
535
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
1133
1134
2.81k
    for (uint32_t i = 0; i < numSubStreams; i++)
1135
2.27k
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
1136
535
}
1137
1138
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
1139
0
{
1140
0
    if (idx > 0)
1141
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
1142
1143
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
1144
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
1145
0
    int prev = 0;
1146
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
1147
0
    {
1148
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
1149
0
        prev = rps.deltaPOC[j];
1150
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
1151
0
    }
1152
1153
0
    prev = 0;
1154
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
1155
0
    {
1156
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
1157
0
        prev = rps.deltaPOC[j];
1158
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
1159
0
    }
1160
0
}
1161
1162
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
1163
28.7k
{
1164
28.7k
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
1165
28.7k
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
1166
28.7k
}
1167
1168
/* encode a CU block recursively */
1169
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
1170
112k
{
1171
112k
    const Slice* slice = ctu.m_slice;
1172
1173
112k
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
1174
112k
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
1175
1176
112k
    if (!cuUnsplitFlag)
1177
24.2k
    {
1178
24.2k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1179
24.2k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1180
6.55k
            bEncodeDQP = true;
1181
121k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1182
96.9k
        {
1183
96.9k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1184
96.9k
            if (childGeom.flags & CUGeom::PRESENT)
1185
55.1k
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1186
96.9k
        }
1187
24.2k
        return;
1188
24.2k
    }
1189
1190
88.3k
    if (cuSplitFlag) 
1191
65.1k
        codeSplitFlag(ctu, absPartIdx, depth);
1192
1193
88.3k
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
1194
7.17k
    {
1195
7.17k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
1196
7.17k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1197
340
            bEncodeDQP = true;
1198
35.8k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
1199
28.6k
        {
1200
28.6k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
1201
28.6k
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
1202
28.6k
        }
1203
7.17k
        return;
1204
7.17k
    }
1205
1206
81.1k
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
1207
33.8k
        bEncodeDQP = true;
1208
1209
81.1k
    if (slice->m_pps->bTransquantBypassEnabled)
1210
23.5k
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
1211
1212
81.1k
    if (!slice->isIntra())
1213
0
    {
1214
0
        codeSkipFlag(ctu, absPartIdx);
1215
0
        if (ctu.isSkipped(absPartIdx))
1216
0
        {
1217
0
            codeMergeIndex(ctu, absPartIdx);
1218
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1219
0
            return;
1220
0
        }
1221
0
        codePredMode(ctu.m_predMode[absPartIdx]);
1222
0
    }
1223
1224
81.1k
    codePartSize(ctu, absPartIdx, depth);
1225
1226
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1227
81.1k
    codePredInfo(ctu, absPartIdx);
1228
1229
81.1k
    uint32_t tuDepthRange[2];
1230
81.1k
    if (ctu.isIntra(absPartIdx))
1231
81.1k
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
1232
18.4E
    else
1233
18.4E
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
1234
1235
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
1236
81.1k
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
1237
1238
    // --- write terminating bit ---
1239
81.1k
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
1240
81.1k
}
1241
1242
/* Return bit count of signaling inter mode */
1243
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
1244
0
{
1245
0
    uint32_t bits;
1246
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
1247
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
1248
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1249
0
    switch (partSize)
1250
0
    {
1251
0
    case SIZE_2Nx2N:
1252
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1253
0
        break;
1254
1255
0
    case SIZE_2NxN:
1256
0
    case SIZE_2NxnU:
1257
0
    case SIZE_2NxnD:
1258
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1259
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1260
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1261
0
        {
1262
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1263
0
            if (partSize != SIZE_2NxN)
1264
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1265
0
        }
1266
0
        break;
1267
1268
0
    case SIZE_Nx2N:
1269
0
    case SIZE_nLx2N:
1270
0
    case SIZE_nRx2N:
1271
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1272
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1273
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1274
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1275
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1276
0
        {
1277
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1278
0
            if (partSize != SIZE_Nx2N)
1279
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1280
0
        }
1281
0
        break;
1282
0
    default:
1283
0
        X265_CHECK(0, "invalid CU partition\n");
1284
0
        break;
1285
0
    }
1286
1287
0
    return bits;
1288
0
}
1289
1290
/* finish encoding a cu and handle end-of-slice conditions */
1291
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
1292
81.1k
{
1293
81.1k
    const Slice* slice = ctu.m_slice;
1294
81.1k
    uint32_t realEndAddress = slice->m_endCUAddr;
1295
81.1k
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
1296
81.1k
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
1297
1298
81.1k
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
1299
81.1k
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
1300
81.1k
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
1301
81.1k
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
1302
81.1k
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
1303
48.4k
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
1304
1305
81.1k
    if (slice->m_pps->bUseDQP)
1306
57.6k
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
1307
1308
81.1k
    if (granularityBoundary)
1309
28.7k
    {
1310
        // Encode slice finish
1311
28.7k
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
1312
28.7k
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
1313
1.42k
            bTerminateSlice = 1;
1314
1315
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
1316
28.7k
        if (!bTerminateSlice)
1317
27.3k
            encodeBinTrm(0);    // end_of_slice_segment_flag
1318
1319
28.7k
        if (!m_bitIf)
1320
14.3k
            resetBits(); // TODO: most likely unnecessary
1321
28.7k
    }
1322
81.1k
}
1323
1324
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1325
                              bool& bCodeDQP, const uint32_t depthRange[2])
1326
1.98M
{
1327
1.98M
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1328
1329
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1330
     * so we have checks to make sure the implied value matches our intentions */
1331
1.98M
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1332
302k
    {
1333
302k
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1334
302k
    }
1335
1.68M
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1336
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1337
0
    {
1338
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1339
0
    }
1340
1.68M
    else if (log2CurSize > depthRange[1])
1341
0
    {
1342
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1343
0
    }
1344
1.68M
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1345
1.21M
    {
1346
1.21M
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1347
1.21M
    }
1348
470k
    else
1349
470k
    {
1350
470k
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1351
470k
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1352
470k
    }
1353
1354
1.98M
    uint32_t hChromaShift = cu.m_hChromaShift;
1355
1.98M
    uint32_t vChromaShift = cu.m_vChromaShift;
1356
1.98M
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
1357
1.98M
    if (!curDepth || !bSmallChroma)
1358
774k
    {
1359
774k
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
1360
774k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
1361
774k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
1362
774k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
1363
774k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
1364
774k
    }
1365
1366
1.98M
    if (subdiv)
1367
302k
    {
1368
302k
        --log2CurSize;
1369
302k
        ++curDepth;
1370
1371
302k
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1372
1373
302k
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1374
302k
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1375
302k
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1376
302k
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1377
302k
        return;
1378
302k
    }
1379
1380
1.68M
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
1381
1382
1.68M
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
1383
0
    {
1384
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
1385
0
    }
1386
1.68M
    else
1387
1.68M
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1388
1389
1.68M
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1390
1.68M
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
1391
1.68M
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
1392
1.68M
    if (!(cbfY || cbfU || cbfV))
1393
1.67M
        return;
1394
1395
    // dQP: only for CTU once
1396
8.49k
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1397
3.73k
    {
1398
3.73k
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1399
3.73k
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1400
3.73k
        codeDeltaQP(cu, absPartIdxLT);
1401
3.73k
        bCodeDQP = false;
1402
3.73k
    }
1403
1404
8.49k
    if (cbfY)
1405
4.95k
    {
1406
4.95k
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1407
4.95k
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1408
4.95k
        if (!(cbfU || cbfV))
1409
565
            return;
1410
4.95k
    }
1411
1412
7.93k
    if (bSmallChroma)
1413
5.07k
    {
1414
5.07k
        if ((absPartIdx & 3) != 3)
1415
3.80k
            return;
1416
1417
1.26k
        const uint32_t log2CurSizeC = 2;
1418
1.26k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1419
1.26k
        const uint32_t curPartNum = 4;
1420
1.26k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1421
3.80k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1422
2.53k
        {
1423
2.53k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1424
2.53k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1425
2.53k
            do
1426
2.53k
            {
1427
2.53k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1428
2.53k
                {
1429
2.53k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1430
2.53k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1431
2.53k
                }
1432
2.53k
            }
1433
2.53k
            while (tuIterator.isNextSection());
1434
2.53k
        }
1435
1.26k
    }
1436
2.85k
    else
1437
2.85k
    {
1438
2.85k
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1439
2.85k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1440
2.85k
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1441
2.85k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1442
8.81k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1443
5.96k
        {
1444
5.96k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1445
5.96k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1446
5.96k
            do
1447
5.96k
            {
1448
5.96k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1449
5.96k
                {
1450
5.96k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1451
5.96k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1452
5.96k
                }
1453
5.96k
            }
1454
5.96k
            while (tuIterator.isNextSection());
1455
5.96k
        }
1456
2.85k
    }
1457
7.93k
}
1458
1459
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1460
                              bool& bCodeDQP, const uint32_t depthRange[2])
1461
0
{
1462
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1463
1464
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1465
     * so we have checks to make sure the implied value matches our intentions */
1466
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1467
0
    {
1468
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1469
0
    }
1470
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1471
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1472
0
    {
1473
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1474
0
    }
1475
0
    else if (log2CurSize > depthRange[1])
1476
0
    {
1477
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1478
0
    }
1479
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1480
0
    {
1481
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1482
0
    }
1483
0
    else
1484
0
    {
1485
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1486
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1487
0
    }
1488
1489
0
    if (subdiv)
1490
0
    {
1491
0
        --log2CurSize;
1492
0
        ++curDepth;
1493
1494
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1495
1496
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1497
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1498
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1499
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1500
0
        return;
1501
0
    }
1502
1503
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1504
0
    {
1505
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1506
0
    }
1507
0
    else
1508
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1509
1510
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1511
1512
0
    if (!cbfY)
1513
0
        return;
1514
1515
    // dQP: only for CTU once
1516
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1517
0
    {
1518
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1519
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1520
0
        codeDeltaQP(cu, absPartIdxLT);
1521
0
        bCodeDQP = false;
1522
0
    }
1523
1524
0
    if (cbfY)
1525
0
    {
1526
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1527
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1528
0
    }
1529
0
}
1530
1531
1532
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1533
772k
{
1534
772k
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1535
772k
    {
1536
772k
        codeIntraDirLumaAng(cu, absPartIdx, true);
1537
772k
        if (cu.m_chromaFormat != X265_CSP_I400)
1538
772k
        {
1539
772k
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1540
772k
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1541
1542
772k
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1543
1544
772k
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1545
0
            {
1546
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1547
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1548
0
                {
1549
0
                    absPartIdx += qNumParts;
1550
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1551
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1552
0
                }
1553
0
            }
1554
772k
        }
1555
772k
    }
1556
18.4E
    else // if it is inter mode, encode motion vector and reference index
1557
18.4E
        codePUWise(cu, absPartIdx);
1558
772k
}
1559
1560
/** encode motion information for every PU block */
1561
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1562
0
{
1563
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1564
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1565
1566
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1567
0
    {
1568
0
        codeMergeFlag(cu, subPartIdx);
1569
0
        if (cu.m_mergeFlag[subPartIdx])
1570
0
            codeMergeIndex(cu, subPartIdx);
1571
0
        else
1572
0
        {
1573
0
            if (cu.m_slice->isInterB())
1574
0
                codeInterDir(cu, subPartIdx);
1575
1576
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1577
0
            for (uint32_t list = 0; list < 2; list++)
1578
0
            {
1579
0
                if (interDir & (1 << list))
1580
0
                {
1581
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1582
1583
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1584
0
                    codeMvd(cu, subPartIdx, list);
1585
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1586
0
                }
1587
0
            }
1588
0
        }
1589
0
    }
1590
0
}
1591
1592
/** encode reference frame index for a PU block */
1593
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1594
0
{
1595
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1596
1597
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1598
0
        codeRefFrmIdx(cu, absPartIdx, list);
1599
0
}
1600
1601
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1602
772k
{
1603
772k
    if (!cu.isIntra(absPartIdx))
1604
0
    {
1605
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1606
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1607
0
        if (!cu.getQtRootCbf(absPartIdx))
1608
0
            return;
1609
0
    }
1610
1611
772k
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1612
772k
    if (cu.m_chromaFormat == X265_CSP_I400)
1613
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1614
772k
    else
1615
772k
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1616
772k
}
1617
1618
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1619
56.6k
{
1620
56.6k
    int typeIdx = ctuParam.typeIdx;
1621
1622
56.6k
    if (plane != 2)
1623
37.7k
    {
1624
37.7k
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1625
37.7k
        if (typeIdx >= 0)
1626
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1627
37.7k
    }
1628
1629
56.6k
    if (typeIdx >= 0)
1630
0
    {
1631
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1632
0
        if (typeIdx == SAO_BO)
1633
0
        {
1634
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1635
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1636
1637
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1638
0
                if (ctuParam.offset[i] != 0)
1639
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1640
1641
0
            encodeBinsEP(ctuParam.bandPos, 5);
1642
0
        }
1643
0
        else // if (typeIdx < SAO_BO)
1644
0
        {
1645
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1646
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1647
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1648
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1649
0
            if (plane != 2)
1650
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1651
0
        }
1652
0
    }
1653
56.6k
}
1654
1655
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1656
172k
{
1657
172k
    if (plane != 2)
1658
115k
    {
1659
115k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1660
115k
        encodeBinEP(1);
1661
115k
    }
1662
1663
172k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1664
1665
172k
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1666
172k
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1667
172k
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1668
172k
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1669
172k
    if (plane != 2)
1670
115k
        encodeBinsEP((uint32_t)(typeIdx), 2);
1671
172k
}
1672
1673
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1674
43.1k
{
1675
43.1k
    if (plane != 2)
1676
28.7k
    {
1677
28.7k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1678
28.7k
        encodeBinEP(0);
1679
28.7k
    }
1680
1681
43.1k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1682
1683
215k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1684
172k
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1685
1686
215k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1687
172k
        if (offset[i] != 0)
1688
58
            encodeBinEP(offset[i] < 0);
1689
1690
43.1k
    encodeBinsEP(bandPos, 5);
1691
43.1k
}
1692
1693
/** initialize context model with respect to QP and initialization value */
1694
uint8_t sbacInit(int qp, int initValue)
1695
112k
{
1696
112k
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1697
1698
112k
    int  slope      = (initValue >> 4) * 5 - 45;
1699
112k
    int  offset     = ((initValue & 15) << 3) - 16;
1700
112k
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1701
112k
    uint32_t mpState = (initState >= 64);
1702
112k
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1703
1704
112k
    return (uint8_t)state;
1705
112k
}
1706
1707
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1708
18.5k
{
1709
18.5k
    ctxModel += sliceType * size;
1710
1711
130k
    for (int n = 0; n < size; n++)
1712
112k
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1713
18.5k
}
1714
1715
void Entropy::resetEntropy(const Slice& slice)
1716
714
{
1717
714
    int  qp              = slice.m_sliceQp;
1718
714
    SliceType sliceType  = slice.m_sliceType;
1719
1720
714
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1721
714
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1722
714
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1723
714
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1724
714
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1725
714
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1726
714
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1727
714
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1728
714
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1729
714
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1730
714
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1731
714
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1732
714
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1733
714
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1734
714
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1735
714
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1736
714
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1737
714
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1738
714
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1739
714
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1740
714
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1741
714
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1742
714
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1743
714
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1744
714
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1745
714
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1746
    // new structure
1747
1748
714
    start();
1749
714
}
1750
1751
/* code explicit wp tables */
1752
void Entropy::codePredWeightTable(const Slice& slice)
1753
0
{
1754
0
    const WeightParam *wp;
1755
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1756
0
    bool            bDenomCoded  = false;
1757
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1758
0
    uint32_t        totalSignalledWeightFlags = 0;
1759
1760
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1761
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1762
0
    {
1763
0
        for (int list = 0; list < numRefDirs; list++)
1764
0
        {
1765
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1766
0
            {
1767
0
                wp = slice.m_weightPredTable[list][ref];
1768
0
                if (!bDenomCoded)
1769
0
                {
1770
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1771
1772
0
                    if (bChroma)
1773
0
                    {
1774
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1775
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1776
0
                    }
1777
0
                    bDenomCoded = true;
1778
0
                }
1779
#if ENABLE_SCC_EXT
1780
                if (slice.m_poc == slice.m_refPOCList[list][ref])
1781
                    assert(!wp[0].wtPresent);
1782
                else
1783
#endif
1784
0
                    WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1785
0
                totalSignalledWeightFlags = totalSignalledWeightFlags + wp[0].wtPresent;
1786
0
            }
1787
1788
0
            if (bChroma)
1789
0
            {
1790
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1791
0
                {
1792
0
                    wp = slice.m_weightPredTable[list][ref];
1793
#if ENABLE_SCC_EXT
1794
                    if (slice.m_poc == slice.m_refPOCList[list][ref])
1795
                        assert(!wp[1].wtPresent);
1796
                    else
1797
#endif
1798
0
                        WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1799
0
                    totalSignalledWeightFlags = totalSignalledWeightFlags + 2 * wp[1].wtPresent;
1800
0
                }
1801
0
            }
1802
1803
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1804
0
            {
1805
0
                wp = slice.m_weightPredTable[list][ref];
1806
0
                if (wp[0].wtPresent)
1807
0
                {
1808
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1809
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1810
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1811
0
                }
1812
1813
0
                if (bChroma)
1814
0
                {
1815
0
                    if (wp[1].wtPresent)
1816
0
                    {
1817
0
                        for (int plane = 1; plane < 3; plane++)
1818
0
                        {
1819
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1820
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1821
1822
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1823
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1824
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1825
0
                        }
1826
0
                    }
1827
0
                }
1828
0
            }
1829
0
        }
1830
1831
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1832
0
    }
1833
0
}
1834
1835
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1836
5.05k
{
1837
5.05k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1838
1839
5.05k
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1840
1841
5.05k
    if (!symbol)
1842
474
        return;
1843
1844
4.58k
    bool bCodeLast = (maxSymbol > symbol);
1845
1846
22.2k
    while (--symbol)
1847
17.6k
        encodeBin(1, scmModel[offset]);
1848
1849
4.58k
    if (bCodeLast)
1850
217
        encodeBin(0, scmModel[offset]);
1851
4.58k
}
1852
1853
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1854
4.36k
{
1855
4.36k
    uint32_t bins = 0;
1856
4.36k
    int numBins = 0;
1857
1858
16.9k
    while (symbol >= (uint32_t)(1 << count))
1859
12.6k
    {
1860
12.6k
        bins = 2 * bins + 1;
1861
12.6k
        numBins++;
1862
12.6k
        symbol -= 1 << count;
1863
12.6k
        count++;
1864
12.6k
    }
1865
1866
4.36k
    bins = 2 * bins + 0;
1867
4.36k
    numBins++;
1868
1869
4.36k
    bins = (bins << count) | symbol;
1870
4.36k
    numBins += count;
1871
1872
4.36k
    X265_CHECK(numBins <= 32, "numBins too large\n");
1873
4.36k
    encodeBinsEP(bins, numBins);
1874
4.36k
}
1875
1876
/** Coding of coeff_abs_level_minus3 */
1877
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1878
10.3k
{
1879
10.3k
    uint32_t length;
1880
10.3k
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1881
1882
10.3k
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1883
0
    {
1884
0
        length = codeNumber >> absGoRice;
1885
1886
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1887
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1888
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1889
0
    }
1890
10.3k
    else
1891
10.3k
    {
1892
10.3k
        length = 0;
1893
10.3k
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1894
10.3k
        {
1895
10.3k
            unsigned long idx;
1896
10.3k
            BSR(idx, codeNumber + 1);
1897
10.3k
            length = idx;
1898
10.3k
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1899
10.3k
            codeNumber -= (1 << idx) - 1;
1900
10.3k
        }
1901
10.3k
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1902
1903
10.3k
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1904
10.3k
        encodeBinsEP(codeNumber, length + absGoRice);
1905
10.3k
    }
1906
10.3k
}
1907
1908
// SBAC RD
1909
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1910
1.59M
{
1911
1.59M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1912
1.59M
    m_fracBits = src.m_fracBits;
1913
1.59M
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1914
1.59M
}
1915
1916
void Entropy::copyFrom(const Entropy& src)
1917
11.1M
{
1918
11.1M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1919
1920
11.1M
    copyState(src);
1921
1922
11.1M
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1923
11.1M
    markValid();
1924
11.1M
}
1925
1926
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1927
2.56M
{
1928
2.56M
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1929
1930
2.56M
    if (cu.isIntra(absPartIdx))
1931
2.56M
    {
1932
2.56M
        if (depth == cu.m_encData->m_param->maxCUDepth)
1933
2.14M
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1934
2.56M
        return;
1935
2.56M
    }
1936
1937
18.4E
    switch (partSize)
1938
18.4E
    {
1939
0
    case SIZE_2Nx2N:
1940
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1941
0
        break;
1942
1943
0
    case SIZE_2NxN:
1944
0
    case SIZE_2NxnU:
1945
0
    case SIZE_2NxnD:
1946
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1947
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1948
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1949
0
        {
1950
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1951
0
            if (partSize != SIZE_2NxN)
1952
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1953
0
        }
1954
0
        break;
1955
1956
0
    case SIZE_Nx2N:
1957
0
    case SIZE_nLx2N:
1958
0
    case SIZE_nRx2N:
1959
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1960
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1961
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1962
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1963
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1964
0
        {
1965
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1966
0
            if (partSize != SIZE_Nx2N)
1967
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1968
0
        }
1969
0
        break;
1970
0
    default:
1971
0
        X265_CHECK(0, "invalid CU partition\n");
1972
0
        break;
1973
18.4E
    }
1974
18.4E
}
1975
1976
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1977
0
{
1978
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1979
1980
0
    if (numCand > 1)
1981
0
    {
1982
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1983
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1984
1985
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1986
1987
0
        if (unaryIdx != 0)
1988
0
        {
1989
0
            uint32_t mask = (1 << unaryIdx) - 2;
1990
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1991
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1992
0
        }
1993
0
    }
1994
0
}
1995
1996
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1997
4.37M
{
1998
4.37M
    uint32_t dir[4], j;
1999
4.37M
    uint32_t preds[4][3];
2000
4.37M
    int predIdx[4];
2001
4.37M
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
2002
4.37M
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
2003
2004
9.66M
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
2005
5.28M
    {
2006
5.28M
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
2007
5.28M
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
2008
5.28M
        predIdx[j] = -1;
2009
21.1M
        for (uint32_t i = 0; i < 3; i++)
2010
15.8M
            if (dir[j] == preds[j][i])
2011
5.27M
                predIdx[j] = i;
2012
2013
5.28M
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
2014
5.28M
    }
2015
2016
9.66M
    for (j = 0; j < partNum; j++)
2017
5.28M
    {
2018
5.28M
        if (predIdx[j] != -1)
2019
5.27M
        {
2020
5.27M
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
2021
            // NOTE: Mapping
2022
            //       0 = 0
2023
            //       1 = 10
2024
            //       2 = 11
2025
5.27M
            int nonzero = (!!predIdx[j]);
2026
5.27M
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
2027
5.27M
        }
2028
11.9k
        else
2029
11.9k
        {
2030
11.9k
            if (preds[j][0] > preds[j][1])
2031
502
                std::swap(preds[j][0], preds[j][1]);
2032
2033
11.9k
            if (preds[j][0] > preds[j][2])
2034
0
                std::swap(preds[j][0], preds[j][2]);
2035
2036
11.9k
            if (preds[j][1] > preds[j][2])
2037
0
                std::swap(preds[j][1], preds[j][2]);
2038
2039
11.9k
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
2040
18.4E
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
2041
18.4E
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
2042
2043
11.9k
            encodeBinsEP(dir[j], 5);
2044
11.9k
        }
2045
5.28M
    }
2046
4.37M
}
2047
2048
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
2049
4.23M
{
2050
4.23M
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
2051
2052
4.23M
    if (intraDirChroma == DM_CHROMA_IDX)
2053
1.08M
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
2054
3.14M
    else
2055
3.14M
    {
2056
7.29M
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
2057
7.29M
        {
2058
7.29M
            if (intraDirChroma == chromaDirMode[i])
2059
3.14M
            {
2060
3.14M
                intraDirChroma = i;
2061
3.14M
                break;
2062
3.14M
            }
2063
7.29M
        }
2064
2065
3.14M
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
2066
3.14M
        encodeBinsEP(intraDirChroma, 2);
2067
3.14M
    }
2068
4.23M
}
2069
2070
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
2071
0
{
2072
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
2073
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
2074
2075
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
2076
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
2077
0
    if (interDir < 2)
2078
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
2079
0
}
2080
2081
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
2082
0
{
2083
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
2084
2085
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
2086
2087
0
    if (refFrame > 0)
2088
0
    {
2089
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
2090
0
        if (refNum == 0)
2091
0
            return;
2092
2093
0
        refFrame--;
2094
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
2095
0
        if (refFrame > 0)
2096
0
        {
2097
0
            uint32_t mask = (1 << refFrame) - 2;
2098
0
            mask >>= (refFrame == refNum) ? 1 : 0;
2099
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
2100
0
        }
2101
0
    }
2102
0
}
2103
2104
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
2105
0
{
2106
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
2107
0
    const int hor = mvd.x;
2108
0
    const int ver = mvd.y;
2109
2110
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2111
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
2112
2113
0
    const bool bHorAbsGr0 = hor != 0;
2114
0
    const bool bVerAbsGr0 = ver != 0;
2115
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
2116
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
2117
2118
0
    if (bHorAbsGr0)
2119
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2120
2121
0
    if (bVerAbsGr0)
2122
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
2123
2124
0
    if (bHorAbsGr0)
2125
0
    {
2126
0
        if (horAbs > 1)
2127
0
            writeEpExGolomb(horAbs - 2, 1);
2128
2129
0
        encodeBinEP(0 > hor ? 1 : 0);
2130
0
    }
2131
2132
0
    if (bVerAbsGr0)
2133
0
    {
2134
0
        if (verAbs > 1)
2135
0
            writeEpExGolomb(verAbs - 2, 1);
2136
2137
0
        encodeBinEP(0 > ver ? 1 : 0);
2138
0
    }
2139
0
}
2140
2141
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
2142
5.05k
{
2143
5.05k
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
2144
2145
5.05k
    int qpBdOffsetY = QP_BD_OFFSET;
2146
2147
5.05k
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
2148
2149
5.05k
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
2150
5.05k
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
2151
5.05k
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
2152
5.05k
    if (absDQp >= CU_DQP_TU_CMAX)
2153
4.36k
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
2154
2155
5.05k
    if (absDQp > 0)
2156
4.58k
    {
2157
4.58k
        uint32_t sign = (dqp > 0 ? 0 : 1);
2158
4.58k
        encodeBinEP(sign);
2159
4.58k
    }
2160
5.05k
}
2161
2162
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
2163
8.47M
{
2164
8.47M
    uint32_t ctx = tuDepth + 2;
2165
2166
8.47M
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
2167
8.47M
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
2168
8.47M
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
2169
2170
8.47M
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
2171
0
    {
2172
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
2173
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
2174
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
2175
2176
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2177
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2178
0
    }
2179
8.47M
    else
2180
8.47M
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
2181
8.47M
}
2182
2183
#if CHECKED_BUILD || _DEBUG
2184
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
2185
{
2186
    uint32_t goRiceParam = 0;
2187
    int firstCoeff2 = 1;
2188
    uint32_t baseLevelN = 0x5555AAAA; // 2-bits encode format baseLevel
2189
2190
    uint32_t sum = 0;
2191
    int idx = 0;
2192
    do
2193
    {
2194
        int baseLevel = (baseLevelN & 3) | firstCoeff2;
2195
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2196
        baseLevelN >>= 2;
2197
        int codeNumber = absCoeff[idx] - baseLevel;
2198
2199
        if (codeNumber >= 0)
2200
        {
2201
            //writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2202
            uint32_t length = 0;
2203
2204
            codeNumber = ((uint32_t)codeNumber >> goRiceParam) - COEF_REMAIN_BIN_REDUCTION;
2205
            if (codeNumber >= 0)
2206
            {
2207
                {
2208
                    unsigned long cidx;
2209
                    BSR(cidx, codeNumber + 1);
2210
                    length = cidx;
2211
                }
2212
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
2213
2214
                codeNumber = (length + length);
2215
            }
2216
            sum += (COEF_REMAIN_BIN_REDUCTION + 1 + goRiceParam + codeNumber);
2217
2218
            if (absCoeff[idx] > (COEF_REMAIN_BIN_REDUCTION << goRiceParam))
2219
                goRiceParam = (goRiceParam + 1) - (goRiceParam >> 2);
2220
            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2221
        }
2222
        if (absCoeff[idx] >= 2)
2223
            firstCoeff2 = 0;
2224
        idx++;
2225
    }
2226
    while(idx < numNonZero);
2227
2228
    return sum;
2229
}
2230
#endif // debug only code
2231
2232
/* Code (or, when no bitstream is attached, cost) the quantized coefficients of one transform block.
 *
 * cu         - coding unit owning the block; supplies transquant-bypass, PPS flags and scan parameters
 * coeff      - coefficients of the block, addressed with a row stride of (1 << log2TrSize)
 * absPartIdx - partition index of the block inside the CU
 * log2TrSize - log2 of the transform block width
 * ttype      - texture type; TEXT_LUMA selects the luma context sets, anything else the chroma sets
 *
 * Order of operations: optional transform-skip flag, last significant position, then for each
 * coefficient group (CG) from the last one down to group 0: the group flag, the per-coefficient
 * significance flags, the greater-than-1 / greater-than-2 flags, the sign bits and the remaining
 * levels. When m_bitIf is NULL the primitives.cost* helpers are used and the bit cost is
 * accumulated into m_fracBits instead of bins being written one by one. */
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
2233
66.7k
{
2234
66.7k
    uint32_t trSize = 1 << log2TrSize;
2235
66.7k
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
2236
    // compute number of significant coefficients
2237
66.7k
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
2238
66.7k
    X265_CHECK(numSig > 0, "cbf check fail\n");
2239
66.7k
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
2240
2241
66.7k
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
2242
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
2243
2244
66.7k
    bool bIsLuma = ttype == TEXT_LUMA;
2245
2246
    // select scans
2247
66.7k
    TUEntropyCodingParameters codingParameters;
2248
66.7k
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
2249
2250
66.7k
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
2251
66.7k
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
2252
66.7k
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
2253
2254
    //----- encode significance map -----
2255
2256
    // Find position of last coefficient
2257
66.7k
    int scanPosLast = 0;
2258
66.7k
    uint32_t posLast;
2259
66.7k
    uint64_t sigCoeffGroupFlag64 = 0;
2260
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
2261
66.7k
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
2262
2263
66.7k
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
2264
66.7k
    posLast = codingParameters.scan[scanPosLast];
2265
2266
66.7k
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
2267
2268
    // Build the per-CG non-zero mask; the last CG is always flagged as non-zero inside the CG scan loop below
2269
178k
    for(int idx = 0; idx < lastScanSet; idx++)
2270
111k
    {
2271
111k
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
2272
111k
        const uint8_t nonZero = (coeffNum[idx] != 0);
2273
111k
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
2274
111k
    }
2275
2276
2277
    // Code position of last coefficient
2278
66.7k
    {
2279
        // The last position is composed of a prefix and suffix.
2280
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
2281
        // The bypass coded bins for both the x and y components are grouped together.
2282
66.7k
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
2283
66.7k
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
2284
        // swap
2285
66.7k
        if (codingParameters.scanType == SCAN_VER)
2286
4.99k
            std::swap(pos[0], pos[1]);
2287
2288
66.7k
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
2289
66.7k
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
2290
66.7k
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
2291
66.7k
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
2292
66.7k
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
2293
2294
66.7k
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
2295
200k
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2296
133k
        {
            // g_lastCoeffTable entry: low 4 bits = number of prefix one-bins, upper bits = suffix length
2297
133k
            uint32_t temp = g_lastCoeffTable[pos[i]];
2298
133k
            uint32_t prefixOnes = temp & 15;
2299
133k
            uint32_t suffixLen = temp >> 4;
2300
2301
302k
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
2302
169k
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
2303
2304
133k
            if (prefixOnes < maxGroupIdx)
2305
92.6k
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
2306
2307
133k
            packedSuffixBits <<= suffixLen;
2308
133k
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
2309
133k
            packedSuffixLen += suffixLen;
2310
133k
        }
2311
2312
66.7k
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
2313
66.7k
    }
2314
2315
    // code significance flag
2316
66.7k
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
2317
66.7k
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
2318
66.7k
    uint32_t c1 = 1;
2319
66.7k
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
2320
66.7k
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
2321
66.7k
    uint32_t numNonZero = 1;
2322
66.7k
    unsigned long lastNZPosInCG;
2323
66.7k
    unsigned long firstNZPosInCG;
2324
2325
#if _DEBUG
2326
    // Unnecessary, for Valgrind-3.10.0 only
2327
    memset(absCoeff, 0, sizeof(absCoeff));
2328
#endif
2329
    // The last significant coefficient is implied by the coded last position, so it is stored
    // up front and numNonZero starts at 1 for the last CG.
2330
66.7k
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
2331
2332
245k
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
2333
178k
    {
2334
178k
        const uint32_t subCoeffFlag = coeffFlag[subSet];
2335
178k
        uint32_t scanFlagMask = subCoeffFlag;
2336
178k
        int subPosBase = subSet << MLS_CG_SIZE;
2337
        
2338
178k
        if (subSet == lastScanSet)
2339
66.7k
        {
2340
66.7k
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
2341
66.7k
            scanFlagMask >>= 1;
2342
66.7k
        }
2343
2344
        // encode significant_coeffgroup_flag
2345
178k
        const int cgBlkPos = codingParameters.scanCG[subSet];
2346
178k
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
2347
178k
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
2348
178k
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
2349
        // the flag is not coded for the last CG nor for CG 0; both are simply marked as significant
2350
178k
        if (subSet == lastScanSet || !subSet)
2351
73.5k
            sigCoeffGroupFlag64 |= cgBlkPosMask;
2352
104k
        else
2353
104k
        {
2354
104k
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
2355
104k
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2356
104k
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
2357
104k
        }
2358
2359
        // encode significant_coeff_flag
2360
178k
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
2361
134k
        {
2362
134k
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
2363
134k
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
2364
134k
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
2365
2366
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
2367
134k
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
2368
134k
            {
2369
                // patternSigCtx = 0
2370
134k
                {
2371
134k
                    2, 1, 1, 0,
2372
134k
                    1, 1, 0, 0,
2373
134k
                    1, 0, 0, 0,
2374
134k
                    0, 0, 0, 0,
2375
134k
                },
2376
                // patternSigCtx = 1
2377
134k
                {
2378
134k
                    2, 2, 2, 2,
2379
134k
                    1, 1, 1, 1,
2380
134k
                    0, 0, 0, 0,
2381
134k
                    0, 0, 0, 0,
2382
134k
                },
2383
                // patternSigCtx = 2
2384
134k
                {
2385
134k
                    2, 1, 0, 0,
2386
134k
                    2, 1, 0, 0,
2387
134k
                    2, 1, 0, 0,
2388
134k
                    2, 1, 0, 0,
2389
134k
                },
2390
                // patternSigCtx = 3
2391
134k
                {
2392
134k
                    2, 2, 2, 2,
2393
134k
                    2, 2, 2, 2,
2394
134k
                    2, 2, 2, 2,
2395
134k
                    2, 2, 2, 2,
2396
134k
                },
2397
                // 4x4
2398
134k
                {
2399
134k
                    0, 1, 4, 5,
2400
134k
                    2, 3, 4, 5,
2401
134k
                    6, 6, 8, 8,
2402
134k
                    7, 7, 8, 8
2403
134k
                }
2404
134k
            };
2405
2406
134k
            const int offset = codingParameters.firstSignificanceMapContext;
2407
134k
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2408
2409
134k
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
            // bitstream attached: emit every significant_coeff_flag bin individually
2410
134k
            if (m_bitIf)
2411
549
            {
2412
549
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2413
2414
                // TODO: accelerate by PABSW
2415
2.74k
                for (int i = 0; i < MLS_CG_SIZE; i++)
2416
2.19k
                {
2417
2.19k
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2418
2.19k
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2419
2.19k
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2420
2.19k
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2421
2.19k
                }
2422
2423
549
                if (log2TrSize == 2)
2424
549
                {
2425
549
                    do
2426
8.23k
                    {
2427
8.23k
                        uint32_t blkPos, sig, ctxSig;
2428
8.23k
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2429
8.23k
                        sig     = scanFlagMask & 1;
2430
8.23k
                        scanFlagMask >>= 1;
2431
8.23k
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2432
8.23k
                        {
2433
8.23k
                            ctxSig = table_cnt[4][blkPos];
2434
8.23k
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2435
8.23k
                            encodeBin(sig, baseCtx[ctxSig]);
2436
8.23k
                        }
                        // the level is stored unconditionally; the slot is only kept when sig is 1
2437
8.23k
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2438
8.23k
                        numNonZero += sig;
2439
8.23k
                        scanPosSigOff--;
2440
8.23k
                    }
2441
8.23k
                    while(scanPosSigOff >= 0);
2442
549
                }
2443
0
                else
2444
0
                {
2445
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2446
2447
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2448
0
                    do
2449
0
                    {
2450
0
                        uint32_t blkPos, sig, ctxSig;
2451
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2452
0
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2453
0
                        sig     = scanFlagMask & 1;
2454
0
                        scanFlagMask >>= 1;
2455
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
                        // the flag at CG scan position 0 is skipped when it is the only possible non-zero of a non-first CG
2456
0
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2457
0
                        {
2458
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2459
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2460
2461
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2462
0
                            encodeBin(sig, baseCtx[ctxSig]);
2463
0
                        }
2464
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2465
0
                        numNonZero += sig;
2466
0
                        scanPosSigOff--;
2467
0
                    }
2468
0
                    while(scanPosSigOff >= 0);
2469
0
                }
2470
549
            }
2471
133k
            else // fast RD path
2472
133k
            {
2473
                // g_entropyBits entries are at most 18 bits and at most 16 of them are summed, so the intermediate sum fits in 22 bits
2474
133k
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2475
133k
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2476
133k
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2477
2478
#if CHECKED_BUILD || _DEBUG
2479
                numNonZero = coeffNum[subSet];
2480
#endif
2481
                // update RD cost
2482
133k
                m_fracBits += sum;
2483
133k
            } // end of fast RD path -- !m_bitIf
2484
134k
        }
2485
178k
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2486
2487
178k
        uint32_t coeffSigns = coeffSign[subSet];
2488
178k
        numNonZero = coeffNum[subSet];
2489
178k
        if (numNonZero > 0)
2490
178k
        {
2491
178k
            uint32_t idx;
2492
178k
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2493
178k
            BSR(lastNZPosInCG, subCoeffFlag);
2494
178k
            BSF(firstNZPosInCG, subCoeffFlag);
2495
            // one sign bit is dropped below when this is set and bHideFirstSign is enabled
2496
178k
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2497
178k
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2498
178k
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2499
2500
178k
            c1 = 1;
2501
178k
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2502
2503
178k
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2504
178k
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2505
            // no bitstream attached: estimate the cost of the level flags, signs and remainders
2506
178k
            if (!m_bitIf)
2507
176k
            {
2508
176k
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
                // packed result: bits 28 and up = firstC2Idx, bits 26-27 = c1, low 24 bits = cost
2509
176k
                uint32_t firstC2Idx = (sum >> 28);
2510
176k
                c1 = ((sum >> 26) & 3);
2511
176k
                m_fracBits += sum & 0x00FFFFFF;
2512
2513
176k
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2514
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2515
176k
                m_fracBits += (numNonZero + hiddenShift) << 15;
2516
2517
176k
                if (numNonZero > firstC2Idx)
2518
171k
                {
2519
171k
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2520
171k
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2521
171k
                    m_fracBits += ((uint64_t)sum << 15);
2522
171k
                }
2523
176k
            }
2524
            // Standard path
2525
2.26k
            else
2526
2.26k
            {
2527
2.26k
                uint32_t firstC2Idx = 8;
2528
2.26k
                uint32_t firstC2Flag = 2;
2529
2.26k
                uint32_t c1Next = 0xFFFFFFFE;
2530
2531
2.26k
                idx = 0;
2532
2.26k
                do
2533
6.11k
                {
2534
6.11k
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2535
6.11k
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2536
6.11k
                    encodeBin(symbol1, baseCtxMod[c1]);
2537
2538
6.11k
                    if (symbol1)
2539
5.98k
                        c1Next = 0;
2540
                    // latch the greater-than-2 flag and the index of the first coefficient whose level exceeds 1
2541
6.11k
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2542
6.11k
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2543
2544
6.11k
                    c1 = (c1Next & 3);
2545
6.11k
                    c1Next >>= 2;
2546
6.11k
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2547
6.11k
                    idx++;
2548
6.11k
                }
2549
6.11k
                while(idx < numC1Flag);
2550
2551
2.26k
                if (!c1)
2552
2.14k
                {
2553
2.14k
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2554
2555
2.14k
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2556
2.14k
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2557
2.14k
                }
2558
2559
2.26k
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2560
2.26k
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2561
2562
2.26k
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2563
2.14k
                {
2564
                    // Standard path
2565
2.14k
                    uint32_t goRiceParam = 0;
2566
2.14k
                    int baseLevel = 3;
2567
2.14k
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2568
#if CHECKED_BUILD || _DEBUG
2569
                    int firstCoeff2 = 1;
2570
#endif
2571
2.14k
                    idx = firstC2Idx;
2572
2.14k
                    do
2573
10.3k
                    {
2574
10.3k
                        if (idx >= C1FLAG_NUMBER)
2575
4.39k
                            baseLevel = 1;
2576
                        // TODO: the fast algorithm may have broken this check logic
2577
10.3k
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2578
2579
10.3k
                        if (absCoeff[idx] >= baseLevel)
2580
10.3k
                        {
2581
10.3k
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2582
10.3k
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2583
10.3k
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2584
10.3k
                            goRiceParam += adjust;
2585
10.3k
                            threshold += (adjust) ? threshold : 0;
2586
10.3k
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2587
10.3k
                        }
2588
#if CHECKED_BUILD || _DEBUG
2589
                        firstCoeff2 = 0;
2590
#endif
2591
10.3k
                        baseLevel = 2;
2592
10.3k
                        idx++;
2593
10.3k
                    }
2594
10.3k
                    while(idx < numNonZero);
2595
2.14k
                }
2596
2.26k
            } // end of !bitIf
2597
178k
        } // end of (numNonZero > 0)
2598
2599
        // Initialize value for next loop
2600
178k
        numNonZero = 0;
2601
178k
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2602
178k
    }
2603
66.7k
}
2604
2605
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2606
862k
{
2607
862k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2608
2609
862k
    uint32_t isCodeNonZero = !!code;
2610
2611
862k
    encodeBinEP(isCodeNonZero);
2612
862k
    if (isCodeNonZero)
2613
58
    {
2614
58
        uint32_t isCodeLast = (maxSymbol > code);
2615
58
        uint32_t mask = (1 << (code - 1)) - 1;
2616
58
        uint32_t len = code - 1 + isCodeLast;
2617
58
        mask <<= isCodeLast;
2618
2619
58
        encodeBinsEP(mask, len);
2620
58
    }
2621
862k
}
2622
2623
/* estimate bit cost for CBP, significant map and significant coefficients */
2624
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2625
8.23M
{
2626
8.23M
    estCBFBit(estBitsSbac);
2627
2628
8.23M
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2629
2630
    // encode significance map
2631
8.23M
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2632
2633
    // encode significant coefficients
2634
8.23M
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2635
8.23M
}
2636
2637
/* estimate bit cost for each CBP bit */
2638
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2639
8.23M
{
2640
8.23M
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2641
2642
65.8M
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2643
57.6M
    {
2644
57.6M
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2645
57.6M
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2646
57.6M
    }
2647
2648
8.23M
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2649
2650
8.23M
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2651
8.23M
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2652
8.23M
}
2653
2654
/* estimate SAMBAC bit cost for significant coefficient group map */
2655
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2656
8.23M
{
2657
8.23M
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2658
2659
24.7M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2660
49.4M
        for (uint32_t bin = 0; bin < 2; bin++)
2661
32.9M
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2662
8.23M
}
2663
2664
/* estimate SAMBAC bit cost for significant coefficient map */
2665
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2666
8.23M
{
2667
8.23M
    int firstCtx = 1, numCtx = 8;
2668
2669
8.23M
    if (log2TrSize >= 4)
2670
315k
    {
2671
315k
        firstCtx = bIsLuma ? 21 : 12;
2672
315k
        numCtx = bIsLuma ? 6 : 3;
2673
315k
    }
2674
7.91M
    else if (log2TrSize == 3)
2675
1.26M
    {
2676
1.26M
        firstCtx = 9;
2677
1.26M
        numCtx = bIsLuma ? 12 : 3;
2678
1.26M
    }
2679
2680
8.23M
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2681
2682
8.23M
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2683
8.23M
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2684
2685
75.0M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2686
66.7M
    {
2687
66.7M
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2688
66.7M
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2689
66.7M
    }
2690
2691
8.23M
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2692
8.23M
    if (bIsLuma)
2693
4.77M
    {
2694
4.77M
        if (log2TrSize == 2)
2695
3.63M
        {
2696
10.8M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2697
7.26M
            {
2698
7.26M
                int bits = 0;
2699
7.26M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2700
2701
29.0M
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2702
21.7M
                {
2703
21.7M
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2704
21.7M
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2705
21.7M
                }
2706
2707
7.26M
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2708
7.26M
            }
2709
3.63M
        }
2710
1.14M
        else
2711
1.14M
        {
2712
1.14M
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2713
2714
3.42M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2715
2.28M
            {
2716
2.28M
                int bits = 0;
2717
2.28M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2718
2.28M
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2719
2720
9.69M
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2721
7.40M
                {
2722
7.40M
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2723
7.40M
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2724
7.40M
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2725
7.40M
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2726
7.40M
                    bits += 2 * cost1;
2727
7.40M
                }
2728
                // correct latest bit cost, it didn't include cost0
2729
2.28M
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2730
2.28M
            }
2731
1.14M
        }
2732
4.77M
    }
2733
3.46M
    else
2734
3.46M
    {
2735
3.46M
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2736
3.46M
        const int ctxShift = log2TrSize - 2;
2737
2738
10.3M
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2739
6.92M
        {
2740
6.92M
            int bits = 0;
2741
6.92M
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2742
2743
29.7M
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2744
22.8M
            {
2745
22.8M
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2746
22.8M
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2747
22.8M
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2748
22.8M
            }
2749
2750
6.92M
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2751
6.92M
        }
2752
3.46M
    }
2753
8.23M
}
2754
2755
/* estimate bit cost of significant coefficient */
2756
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2757
8.23M
{
2758
8.23M
    if (bIsLuma)
2759
4.77M
    {
2760
4.77M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2761
4.77M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2762
2763
81.1M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2764
76.4M
        {
2765
76.4M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2766
76.4M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2767
76.4M
        }
2768
2769
23.8M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2770
19.1M
        {
2771
19.1M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2772
19.1M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2773
19.1M
        }
2774
4.77M
    }
2775
3.45M
    else
2776
3.45M
    {
2777
3.45M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2778
3.45M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2779
2780
31.1M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2781
27.7M
        {
2782
27.7M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2783
27.7M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2784
27.7M
        }
2785
2786
10.3M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2787
6.92M
        {
2788
6.92M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2789
6.92M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2790
6.92M
        }
2791
3.45M
    }
2792
8.23M
}
2793
2794
/* Initialize our context information from the nominated source */
2795
void Entropy::copyContextsFrom(const Entropy& src)
2796
10.1k
{
2797
10.1k
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2798
2799
10.1k
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2800
10.1k
    markValid();
2801
10.1k
}
2802
2803
void Entropy::start()
2804
714
{
2805
714
    m_low = 0;
2806
714
    m_range = 510;
2807
714
    m_bitsLeft = -12;
2808
714
    m_numBufferedBytes = 0;
2809
714
    m_bufferedByte = 0xff;
2810
714
}
2811
2812
void Entropy::finish()
2813
2.99k
{
2814
2.99k
    if (m_low >> (21 + m_bitsLeft))
2815
6
    {
2816
6
        m_bitIf->writeByte(m_bufferedByte + 1);
2817
8
        while (m_numBufferedBytes > 1)
2818
2
        {
2819
2
            m_bitIf->writeByte(0x00);
2820
2
            m_numBufferedBytes--;
2821
2
        }
2822
2823
6
        m_low -= 1 << (21 + m_bitsLeft);
2824
6
    }
2825
2.98k
    else
2826
2.98k
    {
2827
2.98k
        if (m_numBufferedBytes > 0)
2828
2.98k
            m_bitIf->writeByte(m_bufferedByte);
2829
2830
2.99k
        while (m_numBufferedBytes > 1)
2831
5
        {
2832
5
            m_bitIf->writeByte(0xff);
2833
5
            m_numBufferedBytes--;
2834
5
        }
2835
2.98k
    }
2836
2.99k
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2837
2.99k
}
2838
2839
void Entropy::copyState(const Entropy& other) // Copy the coder registers and bit-cost accumulator from other; m_contextState and m_bitIf are not assigned here
2840
11.1M
{
2841
11.1M
    m_low = other.m_low;
2842
11.1M
    m_range = other.m_range;
2843
11.1M
    m_bitsLeft = other.m_bitsLeft;
2844
11.1M
    m_bufferedByte = other.m_bufferedByte;
2845
11.1M
    m_numBufferedBytes = other.m_numBufferedBytes;
2846
11.1M
    m_fracBits = other.m_fracBits;
2847
11.1M
}
2848
2849
void Entropy::resetBits() // Reset the output-side coder state and the whole-bit part of m_fracBits; m_range is not modified here
2850
9.71M
{
2851
9.71M
    m_low = 0;
2852
9.71M
    m_bitsLeft = -12;
2853
9.71M
    m_numBufferedBytes = 0;
2854
9.71M
    m_bufferedByte = 0xff;
2855
9.71M
    m_fracBits &= 32767; // keep only the low 15 bits, i.e. a value below 32768 (the amount encodeBinEP() adds per bin)
2856
9.71M
    if (m_bitIf) // m_bitIf may be null (the encode* functions test for this), so only reset it when present
2857
0
        m_bitIf->resetBits();
2858
9.71M
}
2859
2860
/** Encode bin: code binValue with the context model ctxModel, which is updated in place; without m_bitIf only the bit cost is accumulated */
2861
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2862
29.3M
{
2863
29.3M
    uint32_t mstate = ctxModel; // remember the model state before it is updated
2864
2865
29.3M
    ctxModel = sbacNext(mstate, binValue); // the caller's context model always advances, in both modes below
2866
2867
29.3M
    if (!m_bitIf) // no bitstream attached: add the cost for the pre-update state to m_fracBits and stop
2868
29.0M
    {
2869
29.0M
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2870
29.0M
        return;
2871
29.0M
    }
2872
2873
347k
    uint32_t range = m_range;
2874
347k
    uint32_t state = sbacGetState(mstate);
2875
347k
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)]; // column index is bits 7..6 of range
2876
347k
    range -= lps; // range now holds the sub-interval used when the bin matches the low bit of mstate
2877
2878
347k
    X265_CHECK(lps >= 2, "lps is too small\n");
2879
2880
347k
    int numBits = (uint32_t)(range - 256) >> 31; // 1 when range < 256 (the unsigned subtraction wraps and sets the top bit), else 0
2881
347k
    uint32_t low = m_low;
2882
2883
    // NOTE: MPS must be LOWEST bit in mstate
2884
347k
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2885
347k
    if ((binValue ^ mstate) & 1) // bin differs from the low bit of mstate: LPS path
2886
39.1k
    {
2887
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2888
        //numBits = g_renormTable[lps >> 3];
2889
39.1k
        unsigned long idx;
2890
39.1k
        BSR(idx, lps); // NOTE(review): presumably stores the index of the highest set bit of lps in idx - confirm against the BSR macro
2891
39.1k
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2892
2893
39.1k
        numBits = 8 - idx; // shift count applied to low and range below
2894
39.1k
        if (state >= 63) // state 63 and above uses a fixed shift of 6
2895
0
            numBits = 6;
2896
39.1k
        X265_CHECK(numBits <= 6, "numBits failure\n");
2897
2898
39.1k
        low += range; // move low past the sub-interval computed above
2899
39.1k
        range = lps; // and continue with the LPS sub-interval
2900
39.1k
    }
2901
347k
    m_low = (low << numBits); // apply the shift chosen above to both registers
2902
347k
    m_range = (range << numBits);
2903
347k
    m_bitsLeft += numBits;
2904
2905
347k
    if (m_bitsLeft >= 0) // enough bits accumulated: let writeOut() take a byte out of m_low
2906
17.2k
        writeOut();
2907
347k
}
2908
2909
/** Encode equiprobable bin: no context model is used; without m_bitIf only the bit cost is accumulated */
2910
void Entropy::encodeBinEP(uint32_t binValue)
2911
1.01M
{
2912
1.01M
    if (!m_bitIf) // no bitstream attached: charge a fixed 32768 to m_fracBits and stop
2913
1.01M
    {
2914
1.01M
        m_fracBits += 32768;
2915
1.01M
        return;
2916
1.01M
    }
2917
519
    m_low <<= 1; // make room for one bin; m_range itself is left unchanged
2918
519
    if (binValue) // a non-zero bin adds m_range to m_low
2919
519
        m_low += m_range;
2920
519
    m_bitsLeft++;
2921
2922
519
    if (m_bitsLeft >= 0) // enough bits accumulated: let writeOut() take a byte out of m_low
2923
74
        writeOut();
2924
519
}
2925
2926
/** Encode equiprobable bins: codes the numBins bins held in binValues, highest-order bins first; without m_bitIf only the bit cost is accumulated */
2927
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2928
8.67M
{
2929
8.67M
    if (!m_bitIf) // no bitstream attached: charge 32768 per bin to m_fracBits and stop
2930
8.58M
    {
2931
8.58M
        m_fracBits += 32768 * numBins;
2932
8.58M
        return;
2933
8.58M
    }
2934
2935
89.1k
    while (numBins > 8) // consume the bins in groups of 8 until at most 8 remain
2936
3.93k
    {
2937
3.93k
        numBins -= 8;
2938
3.93k
        uint32_t pattern = binValues >> numBins; // the 8 highest-order bins still pending
2939
3.93k
        m_low <<= 8;
2940
3.93k
        m_low += m_range * pattern;
2941
3.93k
        binValues -= pattern << numBins; // drop the bins that were just consumed
2942
3.93k
        m_bitsLeft += 8;
2943
2944
3.93k
        if (m_bitsLeft >= 0)
2945
3.93k
            writeOut();
2946
3.93k
    }
2947
2948
85.1k
    m_low <<= numBins; // remaining bins (8 or fewer) are handled in one step
2949
85.1k
    m_low += m_range * binValues;
2950
85.1k
    m_bitsLeft += numBins;
2951
2952
85.1k
    if (m_bitsLeft >= 0) // enough bits accumulated: let writeOut() take a byte out of m_low
2953
25.1k
        writeOut();
2954
85.1k
}
2955
2956
/** Encode terminating bin: no context model is used; without m_bitIf only the bit cost is accumulated */
2957
void Entropy::encodeBinTrm(uint32_t binValue)
2958
30.3k
{
2959
30.3k
    if (!m_bitIf) // no bitstream attached: add the terminating-bin cost to m_fracBits and stop
2960
13.6k
    {
2961
13.6k
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2962
13.6k
        return;
2963
13.6k
    }
2964
2965
16.6k
    m_range -= 2; // a sub-interval of width 2 is set aside for binValue != 0
2966
16.6k
    if (binValue)
2967
2.99k
    {
2968
2.99k
        m_low += m_range; // move low past the reduced range
2969
2.99k
        m_low <<= 7;
2970
2.99k
        m_range = 2 << 7; // the width-2 interval shifted by the same 7 bits, i.e. 256
2971
2.99k
        m_bitsLeft += 7;
2972
2.99k
    }
2973
13.6k
    else if (m_range >= 256) // reduced range still at least 256: no shift needed, and writeOut() is not reached
2974
12.9k
        return;
2975
754
    else
2976
754
    {
2977
754
        m_low <<= 1; // range fell below 256: shift both registers by one bit
2978
754
        m_range <<= 1;
2979
754
        m_bitsLeft++;
2980
754
    }
2981
2982
3.74k
    if (m_bitsLeft >= 0) // enough bits accumulated: let writeOut() take a byte out of m_low
2983
2.79k
        writeOut();
2984
3.74k
}
2985
2986
/** Move bits from register into bitstream: takes the leading bits out of m_low and either holds them back or writes the previously held-back bytes to m_bitIf */
2987
void Entropy::writeOut()
2988
49.1k
{
2989
49.1k
    uint32_t leadByte = m_low >> (13 + m_bitsLeft); // can exceed 0xff: bit 8 is read as a carry below
2990
49.1k
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft); // mask for the bits that stay in m_low
2991
2992
49.1k
    m_bitsLeft -= 8;
2993
49.1k
    m_low &= low_mask; // drop the bits that were moved into leadByte
2994
2995
49.1k
    if (leadByte == 0xff) // 0xff is only counted, not written (presumably because a later carry could still change it)
2996
4.03k
        m_numBufferedBytes++;
2997
45.1k
    else
2998
45.1k
    {
2999
45.1k
        uint32_t numBufferedBytes = m_numBufferedBytes;
3000
45.1k
        if (numBufferedBytes > 0) // something is held back: write it out now
3001
42.1k
        {
3002
42.1k
            uint32_t carry = leadByte >> 8; // non-zero when leadByte is above 0xff
3003
42.1k
            uint32_t byteTowrite = m_bufferedByte + carry;
3004
42.1k
            m_bitIf->writeByte(byteTowrite);
3005
3006
42.1k
            byteTowrite = (0xff + carry) & 0xff; // 0xff when carry is 0, 0x00 when carry is 1
3007
46.1k
            while (numBufferedBytes > 1) // one such byte for every held-back byte beyond the first
3008
4.02k
            {
3009
4.02k
                m_bitIf->writeByte(byteTowrite);
3010
4.02k
                numBufferedBytes--;
3011
4.02k
            }
3012
42.1k
        }
3013
45.1k
        m_numBufferedBytes = 1; // the new byte becomes the only held-back byte
3014
45.1k
        m_bufferedByte = (uint8_t)leadByte; // low 8 bits only; any carry bit was consumed above
3015
45.1k
    }
3016
49.1k
}
3017
3018
const uint32_t g_entropyBits[128] = // 128-entry table of bit costs; NOTE(review): presumably the lookup behind sbacGetEntropyBits() in the same units as m_fracBits - confirm
3019
{
3020
    // Corrected table, most notably for last state
3021
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
3022
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
3023
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
3024
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
3025
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
3026
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
3027
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
3028
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
3029
};
3030
3031
const uint8_t g_nextState[128][2] = // 128 rows of two successor values; NOTE(review): presumably indexed [context state][bin value] by sbacNext() - confirm
3032
{
3033
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
3034
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
3035
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
3036
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
3037
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
3038
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
3039
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
3040
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
3041
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
3042
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
3043
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
3044
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
3045
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
3046
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
3047
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
3048
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 } // the last two rows map to themselves for either value
3049
};
3050
3051
}
3052
3053
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]; NOTE(review): the low 24 bits of each entry appear to repeat the matching g_entropyBits value, with extra data in the top 8 bits - confirm against the users of this table
3054
extern "C" const uint32_t PFX(entropyStateBits)[128] = // C linkage, defined after the X265_NS namespace is closed
3055
{
3056
    // Corrected table, most notably for last state
3057
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
3058
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
3059
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
3060
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
3061
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
3062
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
3063
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
3064
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
3065
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
3066
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
3067
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
3068
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
3069
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
3070
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
3071
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
3072
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
3073
};
3074