Coverage Report

Created: 2022-08-24 06:17

/src/x265/source/encoder/entropy.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
0
#define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
36
0
#define CU_DQP_EG_k    0 // exp-golomb order
37
0
#define START_VALUE    8 // start value for dpcm mode
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
// Initial probability values for the intra prediction mode contexts
// (three rows of NUM_ADI_CTX entries).
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
{
    { 183 },
    { 154 },
    { 184 }
};
98
99
// Initial probability values for the chroma prediction mode contexts
// (three rows of NUM_CHROMA_PRED_CTX entries).
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
{
    { 152, 139 },
    { 152, 139 },
    {  63, 139 }
};
105
106
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
// Initial probability values for the delta QP contexts
// (three rows of NUM_DELTA_QP_CTX entries).
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
{
    { 154, 154, 154 },
    { 154, 154, 154 },
    { 154, 154, 154 }
};
133
134
// Initial probability values for the quadtree CBF contexts
// (three rows of NUM_QT_CBF_CTX entries).
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
{
    { 153, 111, 149,  92, 167, 154, 154 },
    { 153, 111, 149, 107, 167, 154, 154 },
    { 111, 141,  94, 138, 182, 154, 154 }
};
140
141
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
// Initial probability values for the "last" position flag contexts
// (three rows of NUM_CTX_LAST_FLAG_XY entries).
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
{
    { 125, 110, 124, 110,  95,  94, 125, 111, 111,
       79, 125, 126, 111, 111,  79, 108, 123,  93 },
    { 125, 110,  94, 110,  95,  79, 125, 111, 110,
       78, 110, 111, 111,  95,  94, 108, 123, 108 },
    { 110, 110, 124, 125, 140, 153, 125, 127, 140,
      109, 111, 143, 127, 111,  79, 108, 123,  63 }
};
157
158
// Initial probability values for the significant coefficient-group flag
// contexts (three rows of 2 * NUM_SIG_CG_FLAG_CTX entries).
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
{
    { 121, 140,  61, 154 },
    { 121, 140,  61, 154 },
    {  91, 171, 134, 141 }
};
167
168
// Initial probability values for the significant coefficient flag contexts
// (three rows of NUM_SIG_FLAG_CTX entries; each row is wrapped over several
// source lines for readability).
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
{
    { 170, 154, 139, 153, 139, 123, 123,  63, 124,
      166, 183, 140, 136, 153, 154,
      166, 183, 140, 136, 153, 154,
      166, 183, 140, 136, 153, 154,
      170, 153, 138, 138, 122, 121, 122, 121,
      167, 151, 183, 140, 151, 183, 140 },
    { 155, 154, 139, 153, 139, 123, 123,  63, 153,
      166, 183, 140, 136, 153, 154,
      166, 183, 140, 136, 153, 154,
      166, 183, 140, 136, 153, 154,
      170, 153, 123, 123, 107, 121, 107, 121,
      167, 151, 183, 140, 151, 183, 140 },
    { 111, 111, 125, 110, 110,  94, 124, 108, 124,
      107, 125, 141, 179, 153, 125,
      107, 125, 141, 179, 153, 125,
      107, 125, 141, 179, 153, 125,
      140, 139, 182, 182, 152, 136, 152, 136,
      153, 136, 139, 111, 136, 139, 111 }
};
174
175
// Initial probability values for the "one" flag contexts
// (three rows of NUM_ONE_FLAG_CTX entries).
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
{
    { 154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136,
      153, 121, 136, 122, 169, 208, 166, 167, 154, 152, 167, 182 },
    { 154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136,
      153, 121, 136, 137, 169, 194, 166, 167, 154, 167, 137, 182 },
    { 140,  92, 137, 138, 140, 152, 138, 139, 153,  74, 149,  92,
      139, 107, 122, 152, 140, 179, 166, 182, 140, 227, 122, 197 }
};
181
182
// Initial probability values for the "abs" flag contexts
// (three rows of NUM_ABS_FLAG_CTX entries).
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
{
    { 107, 167,  91, 107, 107, 167 },
    { 107, 167,  91, 122, 107, 167 },
    { 138, 153, 136, 167, 152, 152 }
};
188
189
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
// Initial probability values for the SAO merge flag contexts
// (three rows of NUM_SAO_MERGE_FLAG_CTX entries).
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
{
    { 153 },
    { 153 },
    { 153 }
};
202
203
// Initial probability values for the SAO type index contexts
// (three rows of NUM_SAO_TYPE_IDX_CTX entries).
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
{
    { 160 },
    { 185 },
    { 200 }
};
209
210
// Initial probability values for the transform subdivision flag contexts
// (three rows of NUM_TRANS_SUBDIV_FLAG_CTX entries).
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
{
    { 224, 167, 122 },
    { 124, 138,  94 },
    { 153, 138, 138 }
};
216
217
// Initial probability values for the transform-skip flag contexts
// (three rows of 2 * NUM_TRANSFORMSKIP_FLAG_CTX entries).
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
{
    { 139, 139 },
    { 139, 139 },
    { 139, 139 }
};
223
224
/* Construct an entropy coder: call markValid() and zero the scalar
 * bookkeeping members. */
Entropy::Entropy()
{
    markValid();

    m_meanQP   = 0;
    m_pad      = 0;
    m_fracBits = 0;

    /* the context array must be able to hold MAX_OFF_CTX_MOD entries */
    X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
}
232
233
void Entropy::codeVPS(const VPS& vps)
234
0
{
235
0
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
236
0
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
237
0
    WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
238
0
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
239
0
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
240
0
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
241
242
0
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
243
244
0
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
245
246
0
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
247
0
    {
248
0
        WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
249
0
        WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
250
0
        WRITE_UVLC(vps.maxLatencyIncrease + 1, "vps_max_latency_increase_plus1[i]");
251
0
    }
252
253
0
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
254
0
    WRITE_UVLC(0,    "vps_max_op_sets_minus1");
255
0
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
256
0
    WRITE_FLAG(0,    "vps_extension_flag");
257
0
}
258
259
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
260
0
{
261
0
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
262
0
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
263
0
    WRITE_FLAG(sps.maxTempSubLayers == 1,   "sps_temporal_id_nesting_flag");
264
265
0
    codeProfileTier(ptl, sps.maxTempSubLayers);
266
267
0
    WRITE_UVLC(0, "sps_seq_parameter_set_id");
268
0
    WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
269
270
0
    if (sps.chromaFormatIdc == X265_CSP_I444)
271
0
        WRITE_FLAG(0,                       "separate_colour_plane_flag");
272
273
0
    WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
274
0
    WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
275
276
0
    const Window& conf = sps.conformanceWindow;
277
0
    WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
278
0
    if (conf.bEnabled)
279
0
    {
280
0
        int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
281
0
        WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
282
0
        WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
283
0
        WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
284
0
        WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
285
0
    }
286
287
0
    WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
288
0
    WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
289
0
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
290
0
    WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
291
292
0
    for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
293
0
    {
294
0
        WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
295
0
        WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
296
0
        WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
297
0
    }
298
299
0
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
300
0
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
301
0
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
302
0
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
303
0
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
304
0
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
305
0
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
306
0
    if (scalingList.m_bEnabled)
307
0
    {
308
0
        WRITE_FLAG(scalingList.m_bDataPresent,    "sps_scaling_list_data_present_flag");
309
0
        if (scalingList.m_bDataPresent)
310
0
            codeScalingList(scalingList);
311
0
    }
312
0
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
313
0
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
314
315
0
    WRITE_FLAG(0, "pcm_enabled_flag");
316
0
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
317
0
    for (int i = 0; i < sps.spsrpsNum; i++)
318
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
319
0
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
320
321
0
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
322
0
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
323
324
0
    WRITE_FLAG(1, "vui_parameters_present_flag");
325
0
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo);
326
327
0
    WRITE_FLAG(0, "sps_extension_flag");
328
0
}
329
330
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26 )
331
0
{
332
0
    WRITE_UVLC(0,                          "pps_pic_parameter_set_id");
333
0
    WRITE_UVLC(0,                          "pps_seq_parameter_set_id");
334
0
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
335
0
    WRITE_FLAG(0,                          "output_flag_present_flag");
336
0
    WRITE_CODE(0, 3,                       "num_extra_slice_header_bits");
337
0
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
338
0
    WRITE_FLAG(0,                          "cabac_init_present_flag");
339
0
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
340
0
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
341
342
0
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
343
0
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
344
0
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
345
346
0
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
347
0
    if (pps.bUseDQP)
348
0
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
349
350
0
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
351
0
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
352
0
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
353
354
0
    WRITE_FLAG(pps.bUseWeightPred,            "weighted_pred_flag");
355
0
    WRITE_FLAG(pps.bUseWeightedBiPred,        "weighted_bipred_flag");
356
0
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
357
0
    WRITE_FLAG(0,                             "tiles_enabled_flag");
358
0
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
359
0
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
360
361
0
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
362
0
    if (pps.bDeblockingFilterControlPresent)
363
0
    {
364
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
365
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
366
0
        if (!pps.bPicDisableDeblockingFilter)
367
0
        {
368
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
369
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
370
0
        }
371
0
    }
372
373
0
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
374
0
    WRITE_FLAG(0, "lists_modification_present_flag");
375
0
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
376
0
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
377
0
    WRITE_FLAG(0, "pps_extension_flag");
378
0
}
379
380
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers)
381
0
{
382
0
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
383
0
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
384
0
    WRITE_CODE(ptl.profileIdc, 5,   "XXX_profile_idc[]");
385
0
    for (int j = 0; j < 32; j++)
386
0
        WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
387
388
0
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
389
0
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
390
0
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
391
0
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
392
393
0
    if (ptl.profileIdc == Profile::MAINREXT || ptl.profileIdc == Profile::HIGHTHROUGHPUTREXT)
394
0
    {
395
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
396
0
        int csp = ptl.chromaFormatConstraint;
397
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
398
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
399
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
400
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
401
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
402
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
403
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
404
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
405
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
406
0
        WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
407
0
        WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
408
0
        WRITE_CODE(0 ,  3, "XXX_reserved_zero_35bits[32..34]");
409
0
    }
410
0
    else
411
0
    {
412
0
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
413
0
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
414
0
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
415
0
    }
416
417
0
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
418
419
0
    if (maxTempSubLayers > 1)
420
0
    {
421
0
         WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
422
0
         WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
423
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
424
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
425
0
    }
426
0
}
427
428
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo)
429
0
{
430
0
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
431
0
    if (vui.aspectRatioInfoPresentFlag)
432
0
    {
433
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
434
0
        if (vui.aspectRatioIdc == 255)
435
0
        {
436
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
437
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
438
0
        }
439
0
    }
440
441
0
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
442
0
    if (vui.overscanInfoPresentFlag)
443
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
444
445
0
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
446
0
    if (vui.videoSignalTypePresentFlag)
447
0
    {
448
0
        WRITE_CODE(vui.videoFormat, 3, "video_format");
449
0
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
450
0
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
451
0
        if (vui.colourDescriptionPresentFlag)
452
0
        {
453
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
454
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
455
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
456
0
        }
457
0
    }
458
459
0
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
460
0
    if (vui.chromaLocInfoPresentFlag)
461
0
    {
462
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
463
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
464
0
    }
465
466
0
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
467
0
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
468
0
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
469
470
0
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
471
0
    if (vui.defaultDisplayWindow.bEnabled)
472
0
    {
473
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
474
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
475
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
476
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
477
0
    }
478
479
0
    if (!bEmitVUITimingInfo)
480
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
481
0
    else
482
0
    {
483
0
        WRITE_FLAG(1, "vui_timing_info_present_flag");
484
0
        WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
485
0
        WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
486
0
        WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
487
0
    }
488
489
0
    if (!bEmitVUIHRDInfo)
490
0
        WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
491
0
    else
492
0
    {
493
0
        WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
494
0
        if (vui.hrdParametersPresentFlag)
495
0
            codeHrdParameters(vui.hrdParameters, maxSubTLayers);
496
0
    }
497
498
0
    WRITE_FLAG(0, "bitstream_restriction_flag");
499
0
}
500
501
void Entropy::codeScalingList(const ScalingList& scalingList)
502
0
{
503
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
504
0
    {
505
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
506
0
        {
507
0
            int predList = scalingList.checkPredMode(sizeId, listId);
508
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
509
0
            if (predList >= 0)
510
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
511
0
            else // DPCM Mode
512
0
                codeScalingList(scalingList, sizeId, listId);
513
0
        }
514
0
    }
515
0
}
516
517
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
518
0
{
519
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
520
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
521
0
    int nextCoef = START_VALUE;
522
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
523
0
    int data;
524
525
0
    if (sizeId > BLOCK_8x8)
526
0
    {
527
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
528
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
529
0
    }
530
0
    for (int i = 0; i < coefNum; i++)
531
0
    {
532
0
        data = src[scan[i]] - nextCoef;
533
0
        if (data < -128)
534
0
            data += 256;
535
0
        if (data > 127)
536
0
            data -= 256;
537
0
        nextCoef = (nextCoef + data + 256) % 256;
538
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
539
0
    }
540
0
}
541
542
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
543
0
{
544
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
545
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
546
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
547
548
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
549
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
550
551
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
552
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
553
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
554
555
0
    for (int i = 0; i < maxSubTLayers; i++)
556
0
    {
557
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
558
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
559
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
560
561
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
562
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
563
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
564
0
    }
565
0
}
566
567
void Entropy::codeAUD(const Slice& slice)
568
0
{
569
0
    int picType;
570
571
0
    switch (slice.m_sliceType)
572
0
    {
573
0
    case I_SLICE:
574
0
        picType = 0;
575
0
        break;
576
0
    case P_SLICE:
577
0
        picType = 1;
578
0
        break;
579
0
    case B_SLICE:
580
0
        picType = 2;
581
0
        break;
582
0
    default:
583
0
        picType = 7;
584
0
        break;
585
0
    }
586
587
0
    WRITE_CODE(picType, 3, "pic_type");
588
0
}
589
590
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp)
591
0
{
592
0
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
593
0
    if (slice.getRapPicFlag())
594
0
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
595
596
0
    WRITE_UVLC(0, "slice_pic_parameter_set_id");
597
598
    /* x265 does not use dependent slices, so always write all this data */
599
0
    if (slice_addr)
600
0
    {
601
        // if( dependent_slice_segments_enabled_flag )
602
        //     dependent_slice_segment_flag             u(1)
603
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
604
0
    }
605
606
0
    WRITE_UVLC(slice.m_sliceType, "slice_type");
607
608
0
    if (!slice.getIdrPicFlag())
609
0
    {
610
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
611
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
612
613
#if _DEBUG || CHECKED_BUILD
614
        // check for bitstream restriction stating that:
615
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
616
        // Ideally this process should not be repeated for each slice in a picture
617
        if (slice.isIRAP())
618
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
619
            {
620
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
621
            }
622
#endif
623
624
0
        if (slice.m_rpsIdx < 0)
625
0
        {
626
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
627
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
628
0
        }
629
0
        else
630
0
        {
631
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
632
0
            int numBits = 0;
633
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
634
0
                numBits++;
635
636
0
            if (numBits > 0)
637
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
638
0
        }
639
640
0
        if (slice.m_sps->bTemporalMVPEnabled)
641
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
642
0
    }
643
0
    const SAOParam *saoParam = encData.m_saoParam;
644
0
    if (slice.m_bUseSao)
645
0
    {
646
0
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
647
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
648
0
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
649
0
    }
650
0
    else if(encData.m_param->selectiveSAO)
651
0
    {
652
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
653
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
654
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
655
0
    }
656
657
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
658
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
659
660
0
    if (!slice.isIntra())
661
0
    {
662
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
663
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
664
0
        if (overrideFlag)
665
0
        {
666
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
667
0
            if (slice.isInterB())
668
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
669
0
            else
670
0
            {
671
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
672
0
            }
673
0
        }
674
0
    }
675
0
    else
676
0
    {
677
0
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
678
0
    }
679
680
0
    if (slice.isInterB())
681
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
682
683
0
    if (slice.m_sps->bTemporalMVPEnabled)
684
0
    {
685
0
        if (slice.m_sliceType == B_SLICE)
686
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
687
688
0
        if (slice.m_sliceType != I_SLICE &&
689
0
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
690
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
691
0
        {
692
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
693
0
        }
694
0
    }
695
0
    if ((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE))
696
0
        codePredWeightTable(slice);
697
698
0
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
699
0
    if (!slice.isIntra())
700
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
701
702
0
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
703
0
    WRITE_SVLC(code, "slice_qp_delta");
704
705
0
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
706
0
    {
707
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
708
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
709
0
    }
710
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
711
    //       We didn't support filter across slice board, so disable it now
712
713
0
    if (encData.m_param->maxSlices <= 1)
714
0
    {
715
0
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
716
0
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
717
718
0
        if (isSAOEnabled || isDBFEnabled)
719
0
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
720
0
    }
721
0
}
722
723
/** write wavefront substreams sizes for the slice header */
724
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
725
0
{
726
0
    uint32_t offsetLen = 1;
727
0
    while (maxOffset >= (1U << offsetLen))
728
0
    {
729
0
        offsetLen++;
730
0
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
731
0
    }
732
733
0
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
734
0
    if (numSubStreams > 0)
735
0
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
736
737
0
    for (uint32_t i = 0; i < numSubStreams; i++)
738
0
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
739
0
}
740
741
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
742
0
{
743
0
    if (idx > 0)
744
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
745
746
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
747
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
748
0
    int prev = 0;
749
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
750
0
    {
751
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
752
0
        prev = rps.deltaPOC[j];
753
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
754
0
    }
755
756
0
    prev = 0;
757
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
758
0
    {
759
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
760
0
        prev = rps.deltaPOC[j];
761
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
762
0
    }
763
0
}
764
765
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
766
0
{
767
0
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
768
0
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
769
0
}
770
771
/* encode a CU block recursively */
772
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
773
0
{
774
0
    const Slice* slice = ctu.m_slice;
775
776
0
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
777
0
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
778
779
0
    if (!cuUnsplitFlag)
780
0
    {
781
0
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
782
0
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
783
0
            bEncodeDQP = true;
784
0
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
785
0
        {
786
0
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
787
0
            if (childGeom.flags & CUGeom::PRESENT)
788
0
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
789
0
        }
790
0
        return;
791
0
    }
792
793
0
    if (cuSplitFlag) 
794
0
        codeSplitFlag(ctu, absPartIdx, depth);
795
796
0
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
797
0
    {
798
0
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
799
0
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
800
0
            bEncodeDQP = true;
801
0
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
802
0
        {
803
0
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
804
0
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
805
0
        }
806
0
        return;
807
0
    }
808
809
0
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
810
0
        bEncodeDQP = true;
811
812
0
    if (slice->m_pps->bTransquantBypassEnabled)
813
0
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
814
815
0
    if (!slice->isIntra())
816
0
    {
817
0
        codeSkipFlag(ctu, absPartIdx);
818
0
        if (ctu.isSkipped(absPartIdx))
819
0
        {
820
0
            codeMergeIndex(ctu, absPartIdx);
821
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
822
0
            return;
823
0
        }
824
0
        codePredMode(ctu.m_predMode[absPartIdx]);
825
0
    }
826
827
0
    codePartSize(ctu, absPartIdx, depth);
828
829
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
830
0
    codePredInfo(ctu, absPartIdx);
831
832
0
    uint32_t tuDepthRange[2];
833
0
    if (ctu.isIntra(absPartIdx))
834
0
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
835
0
    else
836
0
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
837
838
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
839
0
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
840
841
    // --- write terminating bit ---
842
0
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
843
0
}
844
845
/* Return bit count of signaling inter mode */
846
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
847
0
{
848
0
    uint32_t bits;
849
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
850
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
851
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
852
0
    switch (partSize)
853
0
    {
854
0
    case SIZE_2Nx2N:
855
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
856
0
        break;
857
858
0
    case SIZE_2NxN:
859
0
    case SIZE_2NxnU:
860
0
    case SIZE_2NxnD:
861
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
862
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
863
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
864
0
        {
865
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
866
0
            if (partSize != SIZE_2NxN)
867
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
868
0
        }
869
0
        break;
870
871
0
    case SIZE_Nx2N:
872
0
    case SIZE_nLx2N:
873
0
    case SIZE_nRx2N:
874
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
875
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
876
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
877
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
878
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
879
0
        {
880
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
881
0
            if (partSize != SIZE_Nx2N)
882
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
883
0
        }
884
0
        break;
885
0
    default:
886
0
        X265_CHECK(0, "invalid CU partition\n");
887
0
        break;
888
0
    }
889
890
0
    return bits;
891
0
}
892
893
/* finish encoding a cu and handle end-of-slice conditions */
894
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
895
0
{
896
0
    const Slice* slice = ctu.m_slice;
897
0
    uint32_t realEndAddress = slice->m_endCUAddr;
898
0
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
899
0
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
900
901
0
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
902
0
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
903
0
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
904
0
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
905
0
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
906
0
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
907
908
0
    if (slice->m_pps->bUseDQP)
909
0
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
910
911
0
    if (granularityBoundary)
912
0
    {
913
        // Encode slice finish
914
0
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
915
0
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
916
0
            bTerminateSlice = 1;
917
918
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
919
0
        if (!bTerminateSlice)
920
0
            encodeBinTrm(0);    // end_of_slice_segment_flag
921
922
0
        if (!m_bitIf)
923
0
            resetBits(); // TODO: most likely unnecessary
924
0
    }
925
0
}
926
927
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
928
                              bool& bCodeDQP, const uint32_t depthRange[2])
929
0
{
930
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
931
932
    /* in each of these conditions, the subdiv flag is implied and not signaled,
933
     * so we have checks to make sure the implied value matches our intentions */
934
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
935
0
    {
936
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
937
0
    }
938
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
939
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
940
0
    {
941
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
942
0
    }
943
0
    else if (log2CurSize > depthRange[1])
944
0
    {
945
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
946
0
    }
947
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
948
0
    {
949
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
950
0
    }
951
0
    else
952
0
    {
953
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
954
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
955
0
    }
956
957
0
    uint32_t hChromaShift = cu.m_hChromaShift;
958
0
    uint32_t vChromaShift = cu.m_vChromaShift;
959
0
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
960
0
    if (!curDepth || !bSmallChroma)
961
0
    {
962
0
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
963
0
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
964
0
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
965
0
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
966
0
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
967
0
    }
968
969
0
    if (subdiv)
970
0
    {
971
0
        --log2CurSize;
972
0
        ++curDepth;
973
974
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
975
976
0
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
977
0
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
978
0
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
979
0
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
980
0
        return;
981
0
    }
982
983
0
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
984
985
0
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
986
0
    {
987
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
988
0
    }
989
0
    else
990
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
991
992
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
993
0
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
994
0
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
995
0
    if (!(cbfY || cbfU || cbfV))
996
0
        return;
997
998
    // dQP: only for CTU once
999
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1000
0
    {
1001
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1002
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1003
0
        codeDeltaQP(cu, absPartIdxLT);
1004
0
        bCodeDQP = false;
1005
0
    }
1006
1007
0
    if (cbfY)
1008
0
    {
1009
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1010
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1011
0
        if (!(cbfU || cbfV))
1012
0
            return;
1013
0
    }
1014
1015
0
    if (bSmallChroma)
1016
0
    {
1017
0
        if ((absPartIdx & 3) != 3)
1018
0
            return;
1019
1020
0
        const uint32_t log2CurSizeC = 2;
1021
0
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1022
0
        const uint32_t curPartNum = 4;
1023
0
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1024
0
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1025
0
        {
1026
0
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1027
0
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1028
0
            do
1029
0
            {
1030
0
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1031
0
                {
1032
0
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1033
0
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1034
0
                }
1035
0
            }
1036
0
            while (tuIterator.isNextSection());
1037
0
        }
1038
0
    }
1039
0
    else
1040
0
    {
1041
0
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1042
0
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1043
0
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1044
0
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1045
0
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1046
0
        {
1047
0
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1048
0
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1049
0
            do
1050
0
            {
1051
0
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1052
0
                {
1053
0
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1054
0
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1055
0
                }
1056
0
            }
1057
0
            while (tuIterator.isNextSection());
1058
0
        }
1059
0
    }
1060
0
}
1061
1062
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1063
                              bool& bCodeDQP, const uint32_t depthRange[2])
1064
0
{
1065
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1066
1067
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1068
     * so we have checks to make sure the implied value matches our intentions */
1069
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1070
0
    {
1071
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1072
0
    }
1073
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1074
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1075
0
    {
1076
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1077
0
    }
1078
0
    else if (log2CurSize > depthRange[1])
1079
0
    {
1080
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1081
0
    }
1082
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1083
0
    {
1084
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1085
0
    }
1086
0
    else
1087
0
    {
1088
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1089
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1090
0
    }
1091
1092
0
    if (subdiv)
1093
0
    {
1094
0
        --log2CurSize;
1095
0
        ++curDepth;
1096
1097
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1098
1099
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1100
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1101
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1102
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1103
0
        return;
1104
0
    }
1105
1106
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1107
0
    {
1108
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1109
0
    }
1110
0
    else
1111
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1112
1113
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1114
1115
0
    if (!cbfY)
1116
0
        return;
1117
1118
    // dQP: only for CTU once
1119
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1120
0
    {
1121
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1122
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1123
0
        codeDeltaQP(cu, absPartIdxLT);
1124
0
        bCodeDQP = false;
1125
0
    }
1126
1127
0
    if (cbfY)
1128
0
    {
1129
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1130
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1131
0
    }
1132
0
}
1133
1134
1135
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1136
0
{
1137
0
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1138
0
    {
1139
0
        codeIntraDirLumaAng(cu, absPartIdx, true);
1140
0
        if (cu.m_chromaFormat != X265_CSP_I400)
1141
0
        {
1142
0
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1143
0
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1144
1145
0
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1146
1147
0
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1148
0
            {
1149
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1150
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1151
0
                {
1152
0
                    absPartIdx += qNumParts;
1153
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1154
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1155
0
                }
1156
0
            }
1157
0
        }
1158
0
    }
1159
0
    else // if it is inter mode, encode motion vector and reference index
1160
0
        codePUWise(cu, absPartIdx);
1161
0
}
1162
1163
/** encode motion information for every PU block */
1164
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1165
0
{
1166
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1167
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1168
1169
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1170
0
    {
1171
0
        codeMergeFlag(cu, subPartIdx);
1172
0
        if (cu.m_mergeFlag[subPartIdx])
1173
0
            codeMergeIndex(cu, subPartIdx);
1174
0
        else
1175
0
        {
1176
0
            if (cu.m_slice->isInterB())
1177
0
                codeInterDir(cu, subPartIdx);
1178
1179
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1180
0
            for (uint32_t list = 0; list < 2; list++)
1181
0
            {
1182
0
                if (interDir & (1 << list))
1183
0
                {
1184
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1185
1186
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1187
0
                    codeMvd(cu, subPartIdx, list);
1188
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1189
0
                }
1190
0
            }
1191
0
        }
1192
0
    }
1193
0
}
1194
1195
/** encode reference frame index for a PU block */
1196
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1197
0
{
1198
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1199
1200
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1201
0
        codeRefFrmIdx(cu, absPartIdx, list);
1202
0
}
1203
1204
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1205
0
{
1206
0
    if (!cu.isIntra(absPartIdx))
1207
0
    {
1208
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1209
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1210
0
        if (!cu.getQtRootCbf(absPartIdx))
1211
0
            return;
1212
0
    }
1213
1214
0
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1215
0
    if (cu.m_chromaFormat == X265_CSP_I400)
1216
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1217
0
    else
1218
0
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1219
0
}
1220
1221
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1222
0
{
1223
0
    int typeIdx = ctuParam.typeIdx;
1224
1225
0
    if (plane != 2)
1226
0
    {
1227
0
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1228
0
        if (typeIdx >= 0)
1229
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1230
0
    }
1231
1232
0
    if (typeIdx >= 0)
1233
0
    {
1234
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1235
0
        if (typeIdx == SAO_BO)
1236
0
        {
1237
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1238
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1239
1240
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1241
0
                if (ctuParam.offset[i] != 0)
1242
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1243
1244
0
            encodeBinsEP(ctuParam.bandPos, 5);
1245
0
        }
1246
0
        else // if (typeIdx < SAO_BO)
1247
0
        {
1248
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1249
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1250
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1251
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1252
0
            if (plane != 2)
1253
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1254
0
        }
1255
0
    }
1256
0
}
1257
1258
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1259
0
{
1260
0
    if (plane != 2)
1261
0
    {
1262
0
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1263
0
        encodeBinEP(1);
1264
0
    }
1265
1266
0
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1267
1268
0
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1269
0
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1270
0
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1271
0
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1272
0
    if (plane != 2)
1273
0
        encodeBinsEP((uint32_t)(typeIdx), 2);
1274
0
}
1275
1276
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1277
0
{
1278
0
    if (plane != 2)
1279
0
    {
1280
0
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1281
0
        encodeBinEP(0);
1282
0
    }
1283
1284
0
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1285
1286
0
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1287
0
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1288
1289
0
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1290
0
        if (offset[i] != 0)
1291
0
            encodeBinEP(offset[i] < 0);
1292
1293
0
    encodeBinsEP(bandPos, 5);
1294
0
}
1295
1296
/** initialize context model with respect to QP and initialization value */
1297
uint8_t sbacInit(int qp, int initValue)
1298
0
{
1299
0
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1300
1301
0
    int  slope      = (initValue >> 4) * 5 - 45;
1302
0
    int  offset     = ((initValue & 15) << 3) - 16;
1303
0
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1304
0
    uint32_t mpState = (initState >= 64);
1305
0
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1306
1307
0
    return (uint8_t)state;
1308
0
}
1309
1310
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1311
0
{
1312
0
    ctxModel += sliceType * size;
1313
1314
0
    for (int n = 0; n < size; n++)
1315
0
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1316
0
}
1317
1318
void Entropy::resetEntropy(const Slice& slice)
1319
0
{
1320
0
    int  qp              = slice.m_sliceQp;
1321
0
    SliceType sliceType  = slice.m_sliceType;
1322
1323
0
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1324
0
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1325
0
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1326
0
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1327
0
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1328
0
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1329
0
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1330
0
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1331
0
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1332
0
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1333
0
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1334
0
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1335
0
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1336
0
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1337
0
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1338
0
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1339
0
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1340
0
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1341
0
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1342
0
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1343
0
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1344
0
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1345
0
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1346
0
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1347
0
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1348
0
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1349
    // new structure
1350
1351
0
    start();
1352
0
}
1353
1354
/* code explicit wp tables */
1355
void Entropy::codePredWeightTable(const Slice& slice)
1356
0
{
1357
0
    const WeightParam *wp;
1358
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1359
0
    bool            bDenomCoded  = false;
1360
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1361
0
    uint32_t        totalSignalledWeightFlags = 0;
1362
1363
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1364
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1365
0
    {
1366
0
        for (int list = 0; list < numRefDirs; list++)
1367
0
        {
1368
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1369
0
            {
1370
0
                wp = slice.m_weightPredTable[list][ref];
1371
0
                if (!bDenomCoded)
1372
0
                {
1373
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1374
1375
0
                    if (bChroma)
1376
0
                    {
1377
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1378
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1379
0
                    }
1380
0
                    bDenomCoded = true;
1381
0
                }
1382
0
                WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1383
0
                totalSignalledWeightFlags += wp[0].wtPresent;
1384
0
            }
1385
1386
0
            if (bChroma)
1387
0
            {
1388
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1389
0
                {
1390
0
                    wp = slice.m_weightPredTable[list][ref];
1391
0
                    WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1392
0
                    totalSignalledWeightFlags += 2 * wp[1].wtPresent;
1393
0
                }
1394
0
            }
1395
1396
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1397
0
            {
1398
0
                wp = slice.m_weightPredTable[list][ref];
1399
0
                if (wp[0].wtPresent)
1400
0
                {
1401
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1402
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1403
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1404
0
                }
1405
1406
0
                if (bChroma)
1407
0
                {
1408
0
                    if (wp[1].wtPresent)
1409
0
                    {
1410
0
                        for (int plane = 1; plane < 3; plane++)
1411
0
                        {
1412
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1413
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1414
1415
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1416
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1417
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1418
0
                        }
1419
0
                    }
1420
0
                }
1421
0
            }
1422
0
        }
1423
1424
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1425
0
    }
1426
0
}
1427
1428
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1429
0
{
1430
0
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1431
1432
0
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1433
1434
0
    if (!symbol)
1435
0
        return;
1436
1437
0
    bool bCodeLast = (maxSymbol > symbol);
1438
1439
0
    while (--symbol)
1440
0
        encodeBin(1, scmModel[offset]);
1441
1442
0
    if (bCodeLast)
1443
0
        encodeBin(0, scmModel[offset]);
1444
0
}
1445
1446
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1447
0
{
1448
0
    uint32_t bins = 0;
1449
0
    int numBins = 0;
1450
1451
0
    while (symbol >= (uint32_t)(1 << count))
1452
0
    {
1453
0
        bins = 2 * bins + 1;
1454
0
        numBins++;
1455
0
        symbol -= 1 << count;
1456
0
        count++;
1457
0
    }
1458
1459
0
    bins = 2 * bins + 0;
1460
0
    numBins++;
1461
1462
0
    bins = (bins << count) | symbol;
1463
0
    numBins += count;
1464
1465
0
    X265_CHECK(numBins <= 32, "numBins too large\n");
1466
0
    encodeBinsEP(bins, numBins);
1467
0
}
1468
1469
/** Coding of coeff_abs_level_minus3 */
1470
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1471
0
{
1472
0
    uint32_t length;
1473
0
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1474
1475
0
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1476
0
    {
1477
0
        length = codeNumber >> absGoRice;
1478
1479
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1480
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1481
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1482
0
    }
1483
0
    else
1484
0
    {
1485
0
        length = 0;
1486
0
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1487
0
        {
1488
0
            unsigned long idx;
1489
0
            CLZ(idx, codeNumber + 1);
1490
0
            length = idx;
1491
0
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1492
0
            codeNumber -= (1 << idx) - 1;
1493
0
        }
1494
0
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1495
1496
0
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1497
0
        encodeBinsEP(codeNumber, length + absGoRice);
1498
0
    }
1499
0
}
1500
1501
// SBAC RD
1502
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1503
0
{
1504
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1505
0
    m_fracBits = src.m_fracBits;
1506
0
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1507
0
}
1508
1509
void Entropy::copyFrom(const Entropy& src)
1510
0
{
1511
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1512
1513
0
    copyState(src);
1514
1515
0
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1516
0
    markValid();
1517
0
}
1518
1519
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1520
0
{
1521
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1522
1523
0
    if (cu.isIntra(absPartIdx))
1524
0
    {
1525
0
        if (depth == cu.m_encData->m_param->maxCUDepth)
1526
0
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1527
0
        return;
1528
0
    }
1529
1530
0
    switch (partSize)
1531
0
    {
1532
0
    case SIZE_2Nx2N:
1533
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1534
0
        break;
1535
1536
0
    case SIZE_2NxN:
1537
0
    case SIZE_2NxnU:
1538
0
    case SIZE_2NxnD:
1539
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1540
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1541
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1542
0
        {
1543
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1544
0
            if (partSize != SIZE_2NxN)
1545
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1546
0
        }
1547
0
        break;
1548
1549
0
    case SIZE_Nx2N:
1550
0
    case SIZE_nLx2N:
1551
0
    case SIZE_nRx2N:
1552
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1553
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1554
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1555
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1556
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1557
0
        {
1558
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1559
0
            if (partSize != SIZE_Nx2N)
1560
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1561
0
        }
1562
0
        break;
1563
0
    default:
1564
0
        X265_CHECK(0, "invalid CU partition\n");
1565
0
        break;
1566
0
    }
1567
0
}
1568
1569
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1570
0
{
1571
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1572
1573
0
    if (numCand > 1)
1574
0
    {
1575
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1576
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1577
1578
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1579
1580
0
        if (unaryIdx != 0)
1581
0
        {
1582
0
            uint32_t mask = (1 << unaryIdx) - 2;
1583
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1584
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1585
0
        }
1586
0
    }
1587
0
}
1588
1589
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1590
0
{
1591
0
    uint32_t dir[4], j;
1592
0
    uint32_t preds[4][3];
1593
0
    int predIdx[4];
1594
0
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
1595
0
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1596
1597
0
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
1598
0
    {
1599
0
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
1600
0
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
1601
0
        predIdx[j] = -1;
1602
0
        for (uint32_t i = 0; i < 3; i++)
1603
0
            if (dir[j] == preds[j][i])
1604
0
                predIdx[j] = i;
1605
1606
0
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
1607
0
    }
1608
1609
0
    for (j = 0; j < partNum; j++)
1610
0
    {
1611
0
        if (predIdx[j] != -1)
1612
0
        {
1613
0
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
1614
            // NOTE: Mapping
1615
            //       0 = 0
1616
            //       1 = 10
1617
            //       2 = 11
1618
0
            int nonzero = (!!predIdx[j]);
1619
0
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
1620
0
        }
1621
0
        else
1622
0
        {
1623
0
            if (preds[j][0] > preds[j][1])
1624
0
                std::swap(preds[j][0], preds[j][1]);
1625
1626
0
            if (preds[j][0] > preds[j][2])
1627
0
                std::swap(preds[j][0], preds[j][2]);
1628
1629
0
            if (preds[j][1] > preds[j][2])
1630
0
                std::swap(preds[j][1], preds[j][2]);
1631
1632
0
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
1633
0
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
1634
0
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
1635
1636
0
            encodeBinsEP(dir[j], 5);
1637
0
        }
1638
0
    }
1639
0
}
1640
1641
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
1642
0
{
1643
0
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
1644
1645
0
    if (intraDirChroma == DM_CHROMA_IDX)
1646
0
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
1647
0
    else
1648
0
    {
1649
0
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
1650
0
        {
1651
0
            if (intraDirChroma == chromaDirMode[i])
1652
0
            {
1653
0
                intraDirChroma = i;
1654
0
                break;
1655
0
            }
1656
0
        }
1657
1658
0
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
1659
0
        encodeBinsEP(intraDirChroma, 2);
1660
0
    }
1661
0
}
1662
1663
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
1664
0
{
1665
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
1666
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
1667
1668
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
1669
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
1670
0
    if (interDir < 2)
1671
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
1672
0
}
1673
1674
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
1675
0
{
1676
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
1677
1678
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
1679
1680
0
    if (refFrame > 0)
1681
0
    {
1682
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
1683
0
        if (refNum == 0)
1684
0
            return;
1685
1686
0
        refFrame--;
1687
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
1688
0
        if (refFrame > 0)
1689
0
        {
1690
0
            uint32_t mask = (1 << refFrame) - 2;
1691
0
            mask >>= (refFrame == refNum) ? 1 : 0;
1692
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
1693
0
        }
1694
0
    }
1695
0
}
1696
1697
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
1698
0
{
1699
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
1700
0
    const int hor = mvd.x;
1701
0
    const int ver = mvd.y;
1702
1703
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1704
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1705
1706
0
    const bool bHorAbsGr0 = hor != 0;
1707
0
    const bool bVerAbsGr0 = ver != 0;
1708
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
1709
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
1710
1711
0
    if (bHorAbsGr0)
1712
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1713
1714
0
    if (bVerAbsGr0)
1715
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1716
1717
0
    if (bHorAbsGr0)
1718
0
    {
1719
0
        if (horAbs > 1)
1720
0
            writeEpExGolomb(horAbs - 2, 1);
1721
1722
0
        encodeBinEP(0 > hor ? 1 : 0);
1723
0
    }
1724
1725
0
    if (bVerAbsGr0)
1726
0
    {
1727
0
        if (verAbs > 1)
1728
0
            writeEpExGolomb(verAbs - 2, 1);
1729
1730
0
        encodeBinEP(0 > ver ? 1 : 0);
1731
0
    }
1732
0
}
1733
1734
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
1735
0
{
1736
0
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
1737
1738
0
    int qpBdOffsetY = QP_BD_OFFSET;
1739
1740
0
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
1741
1742
0
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
1743
0
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
1744
0
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
1745
0
    if (absDQp >= CU_DQP_TU_CMAX)
1746
0
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
1747
1748
0
    if (absDQp > 0)
1749
0
    {
1750
0
        uint32_t sign = (dqp > 0 ? 0 : 1);
1751
0
        encodeBinEP(sign);
1752
0
    }
1753
0
}
1754
1755
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
1756
0
{
1757
0
    uint32_t ctx = tuDepth + 2;
1758
1759
0
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
1760
0
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
1761
0
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
1762
1763
0
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
1764
0
    {
1765
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
1766
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
1767
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
1768
1769
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1770
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1771
0
    }
1772
0
    else
1773
0
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1774
0
}
1775
1776
#if CHECKED_BUILD || _DEBUG
/* Reference bin count for the remaining-level part of up to numNonZero
 * coefficients in absCoeff[]; built only for checked/debug builds, where it is
 * compared against primitives.costCoeffRemain().  At least one coefficient is
 * always processed (do/while). */
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
    // 2-bits encode format baseLevel: eight fields of 2, then eight fields of 1
    uint32_t packedBaseLevel = 0x5555AAAA;
    uint32_t riceParam = 0;
    uint32_t totalBins = 0;
    int firstCoeff2 = 1;
    int idx = 0;

    do
    {
        const uint16_t level = absCoeff[idx];
        const int baseLevel = (packedBaseLevel & 3) | firstCoeff2;
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
        packedBaseLevel >>= 2;

        int codeNumber = level - baseLevel;

        if (codeNumber >= 0)
        {
            //writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
            codeNumber = ((uint32_t)codeNumber >> riceParam) - COEF_REMAIN_BIN_REDUCTION;
            if (codeNumber >= 0)
            {
                unsigned long topBit;
                CLZ(topBit, codeNumber + 1);
                const uint32_t length = topBit;
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");

                codeNumber = (length + length);
            }
            totalBins += (COEF_REMAIN_BIN_REDUCTION + 1 + riceParam + codeNumber);

            if (level > (COEF_REMAIN_BIN_REDUCTION << riceParam))
                riceParam = (riceParam + 1) - (riceParam >> 2);
            X265_CHECK(riceParam <= 4, "goRiceParam check failure\n");
        }

        if (level >= 2)
            firstCoeff2 = 0;
        idx++;
    }
    while (idx < numNonZero);

    return totalBins;
}
#endif // debug only code
1824
1825
// Code all coefficient syntax of one transform block of size trSize x trSize
// (trSize = 1 << log2TrSize) belonging to 'cu' at 'absPartIdx'.
//
//   cu          CU that owns the block (slice/PPS flags and scan parameters are read from it)
//   coeff       coefficients of the block; at least one must be non-zero (checked below)
//   absPartIdx  partition index of the block inside the CU
//   log2TrSize  log2 of the block width/height
//   ttype       plane; TEXT_LUMA selects the luma context sets, anything else the chroma sets
//
// Order of the emitted syntax: optional transform-skip flag, position of the
// last significant coefficient, then for every coefficient group (CG) from the
// last one down to group 0: CG significance flag, per-coefficient significance
// flags, greater-than-1 / greater-than-2 flags, sign bits and remaining levels.
//
// Two modes share this function: when m_bitIf is set the bins are really
// encoded; otherwise ("fast RD path") cost primitives are called instead and
// only m_fracBits is updated.
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
1826
0
{
1827
0
    uint32_t trSize = 1 << log2TrSize;
1828
0
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
1829
    // compute number of significant coefficients
1830
0
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
1831
0
    X265_CHECK(numSig > 0, "cbf check fail\n");
1832
0
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
1833
1834
0
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
1835
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
1836
1837
0
    bool bIsLuma = ttype == TEXT_LUMA;
1838
1839
    // select scans
1840
0
    TUEntropyCodingParameters codingParameters;
1841
0
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
1842
1843
0
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
1844
0
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
1845
0
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
1846
1847
    //----- encode significance map -----
1848
1849
    // Find position of last coefficient
1850
0
    int scanPosLast = 0;
1851
0
    uint32_t posLast;
1852
0
    uint64_t sigCoeffGroupFlag64 = 0;
1853
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
1854
0
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
1855
1856
0
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
1857
0
    posLast = codingParameters.scan[scanPosLast];
1858
1859
0
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
1860
1861
    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
1862
0
    for(int idx = 0; idx < lastScanSet; idx++)
1863
0
    {
1864
0
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
1865
0
        const uint8_t nonZero = (coeffNum[idx] != 0);
1866
0
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
1867
0
    }
1868
1869
1870
    // Code position of last coefficient
1871
0
    {
1872
        // The last position is composed of a prefix and suffix.
1873
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
1874
        // The bypass coded bins for both the x and y components are grouped together.
1875
0
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
1876
0
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
1877
        // swap
1878
0
        if (codingParameters.scanType == SCAN_VER)
1879
0
            std::swap(pos[0], pos[1]);
1880
1881
0
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1882
0
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
1883
0
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
1884
0
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
1885
0
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
1886
1887
0
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
1888
0
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
1889
0
        {
            // g_lastCoeffTable entry: low 4 bits = number of prefix one-bins, upper bits = suffix length
1890
0
            uint32_t temp = g_lastCoeffTable[pos[i]];
1891
0
            uint32_t prefixOnes = temp & 15;
1892
0
            uint32_t suffixLen = temp >> 4;
1893
1894
0
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
1895
0
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
1896
1897
0
            if (prefixOnes < maxGroupIdx)
1898
0
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
1899
1900
0
            packedSuffixBits <<= suffixLen;
1901
0
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
1902
0
            packedSuffixLen += suffixLen;
1903
0
        }
1904
1905
0
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
1906
0
    }
1907
1908
    // code significance flag
1909
0
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
1910
0
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
1911
0
    uint32_t c1 = 1;
1912
0
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
1913
0
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
1914
0
    uint32_t numNonZero = 1;
1915
0
    unsigned long lastNZPosInCG;
1916
0
    unsigned long firstNZPosInCG;
1917
1918
#if _DEBUG
1919
    // Unnecessary, for Valgrind-3.10.0 only
1920
    memset(absCoeff, 0, sizeof(absCoeff));
1921
#endif
1922
    // the last significant coefficient is stored up front; numNonZero already counts it (starts at 1)
1923
0
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
1924
1925
0
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
1926
0
    {
1927
0
        const uint32_t subCoeffFlag = coeffFlag[subSet];
1928
0
        uint32_t scanFlagMask = subCoeffFlag;
1929
0
        int subPosBase = subSet << MLS_CG_SIZE;
1930
        
1931
0
        if (subSet == lastScanSet)
1932
0
        {
1933
0
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
1934
0
            // drop one flag bit in the last group - presumably the bit of the last
            // coefficient itself, whose magnitude is already in absCoeff[0]
            scanFlagMask >>= 1;
1935
0
        }
1936
1937
        // encode significant_coeffgroup_flag
1938
0
        const int cgBlkPos = codingParameters.scanCG[subSet];
1939
0
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
1940
0
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
1941
0
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
1942
        // the last group and group 0 are always marked significant and get no flag bin
1943
0
        if (subSet == lastScanSet || !subSet)
1944
0
            sigCoeffGroupFlag64 |= cgBlkPosMask;
1945
0
        else
1946
0
        {
1947
0
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
1948
0
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
1949
0
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
1950
0
        }
1951
1952
        // encode significant_coeff_flag
1953
0
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
1954
0
        {
1955
0
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
1956
0
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
1957
0
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
1958
1959
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
1960
0
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
1961
0
            {
1962
                // patternSigCtx = 0
1963
0
                {
1964
0
                    2, 1, 1, 0,
1965
0
                    1, 1, 0, 0,
1966
0
                    1, 0, 0, 0,
1967
0
                    0, 0, 0, 0,
1968
0
                },
1969
                // patternSigCtx = 1
1970
0
                {
1971
0
                    2, 2, 2, 2,
1972
0
                    1, 1, 1, 1,
1973
0
                    0, 0, 0, 0,
1974
0
                    0, 0, 0, 0,
1975
0
                },
1976
                // patternSigCtx = 2
1977
0
                {
1978
0
                    2, 1, 0, 0,
1979
0
                    2, 1, 0, 0,
1980
0
                    2, 1, 0, 0,
1981
0
                    2, 1, 0, 0,
1982
0
                },
1983
                // patternSigCtx = 3
1984
0
                {
1985
0
                    2, 2, 2, 2,
1986
0
                    2, 2, 2, 2,
1987
0
                    2, 2, 2, 2,
1988
0
                    2, 2, 2, 2,
1989
0
                },
1990
                // 4x4
1991
0
                {
1992
0
                    0, 1, 4, 5,
1993
0
                    2, 3, 4, 5,
1994
0
                    6, 6, 8, 8,
1995
0
                    7, 7, 8, 8
1996
0
                }
1997
0
            };
1998
1999
0
            const int offset = codingParameters.firstSignificanceMapContext;
2000
0
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2001
2002
0
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2003
0
            if (m_bitIf)
2004
0
            {
2005
0
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2006
2007
                // TODO: accelerate by PABSW
                // gather the absolute values of this group's coefficients, row by row, into tmpCoeff
2008
0
                for (int i = 0; i < MLS_CG_SIZE; i++)
2009
0
                {
2010
0
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2011
0
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2012
0
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2013
0
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2014
0
                }
2015
2016
0
                if (log2TrSize == 2)
2017
0
                {
2018
0
                    do
2019
0
                    {
2020
0
                        uint32_t blkPos, sig, ctxSig;
2021
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2022
0
                        sig     = scanFlagMask & 1;
2023
0
                        scanFlagMask >>= 1;
2024
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2025
0
                        {
2026
0
                            ctxSig = table_cnt[4][blkPos];
2027
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2028
0
                            encodeBin(sig, baseCtx[ctxSig]);
2029
0
                        }
                        // always store the magnitude; the write index only advances for significant coefficients
2030
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2031
0
                        numNonZero += sig;
2032
0
                        scanPosSigOff--;
2033
0
                    }
2034
0
                    while(scanPosSigOff >= 0);
2035
0
                }
2036
0
                else
2037
0
                {
2038
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2039
2040
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2041
0
                    do
2042
0
                    {
2043
0
                        uint32_t blkPos, sig, ctxSig;
2044
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
                        // all-zero mask for scan position 0 of the whole block, forcing ctxSig to 0 there
2045
0
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2046
0
                        sig     = scanFlagMask & 1;
2047
0
                        scanFlagMask >>= 1;
2048
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
                        // the flag of the group's first scan position is not coded when the group is not
                        // group 0 and no coefficient of the group has been found significant so far
2049
0
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2050
0
                        {
2051
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2052
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2053
2054
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2055
0
                            encodeBin(sig, baseCtx[ctxSig]);
2056
0
                        }
2057
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2058
0
                        numNonZero += sig;
2059
0
                        scanPosSigOff--;
2060
0
                    }
2061
0
                    while(scanPosSigOff >= 0);
2062
0
                }
2063
0
            }
2064
0
            else // fast RD path
2065
0
            {
2066
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2067
0
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2068
0
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2069
0
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2070
2071
#if CHECKED_BUILD || _DEBUG
2072
                numNonZero = coeffNum[subSet];
2073
#endif
2074
                // update RD cost
2075
0
                m_fracBits += sum;
2076
0
            } // end of fast RD path -- !m_bitIf
2077
0
        }
2078
0
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2079
2080
0
        uint32_t coeffSigns = coeffSign[subSet];
2081
0
        numNonZero = coeffNum[subSet];
2082
0
        if (numNonZero > 0)
2083
0
        {
2084
0
            uint32_t idx;
2085
0
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2086
0
            CLZ(lastNZPosInCG, subCoeffFlag);
2087
0
            CTZ(firstNZPosInCG, subCoeffFlag);
2088
            // sign hiding is possible only when the set bits of subCoeffFlag span at least SBH_THRESHOLD positions
2089
0
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2090
0
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2091
0
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2092
2093
0
            c1 = 1;
2094
0
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2095
2096
0
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2097
0
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2098
2099
0
            if (!m_bitIf)
2100
0
            {
2101
0
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
                // the primitive's result is unpacked as: bits 28 and up = firstC2Idx,
                // bits 26-27 = next c1 state, low 24 bits = cost added to m_fracBits
2102
0
                uint32_t firstC2Idx = (sum >> 28);
2103
0
                c1 = ((sum >> 26) & 3);
2104
0
                m_fracBits += sum & 0x00FFFFFF;
2105
2106
0
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2107
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2108
0
                m_fracBits += (numNonZero + hiddenShift) << 15;
2109
2110
0
                if (numNonZero > firstC2Idx)
2111
0
                {
2112
0
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2113
0
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2114
0
                    m_fracBits += ((uint64_t)sum << 15);
2115
0
                }
2116
0
            }
2117
            // Standard path
2118
0
            else
2119
0
            {
2120
0
                uint32_t firstC2Idx = 8;
2121
0
                uint32_t firstC2Flag = 2;
                // queue of 2-bit c1 states read from the low end: c1 goes 1 -> 2 -> 3 -> 3 ...
                // until a coefficient greater than 1 clears the queue, after which c1 stays 0
2122
0
                uint32_t c1Next = 0xFFFFFFFE;
2123
2124
0
                idx = 0;
2125
0
                do
2126
0
                {
2127
0
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2128
0
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2129
0
                    encodeBin(symbol1, baseCtxMod[c1]);
2130
2131
0
                    if (symbol1)
2132
0
                        c1Next = 0;
2133
                    // latch the greater-than-2 flag and the index of the first coefficient greater than 1;
                    // the sentinels 2 and 8 mean "not found yet"
2134
0
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2135
0
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2136
2137
0
                    c1 = (c1Next & 3);
2138
0
                    c1Next >>= 2;
2139
0
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2140
0
                    idx++;
2141
0
                }
2142
0
                while(idx < numC1Flag);
2143
2144
0
                if (!c1)
2145
0
                {
2146
0
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2147
2148
0
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2149
0
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2150
0
                }
2151
                // with sign hiding in effect the lowest bit of coeffSigns is left out and one fewer sign bin is written
2152
0
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2153
0
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2154
2155
0
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2156
0
                {
2157
                    // Standard path
2158
0
                    uint32_t goRiceParam = 0;
2159
0
                    int baseLevel = 3;
2160
0
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2161
#if CHECKED_BUILD || _DEBUG
2162
                    int firstCoeff2 = 1;
2163
#endif
2164
0
                    idx = firstC2Idx;
2165
0
                    do
2166
0
                    {
2167
0
                        if (idx >= C1FLAG_NUMBER)
2168
0
                            baseLevel = 1;
2169
                        // TODO: fast algorithm maybe broken this check logic
2170
0
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2171
2172
0
                        if (absCoeff[idx] >= baseLevel)
2173
0
                        {
2174
0
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2175
0
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
                            // raise the Rice parameter (capped at 4) and double the threshold when the level exceeds it
2176
0
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2177
0
                            goRiceParam += adjust;
2178
0
                            threshold += (adjust) ? threshold : 0;
2179
0
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2180
0
                        }
2181
#if CHECKED_BUILD || _DEBUG
2182
                        firstCoeff2 = 0;
2183
#endif
2184
0
                        baseLevel = 2;
2185
0
                        idx++;
2186
0
                    }
2187
0
                    while(idx < numNonZero);
2188
0
                }
2189
0
            } // end of !bitIf
2190
0
        } // end of (numNonZero > 0)
2191
2192
        // Initialize value for next loop
2193
0
        numNonZero = 0;
2194
0
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2195
0
    }
2196
0
}
2197
2198
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2199
0
{
2200
0
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2201
2202
0
    uint32_t isCodeNonZero = !!code;
2203
2204
0
    encodeBinEP(isCodeNonZero);
2205
0
    if (isCodeNonZero)
2206
0
    {
2207
0
        uint32_t isCodeLast = (maxSymbol > code);
2208
0
        uint32_t mask = (1 << (code - 1)) - 1;
2209
0
        uint32_t len = code - 1 + isCodeLast;
2210
0
        mask <<= isCodeLast;
2211
2212
0
        encodeBinsEP(mask, len);
2213
0
    }
2214
0
}
2215
2216
/* estimate bit cost for CBP, significant map and significant coefficients */
2217
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2218
0
{
2219
0
    estCBFBit(estBitsSbac);
2220
2221
0
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2222
2223
    // encode significance map
2224
0
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2225
2226
    // encode significant coefficients
2227
0
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2228
0
}
2229
2230
/* estimate bit cost for each CBP bit */
2231
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2232
0
{
2233
0
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2234
2235
0
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2236
0
    {
2237
0
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2238
0
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2239
0
    }
2240
2241
0
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2242
2243
0
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2244
0
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2245
0
}
2246
2247
/* estimate SAMBAC bit cost for significant coefficient group map */
2248
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2249
0
{
2250
0
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2251
2252
0
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2253
0
        for (uint32_t bin = 0; bin < 2; bin++)
2254
0
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2255
0
}
2256
2257
/* estimate SAMBAC bit cost for significant coefficient map */
2258
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2259
0
{
2260
0
    int firstCtx = 1, numCtx = 8;
2261
2262
0
    if (log2TrSize >= 4)
2263
0
    {
2264
0
        firstCtx = bIsLuma ? 21 : 12;
2265
0
        numCtx = bIsLuma ? 6 : 3;
2266
0
    }
2267
0
    else if (log2TrSize == 3)
2268
0
    {
2269
0
        firstCtx = 9;
2270
0
        numCtx = bIsLuma ? 12 : 3;
2271
0
    }
2272
2273
0
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2274
2275
0
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2276
0
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2277
2278
0
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2279
0
    {
2280
0
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2281
0
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2282
0
    }
2283
2284
0
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2285
0
    if (bIsLuma)
2286
0
    {
2287
0
        if (log2TrSize == 2)
2288
0
        {
2289
0
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2290
0
            {
2291
0
                int bits = 0;
2292
0
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2293
2294
0
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2295
0
                {
2296
0
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2297
0
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2298
0
                }
2299
2300
0
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2301
0
            }
2302
0
        }
2303
0
        else
2304
0
        {
2305
0
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2306
2307
0
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2308
0
            {
2309
0
                int bits = 0;
2310
0
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2311
0
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2312
2313
0
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2314
0
                {
2315
0
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2316
0
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2317
0
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2318
0
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2319
0
                    bits += 2 * cost1;
2320
0
                }
2321
                // correct latest bit cost, it didn't include cost0
2322
0
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2323
0
            }
2324
0
        }
2325
0
    }
2326
0
    else
2327
0
    {
2328
0
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2329
0
        const int ctxShift = log2TrSize - 2;
2330
2331
0
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2332
0
        {
2333
0
            int bits = 0;
2334
0
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2335
2336
0
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2337
0
            {
2338
0
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2339
0
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2340
0
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2341
0
            }
2342
2343
0
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2344
0
        }
2345
0
    }
2346
0
}
2347
2348
/* estimate bit cost of significant coefficient */
2349
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2350
0
{
2351
0
    if (bIsLuma)
2352
0
    {
2353
0
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2354
0
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2355
2356
0
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2357
0
        {
2358
0
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2359
0
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2360
0
        }
2361
2362
0
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2363
0
        {
2364
0
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2365
0
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2366
0
        }
2367
0
    }
2368
0
    else
2369
0
    {
2370
0
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2371
0
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2372
2373
0
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2374
0
        {
2375
0
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2376
0
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2377
0
        }
2378
2379
0
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2380
0
        {
2381
0
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2382
0
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2383
0
        }
2384
0
    }
2385
0
}
2386
2387
/* Initialize our context information from the nominated source */
2388
void Entropy::copyContextsFrom(const Entropy& src)
2389
0
{
2390
0
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2391
2392
0
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2393
0
    markValid();
2394
0
}
2395
2396
void Entropy::start()
2397
0
{
2398
0
    m_low = 0;
2399
0
    m_range = 510;
2400
0
    m_bitsLeft = -12;
2401
0
    m_numBufferedBytes = 0;
2402
0
    m_bufferedByte = 0xff;
2403
0
}
2404
2405
void Entropy::finish()
2406
0
{
2407
0
    if (m_low >> (21 + m_bitsLeft))
2408
0
    {
2409
0
        m_bitIf->writeByte(m_bufferedByte + 1);
2410
0
        while (m_numBufferedBytes > 1)
2411
0
        {
2412
0
            m_bitIf->writeByte(0x00);
2413
0
            m_numBufferedBytes--;
2414
0
        }
2415
2416
0
        m_low -= 1 << (21 + m_bitsLeft);
2417
0
    }
2418
0
    else
2419
0
    {
2420
0
        if (m_numBufferedBytes > 0)
2421
0
            m_bitIf->writeByte(m_bufferedByte);
2422
2423
0
        while (m_numBufferedBytes > 1)
2424
0
        {
2425
0
            m_bitIf->writeByte(0xff);
2426
0
            m_numBufferedBytes--;
2427
0
        }
2428
0
    }
2429
0
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2430
0
}
2431
2432
void Entropy::copyState(const Entropy& other)
2433
0
{
2434
0
    m_low = other.m_low;
2435
0
    m_range = other.m_range;
2436
0
    m_bitsLeft = other.m_bitsLeft;
2437
0
    m_bufferedByte = other.m_bufferedByte;
2438
0
    m_numBufferedBytes = other.m_numBufferedBytes;
2439
0
    m_fracBits = other.m_fracBits;
2440
0
}
2441
2442
void Entropy::resetBits()
2443
0
{
2444
0
    m_low = 0;
2445
0
    m_bitsLeft = -12;
2446
0
    m_numBufferedBytes = 0;
2447
0
    m_bufferedByte = 0xff;
2448
0
    m_fracBits &= 32767;
2449
0
    if (m_bitIf)
2450
0
        m_bitIf->resetBits();
2451
0
}
2452
2453
/** Encode bin */
2454
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2455
0
{
2456
0
    uint32_t mstate = ctxModel;
2457
2458
0
    ctxModel = sbacNext(mstate, binValue);
2459
2460
0
    if (!m_bitIf)
2461
0
    {
2462
0
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2463
0
        return;
2464
0
    }
2465
2466
0
    uint32_t range = m_range;
2467
0
    uint32_t state = sbacGetState(mstate);
2468
0
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2469
0
    range -= lps;
2470
2471
0
    X265_CHECK(lps >= 2, "lps is too small\n");
2472
2473
0
    int numBits = (uint32_t)(range - 256) >> 31;
2474
0
    uint32_t low = m_low;
2475
2476
    // NOTE: MPS must be LOWEST bit in mstate
2477
0
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2478
0
    if ((binValue ^ mstate) & 1)
2479
0
    {
2480
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2481
        //numBits = g_renormTable[lps >> 3];
2482
0
        unsigned long idx;
2483
0
        CLZ(idx, lps);
2484
0
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2485
2486
0
        numBits = 8 - idx;
2487
0
        if (state >= 63)
2488
0
            numBits = 6;
2489
0
        X265_CHECK(numBits <= 6, "numBits failure\n");
2490
2491
0
        low += range;
2492
0
        range = lps;
2493
0
    }
2494
0
    m_low = (low << numBits);
2495
0
    m_range = (range << numBits);
2496
0
    m_bitsLeft += numBits;
2497
2498
0
    if (m_bitsLeft >= 0)
2499
0
        writeOut();
2500
0
}
2501
2502
/** Encode equiprobable bin */
2503
void Entropy::encodeBinEP(uint32_t binValue)
2504
0
{
2505
0
    if (!m_bitIf)
2506
0
    {
2507
0
        m_fracBits += 32768;
2508
0
        return;
2509
0
    }
2510
0
    m_low <<= 1;
2511
0
    if (binValue)
2512
0
        m_low += m_range;
2513
0
    m_bitsLeft++;
2514
2515
0
    if (m_bitsLeft >= 0)
2516
0
        writeOut();
2517
0
}
2518
2519
/** Encode equiprobable bins */
2520
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2521
0
{
2522
0
    if (!m_bitIf)
2523
0
    {
2524
0
        m_fracBits += 32768 * numBins;
2525
0
        return;
2526
0
    }
2527
2528
0
    while (numBins > 8)
2529
0
    {
2530
0
        numBins -= 8;
2531
0
        uint32_t pattern = binValues >> numBins;
2532
0
        m_low <<= 8;
2533
0
        m_low += m_range * pattern;
2534
0
        binValues -= pattern << numBins;
2535
0
        m_bitsLeft += 8;
2536
2537
0
        if (m_bitsLeft >= 0)
2538
0
            writeOut();
2539
0
    }
2540
2541
0
    m_low <<= numBins;
2542
0
    m_low += m_range * binValues;
2543
0
    m_bitsLeft += numBins;
2544
2545
0
    if (m_bitsLeft >= 0)
2546
0
        writeOut();
2547
0
}
2548
2549
/** Encode terminating bin */
2550
void Entropy::encodeBinTrm(uint32_t binValue)
2551
0
{
2552
0
    if (!m_bitIf)
2553
0
    {
2554
0
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2555
0
        return;
2556
0
    }
2557
2558
0
    m_range -= 2;
2559
0
    if (binValue)
2560
0
    {
2561
0
        m_low += m_range;
2562
0
        m_low <<= 7;
2563
0
        m_range = 2 << 7;
2564
0
        m_bitsLeft += 7;
2565
0
    }
2566
0
    else if (m_range >= 256)
2567
0
        return;
2568
0
    else
2569
0
    {
2570
0
        m_low <<= 1;
2571
0
        m_range <<= 1;
2572
0
        m_bitsLeft++;
2573
0
    }
2574
2575
0
    if (m_bitsLeft >= 0)
2576
0
        writeOut();
2577
0
}
2578
2579
/** Move bits from register into bitstream */
2580
void Entropy::writeOut()
2581
0
{
2582
0
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2583
0
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2584
2585
0
    m_bitsLeft -= 8;
2586
0
    m_low &= low_mask;
2587
2588
0
    if (leadByte == 0xff)
2589
0
        m_numBufferedBytes++;
2590
0
    else
2591
0
    {
2592
0
        uint32_t numBufferedBytes = m_numBufferedBytes;
2593
0
        if (numBufferedBytes > 0)
2594
0
        {
2595
0
            uint32_t carry = leadByte >> 8;
2596
0
            uint32_t byteTowrite = m_bufferedByte + carry;
2597
0
            m_bitIf->writeByte(byteTowrite);
2598
2599
0
            byteTowrite = (0xff + carry) & 0xff;
2600
0
            while (numBufferedBytes > 1)
2601
0
            {
2602
0
                m_bitIf->writeByte(byteTowrite);
2603
0
                numBufferedBytes--;
2604
0
            }
2605
0
        }
2606
0
        m_numBufferedBytes = 1;
2607
0
        m_bufferedByte = (uint8_t)leadByte;
2608
0
    }
2609
0
}
2610
2611
// Bit cost lookup table with 128 entries. encodeBinEP() charges 32768 (0x8000)
// for one bypass bin, so the entries are presumably expressed in the same
// 1/32768 bit units - confirm against sbacGetEntropyBits().
// NOTE(review): presumably indexed by (context state ^ bin value); the
// accessor is defined outside this file section - verify.
const uint32_t g_entropyBits[128] =
2612
{
2613
    // Corrected table, most notably for last state
2614
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
2615
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
2616
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
2617
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
2618
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
2619
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
2620
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
2621
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
2622
};
2623
2624
// Context state transition table: 128 rows of two successor values each.
// NOTE(review): presumably row = current context state and column = coded bin
// value, consumed through sbacNext() - confirm against its definition.
// The last two rows ({126, 126} and {127, 127}) map onto themselves for both
// columns.
const uint8_t g_nextState[128][2] =
2625
{
2626
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
2627
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
2628
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
2629
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
2630
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
2631
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
2632
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
2633
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
2634
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
2635
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
2636
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
2637
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
2638
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
2639
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
2640
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
2641
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
2642
};
2643
2644
}
2645
2646
// Packed table, one 32-bit word per entry:
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
// i.e. an 8-bit state field in the top byte and a 24-bit bit cost below it.
// The low 24 bits of the leading entries equal the matching g_entropyBits
// values (0x007B23, 0x0085F9, ...).
// NOTE(review): declared extern "C" under the PFX() name, presumably so that
// non-C++ code (e.g. assembly) can reference it - confirm with the users.
2647
extern "C" const uint32_t PFX(entropyStateBits)[128] =
2648
{
2649
    // Corrected table, most notably for last state
2650
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
2651
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
2652
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
2653
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
2654
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
2655
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
2656
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
2657
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
2658
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
2659
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
2660
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
2661
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
2662
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
2663
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
2664
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
2665
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
2666
};
2667