Coverage Report

Created: 2022-08-24 06:17

/src/x265/source/encoder/entropy.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Steve Borho <steve@borho.org>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "framedata.h"
27
#include "scalinglist.h"
28
#include "quant.h"
29
#include "contexts.h"
30
#include "picyuv.h"
31
32
#include "sao.h"
33
#include "entropy.h"
34
35
14.3k
/* Constants used when coding delta-QP values and scaling lists. */
#define CU_DQP_TU_CMAX 5 /* maximum number of bins for the truncated unary code */
#define CU_DQP_EG_k    0 /* order of the exp-golomb code */
#define START_VALUE    8 /* initial predictor value for DPCM mode */
38
39
namespace X265_NS {
40
41
// initial probability for cu_transquant_bypass flag
42
static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43
{
44
    { 154 },
45
    { 154 },
46
    { 154 },
47
};
48
49
// initial probability for split flag
50
static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51
{
52
    { 107,  139,  126, },
53
    { 107,  139,  126, },
54
    { 139,  141,  157, },
55
};
56
57
static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58
{
59
    { 197,  185,  201, },
60
    { 197,  185,  201, },
61
    { CNU,  CNU,  CNU, },
62
};
63
64
static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65
{
66
    { 154, },
67
    { 110, },
68
    { CNU, },
69
};
70
71
static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72
{
73
    { 137, },
74
    { 122, },
75
    { CNU, },
76
};
77
78
static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79
{
80
    { 154,  139,  154, 154 },
81
    { 154,  139,  154, 154 },
82
    { 184,  CNU,  CNU, CNU },
83
};
84
85
static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86
{
87
    { 134, },
88
    { 149, },
89
    { CNU, },
90
};
91
92
/* Initial probability for the intra prediction mode (purpose inferred from the
 * table name): 3 rows of NUM_ADI_CTX entries. */
static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] = {
    { 183 },
    { 154 },
    { 184 },
};
98
99
/* Initial probability for the chroma prediction mode (purpose inferred from the
 * table name): 3 rows of NUM_CHROMA_PRED_CTX entries. */
static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] = {
    { 152, 139 },
    { 152, 139 },
    {  63, 139 },
};
105
106
static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107
{
108
    {  95,   79,   63,   31,  31, },
109
    {  95,   79,   63,   31,  31, },
110
    { CNU,  CNU,  CNU,  CNU, CNU, },
111
};
112
113
static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114
{
115
    { 169,  198, },
116
    { 140,  198, },
117
    { CNU,  CNU, },
118
};
119
120
static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121
{
122
    { 153,  153 },
123
    { 153,  153 },
124
    { CNU,  CNU },
125
};
126
127
/* Initial probability for the delta-QP contexts (purpose inferred from the table
 * name): 3 rows of NUM_DELTA_QP_CTX entries. */
static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] = {
    { 154, 154, 154 },
    { 154, 154, 154 },
    { 154, 154, 154 },
};
133
134
/* Initial probability for the quadtree CBF contexts (purpose inferred from the
 * table name): 3 rows of NUM_QT_CBF_CTX entries. */
static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] = {
    { 153, 111, 149,  92, 167, 154, 154 },
    { 153, 111, 149, 107, 167, 154, 154 },
    { 111, 141,  94, 138, 182, 154, 154 },
};
140
141
static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142
{
143
    {  79, },
144
    {  79, },
145
    { CNU, },
146
};
147
148
/* Initial probability for the "last" position contexts (purpose inferred from the
 * table name): 3 rows of NUM_CTX_LAST_FLAG_XY entries. */
static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] = {
    { 125, 110, 124, 110,  95,  94, 125, 111, 111,
       79, 125, 126, 111, 111,  79, 108, 123,  93 },
    { 125, 110,  94, 110,  95,  79, 125, 111, 110,
       78, 110, 111, 111,  95,  94, 108, 123, 108 },
    { 110, 110, 124, 125, 140, 153, 125, 127, 140,
      109, 111, 143, 127, 111,  79, 108, 123,  63 },
};
157
158
/* Initial probability for the significant coefficient-group flag (purpose inferred
 * from the table name): 3 rows of 2 * NUM_SIG_CG_FLAG_CTX entries. */
static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] = {
    { 121, 140,  61, 154 },
    { 121, 140,  61, 154 },
    {  91, 171, 134, 141 },
};
167
168
/* Initial probability for the significant coefficient flag (purpose inferred from
 * the table name): 3 rows of NUM_SIG_FLAG_CTX entries. */
static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] = {
    { 170, 154, 139, 153, 139, 123, 123,  63, 124,
      166, 183, 140, 136, 153, 154,
      166, 183, 140, 136, 153, 154,
      166, 183, 140, 136, 153, 154,
      170, 153, 138, 138, 122, 121, 122, 121, 167,
      151, 183, 140, 151, 183, 140 },
    { 155, 154, 139, 153, 139, 123, 123,  63, 153,
      166, 183, 140, 136, 153, 154,
      166, 183, 140, 136, 153, 154,
      166, 183, 140, 136, 153, 154,
      170, 153, 123, 123, 107, 121, 107, 121, 167,
      151, 183, 140, 151, 183, 140 },
    { 111, 111, 125, 110, 110,  94, 124, 108, 124,
      107, 125, 141, 179, 153, 125,
      107, 125, 141, 179, 153, 125,
      107, 125, 141, 179, 153, 125,
      140, 139, 182, 182, 152, 136, 152, 136, 153,
      136, 139, 111, 136, 139, 111 },
};
174
175
/* Initial probability for the "one" flag contexts (purpose inferred from the table
 * name): 3 rows of NUM_ONE_FLAG_CTX entries. */
static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] = {
    { 154, 196, 167, 167, 154, 152, 167, 182,
      182, 134, 149, 136, 153, 121, 136, 122,
      169, 208, 166, 167, 154, 152, 167, 182 },
    { 154, 196, 196, 167, 154, 152, 167, 182,
      182, 134, 149, 136, 153, 121, 136, 137,
      169, 194, 166, 167, 154, 167, 137, 182 },
    { 140,  92, 137, 138, 140, 152, 138, 139,
      153,  74, 149,  92, 139, 107, 122, 152,
      140, 179, 166, 182, 140, 227, 122, 197 },
};
181
182
/* Initial probability for the "abs" flag contexts (purpose inferred from the table
 * name): 3 rows of NUM_ABS_FLAG_CTX entries. */
static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] = {
    { 107, 167,  91, 107, 107, 167 },
    { 107, 167,  91, 122, 107, 167 },
    { 138, 153, 136, 167, 152, 152 },
};
188
189
static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190
{
191
    { 168 },
192
    { 168 },
193
    { CNU },
194
};
195
196
/* Initial probability for the SAO merge flag (purpose inferred from the table name):
 * 3 rows of NUM_SAO_MERGE_FLAG_CTX entries. */
static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] = {
    { 153 },
    { 153 },
    { 153 },
};
202
203
/* Initial probability for the SAO type index (purpose inferred from the table name):
 * 3 rows of NUM_SAO_TYPE_IDX_CTX entries. */
static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] = {
    { 160 },
    { 185 },
    { 200 },
};
209
210
/* Initial probability for the transform subdivision flag (purpose inferred from the
 * table name): 3 rows of NUM_TRANS_SUBDIV_FLAG_CTX entries. */
static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] = {
    { 224, 167, 122 },
    { 124, 138,  94 },
    { 153, 138, 138 },
};
216
217
/* Initial probability for the transform-skip flag (purpose inferred from the table
 * name): 3 rows of 2 * NUM_TRANSFORMSKIP_FLAG_CTX entries. */
static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] = {
    { 139, 139 },
    { 139, 139 },
    { 139, 139 },
};
223
224
/* Construct an entropy coder: call markValid(), zero the m_fracBits, m_pad and
 * m_meanQP members, and (in checked builds) verify that m_contextState holds at
 * least MAX_OFF_CTX_MOD elements. */
Entropy::Entropy()
{
    markValid();

    m_meanQP = 0;
    m_pad = 0;
    m_fracBits = 0;

    X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
}
232
233
void Entropy::codeVPS(const VPS& vps)
234
698
{
235
698
    WRITE_CODE(0,       4, "vps_video_parameter_set_id");
236
698
    WRITE_CODE(3,       2, "vps_reserved_three_2bits");
237
698
    WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
238
698
    WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
239
698
    WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
240
698
    WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
241
242
698
    codeProfileTier(vps.ptl, vps.maxTempSubLayers);
243
244
698
    WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
245
246
1.39k
    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
247
698
    {
248
698
        WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
249
698
        WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
250
698
        WRITE_UVLC(vps.maxLatencyIncrease + 1, "vps_max_latency_increase_plus1[i]");
251
698
    }
252
253
698
    WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
254
698
    WRITE_UVLC(0,    "vps_max_op_sets_minus1");
255
698
    WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
256
698
    WRITE_FLAG(0,    "vps_extension_flag");
257
698
}
258
259
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
260
698
{
261
698
    WRITE_CODE(0, 4, "sps_video_parameter_set_id");
262
698
    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
263
698
    WRITE_FLAG(sps.maxTempSubLayers == 1,   "sps_temporal_id_nesting_flag");
264
265
698
    codeProfileTier(ptl, sps.maxTempSubLayers);
266
267
698
    WRITE_UVLC(0, "sps_seq_parameter_set_id");
268
698
    WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
269
270
698
    if (sps.chromaFormatIdc == X265_CSP_I444)
271
0
        WRITE_FLAG(0,                       "separate_colour_plane_flag");
272
273
698
    WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
274
698
    WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
275
276
698
    const Window& conf = sps.conformanceWindow;
277
698
    WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
278
698
    if (conf.bEnabled)
279
567
    {
280
567
        int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
281
567
        WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
282
567
        WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
283
567
        WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
284
567
        WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
285
567
    }
286
287
698
    WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
288
698
    WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
289
698
    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
290
698
    WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
291
292
1.39k
    for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
293
698
    {
294
698
        WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
295
698
        WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
296
698
        WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
297
698
    }
298
299
698
    WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
300
698
    WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
301
698
    WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
302
698
    WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
303
698
    WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
304
698
    WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
305
698
    WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
306
698
    if (scalingList.m_bEnabled)
307
0
    {
308
0
        WRITE_FLAG(scalingList.m_bDataPresent,    "sps_scaling_list_data_present_flag");
309
0
        if (scalingList.m_bDataPresent)
310
0
            codeScalingList(scalingList);
311
0
    }
312
698
    WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
313
698
    WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
314
315
698
    WRITE_FLAG(0, "pcm_enabled_flag");
316
698
    WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
317
698
    for (int i = 0; i < sps.spsrpsNum; i++)
318
0
        codeShortTermRefPicSet(sps.spsrps[i], i);
319
698
    WRITE_FLAG(0, "long_term_ref_pics_present_flag");
320
321
698
    WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
322
698
    WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
323
324
698
    WRITE_FLAG(1, "vui_parameters_present_flag");
325
698
    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo);
326
327
698
    WRITE_FLAG(0, "sps_extension_flag");
328
698
}
329
330
void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26 )
331
698
{
332
698
    WRITE_UVLC(0,                          "pps_pic_parameter_set_id");
333
698
    WRITE_UVLC(0,                          "pps_seq_parameter_set_id");
334
698
    WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
335
698
    WRITE_FLAG(0,                          "output_flag_present_flag");
336
698
    WRITE_CODE(0, 3,                       "num_extra_slice_header_bits");
337
698
    WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
338
698
    WRITE_FLAG(0,                          "cabac_init_present_flag");
339
698
    WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
340
698
    WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
341
342
698
    WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
343
698
    WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
344
698
    WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
345
346
698
    WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
347
698
    if (pps.bUseDQP)
348
503
        WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
349
350
698
    WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
351
698
    WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
352
698
    WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
353
354
698
    WRITE_FLAG(pps.bUseWeightPred,            "weighted_pred_flag");
355
698
    WRITE_FLAG(pps.bUseWeightedBiPred,        "weighted_bipred_flag");
356
698
    WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
357
698
    WRITE_FLAG(0,                             "tiles_enabled_flag");
358
698
    WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
359
698
    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
360
361
698
    WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
362
698
    if (pps.bDeblockingFilterControlPresent)
363
0
    {
364
0
        WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
365
0
        WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
366
0
        if (!pps.bPicDisableDeblockingFilter)
367
0
        {
368
0
            WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
369
0
            WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
370
0
        }
371
0
    }
372
373
698
    WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
374
698
    WRITE_FLAG(0, "lists_modification_present_flag");
375
698
    WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
376
698
    WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
377
698
    WRITE_FLAG(0, "pps_extension_flag");
378
698
}
379
380
void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers)
381
1.39k
{
382
1.39k
    WRITE_CODE(0, 2,                "XXX_profile_space[]");
383
1.39k
    WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
384
1.39k
    WRITE_CODE(ptl.profileIdc, 5,   "XXX_profile_idc[]");
385
46.0k
    for (int j = 0; j < 32; j++)
386
44.6k
        WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
387
388
1.39k
    WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
389
1.39k
    WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
390
1.39k
    WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
391
1.39k
    WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
392
393
1.39k
    if (ptl.profileIdc == Profile::MAINREXT || ptl.profileIdc == Profile::HIGHTHROUGHPUTREXT)
394
0
    {
395
0
        uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
396
0
        int csp = ptl.chromaFormatConstraint;
397
0
        WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
398
0
        WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
399
0
        WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
400
0
        WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
401
0
        WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
402
0
        WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
403
0
        WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
404
0
        WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
405
0
        WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
406
0
        WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
407
0
        WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
408
0
        WRITE_CODE(0 ,  3, "XXX_reserved_zero_35bits[32..34]");
409
0
    }
410
1.39k
    else
411
1.39k
    {
412
1.39k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
413
1.39k
        WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
414
1.39k
        WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
415
1.39k
    }
416
417
1.39k
    WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
418
419
1.39k
    if (maxTempSubLayers > 1)
420
0
    {
421
0
         WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
422
0
         WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
423
0
         for (int i = maxTempSubLayers - 1; i < 8 ; i++)
424
0
             WRITE_CODE(0, 2, "reserved_zero_2bits");
425
0
    }
426
1.39k
}
427
428
void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo)
429
698
{
430
698
    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
431
698
    if (vui.aspectRatioInfoPresentFlag)
432
0
    {
433
0
        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
434
0
        if (vui.aspectRatioIdc == 255)
435
0
        {
436
0
            WRITE_CODE(vui.sarWidth, 16, "sar_width");
437
0
            WRITE_CODE(vui.sarHeight, 16, "sar_height");
438
0
        }
439
0
    }
440
441
698
    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
442
698
    if (vui.overscanInfoPresentFlag)
443
0
        WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
444
445
698
    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
446
698
    if (vui.videoSignalTypePresentFlag)
447
698
    {
448
698
        WRITE_CODE(vui.videoFormat, 3, "video_format");
449
698
        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
450
698
        WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
451
698
        if (vui.colourDescriptionPresentFlag)
452
0
        {
453
0
            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
454
0
            WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
455
0
            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
456
0
        }
457
698
    }
458
459
698
    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
460
698
    if (vui.chromaLocInfoPresentFlag)
461
0
    {
462
0
        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
463
0
        WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
464
0
    }
465
466
698
    WRITE_FLAG(0, "neutral_chroma_indication_flag");
467
698
    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
468
698
    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
469
470
698
    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
471
698
    if (vui.defaultDisplayWindow.bEnabled)
472
0
    {
473
0
        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
474
0
        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
475
0
        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
476
0
        WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
477
0
    }
478
479
698
    if (!bEmitVUITimingInfo)
480
0
        WRITE_FLAG(0, "vui_timing_info_present_flag");
481
698
    else
482
698
    {
483
698
        WRITE_FLAG(1, "vui_timing_info_present_flag");
484
698
        WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
485
698
        WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
486
698
        WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
487
698
    }
488
489
698
    if (!bEmitVUIHRDInfo)
490
0
        WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
491
698
    else
492
698
    {
493
698
        WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
494
698
        if (vui.hrdParametersPresentFlag)
495
0
            codeHrdParameters(vui.hrdParameters, maxSubTLayers);
496
698
    }
497
498
698
    WRITE_FLAG(0, "bitstream_restriction_flag");
499
698
}
500
501
void Entropy::codeScalingList(const ScalingList& scalingList)
502
0
{
503
0
    for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
504
0
    {
505
0
        for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
506
0
        {
507
0
            int predList = scalingList.checkPredMode(sizeId, listId);
508
0
            WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
509
0
            if (predList >= 0)
510
0
                WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
511
0
            else // DPCM Mode
512
0
                codeScalingList(scalingList, sizeId, listId);
513
0
        }
514
0
    }
515
0
}
516
517
void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
518
0
{
519
0
    int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
520
0
    const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
521
0
    int nextCoef = START_VALUE;
522
0
    int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
523
0
    int data;
524
525
0
    if (sizeId > BLOCK_8x8)
526
0
    {
527
0
        WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
528
0
        nextCoef = scalingList.m_scalingListDC[sizeId][listId];
529
0
    }
530
0
    for (int i = 0; i < coefNum; i++)
531
0
    {
532
0
        data = src[scan[i]] - nextCoef;
533
0
        if (data < -128)
534
0
            data += 256;
535
0
        if (data > 127)
536
0
            data -= 256;
537
0
        nextCoef = (nextCoef + data + 256) % 256;
538
0
        WRITE_SVLC(data,  "scaling_list_delta_coef");
539
0
    }
540
0
}
541
542
void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
543
0
{
544
0
    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
545
0
    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
546
0
    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
547
548
0
    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
549
0
    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
550
551
0
    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
552
0
    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
553
0
    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
554
555
0
    for (int i = 0; i < maxSubTLayers; i++)
556
0
    {
557
0
        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
558
0
        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
559
0
        WRITE_UVLC(0, "cpb_cnt_minus1");
560
561
0
        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
562
0
        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
563
0
        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
564
0
    }
565
0
}
566
567
void Entropy::codeAUD(const Slice& slice)
568
0
{
569
0
    int picType;
570
571
0
    switch (slice.m_sliceType)
572
0
    {
573
0
    case I_SLICE:
574
0
        picType = 0;
575
0
        break;
576
0
    case P_SLICE:
577
0
        picType = 1;
578
0
        break;
579
0
    case B_SLICE:
580
0
        picType = 2;
581
0
        break;
582
0
    default:
583
0
        picType = 7;
584
0
        break;
585
0
    }
586
587
0
    WRITE_CODE(picType, 3, "pic_type");
588
0
}
589
590
void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp)
591
698
{
592
698
    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
593
698
    if (slice.getRapPicFlag())
594
698
        WRITE_FLAG(0, "no_output_of_prior_pics_flag");
595
596
698
    WRITE_UVLC(0, "slice_pic_parameter_set_id");
597
598
    /* x265 does not use dependent slices, so always write all this data */
599
698
    if (slice_addr)
600
0
    {
601
        // if( dependent_slice_segments_enabled_flag )
602
        //     dependent_slice_segment_flag             u(1)
603
0
        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
604
0
    }
605
606
698
    WRITE_UVLC(slice.m_sliceType, "slice_type");
607
608
698
    if (!slice.getIdrPicFlag())
609
0
    {
610
0
        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
611
0
        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
612
613
#if _DEBUG || CHECKED_BUILD
614
        // check for bitstream restriction stating that:
615
        // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
616
        // Ideally this process should not be repeated for each slice in a picture
617
        if (slice.isIRAP())
618
            for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
619
            {
620
                X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
621
            }
622
#endif
623
624
0
        if (slice.m_rpsIdx < 0)
625
0
        {
626
0
            WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
627
0
            codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
628
0
        }
629
0
        else
630
0
        {
631
0
            WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
632
0
            int numBits = 0;
633
0
            while ((1 << numBits) < slice.m_iNumRPSInSPS)
634
0
                numBits++;
635
636
0
            if (numBits > 0)
637
0
                WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
638
0
        }
639
640
0
        if (slice.m_sps->bTemporalMVPEnabled)
641
0
            WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
642
0
    }
643
698
    const SAOParam *saoParam = encData.m_saoParam;
644
698
    if (slice.m_bUseSao)
645
698
    {
646
698
        WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
647
698
        if (encData.m_param->internalCsp != X265_CSP_I400)
648
698
            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
649
698
    }
650
0
    else if(encData.m_param->selectiveSAO)
651
0
    {
652
0
        WRITE_FLAG(0, "slice_sao_luma_flag");
653
0
        if (encData.m_param->internalCsp != X265_CSP_I400)
654
0
            WRITE_FLAG(0, "slice_sao_chroma_flag");
655
0
    }
656
657
    // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
658
    // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
659
660
698
    if (!slice.isIntra())
661
0
    {
662
0
        bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
663
0
        WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
664
0
        if (overrideFlag)
665
0
        {
666
0
            WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
667
0
            if (slice.isInterB())
668
0
                WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
669
0
            else
670
0
            {
671
0
                X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
672
0
            }
673
0
        }
674
0
    }
675
698
    else
676
698
    {
677
698
        X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
678
698
    }
679
680
698
    if (slice.isInterB())
681
0
        WRITE_FLAG(0, "mvd_l1_zero_flag");
682
683
698
    if (slice.m_sps->bTemporalMVPEnabled)
684
698
    {
685
698
        if (slice.m_sliceType == B_SLICE)
686
0
            WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
687
688
698
        if (slice.m_sliceType != I_SLICE &&
689
698
            ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
690
0
            (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
691
0
        {
692
0
            WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
693
0
        }
694
698
    }
695
698
    if ((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE))
696
0
        codePredWeightTable(slice);
697
698
698
    X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
699
698
    if (!slice.isIntra())
700
0
        WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
701
702
698
    int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
703
698
    WRITE_SVLC(code, "slice_qp_delta");
704
705
698
    if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
706
0
    {
707
0
        WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
708
0
        WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
709
0
    }
710
    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
711
    //       We didn't support filter across slice board, so disable it now
712
713
698
    if (encData.m_param->maxSlices <= 1)
714
698
    {
715
698
        bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
716
698
        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
717
718
698
        if (isSAOEnabled || isDBFEnabled)
719
698
            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
720
698
    }
721
698
}
722
723
/** write wavefront substreams sizes for the slice header */
724
void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
725
584
{
726
584
    uint32_t offsetLen = 1;
727
3.35k
    while (maxOffset >= (1U << offsetLen))
728
2.76k
    {
729
2.76k
        offsetLen++;
730
2.76k
        X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
731
2.76k
    }
732
733
584
    WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
734
584
    if (numSubStreams > 0)
735
584
        WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
736
737
2.82k
    for (uint32_t i = 0; i < numSubStreams; i++)
738
2.23k
        WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
739
584
}
740
741
void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
742
0
{
743
0
    if (idx > 0)
744
0
        WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
745
746
0
    WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
747
0
    WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
748
0
    int prev = 0;
749
0
    for (int j = 0; j < rps.numberOfNegativePictures; j++)
750
0
    {
751
0
        WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
752
0
        prev = rps.deltaPOC[j];
753
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
754
0
    }
755
756
0
    prev = 0;
757
0
    for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
758
0
    {
759
0
        WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
760
0
        prev = rps.deltaPOC[j];
761
0
        WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
762
0
    }
763
0
}
764
765
void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
766
27.9k
{
767
27.9k
    bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
768
27.9k
    encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
769
27.9k
}
770
771
/* encode a CU block recursively */
772
void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
773
120k
{
774
120k
    const Slice* slice = ctu.m_slice;
775
776
120k
    int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
777
120k
    int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
778
779
120k
    if (!cuUnsplitFlag)
780
26.1k
    {
781
26.1k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
782
26.1k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
783
6.52k
            bEncodeDQP = true;
784
130k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
785
104k
        {
786
104k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
787
104k
            if (childGeom.flags & CUGeom::PRESENT)
788
59.2k
                encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
789
104k
        }
790
26.1k
        return;
791
26.1k
    }
792
793
94.8k
    if (cuSplitFlag) 
794
67.9k
        codeSplitFlag(ctu, absPartIdx, depth);
795
796
94.8k
    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
797
8.44k
    {
798
8.44k
        uint32_t qNumParts = cuGeom.numPartitions >> 2;
799
8.44k
        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
800
386
            bEncodeDQP = true;
801
42.2k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
802
33.7k
        {
803
33.7k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
804
33.7k
            encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
805
33.7k
        }
806
8.44k
        return;
807
8.44k
    }
808
809
86.3k
    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
810
34.7k
        bEncodeDQP = true;
811
812
86.3k
    if (slice->m_pps->bTransquantBypassEnabled)
813
26.6k
        codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
814
815
86.3k
    if (!slice->isIntra())
816
0
    {
817
0
        codeSkipFlag(ctu, absPartIdx);
818
0
        if (ctu.isSkipped(absPartIdx))
819
0
        {
820
0
            codeMergeIndex(ctu, absPartIdx);
821
0
            finishCU(ctu, absPartIdx, depth, bEncodeDQP);
822
0
            return;
823
0
        }
824
0
        codePredMode(ctu.m_predMode[absPartIdx]);
825
0
    }
826
827
86.3k
    codePartSize(ctu, absPartIdx, depth);
828
829
    // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
830
86.3k
    codePredInfo(ctu, absPartIdx);
831
832
86.3k
    uint32_t tuDepthRange[2];
833
86.3k
    if (ctu.isIntra(absPartIdx))
834
86.3k
        ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
835
5
    else
836
5
        ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
837
838
    // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
839
86.3k
    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
840
841
    // --- write terminating bit ---
842
86.3k
    finishCU(ctu, absPartIdx, depth, bEncodeDQP);
843
86.3k
}
844
845
/* Return bit count of signaling inter mode */
846
uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
847
0
{
848
0
    uint32_t bits;
849
0
    bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
850
0
    bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
851
0
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
852
0
    switch (partSize)
853
0
    {
854
0
    case SIZE_2Nx2N:
855
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
856
0
        break;
857
858
0
    case SIZE_2NxN:
859
0
    case SIZE_2NxnU:
860
0
    case SIZE_2NxnD:
861
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
862
0
        bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
863
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
864
0
        {
865
0
            bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
866
0
            if (partSize != SIZE_2NxN)
867
0
                bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
868
0
        }
869
0
        break;
870
871
0
    case SIZE_Nx2N:
872
0
    case SIZE_nLx2N:
873
0
    case SIZE_nRx2N:
874
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
875
0
        bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
876
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
877
0
            bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
878
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
879
0
        {
880
0
            bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
881
0
            if (partSize != SIZE_Nx2N)
882
0
                bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
883
0
        }
884
0
        break;
885
0
    default:
886
0
        X265_CHECK(0, "invalid CU partition\n");
887
0
        break;
888
0
    }
889
890
0
    return bits;
891
0
}
892
893
/* finish encoding a cu and handle end-of-slice conditions */
894
void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
895
86.3k
{
896
86.3k
    const Slice* slice = ctu.m_slice;
897
86.3k
    uint32_t realEndAddress = slice->m_endCUAddr;
898
86.3k
    uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
899
86.3k
    X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
900
901
86.3k
    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
902
86.3k
    uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
903
86.3k
    uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
904
86.3k
    uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
905
86.3k
    bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
906
86.3k
                                ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
907
908
86.3k
    if (slice->m_pps->bUseDQP)
909
59.6k
        const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
910
911
86.3k
    if (granularityBoundary)
912
27.9k
    {
913
        // Encode slice finish
914
27.9k
        uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
915
27.9k
        if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
916
1.39k
            bTerminateSlice = 1;
917
918
        // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
919
27.9k
        if (!bTerminateSlice)
920
26.5k
            encodeBinTrm(0);    // end_of_slice_segment_flag
921
922
27.9k
        if (!m_bitIf)
923
13.9k
            resetBits(); // TODO: most likely unnecessary
924
27.9k
    }
925
86.3k
}
926
927
void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
928
                              bool& bCodeDQP, const uint32_t depthRange[2])
929
2.21M
{
930
2.21M
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
931
932
    /* in each of these conditions, the subdiv flag is implied and not signaled,
933
     * so we have checks to make sure the implied value matches our intentions */
934
2.21M
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
935
338k
    {
936
338k
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
937
338k
    }
938
1.87M
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
939
1.87M
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
940
0
    {
941
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
942
0
    }
943
1.87M
    else if (log2CurSize > depthRange[1])
944
0
    {
945
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
946
0
    }
947
1.87M
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
948
1.35M
    {
949
1.35M
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
950
1.35M
    }
951
522k
    else
952
522k
    {
953
522k
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
954
522k
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
955
522k
    }
956
957
2.21M
    uint32_t hChromaShift = cu.m_hChromaShift;
958
2.21M
    uint32_t vChromaShift = cu.m_vChromaShift;
959
2.21M
    bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
960
2.21M
    if (!curDepth || !bSmallChroma)
961
863k
    {
962
863k
        uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
963
863k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
964
863k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
965
863k
        if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
966
863k
            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
967
863k
    }
968
969
2.21M
    if (subdiv)
970
339k
    {
971
339k
        --log2CurSize;
972
339k
        ++curDepth;
973
974
339k
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
975
976
339k
        encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
977
339k
        encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
978
339k
        encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
979
339k
        encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
980
339k
        return;
981
339k
    }
982
983
1.87M
    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
984
985
1.87M
    if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
986
0
    {
987
0
        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
988
0
    }
989
1.87M
    else
990
1.87M
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
991
992
1.87M
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
993
1.87M
    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
994
1.87M
    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
995
1.87M
    if (!(cbfY || cbfU || cbfV))
996
1.87M
        return;
997
998
    // dQP: only for CTU once
999
8.60k
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1000
3.71k
    {
1001
3.71k
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1002
3.71k
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1003
3.71k
        codeDeltaQP(cu, absPartIdxLT);
1004
3.71k
        bCodeDQP = false;
1005
3.71k
    }
1006
1007
8.60k
    if (cbfY)
1008
5.00k
    {
1009
5.00k
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1010
5.00k
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1011
5.00k
        if (!(cbfU || cbfV))
1012
648
            return;
1013
5.00k
    }
1014
1015
7.95k
    if (bSmallChroma)
1016
5.16k
    {
1017
5.16k
        if ((absPartIdx & 3) != 3)
1018
3.87k
            return;
1019
1020
1.29k
        const uint32_t log2CurSizeC = 2;
1021
1.29k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1022
1.29k
        const uint32_t curPartNum = 4;
1023
1.29k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1024
3.87k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1025
2.58k
        {
1026
2.58k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1027
2.58k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1028
2.58k
            do
1029
2.58k
            {
1030
2.58k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1031
2.58k
                {
1032
2.58k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1033
2.58k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1034
2.58k
                }
1035
2.58k
            }
1036
2.58k
            while (tuIterator.isNextSection());
1037
2.58k
        }
1038
1.29k
    }
1039
2.79k
    else
1040
2.79k
    {
1041
2.79k
        uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1042
2.79k
        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1043
2.79k
        uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1044
2.79k
        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1045
8.63k
        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1046
5.84k
        {
1047
5.84k
            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1048
5.84k
            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1049
5.84k
            do
1050
5.84k
            {
1051
5.84k
                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1052
5.84k
                {
1053
5.84k
                    uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1054
5.84k
                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1055
5.84k
                }
1056
5.84k
            }
1057
5.84k
            while (tuIterator.isNextSection());
1058
5.84k
        }
1059
2.79k
    }
1060
7.95k
}
1061
1062
void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1063
                              bool& bCodeDQP, const uint32_t depthRange[2])
1064
0
{
1065
0
    const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1066
1067
    /* in each of these conditions, the subdiv flag is implied and not signaled,
1068
     * so we have checks to make sure the implied value matches our intentions */
1069
0
    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1070
0
    {
1071
0
        X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1072
0
    }
1073
0
    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1074
0
             !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1075
0
    {
1076
0
        X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1077
0
    }
1078
0
    else if (log2CurSize > depthRange[1])
1079
0
    {
1080
0
        X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1081
0
    }
1082
0
    else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1083
0
    {
1084
0
        X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1085
0
    }
1086
0
    else
1087
0
    {
1088
0
        X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1089
0
        codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1090
0
    }
1091
1092
0
    if (subdiv)
1093
0
    {
1094
0
        --log2CurSize;
1095
0
        ++curDepth;
1096
1097
0
        uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1098
1099
0
        encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1100
0
        encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1101
0
        encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1102
0
        encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1103
0
        return;
1104
0
    }
1105
1106
0
    if (!cu.isIntra(absPartIdx) && !curDepth)
1107
0
    {
1108
0
        X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1109
0
    }
1110
0
    else
1111
0
        codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1112
1113
0
    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1114
1115
0
    if (!cbfY)
1116
0
        return;
1117
1118
    // dQP: only for CTU once
1119
0
    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1120
0
    {
1121
0
        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1122
0
        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1123
0
        codeDeltaQP(cu, absPartIdxLT);
1124
0
        bCodeDQP = false;
1125
0
    }
1126
1127
0
    if (cbfY)
1128
0
    {
1129
0
        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1130
0
        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1131
0
    }
1132
0
}
1133
1134
1135
void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1136
861k
{
1137
861k
    if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1138
861k
    {
1139
861k
        codeIntraDirLumaAng(cu, absPartIdx, true);
1140
861k
        if (cu.m_chromaFormat != X265_CSP_I400)
1141
861k
        {
1142
861k
            uint32_t chromaDirMode[NUM_CHROMA_MODE];
1143
861k
            cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1144
1145
861k
            codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1146
1147
861k
            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1148
0
            {
1149
0
                uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1150
0
                for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1151
0
                {
1152
0
                    absPartIdx += qNumParts;
1153
0
                    cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1154
0
                    codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1155
0
                }
1156
0
            }
1157
861k
        }
1158
861k
    }
1159
47
    else // if it is inter mode, encode motion vector and reference index
1160
47
        codePUWise(cu, absPartIdx);
1161
861k
}
1162
1163
/** encode motion information for every PU block */
1164
void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1165
0
{
1166
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1167
0
    uint32_t numPU = cu.getNumPartInter(absPartIdx);
1168
1169
0
    for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1170
0
    {
1171
0
        codeMergeFlag(cu, subPartIdx);
1172
0
        if (cu.m_mergeFlag[subPartIdx])
1173
0
            codeMergeIndex(cu, subPartIdx);
1174
0
        else
1175
0
        {
1176
0
            if (cu.m_slice->isInterB())
1177
0
                codeInterDir(cu, subPartIdx);
1178
1179
0
            uint32_t interDir = cu.m_interDir[subPartIdx];
1180
0
            for (uint32_t list = 0; list < 2; list++)
1181
0
            {
1182
0
                if (interDir & (1 << list))
1183
0
                {
1184
0
                    X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1185
1186
0
                    codeRefFrmIdxPU(cu, subPartIdx, list);
1187
0
                    codeMvd(cu, subPartIdx, list);
1188
0
                    codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1189
0
                }
1190
0
            }
1191
0
        }
1192
0
    }
1193
0
}
1194
1195
/** encode reference frame index for a PU block */
1196
void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1197
0
{
1198
0
    X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1199
1200
0
    if (cu.m_slice->m_numRefIdx[list] > 1)
1201
0
        codeRefFrmIdx(cu, absPartIdx, list);
1202
0
}
1203
1204
void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1205
861k
{
1206
861k
    if (!cu.isIntra(absPartIdx))
1207
0
    {
1208
0
        if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1209
0
            codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1210
0
        if (!cu.getQtRootCbf(absPartIdx))
1211
0
            return;
1212
0
    }
1213
1214
861k
    uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1215
861k
    if (cu.m_chromaFormat == X265_CSP_I400)
1216
0
        encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1217
861k
    else
1218
861k
        encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1219
861k
}
1220
1221
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1222
55.2k
{
1223
55.2k
    int typeIdx = ctuParam.typeIdx;
1224
1225
55.2k
    if (plane != 2)
1226
36.8k
    {
1227
36.8k
        encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1228
36.8k
        if (typeIdx >= 0)
1229
0
            encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1230
36.8k
    }
1231
1232
55.2k
    if (typeIdx >= 0)
1233
0
    {
1234
0
        enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1235
0
        if (typeIdx == SAO_BO)
1236
0
        {
1237
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1238
0
                codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1239
1240
0
            for (int i = 0; i < SAO_NUM_OFFSET; i++)
1241
0
                if (ctuParam.offset[i] != 0)
1242
0
                    encodeBinEP(ctuParam.offset[i] < 0);
1243
1244
0
            encodeBinsEP(ctuParam.bandPos, 5);
1245
0
        }
1246
0
        else // if (typeIdx < SAO_BO)
1247
0
        {
1248
0
            codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1249
0
            codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1250
0
            codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1251
0
            codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1252
0
            if (plane != 2)
1253
0
                encodeBinsEP((uint32_t)(typeIdx), 2);
1254
0
        }
1255
0
    }
1256
55.2k
}
1257
1258
void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1259
167k
{
1260
167k
    if (plane != 2)
1261
111k
    {
1262
111k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1263
111k
        encodeBinEP(1);
1264
111k
    }
1265
1266
167k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1267
1268
167k
    codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1269
167k
    codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1270
167k
    codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1271
167k
    codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1272
167k
    if (plane != 2)
1273
111k
        encodeBinsEP((uint32_t)(typeIdx), 2);
1274
167k
}
1275
1276
void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1277
41.9k
{
1278
41.9k
    if (plane != 2)
1279
27.9k
    {
1280
27.9k
        encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1281
27.9k
        encodeBinEP(0);
1282
27.9k
    }
1283
1284
41.9k
    enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1285
1286
209k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1287
167k
        codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1288
1289
209k
    for (int i = 0; i < SAO_NUM_OFFSET; i++)
1290
167k
        if (offset[i] != 0)
1291
74
            encodeBinEP(offset[i] < 0);
1292
1293
41.9k
    encodeBinsEP(bandPos, 5);
1294
41.9k
}
1295
1296
/** initialize context model with respect to QP and initialization value */
1297
uint8_t sbacInit(int qp, int initValue)
1298
109k
{
1299
109k
    qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1300
1301
109k
    int  slope      = (initValue >> 4) * 5 - 45;
1302
109k
    int  offset     = ((initValue & 15) << 3) - 16;
1303
109k
    int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1304
109k
    uint32_t mpState = (initState >= 64);
1305
109k
    uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1306
1307
109k
    return (uint8_t)state;
1308
109k
}
1309
1310
static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1311
18.1k
{
1312
18.1k
    ctxModel += sliceType * size;
1313
1314
127k
    for (int n = 0; n < size; n++)
1315
109k
        contextModel[n] = sbacInit(qp, ctxModel[n]);
1316
18.1k
}
1317
1318
void Entropy::resetEntropy(const Slice& slice)
1319
698
{
1320
698
    int  qp              = slice.m_sliceQp;
1321
698
    SliceType sliceType  = slice.m_sliceType;
1322
1323
698
    initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1324
698
    initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1325
698
    initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1326
698
    initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1327
698
    initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1328
698
    initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1329
698
    initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1330
698
    initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1331
698
    initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1332
698
    initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1333
698
    initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1334
698
    initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1335
698
    initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1336
698
    initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1337
698
    initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1338
698
    initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1339
698
    initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1340
698
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1341
698
    initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1342
698
    initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1343
698
    initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1344
698
    initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1345
698
    initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1346
698
    initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1347
698
    initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1348
698
    initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1349
    // new structure
1350
1351
698
    start();
1352
698
}
1353
1354
/* code explicit wp tables */
1355
void Entropy::codePredWeightTable(const Slice& slice)
1356
0
{
1357
0
    const WeightParam *wp;
1358
0
    bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1359
0
    bool            bDenomCoded  = false;
1360
0
    int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1361
0
    uint32_t        totalSignalledWeightFlags = 0;
1362
1363
0
    if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1364
0
        (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1365
0
    {
1366
0
        for (int list = 0; list < numRefDirs; list++)
1367
0
        {
1368
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1369
0
            {
1370
0
                wp = slice.m_weightPredTable[list][ref];
1371
0
                if (!bDenomCoded)
1372
0
                {
1373
0
                    WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1374
1375
0
                    if (bChroma)
1376
0
                    {
1377
0
                        int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1378
0
                        WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1379
0
                    }
1380
0
                    bDenomCoded = true;
1381
0
                }
1382
0
                WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1383
0
                totalSignalledWeightFlags += wp[0].wtPresent;
1384
0
            }
1385
1386
0
            if (bChroma)
1387
0
            {
1388
0
                for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1389
0
                {
1390
0
                    wp = slice.m_weightPredTable[list][ref];
1391
0
                    WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1392
0
                    totalSignalledWeightFlags += 2 * wp[1].wtPresent;
1393
0
                }
1394
0
            }
1395
1396
0
            for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1397
0
            {
1398
0
                wp = slice.m_weightPredTable[list][ref];
1399
0
                if (wp[0].wtPresent)
1400
0
                {
1401
0
                    int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1402
0
                    WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1403
0
                    WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1404
0
                }
1405
1406
0
                if (bChroma)
1407
0
                {
1408
0
                    if (wp[1].wtPresent)
1409
0
                    {
1410
0
                        for (int plane = 1; plane < 3; plane++)
1411
0
                        {
1412
0
                            int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1413
0
                            WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1414
1415
0
                            int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1416
0
                            int deltaChroma = (wp[plane].inputOffset - pred);
1417
0
                            WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1418
0
                        }
1419
0
                    }
1420
0
                }
1421
0
            }
1422
0
        }
1423
1424
0
        X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1425
0
    }
1426
0
}
1427
1428
void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1429
5.00k
{
1430
5.00k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1431
1432
5.00k
    encodeBin(symbol ? 1 : 0, scmModel[0]);
1433
1434
5.00k
    if (!symbol)
1435
458
        return;
1436
1437
4.54k
    bool bCodeLast = (maxSymbol > symbol);
1438
1439
22.2k
    while (--symbol)
1440
17.7k
        encodeBin(1, scmModel[offset]);
1441
1442
4.54k
    if (bCodeLast)
1443
154
        encodeBin(0, scmModel[offset]);
1444
4.54k
}
1445
1446
void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1447
4.38k
{
1448
4.38k
    uint32_t bins = 0;
1449
4.38k
    int numBins = 0;
1450
1451
17.3k
    while (symbol >= (uint32_t)(1 << count))
1452
12.9k
    {
1453
12.9k
        bins = 2 * bins + 1;
1454
12.9k
        numBins++;
1455
12.9k
        symbol -= 1 << count;
1456
12.9k
        count++;
1457
12.9k
    }
1458
1459
4.38k
    bins = 2 * bins + 0;
1460
4.38k
    numBins++;
1461
1462
4.38k
    bins = (bins << count) | symbol;
1463
4.38k
    numBins += count;
1464
1465
4.38k
    X265_CHECK(numBins <= 32, "numBins too large\n");
1466
4.38k
    encodeBinsEP(bins, numBins);
1467
4.38k
}
1468
1469
/** Coding of coeff_abs_level_minus3 */
1470
void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1471
10.8k
{
1472
10.8k
    uint32_t length;
1473
10.8k
    const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1474
1475
10.8k
    if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1476
0
    {
1477
0
        length = codeNumber >> absGoRice;
1478
1479
0
        X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1480
0
        X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1481
0
        encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1482
0
    }
1483
10.8k
    else
1484
10.8k
    {
1485
10.8k
        length = 0;
1486
10.8k
        codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1487
10.8k
        {
1488
10.8k
            unsigned long idx;
1489
10.8k
            CLZ(idx, codeNumber + 1);
1490
10.8k
            length = idx;
1491
10.8k
            X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1492
10.8k
            codeNumber -= (1 << idx) - 1;
1493
10.8k
        }
1494
10.8k
        codeNumber = (codeNumber << absGoRice) + codeRemain;
1495
1496
10.8k
        encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1497
10.8k
        encodeBinsEP(codeNumber, length + absGoRice);
1498
10.8k
    }
1499
10.8k
}
1500
1501
// SBAC RD
1502
void Entropy::loadIntraDirModeLuma(const Entropy& src)
1503
1.78M
{
1504
1.78M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1505
1.78M
    m_fracBits = src.m_fracBits;
1506
1.78M
    m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1507
1.78M
}
1508
1509
void Entropy::copyFrom(const Entropy& src)
1510
12.3M
{
1511
12.3M
    X265_CHECK(src.m_valid, "invalid copy source context\n");
1512
1513
12.3M
    copyState(src);
1514
1515
12.3M
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1516
12.3M
    markValid();
1517
12.3M
}
1518
1519
void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1520
2.86M
{
1521
2.86M
    PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1522
1523
2.86M
    if (cu.isIntra(absPartIdx))
1524
2.86M
    {
1525
2.86M
        if (depth == cu.m_encData->m_param->maxCUDepth)
1526
2.40M
            encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1527
2.86M
        return;
1528
2.86M
    }
1529
1530
364
    switch (partSize)
1531
364
    {
1532
0
    case SIZE_2Nx2N:
1533
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1534
0
        break;
1535
1536
0
    case SIZE_2NxN:
1537
0
    case SIZE_2NxnU:
1538
0
    case SIZE_2NxnD:
1539
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1540
0
        encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1541
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1542
0
        {
1543
0
            encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1544
0
            if (partSize != SIZE_2NxN)
1545
0
                encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1546
0
        }
1547
0
        break;
1548
1549
0
    case SIZE_Nx2N:
1550
0
    case SIZE_nLx2N:
1551
0
    case SIZE_nRx2N:
1552
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1553
0
        encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1554
0
        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1555
0
            encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1556
0
        if (cu.m_slice->m_sps->maxAMPDepth > depth)
1557
0
        {
1558
0
            encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1559
0
            if (partSize != SIZE_Nx2N)
1560
0
                encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1561
0
        }
1562
0
        break;
1563
0
    default:
1564
0
        X265_CHECK(0, "invalid CU partition\n");
1565
0
        break;
1566
364
    }
1567
364
}
1568
1569
void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1570
0
{
1571
0
    uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1572
1573
0
    if (numCand > 1)
1574
0
    {
1575
0
        uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx 
1576
0
        encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1577
1578
0
        X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1579
1580
0
        if (unaryIdx != 0)
1581
0
        {
1582
0
            uint32_t mask = (1 << unaryIdx) - 2;
1583
0
            mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1584
0
            encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1585
0
        }
1586
0
    }
1587
0
}
1588
1589
void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1590
4.87M
{
1591
4.87M
    uint32_t dir[4], j;
1592
4.87M
    uint32_t preds[4][3];
1593
4.87M
    int predIdx[4];
1594
4.87M
    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
1595
4.87M
    uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1596
1597
10.7M
    for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
1598
5.88M
    {
1599
5.88M
        dir[j] = cu.m_lumaIntraDir[absPartIdx];
1600
5.88M
        cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
1601
5.88M
        predIdx[j] = -1;
1602
23.5M
        for (uint32_t i = 0; i < 3; i++)
1603
17.6M
            if (dir[j] == preds[j][i])
1604
5.86M
                predIdx[j] = i;
1605
1606
5.88M
        encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
1607
5.88M
    }
1608
1609
10.7M
    for (j = 0; j < partNum; j++)
1610
5.87M
    {
1611
5.87M
        if (predIdx[j] != -1)
1612
5.86M
        {
1613
5.86M
            X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
1614
            // NOTE: Mapping
1615
            //       0 = 0
1616
            //       1 = 10
1617
            //       2 = 11
1618
5.86M
            int nonzero = (!!predIdx[j]);
1619
5.86M
            encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
1620
5.86M
        }
1621
11.5k
        else
1622
11.5k
        {
1623
11.5k
            if (preds[j][0] > preds[j][1])
1624
463
                std::swap(preds[j][0], preds[j][1]);
1625
1626
11.5k
            if (preds[j][0] > preds[j][2])
1627
0
                std::swap(preds[j][0], preds[j][2]);
1628
1629
11.5k
            if (preds[j][1] > preds[j][2])
1630
0
                std::swap(preds[j][1], preds[j][2]);
1631
1632
11.5k
            dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
1633
18.4E
            dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
1634
18.4E
            dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
1635
1636
11.5k
            encodeBinsEP(dir[j], 5);
1637
11.5k
        }
1638
5.87M
    }
1639
4.87M
}
1640
1641
void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
1642
4.73M
{
1643
4.73M
    uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
1644
1645
4.73M
    if (intraDirChroma == DM_CHROMA_IDX)
1646
1.23M
        encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
1647
3.49M
    else
1648
3.49M
    {
1649
8.14M
        for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
1650
8.14M
        {
1651
8.14M
            if (intraDirChroma == chromaDirMode[i])
1652
3.50M
            {
1653
3.50M
                intraDirChroma = i;
1654
3.50M
                break;
1655
3.50M
            }
1656
8.14M
        }
1657
1658
3.49M
        encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
1659
3.49M
        encodeBinsEP(intraDirChroma, 2);
1660
3.49M
    }
1661
4.73M
}
1662
1663
void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
1664
0
{
1665
0
    const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
1666
0
    const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
1667
1668
0
    if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
1669
0
        encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
1670
0
    if (interDir < 2)
1671
0
        encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
1672
0
}
1673
1674
void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
1675
0
{
1676
0
    uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
1677
1678
0
    encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
1679
1680
0
    if (refFrame > 0)
1681
0
    {
1682
0
        uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
1683
0
        if (refNum == 0)
1684
0
            return;
1685
1686
0
        refFrame--;
1687
0
        encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
1688
0
        if (refFrame > 0)
1689
0
        {
1690
0
            uint32_t mask = (1 << refFrame) - 2;
1691
0
            mask >>= (refFrame == refNum) ? 1 : 0;
1692
0
            encodeBinsEP(mask, refFrame - (refFrame == refNum));
1693
0
        }
1694
0
    }
1695
0
}
1696
1697
void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
1698
0
{
1699
0
    const MV& mvd = cu.m_mvd[list][absPartIdx];
1700
0
    const int hor = mvd.x;
1701
0
    const int ver = mvd.y;
1702
1703
0
    encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1704
0
    encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1705
1706
0
    const bool bHorAbsGr0 = hor != 0;
1707
0
    const bool bVerAbsGr0 = ver != 0;
1708
0
    const uint32_t horAbs   = 0 > hor ? -hor : hor;
1709
0
    const uint32_t verAbs   = 0 > ver ? -ver : ver;
1710
1711
0
    if (bHorAbsGr0)
1712
0
        encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1713
1714
0
    if (bVerAbsGr0)
1715
0
        encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1716
1717
0
    if (bHorAbsGr0)
1718
0
    {
1719
0
        if (horAbs > 1)
1720
0
            writeEpExGolomb(horAbs - 2, 1);
1721
1722
0
        encodeBinEP(0 > hor ? 1 : 0);
1723
0
    }
1724
1725
0
    if (bVerAbsGr0)
1726
0
    {
1727
0
        if (verAbs > 1)
1728
0
            writeEpExGolomb(verAbs - 2, 1);
1729
1730
0
        encodeBinEP(0 > ver ? 1 : 0);
1731
0
    }
1732
0
}
1733
1734
void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
1735
5.00k
{
1736
5.00k
    int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
1737
1738
5.00k
    int qpBdOffsetY = QP_BD_OFFSET;
1739
1740
5.00k
    dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
1741
1742
5.00k
    uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
1743
5.00k
    uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
1744
5.00k
    writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
1745
5.00k
    if (absDQp >= CU_DQP_TU_CMAX)
1746
4.38k
        writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
1747
1748
5.00k
    if (absDQp > 0)
1749
4.54k
    {
1750
4.54k
        uint32_t sign = (dqp > 0 ? 0 : 1);
1751
4.54k
        encodeBinEP(sign);
1752
4.54k
    }
1753
5.00k
}
1754
1755
void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
1756
9.46M
{
1757
9.46M
    uint32_t ctx = tuDepth + 2;
1758
1759
9.46M
    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
1760
9.46M
    bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
1761
9.46M
    uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
1762
1763
9.46M
    if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
1764
0
    {
1765
0
        uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
1766
                                                          // Otherwise, this must be the level above the lowest level (as specified above)
1767
0
        uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
1768
1769
0
        encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1770
0
        encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1771
0
    }
1772
9.46M
    else
1773
9.46M
        encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1774
9.46M
}
1775
1776
#if CHECKED_BUILD || _DEBUG
1777
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
1778
{
1779
    uint32_t goRiceParam = 0;
1780
    int firstCoeff2 = 1;
1781
    uint32_t baseLevelN = 0x5555AAAA; // 2-bits encode format baseLevel
1782
1783
    uint32_t sum = 0;
1784
    int idx = 0;
1785
    do
1786
    {
1787
        int baseLevel = (baseLevelN & 3) | firstCoeff2;
1788
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
1789
        baseLevelN >>= 2;
1790
        int codeNumber = absCoeff[idx] - baseLevel;
1791
1792
        if (codeNumber >= 0)
1793
        {
1794
            //writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
1795
            uint32_t length = 0;
1796
1797
            codeNumber = ((uint32_t)codeNumber >> goRiceParam) - COEF_REMAIN_BIN_REDUCTION;
1798
            if (codeNumber >= 0)
1799
            {
1800
                {
1801
                    unsigned long cidx;
1802
                    CLZ(cidx, codeNumber + 1);
1803
                    length = cidx;
1804
                }
1805
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1806
1807
                codeNumber = (length + length);
1808
            }
1809
            sum += (COEF_REMAIN_BIN_REDUCTION + 1 + goRiceParam + codeNumber);
1810
1811
            if (absCoeff[idx] > (COEF_REMAIN_BIN_REDUCTION << goRiceParam))
1812
                goRiceParam = (goRiceParam + 1) - (goRiceParam >> 2);
1813
            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
1814
        }
1815
        if (absCoeff[idx] >= 2)
1816
            firstCoeff2 = 0;
1817
        idx++;
1818
    }
1819
    while(idx < numNonZero);
1820
1821
    return sum;
1822
}
1823
#endif // debug only code
1824
1825
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
1826
66.8k
{
1827
66.8k
    uint32_t trSize = 1 << log2TrSize;
1828
66.8k
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
1829
    // compute number of significant coefficients
1830
66.8k
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
1831
66.8k
    X265_CHECK(numSig > 0, "cbf check fail\n");
1832
66.8k
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
1833
1834
66.8k
    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
1835
0
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
1836
1837
66.8k
    bool bIsLuma = ttype == TEXT_LUMA;
1838
1839
    // select scans
1840
66.8k
    TUEntropyCodingParameters codingParameters;
1841
66.8k
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
1842
1843
66.8k
    uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
1844
66.8k
    uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
1845
66.8k
    uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
1846
1847
    //----- encode significance map -----
1848
1849
    // Find position of last coefficient
1850
66.8k
    int scanPosLast = 0;
1851
66.8k
    uint32_t posLast;
1852
66.8k
    uint64_t sigCoeffGroupFlag64 = 0;
1853
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
1854
66.8k
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
1855
1856
66.8k
    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
1857
66.8k
    posLast = codingParameters.scan[scanPosLast];
1858
1859
66.8k
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
1860
1861
    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
1862
184k
    for(int idx = 0; idx < lastScanSet; idx++)
1863
117k
    {
1864
117k
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
1865
117k
        const uint8_t nonZero = (coeffNum[idx] != 0);
1866
117k
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
1867
117k
    }
1868
1869
1870
    // Code position of last coefficient
1871
66.8k
    {
1872
        // The last position is composed of a prefix and suffix.
1873
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
1874
        // The bypass coded bins for both the x and y components are grouped together.
1875
66.8k
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
1876
66.8k
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
1877
        // swap
1878
66.8k
        if (codingParameters.scanType == SCAN_VER)
1879
4.97k
            std::swap(pos[0], pos[1]);
1880
1881
66.8k
        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1882
66.8k
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
1883
66.8k
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
1884
66.8k
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
1885
66.8k
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
1886
1887
66.8k
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
1888
200k
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
1889
133k
        {
1890
133k
            uint32_t temp = g_lastCoeffTable[pos[i]];
1891
133k
            uint32_t prefixOnes = temp & 15;
1892
133k
            uint32_t suffixLen = temp >> 4;
1893
1894
309k
            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
1895
175k
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
1896
1897
133k
            if (prefixOnes < maxGroupIdx)
1898
91.7k
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
1899
1900
133k
            packedSuffixBits <<= suffixLen;
1901
133k
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
1902
133k
            packedSuffixLen += suffixLen;
1903
133k
        }
1904
1905
66.8k
        encodeBinsEP(packedSuffixBits, packedSuffixLen);
1906
66.8k
    }
1907
1908
    // code significance flag
1909
66.8k
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
1910
66.8k
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
1911
66.8k
    uint32_t c1 = 1;
1912
66.8k
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
1913
66.8k
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
1914
66.8k
    uint32_t numNonZero = 1;
1915
66.8k
    unsigned long lastNZPosInCG;
1916
66.8k
    unsigned long firstNZPosInCG;
1917
1918
#if _DEBUG
1919
    // Unnecessary, for Valgrind-3.10.0 only
1920
    memset(absCoeff, 0, sizeof(absCoeff));
1921
#endif
1922
1923
66.8k
    absCoeff[0] = (uint16_t)abs(coeff[posLast]);
1924
1925
251k
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
1926
184k
    {
1927
184k
        const uint32_t subCoeffFlag = coeffFlag[subSet];
1928
184k
        uint32_t scanFlagMask = subCoeffFlag;
1929
184k
        int subPosBase = subSet << MLS_CG_SIZE;
1930
        
1931
184k
        if (subSet == lastScanSet)
1932
66.8k
        {
1933
66.8k
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
1934
66.8k
            scanFlagMask >>= 1;
1935
66.8k
        }
1936
1937
        // encode significant_coeffgroup_flag
1938
184k
        const int cgBlkPos = codingParameters.scanCG[subSet];
1939
184k
        const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
1940
184k
        const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
1941
184k
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
1942
1943
184k
        if (subSet == lastScanSet || !subSet)
1944
74.0k
            sigCoeffGroupFlag64 |= cgBlkPosMask;
1945
110k
        else
1946
110k
        {
1947
110k
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
1948
110k
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
1949
110k
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
1950
110k
        }
1951
1952
        // encode significant_coeff_flag
1953
184k
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
1954
140k
        {
1955
140k
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
1956
140k
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
1957
140k
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
1958
1959
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
1960
140k
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
1961
140k
            {
1962
                // patternSigCtx = 0
1963
140k
                {
1964
140k
                    2, 1, 1, 0,
1965
140k
                    1, 1, 0, 0,
1966
140k
                    1, 0, 0, 0,
1967
140k
                    0, 0, 0, 0,
1968
140k
                },
1969
                // patternSigCtx = 1
1970
140k
                {
1971
140k
                    2, 2, 2, 2,
1972
140k
                    1, 1, 1, 1,
1973
140k
                    0, 0, 0, 0,
1974
140k
                    0, 0, 0, 0,
1975
140k
                },
1976
                // patternSigCtx = 2
1977
140k
                {
1978
140k
                    2, 1, 0, 0,
1979
140k
                    2, 1, 0, 0,
1980
140k
                    2, 1, 0, 0,
1981
140k
                    2, 1, 0, 0,
1982
140k
                },
1983
                // patternSigCtx = 3
1984
140k
                {
1985
140k
                    2, 2, 2, 2,
1986
140k
                    2, 2, 2, 2,
1987
140k
                    2, 2, 2, 2,
1988
140k
                    2, 2, 2, 2,
1989
140k
                },
1990
                // 4x4
1991
140k
                {
1992
140k
                    0, 1, 4, 5,
1993
140k
                    2, 3, 4, 5,
1994
140k
                    6, 6, 8, 8,
1995
140k
                    7, 7, 8, 8
1996
140k
                }
1997
140k
            };
1998
1999
140k
            const int offset = codingParameters.firstSignificanceMapContext;
2000
140k
            const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2001
2002
140k
            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2003
140k
            if (m_bitIf)
2004
585
            {
2005
585
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2006
2007
                // TODO: accelerate by PABSW
2008
2.92k
                for (int i = 0; i < MLS_CG_SIZE; i++)
2009
2.34k
                {
2010
2.34k
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2011
2.34k
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2012
2.34k
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2013
2.34k
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2014
2.34k
                }
2015
2016
585
                if (log2TrSize == 2)
2017
585
                {
2018
585
                    do
2019
8.77k
                    {
2020
8.77k
                        uint32_t blkPos, sig, ctxSig;
2021
8.77k
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2022
8.77k
                        sig     = scanFlagMask & 1;
2023
8.77k
                        scanFlagMask >>= 1;
2024
8.77k
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2025
8.77k
                        {
2026
8.77k
                            ctxSig = table_cnt[4][blkPos];
2027
8.77k
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2028
8.77k
                            encodeBin(sig, baseCtx[ctxSig]);
2029
8.77k
                        }
2030
8.77k
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2031
8.77k
                        numNonZero += sig;
2032
8.77k
                        scanPosSigOff--;
2033
8.77k
                    }
2034
8.77k
                    while(scanPosSigOff >= 0);
2035
585
                }
2036
0
                else
2037
0
                {
2038
0
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2039
2040
0
                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2041
0
                    do
2042
0
                    {
2043
0
                        uint32_t blkPos, sig, ctxSig;
2044
0
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2045
0
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2046
0
                        sig     = scanFlagMask & 1;
2047
0
                        scanFlagMask >>= 1;
2048
0
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2049
0
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2050
0
                        {
2051
0
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
2052
0
                            ctxSig = (cnt + posOffset) & posZeroMask;
2053
2054
0
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2055
0
                            encodeBin(sig, baseCtx[ctxSig]);
2056
0
                        }
2057
0
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
2058
0
                        numNonZero += sig;
2059
0
                        scanPosSigOff--;
2060
0
                    }
2061
0
                    while(scanPosSigOff >= 0);
2062
0
                }
2063
585
            }
2064
140k
            else // fast RD path
2065
140k
            {
2066
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2067
140k
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2068
140k
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2069
140k
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2070
2071
#if CHECKED_BUILD || _DEBUG
2072
                numNonZero = coeffNum[subSet];
2073
#endif
2074
                // update RD cost
2075
140k
                m_fracBits += sum;
2076
140k
            } // end of fast RD path -- !m_bitIf
2077
140k
        }
2078
184k
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2079
2080
184k
        uint32_t coeffSigns = coeffSign[subSet];
2081
184k
        numNonZero = coeffNum[subSet];
2082
184k
        if (numNonZero > 0)
2083
184k
        {
2084
184k
            uint32_t idx;
2085
184k
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2086
184k
            CLZ(lastNZPosInCG, subCoeffFlag);
2087
184k
            CTZ(firstNZPosInCG, subCoeffFlag);
2088
2089
184k
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2090
184k
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2091
184k
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2092
2093
184k
            c1 = 1;
2094
184k
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2095
2096
184k
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2097
184k
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2098
2099
184k
            if (!m_bitIf)
2100
182k
            {
2101
182k
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
2102
182k
                uint32_t firstC2Idx = (sum >> 28);
2103
182k
                c1 = ((sum >> 26) & 3);
2104
182k
                m_fracBits += sum & 0x00FFFFFF;
2105
2106
182k
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2107
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2108
182k
                m_fracBits += (numNonZero + hiddenShift) << 15;
2109
2110
182k
                if (numNonZero > firstC2Idx)
2111
176k
                {
2112
176k
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2113
176k
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2114
176k
                    m_fracBits += ((uint64_t)sum << 15);
2115
176k
                }
2116
182k
            }
2117
            // Standard path
2118
2.22k
            else
2119
2.22k
            {
2120
2.22k
                uint32_t firstC2Idx = 8;
2121
2.22k
                uint32_t firstC2Flag = 2;
2122
2.22k
                uint32_t c1Next = 0xFFFFFFFE;
2123
2124
2.22k
                idx = 0;
2125
2.22k
                do
2126
6.31k
                {
2127
6.31k
                    const uint32_t symbol1 = absCoeff[idx] > 1;
2128
6.31k
                    const uint32_t symbol2 = absCoeff[idx] > 2;
2129
6.31k
                    encodeBin(symbol1, baseCtxMod[c1]);
2130
2131
6.31k
                    if (symbol1)
2132
6.18k
                        c1Next = 0;
2133
2134
6.31k
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2135
6.31k
                    firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2136
2137
6.31k
                    c1 = (c1Next & 3);
2138
6.31k
                    c1Next >>= 2;
2139
6.31k
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
2140
6.31k
                    idx++;
2141
6.31k
                }
2142
6.31k
                while(idx < numC1Flag);
2143
2144
2.22k
                if (!c1)
2145
2.09k
                {
2146
2.09k
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2147
2148
2.09k
                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2149
2.09k
                    encodeBin(firstC2Flag, baseCtxMod[0]);
2150
2.09k
                }
2151
2152
2.22k
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2153
2.22k
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2154
2155
2.22k
                if (!c1 || numNonZero > C1FLAG_NUMBER)
2156
2.09k
                {
2157
                    // Standard path
2158
2.09k
                    uint32_t goRiceParam = 0;
2159
2.09k
                    int baseLevel = 3;
2160
2.09k
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2161
#if CHECKED_BUILD || _DEBUG
2162
                    int firstCoeff2 = 1;
2163
#endif
2164
2.09k
                    idx = firstC2Idx;
2165
2.09k
                    do
2166
10.8k
                    {
2167
10.8k
                        if (idx >= C1FLAG_NUMBER)
2168
4.68k
                            baseLevel = 1;
2169
                        // TODO: the fast algorithm may have broken this check logic
2170
10.8k
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2171
2172
10.8k
                        if (absCoeff[idx] >= baseLevel)
2173
10.8k
                        {
2174
10.8k
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2175
10.8k
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2176
10.8k
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2177
10.8k
                            goRiceParam += adjust;
2178
10.8k
                            threshold += (adjust) ? threshold : 0;
2179
10.8k
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2180
10.8k
                        }
2181
#if CHECKED_BUILD || _DEBUG
2182
                        firstCoeff2 = 0;
2183
#endif
2184
10.8k
                        baseLevel = 2;
2185
10.8k
                        idx++;
2186
10.8k
                    }
2187
10.8k
                    while(idx < numNonZero);
2188
2.09k
                }
2189
2.22k
            } // end of !bitIf
2190
184k
        } // end of (numNonZero > 0)
2191
2192
        // Initialize value for next loop
2193
184k
        numNonZero = 0;
2194
184k
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2195
184k
    }
2196
66.8k
}
2197
2198
void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2199
839k
{
2200
839k
    X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2201
2202
839k
    uint32_t isCodeNonZero = !!code;
2203
2204
839k
    encodeBinEP(isCodeNonZero);
2205
839k
    if (isCodeNonZero)
2206
74
    {
2207
74
        uint32_t isCodeLast = (maxSymbol > code);
2208
74
        uint32_t mask = (1 << (code - 1)) - 1;
2209
74
        uint32_t len = code - 1 + isCodeLast;
2210
74
        mask <<= isCodeLast;
2211
2212
74
        encodeBinsEP(mask, len);
2213
74
    }
2214
839k
}
2215
2216
/* estimate bit cost for CBP, significant map and significant coefficients */
2217
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2218
9.12M
{
2219
9.12M
    estCBFBit(estBitsSbac);
2220
2221
9.12M
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2222
2223
    // encode significance map
2224
9.12M
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2225
2226
    // encode significant coefficients
2227
9.12M
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2228
9.12M
}
2229
2230
/* estimate bit cost for each CBP bit */
2231
void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2232
9.12M
{
2233
9.12M
    const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2234
2235
73.0M
    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2236
63.8M
    {
2237
63.8M
        estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2238
63.8M
        estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2239
63.8M
    }
2240
2241
9.12M
    ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2242
2243
9.12M
    estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2244
9.12M
    estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2245
9.12M
}
2246
2247
/* estimate SAMBAC bit cost for significant coefficient group map */
2248
void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2249
9.14M
{
2250
9.14M
    int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2251
2252
27.4M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2253
54.8M
        for (uint32_t bin = 0; bin < 2; bin++)
2254
36.5M
            estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2255
9.14M
}
2256
2257
/* estimate SAMBAC bit cost for significant coefficient map */
2258
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2259
9.17M
{
2260
9.17M
    int firstCtx = 1, numCtx = 8;
2261
2262
9.17M
    if (log2TrSize >= 4)
2263
361k
    {
2264
361k
        firstCtx = bIsLuma ? 21 : 12;
2265
361k
        numCtx = bIsLuma ? 6 : 3;
2266
361k
    }
2267
8.81M
    else if (log2TrSize == 3)
2268
1.41M
    {
2269
1.41M
        firstCtx = 9;
2270
1.41M
        numCtx = bIsLuma ? 12 : 3;
2271
1.41M
    }
2272
2273
9.17M
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2274
2275
9.17M
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2276
9.17M
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2277
2278
83.4M
    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2279
74.3M
    {
2280
74.3M
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2281
74.3M
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2282
74.3M
    }
2283
2284
9.17M
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2285
9.17M
    if (bIsLuma)
2286
5.33M
    {
2287
5.33M
        if (log2TrSize == 2)
2288
4.05M
        {
2289
12.1M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2290
8.11M
            {
2291
8.11M
                int bits = 0;
2292
8.11M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2293
2294
32.4M
                for (uint32_t ctx = 0; ctx < 3; ctx++)
2295
24.3M
                {
2296
24.3M
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2297
24.3M
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
2298
24.3M
                }
2299
2300
8.11M
                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2301
8.11M
            }
2302
4.05M
        }
2303
1.28M
        else
2304
1.28M
        {
2305
1.28M
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2306
2307
3.84M
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2308
2.56M
            {
2309
2.56M
                int bits = 0;
2310
2.56M
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2311
2.56M
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2312
2313
10.8M
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2314
8.32M
                {
2315
8.32M
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2316
8.32M
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2317
8.32M
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2318
8.32M
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2319
8.32M
                    bits += 2 * cost1;
2320
8.32M
                }
2321
                // correct latest bit cost, it didn't include cost0
2322
2.56M
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2323
2.56M
            }
2324
1.28M
        }
2325
5.33M
    }
2326
3.83M
    else
2327
3.83M
    {
2328
3.83M
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2329
3.83M
        const int ctxShift = log2TrSize - 2;
2330
2331
11.5M
        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2332
7.75M
        {
2333
7.75M
            int bits = 0;
2334
7.75M
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2335
2336
33.3M
            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2337
25.5M
            {
2338
25.5M
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2339
25.5M
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2340
25.5M
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2341
25.5M
            }
2342
2343
7.75M
            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2344
7.75M
        }
2345
3.83M
    }
2346
9.17M
}
2347
2348
/* estimate bit cost of significant coefficient */
2349
void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2350
9.21M
{
2351
9.21M
    if (bIsLuma)
2352
5.33M
    {
2353
5.33M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2354
5.33M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2355
2356
90.7M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2357
85.3M
        {
2358
85.3M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2359
85.3M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2360
85.3M
        }
2361
2362
26.6M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2363
21.3M
        {
2364
21.3M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2365
21.3M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2366
21.3M
        }
2367
5.33M
    }
2368
3.87M
    else
2369
3.87M
    {
2370
3.87M
        const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2371
3.87M
        const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2372
2373
34.9M
        for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2374
31.0M
        {
2375
31.0M
            estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2376
31.0M
            estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2377
31.0M
        }
2378
2379
11.6M
        for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2380
7.76M
        {
2381
7.76M
            estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2382
7.76M
            estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2383
7.76M
        }
2384
3.87M
    }
2385
9.21M
}
2386
2387
/* Initialize our context information from the nominated source */
2388
void Entropy::copyContextsFrom(const Entropy& src)
2389
10.1k
{
2390
10.1k
    X265_CHECK(src.m_valid, "invalid copy source context\n");
2391
2392
10.1k
    memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2393
10.1k
    markValid();
2394
10.1k
}
2395
2396
void Entropy::start()
2397
698
{
2398
698
    m_low = 0;
2399
698
    m_range = 510;
2400
698
    m_bitsLeft = -12;
2401
698
    m_numBufferedBytes = 0;
2402
698
    m_bufferedByte = 0xff;
2403
698
}
2404
2405
void Entropy::finish()
2406
2.93k
{
2407
2.93k
    if (m_low >> (21 + m_bitsLeft))
2408
2
    {
2409
2
        m_bitIf->writeByte(m_bufferedByte + 1);
2410
3
        while (m_numBufferedBytes > 1)
2411
1
        {
2412
1
            m_bitIf->writeByte(0x00);
2413
1
            m_numBufferedBytes--;
2414
1
        }
2415
2416
2
        m_low -= 1 << (21 + m_bitsLeft);
2417
2
    }
2418
2.93k
    else
2419
2.93k
    {
2420
2.93k
        if (m_numBufferedBytes > 0)
2421
2.93k
            m_bitIf->writeByte(m_bufferedByte);
2422
2423
2.93k
        while (m_numBufferedBytes > 1)
2424
4
        {
2425
4
            m_bitIf->writeByte(0xff);
2426
4
            m_numBufferedBytes--;
2427
4
        }
2428
2.93k
    }
2429
2.93k
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2430
2.93k
}
2431
2432
void Entropy::copyState(const Entropy& other)
2433
12.3M
{
2434
12.3M
    m_low = other.m_low;
2435
12.3M
    m_range = other.m_range;
2436
12.3M
    m_bitsLeft = other.m_bitsLeft;
2437
12.3M
    m_bufferedByte = other.m_bufferedByte;
2438
12.3M
    m_numBufferedBytes = other.m_numBufferedBytes;
2439
12.3M
    m_fracBits = other.m_fracBits;
2440
12.3M
}
2441
2442
void Entropy::resetBits()
2443
10.7M
{
2444
10.7M
    m_low = 0;
2445
10.7M
    m_bitsLeft = -12;
2446
10.7M
    m_numBufferedBytes = 0;
2447
10.7M
    m_bufferedByte = 0xff;
2448
10.7M
    m_fracBits &= 32767;
2449
10.7M
    if (m_bitIf)
2450
0
        m_bitIf->resetBits();
2451
10.7M
}
2452
2453
/** Encode one context-coded bin.
 *
 * The context model is always advanced through sbacNext().  When no bitstream
 * is attached (m_bitIf is null) only the estimated cost of the bin is added to
 * m_fracBits.  Otherwise the interval registers m_low / m_range are updated,
 * renormalized by numBits, and writeOut() is called once m_bitsLeft reaches
 * zero or more.
 *
 * \param binValue  bin to code; only its lowest bit is compared against the model
 * \param ctxModel  context model state, updated in place */
2454
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
2455
32.3M
{
2456
32.3M
    uint32_t mstate = ctxModel;
2457
2458
32.3M
    // advance the model first; the coding below uses the previous state (mstate)
    ctxModel = sbacNext(mstate, binValue);
2459
2460
32.3M
    // estimation mode: accumulate the cost and leave the coder registers alone
    if (!m_bitIf)
2461
31.9M
    {
2462
31.9M
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
2463
31.9M
        return;
2464
31.9M
    }
2465
2466
365k
    uint32_t range = m_range;
2467
365k
    uint32_t state = sbacGetState(mstate);
    // the uint8_t cast drops bit 8 of range, so the column index is bits 7..6 (0..3)
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
2468
365k
    // range now holds the MPS sub-interval
    range -= lps;
2469
365k
2470
2471
365k
    X265_CHECK(lps >= 2, "lps is too small\n");
2472
2473
365k
    // MPS renormalization: 1 when range < 256 (the unsigned subtraction wraps and sets bit 31), else 0
    int numBits = (uint32_t)(range - 256) >> 31;
2474
365k
    uint32_t low = m_low;
2475
2476
    // NOTE: MPS must be LOWEST bit in mstate
2477
365k
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2478
365k
    // LPS path: the bin differs from the model's MPS
    if ((binValue ^ mstate) & 1)
2479
39.0k
    {
2480
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps is less than 256
2481
        //numBits = g_renormTable[lps >> 3];
2482
39.0k
        // NOTE(review): CLZ is defined elsewhere; presumably it stores the index of the highest set bit of lps -- confirm
        unsigned long idx;
2483
39.0k
        CLZ(idx, lps);
2484
39.0k
        X265_CHECK(state != 63 || idx == 1, "state failure\n");
2485
2486
39.0k
        numBits = 8 - idx;
2487
39.0k
        // state 63 and above always renormalizes by 6 bits
        if (state >= 63)
2488
0
            numBits = 6;
2489
39.0k
        X265_CHECK(numBits <= 6, "numBits failure\n");
2490
2491
39.0k
        // select the LPS sub-interval: skip past the MPS part, shrink range to lps
        low += range;
2492
39.0k
        range = lps;
2493
39.0k
    }
2494
365k
    m_low = (low << numBits);
2495
365k
    m_range = (range << numBits);
2496
365k
    m_bitsLeft += numBits;
2497
2498
365k
    if (m_bitsLeft >= 0)
2499
17.5k
        writeOut();
2500
365k
}
2501
2502
/** Encode equiprobable bin */
2503
void Entropy::encodeBinEP(uint32_t binValue)
2504
984k
{
2505
984k
    if (!m_bitIf)
2506
983k
    {
2507
983k
        m_fracBits += 32768;
2508
983k
        return;
2509
983k
    }
2510
493
    m_low <<= 1;
2511
493
    if (binValue)
2512
493
        m_low += m_range;
2513
493
    m_bitsLeft++;
2514
2515
493
    if (m_bitsLeft >= 0)
2516
85
        writeOut();
2517
493
}
2518
2519
/** Encode equiprobable bins */
2520
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2521
9.59M
{
2522
9.59M
    if (!m_bitIf)
2523
9.50M
    {
2524
9.50M
        m_fracBits += 32768 * numBins;
2525
9.50M
        return;
2526
9.50M
    }
2527
2528
93.0k
    while (numBins > 8)
2529
3.82k
    {
2530
3.82k
        numBins -= 8;
2531
3.82k
        uint32_t pattern = binValues >> numBins;
2532
3.82k
        m_low <<= 8;
2533
3.82k
        m_low += m_range * pattern;
2534
3.82k
        binValues -= pattern << numBins;
2535
3.82k
        m_bitsLeft += 8;
2536
2537
3.82k
        if (m_bitsLeft >= 0)
2538
3.82k
            writeOut();
2539
3.82k
    }
2540
2541
89.1k
    m_low <<= numBins;
2542
89.1k
    m_low += m_range * binValues;
2543
89.1k
    m_bitsLeft += numBins;
2544
2545
89.1k
    if (m_bitsLeft >= 0)
2546
26.3k
        writeOut();
2547
89.1k
}
2548
2549
/** Encode terminating bin */
2550
void Entropy::encodeBinTrm(uint32_t binValue)
2551
29.5k
{
2552
29.5k
    if (!m_bitIf)
2553
13.2k
    {
2554
13.2k
        m_fracBits += sbacGetEntropyBitsTrm(binValue);
2555
13.2k
        return;
2556
13.2k
    }
2557
2558
16.2k
    m_range -= 2;
2559
16.2k
    if (binValue)
2560
2.93k
    {
2561
2.93k
        m_low += m_range;
2562
2.93k
        m_low <<= 7;
2563
2.93k
        m_range = 2 << 7;
2564
2.93k
        m_bitsLeft += 7;
2565
2.93k
    }
2566
13.2k
    else if (m_range >= 256)
2567
12.5k
        return;
2568
747
    else
2569
747
    {
2570
747
        m_low <<= 1;
2571
747
        m_range <<= 1;
2572
747
        m_bitsLeft++;
2573
747
    }
2574
2575
3.68k
    if (m_bitsLeft >= 0)
2576
2.73k
        writeOut();
2577
3.68k
}
2578
2579
/** Move bits from register into bitstream */
2580
void Entropy::writeOut()
2581
50.5k
{
2582
50.5k
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2583
50.5k
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2584
2585
50.5k
    m_bitsLeft -= 8;
2586
50.5k
    m_low &= low_mask;
2587
2588
50.5k
    if (leadByte == 0xff)
2589
4.01k
        m_numBufferedBytes++;
2590
46.5k
    else
2591
46.5k
    {
2592
46.5k
        uint32_t numBufferedBytes = m_numBufferedBytes;
2593
46.5k
        if (numBufferedBytes > 0)
2594
43.6k
        {
2595
43.6k
            uint32_t carry = leadByte >> 8;
2596
43.6k
            uint32_t byteTowrite = m_bufferedByte + carry;
2597
43.6k
            m_bitIf->writeByte(byteTowrite);
2598
2599
43.6k
            byteTowrite = (0xff + carry) & 0xff;
2600
47.6k
            while (numBufferedBytes > 1)
2601
4.00k
            {
2602
4.00k
                m_bitIf->writeByte(byteTowrite);
2603
4.00k
                numBufferedBytes--;
2604
4.00k
            }
2605
43.6k
        }
2606
46.5k
        m_numBufferedBytes = 1;
2607
46.5k
        m_bufferedByte = (uint8_t)leadByte;
2608
46.5k
    }
2609
50.5k
}
2610
2611
// Bit-cost lookup table with 128 entries.
// NOTE(review): presumably read through sbacGetEntropyBits(), whose results
// are added to m_fracBits next to the 32768-per-bin charge used for
// equiprobable bins -- so entries look like fixed-point costs where 32768
// equals one bit. Confirm the indexing against sbacGetEntropyBits().
const uint32_t g_entropyBits[128] =
2612
{
2613
    // Corrected table, most notably for last state
2614
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
2615
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
2616
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
2617
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
2618
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
2619
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
2620
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
2621
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
2622
};
2623
2624
// Context-state transition table: 128 rows of two successor states each.
// The last two rows (126 and 127) map to themselves for both columns.
// NOTE(review): presumably indexed as [current state][bin value] by
// sbacNext() -- confirm against its definition.
const uint8_t g_nextState[128][2] =
2625
{
2626
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
2627
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
2628
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
2629
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
2630
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
2631
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
2632
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
2633
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
2634
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
2635
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
2636
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
2637
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
2638
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
2639
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
2640
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
2641
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
2642
};
2643
2644
}
2645
2646
// Packed per-state table exported with C linkage: each 32-bit entry holds an
// 8-bit field in its top byte and a 24-bit bit cost in its low 24 bits.
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
// The low 24 bits of entry i repeat g_entropyBits[i].
// NOTE(review): the top byte looks like g_nextState[i][0] with its lowest bit
// cleared, and the C linkage suggests assembly consumers -- confirm both.
2647
extern "C" const uint32_t PFX(entropyStateBits)[128] =
2648
{
2649
    // Corrected table, most notably for last state
2650
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
2651
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
2652
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
2653
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
2654
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
2655
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
2656
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
2657
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
2658
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
2659
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
2660
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
2661
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
2662
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
2663
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
2664
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
2665
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
2666
};
2667