Coverage Report

Created: 2025-08-28 06:38

/src/libhevc/encoder/ihevce_cabac_tu.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
******************************************************************************
22
* @file ihevce_cabac_tu.c
23
*
24
* @brief
25
*  This file contains function definitions for cabac entropy coding of
26
*  transform units of HEVC syntax
27
*
28
* @author
29
*  ittiam
30
*
31
* @List of Functions
32
*  ihevce_cabac_encode_qp_delta()
33
*  ihevce_cabac_encode_last_coeff_x_y()
34
*  ihevce_encode_transform_tree()
35
*  ihevce_cabac_residue_encode()
36
*  ihevce_cabac_residue_encode_rdopt()
37
*  ihevce_cabac_residue_encode_rdoq()
38
*  ihevce_code_all_sig_coeffs_as_0_explicitly()
39
*  ihevce_find_new_last_csb()
40
*  ihevce_copy_backup_ctxt()
41
*  ihevce_estimate_num_bits_till_next_non_zero_coeff()
42
*
43
******************************************************************************
44
*/
45
46
/*****************************************************************************/
47
/* File Includes                                                             */
48
/*****************************************************************************/
49
50
/* System include files */
51
#include <stdio.h>
52
#include <string.h>
53
#include <stdlib.h>
54
#include <assert.h>
55
#include <stdarg.h>
56
#include <math.h>
57
58
/* User include files */
59
#include "ihevc_typedefs.h"
60
#include "itt_video_api.h"
61
#include "ihevce_api.h"
62
63
#include "rc_cntrl_param.h"
64
#include "rc_frame_info_collector.h"
65
#include "rc_look_ahead_params.h"
66
67
#include "ihevc_defs.h"
68
#include "ihevc_structs.h"
69
#include "ihevc_platform_macros.h"
70
#include "ihevc_deblk.h"
71
#include "ihevc_itrans_recon.h"
72
#include "ihevc_chroma_itrans_recon.h"
73
#include "ihevc_chroma_intra_pred.h"
74
#include "ihevc_intra_pred.h"
75
#include "ihevc_inter_pred.h"
76
#include "ihevc_mem_fns.h"
77
#include "ihevc_padding.h"
78
#include "ihevc_weighted_pred.h"
79
#include "ihevc_sao.h"
80
#include "ihevc_resi_trans.h"
81
#include "ihevc_quant_iquant_ssd.h"
82
#include "ihevc_cabac_tables.h"
83
#include "ihevc_trans_macros.h"
84
#include "ihevc_trans_tables.h"
85
86
#include "ihevce_defs.h"
87
#include "ihevce_lap_enc_structs.h"
88
#include "ihevce_multi_thrd_structs.h"
89
#include "ihevce_me_common_defs.h"
90
#include "ihevce_had_satd.h"
91
#include "ihevce_error_codes.h"
92
#include "ihevce_bitstream.h"
93
#include "ihevce_cabac.h"
94
#include "ihevce_rdoq_macros.h"
95
#include "ihevce_function_selector.h"
96
#include "ihevce_enc_structs.h"
97
#include "ihevce_entropy_structs.h"
98
#include "ihevce_cmn_utils_instr_set_router.h"
99
#include "ihevce_enc_loop_structs.h"
100
#include "ihevce_bs_compute_ctb.h"
101
#include "ihevce_global_tables.h"
102
#include "ihevce_common_utils.h"
103
#include "ihevce_trace.h"
104
105
/*****************************************************************************/
106
/* Globals                                                                   */
107
/*****************************************************************************/
108
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
109
110
/**
111
******************************************************************************
112
* @brief  LUT for deriving of last significant coeff prefix.
113
*
114
* @input   : last_significant_coeff
115
*
116
* @output  : last_significant_prefix (does not include the suffix)
117
*
118
* @remarks Look up tables taken from HM-8.0-dev
119
******************************************************************************
120
*/
121
const UWORD8 gu1_hevce_last_coeff_prefix[32] = {
    /* last coeff positions 0..3 : prefix equals the position */
    0, 1, 2, 3,
    /* last coeff positions 4..7 */
    4, 4, 5, 5,
    /* last coeff positions 8..15 */
    6, 6, 6, 6, 7, 7, 7, 7,
    /* last coeff positions 16..23 */
    8, 8, 8, 8, 8, 8, 8, 8,
    /* last coeff positions 24..31 */
    9, 9, 9, 9, 9, 9, 9, 9
};
123
124
/**
125
*****************************************************************************
126
* @brief  LUT for deriving of last significant coeff suffix
127
*
128
* @input   : last significant prefix
129
*
130
* @output  : prefix code that needs to be subtracted from last_pos to get
131
*           suffix as per equation 7-55 in section 7.4.12.
132
*
133
*           It returns the following code for last_significant_prefix > 3
134
*            ((1 << ((last_significant_coeff_x_prefix >> 1) - 1))  *
135
*            (2 + (last_significant_coeff_x_prefix & 1))
136
*
137
*
138
* @remarks Look up tables taken from HM-8.0-dev
139
*****************************************************************************
140
*/
141
/* Indexed by the last significant coeff prefix (0..9); entries 0..3 equal the prefix itself */
const UWORD8 gu1_hevce_last_coeff_prefix_code[10] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24 };
142
143
/**
144
*****************************************************************************
145
* @brief  returns raster index of 4x4 block for diag up-right/horz/vert scans
146
*
147
* @input   : scan type and scan idx
148
*
149
* @output  : packed y pos (msb 2 bits) and x pos (lsb 2 bits), i.e. (y << 2) | x
150
*
151
*****************************************************************************
152
*/
153
const UWORD8 gu1_hevce_scan4x4[3][16] = {
    /* scan type 0 : diag up right */
    {  0,  4,  1,  8,
       5,  2, 12,  9,
       6,  3, 13, 10,
       7, 14, 11, 15 },

    /* scan type 1 : horz (raster index equals scan index) */
    {  0,  1,  2,  3,
       4,  5,  6,  7,
       8,  9, 10, 11,
      12, 13, 14, 15 },

    /* scan type 2 : vert */
    {  0,  4,  8, 12,
       1,  5,  9, 13,
       2,  6, 10, 14,
       3,  7, 11, 15 }
};
163
164
/**
165
*****************************************************************************
166
* @brief  returns context increment for sig coeff based on csbf neighbour
167
*         flags (bottom and right) and current coeff position in 4x4 block
168
*         See section 9.3.3.1.4 for details on this context increment
169
*
170
* @input   : neighbour csbf flags(bit0:rightcsbf, bit1:bottom csbf)
171
*           coeff idx in raster order (0-15)
172
*
173
* @output  : context increment for sig coeff flag
174
*
175
*****************************************************************************
176
*/
177
const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16] = {
    /* Each entry below is written as a 4x4 grid: one text row per yP, xP runs left to right */

    /* nbr csbf = 0:  sigCtx = (xP+yP == 0) ? 2 : (xP+yP < 3) ? 1: 0 */
    { 2, 1, 1, 0,
      1, 1, 0, 0,
      1, 0, 0, 0,
      0, 0, 0, 0 },

    /* nbr csbf = 1:  sigCtx = (yP == 0) ? 2 : (yP == 1) ? 1: 0      */
    { 2, 2, 2, 2,
      1, 1, 1, 1,
      0, 0, 0, 0,
      0, 0, 0, 0 },

    /* nbr csbf = 2:  sigCtx = (xP == 0) ? 2 : (xP == 1) ? 1: 0      */
    { 2, 1, 0, 0,
      2, 1, 0, 0,
      2, 1, 0, 0,
      2, 1, 0, 0 },

    /* nbr csbf = 3:  sigCtx = 2                                     */
    { 2, 2, 2, 2,
      2, 2, 2, 2,
      2, 2, 2, 2,
      2, 2, 2, 2 }
};
190
191
/* 16-entry table whose entries are all 0.                                   */
/* NOTE(review): presumably an all-zero stand-in for a row of the sig coeff  */
/* context increment table; its users are not visible here - confirm.        */
const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
192
193
/**
194
*****************************************************************************
195
* @brief  returns context increment for sig coeff for 4x4 transform size as
196
*         per Table 9-39 in section 9.3.3.1.4
197
*
198
* @input   : coeff idx in raster order (0-15)
199
*
200
* @output  : context increment for sig coeff flag
201
*
202
*****************************************************************************
203
*/
204
/* One entry per coeff of the 4x4 block, indexed in raster order (0-15).          */
/* NOTE(review): the final entry (idx 15) is 0 - presumably a filler value; confirm */
const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16] = { 0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 0 };
205
206
#define DISABLE_ZCSBF 0
207
208
#define TEST_CABAC_BITESTIMATE 0
209
210
/*****************************************************************************/
211
/* Function Definitions                                                      */
212
/*****************************************************************************/
213
/**
214
******************************************************************************
215
*
216
*  @brief Entropy encoding of qp_delta in a tu as per sec 9.3.2 Table 9-32
217
*
218
*  @par   Description
219
*  truncated unary binarization is done up to an abs_delta of 5 and the rest
220
*  is coded as 0th order Exponential Golomb code
221
*
222
*  @param[inout]   ps_cabac
223
*  pointer to cabac encoding context (handle)
224
*
225
*  @param[in]      qp_delta
226
*  delta qp that needs to be encoded
227
*
228
*  @return      success or failure error code
229
*
230
******************************************************************************
231
*/
232
WORD32 ihevce_cabac_encode_qp_delta(cab_ctxt_t *ps_cabac, WORD32 qp_delta)
{
    WORD32 status = IHEVCE_SUCCESS;
    WORD32 abs_val = ABS(qp_delta);
    WORD32 prefix_max = TU_MAX_QP_DELTA_ABS;
    WORD32 prefix_val = MIN(abs_val, prefix_max);

    /* Magnitude prefix: truncated unary code of min(|qp_delta|, prefix_max) */
    /* (cu_qp_delta_abs, see Table 9-32 and Table 9-37)                      */
    status |= ihevce_cabac_encode_tunary(
        ps_cabac, prefix_val, prefix_max, IHEVC_CAB_QP_DELTA_ABS, 0, CTXT_MAX_QP_DELTA_ABS);

    /* Magnitude suffix: whatever exceeds the prefix is sent as an EG0 code */
    if(abs_val >= prefix_max)
    {
        status |= ihevce_cabac_encode_egk(ps_cabac, abs_val - prefix_max, 0);
    }
    AEV_TRACE("cu_qp_delta_abs", abs_val, ps_cabac->u4_range);

    /* Sign is sent as a single bypass bin, and only for a non-zero delta */
    if(0 != abs_val)
    {
        WORD32 is_negative = (qp_delta < 0) ? 1 : 0;

        status |= ihevce_cabac_encode_bypass_bin(ps_cabac, is_negative);
        AEV_TRACE("cu_qp_delta_sign", is_negative, ps_cabac->u4_range);
    }

    return (status);
}
260
261
/**
262
******************************************************************************
263
*
264
*  @brief Encodes position of the last coded coeff (in scan order) of TU
265
*
266
*  @par   Description
267
*  Entropy encode of last coded coeff of a TU as per section:7.3.13
268
*
269
*  @param[inout]   ps_cabac
270
*  pointer to cabac context (handle)
271
*
272
*  @param[in]      last_coeff_x
273
*  x co-ordinate of the last coded coeff of TU(in scan order)
274
*
275
*  @param[in]      last_coeff_y
276
*  y co-ordinate of the last coded coeff of TU (in scan order)
277
*
278
*  @param[in]      log2_tr_size
279
*  transform block size corresponding to this node in quad tree
280
*
281
*  @param[in]      is_luma
282
*  indicates if residual block corresponds to luma or chroma block
283
*
284
*  @return      success or failure error code
285
*
286
******************************************************************************
287
*/
288
WORD32 ihevce_cabac_encode_last_coeff_x_y(
289
    cab_ctxt_t *ps_cabac,
290
    WORD32 last_coeff_x,
291
    WORD32 last_coeff_y,
292
    WORD32 log2_tr_size,
293
    WORD32 is_luma)
294
18.2M
{
295
18.2M
    WORD32 ret = IHEVCE_SUCCESS;
296
297
18.2M
    WORD32 last_coeff_x_prefix;
298
18.2M
    WORD32 last_coeff_y_prefix;
299
18.2M
    WORD32 suffix, suf_length;
300
18.2M
    WORD32 c_max;
301
18.2M
    WORD32 ctxt_idx_x, ctxt_idx_y, ctx_shift;
302
303
    /* derive the prefix code */
304
18.2M
    last_coeff_x_prefix = gu1_hevce_last_coeff_prefix[last_coeff_x];
305
18.2M
    last_coeff_y_prefix = gu1_hevce_last_coeff_prefix[last_coeff_y];
306
307
18.2M
    c_max = gu1_hevce_last_coeff_prefix[(1 << log2_tr_size) - 1];
308
309
    /* context increment as per section 9.3.3.1.2 */
310
18.2M
    if(is_luma)
311
12.2M
    {
312
12.2M
        WORD32 ctx_offset = (3 * (log2_tr_size - 2)) + ((log2_tr_size - 1) >> 2);
313
314
12.2M
        ctxt_idx_x = IHEVC_CAB_COEFFX_PREFIX + ctx_offset;
315
12.2M
        ctxt_idx_y = IHEVC_CAB_COEFFY_PREFIX + ctx_offset;
316
12.2M
        ctx_shift = (log2_tr_size + 1) >> 2;
317
12.2M
    }
318
5.98M
    else
319
5.98M
    {
320
5.98M
        ctxt_idx_x = IHEVC_CAB_COEFFX_PREFIX + 15;
321
5.98M
        ctxt_idx_y = IHEVC_CAB_COEFFY_PREFIX + 15;
322
5.98M
        ctx_shift = log2_tr_size - 2;
323
5.98M
    }
324
325
    /* code the last_coeff_x_prefix as tunary binarized code */
326
18.2M
    ret |= ihevce_cabac_encode_tunary(
327
18.2M
        ps_cabac, last_coeff_x_prefix, c_max, ctxt_idx_x, ctx_shift, c_max);
328
329
18.2M
    AEV_TRACE("last_coeff_x_prefix", last_coeff_x_prefix, ps_cabac->u4_range);
330
331
    /* code the last_coeff_y_prefix as tunary binarized code */
332
18.2M
    ret |= ihevce_cabac_encode_tunary(
333
18.2M
        ps_cabac, last_coeff_y_prefix, c_max, ctxt_idx_y, ctx_shift, c_max);
334
335
18.2M
    AEV_TRACE("last_coeff_y_prefix", last_coeff_y_prefix, ps_cabac->u4_range);
336
337
18.2M
    if(last_coeff_x_prefix > 3)
338
3.63M
    {
339
        /* code the last_coeff_x_suffix as FLC bypass code */
340
3.63M
        suffix = last_coeff_x - gu1_hevce_last_coeff_prefix_code[last_coeff_x_prefix];
341
342
3.63M
        suf_length = ((last_coeff_x_prefix - 2) >> 1);
343
344
3.63M
        ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, suffix, suf_length);
345
346
3.63M
        AEV_TRACE("last_coeff_x_suffix", suffix, ps_cabac->u4_range);
347
3.63M
    }
348
349
18.2M
    if(last_coeff_y_prefix > 3)
350
3.65M
    {
351
        /* code the last_coeff_y_suffix as FLC bypass code */
352
3.65M
        suffix = last_coeff_y - gu1_hevce_last_coeff_prefix_code[last_coeff_y_prefix];
353
354
3.65M
        suf_length = ((last_coeff_y_prefix - 2) >> 1);
355
356
3.65M
        ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, suffix, suf_length);
357
358
3.65M
        AEV_TRACE("last_coeff_y_suffix", suffix, ps_cabac->u4_range);
359
3.65M
    }
360
361
18.2M
    return (ret);
362
18.2M
}
363
364
/**
365
******************************************************************************
366
*
367
*  @brief Encodes a transform tree as per section 7.3.11
368
*
369
*  @par   Description
370
*  Uses recursion till a leaf node is reached where a transform unit
371
*  is coded. While recursing split_transform_flag and parent chroma cbf flags
372
*  are coded before recursing to leaf node
373
*
374
*  @param[inout]   ps_entropy_ctxt
375
*  pointer to entropy context (handle)
376
*
377
*  @param[in]      x0_ctb
378
*  x co-ordinate w.r.t ctb start of current tu node of coding tree
379
*
380
*  @param[in]      y0_ctb
381
*  y co-ordinate w.r.t ctb start of current tu node of coding tree
382
*
383
*  @param[in]      log2_tr_size
384
*  transform block size corresponding to this node in quad tree
385
*
386
*  @param[in]      tr_depth
387
*  current depth of the tree
388
*
389
*  @param[in]      ps_enc_cu
390
*  pointer to the enc loop output of the cu whose tus are being coded
391
*
392
*  @param[in]      blk_num
393
*  current block number in the quad tree (required for chroma 4x4 coding)
394
*
395
*  @return      success or failure error code
396
*
397
******************************************************************************
398
*/
399
WORD32 ihevce_encode_transform_tree(
400
    entropy_context_t *ps_entropy_ctxt,
401
    WORD32 x0_ctb,
402
    WORD32 y0_ctb,
403
    WORD32 log2_tr_size,
404
    WORD32 tr_depth,
405
    WORD32 blk_num,
406
    cu_enc_loop_out_t *ps_enc_cu)
407
27.1M
{
408
27.1M
    WORD32 ret = IHEVCE_SUCCESS;
409
27.1M
    sps_t *ps_sps = ps_entropy_ctxt->ps_sps;
410
27.1M
    WORD32 split_tr_flag;
411
412
27.1M
    WORD32 tu_idx = ps_entropy_ctxt->i4_tu_idx;
413
27.1M
    tu_enc_loop_out_t *ps_enc_tu = ps_enc_cu->ps_enc_tu + tu_idx;
414
415
    /* TU size in pels */
416
27.1M
    WORD32 tu_size = 4 << ps_enc_tu->s_tu.b3_size;
417
418
27.1M
    cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
419
420
27.1M
    WORD32 max_tr_depth;
421
27.1M
    WORD32 is_intra = (ps_enc_cu->b1_pred_mode_flag == PRED_MODE_INTRA);
422
27.1M
    WORD32 log2_min_trafo_size, log2_max_trafo_size;
423
27.1M
    UWORD32 u4_bits_estimated_prev;
424
425
27.1M
    WORD32 intra_nxn_pu = 0;
426
27.1M
    WORD32 ctxt_inc;
427
27.1M
    WORD32 cbf_luma = 0;
428
27.1M
    WORD32 ai4_cbf_cb[2] = { 0, 0 };
429
27.1M
    WORD32 ai4_cbf_cr[2] = { 0, 0 };
430
27.1M
    UWORD32 tu_split_bits = 0;
431
27.1M
    UWORD8 u1_is_422 = (ps_sps->i1_chroma_format_idc == 2);
432
433
27.1M
    tu_split_bits = ps_cabac->u4_bits_estimated_q12;
434
    /* intialize min / max transform sizes based on sps */
435
27.1M
    log2_min_trafo_size = ps_sps->i1_log2_min_transform_block_size;
436
437
27.1M
    log2_max_trafo_size = log2_min_trafo_size + ps_sps->i1_log2_diff_max_min_transform_block_size;
438
439
    /* intialize max transform depth for intra / inter signalled in sps */
440
27.1M
    if(is_intra)
441
22.5M
    {
442
22.5M
        max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_intra;
443
22.5M
        intra_nxn_pu = ps_enc_cu->b3_part_mode == PART_NxN;
444
22.5M
    }
445
4.57M
    else
446
4.57M
    {
447
4.57M
        max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
448
4.57M
    }
449
450
    /* Sanity checks */
451
27.1M
    ASSERT(tr_depth <= 4);
452
27.1M
    ASSERT(log2_min_trafo_size >= 2);
453
27.1M
    ASSERT(log2_max_trafo_size <= 5);
454
27.1M
    ASSERT((tu_idx >= 0) && (tu_idx < ps_enc_cu->u2_num_tus_in_cu));
455
27.1M
    ASSERT((tu_size >= 4) && (tu_size <= (1 << log2_tr_size)));
456
457
    /* Encode split transform flag based on following conditions; sec 7.3.11 */
458
27.1M
    if((log2_tr_size <= log2_max_trafo_size) && (log2_tr_size > log2_min_trafo_size) &&
459
27.1M
       (tr_depth < max_tr_depth) && (!(intra_nxn_pu && (tr_depth == 0))))
460
14.1M
    {
461
        /* encode the split transform flag, context derived as per Table9-37 */
462
14.1M
        ctxt_inc = IHEVC_CAB_SPLIT_TFM + (5 - log2_tr_size);
463
464
        /* split if actual tu size is smaller than target tu size */
465
14.1M
        split_tr_flag = tu_size < (1 << log2_tr_size);
466
14.1M
        u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
467
14.1M
        ret |= ihevce_cabac_encode_bin(ps_cabac, split_tr_flag, ctxt_inc);
468
469
14.1M
        if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
470
1.43M
        {  // clang-format off
471
            /*PIC INFO : populate cu split flag*/
472
1.43M
            ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_split_tu_flag +=
473
1.43M
                (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
474
1.43M
        }  // clang-format on
475
476
14.1M
        AEV_TRACE("split_transform_flag", split_tr_flag, ps_cabac->u4_range);
477
14.1M
    }
478
12.9M
    else
479
12.9M
    {
480
12.9M
        WORD32 inter_split;
481
        /*********************************************************************/
482
        /*                                                                   */
483
        /* split tr is implicitly derived as 1 if  (see section 7.4.10)      */
484
        /*  a. log2_tr_size > log2_max_trafo_size                            */
485
        /*  b. intra cu has NXN pu                                           */
486
        /*  c. inter cu is not 2Nx2N && max_transform_hierarchy_depth_inter=0*/
487
        /*                                                                   */
488
        /* split tu is implicitly derived as 0 otherwise                     */
489
        /*********************************************************************/
490
12.9M
        inter_split = (!is_intra) && (max_tr_depth == 0) && (tr_depth == 0) &&
491
12.9M
                      (ps_enc_cu->b3_part_mode != PART_2Nx2N);
492
493
12.9M
        if((log2_tr_size > log2_max_trafo_size) || (intra_nxn_pu && (tr_depth == 0)) ||
494
12.9M
           (inter_split))
495
889k
        {
496
889k
            split_tr_flag = 1;
497
889k
        }
498
12.1M
        else
499
12.1M
        {
500
12.1M
            split_tr_flag = 0;
501
12.1M
        }
502
12.9M
    }
503
    /*accumulate only tu tree bits*/
504
27.1M
    ps_cabac->u4_true_tu_split_flag_q12 += ps_cabac->u4_bits_estimated_q12 - tu_split_bits;
505
506
    /* Encode the cbf flags for chroma before the split as per sec 7.3.11   */
507
27.1M
    if(log2_tr_size > 2)
508
20.3M
    {
509
        /* encode the cbf cb, context derived as per Table 9-37 */
510
20.3M
        ctxt_inc = IHEVC_CAB_CBCR_IDX + tr_depth;
511
512
        /* Note chroma cbf is coded for depth=0 or if parent cbf was coded */
513
20.3M
        if((tr_depth == 0) || (ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1]) ||
514
20.3M
           (ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1]))
515
12.4M
        {
516
12.4M
#if CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF
517
            /*************************************************************/
518
            /* Bit-Efficient chroma cbf signalling                       */
519
            /* if children nodes have 0 cbf parent cbf can be coded as 0 */
520
            /* peeking through all the child nodes for cb to check if    */
521
            /* parent can be coded as 0                                  */
522
            /*************************************************************/
523
12.4M
            WORD32 tu_cnt = 0;
524
18.5M
            while(1)
525
18.5M
            {
526
18.5M
                WORD32 trans_size = 1 << (ps_enc_tu[tu_cnt].s_tu.b3_size + 2);
527
18.5M
                WORD32 tu_x = (ps_enc_tu[tu_cnt].s_tu.b4_pos_x << 2);
528
18.5M
                WORD32 tu_y = (ps_enc_tu[tu_cnt].s_tu.b4_pos_y << 2);
529
530
18.5M
                ASSERT(tu_cnt < ps_enc_cu->u2_num_tus_in_cu);
531
532
18.5M
                if((ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf) || (ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1))
533
2.72M
                {
534
2.72M
                    ai4_cbf_cb[0] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf;
535
2.72M
                    ai4_cbf_cb[1] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1;
536
2.72M
                    break;
537
2.72M
                }
538
539
                /* 8x8 parent has only one 4x4 valid chroma block for 420 */
540
15.8M
                if(3 == log2_tr_size)
541
3.84M
                    break;
542
543
11.9M
                if((tu_x + trans_size == (x0_ctb + (1 << log2_tr_size))) &&
544
11.9M
                   (tu_y + trans_size == (y0_ctb + (1 << log2_tr_size))))
545
5.90M
                {
546
5.90M
                    ai4_cbf_cb[0] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf;
547
5.90M
                    ai4_cbf_cb[1] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1;
548
5.90M
                    ASSERT(
549
5.90M
                        (0 == ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf) &&
550
5.90M
                        (0 == ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1));
551
5.90M
                    break;
552
5.90M
                }
553
554
6.08M
                tu_cnt++;
555
6.08M
            }
556
#else
557
            /* read cbf only when split is 0 (child node) else force cbf=1 */
558
            ai4_cbf_cb[0] = (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cb_cbf;
559
            ai4_cbf_cb[1] =
560
                (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cb_cbf_subtu1;
561
562
#endif
563
12.4M
            if((u1_is_422) && ((!split_tr_flag) || (3 == log2_tr_size)))
564
0
            {
565
0
                u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
566
0
                ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[0], ctxt_inc);
567
568
0
                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
569
0
                {  // clang-format off
570
                    /*PIC INFO : Populate CBF cr bits*/
571
0
                    ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
572
0
                        (ps_cabac->u4_bits_estimated_q12 -
573
0
                            u4_bits_estimated_prev);
574
0
                }  // clang-format on
575
576
0
                AEV_TRACE("cbf_cb", ai4_cbf_cb[0], ps_cabac->u4_range);
577
578
0
                u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
579
0
                ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[1], ctxt_inc);
580
581
0
                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
582
0
                {  // clang-format off
583
                    /*PIC INFO : Populate CBF cr bits*/
584
0
                    ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
585
0
                        (ps_cabac->u4_bits_estimated_q12 -
586
0
                            u4_bits_estimated_prev);
587
0
                }  // clang-format on
588
589
0
                AEV_TRACE("cbf_cb", ai4_cbf_cb[1], ps_cabac->u4_range);
590
0
            }
591
12.4M
            else
592
12.4M
            {
593
12.4M
                u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
594
12.4M
                ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[0] || ai4_cbf_cb[1], ctxt_inc);
595
596
12.4M
                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
597
1.76M
                {  // clang-format off
598
                    /*PIC INFO : Populate CBF cr bits*/
599
1.76M
                    ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
600
1.76M
                        (ps_cabac->u4_bits_estimated_q12 -
601
1.76M
                            u4_bits_estimated_prev);
602
1.76M
                }  // clang-format on
603
604
12.4M
                AEV_TRACE("cbf_cb", ai4_cbf_cb[0] || ai4_cbf_cb[1], ps_cabac->u4_range);
605
12.4M
            }
606
12.4M
        }
607
7.83M
        else
608
7.83M
        {
609
7.83M
            ai4_cbf_cb[0] = ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1];
610
7.83M
            ai4_cbf_cb[1] = ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1];
611
7.83M
        }
612
613
20.3M
        if((tr_depth == 0) || (ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1]) ||
614
20.3M
           (ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1]))
615
12.4M
        {
616
12.4M
#if CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF
617
            /*************************************************************/
618
            /* Bit-Efficient chroma cbf signalling                       */
619
            /* if children nodes have 0 cbf parent cbf can be coded as 0 */
620
            /* peeking through all the child nodes for cr to check if    */
621
            /* parent can be coded as 0                                  */
622
            /*************************************************************/
623
12.4M
            WORD32 tu_cnt = 0;
624
18.5M
            while(1)
625
18.5M
            {
626
18.5M
                WORD32 trans_size = 1 << (ps_enc_tu[tu_cnt].s_tu.b3_size + 2);
627
18.5M
                WORD32 tu_x = (ps_enc_tu[tu_cnt].s_tu.b4_pos_x << 2);
628
18.5M
                WORD32 tu_y = (ps_enc_tu[tu_cnt].s_tu.b4_pos_y << 2);
629
630
18.5M
                ASSERT(tu_cnt < ps_enc_cu->u2_num_tus_in_cu);
631
632
18.5M
                if((ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf) || (ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1))
633
2.63M
                {
634
2.63M
                    ai4_cbf_cr[0] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf;
635
2.63M
                    ai4_cbf_cr[1] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1;
636
2.63M
                    break;
637
2.63M
                }
638
639
                /* 8x8 parent has only one 4x4 valid chroma block for 420 */
640
15.9M
                if(3 == log2_tr_size)
641
3.91M
                    break;
642
643
12.0M
                if((tu_x + trans_size == (x0_ctb + (1 << log2_tr_size))) &&
644
12.0M
                   (tu_y + trans_size == (y0_ctb + (1 << log2_tr_size))))
645
5.92M
                {
646
5.92M
                    ai4_cbf_cr[0] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf;
647
5.92M
                    ai4_cbf_cr[1] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1;
648
5.92M
                    ASSERT(
649
5.92M
                        (0 == ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf) &&
650
5.92M
                        (0 == ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1));
651
5.92M
                    break;
652
5.92M
                }
653
654
6.09M
                tu_cnt++;
655
6.09M
            }
656
#else
657
            /* read cbf only when split is 0 (child node) else force cbf=1 */
658
            ai4_cbf_cr[0] = (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cr_cbf;
659
            ai4_cbf_cr[1] =
660
                (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cr_cbf_subtu1;
661
#endif
662
663
12.4M
            if((u1_is_422) && ((!split_tr_flag) || (3 == log2_tr_size)))
664
0
            {
665
0
                u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
666
0
                ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[0], ctxt_inc);
667
668
0
                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
669
0
                {  // clang-format off
670
                    /*PIC INFO : Populate CBF cr bits*/
671
0
                    ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
672
0
                        (ps_cabac->u4_bits_estimated_q12 -
673
0
                            u4_bits_estimated_prev);
674
0
                }  // clang-format on
675
676
0
                AEV_TRACE("cbf_cr", ai4_cbf_cr[0], ps_cabac->u4_range);
677
678
0
                u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
679
0
                ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[1], ctxt_inc);
680
681
0
                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
682
0
                {  // clang-format off
683
                    /*PIC INFO : Populate CBF cr bits*/
684
0
                    ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
685
0
                        (ps_cabac->u4_bits_estimated_q12 -
686
0
                            u4_bits_estimated_prev);
687
0
                }  // clang-format on
688
689
0
                AEV_TRACE("cbf_cr", ai4_cbf_cr[1], ps_cabac->u4_range);
690
0
            }
691
12.4M
            else
692
12.4M
            {
693
12.4M
                u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
694
12.4M
                ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[0] || ai4_cbf_cr[1], ctxt_inc);
695
696
12.4M
                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
697
1.75M
                {  // clang-format off
698
                    /*PIC INFO : Populate CBF cr bits*/
699
1.75M
                    ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
700
1.75M
                        (ps_cabac->u4_bits_estimated_q12 -
701
1.75M
                            u4_bits_estimated_prev);
702
1.75M
                }  // clang-format on
703
704
12.4M
                AEV_TRACE("cbf_cr", ai4_cbf_cr[0] || ai4_cbf_cr[1], ps_cabac->u4_range);
705
12.4M
            }
706
12.4M
        }
707
7.85M
        else
708
7.85M
        {
709
7.85M
            ai4_cbf_cr[0] = ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1];
710
7.85M
            ai4_cbf_cr[1] = ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1];
711
7.85M
        }
712
713
20.3M
        ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth] = ai4_cbf_cb[0];
714
20.3M
        ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth] = ai4_cbf_cr[0];
715
20.3M
        ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth] = ai4_cbf_cb[1];
716
20.3M
        ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth] = ai4_cbf_cr[1];
717
20.3M
    }
718
6.84M
    else
719
6.84M
    {
720
6.84M
        ai4_cbf_cb[0] = ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1];
721
6.84M
        ai4_cbf_cr[0] = ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1];
722
6.84M
        ai4_cbf_cb[1] = ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1];
723
6.84M
        ai4_cbf_cr[1] = ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1];
724
6.84M
    }
725
726
27.1M
    if(split_tr_flag)
727
4.18M
    {
728
        /* recurse into quad child nodes till a leaf node is reached */
729
4.18M
        WORD32 x1_ctb = x0_ctb + ((1 << log2_tr_size) >> 1);
730
4.18M
        WORD32 y1_ctb = y0_ctb + ((1 << log2_tr_size) >> 1);
731
732
        /* node0 of quad tree */
733
4.18M
        ret |= ihevce_encode_transform_tree(
734
4.18M
            ps_entropy_ctxt,
735
4.18M
            x0_ctb,
736
4.18M
            y0_ctb,
737
4.18M
            log2_tr_size - 1,
738
4.18M
            tr_depth + 1,
739
4.18M
            0, /* block 0 */
740
4.18M
            ps_enc_cu);
741
742
        /* node1 of quad tree */
743
4.18M
        ret |= ihevce_encode_transform_tree(
744
4.18M
            ps_entropy_ctxt,
745
4.18M
            x1_ctb,
746
4.18M
            y0_ctb,
747
4.18M
            log2_tr_size - 1,
748
4.18M
            tr_depth + 1,
749
4.18M
            1, /* block 1 */
750
4.18M
            ps_enc_cu);
751
752
        /* node2 of quad tree */
753
4.18M
        ret |= ihevce_encode_transform_tree(
754
4.18M
            ps_entropy_ctxt,
755
4.18M
            x0_ctb,
756
4.18M
            y1_ctb,
757
4.18M
            log2_tr_size - 1,
758
4.18M
            tr_depth + 1,
759
4.18M
            2, /* block 2 */
760
4.18M
            ps_enc_cu);
761
762
        /* node3 of quad tree */
763
4.18M
        ret |= ihevce_encode_transform_tree(
764
4.18M
            ps_entropy_ctxt,
765
4.18M
            x1_ctb,
766
4.18M
            y1_ctb,
767
4.18M
            log2_tr_size - 1,
768
4.18M
            tr_depth + 1,
769
4.18M
            3, /* block 3 */
770
4.18M
            ps_enc_cu);
771
4.18M
    }
772
22.9M
    else
773
22.9M
    {
774
        /* leaf node is reached! Encode the TU */
775
22.9M
        WORD32 encode_delta_qp;
776
22.9M
        void *pv_coeff;
777
22.9M
        void *pv_cu_coeff = ps_enc_cu->pv_coeff;
778
779
        /* condition to encode qp of cu in first coded tu */
780
22.9M
        encode_delta_qp = ps_entropy_ctxt->i1_encode_qp_delta &&
781
22.9M
                          (ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS);
782
783
22.9M
        if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
784
3.02M
        {  // clang-format off
785
            /*PIC INFO : Tota TUs based on size*/
786
3.02M
            if(32 == tu_size)
787
455k
            {
788
455k
                ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[3]++;
789
455k
            }
790
2.56M
            else
791
2.56M
            {
792
2.56M
                ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[tu_size >> 3]++;
793
2.56M
            }
794
3.02M
        }  // clang-format on
795
796
        /* sanity checks */
797
22.9M
        ASSERT(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0);
798
22.9M
        ASSERT((ps_enc_tu->s_tu.b4_pos_x << 2) == x0_ctb);
799
22.9M
        ASSERT((ps_enc_tu->s_tu.b4_pos_y << 2) == y0_ctb);
800
22.9M
        ASSERT(tu_size == (1 << log2_tr_size));
801
802
        /********************************************************************/
803
        /* encode luma cbf if any of following conditions are true          */
804
        /* intra cu | transform depth > 0 | any of chroma cbfs are coded    */
805
        /*                                                                  */
806
        /* Note that these conditions mean that cbf_luma need not be        */
807
        /* signalled and implicitly derived as 1 for inter cu whose tfr size*/
808
        /* is same as cu size and cbf for cb+cr are zero as no_residue_flag */
809
        /* at cu level = 1 indicated cbf luma is coded                      */
810
        /********************************************************************/
811
22.9M
        if(is_intra || (tr_depth != 0) || ai4_cbf_cb[0] || ai4_cbf_cr[0] ||
812
22.9M
           ((u1_is_422) && (ai4_cbf_cb[1] || ai4_cbf_cr[1])))
813
22.9M
        {
814
            /* encode  cbf luma, context derived as per Table 9-37 */
815
22.9M
            cbf_luma = ps_enc_tu->s_tu.b1_y_cbf;
816
817
22.9M
            ctxt_inc = IHEVC_CAB_CBF_LUMA_IDX;
818
22.9M
            ctxt_inc += (tr_depth == 0) ? 1 : 0;
819
820
22.9M
            if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
821
3.02M
            {
822
3.02M
                if(1 == cbf_luma)
823
1.27M
                {
824
                    // clang-format off
825
                    /*PIC INFO: Populated coded Intra/Inter TUs in CU*/
826
1.27M
                    if(1 == is_intra)
827
1.01M
                        ps_entropy_ctxt->ps_pic_level_info->i8_total_intra_coded_tu++;
828
255k
                    else
829
255k
                        ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_coded_tu++;
830
                    // clang-format on
831
1.27M
                }
832
1.75M
                else
833
1.75M
                { /*PIC INFO: Populated coded non-coded TUs in CU*/
834
1.75M
                    ps_entropy_ctxt->ps_pic_level_info->i8_total_non_coded_tu++;
835
1.75M
                }
836
3.02M
            }
837
22.9M
            u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
838
22.9M
            ret |= ihevce_cabac_encode_bin(ps_cabac, cbf_luma, ctxt_inc);
839
840
22.9M
            if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
841
3.02M
            {  // clang-format off
842
                /*PIC INFO : Populate CBF luma bits*/
843
3.02M
                ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_luma_bits +=
844
3.02M
                    (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
845
3.02M
            }  // clang-format on
846
22.9M
            AEV_TRACE("cbf_luma", cbf_luma, ps_cabac->u4_range);
847
22.9M
        }
848
6.08k
        else
849
6.08k
        {
850
6.08k
            if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
851
206
            {
852
                /*PIC INFO: Populated coded Inter TUs in CU*/
853
206
                ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_coded_tu++;
854
206
            }
855
856
            /* shall be 1 as no_residue_flag was encoded as 1 in inter cu */
857
6.08k
            ASSERT(1 == ps_enc_tu->s_tu.b1_y_cbf);
858
6.08k
            cbf_luma = ps_enc_tu->s_tu.b1_y_cbf;
859
6.08k
        }
860
861
        /*******************************************************************/
862
        /* code qp delta conditionally if following conditions are true    */
863
        /* any cbf coded (luma/cb/cr) and qp_delta_coded is 0 for this cu  */
864
        /* see section 7.3.12 Transform unit Syntax                        */
865
        /*******************************************************************/
866
22.9M
        {
867
22.9M
            WORD32 cbf_chroma = (ai4_cbf_cb[0] || ai4_cbf_cr[0]) ||
868
22.9M
                                (u1_is_422 && (ai4_cbf_cb[1] || ai4_cbf_cr[1]));
869
870
22.9M
            if((cbf_luma || cbf_chroma) && encode_delta_qp)
871
212k
            {
872
212k
                WORD32 tu_qp = ps_enc_tu->s_tu.b7_qp;
873
212k
                WORD32 qp_pred, qp_left, qp_top;
874
212k
                WORD32 qp_delta = tu_qp - ps_entropy_ctxt->i1_cur_qp;
875
212k
                WORD32 x_nbr_indx, y_nbr_indx;
876
877
                /* Added code for handling the QP neighbour population depending
878
                   on the diff_cu_qp_delta_depth: Lokesh  */
879
                /* minus 2 becoz the pos_x and pos_y are given in the order of
880
                 * 8x8 blocks rather than pixels */
881
212k
                WORD32 log2_min_cu_qp_delta_size =
882
212k
                    ps_entropy_ctxt->i1_log2_ctb_size -
883
212k
                    ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
884
                //WORD32 min_cu_qp_delta_size = 1 << log2_min_cu_qp_delta_size;
885
886
                //WORD32 curr_pos_x = ps_enc_cu->b3_cu_pos_x << 3;
887
                //WORD32 curr_pos_y = ps_enc_cu->b3_cu_pos_y << 3;
888
889
212k
                WORD32 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
890
891
212k
                ps_entropy_ctxt->i4_qg_pos_x = ps_enc_cu->b3_cu_pos_x & block_addr_align;
892
212k
                ps_entropy_ctxt->i4_qg_pos_y = ps_enc_cu->b3_cu_pos_y & block_addr_align;
893
894
212k
                x_nbr_indx = ps_entropy_ctxt->i4_qg_pos_x - 1;
895
212k
                y_nbr_indx = ps_entropy_ctxt->i4_qg_pos_y - 1;
896
897
212k
                if(ps_entropy_ctxt->i4_qg_pos_x > 0)
898
123k
                {
899
                    // clang-format off
900
123k
                    qp_left =
901
123k
                        ps_entropy_ctxt->ai4_8x8_cu_qp[x_nbr_indx +
902
123k
                                            (ps_entropy_ctxt->i4_qg_pos_y * 8)];
903
                    // clang-format on
904
123k
                }
905
212k
                if(ps_entropy_ctxt->i4_qg_pos_y > 0)
906
125k
                {
907
                    // clang-format off
908
125k
                    qp_top = ps_entropy_ctxt->ai4_8x8_cu_qp[ps_entropy_ctxt->i4_qg_pos_x +
909
125k
                                                 y_nbr_indx * 8];
910
                    // clang-format on
911
125k
                }
912
212k
                if(ps_entropy_ctxt->i4_qg_pos_x == 0)
913
88.5k
                {
914
                    /*previous coded Qp*/
915
88.5k
                    qp_left = ps_entropy_ctxt->i1_cur_qp;
916
88.5k
                }
917
212k
                if(ps_entropy_ctxt->i4_qg_pos_y == 0)
918
86.8k
                {
919
                    /*previous coded Qp*/
920
86.8k
                    qp_top = ps_entropy_ctxt->i1_cur_qp;
921
86.8k
                }
922
923
212k
                qp_pred = (qp_left + qp_top + 1) >> 1;
924
                // clang-format off
925
                /* start of every frame encode qp delta wrt slice qp when entrop
926
                 * sync is enabled */
927
212k
                if(ps_entropy_ctxt->i4_ctb_x == 0 &&
928
212k
                    ps_entropy_ctxt->i4_qg_pos_x == 0 &&
929
212k
                    ps_entropy_ctxt->i4_qg_pos_y == 0 &&
930
212k
                    ps_entropy_ctxt->s_cabac_ctxt.i1_entropy_coding_sync_enabled_flag)
931
                // clang-format on
932
4.26k
                {
933
4.26k
                    qp_pred = ps_entropy_ctxt->ps_slice_hdr->i1_slice_qp_delta +
934
4.26k
                              ps_entropy_ctxt->ps_pps->i1_pic_init_qp;
935
4.26k
                }
936
212k
                qp_delta = tu_qp - qp_pred;
937
938
                /*PIC INFO : Populate QP delta bits*/
939
212k
                u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
940
941
                /* code the qp delta */
942
212k
                ret |= ihevce_cabac_encode_qp_delta(ps_cabac, qp_delta);
943
944
212k
                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
945
212k
                {
946
                    // clang-format off
947
212k
                    ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_qp_delta_bits +=
948
212k
                        (ps_cabac->u4_bits_estimated_q12 -
949
212k
                            u4_bits_estimated_prev);
950
                    // clang-format on
951
212k
                }
952
953
212k
                ps_entropy_ctxt->i1_cur_qp = tu_qp;
954
                //ps_entropy_ctxt->i1_cur_qp = Qp_pred;
955
212k
                ps_entropy_ctxt->i1_encode_qp_delta = 0;
956
                //ps_entropy_ctxt->i4_is_cu_cbf_zero = 0;
957
212k
            }
958
959
22.9M
            if(cbf_luma || cbf_chroma)
960
7.09M
            {
961
7.09M
                ps_entropy_ctxt->i4_is_cu_cbf_zero = 0;
962
7.09M
            }
963
964
            /* code the residue of for luma and chroma tu based on cbf */
965
22.9M
            if((cbf_luma) && (1 == ps_entropy_ctxt->i4_enable_res_encode))
966
1.27M
            {
967
1.27M
                u4_bits_estimated_prev = ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
968
                /* code the luma residue */
969
1.27M
                pv_coeff = (void *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->i4_luma_coeff_offset);
970
971
1.27M
                ret |= ihevce_cabac_residue_encode(ps_entropy_ctxt, pv_coeff, log2_tr_size, 1);
972
973
1.27M
                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
974
1.27M
                {  // clang-format off
975
                    /*PIC INFO : Populate Residue Luma Bits*/
976
1.27M
                    ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_luma_bits +=
977
1.27M
                        (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
978
1.27M
                            u4_bits_estimated_prev);
979
1.27M
                }  // clang-format on
980
1.27M
            }
981
982
            /* code chroma residue based on tranform size                  */
983
            /* For Inta 4x4 pu chroma is coded after all 4 luma blks coded */
984
            /* Note: chroma not encoded in rdopt mode                      */
985
22.9M
            if(((log2_tr_size > 2) || (3 == blk_num)) /* &&
986
                (CABAC_MODE_ENCODE_BITS == ps_cabac->e_cabac_op_mode) */
987
22.9M
            )
988
17.8M
            {
989
17.8M
                WORD32 log2_chroma_tr_size;
990
17.8M
                WORD32 i4_subtu_idx;
991
17.8M
                void *pv_coeff_cb, *pv_coeff_cr;
992
993
17.8M
                WORD32 i4_num_subtus = u1_is_422 + 1;
994
995
17.8M
                if(1 == ps_entropy_ctxt->i4_enable_res_encode)
996
1.86M
                {
997
3.72M
                    for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_subtus; i4_subtu_idx++)
998
1.86M
                    {
999
1.86M
                        if(ai4_cbf_cb[i4_subtu_idx])
1000
568k
                        {
1001
                            /* initailize chroma transform size and coeff based
1002
                             * on luma size */
1003
568k
                            if(2 == log2_tr_size)
1004
280k
                            {
1005
                                /*********************************************************/
1006
                                /* For Intra 4x4, chroma transform size is 4 and chroma  */
1007
                                /* coeff offset is present  in the first Luma block      */
1008
                                /*********************************************************/
1009
280k
                                log2_chroma_tr_size = 2;
1010
1011
                                /* -3 is for going to first luma tu of the 4 TUs in min CU */
1012
280k
                                pv_coeff_cb =
1013
280k
                                    (void
1014
280k
                                         *)((UWORD8 *)pv_cu_coeff + ps_enc_tu[-3].ai4_cb_coeff_offset[i4_subtu_idx]);
1015
280k
                            }
1016
287k
                            else
1017
287k
                            {
1018
287k
                                log2_chroma_tr_size = (log2_tr_size - 1);
1019
1020
287k
                                pv_coeff_cb =
1021
287k
                                    (void
1022
287k
                                         *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->ai4_cb_coeff_offset[i4_subtu_idx]);
1023
287k
                            }
1024
                            // clang-format off
1025
568k
                            u4_bits_estimated_prev =
1026
568k
                                ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
1027
                            // clang-format on
1028
                            /* code the cb residue */
1029
568k
                            ret |= ihevce_cabac_residue_encode(
1030
568k
                                ps_entropy_ctxt, pv_coeff_cb, log2_chroma_tr_size, 0);
1031
1032
568k
                            if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
1033
568k
                            {  // clang-format off
1034
                                /*PIC INFO : Populate Residue Chroma cr Bits*/
1035
568k
                                ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_chroma_bits +=
1036
568k
                                    (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
1037
568k
                                        u4_bits_estimated_prev);
1038
568k
                            }  // clang-format on
1039
568k
                        }
1040
1.86M
                    }
1041
1.86M
                }
1042
1043
17.8M
                if(1 == ps_entropy_ctxt->i4_enable_res_encode)
1044
1.86M
                {
1045
3.72M
                    for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_subtus; i4_subtu_idx++)
1046
1.86M
                    {
1047
1.86M
                        if(ai4_cbf_cr[i4_subtu_idx])
1048
548k
                        {
1049
                            /* initailize chroma transform size and coeff based on luma size */
1050
548k
                            if(2 == log2_tr_size)
1051
271k
                            {
1052
                                /*********************************************************/
1053
                                /* For Intra 4x4, chroma transform size is 4 and chroma  */
1054
                                /* coeff offset is present  in the first Luma block      */
1055
                                /*********************************************************/
1056
271k
                                log2_chroma_tr_size = 2;
1057
1058
271k
                                pv_coeff_cr =
1059
271k
                                    (void
1060
271k
                                         *)((UWORD8 *)pv_cu_coeff + ps_enc_tu[-3].ai4_cr_coeff_offset[i4_subtu_idx]);
1061
271k
                            }
1062
276k
                            else
1063
276k
                            {
1064
276k
                                log2_chroma_tr_size = (log2_tr_size - 1);
1065
1066
276k
                                pv_coeff_cr =
1067
276k
                                    (void
1068
276k
                                         *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->ai4_cr_coeff_offset[i4_subtu_idx]);
1069
276k
                            }
1070
                            // clang-format off
1071
548k
                            u4_bits_estimated_prev =
1072
548k
                                ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
1073
                            // clang-format on
1074
                            /* code the cb residue */
1075
548k
                            ret |= ihevce_cabac_residue_encode(
1076
548k
                                ps_entropy_ctxt, pv_coeff_cr, log2_chroma_tr_size, 0);
1077
548k
                            if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
1078
548k
                            {  // clang-format off
1079
                                /*PIC INFO : Populate Residue Chroma cr Bits*/
1080
548k
                                ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_chroma_bits +=
1081
548k
                                    (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
1082
548k
                                        u4_bits_estimated_prev);
1083
548k
                            }  // clang-format on
1084
548k
                        }
1085
1.86M
                    }
1086
1.86M
                }
1087
17.8M
            }
1088
22.9M
        }
1089
1090
        /* update tu_idx after encoding current tu */
1091
22.9M
        ps_entropy_ctxt->i4_tu_idx++;
1092
22.9M
    }
1093
1094
27.1M
    return ret;
1095
27.1M
}
1096
1097
/**
1098
******************************************************************************
1099
*
1100
*  @brief Encodes a transform residual block as per section 7.3.13
1101
*
1102
*  @par   Description
1103
*   The residual block is read from a compressed coeff buffer populated during
1104
*   the scanning of the quantized coeffs. The contents of the buffer are
1105
*   briefly explained in param description of pv_coeff
1106
*
1107
*  @remarks Does not support sign data hiding and transform skip flag currently
1108
*
1109
*  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
1110
*           HM.8.0-dev for related abs_greater_than_1 context initialization
1111
*           and rice_max parameter used for coeff abs level remaining
1112
*
1113
*  @param[inout]   ps_entropy_ctxt
1114
*  pointer to entropy context (handle)
1115
*
1116
*  @param[in]      pv_coeff
1117
*  Compressed residue buffer containing following information:
1118
*
1119
*  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
1120
*
1121
*  For each 4x4 subblock starting from last_subblock_num (in scan order)
1122
*     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
1123
*
1124
*     If cur_csbf
1125
*      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
1126
*      Read 2 bytes : abs_gt1_flags (max of 8 only)
1127
*      Read 2 bytes : coeff_sign_flags
1128
*
1129
*      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
1130
*      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
1131
*
1132
*  @param[in]      log2_tr_size
1133
*  transform size of the current TU
1134
*
1135
*  @param[in]      is_luma
1136
*  boolean indicating if the texture type is luma / chroma
1137
*
1138
*
1139
*  @return      success or failure error code
1140
*
1141
******************************************************************************
1142
*/
1143
WORD32 ihevce_cabac_residue_encode(
1144
    entropy_context_t *ps_entropy_ctxt, void *pv_coeff, WORD32 log2_tr_size, WORD32 is_luma)
1145
2.38M
{
1146
2.38M
    WORD32 ret = IHEVCE_SUCCESS;
1147
2.38M
    cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
1148
2.38M
    WORD32 i4_sign_data_hiding_flag, cu_tq_bypass_flag;
1149
1150
2.38M
    UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
1151
2.38M
    UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
1152
1153
    /* last sig coeff indices in scan order */
1154
2.38M
    WORD32 last_sig_coeff_x = pu1_coeff_buf_hdr[0];
1155
2.38M
    WORD32 last_sig_coeff_y = pu1_coeff_buf_hdr[1];
1156
1157
    /* read the scan type : upright diag / horz / vert */
1158
2.38M
    WORD32 scan_type = pu1_coeff_buf_hdr[2];
1159
1160
    /************************************************************************/
1161
    /* position of the last coded sub block. This sub block contains coeff  */
1162
    /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
1163
    /* be derived here it better to be populated by scanning module         */
1164
    /************************************************************************/
1165
2.38M
    WORD32 last_csb = pu1_coeff_buf_hdr[3];
1166
1167
2.38M
    WORD32 cur_csbf = 0, nbr_csbf;
1168
2.38M
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1169
2.38M
    WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
1170
1171
2.38M
    WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
1172
1173
2.38M
    WORD32 i;
1174
1175
    /* sanity checks */
1176
    /* transform skip not supported */
1177
2.38M
    ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
1178
1179
2.38M
    cu_tq_bypass_flag = ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag;
1180
1181
2.38M
    i4_sign_data_hiding_flag = ps_entropy_ctxt->ps_pps->i1_sign_data_hiding_flag;
1182
1183
2.38M
    if(SCAN_VERT == scan_type)
1184
612k
    {
1185
        /* last coeff x and y are swapped for vertical scan */
1186
612k
        SWAP(last_sig_coeff_x, last_sig_coeff_y);
1187
612k
    }
1188
1189
    /* Encode the last_sig_coeff_x and last_sig_coeff_y */
1190
2.38M
    ret |= ihevce_cabac_encode_last_coeff_x_y(
1191
2.38M
        ps_cabac, last_sig_coeff_x, last_sig_coeff_y, log2_tr_size, is_luma);
1192
1193
    /*************************************************************************/
1194
    /* derive base context index for sig coeff as per section 9.3.3.1.4      */
1195
    /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
1196
    /*************************************************************************/
1197
2.38M
    if(is_luma)
1198
1.27M
    {
1199
1.27M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1200
1.27M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1201
1202
1.27M
        if(3 == log2_tr_size)
1203
202k
        {
1204
            /* 8x8 transform size */
1205
202k
            sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
1206
202k
        }
1207
1.06M
        else if(3 < log2_tr_size)
1208
38.6k
        {
1209
            /* larger transform sizes */
1210
38.6k
            sig_coeff_base_ctxt += 21;
1211
38.6k
        }
1212
1.27M
    }
1213
1.11M
    else
1214
1.11M
    {
1215
        /* chroma context initializations */
1216
1.11M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1217
1.11M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1218
1219
1.11M
        if(3 == log2_tr_size)
1220
90.4k
        {
1221
            /* 8x8 transform size */
1222
90.4k
            sig_coeff_base_ctxt += 9;
1223
90.4k
        }
1224
1.02M
        else if(3 < log2_tr_size)
1225
39.1k
        {
1226
            /* larger transform sizes */
1227
39.1k
            sig_coeff_base_ctxt += 12;
1228
39.1k
        }
1229
1.11M
    }
1230
1231
    /* go to csbf flags */
1232
2.38M
    pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
1233
1234
    /************************************************************************/
1235
    /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
1236
    /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
1237
    /************************************************************************/
1238
5.84M
    for(i = last_csb; i >= 0; i--)
1239
3.45M
    {
1240
3.45M
        UWORD16 u2_marker_csbf;
1241
3.45M
        WORD32 ctxt_idx;
1242
1243
3.45M
        u2_marker_csbf = *pu2_sig_coeff_buf;
1244
3.45M
        pu2_sig_coeff_buf++;
1245
1246
        /* sanity checks for marker present in every csbf flag */
1247
3.45M
        ASSERT((u2_marker_csbf >> 4) == 0xBAD);
1248
1249
        /* extract the current and neigbour csbf flags */
1250
3.45M
        cur_csbf = u2_marker_csbf & 0x1;
1251
3.45M
        nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
1252
1253
        /*********************************************************************/
1254
        /* code the csbf flags; last and first csb not sent as it is derived */
1255
        /*********************************************************************/
1256
3.45M
        if((i < last_csb) && (i > 0))
1257
804k
        {
1258
804k
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1259
1260
            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1261
804k
            ctxt_idx += nbr_csbf ? 1 : 0;
1262
804k
            ctxt_idx += is_luma ? 0 : 2;
1263
1264
804k
            ret |= ihevce_cabac_encode_bin(ps_cabac, cur_csbf, ctxt_idx);
1265
804k
            AEV_TRACE("coded_sub_block_flag", cur_csbf, ps_cabac->u4_range);
1266
804k
        }
1267
2.64M
        else
1268
2.64M
        {
1269
            /* sanity check, this csb contains the last_sig_coeff */
1270
2.64M
            if(i == last_csb)
1271
2.38M
            {
1272
2.38M
                ASSERT(cur_csbf == 1);
1273
2.38M
            }
1274
2.64M
        }
1275
1276
3.45M
        if(cur_csbf)
1277
3.31M
        {
1278
            /*****************************************************************/
1279
            /* encode the sig coeff map as per section 7.3.13                */
1280
            /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
1281
            /*****************************************************************/
1282
1283
            /* Added for Sign bit data hiding*/
1284
3.31M
            WORD32 first_scan_pos = 16;
1285
3.31M
            WORD32 last_scan_pos = -1;
1286
3.31M
            WORD32 sign_hidden = 0;
1287
1288
3.31M
            UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
1289
3.31M
            WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
1290
3.31M
            WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
1291
1292
3.31M
            WORD32 sig_coeff_map = u2_gt0_flags;
1293
1294
3.31M
            WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
1295
1296
3.31M
            WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
1297
3.31M
            WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
1298
1299
3.31M
            WORD32 infer_coeff; /* infer when 0,0 is the only coded coeff */
1300
3.31M
            WORD32 bit; /* temp boolean */
1301
1302
            /* total count of coeffs to be coded as abs level remaining */
1303
3.31M
            WORD32 num_coeffs_remaining = 0;
1304
1305
            /* count of coeffs to be coded as  abslevel-1 */
1306
3.31M
            WORD32 num_coeffs_base1 = 0;
1307
3.31M
            WORD32 scan_pos;
1308
3.31M
            WORD32 first_gt1_coeff = 0;
1309
1310
3.31M
            if((i != 0) || (0 == last_csb))
1311
3.05M
            {
1312
                /* sanity check, atleast one coeff is coded as csbf is set */
1313
3.05M
                ASSERT(sig_coeff_map != 0);
1314
3.05M
            }
1315
1316
3.31M
            pu2_sig_coeff_buf += 3;
1317
1318
3.31M
            scan_pos = 15;
1319
3.31M
            if(i == last_csb)
1320
2.38M
            {
1321
                /*************************************************************/
1322
                /* clear last_scan_pos for last block in scan order as this  */
1323
                /* is communicated  throught last_coeff_x and last_coeff_y   */
1324
                /*************************************************************/
1325
2.38M
                WORD32 next_sig = CLZ(sig_coeff_map) + 1;
1326
1327
2.38M
                scan_pos = WORD_SIZE - next_sig;
1328
1329
                /* prepare the bins for gt1 flags */
1330
2.38M
                EXTRACT_BIT(bit, gt1_flags, scan_pos);
1331
1332
                /* insert gt1 bin in lsb */
1333
2.38M
                gt1_bins |= bit;
1334
1335
                /* prepare the bins for sign flags */
1336
2.38M
                EXTRACT_BIT(bit, sign_flags, scan_pos);
1337
1338
                /* insert sign bin in lsb */
1339
2.38M
                sign_bins |= bit;
1340
1341
2.38M
                sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
1342
1343
2.38M
                if(-1 == last_scan_pos)
1344
2.38M
                    last_scan_pos = scan_pos;
1345
1346
2.38M
                scan_pos--;
1347
2.38M
                num_coded++;
1348
2.38M
            }
1349
1350
            /* infer 0,0 coeff for all 4x4 blocks except fitst and last */
1351
3.31M
            infer_coeff = (i < last_csb) && (i > 0);
1352
1353
            /* encode the required sigcoeff flags (abslevel > 0)   */
1354
47.3M
            while(scan_pos >= 0)
1355
44.0M
            {
1356
44.0M
                WORD32 y_pos_x_pos;
1357
44.0M
                WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
1358
1359
44.0M
                WORD32 sig_coeff;
1360
1361
44.0M
                EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
1362
1363
                /* derive the x,y pos */
1364
44.0M
                y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
1365
1366
                /* derive the context inc as per section 9.3.3.1.4 */
1367
44.0M
                if(2 == log2_tr_size)
1368
26.1M
                {
1369
                    /* 4x4 transform size increment uses lookup */
1370
26.1M
                    sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1371
26.1M
                }
1372
17.8M
                else if(scan_pos || i)
1373
17.5M
                {
1374
                    /* ctxt for AC coeff depends on curpos and neigbour csbf */
1375
17.5M
                    sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
1376
1377
                    /* based on luma subblock pos */
1378
17.5M
                    sig_ctxinc += (i && is_luma) ? 3 : 0;
1379
17.5M
                }
1380
290k
                else
1381
290k
                {
1382
                    /* DC coeff has fixed context for luma and chroma */
1383
290k
                    sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
1384
290k
                                                  : IHEVC_CAB_COEFF_FLAG + 27;
1385
290k
                }
1386
1387
                /*************************************************************/
1388
                /* encode sig coeff only if required                         */
1389
                /* decoder infers 0,0 coeff when all the other coeffs are 0  */
1390
                /*************************************************************/
1391
44.0M
                if(scan_pos || (!infer_coeff))
1392
44.0M
                {
1393
44.0M
                    ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
1394
44.0M
                    ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
1395
44.0M
                    AEV_TRACE("significant_coeff_flag", sig_coeff, ps_cabac->u4_range);
1396
44.0M
                }
1397
1398
44.0M
                if(sig_coeff)
1399
35.1M
                {
1400
                    /* prepare the bins for gt1 flags */
1401
35.1M
                    EXTRACT_BIT(bit, gt1_flags, scan_pos);
1402
1403
                    /* shift and insert gt1 bin in lsb */
1404
35.1M
                    gt1_bins <<= 1;
1405
35.1M
                    gt1_bins |= bit;
1406
1407
                    /* prepare the bins for sign flags */
1408
35.1M
                    EXTRACT_BIT(bit, sign_flags, scan_pos);
1409
1410
                    /* shift and insert sign bin in lsb */
1411
35.1M
                    sign_bins <<= 1;
1412
35.1M
                    sign_bins |= bit;
1413
1414
35.1M
                    num_coded++;
1415
1416
                    /* 0,0 coeff can no more be inferred :( */
1417
35.1M
                    infer_coeff = 0;
1418
1419
35.1M
                    if(-1 == last_scan_pos)
1420
918k
                        last_scan_pos = scan_pos;
1421
1422
35.1M
                    first_scan_pos = scan_pos;
1423
35.1M
                }
1424
1425
44.0M
                scan_pos--;
1426
44.0M
            }
1427
1428
            /* Added for sign bit hiding*/
1429
3.31M
            sign_hidden = ((last_scan_pos - first_scan_pos) > 3 && !cu_tq_bypass_flag);
1430
1431
            /****************************************************************/
1432
            /* encode the abs level greater than 1 bins; Section 7.3.13     */
1433
            /* These have already been prepared during sig_coeff_map encode */
1434
            /* Context modelling done as per section 9.3.3.1.5              */
1435
            /****************************************************************/
1436
3.31M
            {
1437
3.31M
                WORD32 j;
1438
1439
                /* context set based on luma subblock pos */
1440
3.31M
                WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
1441
1442
                /* count of coeffs with abslevel > 1; max of 8 to be coded */
1443
3.31M
                WORD32 num_gt1_bins = MIN(8, num_coded);
1444
1445
3.31M
                if(num_coded > 8)
1446
2.40M
                {
1447
                    /* pull back the bins to required number */
1448
2.40M
                    gt1_bins >>= (num_coded - 8);
1449
1450
2.40M
                    num_coeffs_remaining += (num_coded - 8);
1451
2.40M
                    num_coeffs_base1 = (num_coded - 8);
1452
2.40M
                }
1453
1454
                /* See section 9.3.3.1.5           */
1455
3.31M
                ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
1456
1457
3.31M
                gt1_ctxt = 1;
1458
1459
25.7M
                for(j = num_gt1_bins - 1; j >= 0; j--)
1460
22.3M
                {
1461
                    /* Encodet the abs level gt1 bins */
1462
22.3M
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1463
1464
22.3M
                    EXTRACT_BIT(bit, gt1_bins, j);
1465
1466
22.3M
                    ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
1467
1468
22.3M
                    AEV_TRACE("coeff_abs_level_greater1_flag", bit, ps_cabac->u4_range);
1469
1470
22.3M
                    if(bit)
1471
16.3M
                    {
1472
16.3M
                        gt1_ctxt = 0;
1473
16.3M
                        num_coeffs_remaining++;
1474
16.3M
                    }
1475
6.07M
                    else if(gt1_ctxt && (gt1_ctxt < 3))
1476
2.10M
                    {
1477
2.10M
                        gt1_ctxt++;
1478
2.10M
                    }
1479
22.3M
                }
1480
1481
                /*************************************************************/
1482
                /* encode abs level greater than 2 bin; Section 7.3.13       */
1483
                /*************************************************************/
1484
3.31M
                if(gt1_bins)
1485
2.79M
                {
1486
2.79M
                    WORD32 gt2_bin;
1487
1488
2.79M
                    first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
1489
2.79M
                    gt2_bin = (first_gt1_coeff > 2);
1490
1491
                    /* atleast one level > 2 */
1492
2.79M
                    ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
1493
1494
2.79M
                    ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
1495
1496
2.79M
                    ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
1497
1498
2.79M
                    if(!gt2_bin)
1499
941k
                    {
1500
                        /* sanity check */
1501
941k
                        ASSERT(first_gt1_coeff == 2);
1502
1503
                        /* no need to send this coeff as bypass bins */
1504
941k
                        pu2_sig_coeff_buf++;
1505
941k
                        num_coeffs_remaining--;
1506
941k
                    }
1507
1508
2.79M
                    AEV_TRACE("coeff_abs_level_greater2_flag", gt2_bin, ps_cabac->u4_range);
1509
2.79M
                }
1510
3.31M
            }
1511
1512
            /*************************************************************/
1513
            /* encode the coeff signs and abs remaing levels             */
1514
            /*************************************************************/
1515
3.31M
            if(num_coded)
1516
3.30M
            {
1517
3.30M
                WORD32 base_level;
1518
3.30M
                WORD32 rice_param = 0;
1519
3.30M
                WORD32 j;
1520
1521
                /*************************************************************/
1522
                /* encode the coeff signs populated in sign_bins             */
1523
                /*************************************************************/
1524
1525
3.30M
                if(sign_hidden && i4_sign_data_hiding_flag)
1526
1.10M
                {
1527
1.10M
                    sign_bins >>= 1;
1528
1.10M
                    num_coded--;
1529
1.10M
                }
1530
1531
3.30M
                if(num_coded > 0)
1532
3.30M
                {
1533
3.30M
                    ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded);
1534
3.30M
                }
1535
1536
3.30M
                AEV_TRACE("sign_flags", sign_bins, ps_cabac->u4_range);
1537
1538
                /*************************************************************/
1539
                /* encode the coeff_abs_level_remaining as TR / EGK bins     */
1540
                /* See section 9.3.2.7 for details                           */
1541
                /*************************************************************/
1542
1543
                /* first remaining coeff baselevel */
1544
3.30M
                if(first_gt1_coeff > 2)
1545
1.85M
                {
1546
1.85M
                    base_level = 3;
1547
1.85M
                }
1548
1.44M
                else if(num_coeffs_remaining > num_coeffs_base1)
1549
800k
                {
1550
                    /* atleast one coeff in first 8 is gt > 1 */
1551
800k
                    base_level = 2;
1552
800k
                }
1553
647k
                else
1554
647k
                {
1555
                    /* all coeffs have base of 1 */
1556
647k
                    base_level = 1;
1557
647k
                }
1558
1559
33.8M
                for(j = 0; j < num_coeffs_remaining; j++)
1560
30.5M
                {
1561
30.5M
                    WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
1562
30.5M
                    WORD32 abs_coeff_rem;
1563
30.5M
                    WORD32 rice_max = (4 << rice_param);
1564
1565
30.5M
                    pu2_sig_coeff_buf++;
1566
1567
                    /* sanity check */
1568
30.5M
                    ASSERT(abs_coeff >= base_level);
1569
1570
30.5M
                    abs_coeff_rem = (abs_coeff - base_level);
1571
1572
                    /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
1573
                    /* TODO://HM-8.0-dev does either TR or EGK but not both */
1574
30.5M
                    if(abs_coeff_rem >= rice_max)
1575
5.36M
                    {
1576
5.36M
                        UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
1577
1578
                        /* coeff exceeds max rice limit                    */
1579
                        /* encode the TR prefix as tunary code             */
1580
                        /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
1581
5.36M
                        ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4);
1582
1583
                        /* encode the exponential golomb code suffix */
1584
5.36M
                        ret |= ihevce_cabac_encode_egk(ps_cabac, u4_suffix, (rice_param + 1));
1585
5.36M
                    }
1586
25.1M
                    else
1587
25.1M
                    {
1588
                        /* code coeff as truncated rice code  */
1589
25.1M
                        ret |= ihevce_cabac_encode_trunc_rice(
1590
25.1M
                            ps_cabac, abs_coeff_rem, rice_param, rice_max);
1591
25.1M
                    }
1592
1593
30.5M
                    AEV_TRACE("coeff_abs_level_remaining", abs_coeff_rem, ps_cabac->u4_range);
1594
1595
                    /* update the rice param based on coeff level */
1596
30.5M
                    if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
1597
6.68M
                    {
1598
6.68M
                        rice_param++;
1599
6.68M
                    }
1600
1601
                    /* change base level to 1 if more than 8 coded coeffs */
1602
30.5M
                    if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
1603
12.7M
                    {
1604
12.7M
                        base_level = 2;
1605
12.7M
                    }
1606
17.7M
                    else
1607
17.7M
                    {
1608
17.7M
                        base_level = 1;
1609
17.7M
                    }
1610
30.5M
                }
1611
3.30M
            }
1612
3.31M
        }
1613
3.45M
    }
1614
    /*tap texture bits*/
1615
2.38M
    if(ps_cabac->e_cabac_op_mode == CABAC_MODE_COMPUTE_BITS)
1616
0
    {  // clang-format off
1617
0
        ps_cabac->u4_texture_bits_estimated_q12 +=
1618
0
            (ps_cabac->u4_bits_estimated_q12 -
1619
0
                ps_cabac->u4_header_bits_estimated_q12);  //(ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
1620
0
    }  // clang-format on
1621
1622
2.38M
    return (ret);
1623
2.38M
}
1624
1625
/**
1626
******************************************************************************
1627
*
1628
*  @brief Get the bits estimate for a transform residual block as per section
1629
*   7.3.13
1630
*
1631
*  @par   Description
1632
*   The residual block is read from a compressed coeff buffer populated during
1633
*   the scanning of the quantized coeffs. The contents of the buffer are
1634
*   briefly explained in param description of pv_coeff
1635
*
1636
*  @remarks Does not support sign data hiding and transform skip flag currently
1637
*
1638
*  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
1639
*           HM.8.0-dev for related abs_greater_than_1 context initialization
1640
*           and rice_max parameter used for coeff abs level remaining
1641
*
1642
*  @param[inout]   ps_entropy_ctxt
1643
*  pointer to entropy context (handle)
1644
*
1645
*  @param[in]      pv_coeff
1646
*  Compressed residue buffer containing following information:
1647
*
1648
*  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
1649
*
1650
*  For each 4x4 subblock starting from last_subblock_num (in scan order)
1651
*     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
1652
*
1653
*    If cur_csbf
1654
*      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
1655
*      Read 2 bytes : abs_gt1_flags (max of 8 only)
1656
*      Read 2 bytes : coeff_sign_flags
1657
*
1658
*      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
1659
*      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
1660
*
1661
*  @param[in]      log2_tr_size
1662
*  transform size of the current TU
1663
*
1664
*  @param[in]      is_luma
1665
*  boolean indicating if the texture type is luma / chroma
1666
*
1667
*
1668
*  @return      success or failure error code
1669
*
1670
******************************************************************************
1671
*/
1672
WORD32 ihevce_cabac_residue_encode_rdopt(
1673
    entropy_context_t *ps_entropy_ctxt,
1674
    void *pv_coeff,
1675
    WORD32 log2_tr_size,
1676
    WORD32 is_luma,
1677
    WORD32 perform_sbh)
1678
9.92M
{
1679
9.92M
    WORD32 ret = IHEVCE_SUCCESS;
1680
9.92M
    cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
1681
9.92M
    UWORD32 temp_tex_bits_q12;
1682
9.92M
    WORD32 i4_sign_data_hiding_flag, cu_tq_bypass_flag;
1683
1684
9.92M
    UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
1685
9.92M
    UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
1686
1687
    /* last sig coeff indices in scan order */
1688
9.92M
    WORD32 last_sig_coeff_x = pu1_coeff_buf_hdr[0];
1689
9.92M
    WORD32 last_sig_coeff_y = pu1_coeff_buf_hdr[1];
1690
1691
    /* read the scan type : upright diag / horz / vert */
1692
9.92M
    WORD32 scan_type = pu1_coeff_buf_hdr[2];
1693
1694
    /************************************************************************/
1695
    /* position of the last coded sub block. This sub block contains coeff  */
1696
    /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
1697
    /* be derived here it better to be populated by scanning module         */
1698
    /************************************************************************/
1699
9.92M
    WORD32 last_csb = pu1_coeff_buf_hdr[3];
1700
1701
9.92M
    WORD32 cur_csbf = 0, nbr_csbf;
1702
9.92M
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1703
9.92M
    WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
1704
1705
9.92M
    WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
1706
1707
9.92M
    WORD32 i;
1708
1709
9.92M
    UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
1710
1711
    /* sanity checks */
1712
    /* transform skip not supported */
1713
9.92M
    ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
1714
1715
9.92M
    cu_tq_bypass_flag = ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag;
1716
1717
9.92M
    i4_sign_data_hiding_flag = ps_entropy_ctxt->ps_pps->i1_sign_data_hiding_flag;
1718
1719
9.92M
    {
1720
9.92M
        temp_tex_bits_q12 = ps_cabac->u4_bits_estimated_q12;
1721
9.92M
    }
1722
1723
9.92M
    if(SCAN_VERT == scan_type)
1724
2.37M
    {
1725
        /* last coeff x and y are swapped for vertical scan */
1726
2.37M
        SWAP(last_sig_coeff_x, last_sig_coeff_y);
1727
2.37M
    }
1728
1729
    /* Encode the last_sig_coeff_x and last_sig_coeff_y */
1730
9.92M
    ret |= ihevce_cabac_encode_last_coeff_x_y(
1731
9.92M
        ps_cabac, last_sig_coeff_x, last_sig_coeff_y, log2_tr_size, is_luma);
1732
1733
    /*************************************************************************/
1734
    /* derive base context index for sig coeff as per section 9.3.3.1.4      */
1735
    /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
1736
    /*************************************************************************/
1737
9.92M
    if(is_luma)
1738
6.83M
    {
1739
6.83M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1740
6.83M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1741
1742
6.83M
        if(3 == log2_tr_size)
1743
1.34M
        {
1744
            /* 8x8 transform size */
1745
1.34M
            sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
1746
1.34M
        }
1747
5.48M
        else if(3 < log2_tr_size)
1748
591k
        {
1749
            /* larger transform sizes */
1750
591k
            sig_coeff_base_ctxt += 21;
1751
591k
        }
1752
6.83M
    }
1753
3.08M
    else
1754
3.08M
    {
1755
        /* chroma context initializations */
1756
3.08M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1757
3.08M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1758
1759
3.08M
        if(3 == log2_tr_size)
1760
447k
        {
1761
            /* 8x8 transform size */
1762
447k
            sig_coeff_base_ctxt += 9;
1763
447k
        }
1764
2.64M
        else if(3 < log2_tr_size)
1765
226k
        {
1766
            /* larger transform sizes */
1767
226k
            sig_coeff_base_ctxt += 12;
1768
226k
        }
1769
3.08M
    }
1770
1771
    /* go to csbf flags */
1772
9.92M
    pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
1773
1774
    /************************************************************************/
1775
    /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
1776
    /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
1777
    /************************************************************************/
1778
36.1M
    for(i = last_csb; i >= 0; i--)
1779
26.2M
    {
1780
26.2M
        UWORD16 u2_marker_csbf;
1781
26.2M
        WORD32 ctxt_idx;
1782
1783
26.2M
        u2_marker_csbf = *pu2_sig_coeff_buf;
1784
26.2M
        pu2_sig_coeff_buf++;
1785
1786
        /* sanity checks for marker present in every csbf flag */
1787
26.2M
        ASSERT((u2_marker_csbf >> 4) == 0xBAD);
1788
1789
        /* extract the current and neigbour csbf flags */
1790
26.2M
        cur_csbf = u2_marker_csbf & 0x1;
1791
26.2M
        nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
1792
1793
        /*********************************************************************/
1794
        /* code the csbf flags; last and first csb not sent as it is derived */
1795
        /*********************************************************************/
1796
26.2M
        if((i < last_csb) && (i > 0))
1797
14.1M
        {
1798
14.1M
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1799
1800
            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1801
14.1M
            ctxt_idx += nbr_csbf ? 1 : 0;
1802
14.1M
            ctxt_idx += is_luma ? 0 : 2;
1803
1804
14.1M
            {
1805
14.1M
                WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
1806
1807
                /* increment bits generated based on state and bin encoded */
1808
14.1M
                ps_cabac->u4_bits_estimated_q12 +=
1809
14.1M
                    gau2_ihevce_cabac_bin_to_bits[state_mps ^ cur_csbf];
1810
1811
                /* update the context model from state transition LUT */
1812
14.1M
                pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | cur_csbf];
1813
14.1M
            }
1814
14.1M
        }
1815
12.1M
        else
1816
12.1M
        {
1817
            /* sanity check, this csb contains the last_sig_coeff */
1818
12.1M
            if(i == last_csb)
1819
9.92M
            {
1820
9.92M
                ASSERT(cur_csbf == 1);
1821
9.92M
            }
1822
12.1M
        }
1823
1824
26.2M
        if(cur_csbf)
1825
24.5M
        {
1826
            /*****************************************************************/
1827
            /* encode the sig coeff map as per section 7.3.13                */
1828
            /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
1829
            /*****************************************************************/
1830
1831
            /* Added for Sign bit data hiding*/
1832
24.5M
            WORD32 first_scan_pos = 16;
1833
24.5M
            WORD32 last_scan_pos = -1;
1834
24.5M
            WORD32 sign_hidden;
1835
1836
24.5M
            UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
1837
24.5M
            WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
1838
24.5M
            WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
1839
1840
24.5M
            WORD32 sig_coeff_map = u2_gt0_flags;
1841
1842
24.5M
            WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
1843
1844
24.5M
            WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
1845
24.5M
            WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
1846
1847
24.5M
            WORD32 infer_coeff; /* infer when 0,0 is the only coded coeff */
1848
24.5M
            WORD32 bit; /* temp boolean */
1849
1850
            /* total count of coeffs to be coded as abs level remaining */
1851
24.5M
            WORD32 num_coeffs_remaining = 0;
1852
1853
            /* count of coeffs to be coded as  abslevel-1 */
1854
24.5M
            WORD32 num_coeffs_base1 = 0;
1855
24.5M
            WORD32 scan_pos;
1856
24.5M
            WORD32 first_gt1_coeff = 0;
1857
1858
24.5M
            if((i != 0) || (0 == last_csb))
1859
22.3M
            {
1860
                /* sanity check, atleast one coeff is coded as csbf is set */
1861
22.3M
                ASSERT(sig_coeff_map != 0);
1862
22.3M
            }
1863
1864
24.5M
            pu2_sig_coeff_buf += 3;
1865
1866
24.5M
            scan_pos = 15;
1867
24.5M
            if(i == last_csb)
1868
9.92M
            {
1869
                /*************************************************************/
1870
                /* clear last_scan_pos for last block in scan order as this  */
1871
                /* is communicated  throught last_coeff_x and last_coeff_y   */
1872
                /*************************************************************/
1873
9.92M
                WORD32 next_sig = CLZ(sig_coeff_map) + 1;
1874
1875
9.92M
                scan_pos = WORD_SIZE - next_sig;
1876
1877
                /* prepare the bins for gt1 flags */
1878
9.92M
                EXTRACT_BIT(bit, gt1_flags, scan_pos);
1879
1880
                /* insert gt1 bin in lsb */
1881
9.92M
                gt1_bins |= bit;
1882
1883
                /* prepare the bins for sign flags */
1884
9.92M
                EXTRACT_BIT(bit, sign_flags, scan_pos);
1885
1886
                /* insert sign bin in lsb */
1887
9.92M
                sign_bins |= bit;
1888
1889
9.92M
                sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
1890
1891
9.92M
                if(-1 == last_scan_pos)
1892
9.92M
                    last_scan_pos = scan_pos;
1893
1894
9.92M
                scan_pos--;
1895
9.92M
                num_coded++;
1896
9.92M
            }
1897
1898
            /* infer 0,0 coeff for all 4x4 blocks except fitst and last */
1899
24.5M
            infer_coeff = (i < last_csb) && (i > 0);
1900
1901
            /* encode the required sigcoeff flags (abslevel > 0)   */
1902
377M
            while(scan_pos >= 0)
1903
352M
            {
1904
352M
                WORD32 y_pos_x_pos;
1905
352M
                WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
1906
1907
352M
                WORD32 sig_coeff;
1908
1909
352M
                EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
1910
1911
                /* derive the x,y pos */
1912
352M
                y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
1913
1914
                /* derive the context inc as per section 9.3.3.1.4 */
1915
352M
                if(2 == log2_tr_size)
1916
94.4M
                {
1917
                    /* 4x4 transform size increment uses lookup */
1918
94.4M
                    sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1919
94.4M
                }
1920
258M
                else if(scan_pos || i)
1921
255M
                {
1922
                    /* ctxt for AC coeff depends on curpos and neigbour csbf */
1923
255M
                    sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
1924
1925
                    /* based on luma subblock pos */
1926
255M
                    sig_ctxinc += (i && is_luma) ? 3 : 0;
1927
255M
                }
1928
2.33M
                else
1929
2.33M
                {
1930
                    /* DC coeff has fixed context for luma and chroma */
1931
2.33M
                    sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
1932
2.33M
                                                  : IHEVC_CAB_COEFF_FLAG + 27;
1933
2.33M
                }
1934
1935
                /*************************************************************/
1936
                /* encode sig coeff only if required                         */
1937
                /* decoder infers 0,0 coeff when all the other coeffs are 0  */
1938
                /*************************************************************/
1939
352M
                if(scan_pos || (!infer_coeff))
1940
352M
                {
1941
352M
                    ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
1942
1943
                    //ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
1944
352M
                    {
1945
352M
                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
1946
1947
                        /* increment bits generated based on state and bin encoded */
1948
352M
                        ps_cabac->u4_bits_estimated_q12 +=
1949
352M
                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ sig_coeff];
1950
1951
                        /* update the context model from state transition LUT */
1952
352M
                        pu1_ctxt_model[ctxt_idx] =
1953
352M
                            gau1_ihevc_next_state[(state_mps << 1) | sig_coeff];
1954
352M
                    }
1955
352M
                }
1956
1957
352M
                if(sig_coeff)
1958
281M
                {
1959
                    /* prepare the bins for gt1 flags */
1960
281M
                    EXTRACT_BIT(bit, gt1_flags, scan_pos);
1961
1962
                    /* shift and insert gt1 bin in lsb */
1963
281M
                    gt1_bins <<= 1;
1964
281M
                    gt1_bins |= bit;
1965
1966
                    /* prepare the bins for sign flags */
1967
281M
                    EXTRACT_BIT(bit, sign_flags, scan_pos);
1968
1969
                    /* shift and insert sign bin in lsb */
1970
281M
                    sign_bins <<= 1;
1971
281M
                    sign_bins |= bit;
1972
1973
281M
                    num_coded++;
1974
1975
                    /* 0,0 coeff can no more be inferred :( */
1976
281M
                    infer_coeff = 0;
1977
1978
281M
                    if(-1 == last_scan_pos)
1979
14.5M
                        last_scan_pos = scan_pos;
1980
1981
281M
                    first_scan_pos = scan_pos;
1982
281M
                }
1983
1984
352M
                scan_pos--;
1985
352M
            }
1986
1987
            /* Added for sign bit hiding*/
1988
24.5M
            sign_hidden =
1989
24.5M
                (((last_scan_pos - first_scan_pos) > 3 && !cu_tq_bypass_flag) && (perform_sbh));
1990
1991
            /****************************************************************/
1992
            /* encode the abs level greater than 1 bins; Section 7.3.13     */
1993
            /* These have already been prepared during sig_coeff_map encode */
1994
            /* Context modelling done as per section 9.3.3.1.5              */
1995
            /****************************************************************/
1996
24.5M
            {
1997
24.5M
                WORD32 j;
1998
1999
                /* context set based on luma subblock pos */
2000
24.5M
                WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
2001
2002
                /* count of coeffs with abslevel > 1; max of 8 to be coded */
2003
24.5M
                WORD32 num_gt1_bins = MIN(8, num_coded);
2004
2005
24.5M
                if(num_coded > 8)
2006
18.8M
                {
2007
                    /* pull back the bins to required number */
2008
18.8M
                    gt1_bins >>= (num_coded - 8);
2009
2010
18.8M
                    num_coeffs_remaining += (num_coded - 8);
2011
18.8M
                    num_coeffs_base1 = (num_coded - 8);
2012
18.8M
                }
2013
2014
                /* See section 9.3.3.1.5           */
2015
24.5M
                ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
2016
2017
24.5M
                gt1_ctxt = 1;
2018
2019
198M
                for(j = num_gt1_bins - 1; j >= 0; j--)
2020
173M
                {
2021
                    /* Encodet the abs level gt1 bins */
2022
173M
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
2023
2024
173M
                    EXTRACT_BIT(bit, gt1_bins, j);
2025
2026
                    //ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
2027
173M
                    {
2028
173M
                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2029
2030
                        /* increment bits generated based on state and bin encoded */
2031
173M
                        ps_cabac->u4_bits_estimated_q12 +=
2032
173M
                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ bit];
2033
2034
                        /* update the context model from state transition LUT */
2035
173M
                        pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | bit];
2036
173M
                    }
2037
2038
173M
                    if(bit)
2039
121M
                    {
2040
121M
                        gt1_ctxt = 0;
2041
121M
                        num_coeffs_remaining++;
2042
121M
                    }
2043
52.5M
                    else if(gt1_ctxt && (gt1_ctxt < 3))
2044
17.1M
                    {
2045
17.1M
                        gt1_ctxt++;
2046
17.1M
                    }
2047
173M
                }
2048
2049
                /*************************************************************/
2050
                /* encode abs level greater than 2 bin; Section 7.3.13       */
2051
                /*************************************************************/
2052
24.5M
                if(gt1_bins)
2053
20.7M
                {
2054
20.7M
                    WORD32 gt2_bin;
2055
2056
20.7M
                    first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
2057
20.7M
                    gt2_bin = (first_gt1_coeff > 2);
2058
2059
                    /* atleast one level > 2 */
2060
20.7M
                    ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
2061
2062
20.7M
                    ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
2063
2064
                    //ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
2065
20.7M
                    {
2066
20.7M
                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2067
2068
                        /* increment bits generated based on state and bin encoded */
2069
20.7M
                        ps_cabac->u4_bits_estimated_q12 +=
2070
20.7M
                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ gt2_bin];
2071
2072
                        /* update the context model from state transition LUT */
2073
20.7M
                        pu1_ctxt_model[ctxt_idx] =
2074
20.7M
                            gau1_ihevc_next_state[(state_mps << 1) | gt2_bin];
2075
20.7M
                    }
2076
2077
20.7M
                    if(!gt2_bin)
2078
7.92M
                    {
2079
                        /* sanity check */
2080
7.92M
                        ASSERT(first_gt1_coeff == 2);
2081
2082
                        /* no need to send this coeff as bypass bins */
2083
7.92M
                        pu2_sig_coeff_buf++;
2084
7.92M
                        num_coeffs_remaining--;
2085
7.92M
                    }
2086
20.7M
                }
2087
24.5M
            }
2088
2089
            /*************************************************************/
2090
            /* encode the coeff signs and abs remaing levels             */
2091
            /*************************************************************/
2092
24.5M
            if(num_coded)
2093
24.4M
            {
2094
24.4M
                WORD32 base_level;
2095
24.4M
                WORD32 rice_param = 0;
2096
24.4M
                WORD32 j;
2097
2098
                /*************************************************************/
2099
                /* encode the coeff signs populated in sign_bins             */
2100
                /*************************************************************/
2101
24.4M
                if(sign_hidden && i4_sign_data_hiding_flag)
2102
15.1M
                {
2103
15.1M
                    sign_bins >>= 1;
2104
15.1M
                    num_coded--;
2105
15.1M
                }
2106
2107
24.4M
                if(num_coded > 0)
2108
24.4M
                {
2109
                    /* ret |= ihevce_cabac_encode_bypass_bins(ps_cabac,
2110
                                                       sign_bins,
2111
                                                       num_coded);
2112
                    */
2113
2114
                    /* increment bits generated based on num bypass bins */
2115
24.4M
                    ps_cabac->u4_bits_estimated_q12 += (num_coded << CABAC_FRAC_BITS_Q);
2116
24.4M
                }
2117
2118
                /*************************************************************/
2119
                /* encode the coeff_abs_level_remaining as TR / EGK bins     */
2120
                /* See section 9.3.2.7 for details                           */
2121
                /*************************************************************/
2122
2123
                /* first remaining coeff baselevel */
2124
24.4M
                if(first_gt1_coeff > 2)
2125
12.8M
                {
2126
12.8M
                    base_level = 3;
2127
12.8M
                }
2128
11.6M
                else if(num_coeffs_remaining > num_coeffs_base1)
2129
6.94M
                {
2130
                    /* atleast one coeff in first 8 is gt > 1 */
2131
6.94M
                    base_level = 2;
2132
6.94M
                }
2133
4.73M
                else
2134
4.73M
                {
2135
                    /* all coeffs have base of 1 */
2136
4.73M
                    base_level = 1;
2137
4.73M
                }
2138
2139
255M
                for(j = 0; j < num_coeffs_remaining; j++)
2140
230M
                {
2141
230M
                    WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
2142
230M
                    WORD32 abs_coeff_rem;
2143
230M
                    WORD32 rice_max = (4 << rice_param);
2144
230M
                    WORD32 num_bins, unary_length;
2145
230M
                    UWORD32 u4_sym_shiftk_plus1;
2146
2147
230M
                    pu2_sig_coeff_buf++;
2148
2149
                    /* sanity check */
2150
230M
                    ASSERT(abs_coeff >= base_level);
2151
2152
230M
                    abs_coeff_rem = (abs_coeff - base_level);
2153
2154
                    /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
2155
                    /* TODO://HM-8.0-dev does either TR or EGK but not both */
2156
230M
                    if(abs_coeff_rem >= rice_max)
2157
29.1M
                    {
2158
29.1M
                        UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
2159
2160
                        /* coeff exceeds max rice limit                    */
2161
                        /* encode the TR prefix as tunary code             */
2162
                        /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
2163
                        /* ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4); */
2164
2165
                        /* increment bits generated based on num bypass bins */
2166
29.1M
                        ps_cabac->u4_bits_estimated_q12 += (4 << CABAC_FRAC_BITS_Q);
2167
2168
                        /* encode the exponential golomb code suffix */
2169
                        /*ret |= ihevce_cabac_encode_egk(ps_cabac,
2170
                                                       u4_suffix,
2171
                                                       (rice_param+1)
2172
                                                      ); */
2173
2174
                        /* k = rice_param+1 */
2175
                        /************************************************************************/
2176
                        /* shift symbol by k bits to find unary code prefix (111110)            */
2177
                        /* Use GETRANGE to elminate the while loop in sec 9.3.2.4 of HEVC spec  */
2178
                        /************************************************************************/
2179
29.1M
                        u4_sym_shiftk_plus1 = (u4_suffix >> (rice_param + 1)) + 1;
2180
2181
                        /* GETRANGE(unary_length, (u4_sym_shiftk_plus1 + 1)); */
2182
29.1M
                        GETRANGE(unary_length, u4_sym_shiftk_plus1);
2183
2184
                        /* length of the code = 2 *(unary_length - 1) + 1 + k */
2185
29.1M
                        num_bins = (2 * unary_length) + rice_param;
2186
2187
                        /* increment bits generated based on num bypass bins */
2188
29.1M
                        ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
2189
29.1M
                    }
2190
201M
                    else
2191
201M
                    {
2192
                        /* code coeff as truncated rice code  */
2193
                        /* ret |= ihevce_cabac_encode_trunc_rice(ps_cabac,
2194
                                                              abs_coeff_rem,
2195
                                                              rice_param,
2196
                                                              rice_max);
2197
                                                              */
2198
2199
                        /************************************************************************/
2200
                        /* shift symbol by c_rice_param bits to find unary code prefix (111.10) */
2201
                        /************************************************************************/
2202
201M
                        unary_length = (abs_coeff_rem >> rice_param) + 1;
2203
2204
                        /* length of the code */
2205
201M
                        num_bins = unary_length + rice_param;
2206
2207
                        /* increment bits generated based on num bypass bins */
2208
201M
                        ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
2209
201M
                    }
2210
2211
                    /* update the rice param based on coeff level */
2212
230M
                    if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
2213
43.1M
                    {
2214
43.1M
                        rice_param++;
2215
43.1M
                    }
2216
2217
                    /* change base level to 1 if more than 8 coded coeffs */
2218
230M
                    if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
2219
93.4M
                    {
2220
93.4M
                        base_level = 2;
2221
93.4M
                    }
2222
137M
                    else
2223
137M
                    {
2224
137M
                        base_level = 1;
2225
137M
                    }
2226
230M
                }
2227
24.4M
            }
2228
24.5M
        }
2229
26.2M
    }
2230
    /*tap texture bits*/
2231
9.92M
    {
2232
9.92M
        ps_cabac->u4_texture_bits_estimated_q12 +=
2233
9.92M
            (ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
2234
9.92M
    }
2235
2236
9.92M
    return (ret);
2237
9.92M
}
2238
2239
/**
2240
******************************************************************************
2241
*
2242
*  @brief Encodes a transform residual block as per section 7.3.13
2243
*
2244
*  @par   Description
2245
*  RDOQ optimization is carried out here. When sub-blk RDOQ is turned on, we calculate
2246
*  the distortion(D) and bits(R) for when the sub blk is coded and when not coded. We
2247
*  then use the D+lambdaR metric to decide whether the sub-blk should be coded or not, and
2248
*  appropriately signal it. When coeff RDOQ is turned on, we traverse through the TU to
2249
*  find all non-zero coeffs. If the non zero coeff is a 1, then we make a decision(based on D+lambdaR)
2250
*  metric as to whether to code it as a 0 or 1. In case the coeff is > 1(say L where L>1) we choose between
2251
*  L and L+1
2252
*
2253
*  @remarks Does not support sign data hiding and transform skip flag currently
2254
*
2255
*  @remarks Need to resolve the differences between JVT-J1003_d7 spec and
2256
*           HM.8.0-dev for related abs_greater_than_1 context initialization
2257
*           and rice_max parameter used for coeff abs level remaining
2258
*
2259
*  @param[inout]   ps_entropy_ctxt
2260
*  pointer to entropy context (handle)
2261
*
2262
*  @param[in]      pv_coeff
2263
*  Compressed residue buffer containing following information:
2264
*
2265
*
2266
*  HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
2267
*
2268
*  For each 4x4 subblock starting from last_subblock_num (in scan order)
2269
*     Read 2 bytes  : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
2270
*
2271
*    If cur_csbf
2272
*      Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
2273
*      Read 2 bytes : abs_gt1_flags (max of 8 only)
2274
*      Read 2 bytes : coeff_sign_flags
2275
*
2276
*      Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
2277
*      Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
2278
*
2279
*  @param[in]      log2_tr_size
2280
*  transform size of the current TU
2281
*
2282
*  @param[in]      is_luma
2283
*  boolean indicating if the texture type is luma / chroma
2284
*
2285
*  @param[out]    pi8_tu_coded_dist
2286
*  The distortion when the TU is coded(not all coeffs are set to 0) is stored here
2287
*
2288
*  @param[out]    pi8_tu_not_coded_dist
2289
*  The distortion when the entire TU is not coded(all coeffs are set to 0) is stored here
2290
*
2291
*
2292
*  @return      success or failure error code
2293
*
2294
******************************************************************************
2295
*/
2296
2297
WORD32 ihevce_cabac_residue_encode_rdoq(
2298
    entropy_context_t *ps_entropy_ctxt,
2299
    void *pv_coeff,
2300
    WORD32 log2_tr_size,
2301
    WORD32 is_luma,
2302
    void *pv_rdoq_ctxt,
2303
    LWORD64 *pi8_tu_coded_dist,
2304
    LWORD64 *pi8_tu_not_coded_dist,
2305
    WORD32 perform_sbh)
2306
5.79M
{
2307
5.79M
    WORD32 *pi4_subBlock2csbfId_map;
2308
2309
5.79M
    WORD32 ret = IHEVCE_SUCCESS;
2310
2311
5.79M
    cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
2312
5.79M
    cab_ctxt_t s_sub_blk_not_coded_cabac_ctxt;
2313
5.79M
    backup_ctxt_t s_backup_ctxt;
2314
5.79M
    backup_ctxt_t s_backup_ctxt_sub_blk_not_coded;
2315
2316
5.79M
    UWORD32 temp_tex_bits_q12;
2317
2318
5.79M
    UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
2319
5.79M
    UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
2320
2321
5.79M
    LWORD64 i8_sub_blk_not_coded_dist = 0, i8_sub_blk_coded_dist = 0;
2322
5.79M
    WORD32 i4_sub_blk_not_coded_bits = 0, i4_sub_blk_coded_bits = 0;
2323
5.79M
    LWORD64 i8_sub_blk_not_coded_metric, i8_sub_blk_coded_metric;
2324
5.79M
    LWORD64 i8_tu_not_coded_dist = 0, i8_tu_coded_dist = 0;
2325
5.79M
    WORD32 i4_tu_coded_bits = 0;
2326
5.79M
    WORD32 temp_zero_col = 0, temp_zero_row = 0;
2327
2328
5.79M
    UWORD8 *pu1_last_sig_coeff_x;
2329
5.79M
    UWORD8 *pu1_last_sig_coeff_y;
2330
5.79M
    WORD32 scan_type;
2331
5.79M
    WORD32 last_csb;
2332
2333
5.79M
    WORD32 cur_csbf = 0, nbr_csbf;
2334
    // WORD32 i4_temp_bits;
2335
2336
5.79M
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
2337
5.79M
    WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
2338
2339
5.79M
    UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
2340
2341
5.79M
    rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
2342
5.79M
    WORD16 *pi2_coeffs = ps_rdoq_ctxt->pi2_quant_coeffs;
2343
5.79M
    WORD16 *pi2_tr_coeffs = ps_rdoq_ctxt->pi2_trans_values;
2344
5.79M
    WORD32 trans_size = ps_rdoq_ctxt->i4_trans_size;
2345
5.79M
    WORD32 i4_round_val = ps_rdoq_ctxt->i4_round_val_ssd_in_td;
2346
5.79M
    WORD32 i4_shift_val = ps_rdoq_ctxt->i4_shift_val_ssd_in_td;
2347
5.79M
    WORD32 scan_idx = ps_rdoq_ctxt->i4_scan_idx;
2348
2349
5.79M
    UWORD8 *pu1_csb_table, *pu1_trans_table;
2350
5.79M
    WORD32 shift_value, mask_value;
2351
2352
5.79M
    WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
2353
5.79M
    WORD32 temp_gt1_ctxt = gt1_ctxt;
2354
2355
5.79M
    WORD32 i;
2356
#if DISABLE_ZCSBF
2357
    WORD32 i4_skip_zero_cbf = 0;
2358
    WORD32 i4_skip_zero_csbf = 0;
2359
    WORD32 i4_num_abs_1_coeffs = 0;
2360
#endif
2361
5.79M
    (void)perform_sbh;
2362
5.79M
    pi4_subBlock2csbfId_map = ps_rdoq_ctxt->pi4_subBlock2csbfId_map;
2363
2364
    /* scan order inside a csb */
2365
5.79M
    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
2366
    /*Initializing the backup_ctxt structures*/
2367
5.79M
    s_backup_ctxt.i4_num_bits = 0;
2368
5.79M
    s_backup_ctxt_sub_blk_not_coded.i4_num_bits = 0;
2369
2370
5.79M
    memset(&s_backup_ctxt.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
2371
5.79M
    memset(&s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
2372
2373
5.79M
    pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
2374
5.79M
    pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
2375
2376
    /* last sig coeff indices in scan order */
2377
5.79M
    pu1_last_sig_coeff_x = &pu1_coeff_buf_hdr[0];
2378
5.79M
    pu1_last_sig_coeff_y = &pu1_coeff_buf_hdr[1];
2379
2380
    /* read the scan type : upright diag / horz / vert */
2381
5.79M
    scan_type = pu1_coeff_buf_hdr[2];
2382
2383
    /************************************************************************/
2384
    /* position of the last coded sub block. This sub block contains coeff  */
2385
    /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
2386
    /* be derived here it better to be populated by scanning module         */
2387
    /************************************************************************/
2388
5.79M
    last_csb = pu1_coeff_buf_hdr[3];
2389
2390
5.79M
    shift_value = ps_rdoq_ctxt->i4_log2_trans_size + 1;
2391
    /* for finding. row no. from scan index */
2392
5.79M
    shift_value = shift_value - 3;
2393
    /*for finding the col. no. from scan index*/
2394
5.79M
    mask_value = (ps_rdoq_ctxt->i4_trans_size / 4) - 1;
2395
2396
5.79M
    switch(ps_rdoq_ctxt->i4_trans_size)
2397
5.79M
    {
2398
151k
    case 32:
2399
151k
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
2400
151k
        break;
2401
493k
    case 16:
2402
493k
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
2403
493k
        break;
2404
1.08M
    case 8:
2405
1.08M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
2406
1.08M
        break;
2407
4.05M
    case 4:
2408
4.05M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
2409
4.05M
        break;
2410
0
    default:
2411
0
        DBG_PRINTF("Invalid Trans Size\n");
2412
0
        return -1;
2413
0
        break;
2414
5.79M
    }
2415
2416
    /* sanity checks */
2417
    /* transform skip not supported */
2418
5.79M
    ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
2419
5.79M
    {
2420
5.79M
        temp_tex_bits_q12 = ps_cabac->u4_bits_estimated_q12;
2421
5.79M
    }
2422
    /*************************************************************************/
2423
    /* derive base context index for sig coeff as per section 9.3.3.1.4      */
2424
    /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
2425
    /*************************************************************************/
2426
5.79M
    if(is_luma)
2427
4.07M
    {
2428
4.07M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
2429
4.07M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
2430
2431
4.07M
        if(3 == log2_tr_size)
2432
745k
        {
2433
            /* 8x8 transform size */
2434
745k
            sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
2435
745k
        }
2436
3.32M
        else if(3 < log2_tr_size)
2437
462k
        {
2438
            /* larger transform sizes */
2439
462k
            sig_coeff_base_ctxt += 21;
2440
462k
        }
2441
4.07M
    }
2442
1.72M
    else
2443
1.72M
    {
2444
        /* chroma context initializations */
2445
1.72M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
2446
1.72M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
2447
2448
1.72M
        if(3 == log2_tr_size)
2449
344k
        {
2450
            /* 8x8 transform size */
2451
344k
            sig_coeff_base_ctxt += 9;
2452
344k
        }
2453
1.37M
        else if(3 < log2_tr_size)
2454
182k
        {
2455
            /* larger transform sizes */
2456
182k
            sig_coeff_base_ctxt += 12;
2457
182k
        }
2458
1.72M
    }
2459
2460
    /* go to csbf flags */
2461
5.79M
    pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
2462
2463
    /*Calculating the distortion produced by all the zero coeffs in the TU*/
2464
421M
    for(i = (trans_size * trans_size) - 1; i >= 0; i--)
2465
416M
    {
2466
416M
        WORD32 i4_dist;
2467
416M
        WORD16 *pi2_orig_coeff = ps_rdoq_ctxt->pi2_trans_values;
2468
2469
416M
        if(pi2_coeffs[i] == 0)
2470
207M
        {
2471
207M
            i4_dist = CALC_SSD_IN_TRANS_DOMAIN(pi2_orig_coeff[i], 0, 0, 0);
2472
207M
            i8_tu_not_coded_dist += i4_dist;
2473
207M
            i8_tu_coded_dist += i4_dist;
2474
207M
        }
2475
416M
    }
2476
2477
    /*Backup of the various cabac ctxts*/
2478
5.79M
    memcpy(&s_sub_blk_not_coded_cabac_ctxt, ps_cabac, sizeof(cab_ctxt_t));
2479
    /************************************************************************/
2480
    /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
2481
    /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
2482
    /************************************************************************/
2483
2484
24.4M
    for(i = last_csb; i >= 0; i--)
2485
18.6M
    {
2486
18.6M
        UWORD16 u2_marker_csbf;
2487
18.6M
        WORD32 ctxt_idx;
2488
18.6M
        WORD32 i4_sub_blk_is_coded = 0;
2489
18.6M
        WORD32 blk_row, blk_col;
2490
18.6M
        WORD32 scaled_blk_row;
2491
18.6M
        WORD32 scaled_blk_col;
2492
18.6M
        WORD32 infer_coeff;
2493
2494
18.6M
        gt1_ctxt = temp_gt1_ctxt;
2495
#if DISABLE_ZCSBF
2496
        /*Initialize skip zero cbf flag to 0*/
2497
        i4_skip_zero_csbf = 0;
2498
        i4_num_abs_1_coeffs = 0;
2499
#endif
2500
2501
18.6M
#if OPT_MEMCPY
2502
18.6M
        ihevce_copy_backup_ctxt(
2503
18.6M
            (void *)&s_sub_blk_not_coded_cabac_ctxt,
2504
18.6M
            (void *)ps_cabac,
2505
18.6M
            (void *)&s_backup_ctxt_sub_blk_not_coded,
2506
18.6M
            (void *)&s_backup_ctxt);
2507
18.6M
        memset(s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup, 0, 5);
2508
18.6M
        memset(s_backup_ctxt.au1_ctxt_to_backup, 0, 5);
2509
#else
2510
        memcpy(&s_sub_blk_not_coded_cabac_ctxt, ps_cabac, sizeof(cab_ctxt_t));
2511
#endif
2512
        // i4_temp_bits = s_sub_blk_not_coded_cabac_ctxt.u4_bits_estimated_q12;
2513
2514
18.6M
        blk_row = pu1_trans_table[i] >> shift_value; /*row of csb*/
2515
18.6M
        blk_col = pu1_trans_table[i] & mask_value; /*col of csb*/
2516
2517
18.6M
        scaled_blk_row = blk_row << 2;
2518
18.6M
        scaled_blk_col = blk_col << 2;
2519
2520
18.6M
        infer_coeff = (i < last_csb) && (i > 0);
2521
18.6M
        u2_marker_csbf = *pu2_sig_coeff_buf;
2522
2523
18.6M
        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
2524
11.2M
        {
2525
11.2M
            if(!ps_rdoq_ctxt
2526
11.2M
                    ->pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
2527
2.60M
            {
2528
                /* clear the 2nd bit if the right csb is 0 */
2529
2.60M
                u2_marker_csbf = u2_marker_csbf & (~(1 << 1));
2530
2.60M
            }
2531
11.2M
        }
2532
18.6M
        if((blk_row + 1 < trans_size / 4)) /* checking bottom boundary */
2533
11.1M
        {
2534
11.1M
            if(!ps_rdoq_ctxt
2535
11.1M
                    ->pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
2536
2.40M
            {
2537
                /* clear the 3rd bit if the bottom csb is 0*/
2538
2.40M
                u2_marker_csbf = u2_marker_csbf & (~(1 << 2));
2539
2.40M
            }
2540
11.1M
        }
2541
18.6M
        pu2_sig_coeff_buf++;
2542
2543
        /* sanity checks for marker present in every csbf flag */
2544
18.6M
        ASSERT((u2_marker_csbf >> 4) == 0xBAD);
2545
2546
        /* extract the current and neigbour csbf flags */
2547
18.6M
        cur_csbf = u2_marker_csbf & 0x1;
2548
18.6M
        nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
2549
2550
18.6M
        if((i < last_csb) && (i > 0))
2551
11.1M
        {
2552
11.1M
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
2553
2554
            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
2555
11.1M
            ctxt_idx += nbr_csbf ? 1 : 0;
2556
11.1M
            ctxt_idx += is_luma ? 0 : 2;
2557
2558
11.1M
            ret |= ihevce_cabac_encode_bin(ps_cabac, cur_csbf, ctxt_idx);
2559
2560
11.1M
            s_backup_ctxt.au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 1;
2561
2562
11.1M
            if(cur_csbf)
2563
9.86M
            {
2564
9.86M
                ret |= ihevce_cabac_encode_bin(&s_sub_blk_not_coded_cabac_ctxt, 0, ctxt_idx);
2565
                // clang-format off
2566
9.86M
                i4_sub_blk_not_coded_bits =
2567
9.86M
                    s_sub_blk_not_coded_cabac_ctxt.u4_bits_estimated_q12;  // - i4_temp_bits;
2568
9.86M
                s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 1;
2569
                // clang-format on
2570
9.86M
            }
2571
11.1M
        }
2572
7.44M
        else
2573
7.44M
        {
2574
            /* sanity check, this csb contains the last_sig_coeff */
2575
7.44M
            if(i == last_csb)
2576
5.97M
            {
2577
5.97M
                ASSERT(cur_csbf == 1);
2578
5.97M
            }
2579
7.44M
        }
2580
        /*If any block in the TU is coded and the 0th block is not coded, the 0th
2581
          block is still signalled as csbf = 1, and with all sig_coeffs sent as
2582
          0(HEVC requirement)*/
2583
18.6M
        if((ps_rdoq_ctxt->i1_tu_is_coded == 1) && (i == 0))
2584
1.46M
        {
2585
1.46M
            i4_sub_blk_not_coded_bits = ihevce_code_all_sig_coeffs_as_0_explicitly(
2586
1.46M
                (void *)ps_rdoq_ctxt,
2587
1.46M
                i,
2588
1.46M
                pu1_trans_table,
2589
1.46M
                is_luma,
2590
1.46M
                scan_type,
2591
1.46M
                infer_coeff,
2592
1.46M
                nbr_csbf,
2593
1.46M
                &s_sub_blk_not_coded_cabac_ctxt);
2594
1.46M
        }
2595
2596
18.6M
        if(i == last_csb)
2597
5.97M
        {
2598
5.97M
            WORD32 i4_last_x = *pu1_last_sig_coeff_x;
2599
5.97M
            WORD32 i4_last_y = *pu1_last_sig_coeff_y;
2600
5.97M
            if(SCAN_VERT == scan_type)
2601
1.24M
            {
2602
                /* last coeff x and y are swapped for vertical scan */
2603
1.24M
                SWAP(i4_last_x, i4_last_y);
2604
1.24M
            }
2605
            /* Encode the last_sig_coeff_x and last_sig_coeff_y */
2606
5.97M
            ret |= ihevce_cabac_encode_last_coeff_x_y(
2607
5.97M
                ps_cabac, i4_last_x, i4_last_y, log2_tr_size, is_luma);
2608
5.97M
            s_backup_ctxt.au1_ctxt_to_backup[LASTXY] = 1;
2609
5.97M
        }
2610
2611
18.6M
        if(cur_csbf)
2612
17.3M
        {
2613
            /*****************************************************************/
2614
            /* encode the sig coeff map as per section 7.3.13                */
2615
            /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
2616
            /*****************************************************************/
2617
2618
17.3M
            WORD32 i4_bit_depth;
2619
17.3M
            WORD32 i4_shift_iq;
2620
17.3M
            WORD32 i4_dequant_val;
2621
17.3M
            WORD32 bit; /* temp boolean */
2622
2623
17.3M
            UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
2624
17.3M
            WORD32 sig_coeff_map = u2_gt0_flags;
2625
17.3M
            WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
2626
17.3M
            WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
2627
2628
17.3M
            WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
2629
2630
17.3M
            WORD16 *pi2_dequant_coeff = ps_rdoq_ctxt->pi2_dequant_coeff;
2631
17.3M
            WORD16 i2_qp_rem = ps_rdoq_ctxt->i2_qp_rem;
2632
17.3M
            WORD32 i4_qp_div = ps_rdoq_ctxt->i4_qp_div;
2633
2634
17.3M
            WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs  */
2635
17.3M
            WORD32 num_coded = 0; /* total coeffs coded in 4x4            */
2636
2637
            /* total count of coeffs to be coded as abs level remaining */
2638
17.3M
            WORD32 num_coeffs_remaining = 0;
2639
2640
            /* count of coeffs to be coded as  abslevel-1 */
2641
17.3M
            WORD32 num_coeffs_base1 = 0;
2642
17.3M
            WORD32 scan_pos;
2643
17.3M
            WORD32 first_gt1_coeff = 0;
2644
2645
17.3M
            i4_bit_depth = ps_entropy_ctxt->ps_sps->i1_bit_depth_luma_minus8 + 8;
2646
17.3M
            i4_shift_iq = i4_bit_depth + ps_rdoq_ctxt->i4_log2_trans_size - 5;
2647
2648
17.3M
            i4_sub_blk_is_coded = 1;
2649
2650
17.3M
            if((i != 0) || (0 == last_csb))
2651
15.8M
            {
2652
                /* sanity check, atleast one coeff is coded as csbf is set */
2653
15.8M
                ASSERT(sig_coeff_map != 0);
2654
15.8M
            }
2655
            /*Calculating the distortions produced*/
2656
17.3M
            {
2657
17.3M
                WORD32 k, j;
2658
17.3M
                WORD16 *pi2_temp_coeff =
2659
17.3M
                    &pi2_coeffs[scaled_blk_col + (scaled_blk_row * trans_size)];
2660
17.3M
                WORD16 *pi2_temp_tr_coeff =
2661
17.3M
                    &pi2_tr_coeffs[scaled_blk_col + (scaled_blk_row * trans_size)];
2662
17.3M
                WORD16 *pi2_temp_dequant_coeff =
2663
17.3M
                    &pi2_dequant_coeff[scaled_blk_col + (scaled_blk_row * trans_size)];
2664
2665
86.5M
                for(k = 0; k < 4; k++)
2666
69.2M
                {
2667
346M
                    for(j = 0; j < 4; j++)
2668
276M
                    {
2669
276M
                        if(*pi2_temp_coeff)
2670
208M
                        {
2671
                            /*Inverse quantizing for distortion calculation*/
2672
208M
                            if(ps_rdoq_ctxt->i4_trans_size != 4)
2673
158M
                            {
2674
158M
                                IQUANT(
2675
158M
                                    i4_dequant_val,
2676
158M
                                    *pi2_temp_coeff,
2677
158M
                                    *pi2_temp_dequant_coeff * g_ihevc_iquant_scales[i2_qp_rem],
2678
158M
                                    i4_shift_iq,
2679
158M
                                    i4_qp_div);
2680
158M
                            }
2681
50.1M
                            else
2682
50.1M
                            {
2683
50.1M
                                IQUANT_4x4(
2684
50.1M
                                    i4_dequant_val,
2685
50.1M
                                    *pi2_temp_coeff,
2686
50.1M
                                    *pi2_temp_dequant_coeff * g_ihevc_iquant_scales[i2_qp_rem],
2687
50.1M
                                    i4_shift_iq,
2688
50.1M
                                    i4_qp_div);
2689
50.1M
                            }
2690
2691
208M
                            i8_sub_blk_coded_dist +=
2692
208M
                                CALC_SSD_IN_TRANS_DOMAIN(*pi2_temp_tr_coeff, i4_dequant_val, 0, 0);
2693
2694
208M
                            i8_sub_blk_not_coded_dist +=
2695
208M
                                CALC_SSD_IN_TRANS_DOMAIN(*pi2_temp_tr_coeff, 0, 0, 0);
2696
208M
                        }
2697
#if DISABLE_ZCSBF
2698
                        if(abs(*pi2_temp_coeff) > 1)
2699
                        {
2700
                            i4_skip_zero_csbf = 1;
2701
                        }
2702
                        else if(abs(*pi2_temp_coeff) == 1)
2703
                        {
2704
                            i4_num_abs_1_coeffs++;
2705
                        }
2706
#endif
2707
276M
                        pi2_temp_coeff++;
2708
276M
                        pi2_temp_tr_coeff++;
2709
276M
                        pi2_temp_dequant_coeff++;
2710
276M
                    }
2711
69.2M
                    pi2_temp_tr_coeff += ps_rdoq_ctxt->i4_trans_size - 4;
2712
69.2M
                    pi2_temp_coeff += ps_rdoq_ctxt->i4_q_data_strd - 4;
2713
69.2M
                    pi2_dequant_coeff += ps_rdoq_ctxt->i4_trans_size - 4;
2714
69.2M
                }
2715
17.3M
            }
2716
2717
#if DISABLE_ZCSBF
2718
            i4_skip_zero_csbf = i4_skip_zero_csbf || (i4_num_abs_1_coeffs > 3);
2719
#endif
2720
17.3M
            pu2_sig_coeff_buf += 3;
2721
2722
17.3M
            scan_pos = 15;
2723
17.3M
            if(i == last_csb)
2724
5.97M
            {
2725
                /*************************************************************/
2726
                /* clear last_scan_pos for last block in scan order as this  */
2727
                /* is communicated  throught last_coeff_x and last_coeff_y   */
2728
                /*************************************************************/
2729
5.97M
                WORD32 next_sig = CLZ(sig_coeff_map) + 1;
2730
2731
5.97M
                scan_pos = WORD_SIZE - next_sig;
2732
2733
                /* prepare the bins for gt1 flags */
2734
5.97M
                EXTRACT_BIT(bit, gt1_flags, scan_pos);
2735
2736
                /* insert gt1 bin in lsb */
2737
5.97M
                gt1_bins |= bit;
2738
2739
                /* prepare the bins for sign flags */
2740
5.97M
                EXTRACT_BIT(bit, sign_flags, scan_pos);
2741
2742
                /* insert sign bin in lsb */
2743
5.97M
                sign_bins |= bit;
2744
2745
5.97M
                sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
2746
2747
5.97M
                scan_pos--;
2748
5.97M
                num_coded++;
2749
5.97M
            }
2750
2751
            /* encode the required sigcoeff flags (abslevel > 0)   */
2752
270M
            while(scan_pos >= 0)
2753
252M
            {
2754
252M
                WORD32 y_pos_x_pos;
2755
252M
                WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
2756
2757
252M
                WORD32 sig_coeff;
2758
2759
252M
                EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
2760
2761
                /* derive the x,y pos */
2762
252M
                y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
2763
2764
                /* derive the context inc as per section 9.3.3.1.4 */
2765
252M
                if(2 == log2_tr_size)
2766
52.8M
                {
2767
                    /* 4x4 transform size increment uses lookup */
2768
52.8M
                    sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
2769
52.8M
                }
2770
199M
                else if(scan_pos || i)
2771
198M
                {
2772
                    /* ctxt for AC coeff depends on curpos and neigbour csbf */
2773
198M
                    sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
2774
2775
                    /* based on luma subblock pos */
2776
198M
                    sig_ctxinc += (i && is_luma) ? 3 : 0;
2777
198M
                }
2778
1.56M
                else
2779
1.56M
                {
2780
                    /* DC coeff has fixed context for luma and chroma */
2781
1.56M
                    sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
2782
1.56M
                                                  : IHEVC_CAB_COEFF_FLAG + 27;
2783
1.56M
                }
2784
2785
                /*************************************************************/
2786
                /* encode sig coeff only if required                         */
2787
                /* decoder infers 0,0 coeff when all the other coeffs are 0  */
2788
                /*************************************************************/
2789
252M
                if(scan_pos || (!infer_coeff))
2790
252M
                {
2791
252M
                    ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
2792
                    //ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
2793
252M
                    {
2794
252M
                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2795
2796
                        /* increment bits generated based on state and bin encoded */
2797
252M
                        ps_cabac->u4_bits_estimated_q12 +=
2798
252M
                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ sig_coeff];
2799
2800
                        /* update the context model from state transition LUT */
2801
252M
                        pu1_ctxt_model[ctxt_idx] =
2802
252M
                            gau1_ihevc_next_state[(state_mps << 1) | sig_coeff];
2803
252M
                    }
2804
252M
                }
2805
2806
252M
                if(sig_coeff)
2807
202M
                {
2808
                    /* prepare the bins for gt1 flags */
2809
202M
                    EXTRACT_BIT(bit, gt1_flags, scan_pos);
2810
2811
                    /* shift and insert gt1 bin in lsb */
2812
202M
                    gt1_bins <<= 1;
2813
202M
                    gt1_bins |= bit;
2814
2815
                    /* prepare the bins for sign flags */
2816
202M
                    EXTRACT_BIT(bit, sign_flags, scan_pos);
2817
2818
                    /* shift and insert sign bin in lsb */
2819
202M
                    sign_bins <<= 1;
2820
202M
                    sign_bins |= bit;
2821
2822
202M
                    num_coded++;
2823
2824
                    /* 0,0 coeff can no more be inferred :( */
2825
202M
                    infer_coeff = 0;
2826
202M
                }
2827
2828
252M
                scan_pos--;
2829
252M
            }
2830
2831
17.3M
            s_backup_ctxt.au1_ctxt_to_backup[SIG_COEFF] = 1;
2832
2833
            /****************************************************************/
2834
            /* encode the abs level greater than 1 bins; Section 7.3.13     */
2835
            /* These have already been prepared during sig_coeff_map encode */
2836
            /* Context modelling done as per section 9.3.3.1.5              */
2837
            /****************************************************************/
2838
17.3M
            {
2839
17.3M
                WORD32 j;
2840
2841
                /* context set based on luma subblock pos */
2842
17.3M
                WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
2843
2844
                /* count of coeffs with abslevel > 1; max of 8 to be coded */
2845
17.3M
                WORD32 num_gt1_bins = MIN(8, num_coded);
2846
2847
17.3M
                if(num_coded > 8)
2848
13.3M
                {
2849
                    /* pull back the bins to required number */
2850
13.3M
                    gt1_bins >>= (num_coded - 8);
2851
2852
13.3M
                    num_coeffs_remaining += (num_coded - 8);
2853
13.3M
                    num_coeffs_base1 = (num_coded - 8);
2854
13.3M
                }
2855
2856
                /* See section 9.3.3.1.5           */
2857
17.3M
                ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
2858
2859
17.3M
                gt1_ctxt = 1;
2860
2861
140M
                for(j = num_gt1_bins - 1; j >= 0; j--)
2862
123M
                {
2863
                    /* Encodet the abs level gt1 bins */
2864
123M
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
2865
2866
123M
                    EXTRACT_BIT(bit, gt1_bins, j);
2867
2868
                    //ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
2869
123M
                    {
2870
123M
                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2871
2872
                        /* increment bits generated based on state and bin encoded */
2873
123M
                        ps_cabac->u4_bits_estimated_q12 +=
2874
123M
                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ bit];
2875
2876
                        /* update the context model from state transition LUT */
2877
123M
                        pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | bit];
2878
123M
                    }
2879
2880
123M
                    if(bit)
2881
91.4M
                    {
2882
91.4M
                        gt1_ctxt = 0;
2883
91.4M
                        num_coeffs_remaining++;
2884
91.4M
                    }
2885
31.8M
                    else if(gt1_ctxt && (gt1_ctxt < 3))
2886
10.4M
                    {
2887
10.4M
                        gt1_ctxt++;
2888
10.4M
                    }
2889
123M
                }
2890
17.3M
                s_backup_ctxt.au1_ctxt_to_backup[GRTR_THAN_1] = 1;
2891
                /*************************************************************/
2892
                /* encode abs level greater than 2 bin; Section 7.3.13       */
2893
                /*************************************************************/
2894
17.3M
                if(gt1_bins)
2895
15.1M
                {
2896
15.1M
                    WORD32 gt2_bin;
2897
2898
15.1M
                    first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
2899
15.1M
                    gt2_bin = (first_gt1_coeff > 2);
2900
2901
                    /* atleast one level > 2 */
2902
15.1M
                    ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
2903
2904
15.1M
                    ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
2905
2906
                    //ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
2907
15.1M
                    {
2908
15.1M
                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
2909
2910
                        /* increment bits generated based on state and bin encoded */
2911
15.1M
                        ps_cabac->u4_bits_estimated_q12 +=
2912
15.1M
                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ gt2_bin];
2913
2914
                        /* update the context model from state transition LUT */
2915
15.1M
                        pu1_ctxt_model[ctxt_idx] =
2916
15.1M
                            gau1_ihevc_next_state[(state_mps << 1) | gt2_bin];
2917
15.1M
                    }
2918
2919
15.1M
                    if(!gt2_bin)
2920
5.29M
                    {
2921
                        /* sanity check */
2922
5.29M
                        ASSERT(first_gt1_coeff == 2);
2923
2924
                        /* no need to send this coeff as bypass bins */
2925
5.29M
                        pu2_sig_coeff_buf++;
2926
5.29M
                        num_coeffs_remaining--;
2927
5.29M
                    }
2928
15.1M
                    s_backup_ctxt.au1_ctxt_to_backup[GRTR_THAN_2] = 1;
2929
15.1M
                }
2930
17.3M
            }
2931
2932
            /*************************************************************/
2933
            /* encode the coeff signs and abs remaing levels             */
2934
            /*************************************************************/
2935
17.3M
            if(num_coded)
2936
17.2M
            {
2937
17.2M
                WORD32 base_level;
2938
17.2M
                WORD32 rice_param = 0;
2939
17.2M
                WORD32 j;
2940
2941
                /*************************************************************/
2942
                /* encode the coeff signs populated in sign_bins             */
2943
                /*************************************************************/
2944
17.2M
                if(num_coded > 0)
2945
17.2M
                {
2946
17.2M
                    ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded);
2947
17.2M
                }
2948
                /*************************************************************/
2949
                /* encode the coeff_abs_level_remaining as TR / EGK bins     */
2950
                /* See section 9.3.2.7 for details                           */
2951
                /*************************************************************/
2952
2953
                /* first remaining coeff baselevel */
2954
17.2M
                if(first_gt1_coeff > 2)
2955
9.81M
                {
2956
9.81M
                    base_level = 3;
2957
9.81M
                }
2958
7.48M
                else if(num_coeffs_remaining > num_coeffs_base1)
2959
4.70M
                {
2960
                    /* atleast one coeff in first 8 is gt > 1 */
2961
4.70M
                    base_level = 2;
2962
4.70M
                }
2963
2.77M
                else
2964
2.77M
                {
2965
                    /* all coeffs have base of 1 */
2966
2.77M
                    base_level = 1;
2967
2.77M
                }
2968
2969
188M
                for(j = 0; j < num_coeffs_remaining; j++)
2970
171M
                {
2971
171M
                    WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
2972
171M
                    WORD32 abs_coeff_rem;
2973
171M
                    WORD32 rice_max = (4 << rice_param);
2974
2975
171M
                    pu2_sig_coeff_buf++;
2976
2977
                    /* sanity check */
2978
171M
                    ASSERT(abs_coeff >= base_level);
2979
2980
171M
                    abs_coeff_rem = (abs_coeff - base_level);
2981
2982
                    /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
2983
                    /* TODO://HM-8.0-dev does either TR or EGK but not both */
2984
171M
                    if(abs_coeff_rem >= rice_max)
2985
24.5M
                    {
2986
24.5M
                        UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
2987
2988
                        /* coeff exceeds max rice limit                    */
2989
                        /* encode the TR prefix as tunary code             */
2990
                        /* prefix = 1111 as (rice_max >> rice_praram) = 4  */
2991
24.5M
                        ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4);
2992
2993
                        /* encode the exponential golomb code suffix */
2994
24.5M
                        ret |= ihevce_cabac_encode_egk(ps_cabac, u4_suffix, (rice_param + 1));
2995
24.5M
                    }
2996
146M
                    else
2997
146M
                    {
2998
                        /* code coeff as truncated rice code  */
2999
146M
                        ret |= ihevce_cabac_encode_trunc_rice(
3000
146M
                            ps_cabac, abs_coeff_rem, rice_param, rice_max);
3001
146M
                    }
3002
3003
                    /* update the rice param based on coeff level */
3004
171M
                    if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
3005
33.3M
                    {
3006
33.3M
                        rice_param++;
3007
33.3M
                    }
3008
3009
                    /* change base level to 1 if more than 8 coded coeffs */
3010
171M
                    if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
3011
71.6M
                    {
3012
71.6M
                        base_level = 2;
3013
71.6M
                    }
3014
99.3M
                    else
3015
99.3M
                    {
3016
99.3M
                        base_level = 1;
3017
99.3M
                    }
3018
171M
                }
3019
17.2M
            }
3020
3021
17.3M
            i4_sub_blk_coded_bits = ps_cabac->u4_bits_estimated_q12;
3022
            /**********************************************************/
3023
            /**********************************************************/
3024
            /**********************************************************/
3025
            /*Decide whether sub block should be coded or not*/
3026
            /**********************************************************/
3027
            /**********************************************************/
3028
            /**********************************************************/
3029
17.3M
            i8_sub_blk_coded_metric = CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
3030
17.3M
                                          i8_sub_blk_coded_dist, 0, i4_round_val, i4_shift_val) +
3031
17.3M
                                      COMPUTE_RATE_COST_CLIP30_RDOQ(
3032
17.3M
                                          i4_sub_blk_coded_bits,
3033
17.3M
                                          ps_rdoq_ctxt->i8_cl_ssd_lambda_qf,
3034
17.3M
                                          (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
3035
17.3M
            i8_sub_blk_not_coded_metric =
3036
17.3M
                CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
3037
17.3M
                    i8_sub_blk_not_coded_dist, 0, i4_round_val, i4_shift_val) +
3038
17.3M
                COMPUTE_RATE_COST_CLIP30_RDOQ(
3039
17.3M
                    i4_sub_blk_not_coded_bits,
3040
17.3M
                    ps_rdoq_ctxt->i8_cl_ssd_lambda_qf,
3041
17.3M
                    (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
3042
3043
#if DISABLE_ZCSBF
3044
            if(((i8_sub_blk_not_coded_metric < i8_sub_blk_coded_metric) ||
3045
                (i4_sub_blk_is_coded == 0)) &&
3046
               (i4_skip_zero_csbf == 0))
3047
#else
3048
17.3M
            if((i8_sub_blk_not_coded_metric < i8_sub_blk_coded_metric) ||
3049
17.3M
               (i4_sub_blk_is_coded == 0))
3050
603k
#endif
3051
603k
            {
3052
603k
#if OPT_MEMCPY
3053
603k
                ihevce_copy_backup_ctxt(
3054
603k
                    (void *)ps_cabac,
3055
603k
                    (void *)&s_sub_blk_not_coded_cabac_ctxt,
3056
603k
                    (void *)&s_backup_ctxt,
3057
603k
                    (void *)&s_backup_ctxt_sub_blk_not_coded);
3058
#else
3059
                memcpy(ps_cabac, &s_sub_blk_not_coded_cabac_ctxt, sizeof(cab_ctxt_t));
3060
#endif
3061
603k
                scan_pos = 15;
3062
603k
                i4_sub_blk_is_coded = 0;
3063
3064
603k
                {
3065
603k
                    WORD32 k, j;
3066
603k
                    WORD16 *pi2_temp_coeff =
3067
603k
                        &pi2_coeffs[scaled_blk_col + (scaled_blk_row * ps_rdoq_ctxt->i4_q_data_strd)];
3068
603k
                    WORD16 *pi2_temp_iquant_coeff =
3069
603k
                        &ps_rdoq_ctxt->pi2_iquant_coeffs
3070
603k
                             [scaled_blk_col + (scaled_blk_row * ps_rdoq_ctxt->i4_iq_data_strd)];
3071
3.01M
                    for(k = 0; k < 4; k++)
3072
2.41M
                    {
3073
12.0M
                        for(j = 0; j < 4; j++)
3074
9.65M
                        {
3075
9.65M
                            *pi2_temp_coeff = 0;
3076
9.65M
                            *pi2_temp_iquant_coeff = 0;
3077
3078
9.65M
                            pi2_temp_coeff++;
3079
9.65M
                            pi2_temp_iquant_coeff++;
3080
9.65M
                        }
3081
2.41M
                        pi2_temp_coeff += ps_rdoq_ctxt->i4_q_data_strd - 4;
3082
2.41M
                        pi2_temp_iquant_coeff += ps_rdoq_ctxt->i4_iq_data_strd - 4;
3083
2.41M
                    }
3084
603k
                }
3085
3086
                /* If the csb to be masked is the last csb, then we should
3087
                 * signal last x and last y from the next coded sub_blk */
3088
603k
                if(i == last_csb)
3089
400k
                {
3090
400k
                    pu1_coeff_buf_hdr = (UWORD8 *)pu2_sig_coeff_buf;
3091
3092
400k
                    ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 0;
3093
400k
                    last_csb = ihevce_find_new_last_csb(
3094
400k
                        pi4_subBlock2csbfId_map,
3095
400k
                        i,
3096
400k
                        (void *)ps_rdoq_ctxt,
3097
400k
                        pu1_trans_table,
3098
400k
                        pu1_csb_table,
3099
400k
                        pi2_coeffs,
3100
400k
                        shift_value,
3101
400k
                        mask_value,
3102
400k
                        &pu1_coeff_buf_hdr);
3103
                    /*We are in a for loop. This means that the decrement to i happens immediately right
3104
                      at the end of the for loop. This would decrement the value of i to (last_csb - 1).
3105
                      Hence we increment i by 1, so that after the decrement i becomes last_csb.*/
3106
400k
                    i = last_csb + 1;
3107
400k
                    pu1_last_sig_coeff_x = &pu1_coeff_buf_hdr[0];
3108
400k
                    pu1_last_sig_coeff_y = &pu1_coeff_buf_hdr[1];
3109
400k
                    scan_type = pu1_coeff_buf_hdr[2];
3110
400k
                    pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + 4);
3111
400k
                }
3112
603k
                i8_tu_coded_dist += i8_sub_blk_not_coded_dist;
3113
603k
                i4_tu_coded_bits += i4_sub_blk_not_coded_bits;
3114
603k
            }
3115
16.7M
            else
3116
16.7M
            {
3117
16.7M
                ps_rdoq_ctxt->i1_tu_is_coded = 1;
3118
16.7M
                temp_gt1_ctxt = gt1_ctxt;
3119
3120
16.7M
                i8_tu_coded_dist += i8_sub_blk_coded_dist;
3121
16.7M
                i4_tu_coded_bits += i4_sub_blk_coded_bits;
3122
16.7M
            }
3123
#if DISABLE_ZCSBF
3124
            i4_skip_zero_cbf = i4_skip_zero_cbf || i4_skip_zero_csbf;
3125
#endif
3126
            /*Cumulating the distortion for the entire TU*/
3127
17.3M
            i8_tu_not_coded_dist += i8_sub_blk_not_coded_dist;
3128
            //i4_tu_coded_dist                += i4_sub_blk_coded_dist;
3129
            //i4_tu_coded_bits                += i4_sub_blk_coded_bits;
3130
17.3M
            i8_sub_blk_not_coded_dist = 0;
3131
17.3M
            i4_sub_blk_not_coded_bits = 0;
3132
17.3M
            i8_sub_blk_coded_dist = 0;
3133
17.3M
            i4_sub_blk_coded_bits = 0;
3134
3135
17.3M
            if(i4_sub_blk_is_coded)
3136
16.7M
            {
3137
16.7M
                ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 1;
3138
16.7M
                temp_zero_col = (temp_zero_col) | (0xF << scaled_blk_col);
3139
16.7M
                temp_zero_row = (temp_zero_row) | (0xF << scaled_blk_row);
3140
16.7M
            }
3141
603k
            else
3142
603k
            {
3143
603k
                if(!((ps_rdoq_ctxt->i1_tu_is_coded == 1) && (i == 0)))
3144
602k
                {
3145
602k
                    ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 0;
3146
602k
                }
3147
603k
            }
3148
17.3M
        }
3149
18.6M
    }
3150
3151
    /*tap texture bits*/
3152
5.79M
    {
3153
5.79M
        ps_cabac->u4_texture_bits_estimated_q12 +=
3154
5.79M
            (ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
3155
5.79M
    }
3156
3157
5.79M
    i8_tu_not_coded_dist =
3158
5.79M
        CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(i8_tu_not_coded_dist, 0, i4_round_val, i4_shift_val);
3159
3160
    /* i4_tu_coded_dist = CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
3161
        i4_tu_coded_dist, 0, i4_round_val, i4_shift_val); */
3162
5.79M
    *pi8_tu_coded_dist = i8_tu_coded_dist;
3163
5.79M
    *pi8_tu_not_coded_dist = i8_tu_not_coded_dist;
3164
#if DISABLE_ZCSBF
3165
    if(i4_skip_zero_cbf == 1)
3166
    {
3167
        *pi8_tu_not_coded_dist = 0x7FFFFFFF;
3168
    }
3169
#endif
3170
3171
5.79M
    *ps_rdoq_ctxt->pi4_zero_col = ~temp_zero_col;
3172
5.79M
    *ps_rdoq_ctxt->pi4_zero_row = ~temp_zero_row;
3173
3174
5.79M
    return (ret);
3175
5.79M
}
3176
3177
/**
3178
******************************************************************************
3179
*
3180
*  @brief Codes all the sig coeffs as 0
3181
*
3182
*  @param[in]   i
3183
*  Index of the current csb
3184
*
3185
*  @param[in]   pu1_trans_table
3186
*  Pointer to the trans table
3187
*
3188
*  @param[in]  scan_type
3189
*  Determines the scan order
3190
*
3191
*  @param[in]  infer_coeff
3192
*  Indicates whether the 0,0 coeff can be inferred or not
3193
*
3194
*  @param[in]   nbr_csbf
3195
*  Talks about if the neighboour csbs(right and bottom) are coded or not
3196
*
3197
*  @param[in]    ps_cabac
3198
*  Cabac state
3199
*
3200
*  @param[out]    pi4_tu_not_coded_dist
3201
*  The distortion when the entire TU is not coded(all coeffs are set to 0) is stored here
3202
*
3203
*  @return    The number of bits generated when the 0th sub blk is coded as all 0s
3204
*             This is the cumulate bits(i.e. for all blocks in the TU), and not only
3205
*             the bits generated for this block
3206
*
3207
******************************************************************************
3208
*/
3209
WORD32 ihevce_code_all_sig_coeffs_as_0_explicitly(
3210
    void *pv_rdoq_ctxt,
3211
    WORD32 i,
3212
    UWORD8 *pu1_trans_table,
3213
    WORD32 is_luma,
3214
    WORD32 scan_type,
3215
    WORD32 infer_coeff,
3216
    WORD32 nbr_csbf,
3217
    cab_ctxt_t *ps_cabac)
3218
1.46M
{
3219
1.46M
    WORD32 sig_coeff_base_ctxt;
3220
1.46M
    WORD32 scan_pos = 15;
3221
1.46M
    WORD32 ctxt_idx;
3222
1.46M
    WORD32 ret = 0;
3223
3224
1.46M
    rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
3225
3226
1.46M
    WORD32 log2_tr_size = ps_rdoq_ctxt->i4_log2_trans_size;
3227
3228
1.46M
    (void)pu1_trans_table;
3229
1.46M
    if(is_luma)
3230
1.05M
    {
3231
1.05M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
3232
1.05M
        if(3 == log2_tr_size)
3233
669k
        {
3234
            /* 8x8 transform size */
3235
669k
            sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
3236
669k
        }
3237
384k
        else if(3 < log2_tr_size)
3238
384k
        {
3239
            /* larger transform sizes */
3240
384k
            sig_coeff_base_ctxt += 21;
3241
384k
        }
3242
1.05M
    }
3243
413k
    else
3244
413k
    {
3245
        /* chroma context initializations */
3246
413k
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
3247
3248
413k
        if(3 == log2_tr_size)
3249
274k
        {
3250
            /* 8x8 transform size */
3251
274k
            sig_coeff_base_ctxt += 9;
3252
274k
        }
3253
138k
        else if(3 < log2_tr_size)
3254
138k
        {
3255
            /* larger transform sizes */
3256
138k
            sig_coeff_base_ctxt += 12;
3257
138k
        }
3258
413k
    }
3259
24.9M
    while(scan_pos >= 0)
3260
23.4M
    {
3261
23.4M
        WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
3262
23.4M
        WORD32 sig_coeff = 0;
3263
        /* derive the x,y pos */
3264
23.4M
        WORD32 y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
3265
3266
        /* derive the context inc as per section 9.3.3.1.4 */
3267
23.4M
        if(2 == log2_tr_size)
3268
0
        {
3269
            /* 4x4 transform size increment uses lookup */
3270
0
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
3271
0
        }
3272
23.4M
        else if(scan_pos || i)
3273
22.0M
        {
3274
            /* ctxt for AC coeff depends on curpos and neigbour csbf */
3275
22.0M
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
3276
3277
            /* based on luma subblock pos */
3278
22.0M
            sig_ctxinc += (i && is_luma) ? 3 : 0;
3279
22.0M
        }
3280
1.46M
        else
3281
1.46M
        {
3282
            /* DC coeff has fixed context for luma and chroma */
3283
1.46M
            sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
3284
1.46M
        }
3285
3286
23.4M
        if(scan_pos || (!infer_coeff))
3287
23.4M
        {
3288
23.4M
            ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
3289
23.4M
            ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
3290
23.4M
            AEV_TRACE("significant_coeff_flag", sig_coeff, ps_cabac->u4_range);
3291
23.4M
        }
3292
23.4M
        scan_pos--;
3293
23.4M
    }
3294
1.46M
    return (ps_cabac->u4_bits_estimated_q12);  // - i4_temp_bits);
3295
1.46M
}
3296
3297
/**
3298
******************************************************************************
3299
*
3300
*  @brief Finds the next csb with a non-zero coeff
3301
*
3302
*  @paramp[in]  cur_last_csb_pos
3303
*  The index of the current csb with a non-zero coeff
3304
*
3305
*  @param[inout]   pv_rdoq_ctxt
3306
*  RODQ context structure
3307
*
3308
*  @param[in]   pu1_trans_table
3309
*  Pointer to the trans table
3310
*
3311
*  @param[in]   pi2_coeffs
3312
*  Pointer to all the quantized coefficients
3313
*
3314
*  @param[in]  shift_value
3315
*  Determines the shifting value for determining appropriate position of coeff
3316
*
3317
*  @param[in]  mask_value
3318
*  Determines the masking value for determining appropriate position of coeff
3319
*
3320
*  @param[in]   nbr_csbf
3321
*  Talks about if the neighboour csbs(right and bottom) are coded or not
3322
*
3323
*  @param[in]    ps_cabac
3324
*  Cabac state
3325
*
3326
*  @param[inout] ppu1_addr
3327
*  Pointer to the header(i.e. pointer used for traversing the ecd data generated
3328
*  in ihevce_scan_coeffs)
3329
*
3330
*  @return    The index of the csb with the next non-zero coeff
3331
*
3332
******************************************************************************
3333
*/
3334
WORD32 ihevce_find_new_last_csb(
3335
    WORD32 *pi4_subBlock2csbfId_map,
3336
    WORD32 cur_last_csb_pos,
3337
    void *pv_rdoq_ctxt,
3338
    UWORD8 *pu1_trans_table,
3339
    UWORD8 *pu1_csb_table,
3340
    WORD16 *pi2_coeffs,
3341
    WORD32 shift_value,
3342
    WORD32 mask_value,
3343
    UWORD8 **ppu1_addr)
3344
400k
{
3345
400k
    WORD32 blk_row;
3346
400k
    WORD32 blk_col;
3347
400k
    WORD32 x_pos;
3348
400k
    WORD32 y_pos;
3349
400k
    WORD32 i;
3350
400k
    WORD32 j;
3351
400k
    UWORD16 *pu2_out_data_coeff;
3352
400k
    rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
3353
400k
    WORD32 trans_size = ps_rdoq_ctxt->i4_trans_size;
3354
400k
    UWORD8 *pu1_out_data_header = *ppu1_addr;
3355
3356
677k
    for(i = cur_last_csb_pos - 1; i >= 0; i--)
3357
461k
    {
3358
        /* check for the first csb flag in our scan order */
3359
461k
        if(ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]])
3360
184k
        {
3361
184k
            UWORD8 u1_last_x, u1_last_y;
3362
184k
            WORD32 quant_coeff;
3363
3364
184k
            pu1_out_data_header -= 4;  //To move the pointer back to the appropriate position
3365
            /* row of csb */
3366
184k
            blk_row = pu1_trans_table[i] >> shift_value;
3367
            /* col of csb */
3368
184k
            blk_col = pu1_trans_table[i] & mask_value;
3369
3370
            /*check for the 1st non-0 values inside the csb in our scan order*/
3371
1.47M
            for(j = 15; j >= 0; j--)
3372
1.47M
            {
3373
1.47M
                x_pos = (pu1_csb_table[j] & 0x3) + blk_col * 4;
3374
1.47M
                y_pos = (pu1_csb_table[j] >> 2) + blk_row * 4;
3375
3376
1.47M
                quant_coeff = pi2_coeffs[x_pos + (y_pos * trans_size)];
3377
3378
1.47M
                if(quant_coeff != 0)
3379
184k
                    break;
3380
1.47M
            }
3381
3382
184k
            ASSERT(j >= 0);
3383
3384
184k
            u1_last_x = x_pos;
3385
184k
            u1_last_y = y_pos;
3386
3387
            /* storing last_x and last_y */
3388
184k
            *(pu1_out_data_header) = u1_last_x;
3389
184k
            *(pu1_out_data_header + 1) = u1_last_y;
3390
3391
            /* storing the scan order */
3392
184k
            *(pu1_out_data_header + 2) = ps_rdoq_ctxt->i4_scan_idx;
3393
3394
            /* storing last_sub_block pos. in scan order count */
3395
184k
            *(pu1_out_data_header + 3) = i;
3396
3397
            /*stored the first 4 bytes, now all are word16. So word16 pointer*/
3398
184k
            pu2_out_data_coeff = (UWORD16 *)(pu1_out_data_header + 4);
3399
3400
184k
            *pu2_out_data_coeff = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
3401
184k
            *ppu1_addr = pu1_out_data_header;
3402
3403
184k
            break; /*We just need this loop for finding 1st non-zero csb only*/
3404
184k
        }
3405
276k
        else
3406
276k
            pu1_out_data_header += 2;
3407
461k
    }
3408
400k
    return i;
3409
400k
}
3410
3411
/**
3412
******************************************************************************
3413
*
3414
*  @brief Used to optimize the memcpy of cabac states. It copies only those
3415
*  states in the cabac context which have been altered.
3416
*
3417
*  @param[inout]  pv_dest
3418
*  Pointer to destination cabac state.
3419
*
3420
*  @param[inout]   pv_backup_ctxt_dest
3421
*  Pointer to destination backup context
3422
*
3423
*  @param[inout]   pv_backup_ctxt_src
3424
*  Pointer to source backup context
3425
*
3426
*  @Desc:
3427
*  We go through each element in the backup_ctxt structure which will tell us
3428
*  if the states corresponding to lastxlasty, sigcoeffs, grtr_than_1_bins,
3429
*  grtr_than_2_bins and sub_blk_coded_flag(i.e. 0xBAD0) context elements
3430
*  have been altered. If they have been altered, we will memcpy the states
3431
*  corresponding to these context elements alone
3432
*
3433
*  @return  Nothing
3434
*
3435
******************************************************************************
3436
*/
3437
void ihevce_copy_backup_ctxt(
3438
    void *pv_dest, void *pv_src, void *pv_backup_ctxt_dest, void *pv_backup_ctxt_src)
3439
19.2M
{
3440
19.2M
    UWORD8 *pu1_dest = (UWORD8 *)(((cab_ctxt_t *)pv_dest)->au1_ctxt_models);
3441
19.2M
    UWORD8 *pu1_src = (UWORD8 *)(((cab_ctxt_t *)pv_src)->au1_ctxt_models);
3442
19.2M
    backup_ctxt_t *ps_backup_dest_ctxt = ((backup_ctxt_t *)pv_backup_ctxt_dest);
3443
19.2M
    backup_ctxt_t *ps_backup_src_ctxt = ((backup_ctxt_t *)pv_backup_ctxt_src);
3444
19.2M
    WORD32 i4_i;
3445
3446
    /*
3447
    0       IHEVC_CAB_COEFFX_PREFIX         lastx last y has been coded
3448
    1       IHEVC_CAB_CODED_SUBLK_IDX       sub-blk coded or not flag has been coded
3449
    2       IHEVC_CAB_COEFF_FLAG            sigcoeff has been coded
3450
    3       IHEVC_CAB_COEFABS_GRTR1_FLAG    greater than 1 bin has been coded
3451
    4       IHEVC_CAB_COEFABS_GRTR2_FLAG    greater than 2 bin has been coded*/
3452
19.2M
    assert(MAX_NUM_CONTEXT_ELEMENTS == 5);
3453
115M
    for(i4_i = 0; i4_i < MAX_NUM_CONTEXT_ELEMENTS; i4_i++)
3454
96.0M
    {
3455
96.0M
        if((ps_backup_src_ctxt->au1_ctxt_to_backup[SIG_COEFF]) ||
3456
96.0M
           (ps_backup_dest_ctxt->au1_ctxt_to_backup[SIG_COEFF]))
3457
11.7M
        {
3458
11.7M
            memcpy(&pu1_dest[IHEVC_CAB_COEFF_FLAG], &pu1_src[IHEVC_CAB_COEFF_FLAG], 42);
3459
11.7M
            ps_backup_dest_ctxt->au1_ctxt_to_backup[SIG_COEFF] = 0;
3460
11.7M
            ps_backup_src_ctxt->au1_ctxt_to_backup[SIG_COEFF] = 0;
3461
11.7M
        }
3462
96.0M
        if((ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_1]) ||
3463
96.0M
           (ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_1]))
3464
11.7M
        {
3465
11.7M
            memcpy(
3466
11.7M
                &pu1_dest[IHEVC_CAB_COEFABS_GRTR1_FLAG],
3467
11.7M
                &pu1_src[IHEVC_CAB_COEFABS_GRTR1_FLAG],
3468
11.7M
                24);
3469
11.7M
            ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_1] = 0;
3470
11.7M
            ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_1] = 0;
3471
11.7M
        }
3472
96.0M
        if((ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_2]) ||
3473
96.0M
           (ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_2]))
3474
9.92M
        {
3475
9.92M
            memcpy(
3476
9.92M
                &pu1_dest[IHEVC_CAB_COEFABS_GRTR2_FLAG], &pu1_src[IHEVC_CAB_COEFABS_GRTR2_FLAG], 6);
3477
9.92M
            ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_2] = 0;
3478
9.92M
            ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_2] = 0;
3479
9.92M
        }
3480
96.0M
        if((ps_backup_src_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG]) ||
3481
96.0M
           (ps_backup_dest_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG]))
3482
11.1M
        {
3483
11.1M
            memcpy(&pu1_dest[IHEVC_CAB_CODED_SUBLK_IDX], &pu1_src[IHEVC_CAB_CODED_SUBLK_IDX], 4);
3484
11.1M
            ps_backup_dest_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 0;
3485
11.1M
            ps_backup_src_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 0;
3486
11.1M
        }
3487
96.0M
        if((ps_backup_src_ctxt->au1_ctxt_to_backup[LASTXY]) ||
3488
96.0M
           (ps_backup_dest_ctxt->au1_ctxt_to_backup[LASTXY]))
3489
1.86M
        {
3490
1.86M
            memcpy(&pu1_dest[IHEVC_CAB_COEFFX_PREFIX], &pu1_src[IHEVC_CAB_COEFFX_PREFIX], 36);
3491
1.86M
            ps_backup_dest_ctxt->au1_ctxt_to_backup[LASTXY] = 0;
3492
1.86M
            ps_backup_src_ctxt->au1_ctxt_to_backup[LASTXY] = 0;
3493
1.86M
        }
3494
96.0M
    }
3495
19.2M
    ((cab_ctxt_t *)pv_dest)->u4_bits_estimated_q12 = ((cab_ctxt_t *)pv_src)->u4_bits_estimated_q12;
3496
19.2M
}